From 9a43dffa6c62f8a1b41d870a46270a5bfd3be0b7 Mon Sep 17 00:00:00 2001
From: UserIsMH <momohomma@googlemail.com>
Date: Sun, 19 Apr 2026 23:46:40 +0200
Subject: [PATCH] Articles: Paginierung, Timeline-Buckets,
 Sources-Summary-Endpunkt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backend:
- GET /{id}/articles paginiert jetzt per limit/offset (Default 500,
  Max 1000) und unterstuetzt optionalen search-Parameter (LIKE ueber
  headline/source/content). Response-Shape: {total, articles}.
- Neuer Endpunkt GET /{id}/articles/sources-summary liefert pro Quelle
  {source, article_count, languages} sowie language_counts gesamt —
  serverseitige Aggregation, unabhaengig von Artikel-Paginierung.
- Neuer Endpunkt GET /{id}/articles/timeline-buckets?granularity=hour|day|week|month
  aggregiert Artikel + Snapshot-Counts pro Zeitbucket (fuer spaetere
  Timeline-Zaehler ueber die volle Historie).
- database.py: Index idx_articles_incident_collected auf
  (incident_id, collected_at DESC) fuer schnelleres ORDER BY + Pagination.

Frontend:
- api.js: getArticles({limit, offset, search}),
  getArticlesSourcesSummary(), getArticlesTimelineBuckets().
- app.js: loadIncidentDetail laedt erste Seite (500 Artikel), startet
  _loadSourcesSummary parallel und zieht restliche Artikel
  batchweise (500er Bloecke) im Hintergrund nach, bis _currentArticlesTotal
  erreicht ist. rerenderTimeline nach jedem Batch.
- components.js: renderSourceOverviewFromSummary(data) rendert aus
  Aggregat-Daten (ersetzt clientseitige Zaehlung ueber geladene Artikel).

Hintergrund: /articles lieferte bei der Iran-Lage 22 MB (17.286 Artikel
mit SELECT *). Die Erstantwort sinkt auf ~650 KB (500 Artikel), weitere
werden progressiv im Hintergrund nachgeladen. Quellenuebersicht zeigt
dank Aggregat-Endpunkt sofort alle Quellen + Sprachen komplett.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/database.py             |   1 +
 src/routers/incidents.py    | 123 ++++++++++++++++++++++++++++++++++--
 src/static/js/api.js        |  16 ++++-
 src/static/js/app.js        |  83 ++++++++++++++++++++----
 src/static/js/components.js |  31 +++++++++
 5 files changed, 236 insertions(+), 18 deletions(-)

diff --git a/src/database.py b/src/database.py
index 4f2f7fd..91f814c 100644
--- a/src/database.py
+++ b/src/database.py
@@ -583,6 +583,7 @@ async def init_db():
         for idx_sql in [
             "CREATE INDEX IF NOT EXISTS idx_incidents_tenant_status ON incidents(tenant_id, status)",
             "CREATE INDEX IF NOT EXISTS idx_articles_tenant_incident ON articles(tenant_id, incident_id)",
+            "CREATE INDEX IF NOT EXISTS idx_articles_incident_collected ON articles(incident_id, collected_at DESC)",
         ]:
             try:
                 await db.execute(idx_sql)
diff --git a/src/routers/incidents.py b/src/routers/incidents.py
index 5585b15..e800bb2 100644
--- a/src/routers/incidents.py
+++ b/src/routers/incidents.py
@@ -317,18 +317,133 @@ async def delete_incident(
 @router.get("/{incident_id}/articles")
 async def get_articles(
     incident_id: int,
+    limit: int = Query(500, ge=1, le=1000),
+    offset: int = Query(0, ge=0),
+    search: str | None = Query(None, min_length=0, max_length=200),
     current_user: dict = Depends(get_current_user),
     db: aiosqlite.Connection = Depends(db_dependency),
 ):
-    """Alle Artikel einer Lage abrufen."""
+    """Return one page of an incident's articles, newest first.
+
+    Response: ``{"total": int, "articles": [...]}``; ``total`` counts all matches.
+    ``search`` is a literal substring filter (LIKE wildcards are escaped) over
+    headline, headline_de, source, content_de, content_original. Ties on
+    collected_at are broken by rowid so limit/offset pages stay disjoint."""
+    tenant_id = current_user.get("tenant_id")
+    await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
+
+    search_clean = (search or "").strip()
+    if search_clean:
+        like = "%" + search_clean.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + "%"
+        params = (incident_id, like, like, like, like, like)
+        where = (
+            "WHERE incident_id = ? AND ("
+            "COALESCE(headline,'') LIKE ? ESCAPE '\\' OR "
+            "COALESCE(headline_de,'') LIKE ? ESCAPE '\\' OR "
+            "COALESCE(source,'') LIKE ? ESCAPE '\\' OR "
+            "COALESCE(content_de,'') LIKE ? ESCAPE '\\' OR "
+            "COALESCE(content_original,'') LIKE ? ESCAPE '\\')"
+        )
+    else:
+        params = (incident_id,)
+        where = "WHERE incident_id = ?"
+
+    cursor = await db.execute(f"SELECT COUNT(*) AS cnt FROM articles {where}", params)
+    total = (await cursor.fetchone())["cnt"]
+
+    cursor = await db.execute(
+        f"SELECT * FROM articles {where} ORDER BY collected_at DESC, rowid LIMIT ? OFFSET ?",
+        (*params, limit, offset),
+    )
+    rows = await cursor.fetchall()
+    return {"total": total, "articles": [dict(row) for row in rows]}
+
+
+@router.get("/{incident_id}/articles/sources-summary")
+async def get_articles_sources_summary(
+    incident_id: int,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Return per-source article counts/languages plus overall language counts for an incident."""
     tenant_id = current_user.get("tenant_id")
     await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
     cursor = await db.execute(
-        "SELECT * FROM articles WHERE incident_id = ? ORDER BY collected_at DESC",
+        """SELECT source,
+                  COUNT(*) AS article_count,
+                  GROUP_CONCAT(DISTINCT COALESCE(language,'de')) AS languages
+           FROM articles WHERE incident_id = ?
+           GROUP BY source ORDER BY article_count DESC""",
         (incident_id,),
     )
-    rows = await cursor.fetchall()
-    return [dict(row) for row in rows]
+    sources = []
+    for r in await cursor.fetchall():
+        d = dict(r)
+        langs = (d.pop("languages") or "de").split(",")
+        d["languages"] = sorted({lang.strip() or "de" for lang in langs})
+        sources.append(d)
+    # Overall language distribution; group by the normalized value (a bare "language" would bind to the raw column and split NULL/''/'de')
+    cursor = await db.execute(
+        """SELECT COALESCE(NULLIF(TRIM(language),''),'de') AS language, COUNT(*) AS cnt
+           FROM articles WHERE incident_id = ?
+           GROUP BY COALESCE(NULLIF(TRIM(language),''),'de') ORDER BY cnt DESC""",
+        (incident_id,),
+    )
+    lang_counts = [dict(r) for r in await cursor.fetchall()]
+    total_cursor = await db.execute(
+        "SELECT COUNT(*) AS cnt FROM articles WHERE incident_id = ?",
+        (incident_id,),
+    )
+    total = (await total_cursor.fetchone())["cnt"]
+    return {"total": total, "sources": sources, "language_counts": lang_counts}
+
+
+@router.get("/{incident_id}/articles/timeline-buckets")
+async def get_articles_timeline_buckets(
+    incident_id: int,
+    granularity: str = Query("day", pattern="^(hour|day|week|month)$"),
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Return article and snapshot counts per time bucket for the timeline axis.
+
+    Counts only, no content. Rows whose timestamp strftime() cannot parse
+    (bucket NULL) are left out so the merged bucket keys stay sortable."""
+    tenant_id = current_user.get("tenant_id")
+    await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
+    fmt_map = {
+        "hour": "%Y-%m-%d %H:00",
+        "day": "%Y-%m-%d",
+        "week": "%Y-%W",
+        "month": "%Y-%m",
+    }
+    fmt = fmt_map[granularity]
+    cursor = await db.execute(
+        """SELECT strftime(?, collected_at) AS bucket, COUNT(*) AS article_count
+            FROM articles WHERE incident_id = ?
+            GROUP BY bucket ORDER BY bucket""",
+        (fmt, incident_id),
+    )
+    article_rows = {r["bucket"]: r["article_count"] for r in await cursor.fetchall() if r["bucket"] is not None}
+    cursor = await db.execute(
+        """SELECT strftime(?, created_at) AS bucket, COUNT(*) AS snapshot_count
+            FROM incident_snapshots WHERE incident_id = ?
+            GROUP BY bucket ORDER BY bucket""",
+        (fmt, incident_id),
+    )
+    snapshot_rows = {r["bucket"]: r["snapshot_count"] for r in await cursor.fetchall() if r["bucket"] is not None}
+    all_buckets = sorted(article_rows.keys() | snapshot_rows.keys())
+    return {
+        "granularity": granularity,
+        "buckets": [
+            {
+                "bucket": b,
+                "article_count": article_rows.get(b, 0),
+                "snapshot_count": snapshot_rows.get(b, 0),
+            }
+            for b in all_buckets
+        ],
+    }
 
 
 @router.get("/{incident_id}/snapshots")
diff --git a/src/static/js/api.js b/src/static/js/api.js
index 1dbdfa5..4df5018 100644
--- a/src/static/js/api.js
+++ b/src/static/js/api.js
@@ -99,8 +99,20 @@ const API = {
         return this._request('DELETE', `/incidents/${id}`);
     },
 
-    getArticles(incidentId) {
-        return this._request('GET', `/incidents/${incidentId}/articles`);
+    getArticles(incidentId, { limit = 500, offset = 0, search = null } = {}) {
+        // Request one page of an incident's articles via _request;
+        // `search` is only added to the query string when it is truthy.
+        const query = new URLSearchParams({ limit: String(limit), offset: String(offset) });
+        if (search) query.append('search', search);
+        return this._request('GET', `/incidents/${incidentId}/articles?${query}`);
+    },
+
+    getArticlesSourcesSummary(incidentId) { // GET the sources-summary aggregate of one incident
+        return this._request('GET', `/incidents/${incidentId}/articles/sources-summary`);
+    },
+
+    getArticlesTimelineBuckets(incidentId, granularity = 'day') { // GET timeline buckets; granularity is URL-encoded
+        return this._request('GET', `/incidents/${incidentId}/articles/timeline-buckets?granularity=${encodeURIComponent(granularity)}`);
     },
 
     getFactChecks(incidentId) {
diff --git a/src/static/js/app.js b/src/static/js/app.js
index 9f3d1e1..7e8450a 100644
--- a/src/static/js/app.js
+++ b/src/static/js/app.js
@@ -787,15 +787,25 @@ const App = {
 
     async loadIncidentDetail(id) {
         try {
-            const [incident, articles, factchecks, snapshots, locationsResponse] = await Promise.all([
+            const [incident, articlesResponse, factchecks, snapshots, locationsResponse] = await Promise.all([
                 API.getIncident(id),
-                API.getArticles(id),
+                API.getArticles(id, { limit: 500, offset: 0 }),
                 API.getFactChecks(id),
                 API.getSnapshots(id),
                 API.getLocations(id).catch(() => []),
             ]);
 
-            // Locations-API gibt jetzt {category_labels, locations} oder Array (Rückwärtskompatibel)
+            // Articles: neue Shape {total, articles} oder alter nackter Array (Rueckwaertskompatibel)
+            let articles, articlesTotal;
+            if (Array.isArray(articlesResponse)) {
+                articles = articlesResponse;
+                articlesTotal = articlesResponse.length;
+            } else {
+                articles = articlesResponse.articles || [];
+                articlesTotal = articlesResponse.total || articles.length;
+            }
+
+            // Locations-API gibt jetzt {category_labels, locations} oder Array (Rueckwaertskompatibel)
             let locations, categoryLabels;
             if (Array.isArray(locationsResponse)) {
                 locations = locationsResponse;
@@ -808,13 +818,63 @@ const App = {
                 categoryLabels = null;
             }
 
+            this._currentArticlesTotal = articlesTotal;
+            this._currentArticlesLoaded = articles.length;
+            this._currentIncidentIdForLoad = id;
+
             this.renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels);
+
+            // Quellenuebersicht aus Aggregat-Endpunkt (alle Quellen, nicht nur erste Seite)
+            this._loadSourcesSummary(id).catch(err => console.warn('sources-summary:', err));
+
+            // Wenn mehr Artikel existieren als initial geladen: progressiver Hintergrund-Load
+            if (articlesTotal > articles.length) {
+                this._loadRemainingArticlesInBackground(id).catch(err => console.warn('bg-articles:', err));
+            }
         } catch (err) {
             console.error('loadIncidentDetail Fehler:', err);
             UI.showToast('Fehler beim Laden: ' + err.message, 'error');
         }
     },
 
+    /** Fetch the aggregated source overview and render it (replaces client-side counting). */
+    async _loadSourcesSummary(incidentId) {
+        const summary = await API.getArticlesSourcesSummary(incidentId);
+        if (this.currentIncidentId !== incidentId) return; // user switched incidents meanwhile
+        this._currentSourcesSummary = summary;
+        const overviewEl = document.getElementById('source-overview-content');
+        if (overviewEl && typeof UI.renderSourceOverviewFromSummary === 'function') {
+            overviewEl.innerHTML = UI.renderSourceOverviewFromSummary(summary);
+        }
+        const headerStatsEl = document.getElementById('source-overview-header-stats');
+        if (headerStatsEl && summary) {
+            headerStatsEl.textContent = `${summary.total} Artikel aus ${summary.sources.length} Quellen`;
+        }
+    },
+
+    /** Load the remaining articles page by page in the background and merge them into _currentArticles. */
+    async _loadRemainingArticlesInBackground(incidentId) {
+        const BATCH = 500;
+        while (this.currentIncidentId === incidentId
+               && this._currentArticlesLoaded < this._currentArticlesTotal) {
+            let resp;
+            try {
+                resp = await API.getArticles(incidentId, { limit: BATCH, offset: this._currentArticlesLoaded });
+            } catch (err) {
+                console.warn('Hintergrund-Load Artikel fehlgeschlagen:', err);
+                return;
+            }
+            if (this.currentIncidentId !== incidentId) return; // incident changed while the request was in flight
+            const batch = (resp && resp.articles) ? resp.articles : (Array.isArray(resp) ? resp : []); // accepts {articles} or a bare array
+            if (!batch.length) break; // empty page: stop even if the reported total was not reached
+            this._currentArticles = (this._currentArticles || []).concat(batch);
+            this._currentArticlesLoaded += batch.length;
+            this.rerenderTimeline();
+            // Short yield so the UI stays responsive
+            await new Promise(r => setTimeout(r, 30));
+        }
+    },
+
     renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels) {
         // Header Strip
         { const _e = document.getElementById('incident-title'); if (_e) _e.textContent = incident.title; }
@@ -940,17 +1000,16 @@ const App = {
             factcheckList.innerHTML = '<div style="padding:12px;font-size:13px;color:var(--text-tertiary);">Noch keine Fakten geprüft</div>';
         }
 
-        // Quellenübersicht
+        // Quellenuebersicht wird aus dem Aggregat-Endpunkt (_loadSourcesSummary) gefuellt,
+        // damit sie immer alle Artikel der Lage zeigt — unabhaengig von Paginierung.
         const sourceOverview = document.getElementById('source-overview-content');
         if (sourceOverview) {
-            sourceOverview.innerHTML = UI.renderSourceOverview(articles);
-            // Stats im Header aktualisieren (sichtbar im zugeklappten Zustand)
-            const _soStats = document.getElementById("source-overview-header-stats");
-            if (_soStats) {
-                const _soSources = new Set(articles.map(a => a.source).filter(Boolean));
-                _soStats.textContent = articles.length + " Artikel aus " + _soSources.size + " Quellen";
-            }
-            // Im Tab-Modus wird die Kachel vom Seiten-Layout bestimmt — kein Resize noetig
+            sourceOverview.innerHTML = '<div style="padding:12px;font-size:13px;color:var(--text-tertiary);">Quellenübersicht wird geladen…</div>';
+        }
+        const _soStats = document.getElementById("source-overview-header-stats");
+        if (_soStats) {
+            const total = (this._currentArticlesTotal != null) ? this._currentArticlesTotal : articles.length;
+            _soStats.textContent = total + " Artikel";
         }
 
         // Timeline - Artikel + Snapshots zwischenspeichern und rendern
diff --git a/src/static/js/components.js b/src/static/js/components.js
index ff2db9f..cf482a5 100644
--- a/src/static/js/components.js
+++ b/src/static/js/components.js
@@ -930,6 +930,37 @@ const UI = {
     /**
      * Quellenübersicht für eine Lage rendern.
      */
+    /**
+     * Render the source overview from the aggregate endpoint (all articles of the
+     * incident, independent of pagination). Language codes are escaped like source names.
+     * data: {total, sources: [{source, article_count, languages: []}], language_counts: [{language, cnt}]}
+     */
+    renderSourceOverviewFromSummary(data) {
+        if (!data || !data.sources || data.sources.length === 0) return '';
+
+        const langChips = (data.language_counts || [])
+            .map(l => `<span class="source-lang-chip">${this.escape((l.language || 'de').toUpperCase())} <strong>${l.cnt}</strong></span>`)
+            .join('');
+
+        let html = `<div class="source-overview-header">`;
+        html += `<span class="source-overview-stat">${data.total} Artikel aus ${data.sources.length} Quellen</span>`;
+        html += `<div class="source-lang-chips">${langChips}</div>`;
+        html += `</div>`;
+
+        html += '<div class="source-overview-grid">';
+        data.sources.forEach(s => {
+            const langs = (s.languages || ['de']).map(l => (l || 'de').toUpperCase()).join('/');
+            html += `<div class="source-overview-item">
+                <span class="source-overview-name">${this.escape(s.source || 'Unbekannt')}</span>
+                <span class="source-overview-lang">${this.escape(langs)}</span>
+                <span class="source-overview-count">${s.article_count}</span>
+            </div>`;
+        });
+        html += '</div>';
+
+        return html;
+    },
+
     renderSourceOverview(articles) {
         if (!articles || articles.length === 0) return '';