Articles: Paginierung, Timeline-Buckets, Sources-Summary-Endpunkt

Backend: - GET /{id}/articles paginiert jetzt per limit/offset (Default 500, Max 1000) und unterstuetzt optionalen search-Parameter (LIKE ueber headline/source/content). Response-Shape: {total, articles}. - Neuer Endpunkt GET /{id}/articles/sources-summary liefert pro Quelle {source, article_count, languages} sowie language_counts gesamt — serverseitige Aggregation, unabhaengig von Artikel-Paginierung. - Neuer Endpunkt GET /{id}/articles/timeline-buckets?granularity=hour|day|week|month aggregiert Artikel + Snapshot-Counts pro Zeitbucket (fuer spaetere Timeline-Zaehler ueber die volle Historie). - database.py: Index idx_articles_incident_collected auf (incident_id, collected_at DESC) fuer schnelleres ORDER BY + Pagination. Frontend: - api.js: getArticles({limit, offset, search}), getArticlesSourcesSummary(), getArticlesTimelineBuckets(). - app.js: loadIncidentDetail laedt erste Seite (500 Artikel), startet _loadSourcesSummary parallel und zieht restliche Artikel batchweise (500er Bloecke) im Hintergrund nach, bis _currentArticlesTotal erreicht ist. rerenderTimeline nach jedem Batch. - components.js: renderSourceOverviewFromSummary(data) rendert aus Aggregat-Daten (ersetzt clientseitige Zaehlung ueber geladene Artikel). Hintergrund: /articles lieferte bei der Iran-Lage 22 MB (17.286 Artikel mit SELECT *). Die Erstantwort sinkt auf ~650 KB (500 Artikel), weitere werden progressiv im Hintergrund nachgeladen. Quellenuebersicht zeigt dank Aggregat-Endpunkt sofort alle Quellen + Sprachen komplett. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 23:46:40 +02:00
Commit 9a43dffa6c
--- a/src/database.py
+++ b/src/database.py
@@ -583,6 +583,7 @@ async def init_db():
        for idx_sql in [
            "CREATE INDEX IF NOT EXISTS idx_incidents_tenant_status ON incidents(tenant_id, status)",
            "CREATE INDEX IF NOT EXISTS idx_articles_tenant_incident ON articles(tenant_id, incident_id)",
            "CREATE INDEX IF NOT EXISTS idx_articles_incident_collected ON articles(incident_id, collected_at DESC)",
        ]:
            try:
                await db.execute(idx_sql)
--- a/src/routers/incidents.py
+++ b/src/routers/incidents.py
@@ -317,18 +317,133 @@ async def delete_incident(
@router.get("/{incident_id}/articles")
async def get_articles(
    incident_id: int,
    limit: int = Query(500, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    search: str | None = Query(None, min_length=0, max_length=200),
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Return one page of an incident's articles, newest first.

    ``_check_incident_access`` is awaited for the calling user and tenant
    before any query runs; how it signals a denied request is defined
    outside this function.

    Args:
        incident_id: Incident whose articles are listed.
        limit: Page size (1-1000, default 500).
        offset: Number of matching rows to skip.
        search: Optional search text. After trimming, a non-empty value keeps
            only articles that contain it as a literal substring in
            headline, headline_de, source, content_de or content_original.

    Returns:
        ``{"total": int, "articles": [...]}`` where ``total`` counts every
        row matching the filter (independent of limit/offset) and
        ``articles`` is the requested page ordered by ``collected_at`` DESC.
    """
    tenant_id = current_user.get("tenant_id")
    await _check_incident_access(db, incident_id, current_user["id"], tenant_id)

    search_clean = (search or "").strip()
    if search_clean:
        # Escape LIKE metacharacters so that "%" and "_" typed by the user are
        # matched literally instead of acting as wildcards.
        escaped = (
            search_clean.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        like = f"%{escaped}%"
        searched_columns = (
            "headline",
            "headline_de",
            "source",
            "content_de",
            "content_original",
        )
        # Only the fixed column names above are interpolated into the SQL text;
        # the user-supplied value is always passed as a bound parameter.
        conditions = " OR ".join(
            f"COALESCE({column},'') LIKE ? ESCAPE '\\'" for column in searched_columns
        )
        where = f"WHERE incident_id = ? AND ({conditions})"
        params = (incident_id,) + (like,) * len(searched_columns)
    else:
        where = "WHERE incident_id = ?"
        params = (incident_id,)

    cursor = await db.execute(f"SELECT COUNT(*) AS cnt FROM articles {where}", params)
    total = (await cursor.fetchone())["cnt"]

    # NOTE(review): rows sharing the same collected_at have no explicit
    # tie-breaker, so their relative order across pages is not guaranteed by
    # SQL. Consider adding the table's key column to ORDER BY — confirm its name.
    cursor = await db.execute(
        f"SELECT * FROM articles {where} ORDER BY collected_at DESC LIMIT ? OFFSET ?",
        (*params, limit, offset),
    )
    rows = await cursor.fetchall()
    return {"total": total, "articles": [dict(row) for row in rows]}
@router.get("/{incident_id}/articles/sources-summary")
async def get_articles_sources_summary(
    incident_id: int,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Return aggregated per-source statistics for an incident's articles.

    The aggregation runs in the database over all articles of the incident,
    so the result does not depend on how the article list is paginated.
    ``_check_incident_access`` is awaited before any query runs.

    Returns:
        A dict with three keys:

        * ``total``: number of articles of the incident.
        * ``sources``: one entry per distinct ``source`` value, shaped
          ``{"source", "article_count", "languages"}`` and ordered by
          ``article_count`` descending; ``languages`` is a sorted list of
          distinct language codes, with missing codes reported as ``"de"``.
        * ``language_counts``: ``[{"language", "cnt"}, ...]`` ordered by
          ``cnt`` descending, again with missing codes counted as ``"de"``.
    """
    tenant_id = current_user.get("tenant_id")
    await _check_incident_access(db, incident_id, current_user["id"], tenant_id)

    cursor = await db.execute(
        """SELECT source,
                  COUNT(*) AS article_count,
                  GROUP_CONCAT(DISTINCT COALESCE(language,'de')) AS languages
           FROM articles WHERE incident_id = ?
           GROUP BY source ORDER BY article_count DESC""",
        (incident_id,),
    )
    sources = []
    for row in await cursor.fetchall():
        entry = dict(row)
        # GROUP_CONCAT yields a comma-separated string; blank codes fall back to "de"
        codes = (entry.pop("languages") or "de").split(",")
        entry["languages"] = sorted({code.strip() or "de" for code in codes})
        sources.append(entry)

    # Overall language distribution. Group by the COALESCE expression itself:
    # a bare "GROUP BY language" resolves to the table column, which would put
    # NULL rows and 'de' rows into two separate groups that are both labelled 'de'.
    cursor = await db.execute(
        """SELECT COALESCE(language,'de') AS language, COUNT(*) AS cnt
           FROM articles WHERE incident_id = ?
           GROUP BY COALESCE(language,'de') ORDER BY cnt DESC""",
        (incident_id,),
    )
    lang_counts = [dict(row) for row in await cursor.fetchall()]

    total_cursor = await db.execute(
        "SELECT COUNT(*) AS cnt FROM articles WHERE incident_id = ?",
        (incident_id,),
    )
    total = (await total_cursor.fetchone())["cnt"]
    return {"total": total, "sources": sources, "language_counts": lang_counts}
@router.get("/{incident_id}/articles/timeline-buckets")
async def get_articles_timeline_buckets(
    incident_id: int,
    granularity: str = Query("day", pattern="^(hour|day|week|month)$"),
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Return article and snapshot counts per time bucket for the timeline axis.

    Only counts are returned, no article or snapshot content.
    ``_check_incident_access`` is awaited before any query runs.

    Args:
        incident_id: Incident to aggregate.
        granularity: One of ``hour``, ``day``, ``week`` or ``month``; selects
            the ``strftime`` pattern used to derive the bucket label.

    Returns:
        ``{"granularity": str, "buckets": [...]}`` where each bucket is
        ``{"bucket", "article_count", "snapshot_count"}``. Buckets are sorted
        by label and cover every label that has at least one article
        (by ``collected_at``) or one snapshot (by ``created_at``). Rows whose
        timestamp yields no label (NULL from ``strftime``) are left out.
    """
    tenant_id = current_user.get("tenant_id")
    await _check_incident_access(db, incident_id, current_user["id"], tenant_id)

    # strftime() pattern per granularity; the pattern is passed as a bound parameter
    fmt_map = {
        "hour": "%Y-%m-%d %H:00",
        "day": "%Y-%m-%d",
        "week": "%Y-%W",
        "month": "%Y-%m",
    }
    fmt = fmt_map[granularity]

    cursor = await db.execute(
        """SELECT strftime(?, collected_at) AS bucket, COUNT(*) AS article_count
           FROM articles WHERE incident_id = ?
           GROUP BY bucket ORDER BY bucket""",
        (fmt, incident_id),
    )
    article_result = await cursor.fetchall()
    # Skip NULL labels: a None key mixed with str keys would make sorted() raise
    article_rows = {
        row["bucket"]: row["article_count"]
        for row in article_result
        if row["bucket"] is not None
    }

    cursor = await db.execute(
        """SELECT strftime(?, created_at) AS bucket, COUNT(*) AS snapshot_count
           FROM incident_snapshots WHERE incident_id = ?
           GROUP BY bucket ORDER BY bucket""",
        (fmt, incident_id),
    )
    snapshot_result = await cursor.fetchall()
    snapshot_rows = {
        row["bucket"]: row["snapshot_count"]
        for row in snapshot_result
        if row["bucket"] is not None
    }

    all_buckets = sorted(article_rows.keys() | snapshot_rows.keys())
    return {
        "granularity": granularity,
        "buckets": [
            {
                "bucket": label,
                "article_count": article_rows.get(label, 0),
                "snapshot_count": snapshot_rows.get(label, 0),
            }
            for label in all_buckets
        ],
    }
@router.get("/{incident_id}/snapshots")
--- a/src/static/js/api.js
+++ b/src/static/js/api.js
@@ -99,8 +99,20 @@ const API = {
        return this._request('DELETE', `/incidents/${id}`);
    },
-    getArticles(incidentId) {
+    getArticles(incidentId, { limit = 500, offset = 0, search = null } = {}) {
-        return this._request('GET', `/incidents/${incidentId}/articles`);
+        const params = new URLSearchParams();
        params.set('limit', String(limit));
        params.set('offset', String(offset));
        if (search) params.set('search', search);
        return this._request('GET', `/incidents/${incidentId}/articles?${params.toString()}`);
    },
    getArticlesSourcesSummary(incidentId) {
        return this._request('GET', `/incidents/${incidentId}/articles/sources-summary`);
    },
    getArticlesTimelineBuckets(incidentId, granularity = 'day') {
        return this._request('GET', `/incidents/${incidentId}/articles/timeline-buckets?granularity=${encodeURIComponent(granularity)}`);
    },
    getFactChecks(incidentId) {
--- a/src/static/js/app.js
+++ b/src/static/js/app.js
@@ -787,15 +787,25 @@ const App = {
    async loadIncidentDetail(id) {
        try {
-            const [incident, articles, factchecks, snapshots, locationsResponse] = await Promise.all([
+            const [incident, articlesResponse, factchecks, snapshots, locationsResponse] = await Promise.all([
                API.getIncident(id),
-                API.getArticles(id),
+                API.getArticles(id, { limit: 500, offset: 0 }),
                API.getFactChecks(id),
                API.getSnapshots(id),
                API.getLocations(id).catch(() => []),
            ]);
-            // Locations-API gibt jetzt {category_labels, locations} oder Array (Rückwärtskompatibel)
+            // Articles: neue Shape {total, articles} oder alter nackter Array (Rueckwaertskompatibel)
            let articles, articlesTotal;
            if (Array.isArray(articlesResponse)) {
                articles = articlesResponse;
                articlesTotal = articlesResponse.length;
            } else {
                articles = articlesResponse.articles || [];
                articlesTotal = articlesResponse.total || articles.length;
            }
            // Locations-API gibt jetzt {category_labels, locations} oder Array (Rueckwaertskompatibel)
            let locations, categoryLabels;
            if (Array.isArray(locationsResponse)) {
                locations = locationsResponse;
@@ -808,13 +818,63 @@ const App = {
                categoryLabels = null;
            }
            this._currentArticlesTotal = articlesTotal;
            this._currentArticlesLoaded = articles.length;
            this._currentIncidentIdForLoad = id;
            this.renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels);
            // Quellenuebersicht aus Aggregat-Endpunkt (alle Quellen, nicht nur erste Seite)
            this._loadSourcesSummary(id).catch(err => console.warn('sources-summary:', err));
            // Wenn mehr Artikel existieren als initial geladen: progressiver Hintergrund-Load
            if (articlesTotal > articles.length) {
                this._loadRemainingArticlesInBackground(id).catch(err => console.warn('bg-articles:', err));
            }
        } catch (err) {
            console.error('loadIncidentDetail Fehler:', err);
            UI.showToast('Fehler beim Laden: ' + err.message, 'error');
        }
    },
    /** Quellenuebersicht aus Aggregat-Endpunkt nachladen (ersetzt Client-Zaehlung). */
    async _loadSourcesSummary(incidentId) {
        const data = await API.getArticlesSourcesSummary(incidentId);
        if (this.currentIncidentId !== incidentId) return; // User hat gewechselt
        this._currentSourcesSummary = data;
        const soEl = document.getElementById('source-overview-content');
        const statsEl = document.getElementById('source-overview-header-stats');
        if (soEl && typeof UI.renderSourceOverviewFromSummary === 'function') {
            soEl.innerHTML = UI.renderSourceOverviewFromSummary(data);
        }
        if (statsEl && data) {
            statsEl.textContent = `${data.total} Artikel aus ${data.sources.length} Quellen`;
        }
    },
    /** Restliche Artikel seitenweise im Hintergrund nachladen und in _currentArticles mergen. */
    async _loadRemainingArticlesInBackground(incidentId) {
        const BATCH = 500;
        while (this.currentIncidentId === incidentId
               && this._currentArticlesLoaded < this._currentArticlesTotal) {
            let resp;
            try {
                resp = await API.getArticles(incidentId, { limit: BATCH, offset: this._currentArticlesLoaded });
            } catch (err) {
                console.warn('Hintergrund-Load Artikel fehlgeschlagen:', err);
                return;
            }
            if (this.currentIncidentId !== incidentId) return;
            const batch = (resp && resp.articles) ? resp.articles : (Array.isArray(resp) ? resp : []);
            if (!batch.length) break;
            this._currentArticles = (this._currentArticles || []).concat(batch);
            this._currentArticlesLoaded += batch.length;
            this.rerenderTimeline();
            // Kleiner Yield, damit das UI reaktiv bleibt
            await new Promise(r => setTimeout(r, 30));
        }
    },
    renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels) {
        // Header Strip
        { const _e = document.getElementById('incident-title'); if (_e) _e.textContent = incident.title; }
@@ -940,17 +1000,16 @@ const App = {
            factcheckList.innerHTML = '<div style="padding:12px;font-size:13px;color:var(--text-tertiary);">Noch keine Fakten geprüft</div>';
        }
-        // Quellenübersicht
+        // Quellenuebersicht wird aus dem Aggregat-Endpunkt (_loadSourcesSummary) gefuellt,
        // damit sie immer alle Artikel der Lage zeigt — unabhaengig von Paginierung.
        const sourceOverview = document.getElementById('source-overview-content');
        if (sourceOverview) {
-            sourceOverview.innerHTML = UI.renderSourceOverview(articles);
+            sourceOverview.innerHTML = '<div style="padding:12px;font-size:13px;color:var(--text-tertiary);">Quellenübersicht wird geladen…</div>';
-            // Stats im Header aktualisieren (sichtbar im zugeklappten Zustand)
+        }
-            const _soStats = document.getElementById("source-overview-header-stats");
+        const _soStats = document.getElementById("source-overview-header-stats");
-            if (_soStats) {
+        if (_soStats) {
-                const _soSources = new Set(articles.map(a => a.source).filter(Boolean));
+            const total = (this._currentArticlesTotal != null) ? this._currentArticlesTotal : articles.length;
-                _soStats.textContent = articles.length + " Artikel aus " + _soSources.size + " Quellen";
+            _soStats.textContent = total + " Artikel";
            }
            // Im Tab-Modus wird die Kachel vom Seiten-Layout bestimmt — kein Resize noetig
        }
        // Timeline - Artikel + Snapshots zwischenspeichern und rendern
--- a/src/static/js/components.js
+++ b/src/static/js/components.js
@@ -930,6 +930,37 @@ const UI = {
    /**
     * Quellenübersicht für eine Lage rendern.
     */
    /**
     * Quellenuebersicht aus Aggregat-Endpunkt rendern (alle Artikel der Lage,
     * unabhaengig von Paginierung im Frontend).
     * data: {total, sources: [{source, article_count, languages: []}], language_counts: [{language, cnt}]}
     */
    renderSourceOverviewFromSummary(data) {
        if (!data || !data.sources || data.sources.length === 0) return '';
        const langChips = (data.language_counts || [])
            .map(l => `<span class="source-lang-chip">${(l.language || 'de').toUpperCase()} <strong>${l.cnt}</strong></span>`)
            .join('');
        let html = `<div class="source-overview-header">`;
        html += `<span class="source-overview-stat">${data.total} Artikel aus ${data.sources.length} Quellen</span>`;
        html += `<div class="source-lang-chips">${langChips}</div>`;
        html += `</div>`;
        html += '<div class="source-overview-grid">';
        data.sources.forEach(s => {
            const langs = (s.languages || ['de']).map(l => (l || 'de').toUpperCase()).join('/');
            html += `<div class="source-overview-item">
                <span class="source-overview-name">${this.escape(s.source || 'Unbekannt')}</span>
                <span class="source-overview-lang">${langs}</span>
                <span class="source-overview-count">${s.article_count}</span>
            </div>`;
        });
        html += '</div>';
        return html;
    },
    renderSourceOverview(articles) {
        if (!articles || articles.length === 0) return '';