Articles: pagination, timeline buckets, sources-summary endpoint

Backend:
- GET /{id}/articles is now paginated via limit/offset (default 500,
  max 1000) and supports an optional search parameter (LIKE across
  headline/source/content). Response shape: {total, articles}.
- New endpoint GET /{id}/articles/sources-summary returns
  {source, article_count, languages} per source plus overall
  language_counts; the aggregation is server-side and independent of
  article pagination.
- New endpoint GET /{id}/articles/timeline-buckets?granularity=hour|day|week|month
  aggregates article and snapshot counts per time bucket (for upcoming
  timeline counters across the full history).
- database.py: index idx_articles_incident_collected on
  (incident_id, collected_at DESC) for faster ORDER BY plus pagination;
  a DDL sketch follows this list.
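
A minimal sketch of that index as DDL; the index name and column order
come from this commit, but the surrounding helper and its exact place in
database.py are assumptions:

    import aiosqlite

    # Hypothetical startup hook in database.py. The composite index lets
    # SQLite serve "WHERE incident_id = ? ORDER BY collected_at DESC
    # LIMIT ? OFFSET ?" straight from the index, skipping a sort pass.
    async def ensure_indexes(db: aiosqlite.Connection) -> None:
        await db.execute(
            "CREATE INDEX IF NOT EXISTS idx_articles_incident_collected "
            "ON articles (incident_id, collected_at DESC)"
        )
        await db.commit()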

Frontend:
- api.js: getArticles({limit, offset, search}),
  getArticlesSourcesSummary(), getArticlesTimelineBuckets().
- app.js: loadIncidentDetail loads the first page (500 articles), starts
  _loadSourcesSummary in parallel, and fetches the remaining articles in
  the background in batches of 500 until _currentArticlesTotal is
  reached; rerenderTimeline runs after each batch (paging loop sketched
  below).
- components.js: renderSourceOverviewFromSummary(data) renders from
  aggregate data (replaces client-side counting over loaded articles).
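
The paging contract from the app.js bullet, sketched in Python for
illustration (the real client is JS); BASE_URL, TOKEN, and the
/incidents route prefix are assumptions, while the {total, articles}
shape and the 500-item batches come from this commit:

    import httpx

    BASE_URL = "http://localhost:8000"  # assumed dev server
    TOKEN = "..."                       # assumed bearer token

    def fetch_all_articles(incident_id: int, batch: int = 500) -> list[dict]:
        """Load the first page, then pull the rest in batches until total."""
        headers = {"Authorization": f"Bearer {TOKEN}"}
        articles: list[dict] = []
        with httpx.Client(base_url=BASE_URL, headers=headers) as client:
            # First page establishes the total, like loadIncidentDetail.
            resp = client.get(
                f"/incidents/{incident_id}/articles",
                params={"limit": batch, "offset": 0},
            )
            resp.raise_for_status()
            data = resp.json()
            total = data["total"]
            articles.extend(data["articles"])
            # The background batches in app.js map to this loop.
            while len(articles) < total:
                resp = client.get(
                    f"/incidents/{incident_id}/articles",
                    params={"limit": batch, "offset": len(articles)},
                )
                resp.raise_for_status()
                page = resp.json()["articles"]
                if not page:  # defensive: stop if the server runs dry early
                    break
                articles.extend(page)
        return articles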

Background: for the Iran incident, /articles returned 22 MB (17,286
articles via SELECT *). The initial response drops to ~650 KB (500
articles); the remainder is loaded progressively in the background.
Thanks to the aggregate endpoint, the source overview immediately shows
all sources and languages in full.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

@@ -317,18 +317,133 @@ async def delete_incident(
@router.get("/{incident_id}/articles")
async def get_articles(
incident_id: int,
limit: int = Query(500, ge=1, le=1000),
offset: int = Query(0, ge=0),
search: str | None = Query(None, min_length=0, max_length=200),
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Alle Artikel einer Lage abrufen."""
"""Artikel einer Lage paginiert abrufen.
Response: ``{"total": int, "articles": [...]}``.
Optionaler ``search``-Param filtert per LIKE ueber
headline, headline_de, source, content_de, content_original.
"""
tenant_id = current_user.get("tenant_id")
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
search_clean = (search or "").strip()
if search_clean:
like = f"%{search_clean}%"
params = (incident_id, like, like, like, like, like)
where = (
"WHERE incident_id = ? AND ("
"COALESCE(headline,'') LIKE ? OR "
"COALESCE(headline_de,'') LIKE ? OR "
"COALESCE(source,'') LIKE ? OR "
"COALESCE(content_de,'') LIKE ? OR "
"COALESCE(content_original,'') LIKE ?)"
)
else:
params = (incident_id,)
where = "WHERE incident_id = ?"
cursor = await db.execute(f"SELECT COUNT(*) AS cnt FROM articles {where}", params)
total = (await cursor.fetchone())["cnt"]
cursor = await db.execute(
f"SELECT * FROM articles {where} ORDER BY collected_at DESC LIMIT ? OFFSET ?",
(*params, limit, offset),
)
rows = await cursor.fetchall()
return {"total": total, "articles": [dict(row) for row in rows]}
@router.get("/{incident_id}/articles/sources-summary")
async def get_articles_sources_summary(
incident_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Aggregierte Quellen-Statistik fuer eine Lage (fuer Quellenuebersicht)."""
tenant_id = current_user.get("tenant_id")
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
cursor = await db.execute(
"SELECT * FROM articles WHERE incident_id = ? ORDER BY collected_at DESC",
"""SELECT source,
COUNT(*) AS article_count,
GROUP_CONCAT(DISTINCT COALESCE(language,'de')) AS languages
FROM articles WHERE incident_id = ?
GROUP BY source ORDER BY article_count DESC""",
(incident_id,),
)
rows = await cursor.fetchall()
return [dict(row) for row in rows]
sources = []
for r in await cursor.fetchall():
d = dict(r)
langs = (d.pop("languages") or "de").split(",")
d["languages"] = sorted({(l or "de").strip() for l in langs if l is not None})
sources.append(d)
# Sprach-Verteilung gesamt
cursor = await db.execute(
"""SELECT COALESCE(language,'de') AS language, COUNT(*) AS cnt
FROM articles WHERE incident_id = ?
GROUP BY language ORDER BY cnt DESC""",
(incident_id,),
)
lang_counts = [dict(r) for r in await cursor.fetchall()]
total_cursor = await db.execute(
"SELECT COUNT(*) AS cnt FROM articles WHERE incident_id = ?",
(incident_id,),
)
total = (await total_cursor.fetchone())["cnt"]
return {"total": total, "sources": sources, "language_counts": lang_counts}
@router.get("/{incident_id}/articles/timeline-buckets")
async def get_articles_timeline_buckets(
incident_id: int,
granularity: str = Query("day", pattern="^(hour|day|week|month)$"),
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Aggregierte Zeit-Buckets fuer die Timeline-Achse.
Zaehlt Artikel und Snapshots pro Bucket. Kein Inhalt, nur Counts.
"""
tenant_id = current_user.get("tenant_id")
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
fmt_map = {
"hour": "%Y-%m-%d %H:00",
"day": "%Y-%m-%d",
"week": "%Y-%W",
"month": "%Y-%m",
}
fmt = fmt_map[granularity]
cursor = await db.execute(
f"""SELECT strftime(?, collected_at) AS bucket, COUNT(*) AS article_count
FROM articles WHERE incident_id = ?
GROUP BY bucket ORDER BY bucket""",
(fmt, incident_id),
)
article_rows = {r["bucket"]: r["article_count"] for r in await cursor.fetchall()}
cursor = await db.execute(
f"""SELECT strftime(?, created_at) AS bucket, COUNT(*) AS snapshot_count
FROM incident_snapshots WHERE incident_id = ?
GROUP BY bucket ORDER BY bucket""",
(fmt, incident_id),
)
snapshot_rows = {r["bucket"]: r["snapshot_count"] for r in await cursor.fetchall()}
all_buckets = sorted(set(article_rows.keys()) | set(snapshot_rows.keys()))
return {
"granularity": granularity,
"buckets": [
{
"bucket": b,
"article_count": article_rows.get(b, 0),
"snapshot_count": snapshot_rows.get(b, 0),
}
for b in all_buckets
],
}
@router.get("/{incident_id}/snapshots")