Locations: Aggregation in SQL (GROUP BY + Window)

Replaces the raw JOIN across article_locations x articles (21,814 rows and
11 MB of payload for the Iran incident) with three small aggregated queries
(sketched below):
  1. Places via GROUP BY (name, lat, lon), which directly yields the result set.
  2. Categories per place via GROUP BY, used to pick the dominant category.
  3. Sample articles (at most 10 per place) via ROW_NUMBER() OVER (PARTITION BY ...).
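
The GROUP BY plus window-function pattern is the core of the change, so here is a
minimal, runnable sketch of the two key query shapes (the per-place aggregate and the
ROW_NUMBER() sampling) against an in-memory SQLite toy schema. Table and column names
mirror the real ones in the diff below, but the schema is trimmed and the rows are
invented purely for illustration:

    import sqlite3

    con = sqlite3.connect(":memory:")
    con.executescript("""
        CREATE TABLE articles (id INTEGER PRIMARY KEY, headline TEXT, collected_at TEXT);
        CREATE TABLE article_locations (
            article_id INTEGER, incident_id INTEGER,
            location_name TEXT, latitude REAL, longitude REAL, category TEXT
        );
        INSERT INTO articles VALUES
            (1, 'A', '2026-04-01'), (2, 'B', '2026-04-02'), (3, 'C', '2026-04-03');
        INSERT INTO article_locations VALUES
            (1, 7, 'Tehran', 35.6892, 51.389, 'primary'),
            (2, 7, 'Tehran', 35.6892, 51.389, 'mentioned'),
            (3, 7, 'Isfahan', 32.6546, 51.668, 'secondary');
    """)

    # Query 1: one aggregate row per place instead of one row per (article, place) pair.
    for row in con.execute("""
        SELECT location_name AS name, ROUND(latitude, 2) AS lat, ROUND(longitude, 2) AS lon,
               COUNT(*) AS article_count
        FROM article_locations WHERE incident_id = ?
        GROUP BY name, lat, lon ORDER BY article_count DESC
    """, (7,)):
        print(row)  # e.g. ('Tehran', 35.69, 51.39, 2)

    # Query 3: at most N sample articles per place via ROW_NUMBER() (needs SQLite >= 3.25).
    for row in con.execute("""
        SELECT name, headline FROM (
            SELECT al.location_name AS name, a.headline,
                   ROW_NUMBER() OVER (
                       PARTITION BY al.location_name, ROUND(al.latitude, 2), ROUND(al.longitude, 2)
                       ORDER BY a.collected_at DESC
                   ) AS rn
            FROM article_locations al JOIN articles a ON a.id = al.article_id
            WHERE al.incident_id = ?
        ) WHERE rn <= 10
    """, (7,)):
        print(row)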

The response shape is unchanged ({category_labels, locations: [...]}), so no
frontend change is needed. The priority order primary > secondary > tertiary >
mentioned is preserved.
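
For reference, a single location entry in that unchanged response roughly looks like
the sketch below. Field names come from the handler in the diff; the values are
invented, and the contents of category_labels (loaded from the incident further down)
are omitted:

    # Illustrative shape of the unchanged response (invented values).
    example_response = {
        "category_labels": {},  # loaded from the incident, structure unchanged
        "locations": [
            {
                "location_name": "Tehran",
                "lat": 35.69,
                "lon": 51.39,
                "country_code": "IR",
                "confidence": 0.9,
                "article_count": 4321,
                "articles": [  # at most 10 sample articles, newest first
                    {"id": 123, "headline": "...", "source": "...", "source_url": "https://example.org/..."},
                ],
                "category": "primary",  # dominant category by the priority order above
            },
        ],
    }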

Expected effect: the Iran locations payload drops from 11 MB to under 500 KB, and
query time drops further because the 21k-row JOIN no longer has to be materialized.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

@@ -543,60 +543,100 @@ async def get_locations(
     current_user: dict = Depends(get_current_user),
     db: aiosqlite.Connection = Depends(db_dependency),
 ):
-    """Return the geographic locations of an incident (aggregated per place)."""
+    """Return the geographic locations of an incident (aggregated server-side per place).
+
+    Three separate queries (all small) instead of one 21k-row JOIN:
+      1. Place aggregates via GROUP BY (name, lat, lon), which directly yields the result set.
+      2. Categories per place via GROUP BY (name, lat, lon, category), used for the dominant category.
+      3. Sample articles per place via ROW_NUMBER(), at most 10 per place.
+    """
     tenant_id = current_user.get("tenant_id")
     await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
+
+    # 1. Place aggregates
     cursor = await db.execute(
-        """SELECT al.location_name, al.location_name_normalized, al.country_code,
-                  al.latitude, al.longitude, al.confidence, al.category,
-                  a.id as article_id, a.headline, a.headline_de, a.source, a.source_url
-           FROM article_locations al
-           JOIN articles a ON a.id = al.article_id
-           WHERE al.incident_id = ?
-           ORDER BY al.location_name_normalized, a.collected_at DESC""",
+        """SELECT
+               COALESCE(location_name_normalized, location_name) AS name,
+               ROUND(latitude, 2) AS lat,
+               ROUND(longitude, 2) AS lon,
+               MIN(country_code) AS country_code,
+               MAX(confidence) AS confidence,
+               COUNT(*) AS article_count
+           FROM article_locations
+           WHERE incident_id = ?
+           GROUP BY name, lat, lon
+           ORDER BY article_count DESC""",
         (incident_id,),
     )
-    rows = await cursor.fetchall()
-
-    # Aggregate by normalized place name + coordinates
-    loc_map = {}
-    for row in rows:
-        row = dict(row)
-        key = (row["location_name_normalized"] or row["location_name"], round(row["latitude"], 2), round(row["longitude"], 2))
-        if key not in loc_map:
-            loc_map[key] = {
-                "location_name": row["location_name_normalized"] or row["location_name"],
-                "lat": row["latitude"],
-                "lon": row["longitude"],
-                "country_code": row["country_code"],
-                "confidence": row["confidence"],
-                "article_count": 0,
-                "articles": [],
-                "categories": {},
-            }
-        loc_map[key]["article_count"] += 1
-        cat = row["category"] or "mentioned"
-        loc_map[key]["categories"][cat] = loc_map[key]["categories"].get(cat, 0) + 1
-        # Include at most 10 articles per place
-        if len(loc_map[key]["articles"]) < 10:
-            loc_map[key]["articles"].append({
-                "id": row["article_id"],
-                "headline": row["headline_de"] or row["headline"],
-                "source": row["source"],
-                "source_url": row["source_url"],
-            })
-
-    # Determine the dominant category per place (priority: primary > secondary > tertiary > mentioned)
+    loc_rows = [dict(r) for r in await cursor.fetchall()]
+
+    # 2. Categories per place
+    cursor = await db.execute(
+        """SELECT
+               COALESCE(location_name_normalized, location_name) AS name,
+               ROUND(latitude, 2) AS lat,
+               ROUND(longitude, 2) AS lon,
+               COALESCE(category, 'mentioned') AS category,
+               COUNT(*) AS cnt
+           FROM article_locations
+           WHERE incident_id = ?
+           GROUP BY name, lat, lon, category""",
+        (incident_id,),
+    )
+    cat_map: dict[tuple, dict[str, int]] = {}
+    for r in await cursor.fetchall():
+        key = (r["name"], r["lat"], r["lon"])
+        cat_map.setdefault(key, {})[r["category"]] = r["cnt"]
+
+    # 3. Sample articles per place (at most 10, newest first)
+    cursor = await db.execute(
+        """SELECT name, lat, lon, article_id, headline, headline_de, source, source_url
+           FROM (
+               SELECT
+                   COALESCE(al.location_name_normalized, al.location_name) AS name,
+                   ROUND(al.latitude, 2) AS lat,
+                   ROUND(al.longitude, 2) AS lon,
+                   a.id AS article_id,
+                   a.headline, a.headline_de, a.source, a.source_url,
+                   ROW_NUMBER() OVER (
+                       PARTITION BY COALESCE(al.location_name_normalized, al.location_name),
+                                    ROUND(al.latitude, 2), ROUND(al.longitude, 2)
+                       ORDER BY a.collected_at DESC
+                   ) AS rn
+               FROM article_locations al
+               JOIN articles a ON a.id = al.article_id
+               WHERE al.incident_id = ?
+           )
+           WHERE rn <= 10""",
+        (incident_id,),
+    )
+    sample_map: dict[tuple, list[dict]] = {}
+    for r in await cursor.fetchall():
+        key = (r["name"], r["lat"], r["lon"])
+        sample_map.setdefault(key, []).append({
+            "id": r["article_id"],
+            "headline": r["headline_de"] or r["headline"],
+            "source": r["source"],
+            "source_url": r["source_url"],
+        })
+
+    # Assemble the result
     priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
     result = []
-    for loc in loc_map.values():
-        cats = loc.pop("categories")
-        if cats:
-            best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c]))
-        else:
-            best_cat = "mentioned"
-        loc["category"] = best_cat
-        result.append(loc)
+    for loc in loc_rows:
+        key = (loc["name"], loc["lat"], loc["lon"])
+        cats = cat_map.get(key, {})
+        best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c])) if cats else "mentioned"
+        result.append({
+            "location_name": loc["name"],
+            "lat": loc["lat"],
+            "lon": loc["lon"],
+            "country_code": loc["country_code"],
+            "confidence": loc["confidence"],
+            "article_count": loc["article_count"],
+            "articles": sample_map.get(key, []),
+            "category": best_cat,
+        })
+
     # Load category labels from the incident
     cursor = await db.execute(