Locations: Aggregation in SQL (GROUP BY + Window)

Ersetzt den rohen JOIN ueber article_locations x articles (bei Iran
21.814 Zeilen, 11 MB Payload) durch drei kleine aggregierte Queries:
  1. Orte per GROUP BY (name, lat, lon) — direkt die Ergebnismenge.
  2. Kategorien pro Ort per GROUP BY fuer die dominante Kategorie.
  3. Sample-Artikel (max. 10 pro Ort) via ROW_NUMBER() OVER PARTITION BY.

Response-Shape unveraendert ({category_labels, locations: [...]}), keine
Frontend-Aenderung noetig. Priorisierung primary > secondary > tertiary >
mentioned bleibt erhalten.

Erwarteter Effekt: Iran-Locations 11 MB -> <500 KB; Query-Zeit sinkt
zusaetzlich, da kein 21k-Zeilen-JOIN mehr materialisiert werden muss.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Commit-Datum: 2026-04-19 23:47:50 +02:00
Ursprungs-Commit: 9a43dffa6c
Commit: a302790777

Datei anzeigen

@@ -543,60 +543,100 @@ async def get_locations(
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Geografische Orte einer Lage abrufen (aggregiert nach Ort)."""
"""Geografische Orte einer Lage abrufen (serverseitig aggregiert nach Ort).
Drei getrennte Queries (alle klein) statt eines 21k-Zeilen-JOINs:
1. Orte-Aggregate per GROUP BY (name, lat, lon) — liefert direkt ~Ergebnismenge.
2. Kategorien pro Ort per GROUP BY (name, lat, lon, category) — fuer dominante Kategorie.
3. Sample-Artikel pro Ort via ROW_NUMBER() — max. 10 pro Ort.
"""
tenant_id = current_user.get("tenant_id")
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
# 1. Orte-Aggregate
cursor = await db.execute(
"""SELECT al.location_name, al.location_name_normalized, al.country_code,
al.latitude, al.longitude, al.confidence, al.category,
a.id as article_id, a.headline, a.headline_de, a.source, a.source_url
FROM article_locations al
JOIN articles a ON a.id = al.article_id
WHERE al.incident_id = ?
ORDER BY al.location_name_normalized, a.collected_at DESC""",
"""SELECT
COALESCE(location_name_normalized, location_name) AS name,
ROUND(latitude, 2) AS lat,
ROUND(longitude, 2) AS lon,
MIN(country_code) AS country_code,
MAX(confidence) AS confidence,
COUNT(*) AS article_count
FROM article_locations
WHERE incident_id = ?
GROUP BY name, lat, lon
ORDER BY article_count DESC""",
(incident_id,),
)
rows = await cursor.fetchall()
loc_rows = [dict(r) for r in await cursor.fetchall()]
# Aggregierung nach normalisiertem Ortsnamen + Koordinaten
loc_map = {}
for row in rows:
row = dict(row)
key = (row["location_name_normalized"] or row["location_name"], round(row["latitude"], 2), round(row["longitude"], 2))
if key not in loc_map:
loc_map[key] = {
"location_name": row["location_name_normalized"] or row["location_name"],
"lat": row["latitude"],
"lon": row["longitude"],
"country_code": row["country_code"],
"confidence": row["confidence"],
"article_count": 0,
"articles": [],
"categories": {},
}
loc_map[key]["article_count"] += 1
cat = row["category"] or "mentioned"
loc_map[key]["categories"][cat] = loc_map[key]["categories"].get(cat, 0) + 1
# Maximal 10 Artikel pro Ort mitliefern
if len(loc_map[key]["articles"]) < 10:
loc_map[key]["articles"].append({
"id": row["article_id"],
"headline": row["headline_de"] or row["headline"],
"source": row["source"],
"source_url": row["source_url"],
})
# 2. Kategorien pro Ort
cursor = await db.execute(
"""SELECT
COALESCE(location_name_normalized, location_name) AS name,
ROUND(latitude, 2) AS lat,
ROUND(longitude, 2) AS lon,
COALESCE(category, 'mentioned') AS category,
COUNT(*) AS cnt
FROM article_locations
WHERE incident_id = ?
GROUP BY name, lat, lon, category""",
(incident_id,),
)
cat_map: dict[tuple, dict[str, int]] = {}
for r in await cursor.fetchall():
key = (r["name"], r["lat"], r["lon"])
cat_map.setdefault(key, {})[r["category"]] = r["cnt"]
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned)
# 3. Sample-Artikel pro Ort (max. 10, neueste zuerst)
cursor = await db.execute(
"""SELECT name, lat, lon, article_id, headline, headline_de, source, source_url
FROM (
SELECT
COALESCE(al.location_name_normalized, al.location_name) AS name,
ROUND(al.latitude, 2) AS lat,
ROUND(al.longitude, 2) AS lon,
a.id AS article_id,
a.headline, a.headline_de, a.source, a.source_url,
ROW_NUMBER() OVER (
PARTITION BY COALESCE(al.location_name_normalized, al.location_name),
ROUND(al.latitude, 2), ROUND(al.longitude, 2)
ORDER BY a.collected_at DESC
) AS rn
FROM article_locations al
JOIN articles a ON a.id = al.article_id
WHERE al.incident_id = ?
)
WHERE rn <= 10""",
(incident_id,),
)
sample_map: dict[tuple, list[dict]] = {}
for r in await cursor.fetchall():
key = (r["name"], r["lat"], r["lon"])
sample_map.setdefault(key, []).append({
"id": r["article_id"],
"headline": r["headline_de"] or r["headline"],
"source": r["source"],
"source_url": r["source_url"],
})
# Zusammensetzen
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
result = []
for loc in loc_map.values():
cats = loc.pop("categories")
if cats:
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c]))
else:
best_cat = "mentioned"
loc["category"] = best_cat
result.append(loc)
for loc in loc_rows:
key = (loc["name"], loc["lat"], loc["lon"])
cats = cat_map.get(key, {})
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c])) if cats else "mentioned"
result.append({
"location_name": loc["name"],
"lat": loc["lat"],
"lon": loc["lon"],
"country_code": loc["country_code"],
"confidence": loc["confidence"],
"article_count": loc["article_count"],
"articles": sample_map.get(key, []),
"category": best_cat,
})
# Category-Labels aus Incident laden
cursor = await db.execute(