From a302790777e3e93006bf634093bea57d567f1e3a Mon Sep 17 00:00:00 2001 From: UserIsMH Date: Sun, 19 Apr 2026 23:47:50 +0200 Subject: [PATCH] Locations: Aggregation in SQL (GROUP BY + Window) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ersetzt den rohen JOIN ueber article_locations x articles (bei Iran 21.814 Zeilen, 11 MB Payload) durch drei kleine aggregierte Queries: 1. Orte per GROUP BY (name, lat, lon) — direkt die Ergebnismenge. 2. Kategorien pro Ort per GROUP BY fuer die dominante Kategorie. 3. Sample-Artikel (max. 10 pro Ort) via ROW_NUMBER() OVER PARTITION BY. Response-Shape unveraendert ({category_labels, locations: [...]}), keine Frontend-Aenderung noetig. Priorisierung primary > secondary > tertiary > mentioned bleibt erhalten. Erwarteter Effekt: Iran-Locations 11 MB -> <500 KB; Query-Zeit sinkt zusaetzlich, da kein 21k-Zeilen-JOIN mehr materialisiert werden muss. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/routers/incidents.py | 130 +++++++++++++++++++++++++-------------- 1 file changed, 85 insertions(+), 45 deletions(-) diff --git a/src/routers/incidents.py b/src/routers/incidents.py index e800bb2..377d03c 100644 --- a/src/routers/incidents.py +++ b/src/routers/incidents.py @@ -543,60 +543,100 @@ async def get_locations( current_user: dict = Depends(get_current_user), db: aiosqlite.Connection = Depends(db_dependency), ): - """Geografische Orte einer Lage abrufen (aggregiert nach Ort).""" + """Geografische Orte einer Lage abrufen (serverseitig aggregiert nach Ort). + + Drei getrennte Queries (alle klein) statt eines 21k-Zeilen-JOINs: + 1. Orte-Aggregate per GROUP BY (name, lat, lon) — liefert direkt ~Ergebnismenge. + 2. Kategorien pro Ort per GROUP BY (name, lat, lon, category) — fuer dominante Kategorie. + 3. Sample-Artikel pro Ort via ROW_NUMBER() — max. 10 pro Ort. + """ tenant_id = current_user.get("tenant_id") await _check_incident_access(db, incident_id, current_user["id"], tenant_id) + + # 1. Orte-Aggregate cursor = await db.execute( - """SELECT al.location_name, al.location_name_normalized, al.country_code, - al.latitude, al.longitude, al.confidence, al.category, - a.id as article_id, a.headline, a.headline_de, a.source, a.source_url - FROM article_locations al - JOIN articles a ON a.id = al.article_id - WHERE al.incident_id = ? - ORDER BY al.location_name_normalized, a.collected_at DESC""", + """SELECT + COALESCE(location_name_normalized, location_name) AS name, + ROUND(latitude, 2) AS lat, + ROUND(longitude, 2) AS lon, + MIN(country_code) AS country_code, + MAX(confidence) AS confidence, + COUNT(*) AS article_count + FROM article_locations + WHERE incident_id = ? + GROUP BY name, lat, lon + ORDER BY article_count DESC""", (incident_id,), ) - rows = await cursor.fetchall() + loc_rows = [dict(r) for r in await cursor.fetchall()] - # Aggregierung nach normalisiertem Ortsnamen + Koordinaten - loc_map = {} - for row in rows: - row = dict(row) - key = (row["location_name_normalized"] or row["location_name"], round(row["latitude"], 2), round(row["longitude"], 2)) - if key not in loc_map: - loc_map[key] = { - "location_name": row["location_name_normalized"] or row["location_name"], - "lat": row["latitude"], - "lon": row["longitude"], - "country_code": row["country_code"], - "confidence": row["confidence"], - "article_count": 0, - "articles": [], - "categories": {}, - } - loc_map[key]["article_count"] += 1 - cat = row["category"] or "mentioned" - loc_map[key]["categories"][cat] = loc_map[key]["categories"].get(cat, 0) + 1 - # Maximal 10 Artikel pro Ort mitliefern - if len(loc_map[key]["articles"]) < 10: - loc_map[key]["articles"].append({ - "id": row["article_id"], - "headline": row["headline_de"] or row["headline"], - "source": row["source"], - "source_url": row["source_url"], - }) + # 2. Kategorien pro Ort + cursor = await db.execute( + """SELECT + COALESCE(location_name_normalized, location_name) AS name, + ROUND(latitude, 2) AS lat, + ROUND(longitude, 2) AS lon, + COALESCE(category, 'mentioned') AS category, + COUNT(*) AS cnt + FROM article_locations + WHERE incident_id = ? + GROUP BY name, lat, lon, category""", + (incident_id,), + ) + cat_map: dict[tuple, dict[str, int]] = {} + for r in await cursor.fetchall(): + key = (r["name"], r["lat"], r["lon"]) + cat_map.setdefault(key, {})[r["category"]] = r["cnt"] - # Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned) + # 3. Sample-Artikel pro Ort (max. 10, neueste zuerst) + cursor = await db.execute( + """SELECT name, lat, lon, article_id, headline, headline_de, source, source_url + FROM ( + SELECT + COALESCE(al.location_name_normalized, al.location_name) AS name, + ROUND(al.latitude, 2) AS lat, + ROUND(al.longitude, 2) AS lon, + a.id AS article_id, + a.headline, a.headline_de, a.source, a.source_url, + ROW_NUMBER() OVER ( + PARTITION BY COALESCE(al.location_name_normalized, al.location_name), + ROUND(al.latitude, 2), ROUND(al.longitude, 2) + ORDER BY a.collected_at DESC + ) AS rn + FROM article_locations al + JOIN articles a ON a.id = al.article_id + WHERE al.incident_id = ? + ) + WHERE rn <= 10""", + (incident_id,), + ) + sample_map: dict[tuple, list[dict]] = {} + for r in await cursor.fetchall(): + key = (r["name"], r["lat"], r["lon"]) + sample_map.setdefault(key, []).append({ + "id": r["article_id"], + "headline": r["headline_de"] or r["headline"], + "source": r["source"], + "source_url": r["source_url"], + }) + + # Zusammensetzen priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1} result = [] - for loc in loc_map.values(): - cats = loc.pop("categories") - if cats: - best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c])) - else: - best_cat = "mentioned" - loc["category"] = best_cat - result.append(loc) + for loc in loc_rows: + key = (loc["name"], loc["lat"], loc["lon"]) + cats = cat_map.get(key, {}) + best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c])) if cats else "mentioned" + result.append({ + "location_name": loc["name"], + "lat": loc["lat"], + "lon": loc["lon"], + "country_code": loc["country_code"], + "confidence": loc["confidence"], + "article_count": loc["article_count"], + "articles": sample_map.get(key, []), + "category": best_cat, + }) # Category-Labels aus Incident laden cursor = await db.execute(