Locations: Aggregation in SQL (GROUP BY + Window)

Ersetzt den rohen JOIN ueber article_locations x articles (bei Iran
21.814 Zeilen, 11 MB Payload) durch drei kleine aggregierte Queries:
  1. Orte per GROUP BY (name, lat, lon) — direkt die Ergebnismenge.
  2. Kategorien pro Ort per GROUP BY fuer die dominante Kategorie.
  3. Sample-Artikel (max. 10 pro Ort) via ROW_NUMBER() OVER PARTITION BY.

Response-Shape unveraendert ({category_labels, locations: [...]}), keine
Frontend-Aenderung noetig. Priorisierung primary > secondary > tertiary >
mentioned bleibt erhalten.

Erwarteter Effekt: Iran-Locations 11 MB -> <500 KB; Query-Zeit sinkt
zusaetzlich, da kein 21k-Zeilen-JOIN mehr materialisiert werden muss.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Commit-Datum: 2026-04-19 23:47:50 +02:00
Ursprungs-Commit: 9a43dffa6c
Commit: a302790777

Datei anzeigen

@@ -543,60 +543,100 @@ async def get_locations(
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Geografische Orte einer Lage abrufen (aggregiert nach Ort)."""
"""Geografische Orte einer Lage abrufen (serverseitig aggregiert nach Ort).
Drei getrennte Queries (alle klein) statt eines 21k-Zeilen-JOINs:
1. Orte-Aggregate per GROUP BY (name, lat, lon) — liefert direkt ~Ergebnismenge.
2. Kategorien pro Ort per GROUP BY (name, lat, lon, category) — fuer dominante Kategorie.
3. Sample-Artikel pro Ort via ROW_NUMBER() — max. 10 pro Ort.
"""
tenant_id = current_user.get("tenant_id")
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
# 1. Orte-Aggregate
cursor = await db.execute(
"""SELECT al.location_name, al.location_name_normalized, al.country_code,
al.latitude, al.longitude, al.confidence, al.category,
a.id as article_id, a.headline, a.headline_de, a.source, a.source_url
FROM article_locations al
JOIN articles a ON a.id = al.article_id
WHERE al.incident_id = ?
ORDER BY al.location_name_normalized, a.collected_at DESC""",
"""SELECT
COALESCE(location_name_normalized, location_name) AS name,
ROUND(latitude, 2) AS lat,
ROUND(longitude, 2) AS lon,
MIN(country_code) AS country_code,
MAX(confidence) AS confidence,
COUNT(*) AS article_count
FROM article_locations
WHERE incident_id = ?
GROUP BY name, lat, lon
ORDER BY article_count DESC""",
(incident_id,),
)
rows = await cursor.fetchall()
loc_rows = [dict(r) for r in await cursor.fetchall()]
# Aggregierung nach normalisiertem Ortsnamen + Koordinaten
loc_map = {}
for row in rows:
row = dict(row)
key = (row["location_name_normalized"] or row["location_name"], round(row["latitude"], 2), round(row["longitude"], 2))
if key not in loc_map:
loc_map[key] = {
"location_name": row["location_name_normalized"] or row["location_name"],
"lat": row["latitude"],
"lon": row["longitude"],
"country_code": row["country_code"],
"confidence": row["confidence"],
"article_count": 0,
"articles": [],
"categories": {},
}
loc_map[key]["article_count"] += 1
cat = row["category"] or "mentioned"
loc_map[key]["categories"][cat] = loc_map[key]["categories"].get(cat, 0) + 1
# Maximal 10 Artikel pro Ort mitliefern
if len(loc_map[key]["articles"]) < 10:
loc_map[key]["articles"].append({
"id": row["article_id"],
"headline": row["headline_de"] or row["headline"],
"source": row["source"],
"source_url": row["source_url"],
})
# 2. Kategorien pro Ort
cursor = await db.execute(
"""SELECT
COALESCE(location_name_normalized, location_name) AS name,
ROUND(latitude, 2) AS lat,
ROUND(longitude, 2) AS lon,
COALESCE(category, 'mentioned') AS category,
COUNT(*) AS cnt
FROM article_locations
WHERE incident_id = ?
GROUP BY name, lat, lon, category""",
(incident_id,),
)
cat_map: dict[tuple, dict[str, int]] = {}
for r in await cursor.fetchall():
key = (r["name"], r["lat"], r["lon"])
cat_map.setdefault(key, {})[r["category"]] = r["cnt"]
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned)
# 3. Sample-Artikel pro Ort (max. 10, neueste zuerst)
cursor = await db.execute(
"""SELECT name, lat, lon, article_id, headline, headline_de, source, source_url
FROM (
SELECT
COALESCE(al.location_name_normalized, al.location_name) AS name,
ROUND(al.latitude, 2) AS lat,
ROUND(al.longitude, 2) AS lon,
a.id AS article_id,
a.headline, a.headline_de, a.source, a.source_url,
ROW_NUMBER() OVER (
PARTITION BY COALESCE(al.location_name_normalized, al.location_name),
ROUND(al.latitude, 2), ROUND(al.longitude, 2)
ORDER BY a.collected_at DESC
) AS rn
FROM article_locations al
JOIN articles a ON a.id = al.article_id
WHERE al.incident_id = ?
)
WHERE rn <= 10""",
(incident_id,),
)
sample_map: dict[tuple, list[dict]] = {}
for r in await cursor.fetchall():
key = (r["name"], r["lat"], r["lon"])
sample_map.setdefault(key, []).append({
"id": r["article_id"],
"headline": r["headline_de"] or r["headline"],
"source": r["source"],
"source_url": r["source_url"],
})
# Zusammensetzen
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
result = []
for loc in loc_map.values():
cats = loc.pop("categories")
if cats:
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c]))
else:
best_cat = "mentioned"
loc["category"] = best_cat
result.append(loc)
for loc in loc_rows:
key = (loc["name"], loc["lat"], loc["lon"])
cats = cat_map.get(key, {})
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c])) if cats else "mentioned"
result.append({
"location_name": loc["name"],
"lat": loc["lat"],
"lon": loc["lon"],
"country_code": loc["country_code"],
"confidence": loc["confidence"],
"article_count": loc["article_count"],
"articles": sample_map.get(key, []),
"category": best_cat,
})
# Category-Labels aus Incident laden
cursor = await db.execute(