Locations: Aggregation in SQL (GROUP BY + Window)
Ersetzt den rohen JOIN über article_locations × articles (bei Iran
21.814 Zeilen, 11 MB Payload) durch drei kleine aggregierte Queries:
1. Orte per GROUP BY (name, lat, lon) — direkt die Ergebnismenge.
2. Kategorien pro Ort per GROUP BY für die dominante Kategorie.
3. Sample-Artikel (max. 10 pro Ort) via ROW_NUMBER() OVER (PARTITION BY …).
Response-Shape unverändert ({category_labels, locations: [...]}), keine
Frontend-Änderung nötig. Priorisierung primary > secondary > tertiary >
mentioned bleibt erhalten.
Erwarteter Effekt: Iran-Locations 11 MB -> <500 KB; Query-Zeit sinkt
zusätzlich, da kein 21k-Zeilen-JOIN mehr materialisiert werden muss.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -543,60 +543,100 @@ async def get_locations(
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Geografische Orte einer Lage abrufen (aggregiert nach Ort)."""
|
||||
"""Geografische Orte einer Lage abrufen (serverseitig aggregiert nach Ort).
|
||||
|
||||
Drei getrennte Queries (alle klein) statt eines 21k-Zeilen-JOINs:
|
||||
1. Orte-Aggregate per GROUP BY (name, lat, lon) — liefert direkt ~Ergebnismenge.
|
||||
2. Kategorien pro Ort per GROUP BY (name, lat, lon, category) — fuer dominante Kategorie.
|
||||
3. Sample-Artikel pro Ort via ROW_NUMBER() — max. 10 pro Ort.
|
||||
"""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
||||
|
||||
# 1. Orte-Aggregate
|
||||
cursor = await db.execute(
|
||||
"""SELECT al.location_name, al.location_name_normalized, al.country_code,
|
||||
al.latitude, al.longitude, al.confidence, al.category,
|
||||
a.id as article_id, a.headline, a.headline_de, a.source, a.source_url
|
||||
FROM article_locations al
|
||||
JOIN articles a ON a.id = al.article_id
|
||||
WHERE al.incident_id = ?
|
||||
ORDER BY al.location_name_normalized, a.collected_at DESC""",
|
||||
"""SELECT
|
||||
COALESCE(location_name_normalized, location_name) AS name,
|
||||
ROUND(latitude, 2) AS lat,
|
||||
ROUND(longitude, 2) AS lon,
|
||||
MIN(country_code) AS country_code,
|
||||
MAX(confidence) AS confidence,
|
||||
COUNT(*) AS article_count
|
||||
FROM article_locations
|
||||
WHERE incident_id = ?
|
||||
GROUP BY name, lat, lon
|
||||
ORDER BY article_count DESC""",
|
||||
(incident_id,),
|
||||
)
|
||||
rows = await cursor.fetchall()
|
||||
loc_rows = [dict(r) for r in await cursor.fetchall()]
|
||||
|
||||
# Aggregierung nach normalisiertem Ortsnamen + Koordinaten
|
||||
loc_map = {}
|
||||
for row in rows:
|
||||
row = dict(row)
|
||||
key = (row["location_name_normalized"] or row["location_name"], round(row["latitude"], 2), round(row["longitude"], 2))
|
||||
if key not in loc_map:
|
||||
loc_map[key] = {
|
||||
"location_name": row["location_name_normalized"] or row["location_name"],
|
||||
"lat": row["latitude"],
|
||||
"lon": row["longitude"],
|
||||
"country_code": row["country_code"],
|
||||
"confidence": row["confidence"],
|
||||
"article_count": 0,
|
||||
"articles": [],
|
||||
"categories": {},
|
||||
}
|
||||
loc_map[key]["article_count"] += 1
|
||||
cat = row["category"] or "mentioned"
|
||||
loc_map[key]["categories"][cat] = loc_map[key]["categories"].get(cat, 0) + 1
|
||||
# Maximal 10 Artikel pro Ort mitliefern
|
||||
if len(loc_map[key]["articles"]) < 10:
|
||||
loc_map[key]["articles"].append({
|
||||
"id": row["article_id"],
|
||||
"headline": row["headline_de"] or row["headline"],
|
||||
"source": row["source"],
|
||||
"source_url": row["source_url"],
|
||||
})
|
||||
# 2. Kategorien pro Ort
|
||||
cursor = await db.execute(
|
||||
"""SELECT
|
||||
COALESCE(location_name_normalized, location_name) AS name,
|
||||
ROUND(latitude, 2) AS lat,
|
||||
ROUND(longitude, 2) AS lon,
|
||||
COALESCE(category, 'mentioned') AS category,
|
||||
COUNT(*) AS cnt
|
||||
FROM article_locations
|
||||
WHERE incident_id = ?
|
||||
GROUP BY name, lat, lon, category""",
|
||||
(incident_id,),
|
||||
)
|
||||
cat_map: dict[tuple, dict[str, int]] = {}
|
||||
for r in await cursor.fetchall():
|
||||
key = (r["name"], r["lat"], r["lon"])
|
||||
cat_map.setdefault(key, {})[r["category"]] = r["cnt"]
|
||||
|
||||
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned)
|
||||
# 3. Sample-Artikel pro Ort (max. 10, neueste zuerst)
|
||||
cursor = await db.execute(
|
||||
"""SELECT name, lat, lon, article_id, headline, headline_de, source, source_url
|
||||
FROM (
|
||||
SELECT
|
||||
COALESCE(al.location_name_normalized, al.location_name) AS name,
|
||||
ROUND(al.latitude, 2) AS lat,
|
||||
ROUND(al.longitude, 2) AS lon,
|
||||
a.id AS article_id,
|
||||
a.headline, a.headline_de, a.source, a.source_url,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY COALESCE(al.location_name_normalized, al.location_name),
|
||||
ROUND(al.latitude, 2), ROUND(al.longitude, 2)
|
||||
ORDER BY a.collected_at DESC
|
||||
) AS rn
|
||||
FROM article_locations al
|
||||
JOIN articles a ON a.id = al.article_id
|
||||
WHERE al.incident_id = ?
|
||||
)
|
||||
WHERE rn <= 10""",
|
||||
(incident_id,),
|
||||
)
|
||||
sample_map: dict[tuple, list[dict]] = {}
|
||||
for r in await cursor.fetchall():
|
||||
key = (r["name"], r["lat"], r["lon"])
|
||||
sample_map.setdefault(key, []).append({
|
||||
"id": r["article_id"],
|
||||
"headline": r["headline_de"] or r["headline"],
|
||||
"source": r["source"],
|
||||
"source_url": r["source_url"],
|
||||
})
|
||||
|
||||
# Zusammensetzen
|
||||
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
|
||||
result = []
|
||||
for loc in loc_map.values():
|
||||
cats = loc.pop("categories")
|
||||
if cats:
|
||||
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c]))
|
||||
else:
|
||||
best_cat = "mentioned"
|
||||
loc["category"] = best_cat
|
||||
result.append(loc)
|
||||
for loc in loc_rows:
|
||||
key = (loc["name"], loc["lat"], loc["lon"])
|
||||
cats = cat_map.get(key, {})
|
||||
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c])) if cats else "mentioned"
|
||||
result.append({
|
||||
"location_name": loc["name"],
|
||||
"lat": loc["lat"],
|
||||
"lon": loc["lon"],
|
||||
"country_code": loc["country_code"],
|
||||
"confidence": loc["confidence"],
|
||||
"article_count": loc["article_count"],
|
||||
"articles": sample_map.get(key, []),
|
||||
"category": best_cat,
|
||||
})
|
||||
|
||||
# Category-Labels aus Incident laden
|
||||
cursor = await db.execute(
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren