Locations: Aggregation in SQL (GROUP BY + Window)
Ersetzt den rohen JOIN ueber article_locations x articles (bei Iran
21.814 Zeilen, 11 MB Payload) durch drei kleine aggregierte Queries:
1. Orte per GROUP BY (name, lat, lon) — direkt die Ergebnismenge.
2. Kategorien pro Ort per GROUP BY fuer die dominante Kategorie.
3. Sample-Artikel (max. 10 pro Ort) via ROW_NUMBER() OVER (PARTITION BY ...).
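Response-Shape unveraendert ({category_labels, locations: [...]}), keine
Frontend-Aenderung noetig. Priorisierung primary > secondary > tertiary >
mentioned bleibt erhalten. Hinweis: Einzelne Feldwerte aendern sich leicht —
lat/lon sind jetzt auf 2 Nachkommastellen gerundet, confidence ist
MAX(confidence) und country_code MIN(country_code) je Ort statt der Werte
der jeweils ersten Zeile.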
Erwarteter Effekt: Iran-Locations 11 MB -> <500 KB; Query-Zeit sinkt
zusaetzlich, da kein 21k-Zeilen-JOIN mehr materialisiert werden muss.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -543,60 +543,100 @@ async def get_locations(
|
|||||||
current_user: dict = Depends(get_current_user),
|
current_user: dict = Depends(get_current_user),
|
||||||
db: aiosqlite.Connection = Depends(db_dependency),
|
db: aiosqlite.Connection = Depends(db_dependency),
|
||||||
):
|
):
|
||||||
"""Geografische Orte einer Lage abrufen (aggregiert nach Ort)."""
|
"""Geografische Orte einer Lage abrufen (serverseitig aggregiert nach Ort).
|
||||||
|
|
||||||
|
Drei getrennte Queries (alle klein) statt eines 21k-Zeilen-JOINs:
|
||||||
|
1. Orte-Aggregate per GROUP BY (name, lat, lon) — liefert direkt nahezu die Ergebnismenge.
|
||||||
|
2. Kategorien pro Ort per GROUP BY (name, lat, lon, category) — fuer dominante Kategorie.
|
||||||
|
3. Sample-Artikel pro Ort via ROW_NUMBER() — max. 10 pro Ort.
|
||||||
|
"""
|
||||||
tenant_id = current_user.get("tenant_id")
|
tenant_id = current_user.get("tenant_id")
|
||||||
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
||||||
|
|
||||||
|
# 1. Orte-Aggregate
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"""SELECT al.location_name, al.location_name_normalized, al.country_code,
|
"""SELECT
|
||||||
al.latitude, al.longitude, al.confidence, al.category,
|
COALESCE(location_name_normalized, location_name) AS name,
|
||||||
a.id as article_id, a.headline, a.headline_de, a.source, a.source_url
|
ROUND(latitude, 2) AS lat,
|
||||||
FROM article_locations al
|
ROUND(longitude, 2) AS lon,
|
||||||
JOIN articles a ON a.id = al.article_id
|
MIN(country_code) AS country_code,
|
||||||
WHERE al.incident_id = ?
|
MAX(confidence) AS confidence,
|
||||||
ORDER BY al.location_name_normalized, a.collected_at DESC""",
|
COUNT(*) AS article_count
|
||||||
|
FROM article_locations
|
||||||
|
WHERE incident_id = ?
|
||||||
|
GROUP BY name, lat, lon
|
||||||
|
ORDER BY article_count DESC""",
|
||||||
(incident_id,),
|
(incident_id,),
|
||||||
)
|
)
|
||||||
rows = await cursor.fetchall()
|
loc_rows = [dict(r) for r in await cursor.fetchall()]
|
||||||
|
|
||||||
# Aggregierung nach normalisiertem Ortsnamen + Koordinaten
|
# 2. Kategorien pro Ort
|
||||||
loc_map = {}
|
cursor = await db.execute(
|
||||||
for row in rows:
|
"""SELECT
|
||||||
row = dict(row)
|
COALESCE(location_name_normalized, location_name) AS name,
|
||||||
key = (row["location_name_normalized"] or row["location_name"], round(row["latitude"], 2), round(row["longitude"], 2))
|
ROUND(latitude, 2) AS lat,
|
||||||
if key not in loc_map:
|
ROUND(longitude, 2) AS lon,
|
||||||
loc_map[key] = {
|
COALESCE(category, 'mentioned') AS category,
|
||||||
"location_name": row["location_name_normalized"] or row["location_name"],
|
COUNT(*) AS cnt
|
||||||
"lat": row["latitude"],
|
FROM article_locations
|
||||||
"lon": row["longitude"],
|
WHERE incident_id = ?
|
||||||
"country_code": row["country_code"],
|
GROUP BY name, lat, lon, category""",
|
||||||
"confidence": row["confidence"],
|
(incident_id,),
|
||||||
"article_count": 0,
|
)
|
||||||
"articles": [],
|
cat_map: dict[tuple, dict[str, int]] = {}
|
||||||
"categories": {},
|
for r in await cursor.fetchall():
|
||||||
}
|
key = (r["name"], r["lat"], r["lon"])
|
||||||
loc_map[key]["article_count"] += 1
|
cat_map.setdefault(key, {})[r["category"]] = r["cnt"]
|
||||||
cat = row["category"] or "mentioned"
|
|
||||||
loc_map[key]["categories"][cat] = loc_map[key]["categories"].get(cat, 0) + 1
|
|
||||||
# Maximal 10 Artikel pro Ort mitliefern
|
|
||||||
if len(loc_map[key]["articles"]) < 10:
|
|
||||||
loc_map[key]["articles"].append({
|
|
||||||
"id": row["article_id"],
|
|
||||||
"headline": row["headline_de"] or row["headline"],
|
|
||||||
"source": row["source"],
|
|
||||||
"source_url": row["source_url"],
|
|
||||||
})
|
|
||||||
|
|
||||||
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned)
|
# 3. Sample-Artikel pro Ort (max. 10, neueste zuerst)
|
||||||
|
cursor = await db.execute(
|
||||||
|
"""SELECT name, lat, lon, article_id, headline, headline_de, source, source_url
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
COALESCE(al.location_name_normalized, al.location_name) AS name,
|
||||||
|
ROUND(al.latitude, 2) AS lat,
|
||||||
|
ROUND(al.longitude, 2) AS lon,
|
||||||
|
a.id AS article_id,
|
||||||
|
a.headline, a.headline_de, a.source, a.source_url,
|
||||||
|
ROW_NUMBER() OVER (
|
||||||
|
PARTITION BY COALESCE(al.location_name_normalized, al.location_name),
|
||||||
|
ROUND(al.latitude, 2), ROUND(al.longitude, 2)
|
||||||
|
ORDER BY a.collected_at DESC
|
||||||
|
) AS rn
|
||||||
|
FROM article_locations al
|
||||||
|
JOIN articles a ON a.id = al.article_id
|
||||||
|
WHERE al.incident_id = ?
|
||||||
|
)
|
||||||
|
WHERE rn <= 10""",
|
||||||
|
(incident_id,),
|
||||||
|
)
|
||||||
|
sample_map: dict[tuple, list[dict]] = {}
|
||||||
|
for r in await cursor.fetchall():
|
||||||
|
key = (r["name"], r["lat"], r["lon"])
|
||||||
|
sample_map.setdefault(key, []).append({
|
||||||
|
"id": r["article_id"],
|
||||||
|
"headline": r["headline_de"] or r["headline"],
|
||||||
|
"source": r["source"],
|
||||||
|
"source_url": r["source_url"],
|
||||||
|
})
|
||||||
|
|
||||||
|
# Zusammensetzen
|
||||||
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
|
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
|
||||||
result = []
|
result = []
|
||||||
for loc in loc_map.values():
|
for loc in loc_rows:
|
||||||
cats = loc.pop("categories")
|
key = (loc["name"], loc["lat"], loc["lon"])
|
||||||
if cats:
|
cats = cat_map.get(key, {})
|
||||||
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c]))
|
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c])) if cats else "mentioned"
|
||||||
else:
|
result.append({
|
||||||
best_cat = "mentioned"
|
"location_name": loc["name"],
|
||||||
loc["category"] = best_cat
|
"lat": loc["lat"],
|
||||||
result.append(loc)
|
"lon": loc["lon"],
|
||||||
|
"country_code": loc["country_code"],
|
||||||
|
"confidence": loc["confidence"],
|
||||||
|
"article_count": loc["article_count"],
|
||||||
|
"articles": sample_map.get(key, []),
|
||||||
|
"category": best_cat,
|
||||||
|
})
|
||||||
|
|
||||||
# Category-Labels aus Incident laden
|
# Category-Labels aus Incident laden
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren