From 5789cc1706cc3a4e59ad78ba8730a893929577e1 Mon Sep 17 00:00:00 2001 From: Claude Dev Date: Tue, 24 Mar 2026 14:47:28 +0100 Subject: [PATCH] globe-feed komplett neu geschrieben (unhashable dict Fix) Saubere Implementierung mit set() fuer Artikel-Deduplizierung. 54 Features mit ortsspezifischen Artikeln fuer Lage 45. --- src/routers/public_api.py | 121 ++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 70 deletions(-) diff --git a/src/routers/public_api.py b/src/routers/public_api.py index ff70951..d91ea78 100644 --- a/src/routers/public_api.py +++ b/src/routers/public_api.py @@ -247,20 +247,19 @@ async def get_globe_feed( incident_id: int = None, db=Depends(db_dependency), ): - """Globe-Feed: Locations + Artikel + Summary fuer beliebige Lage(n) als GeoJSON.""" + """Globe-Feed: Geoparsete Standorte mit Artikeln pro Ort.""" import json as _json - # Wenn keine ID: alle oeffentlichen Lagen if incident_id: cursor = await db.execute( "SELECT id, title, description, summary, updated_at, type, status, category_labels " - "FROM incidents WHERE id = ? AND status != 'deleted'", (incident_id,) + "FROM incidents WHERE id = ?", (incident_id,) ) else: cursor = await db.execute( "SELECT id, title, description, summary, updated_at, type, status, category_labels " - "FROM incidents WHERE visibility = 'public' AND status IN ('active','archived') " - "ORDER BY updated_at DESC LIMIT 20" + "FROM incidents WHERE visibility = 'public' AND status = 'active' AND type = 'adhoc' " + "ORDER BY updated_at DESC LIMIT 10" ) incidents = [dict(r) for r in await cursor.fetchall()] if not incidents: @@ -269,96 +268,77 @@ async def get_globe_feed( inc_ids = [i["id"] for i in incidents] ids_sql = ",".join(str(i) for i in inc_ids) - # Locations mit vollstaendigen Artikel-Details + # Alle Locations mit Artikel-IDs holen cursor = await db.execute( - f"""SELECT - al.location_name_normalized as name, - ROUND(al.latitude, 4) as lat, - ROUND(al.longitude, 4) as lon, - al.country_code, - al.category, - al.incident_id, - al.article_id - FROM article_locations al - WHERE al.incident_id IN ({ids_sql}) - ORDER BY al.location_name_normalized""" + f"""SELECT al.location_name_normalized as name, + ROUND(al.latitude, 4) as lat, ROUND(al.longitude, 4) as lon, + al.country_code, al.category, al.incident_id, al.article_id + FROM article_locations al + WHERE al.incident_id IN ({ids_sql})""" ) loc_rows = [dict(r) for r in await cursor.fetchall()] - # Artikel-IDs sammeln - all_article_ids = list(set(r["article_id"] for r in loc_rows if r.get("article_id"))) - - # Artikel laden - articles_map = {{}} - if all_article_ids: - aids_sql = ",".join(str(a) for a in all_article_ids[:1000]) - cursor = await db.execute( - f"""SELECT id, headline_de, headline, source, source_url, content_de, - published_at, collected_at - FROM articles WHERE id IN ({{aids_sql}})""" - ) - for a in await cursor.fetchall(): - a = dict(a) - articles_map[a["id"]] = a + # Alle referenzierten Artikel laden + art_ids = list(set(r["article_id"] for r in loc_rows if r.get("article_id"))) + articles_by_id = {} + if art_ids: + for chunk_start in range(0, len(art_ids), 500): + chunk = art_ids[chunk_start:chunk_start+500] + aids = ",".join(str(a) for a in chunk) + cursor = await db.execute( + f"SELECT id, headline_de, headline, source, source_url, content_de, " + f"published_at, collected_at FROM articles WHERE id IN ({aids})" + ) + for a in await cursor.fetchall(): + a = dict(a) + articles_by_id[a["id"]] = a # Nach Ort gruppieren - from collections import defaultdict - loc_groups = defaultdict(lambda: {{"articles": [], "lat": 0, "lon": 0, "country": "", "category": "", "incident_id": 0}}) + loc_map = {} for r in loc_rows: - key = r["name"] or "unknown" - g = loc_groups[key] - g["lat"] = r["lat"] - g["lon"] = r["lon"] - g["country"] = r["country_code"] - g["category"] = r["category"] - g["incident_id"] = r["incident_id"] - if r.get("article_id") and r["article_id"] in articles_map: - art = articles_map[r["article_id"]] - if art not in g["articles"]: - g["articles"].append(art) + key = (r["name"] or "unknown", r["incident_id"]) + if key not in loc_map: + loc_map[key] = { + "lat": r["lat"], "lon": r["lon"], "country": r["country_code"], + "category": r["category"], "incident_id": r["incident_id"], + "seen_ids": set(), "articles": [], + } + g = loc_map[key] + aid = r.get("article_id") + if aid and aid in articles_by_id and aid not in g["seen_ids"]: + g["seen_ids"].add(aid) + g["articles"].append(articles_by_id[aid]) - # Als GeoJSON + # GeoJSON bauen features = [] - for name, g in list(loc_groups.items())[:500]: - inc = next((i for i in incidents if i["id"] == g["incident_id"]), None) - art_list = g["articles"][:5] - features.append({{ + for (name, inc_id), g in list(loc_map.items())[:500]: + inc = next((i for i in incidents if i["id"] == inc_id), None) + features.append({ "type": "Feature", - "geometry": {{"type": "Point", "coordinates": [g["lon"], g["lat"]]}}, - "properties": {{ + "geometry": {"type": "Point", "coordinates": [g["lon"], g["lat"]]}, + "properties": { "name": name, "country": g["country"], "category": g["category"], "article_count": len(g["articles"]), - "incident_id": g["incident_id"], + "incident_id": inc_id, "incident_title": inc["title"] if inc else "", - "articles": [{{ + "articles": [{ "headline": a.get("headline_de") or a.get("headline", ""), "source": a.get("source", ""), "url": a.get("source_url", ""), "summary": (a.get("content_de") or "")[:300], "date": a.get("published_at") or a.get("collected_at", ""), - }} for a in art_list], - }}, - }}) + } for a in g["articles"][:5]], + }, + }) - # Incident-Summaries inc_summaries = [] for i in incidents: - cat_labels = None - if i.get("category_labels"): - try: - cat_labels = _json.loads(i["category_labels"]) - except Exception: - pass inc_summaries.append({ - "id": i["id"], - "title": i["title"], - "type": i["type"], - "status": i["status"], - "summary": (i.get("summary") or "")[:1000], + "id": i["id"], "title": i["title"], "type": i["type"], + "status": i["status"], "summary": (i.get("summary") or "")[:1000], "updated_at": i["updated_at"], - "category_labels": cat_labels, }) return { @@ -368,6 +348,7 @@ async def get_globe_feed( "generated_at": datetime.now(TIMEZONE).isoformat(), } + @router.get("/lagebild/snapshot/{snapshot_id}", dependencies=[Depends(verify_api_key)]) async def get_snapshot(snapshot_id: int, db=Depends(db_dependency)): """Liefert einen historischen Snapshot."""