Geoparsing von spaCy auf Haiku umgestellt

- geoparsing.py: Komplett-Rewrite (spaCy NER + Nominatim -> Haiku + geonamescache)
- orchestrator.py: incident_context an geoparse_articles uebergeben, category in INSERT aufgenommen
- incidents.py: incident_context aus DB laden und an Geoparsing uebergeben
- public_api.py: Locations aggregiert im Lagebild-Endpoint
- components.js: response-Kategorie neben retaliation (beide akzeptiert)
- requirements.txt: spaCy und geopy entfernt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Dieser Commit ist enthalten in:
claude-dev
2026-03-07 22:00:40 +01:00
Ursprung 7e600184e8
Commit 5ae61a1379
6 geänderte Dateien mit 355 neuen und 369 gelöschten Zeilen

Datei anzeigen

@@ -351,6 +351,15 @@ async def _run_geoparse_background(incident_id: int, tenant_id: int | None):
from agents.geoparsing import geoparse_articles
db = await get_db()
# Incident-Kontext fuer Haiku laden
cursor = await db.execute(
"SELECT title, description FROM incidents WHERE id = ?", (incident_id,)
)
inc_row = await cursor.fetchone()
incident_context = ""
if inc_row:
incident_context = f"{inc_row['title']} - {inc_row['description'] or ''}"
cursor = await db.execute(
"""SELECT a.* FROM articles a
WHERE a.incident_id = ?
@@ -373,7 +382,7 @@ async def _run_geoparse_background(incident_id: int, tenant_id: int | None):
processed = 0
for i in range(0, total, batch_size):
batch = articles[i:i + batch_size]
-geo_results = await geoparse_articles(batch)
+geo_results = await geoparse_articles(batch, incident_context)
for art_id, locations in geo_results.items():
for loc in locations:
await db.execute(

Datei anzeigen

@@ -108,6 +108,23 @@ async def get_lagebild(db=Depends(db_dependency)):
except (json.JSONDecodeError, TypeError):
sources_json = []
# Locations aggregiert nach normalisierten Ortsnamen
cursor = await db.execute(
f"""SELECT
al.location_name_normalized as name,
al.latitude as lat,
al.longitude as lon,
al.country_code,
al.category,
COUNT(*) as article_count,
MAX(al.confidence) as confidence
FROM article_locations al
WHERE al.incident_id IN ({ids})
GROUP BY al.location_name_normalized
ORDER BY article_count DESC"""
)
locations = [dict(r) for r in await cursor.fetchall()]
return {
"generated_at": datetime.now(TIMEZONE).isoformat(),
"incident": {
@@ -130,6 +147,7 @@ async def get_lagebild(db=Depends(db_dependency)):
"articles": articles,
"fact_checks": fact_checks,
"available_snapshots": available_snapshots,
"locations": locations,
}