feat: Kontextabhängige Karten-Kategorien
4 feste Farbstufen (primary/secondary/tertiary/mentioned) mit variablen Labels pro Lage, die von Haiku generiert werden. - DB: category_labels Spalte in incidents, alte Kategorien migriert (target->primary, response/retaliation->secondary, actor->tertiary) - Geoparsing: generate_category_labels() + neuer Prompt mit neuen Keys - QC: Kategorieprüfung auf neue Keys umgestellt - Orchestrator: Tuple-Rückgabe + Labels in DB speichern - API: category_labels im Locations- und Lagebild-Response - Frontend: Dynamische Legende aus API-Labels mit Fallback-Defaults - Migrationsskript für bestehende Lagen Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
73
migrate_category_labels.py
Normale Datei
73
migrate_category_labels.py
Normale Datei
@@ -0,0 +1,73 @@
|
|||||||
|
#!/usr/bin/env python3
"""One-off migration script: generates Haiku category labels for all existing incidents.

Run on the monitor server:

    cd /home/claude-dev/AegisSight-Monitor
    /home/claude-dev/.venvs/osint/bin/python migrate_category_labels.py
"""
import asyncio
import json
import logging
import os
import sys

# Make the project's packages importable when running from the repo root.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(name)s] %(levelname)s: %(message)s',
)
logger = logging.getLogger("migrate_labels")


async def main():
    """Backfill ``incidents.category_labels`` for every row where it is still NULL.

    Labels are generated one incident at a time via Haiku; each successful
    update is committed immediately so an interrupted run loses no progress.
    """
    # Imported lazily so the sys.path manipulation above has already happened.
    from database import get_db
    from agents.geoparsing import generate_category_labels

    db = await get_db()
    try:
        # Load all incidents that do not have category labels yet.
        cursor = await db.execute(
            "SELECT id, title, description FROM incidents WHERE category_labels IS NULL"
        )
        incidents = [dict(row) for row in await cursor.fetchall()]

        if not incidents:
            logger.info("Keine Incidents ohne Labels gefunden. Nichts zu tun.")
            return

        logger.info(f"{len(incidents)} Incidents ohne Labels gefunden. Starte Generierung...")

        success = 0
        for inc in incidents:
            incident_id = inc["id"]
            # Title plus (possibly missing) description as the Haiku context.
            context = f"{inc['title']} - {inc.get('description') or ''}"
            logger.info(f"Generiere Labels fuer Incident {incident_id}: {inc['title'][:60]}...")

            try:
                labels = await generate_category_labels(context)
                if labels:
                    await db.execute(
                        "UPDATE incidents SET category_labels = ? WHERE id = ?",
                        (json.dumps(labels, ensure_ascii=False), incident_id),
                    )
                    # Commit per row so progress survives an interrupted run.
                    await db.commit()
                    success += 1
                    logger.info(f" -> Labels: {labels}")
                else:
                    # Was an f-string without placeholders; plain literal suffices.
                    logger.warning(" -> Keine Labels generiert")
            except Exception as e:
                # Best-effort migration: log and continue with the next incident.
                logger.error(f" -> Fehler: {e}")

            # Short pause to stay below API rate limits.
            await asyncio.sleep(0.5)

        logger.info(f"\nMigration abgeschlossen: {success}/{len(incidents)} Incidents mit Labels versehen.")

    finally:
        await db.close()


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
@@ -209,6 +209,90 @@ def _geocode_location(name: str, country_code: str = "", haiku_coords: Optional[
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# Static fallback labels, used whenever Haiku produces none.
DEFAULT_CATEGORY_LABELS = {
    "primary": "Hauptgeschehen",
    "secondary": "Reaktionen",
    "tertiary": "Beteiligte",
    "mentioned": "Erwaehnt",
}

# Prompt asking Haiku for short, topic-specific labels for the four
# fixed map-pin colour tiers. `{{`/`}}` escape literal braces for .format().
CATEGORY_LABELS_PROMPT = """Generiere kurze, praegnante Kategorie-Labels fuer Karten-Pins zu dieser Nachrichtenlage.

Lage: "{incident_context}"

Es gibt 4 Farbstufen fuer Orte auf der Karte:
1. primary (Rot): Wo das Hauptgeschehen stattfindet
2. secondary (Orange): Direkte Reaktionen/Gegenmassnahmen
3. tertiary (Blau): Entscheidungstraeger/Beteiligte
4. mentioned (Grau): Nur erwaehnt

Generiere fuer jede Stufe ein kurzes Label (1-3 Woerter), das zum Thema passt.
Wenn eine Stufe fuer dieses Thema nicht sinnvoll ist, setze null.

Beispiele:
- Militaerkonflikt Iran: {{"primary": "Kampfschauplätze", "secondary": "Vergeltungsschläge", "tertiary": "Strategische Akteure", "mentioned": "Erwähnt"}}
- Erdbeben Tuerkei: {{"primary": "Katastrophenzone", "secondary": "Hilfsoperationen", "tertiary": "Geberländer", "mentioned": "Erwähnt"}}
- Bundestagswahl: {{"primary": "Wahlkreise", "secondary": "Koalitionspartner", "tertiary": "Internationale Reaktionen", "mentioned": "Erwähnt"}}

Antworte NUR als JSON-Objekt:"""
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_category_labels(incident_context: str) -> dict[str, str | None]:
    """Generate context-dependent category labels via Haiku.

    Args:
        incident_context: incident title + description.

    Returns:
        Dict with labels for primary/secondary/tertiary/mentioned
        (a value of None means the tier does not fit this topic).
    """
    # Without usable context, fall back to the static defaults right away.
    if not incident_context or not incident_context.strip():
        return dict(DEFAULT_CATEGORY_LABELS)

    prompt = CATEGORY_LABELS_PROMPT.format(incident_context=incident_context[:500])

    try:
        raw_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)

        # First try the whole response as JSON; if the model wrapped it in
        # prose, fall back to the outermost {...} span.
        data = None
        try:
            data = json.loads(raw_text)
        except json.JSONDecodeError:
            found = re.search(r'\{.*\}', raw_text, re.DOTALL)
            if found:
                try:
                    data = json.loads(found.group())
                except json.JSONDecodeError:
                    pass

        if not data or not isinstance(data, dict):
            logger.warning("generate_category_labels: Kein gueltiges JSON erhalten")
            return dict(DEFAULT_CATEGORY_LABELS)

        def _clean(value, key):
            # None (or the literal string "null") means "tier not applicable".
            if value is None or value == "null":
                return None
            if isinstance(value, str) and value.strip():
                return value.strip()
            # Anything else (wrong type, empty string) -> per-key default.
            return DEFAULT_CATEGORY_LABELS.get(key)

        # Validation: only the four known keys; values must be str or None.
        valid_keys = {"primary", "secondary", "tertiary", "mentioned"}
        labels = {key: _clean(data.get(key), key) for key in valid_keys}

        # "mentioned" must always carry a label.
        if not labels.get("mentioned"):
            labels["mentioned"] = "Erwaehnt"

        logger.info(f"Kategorie-Labels generiert: {labels}")
        return labels

    except Exception as e:
        # Best effort: any Haiku/transport failure degrades to the defaults.
        logger.error(f"generate_category_labels fehlgeschlagen: {e}")
        return dict(DEFAULT_CATEGORY_LABELS)
|
||||||
|
|
||||||
|
|
||||||
HAIKU_GEOPARSE_PROMPT = """Extrahiere alle geographischen Orte aus diesen Nachrichten-Headlines.
|
HAIKU_GEOPARSE_PROMPT = """Extrahiere alle geographischen Orte aus diesen Nachrichten-Headlines.
|
||||||
|
|
||||||
Kontext der Lage: "{incident_context}"
|
Kontext der Lage: "{incident_context}"
|
||||||
@@ -222,9 +306,9 @@ Regeln:
|
|||||||
- Regionen wie "Middle East", "Gulf", "Naher Osten" NICHT extrahieren (kein einzelner Punkt auf der Karte)
|
- Regionen wie "Middle East", "Gulf", "Naher Osten" NICHT extrahieren (kein einzelner Punkt auf der Karte)
|
||||||
|
|
||||||
Klassifiziere basierend auf dem Lage-Kontext:
|
Klassifiziere basierend auf dem Lage-Kontext:
|
||||||
- "target": Wo das Ereignis passiert / Schaden entsteht
|
- "primary": Wo das Hauptgeschehen stattfindet (z.B. Angriffsziele, Katastrophenzone, Wahlkreise)
|
||||||
- "response": Wo Reaktionen / Gegenmassnahmen stattfinden
|
- "secondary": Direkte Reaktionen oder Gegenmassnahmen (z.B. Vergeltung, Hilfsoperationen)
|
||||||
- "actor": Wo Entscheidungen getroffen werden / Entscheider sitzen
|
- "tertiary": Entscheidungstraeger, Beteiligte (z.B. wo Entscheidungen getroffen werden)
|
||||||
- "mentioned": Nur erwaehnt, kein direkter Bezug
|
- "mentioned": Nur erwaehnt, kein direkter Bezug
|
||||||
|
|
||||||
Headlines:
|
Headlines:
|
||||||
@@ -233,7 +317,7 @@ Headlines:
|
|||||||
Antwort NUR als JSON-Array, kein anderer Text:
|
Antwort NUR als JSON-Array, kein anderer Text:
|
||||||
[{{"headline_idx": 0, "locations": [
|
[{{"headline_idx": 0, "locations": [
|
||||||
{{"name": "Teheran", "normalized": "Tehran", "country_code": "IR",
|
{{"name": "Teheran", "normalized": "Tehran", "country_code": "IR",
|
||||||
"type": "city", "category": "target",
|
"type": "city", "category": "primary",
|
||||||
"lat": 35.69, "lon": 51.42}}
|
"lat": 35.69, "lon": 51.42}}
|
||||||
]}}]"""
|
]}}]"""
|
||||||
|
|
||||||
@@ -314,12 +398,19 @@ async def _extract_locations_haiku(
|
|||||||
if not name:
|
if not name:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
raw_cat = loc.get("category", "mentioned")
|
||||||
|
# Alte Kategorien mappen (falls Haiku sie noch generiert)
|
||||||
|
cat_map = {"target": "primary", "response": "secondary", "retaliation": "secondary", "actor": "tertiary", "context": "tertiary"}
|
||||||
|
category = cat_map.get(raw_cat, raw_cat)
|
||||||
|
if category not in ("primary", "secondary", "tertiary", "mentioned"):
|
||||||
|
category = "mentioned"
|
||||||
|
|
||||||
article_locs.append({
|
article_locs.append({
|
||||||
"name": name,
|
"name": name,
|
||||||
"normalized": loc.get("normalized", name),
|
"normalized": loc.get("normalized", name),
|
||||||
"country_code": loc.get("country_code", ""),
|
"country_code": loc.get("country_code", ""),
|
||||||
"type": loc_type,
|
"type": loc_type,
|
||||||
"category": loc.get("category", "mentioned"),
|
"category": category,
|
||||||
"lat": loc.get("lat"),
|
"lat": loc.get("lat"),
|
||||||
"lon": loc.get("lon"),
|
"lon": loc.get("lon"),
|
||||||
})
|
})
|
||||||
@@ -333,7 +424,7 @@ async def _extract_locations_haiku(
|
|||||||
async def geoparse_articles(
|
async def geoparse_articles(
|
||||||
articles: list[dict],
|
articles: list[dict],
|
||||||
incident_context: str = "",
|
incident_context: str = "",
|
||||||
) -> dict[int, list[dict]]:
|
) -> tuple[dict[int, list[dict]], dict[str, str | None] | None]:
|
||||||
"""Geoparsing fuer eine Liste von Artikeln via Haiku + geonamescache.
|
"""Geoparsing fuer eine Liste von Artikeln via Haiku + geonamescache.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -341,11 +432,15 @@ async def geoparse_articles(
|
|||||||
incident_context: Lage-Kontext (Titel + Beschreibung) fuer kontextbewusste Klassifizierung
|
incident_context: Lage-Kontext (Titel + Beschreibung) fuer kontextbewusste Klassifizierung
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict[article_id -> list[{location_name, location_name_normalized, country_code,
|
Tuple von (dict[article_id -> list[locations]], category_labels oder None)
|
||||||
lat, lon, confidence, source_text, category}]]
|
|
||||||
"""
|
"""
|
||||||
if not articles:
|
if not articles:
|
||||||
return {}
|
return {}, None
|
||||||
|
|
||||||
|
# Labels parallel zum Geoparsing generieren (nur wenn Kontext vorhanden)
|
||||||
|
labels_task = None
|
||||||
|
if incident_context:
|
||||||
|
labels_task = asyncio.create_task(generate_category_labels(incident_context))
|
||||||
|
|
||||||
# Headlines sammeln
|
# Headlines sammeln
|
||||||
headlines = []
|
headlines = []
|
||||||
@@ -363,7 +458,13 @@ async def geoparse_articles(
|
|||||||
headlines.append({"idx": article_id, "text": headline})
|
headlines.append({"idx": article_id, "text": headline})
|
||||||
|
|
||||||
if not headlines:
|
if not headlines:
|
||||||
return {}
|
category_labels = None
|
||||||
|
if labels_task:
|
||||||
|
try:
|
||||||
|
category_labels = await labels_task
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return {}, category_labels
|
||||||
|
|
||||||
# Batches bilden (max 50 Headlines pro Haiku-Call)
|
# Batches bilden (max 50 Headlines pro Haiku-Call)
|
||||||
batch_size = 50
|
batch_size = 50
|
||||||
@@ -374,7 +475,13 @@ async def geoparse_articles(
|
|||||||
all_haiku_results.update(batch_results)
|
all_haiku_results.update(batch_results)
|
||||||
|
|
||||||
if not all_haiku_results:
|
if not all_haiku_results:
|
||||||
return {}
|
category_labels = None
|
||||||
|
if labels_task:
|
||||||
|
try:
|
||||||
|
category_labels = await labels_task
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return {}, category_labels
|
||||||
|
|
||||||
# Geocoding via geonamescache (mit Haiku-Koordinaten als Fallback)
|
# Geocoding via geonamescache (mit Haiku-Koordinaten als Fallback)
|
||||||
result = {}
|
result = {}
|
||||||
@@ -406,4 +513,12 @@ async def geoparse_articles(
|
|||||||
if locations:
|
if locations:
|
||||||
result[article_id] = locations
|
result[article_id] = locations
|
||||||
|
|
||||||
return result
|
# Category-Labels abwarten
|
||||||
|
category_labels = None
|
||||||
|
if labels_task:
|
||||||
|
try:
|
||||||
|
category_labels = await labels_task
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Category-Labels konnten nicht generiert werden: {e}")
|
||||||
|
|
||||||
|
return result, category_labels
|
||||||
|
|||||||
@@ -782,7 +782,7 @@ class AgentOrchestrator:
|
|||||||
from agents.geoparsing import geoparse_articles
|
from agents.geoparsing import geoparse_articles
|
||||||
incident_context = f"{title} - {description}"
|
incident_context = f"{title} - {description}"
|
||||||
logger.info(f"Geoparsing fuer {len(new_articles_for_analysis)} neue Artikel...")
|
logger.info(f"Geoparsing fuer {len(new_articles_for_analysis)} neue Artikel...")
|
||||||
geo_results = await geoparse_articles(new_articles_for_analysis, incident_context)
|
geo_results, category_labels = await geoparse_articles(new_articles_for_analysis, incident_context)
|
||||||
geo_count = 0
|
geo_count = 0
|
||||||
for art_id, locations in geo_results.items():
|
for art_id, locations in geo_results.items():
|
||||||
for loc in locations:
|
for loc in locations:
|
||||||
@@ -799,6 +799,15 @@ class AgentOrchestrator:
|
|||||||
if geo_count > 0:
|
if geo_count > 0:
|
||||||
await db.commit()
|
await db.commit()
|
||||||
logger.info(f"Geoparsing: {geo_count} Orte aus {len(geo_results)} Artikeln gespeichert")
|
logger.info(f"Geoparsing: {geo_count} Orte aus {len(geo_results)} Artikeln gespeichert")
|
||||||
|
# Category-Labels in Incident speichern (nur wenn neu generiert)
|
||||||
|
if category_labels:
|
||||||
|
import json as _json
|
||||||
|
await db.execute(
|
||||||
|
"UPDATE incidents SET category_labels = ? WHERE id = ? AND category_labels IS NULL",
|
||||||
|
(_json.dumps(category_labels, ensure_ascii=False), incident_id),
|
||||||
|
)
|
||||||
|
await db.commit()
|
||||||
|
logger.info(f"Category-Labels gespeichert fuer Incident {incident_id}: {category_labels}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Geoparsing fehlgeschlagen (Pipeline laeuft weiter): {e}")
|
logger.warning(f"Geoparsing fehlgeschlagen (Pipeline laeuft weiter): {e}")
|
||||||
|
|
||||||
|
|||||||
1183
src/database.py
1183
src/database.py
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -338,8 +338,8 @@ async def get_locations(
|
|||||||
"source_url": row["source_url"],
|
"source_url": row["source_url"],
|
||||||
})
|
})
|
||||||
|
|
||||||
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: target > retaliation > actor > mentioned)
|
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned)
|
||||||
priority = {"target": 4, "retaliation": 3, "actor": 2, "mentioned": 1}
|
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
|
||||||
result = []
|
result = []
|
||||||
for loc in loc_map.values():
|
for loc in loc_map.values():
|
||||||
cats = loc.pop("categories")
|
cats = loc.pop("categories")
|
||||||
@@ -349,7 +349,20 @@ async def get_locations(
|
|||||||
best_cat = "mentioned"
|
best_cat = "mentioned"
|
||||||
loc["category"] = best_cat
|
loc["category"] = best_cat
|
||||||
result.append(loc)
|
result.append(loc)
|
||||||
return result
|
|
||||||
|
# Category-Labels aus Incident laden
|
||||||
|
cursor = await db.execute(
|
||||||
|
"SELECT category_labels FROM incidents WHERE id = ?", (incident_id,)
|
||||||
|
)
|
||||||
|
inc_row = await cursor.fetchone()
|
||||||
|
category_labels = None
|
||||||
|
if inc_row and inc_row["category_labels"]:
|
||||||
|
try:
|
||||||
|
category_labels = json.loads(inc_row["category_labels"])
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {"category_labels": category_labels, "locations": result}
|
||||||
|
|
||||||
|
|
||||||
# Geoparse-Status pro Incident (in-memory)
|
# Geoparse-Status pro Incident (in-memory)
|
||||||
@@ -395,8 +408,23 @@ async def _run_geoparse_background(incident_id: int, tenant_id: int | None):
|
|||||||
processed = 0
|
processed = 0
|
||||||
for i in range(0, total, batch_size):
|
for i in range(0, total, batch_size):
|
||||||
batch = articles[i:i + batch_size]
|
batch = articles[i:i + batch_size]
|
||||||
geo_results = await geoparse_articles(batch, incident_context)
|
geo_result = await geoparse_articles(batch, incident_context)
|
||||||
for art_id, locations in geo_results.items():
|
# Tuple-Rückgabe: (locations_dict, category_labels)
|
||||||
|
if isinstance(geo_result, tuple):
|
||||||
|
batch_geo_results, batch_labels = geo_result
|
||||||
|
# Labels beim ersten Batch speichern
|
||||||
|
if batch_labels and i == 0:
|
||||||
|
try:
|
||||||
|
await db.execute(
|
||||||
|
"UPDATE incidents SET category_labels = ? WHERE id = ? AND category_labels IS NULL",
|
||||||
|
(json.dumps(batch_labels, ensure_ascii=False), incident_id),
|
||||||
|
)
|
||||||
|
await db.commit()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
batch_geo_results = geo_result
|
||||||
|
for art_id, locations in batch_geo_results.items():
|
||||||
for loc in locations:
|
for loc in locations:
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""INSERT INTO article_locations
|
"""INSERT INTO article_locations
|
||||||
|
|||||||
@@ -64,6 +64,14 @@ async def get_lagebild(db=Depends(db_dependency)):
|
|||||||
raise HTTPException(status_code=404, detail="Incident not found")
|
raise HTTPException(status_code=404, detail="Incident not found")
|
||||||
incident = dict(incident)
|
incident = dict(incident)
|
||||||
|
|
||||||
|
# Category-Labels laden
|
||||||
|
category_labels = None
|
||||||
|
if incident.get("category_labels"):
|
||||||
|
try:
|
||||||
|
category_labels = json.loads(incident["category_labels"])
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
# Alle Artikel aus allen Iran-Incidents laden
|
# Alle Artikel aus allen Iran-Incidents laden
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
f"""SELECT id, headline, headline_de, source, source_url, language,
|
f"""SELECT id, headline, headline_de, source, source_url, language,
|
||||||
@@ -148,6 +156,7 @@ async def get_lagebild(db=Depends(db_dependency)):
|
|||||||
"fact_checks": fact_checks,
|
"fact_checks": fact_checks,
|
||||||
"available_snapshots": available_snapshots,
|
"available_snapshots": available_snapshots,
|
||||||
"locations": locations,
|
"locations": locations,
|
||||||
|
"category_labels": category_labels,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,389 +1,415 @@
|
|||||||
"""Post-Refresh Quality Check via Haiku.
|
"""Post-Refresh Quality Check via Haiku.
|
||||||
|
|
||||||
Prueft nach jedem Refresh:
|
Prueft nach jedem Refresh:
|
||||||
1. Semantische Faktencheck-Duplikate (Haiku-Clustering mit Fuzzy-Vorfilter)
|
1. Semantische Faktencheck-Duplikate (Haiku-Clustering mit Fuzzy-Vorfilter)
|
||||||
2. Falsch kategorisierte Karten-Locations (Haiku bewertet Kontext der Lage)
|
2. Falsch kategorisierte Karten-Locations (Haiku bewertet Kontext der Lage)
|
||||||
|
|
||||||
Regelbasierte Listen dienen als Fallback falls Haiku fehlschlaegt.
|
Regelbasierte Listen dienen als Fallback falls Haiku fehlschlaegt.
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from difflib import SequenceMatcher
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
from agents.claude_client import call_claude
|
from agents.claude_client import call_claude
|
||||||
from config import CLAUDE_MODEL_FAST
|
from config import CLAUDE_MODEL_FAST
|
||||||
|
|
||||||
logger = logging.getLogger("osint.post_refresh_qc")
|
logger = logging.getLogger("osint.post_refresh_qc")
|
||||||
|
|
||||||
STATUS_PRIORITY = {
|
STATUS_PRIORITY = {
|
||||||
"confirmed": 5, "established": 5,
|
"confirmed": 5, "established": 5,
|
||||||
"contradicted": 4, "disputed": 4,
|
"contradicted": 4, "disputed": 4,
|
||||||
"unconfirmed": 3, "unverified": 3,
|
"unconfirmed": 3, "unverified": 3,
|
||||||
"developing": 1,
|
"developing": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# 1. Faktencheck-Duplikate
|
# 1. Faktencheck-Duplikate
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
_DEDUP_PROMPT = """\
|
_DEDUP_PROMPT = """\
|
||||||
Du bist ein Deduplizierungs-Agent fuer Faktenchecks eines OSINT-Monitors.
|
Du bist ein Deduplizierungs-Agent fuer Faktenchecks eines OSINT-Monitors.
|
||||||
|
|
||||||
LAGE: {incident_title}
|
LAGE: {incident_title}
|
||||||
|
|
||||||
Unten stehen Faktenchecks (ID + Status + Claim). Finde Gruppen von Fakten,
|
Unten stehen Faktenchecks (ID + Status + Claim). Finde Gruppen von Fakten,
|
||||||
die INHALTLICH DASSELBE aussagen, auch wenn sie unterschiedlich formuliert sind.
|
die INHALTLICH DASSELBE aussagen, auch wenn sie unterschiedlich formuliert sind.
|
||||||
|
|
||||||
REGELN:
|
REGELN:
|
||||||
- Gleicher Sachverhalt = gleiche Gruppe
|
- Gleicher Sachverhalt = gleiche Gruppe
|
||||||
(z.B. "Trump fordert Kapitulation" und "US-Praesident verlangt bedingungslose Aufgabe")
|
(z.B. "Trump fordert Kapitulation" und "US-Praesident verlangt bedingungslose Aufgabe")
|
||||||
- Unterschiedliche Detailtiefe zum SELBEN Fakt = gleiche Gruppe
|
- Unterschiedliche Detailtiefe zum SELBEN Fakt = gleiche Gruppe
|
||||||
- VERSCHIEDENE Sachverhalte = VERSCHIEDENE Gruppen
|
- VERSCHIEDENE Sachverhalte = VERSCHIEDENE Gruppen
|
||||||
(z.B. "Angriff auf Isfahan" vs "Angriff auf Teheran" sind NICHT dasselbe)
|
(z.B. "Angriff auf Isfahan" vs "Angriff auf Teheran" sind NICHT dasselbe)
|
||||||
- Eine Gruppe muss mindestens 2 Eintraege haben
|
- Eine Gruppe muss mindestens 2 Eintraege haben
|
||||||
|
|
||||||
Antworte NUR als JSON-Array von Gruppen. Jede Gruppe ist ein Array von IDs:
|
Antworte NUR als JSON-Array von Gruppen. Jede Gruppe ist ein Array von IDs:
|
||||||
[[1,5,12], [3,8]]
|
[[1,5,12], [3,8]]
|
||||||
|
|
||||||
Wenn keine Duplikate: antworte mit []
|
Wenn keine Duplikate: antworte mit []
|
||||||
|
|
||||||
FAKTEN:
|
FAKTEN:
|
||||||
{facts_text}"""
|
{facts_text}"""
|
||||||
|
|
||||||
|
|
||||||
async def _haiku_find_duplicate_clusters(
    facts: list[dict], incident_title: str
) -> list[list[int]]:
    """Ask Haiku which facts are semantic duplicates.

    Args:
        facts: rows with at least the keys "id", "status" and "claim".
        incident_title: incident headline interpolated into the prompt.

    Returns:
        List of ID groups (each group = facts stating the same thing);
        empty list when no duplicates were found or Haiku failed.
    """
    facts_text = "\n".join(
        f'ID={f["id"]} [{f["status"]}]: {f["claim"]}'
        for f in facts
    )
    prompt = _DEDUP_PROMPT.format(
        incident_title=incident_title, facts_text=facts_text
    )
    try:
        result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
        data = json.loads(result)
        if isinstance(data, list) and all(isinstance(g, list) for g in data):
            return data
    except json.JSONDecodeError:
        # Model wrapped the JSON in prose; extract the outermost array.
        match = re.search(r'\[.*\]', result, re.DOTALL)
        if match:
            try:
                data = json.loads(match.group())
                # FIX: apply the same shape check as the primary branch.
                # Previously a flat list like [1, 2] was returned unchecked
                # and would crash callers iterating each group as a list.
                if isinstance(data, list) and all(isinstance(g, list) for g in data):
                    return data
            except json.JSONDecodeError:
                pass
    except Exception as e:
        logger.warning("Haiku Duplikat-Clustering fehlgeschlagen: %s", e)
    return []
|
||||||
|
|
||||||
|
|
||||||
def _fuzzy_prefilter(all_facts: list[dict], max_candidates: int = 80) -> list[dict]:
    """Select candidates for the Haiku check via a fuzzy pre-filter.

    Finds pairs with combined similarity >= 0.60 and returns the facts
    involved. Capped at max_candidates to save Haiku tokens.
    """
    from agents.factchecker import normalize_claim, _keyword_set

    # Small enough to send everything -> no filtering needed.
    if len(all_facts) <= max_candidates:
        return all_facts

    # Pre-compute normalized claim text and keyword set per fact.
    prepared = [
        (fact, normalize_claim(fact["claim"]), _keyword_set(fact["claim"]))
        for fact in all_facts
    ]

    matched_ids = set()
    # Only compare the 60 newest facts against everything else.
    newest = prepared[:60]

    for idx_a, (fact_a, norm_a, kw_a) in enumerate(newest):
        for idx_b, (fact_b, norm_b, kw_b) in enumerate(prepared):
            # Visit each unordered pair once; never compare a fact to itself.
            if idx_a >= idx_b or fact_b["id"] == fact_a["id"]:
                continue
            if not norm_a or not norm_b:
                continue

            # Cheap length gate before the expensive SequenceMatcher pass.
            length_ratio = len(norm_a) / len(norm_b) if norm_b else 0
            if length_ratio > 2.5 or length_ratio < 0.4:
                continue

            similarity = SequenceMatcher(None, norm_a, norm_b).ratio()
            keyword_union = kw_a | kw_b
            jaccard = len(kw_a & kw_b) / len(keyword_union) if keyword_union else 0.0
            score = 0.7 * similarity + 0.3 * jaccard

            if score >= 0.60:
                matched_ids.add(fact_a["id"])
                matched_ids.add(fact_b["id"])

            if len(matched_ids) >= max_candidates:
                break
        if len(matched_ids) >= max_candidates:
            break

    chosen = [f for f in all_facts if f["id"] in matched_ids]
    logger.info(
        "Fuzzy-Vorfilter: %d/%d Fakten als Duplikat-Kandidaten identifiziert",
        len(chosen), len(all_facts),
    )
    return chosen
|
||||||
|
|
||||||
|
|
||||||
async def check_fact_duplicates(db, incident_id: int, incident_title: str) -> int:
|
async def check_fact_duplicates(db, incident_id: int, incident_title: str) -> int:
|
||||||
"""Prueft auf semantische Faktencheck-Duplikate via Haiku.
|
"""Prueft auf semantische Faktencheck-Duplikate via Haiku.
|
||||||
|
|
||||||
1. Fuzzy-Vorfilter reduziert auf relevante Kandidaten
|
1. Fuzzy-Vorfilter reduziert auf relevante Kandidaten
|
||||||
2. Haiku clustert semantische Duplikate
|
2. Haiku clustert semantische Duplikate
|
||||||
3. Pro Cluster: behalte besten Fakt, loesche Rest
|
3. Pro Cluster: behalte besten Fakt, loesche Rest
|
||||||
|
|
||||||
Returns: Anzahl entfernter Duplikate.
|
Returns: Anzahl entfernter Duplikate.
|
||||||
"""
|
"""
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"SELECT id, claim, status, sources_count, evidence, checked_at "
|
"SELECT id, claim, status, sources_count, evidence, checked_at "
|
||||||
"FROM fact_checks WHERE incident_id = ? ORDER BY checked_at DESC",
|
"FROM fact_checks WHERE incident_id = ? ORDER BY checked_at DESC",
|
||||||
(incident_id,),
|
(incident_id,),
|
||||||
)
|
)
|
||||||
all_facts = [dict(row) for row in await cursor.fetchall()]
|
all_facts = [dict(row) for row in await cursor.fetchall()]
|
||||||
|
|
||||||
if len(all_facts) < 2:
|
if len(all_facts) < 2:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# Schritt 1: Fuzzy-Vorfilter
|
# Schritt 1: Fuzzy-Vorfilter
|
||||||
candidates = _fuzzy_prefilter(all_facts)
|
candidates = _fuzzy_prefilter(all_facts)
|
||||||
if len(candidates) < 2:
|
if len(candidates) < 2:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# Schritt 2: Haiku-Clustering (in Batches von max 80)
|
# Schritt 2: Haiku-Clustering (in Batches von max 80)
|
||||||
all_clusters = []
|
all_clusters = []
|
||||||
batch_size = 80
|
batch_size = 80
|
||||||
for i in range(0, len(candidates), batch_size):
|
for i in range(0, len(candidates), batch_size):
|
||||||
batch = candidates[i:i + batch_size]
|
batch = candidates[i:i + batch_size]
|
||||||
clusters = await _haiku_find_duplicate_clusters(batch, incident_title)
|
clusters = await _haiku_find_duplicate_clusters(batch, incident_title)
|
||||||
all_clusters.extend(clusters)
|
all_clusters.extend(clusters)
|
||||||
|
|
||||||
if not all_clusters:
|
if not all_clusters:
|
||||||
logger.info("QC Fakten: Haiku fand keine Duplikate")
|
logger.info("QC Fakten: Haiku fand keine Duplikate")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# Schritt 3: Pro Cluster besten behalten, Rest loeschen
|
# Schritt 3: Pro Cluster besten behalten, Rest loeschen
|
||||||
facts_by_id = {f["id"]: f for f in all_facts}
|
facts_by_id = {f["id"]: f for f in all_facts}
|
||||||
ids_to_delete = set()
|
ids_to_delete = set()
|
||||||
|
|
||||||
for cluster_ids in all_clusters:
|
for cluster_ids in all_clusters:
|
||||||
valid_ids = [cid for cid in cluster_ids if cid in facts_by_id]
|
valid_ids = [cid for cid in cluster_ids if cid in facts_by_id]
|
||||||
if len(valid_ids) <= 1:
|
if len(valid_ids) <= 1:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cluster_facts = [facts_by_id[cid] for cid in valid_ids]
|
cluster_facts = [facts_by_id[cid] for cid in valid_ids]
|
||||||
best = max(cluster_facts, key=lambda f: (
|
best = max(cluster_facts, key=lambda f: (
|
||||||
STATUS_PRIORITY.get(f["status"], 0),
|
STATUS_PRIORITY.get(f["status"], 0),
|
||||||
f.get("sources_count", 0),
|
f.get("sources_count", 0),
|
||||||
f.get("checked_at", ""),
|
f.get("checked_at", ""),
|
||||||
))
|
))
|
||||||
|
|
||||||
for fact in cluster_facts:
|
for fact in cluster_facts:
|
||||||
if fact["id"] != best["id"]:
|
if fact["id"] != best["id"]:
|
||||||
ids_to_delete.add(fact["id"])
|
ids_to_delete.add(fact["id"])
|
||||||
logger.info(
|
logger.info(
|
||||||
"QC Duplikat: ID %d entfernt, behalte ID %d ('%s')",
|
"QC Duplikat: ID %d entfernt, behalte ID %d ('%s')",
|
||||||
fact["id"], best["id"], best["claim"][:60],
|
fact["id"], best["id"], best["claim"][:60],
|
||||||
)
|
)
|
||||||
|
|
||||||
if ids_to_delete:
|
if ids_to_delete:
|
||||||
placeholders = ",".join("?" * len(ids_to_delete))
|
placeholders = ",".join("?" * len(ids_to_delete))
|
||||||
await db.execute(
|
await db.execute(
|
||||||
f"DELETE FROM fact_checks WHERE id IN ({placeholders})",
|
f"DELETE FROM fact_checks WHERE id IN ({placeholders})",
|
||||||
list(ids_to_delete),
|
list(ids_to_delete),
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
"QC: %d Faktencheck-Duplikate entfernt fuer Incident %d",
|
"QC: %d Faktencheck-Duplikate entfernt fuer Incident %d",
|
||||||
len(ids_to_delete), incident_id,
|
len(ids_to_delete), incident_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
return len(ids_to_delete)
|
return len(ids_to_delete)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# 2. Karten-Location-Kategorien
|
# 2. Karten-Location-Kategorien
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
_LOCATION_PROMPT = """\
|
_LOCATION_PROMPT = """\
|
||||||
Du bist ein Geopolitik-Experte fuer einen OSINT-Monitor.
|
Du bist ein Geopolitik-Experte fuer einen OSINT-Monitor.
|
||||||
|
|
||||||
LAGE: {incident_title}
|
LAGE: {incident_title}
|
||||||
BESCHREIBUNG: {incident_desc}
|
BESCHREIBUNG: {incident_desc}
|
||||||
|
{labels_context}
|
||||||
Unten stehen Orte, die auf der Karte als "target" (Angriffsziel) markiert sind.
|
Unten stehen Orte, die auf der Karte als "primary" (Hauptgeschehen) markiert sind.
|
||||||
Pruefe fuer jeden Ort, ob die Kategorie "target" korrekt ist.
|
Pruefe fuer jeden Ort, ob die Kategorie "primary" korrekt ist.
|
||||||
|
|
||||||
KATEGORIEN:
|
KATEGORIEN:
|
||||||
- target: Ort wurde tatsaechlich militaerisch angegriffen oder bombardiert
|
- primary: {label_primary} — Wo das Hauptgeschehen stattfindet
|
||||||
- actor: Ort gehoert zu einer Konfliktpartei (z.B. Hauptstadt des Angreifers)
|
- secondary: {label_secondary} — Direkte Reaktionen/Gegenmassnahmen
|
||||||
- response: Ort reagiert auf den Konflikt (z.B. diplomatische Reaktion, Sanktionen)
|
- tertiary: {label_tertiary} — Entscheidungstraeger/Beteiligte
|
||||||
- mentioned: Ort wird nur im Kontext erwaehnt (z.B. wirtschaftliche Auswirkungen)
|
- mentioned: {label_mentioned} — Nur erwaehnt
|
||||||
|
|
||||||
REGELN:
|
REGELN:
|
||||||
- Nur Orte die TATSAECHLICH physisch angegriffen/bombardiert wurden = "target"
|
- Nur Orte die DIREKT vom Hauptgeschehen betroffen sind = "primary"
|
||||||
- Hauptstaedte von Angreiferlaendern (z.B. Washington DC) = "actor"
|
- Orte mit Reaktionen/Gegenmassnahmen = "secondary"
|
||||||
- Laender die nur wirtschaftlich betroffen sind (z.B. steigende Oelpreise) = "mentioned"
|
- Orte von Entscheidungstraegern (z.B. Hauptstaedte) = "tertiary"
|
||||||
- Laender die diplomatisch reagieren = "response"
|
- Nur erwaehnte Orte = "mentioned"
|
||||||
- Im Zweifel: "mentioned"
|
- Im Zweifel: "mentioned"
|
||||||
|
|
||||||
Antworte als JSON-Array mit Korrekturen. Nur Eintraege die GEAENDERT werden muessen:
|
Antworte als JSON-Array mit Korrekturen. Nur Eintraege die GEAENDERT werden muessen:
|
||||||
[{{"id": 123, "category": "mentioned"}}, {{"id": 456, "category": "actor"}}]
|
[{{"id": 123, "category": "mentioned"}}, {{"id": 456, "category": "tertiary"}}]
|
||||||
|
|
||||||
Wenn alle Kategorien korrekt sind: antworte mit []
|
Wenn alle Kategorien korrekt sind: antworte mit []
|
||||||
|
|
||||||
ORTE (aktuell alle als "target" markiert):
|
ORTE (aktuell alle als "primary" markiert):
|
||||||
{locations_text}"""
|
{locations_text}"""
|
||||||
|
|
||||||
|
|
||||||
async def check_location_categories(
|
async def check_location_categories(
|
||||||
db, incident_id: int, incident_title: str, incident_desc: str
|
db, incident_id: int, incident_title: str, incident_desc: str
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Prueft Karten-Location-Kategorien via Haiku.
|
"""Prueft Karten-Location-Kategorien via Haiku.
|
||||||
|
|
||||||
Returns: Anzahl korrigierter Eintraege.
|
Returns: Anzahl korrigierter Eintraege.
|
||||||
"""
|
"""
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"SELECT id, location_name, latitude, longitude, category "
|
"SELECT id, location_name, latitude, longitude, category "
|
||||||
"FROM article_locations WHERE incident_id = ? AND category = 'target'",
|
"FROM article_locations WHERE incident_id = ? AND category = 'primary'",
|
||||||
(incident_id,),
|
(incident_id,),
|
||||||
)
|
)
|
||||||
targets = [dict(row) for row in await cursor.fetchall()]
|
targets = [dict(row) for row in await cursor.fetchall()]
|
||||||
|
|
||||||
if not targets:
|
if not targets:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# Dedupliziere nach location_name fuer den Prompt (spart Tokens)
|
# Category-Labels aus DB laden (fuer kontextabhaengige Prompt-Beschreibungen)
|
||||||
unique_names = {}
|
cursor = await db.execute(
|
||||||
ids_by_name = {}
|
"SELECT category_labels FROM incidents WHERE id = ?", (incident_id,)
|
||||||
for loc in targets:
|
)
|
||||||
name = loc["location_name"]
|
inc_row = await cursor.fetchone()
|
||||||
if name not in unique_names:
|
labels = {}
|
||||||
unique_names[name] = loc
|
if inc_row and inc_row["category_labels"]:
|
||||||
ids_by_name[name] = []
|
try:
|
||||||
ids_by_name[name].append(loc["id"])
|
labels = json.loads(inc_row["category_labels"])
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
locations_text = "\n".join(
|
pass
|
||||||
f'ID={loc["id"]} | {loc["location_name"]} ({loc["latitude"]:.2f}, {loc["longitude"]:.2f})'
|
|
||||||
for loc in unique_names.values()
|
label_primary = labels.get("primary") or "Hauptgeschehen"
|
||||||
)
|
label_secondary = labels.get("secondary") or "Reaktionen"
|
||||||
|
label_tertiary = labels.get("tertiary") or "Beteiligte"
|
||||||
prompt = _LOCATION_PROMPT.format(
|
label_mentioned = labels.get("mentioned") or "Erwaehnt"
|
||||||
incident_title=incident_title,
|
|
||||||
incident_desc=incident_desc[:500] if incident_desc else "(keine Beschreibung)",
|
labels_context = ""
|
||||||
locations_text=locations_text,
|
if labels:
|
||||||
)
|
labels_context = f"KATEGORIE-LABELS: primary={label_primary}, secondary={label_secondary}, tertiary={label_tertiary}, mentioned={label_mentioned}\n"
|
||||||
|
|
||||||
fixes = []
|
# Dedupliziere nach location_name fuer den Prompt (spart Tokens)
|
||||||
try:
|
unique_names = {}
|
||||||
result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
|
ids_by_name = {}
|
||||||
data = json.loads(result)
|
for loc in targets:
|
||||||
if isinstance(data, list):
|
name = loc["location_name"]
|
||||||
fixes = data
|
if name not in unique_names:
|
||||||
except json.JSONDecodeError:
|
unique_names[name] = loc
|
||||||
match = re.search(r'\[.*\]', result, re.DOTALL)
|
ids_by_name[name] = []
|
||||||
if match:
|
ids_by_name[name].append(loc["id"])
|
||||||
try:
|
|
||||||
data = json.loads(match.group())
|
locations_text = "\n".join(
|
||||||
if isinstance(data, list):
|
f'ID={loc["id"]} | {loc["location_name"]} ({loc["latitude"]:.2f}, {loc["longitude"]:.2f})'
|
||||||
fixes = data
|
for loc in unique_names.values()
|
||||||
except json.JSONDecodeError:
|
)
|
||||||
pass
|
|
||||||
except Exception as e:
|
prompt = _LOCATION_PROMPT.format(
|
||||||
logger.warning("Haiku Location-Check fehlgeschlagen: %s", e)
|
incident_title=incident_title,
|
||||||
return 0
|
incident_desc=incident_desc[:500] if incident_desc else "(keine Beschreibung)",
|
||||||
|
labels_context=labels_context,
|
||||||
if not fixes:
|
label_primary=label_primary,
|
||||||
logger.info("QC Locations: Haiku fand keine falschen Kategorien")
|
label_secondary=label_secondary,
|
||||||
return 0
|
label_tertiary=label_tertiary,
|
||||||
|
label_mentioned=label_mentioned,
|
||||||
# Korrekturen anwenden (auch auf alle IDs mit gleichem Namen)
|
locations_text=locations_text,
|
||||||
total_fixed = 0
|
)
|
||||||
representative_ids = {loc["id"]: name for name, loc in unique_names.items()}
|
|
||||||
|
fixes = []
|
||||||
for fix in fixes:
|
try:
|
||||||
fix_id = fix.get("id")
|
result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
|
||||||
new_cat = fix.get("category")
|
data = json.loads(result)
|
||||||
if not fix_id or not new_cat:
|
if isinstance(data, list):
|
||||||
continue
|
fixes = data
|
||||||
if new_cat not in ("target", "actor", "response", "mentioned"):
|
except json.JSONDecodeError:
|
||||||
continue
|
match = re.search(r'\[.*\]', result, re.DOTALL)
|
||||||
|
if match:
|
||||||
# Finde den location_name fuer diese ID
|
try:
|
||||||
loc_name = representative_ids.get(fix_id)
|
data = json.loads(match.group())
|
||||||
if not loc_name:
|
if isinstance(data, list):
|
||||||
continue
|
fixes = data
|
||||||
|
except json.JSONDecodeError:
|
||||||
# Korrigiere ALLE Eintraege mit diesem Namen
|
pass
|
||||||
all_ids = ids_by_name.get(loc_name, [fix_id])
|
except Exception as e:
|
||||||
placeholders = ",".join("?" * len(all_ids))
|
logger.warning("Haiku Location-Check fehlgeschlagen: %s", e)
|
||||||
await db.execute(
|
return 0
|
||||||
f"UPDATE article_locations SET category = ? "
|
|
||||||
f"WHERE id IN ({placeholders}) AND category = 'target'",
|
if not fixes:
|
||||||
[new_cat] + all_ids,
|
logger.info("QC Locations: Haiku fand keine falschen Kategorien")
|
||||||
)
|
return 0
|
||||||
total_fixed += len(all_ids)
|
|
||||||
logger.info(
|
# Korrekturen anwenden (auch auf alle IDs mit gleichem Namen)
|
||||||
"QC Location: '%s' (%d Eintraege): target -> %s",
|
total_fixed = 0
|
||||||
loc_name, len(all_ids), new_cat,
|
representative_ids = {loc["id"]: name for name, loc in unique_names.items()}
|
||||||
)
|
|
||||||
|
for fix in fixes:
|
||||||
if total_fixed > 0:
|
fix_id = fix.get("id")
|
||||||
logger.info(
|
new_cat = fix.get("category")
|
||||||
"QC: %d Karten-Location-Kategorien korrigiert fuer Incident %d",
|
if not fix_id or not new_cat:
|
||||||
total_fixed, incident_id,
|
continue
|
||||||
)
|
if new_cat not in ("primary", "secondary", "tertiary", "mentioned"):
|
||||||
|
continue
|
||||||
return total_fixed
|
|
||||||
|
# Finde den location_name fuer diese ID
|
||||||
|
loc_name = representative_ids.get(fix_id)
|
||||||
# ---------------------------------------------------------------------------
|
if not loc_name:
|
||||||
# 3. Hauptfunktion
|
continue
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
# Korrigiere ALLE Eintraege mit diesem Namen
|
||||||
async def run_post_refresh_qc(db, incident_id: int) -> dict:
|
all_ids = ids_by_name.get(loc_name, [fix_id])
|
||||||
"""Fuehrt den kompletten Post-Refresh Quality Check via Haiku durch.
|
placeholders = ",".join("?" * len(all_ids))
|
||||||
|
await db.execute(
|
||||||
Returns: Dict mit Ergebnissen {facts_removed, locations_fixed}.
|
f"UPDATE article_locations SET category = ? "
|
||||||
"""
|
f"WHERE id IN ({placeholders}) AND category = 'primary'",
|
||||||
try:
|
[new_cat] + all_ids,
|
||||||
# Lage-Titel und Beschreibung laden
|
)
|
||||||
cursor = await db.execute(
|
total_fixed += len(all_ids)
|
||||||
"SELECT title, description FROM incidents WHERE id = ?",
|
logger.info(
|
||||||
(incident_id,),
|
"QC Location: '%s' (%d Eintraege): primary -> %s",
|
||||||
)
|
loc_name, len(all_ids), new_cat,
|
||||||
row = await cursor.fetchone()
|
)
|
||||||
if not row:
|
|
||||||
return {"facts_removed": 0, "locations_fixed": 0}
|
if total_fixed > 0:
|
||||||
|
logger.info(
|
||||||
incident_title = row["title"] or ""
|
"QC: %d Karten-Location-Kategorien korrigiert fuer Incident %d",
|
||||||
incident_desc = row["description"] or ""
|
total_fixed, incident_id,
|
||||||
|
)
|
||||||
facts_removed = await check_fact_duplicates(db, incident_id, incident_title)
|
|
||||||
locations_fixed = await check_location_categories(
|
return total_fixed
|
||||||
db, incident_id, incident_title, incident_desc
|
|
||||||
)
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
if facts_removed > 0 or locations_fixed > 0:
|
# Hauptfunktion
|
||||||
await db.commit()
|
# ---------------------------------------------------------------------------
|
||||||
logger.info(
|
|
||||||
"Post-Refresh QC fuer Incident %d: %d Duplikate entfernt, %d Locations korrigiert",
|
async def run_post_refresh_qc(db, incident_id: int) -> dict:
|
||||||
incident_id, facts_removed, locations_fixed,
|
"""Fuehrt den kompletten Post-Refresh Quality Check via Haiku durch.
|
||||||
)
|
|
||||||
|
Returns: Dict mit Ergebnissen {facts_removed, locations_fixed}.
|
||||||
return {"facts_removed": facts_removed, "locations_fixed": locations_fixed}
|
"""
|
||||||
|
try:
|
||||||
except Exception as e:
|
# Lage-Titel und Beschreibung laden
|
||||||
logger.error(
|
cursor = await db.execute(
|
||||||
"Post-Refresh QC Fehler fuer Incident %d: %s",
|
"SELECT title, description FROM incidents WHERE id = ?",
|
||||||
incident_id, e, exc_info=True,
|
(incident_id,),
|
||||||
)
|
)
|
||||||
return {"facts_removed": 0, "locations_fixed": 0, "error": str(e)}
|
row = await cursor.fetchone()
|
||||||
|
if not row:
|
||||||
|
return {"facts_removed": 0, "locations_fixed": 0}
|
||||||
|
|
||||||
|
incident_title = row["title"] or ""
|
||||||
|
incident_desc = row["description"] or ""
|
||||||
|
|
||||||
|
facts_removed = await check_fact_duplicates(db, incident_id, incident_title)
|
||||||
|
locations_fixed = await check_location_categories(
|
||||||
|
db, incident_id, incident_title, incident_desc
|
||||||
|
)
|
||||||
|
|
||||||
|
if facts_removed > 0 or locations_fixed > 0:
|
||||||
|
await db.commit()
|
||||||
|
logger.info(
|
||||||
|
"Post-Refresh QC fuer Incident %d: %d Duplikate entfernt, %d Locations korrigiert",
|
||||||
|
incident_id, facts_removed, locations_fixed,
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"facts_removed": facts_removed, "locations_fixed": locations_fixed}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
"Post-Refresh QC Fehler fuer Incident %d: %s",
|
||||||
|
incident_id, e, exc_info=True,
|
||||||
|
)
|
||||||
|
return {"facts_removed": 0, "locations_fixed": 0, "error": str(e)}
|
||||||
|
|||||||
@@ -698,7 +698,7 @@ const App = {
|
|||||||
|
|
||||||
async loadIncidentDetail(id) {
|
async loadIncidentDetail(id) {
|
||||||
try {
|
try {
|
||||||
const [incident, articles, factchecks, snapshots, locations] = await Promise.all([
|
const [incident, articles, factchecks, snapshots, locationsResponse] = await Promise.all([
|
||||||
API.getIncident(id),
|
API.getIncident(id),
|
||||||
API.getArticles(id),
|
API.getArticles(id),
|
||||||
API.getFactChecks(id),
|
API.getFactChecks(id),
|
||||||
@@ -706,14 +706,27 @@ const App = {
|
|||||||
API.getLocations(id).catch(() => []),
|
API.getLocations(id).catch(() => []),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
this.renderIncidentDetail(incident, articles, factchecks, snapshots, locations);
|
// Locations-API gibt jetzt {category_labels, locations} oder Array (Rückwärtskompatibel)
|
||||||
|
let locations, categoryLabels;
|
||||||
|
if (Array.isArray(locationsResponse)) {
|
||||||
|
locations = locationsResponse;
|
||||||
|
categoryLabels = null;
|
||||||
|
} else if (locationsResponse && locationsResponse.locations) {
|
||||||
|
locations = locationsResponse.locations;
|
||||||
|
categoryLabels = locationsResponse.category_labels || null;
|
||||||
|
} else {
|
||||||
|
locations = [];
|
||||||
|
categoryLabels = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('loadIncidentDetail Fehler:', err);
|
console.error('loadIncidentDetail Fehler:', err);
|
||||||
UI.showToast('Fehler beim Laden: ' + err.message, 'error');
|
UI.showToast('Fehler beim Laden: ' + err.message, 'error');
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
renderIncidentDetail(incident, articles, factchecks, snapshots, locations) {
|
renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels) {
|
||||||
// Header Strip
|
// Header Strip
|
||||||
{ const _e = document.getElementById('incident-title'); if (_e) _e.textContent = incident.title; }
|
{ const _e = document.getElementById('incident-title'); if (_e) _e.textContent = incident.title; }
|
||||||
{ const _e = document.getElementById('incident-description'); if (_e) _e.textContent = incident.description || ''; }
|
{ const _e = document.getElementById('incident-description'); if (_e) _e.textContent = incident.description || ''; }
|
||||||
@@ -845,7 +858,7 @@ const App = {
|
|||||||
this._resizeTimelineTile();
|
this._resizeTimelineTile();
|
||||||
|
|
||||||
// Karte rendern
|
// Karte rendern
|
||||||
UI.renderMap(locations || []);
|
UI.renderMap(locations || [], categoryLabels);
|
||||||
},
|
},
|
||||||
|
|
||||||
_collectEntries(filterType, searchTerm, range) {
|
_collectEntries(filterType, searchTerm, range) {
|
||||||
@@ -1617,8 +1630,12 @@ const App = {
|
|||||||
if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; }
|
if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; }
|
||||||
if (st.status === 'done' && st.locations > 0) {
|
if (st.status === 'done' && st.locations > 0) {
|
||||||
UI.showToast(`${st.locations} Orte aus ${st.processed} Artikeln erkannt`, 'success');
|
UI.showToast(`${st.locations} Orte aus ${st.processed} Artikeln erkannt`, 'success');
|
||||||
const locations = await API.getLocations(incidentId).catch(() => []);
|
const locResp = await API.getLocations(incidentId).catch(() => []);
|
||||||
UI.renderMap(locations);
|
let locs, catLabels;
|
||||||
|
if (Array.isArray(locResp)) { locs = locResp; catLabels = null; }
|
||||||
|
else if (locResp && locResp.locations) { locs = locResp.locations; catLabels = locResp.category_labels || null; }
|
||||||
|
else { locs = []; catLabels = null; }
|
||||||
|
UI.renderMap(locs, catLabels);
|
||||||
} else if (st.status === 'done') {
|
} else if (st.status === 'done') {
|
||||||
UI.showToast('Keine neuen Orte gefunden', 'info');
|
UI.showToast('Keine neuen Orte gefunden', 'info');
|
||||||
} else if (st.status === 'error') {
|
} else if (st.status === 'error') {
|
||||||
|
|||||||
@@ -639,30 +639,29 @@ const UI = {
|
|||||||
_initMarkerIcons() {
|
_initMarkerIcons() {
|
||||||
if (this._markerIcons || typeof L === 'undefined') return;
|
if (this._markerIcons || typeof L === 'undefined') return;
|
||||||
this._markerIcons = {
|
this._markerIcons = {
|
||||||
target: this._createSvgIcon('#dc3545', '#a71d2a'),
|
primary: this._createSvgIcon('#dc3545', '#a71d2a'),
|
||||||
retaliation: this._createSvgIcon('#f39c12', '#c47d0a'),
|
secondary: this._createSvgIcon('#f39c12', '#c47d0a'),
|
||||||
response: this._createSvgIcon('#f39c12', '#c47d0a'),
|
tertiary: this._createSvgIcon('#2a81cb', '#1a5c8f'),
|
||||||
actor: this._createSvgIcon('#2a81cb', '#1a5c8f'),
|
|
||||||
mentioned: this._createSvgIcon('#7b7b7b', '#555555'),
|
mentioned: this._createSvgIcon('#7b7b7b', '#555555'),
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
|
|
||||||
_categoryLabels: {
|
_defaultCategoryLabels: {
|
||||||
target: 'Angegriffene Ziele',
|
primary: 'Hauptgeschehen',
|
||||||
retaliation: 'Vergeltung / Eskalation',
|
secondary: 'Reaktionen',
|
||||||
response: 'Reaktion / Gegenmassnahmen',
|
tertiary: 'Beteiligte',
|
||||||
actor: 'Strategische Akteure',
|
|
||||||
mentioned: 'Erwaehnt',
|
mentioned: 'Erwaehnt',
|
||||||
},
|
},
|
||||||
_categoryColors: {
|
_categoryColors: {
|
||||||
target: '#cb2b3e',
|
primary: '#cb2b3e',
|
||||||
retaliation: '#f39c12',
|
secondary: '#f39c12',
|
||||||
response: '#f39c12',
|
tertiary: '#2a81cb',
|
||||||
actor: '#2a81cb',
|
|
||||||
mentioned: '#7b7b7b',
|
mentioned: '#7b7b7b',
|
||||||
},
|
},
|
||||||
|
|
||||||
renderMap(locations) {
|
_activeCategoryLabels: null,
|
||||||
|
|
||||||
|
renderMap(locations, categoryLabels) {
|
||||||
const container = document.getElementById('map-container');
|
const container = document.getElementById('map-container');
|
||||||
const emptyEl = document.getElementById('map-empty');
|
const emptyEl = document.getElementById('map-empty');
|
||||||
const statsEl = document.getElementById('map-stats');
|
const statsEl = document.getElementById('map-stats');
|
||||||
@@ -741,6 +740,9 @@ const UI = {
|
|||||||
// Marker hinzufuegen
|
// Marker hinzufuegen
|
||||||
const bounds = [];
|
const bounds = [];
|
||||||
this._initMarkerIcons();
|
this._initMarkerIcons();
|
||||||
|
// Dynamische Labels verwenden (API > Default)
|
||||||
|
const catLabels = categoryLabels || this._activeCategoryLabels || this._defaultCategoryLabels;
|
||||||
|
this._activeCategoryLabels = catLabels;
|
||||||
const usedCategories = new Set();
|
const usedCategories = new Set();
|
||||||
|
|
||||||
locations.forEach(loc => {
|
locations.forEach(loc => {
|
||||||
@@ -751,7 +753,7 @@ const UI = {
|
|||||||
const marker = L.marker([loc.lat, loc.lon], markerOpts);
|
const marker = L.marker([loc.lat, loc.lon], markerOpts);
|
||||||
|
|
||||||
// Popup-Inhalt
|
// Popup-Inhalt
|
||||||
const catLabel = this._categoryLabels[cat] || cat;
|
const catLabel = catLabels[cat] || this._defaultCategoryLabels[cat] || cat;
|
||||||
const catColor = this._categoryColors[cat] || '#7b7b7b';
|
const catColor = this._categoryColors[cat] || '#7b7b7b';
|
||||||
let popupHtml = `<div class="map-popup">`;
|
let popupHtml = `<div class="map-popup">`;
|
||||||
popupHtml += `<div class="map-popup-title">${this.escape(loc.location_name)}`;
|
popupHtml += `<div class="map-popup-title">${this.escape(loc.location_name)}`;
|
||||||
@@ -798,12 +800,13 @@ const UI = {
|
|||||||
|
|
||||||
const legend = L.control({ position: 'bottomright' });
|
const legend = L.control({ position: 'bottomright' });
|
||||||
const self2 = this;
|
const self2 = this;
|
||||||
|
const legendLabels = catLabels;
|
||||||
legend.onAdd = function() {
|
legend.onAdd = function() {
|
||||||
const div = L.DomUtil.create('div', 'map-legend-ctrl');
|
const div = L.DomUtil.create('div', 'map-legend-ctrl');
|
||||||
let html = '<strong style="display:block;margin-bottom:6px;">Legende</strong>';
|
let html = '<strong style="display:block;margin-bottom:6px;">Legende</strong>';
|
||||||
['target', 'retaliation', 'response', 'actor', 'mentioned'].forEach(cat => {
|
['primary', 'secondary', 'tertiary', 'mentioned'].forEach(cat => {
|
||||||
if (usedCategories.has(cat)) {
|
if (usedCategories.has(cat) && legendLabels[cat]) {
|
||||||
html += `<div style="display:flex;align-items:center;gap:6px;margin:3px 0;"><span style="width:10px;height:10px;border-radius:50%;background:${self2._categoryColors[cat]};flex-shrink:0;"></span><span>${self2._categoryLabels[cat]}</span></div>`;
|
html += `<div style="display:flex;align-items:center;gap:6px;margin:3px 0;"><span style="width:10px;height:10px;border-radius:50%;background:${self2._categoryColors[cat]};flex-shrink:0;"></span><span>${legendLabels[cat]}</span></div>`;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
div.innerHTML = html;
|
div.innerHTML = html;
|
||||||
@@ -853,7 +856,7 @@ const UI = {
|
|||||||
if (this._pendingLocations && typeof L !== 'undefined') {
|
if (this._pendingLocations && typeof L !== 'undefined') {
|
||||||
const locs = this._pendingLocations;
|
const locs = this._pendingLocations;
|
||||||
this._pendingLocations = null;
|
this._pendingLocations = null;
|
||||||
this.renderMap(locs);
|
this.renderMap(locs, this._activeCategoryLabels);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren