feat: Kontextabhängige Karten-Kategorien
4 feste Farbstufen (primary/secondary/tertiary/mentioned) mit variablen Labels pro Lage, die von Haiku generiert werden. - DB: category_labels Spalte in incidents, alte Kategorien migriert (target->primary, response/retaliation->secondary, actor->tertiary) - Geoparsing: generate_category_labels() + neuer Prompt mit neuen Keys - QC: Kategorieprüfung auf neue Keys umgestellt - Orchestrator: Tuple-Rückgabe + Labels in DB speichern - API: category_labels im Locations- und Lagebild-Response - Frontend: Dynamische Legende aus API-Labels mit Fallback-Defaults - Migrationsskript für bestehende Lagen Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -209,6 +209,90 @@ def _geocode_location(name: str, country_code: str = "", haiku_coords: Optional[
|
||||
return result
|
||||
|
||||
|
||||
# Default-Labels (Fallback wenn Haiku keine generiert)
|
||||
DEFAULT_CATEGORY_LABELS = {
|
||||
"primary": "Hauptgeschehen",
|
||||
"secondary": "Reaktionen",
|
||||
"tertiary": "Beteiligte",
|
||||
"mentioned": "Erwaehnt",
|
||||
}
|
||||
|
||||
CATEGORY_LABELS_PROMPT = """Generiere kurze, praegnante Kategorie-Labels fuer Karten-Pins zu dieser Nachrichtenlage.
|
||||
|
||||
Lage: "{incident_context}"
|
||||
|
||||
Es gibt 4 Farbstufen fuer Orte auf der Karte:
|
||||
1. primary (Rot): Wo das Hauptgeschehen stattfindet
|
||||
2. secondary (Orange): Direkte Reaktionen/Gegenmassnahmen
|
||||
3. tertiary (Blau): Entscheidungstraeger/Beteiligte
|
||||
4. mentioned (Grau): Nur erwaehnt
|
||||
|
||||
Generiere fuer jede Stufe ein kurzes Label (1-3 Woerter), das zum Thema passt.
|
||||
Wenn eine Stufe fuer dieses Thema nicht sinnvoll ist, setze null.
|
||||
|
||||
Beispiele:
|
||||
- Militaerkonflikt Iran: {{"primary": "Kampfschauplätze", "secondary": "Vergeltungsschläge", "tertiary": "Strategische Akteure", "mentioned": "Erwähnt"}}
|
||||
- Erdbeben Tuerkei: {{"primary": "Katastrophenzone", "secondary": "Hilfsoperationen", "tertiary": "Geberländer", "mentioned": "Erwähnt"}}
|
||||
- Bundestagswahl: {{"primary": "Wahlkreise", "secondary": "Koalitionspartner", "tertiary": "Internationale Reaktionen", "mentioned": "Erwähnt"}}
|
||||
|
||||
Antworte NUR als JSON-Objekt:"""
|
||||
|
||||
|
||||
async def generate_category_labels(incident_context: str) -> dict[str, str | None]:
|
||||
"""Generiert kontextabhaengige Kategorie-Labels via Haiku.
|
||||
|
||||
Args:
|
||||
incident_context: Lage-Titel + Beschreibung
|
||||
|
||||
Returns:
|
||||
Dict mit Labels fuer primary/secondary/tertiary/mentioned (oder None wenn nicht passend)
|
||||
"""
|
||||
if not incident_context or not incident_context.strip():
|
||||
return dict(DEFAULT_CATEGORY_LABELS)
|
||||
|
||||
prompt = CATEGORY_LABELS_PROMPT.format(incident_context=incident_context[:500])
|
||||
|
||||
try:
|
||||
result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
|
||||
parsed = None
|
||||
try:
|
||||
parsed = json.loads(result_text)
|
||||
except json.JSONDecodeError:
|
||||
match = re.search(r'\{.*\}', result_text, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
parsed = json.loads(match.group())
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
if not parsed or not isinstance(parsed, dict):
|
||||
logger.warning("generate_category_labels: Kein gueltiges JSON erhalten")
|
||||
return dict(DEFAULT_CATEGORY_LABELS)
|
||||
|
||||
# Validierung: Nur erlaubte Keys, Werte muessen str oder None sein
|
||||
valid_keys = {"primary", "secondary", "tertiary", "mentioned"}
|
||||
labels = {}
|
||||
for key in valid_keys:
|
||||
val = parsed.get(key)
|
||||
if val is None or val == "null":
|
||||
labels[key] = None
|
||||
elif isinstance(val, str) and val.strip():
|
||||
labels[key] = val.strip()
|
||||
else:
|
||||
labels[key] = DEFAULT_CATEGORY_LABELS.get(key)
|
||||
|
||||
# mentioned sollte immer einen Wert haben
|
||||
if not labels.get("mentioned"):
|
||||
labels["mentioned"] = "Erwaehnt"
|
||||
|
||||
logger.info(f"Kategorie-Labels generiert: {labels}")
|
||||
return labels
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"generate_category_labels fehlgeschlagen: {e}")
|
||||
return dict(DEFAULT_CATEGORY_LABELS)
|
||||
|
||||
|
||||
HAIKU_GEOPARSE_PROMPT = """Extrahiere alle geographischen Orte aus diesen Nachrichten-Headlines.
|
||||
|
||||
Kontext der Lage: "{incident_context}"
|
||||
@@ -222,9 +306,9 @@ Regeln:
|
||||
- Regionen wie "Middle East", "Gulf", "Naher Osten" NICHT extrahieren (kein einzelner Punkt auf der Karte)
|
||||
|
||||
Klassifiziere basierend auf dem Lage-Kontext:
|
||||
- "target": Wo das Ereignis passiert / Schaden entsteht
|
||||
- "response": Wo Reaktionen / Gegenmassnahmen stattfinden
|
||||
- "actor": Wo Entscheidungen getroffen werden / Entscheider sitzen
|
||||
- "primary": Wo das Hauptgeschehen stattfindet (z.B. Angriffsziele, Katastrophenzone, Wahlkreise)
|
||||
- "secondary": Direkte Reaktionen oder Gegenmassnahmen (z.B. Vergeltung, Hilfsoperationen)
|
||||
- "tertiary": Entscheidungstraeger, Beteiligte (z.B. wo Entscheidungen getroffen werden)
|
||||
- "mentioned": Nur erwaehnt, kein direkter Bezug
|
||||
|
||||
Headlines:
|
||||
@@ -233,7 +317,7 @@ Headlines:
|
||||
Antwort NUR als JSON-Array, kein anderer Text:
|
||||
[{{"headline_idx": 0, "locations": [
|
||||
{{"name": "Teheran", "normalized": "Tehran", "country_code": "IR",
|
||||
"type": "city", "category": "target",
|
||||
"type": "city", "category": "primary",
|
||||
"lat": 35.69, "lon": 51.42}}
|
||||
]}}]"""
|
||||
|
||||
@@ -314,12 +398,19 @@ async def _extract_locations_haiku(
|
||||
if not name:
|
||||
continue
|
||||
|
||||
raw_cat = loc.get("category", "mentioned")
|
||||
# Alte Kategorien mappen (falls Haiku sie noch generiert)
|
||||
cat_map = {"target": "primary", "response": "secondary", "retaliation": "secondary", "actor": "tertiary", "context": "tertiary"}
|
||||
category = cat_map.get(raw_cat, raw_cat)
|
||||
if category not in ("primary", "secondary", "tertiary", "mentioned"):
|
||||
category = "mentioned"
|
||||
|
||||
article_locs.append({
|
||||
"name": name,
|
||||
"normalized": loc.get("normalized", name),
|
||||
"country_code": loc.get("country_code", ""),
|
||||
"type": loc_type,
|
||||
"category": loc.get("category", "mentioned"),
|
||||
"category": category,
|
||||
"lat": loc.get("lat"),
|
||||
"lon": loc.get("lon"),
|
||||
})
|
||||
@@ -333,7 +424,7 @@ async def _extract_locations_haiku(
|
||||
async def geoparse_articles(
|
||||
articles: list[dict],
|
||||
incident_context: str = "",
|
||||
) -> dict[int, list[dict]]:
|
||||
) -> tuple[dict[int, list[dict]], dict[str, str | None] | None]:
|
||||
"""Geoparsing fuer eine Liste von Artikeln via Haiku + geonamescache.
|
||||
|
||||
Args:
|
||||
@@ -341,11 +432,15 @@ async def geoparse_articles(
|
||||
incident_context: Lage-Kontext (Titel + Beschreibung) fuer kontextbewusste Klassifizierung
|
||||
|
||||
Returns:
|
||||
dict[article_id -> list[{location_name, location_name_normalized, country_code,
|
||||
lat, lon, confidence, source_text, category}]]
|
||||
Tuple von (dict[article_id -> list[locations]], category_labels oder None)
|
||||
"""
|
||||
if not articles:
|
||||
return {}
|
||||
return {}, None
|
||||
|
||||
# Labels parallel zum Geoparsing generieren (nur wenn Kontext vorhanden)
|
||||
labels_task = None
|
||||
if incident_context:
|
||||
labels_task = asyncio.create_task(generate_category_labels(incident_context))
|
||||
|
||||
# Headlines sammeln
|
||||
headlines = []
|
||||
@@ -363,7 +458,13 @@ async def geoparse_articles(
|
||||
headlines.append({"idx": article_id, "text": headline})
|
||||
|
||||
if not headlines:
|
||||
return {}
|
||||
category_labels = None
|
||||
if labels_task:
|
||||
try:
|
||||
category_labels = await labels_task
|
||||
except Exception:
|
||||
pass
|
||||
return {}, category_labels
|
||||
|
||||
# Batches bilden (max 50 Headlines pro Haiku-Call)
|
||||
batch_size = 50
|
||||
@@ -374,7 +475,13 @@ async def geoparse_articles(
|
||||
all_haiku_results.update(batch_results)
|
||||
|
||||
if not all_haiku_results:
|
||||
return {}
|
||||
category_labels = None
|
||||
if labels_task:
|
||||
try:
|
||||
category_labels = await labels_task
|
||||
except Exception:
|
||||
pass
|
||||
return {}, category_labels
|
||||
|
||||
# Geocoding via geonamescache (mit Haiku-Koordinaten als Fallback)
|
||||
result = {}
|
||||
@@ -406,4 +513,12 @@ async def geoparse_articles(
|
||||
if locations:
|
||||
result[article_id] = locations
|
||||
|
||||
return result
|
||||
# Category-Labels abwarten
|
||||
category_labels = None
|
||||
if labels_task:
|
||||
try:
|
||||
category_labels = await labels_task
|
||||
except Exception as e:
|
||||
logger.warning(f"Category-Labels konnten nicht generiert werden: {e}")
|
||||
|
||||
return result, category_labels
|
||||
|
||||
@@ -782,7 +782,7 @@ class AgentOrchestrator:
|
||||
from agents.geoparsing import geoparse_articles
|
||||
incident_context = f"{title} - {description}"
|
||||
logger.info(f"Geoparsing fuer {len(new_articles_for_analysis)} neue Artikel...")
|
||||
geo_results = await geoparse_articles(new_articles_for_analysis, incident_context)
|
||||
geo_results, category_labels = await geoparse_articles(new_articles_for_analysis, incident_context)
|
||||
geo_count = 0
|
||||
for art_id, locations in geo_results.items():
|
||||
for loc in locations:
|
||||
@@ -799,6 +799,15 @@ class AgentOrchestrator:
|
||||
if geo_count > 0:
|
||||
await db.commit()
|
||||
logger.info(f"Geoparsing: {geo_count} Orte aus {len(geo_results)} Artikeln gespeichert")
|
||||
# Category-Labels in Incident speichern (nur wenn neu generiert)
|
||||
if category_labels:
|
||||
import json as _json
|
||||
await db.execute(
|
||||
"UPDATE incidents SET category_labels = ? WHERE id = ? AND category_labels IS NULL",
|
||||
(_json.dumps(category_labels, ensure_ascii=False), incident_id),
|
||||
)
|
||||
await db.commit()
|
||||
logger.info(f"Category-Labels gespeichert fuer Incident {incident_id}: {category_labels}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Geoparsing fehlgeschlagen (Pipeline laeuft weiter): {e}")
|
||||
|
||||
|
||||
1183
src/database.py
1183
src/database.py
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -338,8 +338,8 @@ async def get_locations(
|
||||
"source_url": row["source_url"],
|
||||
})
|
||||
|
||||
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: target > retaliation > actor > mentioned)
|
||||
priority = {"target": 4, "retaliation": 3, "actor": 2, "mentioned": 1}
|
||||
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned)
|
||||
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
|
||||
result = []
|
||||
for loc in loc_map.values():
|
||||
cats = loc.pop("categories")
|
||||
@@ -349,7 +349,20 @@ async def get_locations(
|
||||
best_cat = "mentioned"
|
||||
loc["category"] = best_cat
|
||||
result.append(loc)
|
||||
return result
|
||||
|
||||
# Category-Labels aus Incident laden
|
||||
cursor = await db.execute(
|
||||
"SELECT category_labels FROM incidents WHERE id = ?", (incident_id,)
|
||||
)
|
||||
inc_row = await cursor.fetchone()
|
||||
category_labels = None
|
||||
if inc_row and inc_row["category_labels"]:
|
||||
try:
|
||||
category_labels = json.loads(inc_row["category_labels"])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
return {"category_labels": category_labels, "locations": result}
|
||||
|
||||
|
||||
# Geoparse-Status pro Incident (in-memory)
|
||||
@@ -395,8 +408,23 @@ async def _run_geoparse_background(incident_id: int, tenant_id: int | None):
|
||||
processed = 0
|
||||
for i in range(0, total, batch_size):
|
||||
batch = articles[i:i + batch_size]
|
||||
geo_results = await geoparse_articles(batch, incident_context)
|
||||
for art_id, locations in geo_results.items():
|
||||
geo_result = await geoparse_articles(batch, incident_context)
|
||||
# Tuple-Rückgabe: (locations_dict, category_labels)
|
||||
if isinstance(geo_result, tuple):
|
||||
batch_geo_results, batch_labels = geo_result
|
||||
# Labels beim ersten Batch speichern
|
||||
if batch_labels and i == 0:
|
||||
try:
|
||||
await db.execute(
|
||||
"UPDATE incidents SET category_labels = ? WHERE id = ? AND category_labels IS NULL",
|
||||
(json.dumps(batch_labels, ensure_ascii=False), incident_id),
|
||||
)
|
||||
await db.commit()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
batch_geo_results = geo_result
|
||||
for art_id, locations in batch_geo_results.items():
|
||||
for loc in locations:
|
||||
await db.execute(
|
||||
"""INSERT INTO article_locations
|
||||
|
||||
@@ -64,6 +64,14 @@ async def get_lagebild(db=Depends(db_dependency)):
|
||||
raise HTTPException(status_code=404, detail="Incident not found")
|
||||
incident = dict(incident)
|
||||
|
||||
# Category-Labels laden
|
||||
category_labels = None
|
||||
if incident.get("category_labels"):
|
||||
try:
|
||||
category_labels = json.loads(incident["category_labels"])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
# Alle Artikel aus allen Iran-Incidents laden
|
||||
cursor = await db.execute(
|
||||
f"""SELECT id, headline, headline_de, source, source_url, language,
|
||||
@@ -148,6 +156,7 @@ async def get_lagebild(db=Depends(db_dependency)):
|
||||
"fact_checks": fact_checks,
|
||||
"available_snapshots": available_snapshots,
|
||||
"locations": locations,
|
||||
"category_labels": category_labels,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,389 +1,415 @@
|
||||
"""Post-Refresh Quality Check via Haiku.
|
||||
|
||||
Prueft nach jedem Refresh:
|
||||
1. Semantische Faktencheck-Duplikate (Haiku-Clustering mit Fuzzy-Vorfilter)
|
||||
2. Falsch kategorisierte Karten-Locations (Haiku bewertet Kontext der Lage)
|
||||
|
||||
Regelbasierte Listen dienen als Fallback falls Haiku fehlschlaegt.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
from agents.claude_client import call_claude
|
||||
from config import CLAUDE_MODEL_FAST
|
||||
|
||||
logger = logging.getLogger("osint.post_refresh_qc")
|
||||
|
||||
STATUS_PRIORITY = {
|
||||
"confirmed": 5, "established": 5,
|
||||
"contradicted": 4, "disputed": 4,
|
||||
"unconfirmed": 3, "unverified": 3,
|
||||
"developing": 1,
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Faktencheck-Duplikate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DEDUP_PROMPT = """\
|
||||
Du bist ein Deduplizierungs-Agent fuer Faktenchecks eines OSINT-Monitors.
|
||||
|
||||
LAGE: {incident_title}
|
||||
|
||||
Unten stehen Faktenchecks (ID + Status + Claim). Finde Gruppen von Fakten,
|
||||
die INHALTLICH DASSELBE aussagen, auch wenn sie unterschiedlich formuliert sind.
|
||||
|
||||
REGELN:
|
||||
- Gleicher Sachverhalt = gleiche Gruppe
|
||||
(z.B. "Trump fordert Kapitulation" und "US-Praesident verlangt bedingungslose Aufgabe")
|
||||
- Unterschiedliche Detailtiefe zum SELBEN Fakt = gleiche Gruppe
|
||||
- VERSCHIEDENE Sachverhalte = VERSCHIEDENE Gruppen
|
||||
(z.B. "Angriff auf Isfahan" vs "Angriff auf Teheran" sind NICHT dasselbe)
|
||||
- Eine Gruppe muss mindestens 2 Eintraege haben
|
||||
|
||||
Antworte NUR als JSON-Array von Gruppen. Jede Gruppe ist ein Array von IDs:
|
||||
[[1,5,12], [3,8]]
|
||||
|
||||
Wenn keine Duplikate: antworte mit []
|
||||
|
||||
FAKTEN:
|
||||
{facts_text}"""
|
||||
|
||||
|
||||
async def _haiku_find_duplicate_clusters(
|
||||
facts: list[dict], incident_title: str
|
||||
) -> list[list[int]]:
|
||||
"""Fragt Haiku welche Fakten semantische Duplikate sind."""
|
||||
facts_text = "\n".join(
|
||||
f'ID={f["id"]} [{f["status"]}]: {f["claim"]}'
|
||||
for f in facts
|
||||
)
|
||||
prompt = _DEDUP_PROMPT.format(
|
||||
incident_title=incident_title, facts_text=facts_text
|
||||
)
|
||||
try:
|
||||
result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
|
||||
data = json.loads(result)
|
||||
if isinstance(data, list) and all(isinstance(g, list) for g in data):
|
||||
return data
|
||||
except json.JSONDecodeError:
|
||||
match = re.search(r'\[.*\]', result, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
data = json.loads(match.group())
|
||||
if isinstance(data, list):
|
||||
return data
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning("Haiku Duplikat-Clustering fehlgeschlagen: %s", e)
|
||||
return []
|
||||
|
||||
|
||||
def _fuzzy_prefilter(all_facts: list[dict], max_candidates: int = 80) -> list[dict]:
|
||||
"""Waehlt Kandidaten fuer Haiku-Check per Fuzzy-Vorfilter aus.
|
||||
|
||||
Findet Paare mit Aehnlichkeit >= 0.60 und gibt die betroffenen Fakten zurueck.
|
||||
Begrenzt auf max_candidates um Haiku-Tokens zu sparen.
|
||||
"""
|
||||
from agents.factchecker import normalize_claim, _keyword_set
|
||||
|
||||
if len(all_facts) <= max_candidates:
|
||||
return all_facts
|
||||
|
||||
normalized = []
|
||||
for f in all_facts:
|
||||
nc = normalize_claim(f["claim"])
|
||||
kw = _keyword_set(f["claim"])
|
||||
normalized.append((f, nc, kw))
|
||||
|
||||
candidate_ids = set()
|
||||
recent = normalized[:60]
|
||||
|
||||
for i, (fact_a, norm_a, kw_a) in enumerate(recent):
|
||||
for j, (fact_b, norm_b, kw_b) in enumerate(normalized):
|
||||
if i >= j or fact_b["id"] == fact_a["id"]:
|
||||
continue
|
||||
if not norm_a or not norm_b:
|
||||
continue
|
||||
|
||||
len_ratio = len(norm_a) / len(norm_b) if norm_b else 0
|
||||
if len_ratio > 2.5 or len_ratio < 0.4:
|
||||
continue
|
||||
|
||||
seq_ratio = SequenceMatcher(None, norm_a, norm_b).ratio()
|
||||
kw_union = kw_a | kw_b
|
||||
jaccard = len(kw_a & kw_b) / len(kw_union) if kw_union else 0.0
|
||||
combined = 0.7 * seq_ratio + 0.3 * jaccard
|
||||
|
||||
if combined >= 0.60:
|
||||
candidate_ids.add(fact_a["id"])
|
||||
candidate_ids.add(fact_b["id"])
|
||||
|
||||
if len(candidate_ids) >= max_candidates:
|
||||
break
|
||||
if len(candidate_ids) >= max_candidates:
|
||||
break
|
||||
|
||||
candidates = [f for f in all_facts if f["id"] in candidate_ids]
|
||||
logger.info(
|
||||
"Fuzzy-Vorfilter: %d/%d Fakten als Duplikat-Kandidaten identifiziert",
|
||||
len(candidates), len(all_facts),
|
||||
)
|
||||
return candidates
|
||||
|
||||
|
||||
async def check_fact_duplicates(db, incident_id: int, incident_title: str) -> int:
|
||||
"""Prueft auf semantische Faktencheck-Duplikate via Haiku.
|
||||
|
||||
1. Fuzzy-Vorfilter reduziert auf relevante Kandidaten
|
||||
2. Haiku clustert semantische Duplikate
|
||||
3. Pro Cluster: behalte besten Fakt, loesche Rest
|
||||
|
||||
Returns: Anzahl entfernter Duplikate.
|
||||
"""
|
||||
cursor = await db.execute(
|
||||
"SELECT id, claim, status, sources_count, evidence, checked_at "
|
||||
"FROM fact_checks WHERE incident_id = ? ORDER BY checked_at DESC",
|
||||
(incident_id,),
|
||||
)
|
||||
all_facts = [dict(row) for row in await cursor.fetchall()]
|
||||
|
||||
if len(all_facts) < 2:
|
||||
return 0
|
||||
|
||||
# Schritt 1: Fuzzy-Vorfilter
|
||||
candidates = _fuzzy_prefilter(all_facts)
|
||||
if len(candidates) < 2:
|
||||
return 0
|
||||
|
||||
# Schritt 2: Haiku-Clustering (in Batches von max 80)
|
||||
all_clusters = []
|
||||
batch_size = 80
|
||||
for i in range(0, len(candidates), batch_size):
|
||||
batch = candidates[i:i + batch_size]
|
||||
clusters = await _haiku_find_duplicate_clusters(batch, incident_title)
|
||||
all_clusters.extend(clusters)
|
||||
|
||||
if not all_clusters:
|
||||
logger.info("QC Fakten: Haiku fand keine Duplikate")
|
||||
return 0
|
||||
|
||||
# Schritt 3: Pro Cluster besten behalten, Rest loeschen
|
||||
facts_by_id = {f["id"]: f for f in all_facts}
|
||||
ids_to_delete = set()
|
||||
|
||||
for cluster_ids in all_clusters:
|
||||
valid_ids = [cid for cid in cluster_ids if cid in facts_by_id]
|
||||
if len(valid_ids) <= 1:
|
||||
continue
|
||||
|
||||
cluster_facts = [facts_by_id[cid] for cid in valid_ids]
|
||||
best = max(cluster_facts, key=lambda f: (
|
||||
STATUS_PRIORITY.get(f["status"], 0),
|
||||
f.get("sources_count", 0),
|
||||
f.get("checked_at", ""),
|
||||
))
|
||||
|
||||
for fact in cluster_facts:
|
||||
if fact["id"] != best["id"]:
|
||||
ids_to_delete.add(fact["id"])
|
||||
logger.info(
|
||||
"QC Duplikat: ID %d entfernt, behalte ID %d ('%s')",
|
||||
fact["id"], best["id"], best["claim"][:60],
|
||||
)
|
||||
|
||||
if ids_to_delete:
|
||||
placeholders = ",".join("?" * len(ids_to_delete))
|
||||
await db.execute(
|
||||
f"DELETE FROM fact_checks WHERE id IN ({placeholders})",
|
||||
list(ids_to_delete),
|
||||
)
|
||||
logger.info(
|
||||
"QC: %d Faktencheck-Duplikate entfernt fuer Incident %d",
|
||||
len(ids_to_delete), incident_id,
|
||||
)
|
||||
|
||||
return len(ids_to_delete)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Karten-Location-Kategorien
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_LOCATION_PROMPT = """\
|
||||
Du bist ein Geopolitik-Experte fuer einen OSINT-Monitor.
|
||||
|
||||
LAGE: {incident_title}
|
||||
BESCHREIBUNG: {incident_desc}
|
||||
|
||||
Unten stehen Orte, die auf der Karte als "target" (Angriffsziel) markiert sind.
|
||||
Pruefe fuer jeden Ort, ob die Kategorie "target" korrekt ist.
|
||||
|
||||
KATEGORIEN:
|
||||
- target: Ort wurde tatsaechlich militaerisch angegriffen oder bombardiert
|
||||
- actor: Ort gehoert zu einer Konfliktpartei (z.B. Hauptstadt des Angreifers)
|
||||
- response: Ort reagiert auf den Konflikt (z.B. diplomatische Reaktion, Sanktionen)
|
||||
- mentioned: Ort wird nur im Kontext erwaehnt (z.B. wirtschaftliche Auswirkungen)
|
||||
|
||||
REGELN:
|
||||
- Nur Orte die TATSAECHLICH physisch angegriffen/bombardiert wurden = "target"
|
||||
- Hauptstaedte von Angreiferlaendern (z.B. Washington DC) = "actor"
|
||||
- Laender die nur wirtschaftlich betroffen sind (z.B. steigende Oelpreise) = "mentioned"
|
||||
- Laender die diplomatisch reagieren = "response"
|
||||
- Im Zweifel: "mentioned"
|
||||
|
||||
Antworte als JSON-Array mit Korrekturen. Nur Eintraege die GEAENDERT werden muessen:
|
||||
[{{"id": 123, "category": "mentioned"}}, {{"id": 456, "category": "actor"}}]
|
||||
|
||||
Wenn alle Kategorien korrekt sind: antworte mit []
|
||||
|
||||
ORTE (aktuell alle als "target" markiert):
|
||||
{locations_text}"""
|
||||
|
||||
|
||||
async def check_location_categories(
|
||||
db, incident_id: int, incident_title: str, incident_desc: str
|
||||
) -> int:
|
||||
"""Prueft Karten-Location-Kategorien via Haiku.
|
||||
|
||||
Returns: Anzahl korrigierter Eintraege.
|
||||
"""
|
||||
cursor = await db.execute(
|
||||
"SELECT id, location_name, latitude, longitude, category "
|
||||
"FROM article_locations WHERE incident_id = ? AND category = 'target'",
|
||||
(incident_id,),
|
||||
)
|
||||
targets = [dict(row) for row in await cursor.fetchall()]
|
||||
|
||||
if not targets:
|
||||
return 0
|
||||
|
||||
# Dedupliziere nach location_name fuer den Prompt (spart Tokens)
|
||||
unique_names = {}
|
||||
ids_by_name = {}
|
||||
for loc in targets:
|
||||
name = loc["location_name"]
|
||||
if name not in unique_names:
|
||||
unique_names[name] = loc
|
||||
ids_by_name[name] = []
|
||||
ids_by_name[name].append(loc["id"])
|
||||
|
||||
locations_text = "\n".join(
|
||||
f'ID={loc["id"]} | {loc["location_name"]} ({loc["latitude"]:.2f}, {loc["longitude"]:.2f})'
|
||||
for loc in unique_names.values()
|
||||
)
|
||||
|
||||
prompt = _LOCATION_PROMPT.format(
|
||||
incident_title=incident_title,
|
||||
incident_desc=incident_desc[:500] if incident_desc else "(keine Beschreibung)",
|
||||
locations_text=locations_text,
|
||||
)
|
||||
|
||||
fixes = []
|
||||
try:
|
||||
result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
|
||||
data = json.loads(result)
|
||||
if isinstance(data, list):
|
||||
fixes = data
|
||||
except json.JSONDecodeError:
|
||||
match = re.search(r'\[.*\]', result, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
data = json.loads(match.group())
|
||||
if isinstance(data, list):
|
||||
fixes = data
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning("Haiku Location-Check fehlgeschlagen: %s", e)
|
||||
return 0
|
||||
|
||||
if not fixes:
|
||||
logger.info("QC Locations: Haiku fand keine falschen Kategorien")
|
||||
return 0
|
||||
|
||||
# Korrekturen anwenden (auch auf alle IDs mit gleichem Namen)
|
||||
total_fixed = 0
|
||||
representative_ids = {loc["id"]: name for name, loc in unique_names.items()}
|
||||
|
||||
for fix in fixes:
|
||||
fix_id = fix.get("id")
|
||||
new_cat = fix.get("category")
|
||||
if not fix_id or not new_cat:
|
||||
continue
|
||||
if new_cat not in ("target", "actor", "response", "mentioned"):
|
||||
continue
|
||||
|
||||
# Finde den location_name fuer diese ID
|
||||
loc_name = representative_ids.get(fix_id)
|
||||
if not loc_name:
|
||||
continue
|
||||
|
||||
# Korrigiere ALLE Eintraege mit diesem Namen
|
||||
all_ids = ids_by_name.get(loc_name, [fix_id])
|
||||
placeholders = ",".join("?" * len(all_ids))
|
||||
await db.execute(
|
||||
f"UPDATE article_locations SET category = ? "
|
||||
f"WHERE id IN ({placeholders}) AND category = 'target'",
|
||||
[new_cat] + all_ids,
|
||||
)
|
||||
total_fixed += len(all_ids)
|
||||
logger.info(
|
||||
"QC Location: '%s' (%d Eintraege): target -> %s",
|
||||
loc_name, len(all_ids), new_cat,
|
||||
)
|
||||
|
||||
if total_fixed > 0:
|
||||
logger.info(
|
||||
"QC: %d Karten-Location-Kategorien korrigiert fuer Incident %d",
|
||||
total_fixed, incident_id,
|
||||
)
|
||||
|
||||
return total_fixed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. Hauptfunktion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def run_post_refresh_qc(db, incident_id: int) -> dict:
|
||||
"""Fuehrt den kompletten Post-Refresh Quality Check via Haiku durch.
|
||||
|
||||
Returns: Dict mit Ergebnissen {facts_removed, locations_fixed}.
|
||||
"""
|
||||
try:
|
||||
# Lage-Titel und Beschreibung laden
|
||||
cursor = await db.execute(
|
||||
"SELECT title, description FROM incidents WHERE id = ?",
|
||||
(incident_id,),
|
||||
)
|
||||
row = await cursor.fetchone()
|
||||
if not row:
|
||||
return {"facts_removed": 0, "locations_fixed": 0}
|
||||
|
||||
incident_title = row["title"] or ""
|
||||
incident_desc = row["description"] or ""
|
||||
|
||||
facts_removed = await check_fact_duplicates(db, incident_id, incident_title)
|
||||
locations_fixed = await check_location_categories(
|
||||
db, incident_id, incident_title, incident_desc
|
||||
)
|
||||
|
||||
if facts_removed > 0 or locations_fixed > 0:
|
||||
await db.commit()
|
||||
logger.info(
|
||||
"Post-Refresh QC fuer Incident %d: %d Duplikate entfernt, %d Locations korrigiert",
|
||||
incident_id, facts_removed, locations_fixed,
|
||||
)
|
||||
|
||||
return {"facts_removed": facts_removed, "locations_fixed": locations_fixed}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Post-Refresh QC Fehler fuer Incident %d: %s",
|
||||
incident_id, e, exc_info=True,
|
||||
)
|
||||
return {"facts_removed": 0, "locations_fixed": 0, "error": str(e)}
|
||||
"""Post-Refresh Quality Check via Haiku.
|
||||
|
||||
Prueft nach jedem Refresh:
|
||||
1. Semantische Faktencheck-Duplikate (Haiku-Clustering mit Fuzzy-Vorfilter)
|
||||
2. Falsch kategorisierte Karten-Locations (Haiku bewertet Kontext der Lage)
|
||||
|
||||
Regelbasierte Listen dienen als Fallback falls Haiku fehlschlaegt.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
from agents.claude_client import call_claude
|
||||
from config import CLAUDE_MODEL_FAST
|
||||
|
||||
logger = logging.getLogger("osint.post_refresh_qc")
|
||||
|
||||
STATUS_PRIORITY = {
|
||||
"confirmed": 5, "established": 5,
|
||||
"contradicted": 4, "disputed": 4,
|
||||
"unconfirmed": 3, "unverified": 3,
|
||||
"developing": 1,
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Faktencheck-Duplikate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DEDUP_PROMPT = """\
|
||||
Du bist ein Deduplizierungs-Agent fuer Faktenchecks eines OSINT-Monitors.
|
||||
|
||||
LAGE: {incident_title}
|
||||
|
||||
Unten stehen Faktenchecks (ID + Status + Claim). Finde Gruppen von Fakten,
|
||||
die INHALTLICH DASSELBE aussagen, auch wenn sie unterschiedlich formuliert sind.
|
||||
|
||||
REGELN:
|
||||
- Gleicher Sachverhalt = gleiche Gruppe
|
||||
(z.B. "Trump fordert Kapitulation" und "US-Praesident verlangt bedingungslose Aufgabe")
|
||||
- Unterschiedliche Detailtiefe zum SELBEN Fakt = gleiche Gruppe
|
||||
- VERSCHIEDENE Sachverhalte = VERSCHIEDENE Gruppen
|
||||
(z.B. "Angriff auf Isfahan" vs "Angriff auf Teheran" sind NICHT dasselbe)
|
||||
- Eine Gruppe muss mindestens 2 Eintraege haben
|
||||
|
||||
Antworte NUR als JSON-Array von Gruppen. Jede Gruppe ist ein Array von IDs:
|
||||
[[1,5,12], [3,8]]
|
||||
|
||||
Wenn keine Duplikate: antworte mit []
|
||||
|
||||
FAKTEN:
|
||||
{facts_text}"""
|
||||
|
||||
|
||||
async def _haiku_find_duplicate_clusters(
|
||||
facts: list[dict], incident_title: str
|
||||
) -> list[list[int]]:
|
||||
"""Fragt Haiku welche Fakten semantische Duplikate sind."""
|
||||
facts_text = "\n".join(
|
||||
f'ID={f["id"]} [{f["status"]}]: {f["claim"]}'
|
||||
for f in facts
|
||||
)
|
||||
prompt = _DEDUP_PROMPT.format(
|
||||
incident_title=incident_title, facts_text=facts_text
|
||||
)
|
||||
try:
|
||||
result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
|
||||
data = json.loads(result)
|
||||
if isinstance(data, list) and all(isinstance(g, list) for g in data):
|
||||
return data
|
||||
except json.JSONDecodeError:
|
||||
match = re.search(r'\[.*\]', result, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
data = json.loads(match.group())
|
||||
if isinstance(data, list):
|
||||
return data
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning("Haiku Duplikat-Clustering fehlgeschlagen: %s", e)
|
||||
return []
|
||||
|
||||
|
||||
def _fuzzy_prefilter(all_facts: list[dict], max_candidates: int = 80) -> list[dict]:
|
||||
"""Waehlt Kandidaten fuer Haiku-Check per Fuzzy-Vorfilter aus.
|
||||
|
||||
Findet Paare mit Aehnlichkeit >= 0.60 und gibt die betroffenen Fakten zurueck.
|
||||
Begrenzt auf max_candidates um Haiku-Tokens zu sparen.
|
||||
"""
|
||||
from agents.factchecker import normalize_claim, _keyword_set
|
||||
|
||||
if len(all_facts) <= max_candidates:
|
||||
return all_facts
|
||||
|
||||
normalized = []
|
||||
for f in all_facts:
|
||||
nc = normalize_claim(f["claim"])
|
||||
kw = _keyword_set(f["claim"])
|
||||
normalized.append((f, nc, kw))
|
||||
|
||||
candidate_ids = set()
|
||||
recent = normalized[:60]
|
||||
|
||||
for i, (fact_a, norm_a, kw_a) in enumerate(recent):
|
||||
for j, (fact_b, norm_b, kw_b) in enumerate(normalized):
|
||||
if i >= j or fact_b["id"] == fact_a["id"]:
|
||||
continue
|
||||
if not norm_a or not norm_b:
|
||||
continue
|
||||
|
||||
len_ratio = len(norm_a) / len(norm_b) if norm_b else 0
|
||||
if len_ratio > 2.5 or len_ratio < 0.4:
|
||||
continue
|
||||
|
||||
seq_ratio = SequenceMatcher(None, norm_a, norm_b).ratio()
|
||||
kw_union = kw_a | kw_b
|
||||
jaccard = len(kw_a & kw_b) / len(kw_union) if kw_union else 0.0
|
||||
combined = 0.7 * seq_ratio + 0.3 * jaccard
|
||||
|
||||
if combined >= 0.60:
|
||||
candidate_ids.add(fact_a["id"])
|
||||
candidate_ids.add(fact_b["id"])
|
||||
|
||||
if len(candidate_ids) >= max_candidates:
|
||||
break
|
||||
if len(candidate_ids) >= max_candidates:
|
||||
break
|
||||
|
||||
candidates = [f for f in all_facts if f["id"] in candidate_ids]
|
||||
logger.info(
|
||||
"Fuzzy-Vorfilter: %d/%d Fakten als Duplikat-Kandidaten identifiziert",
|
||||
len(candidates), len(all_facts),
|
||||
)
|
||||
return candidates
|
||||
|
||||
|
||||
async def check_fact_duplicates(db, incident_id: int, incident_title: str) -> int:
    """Remove semantic fact-check duplicates for one incident via Haiku.

    Pipeline:
      1. Fuzzy pre-filter narrows the fact list to likely duplicate candidates.
      2. Haiku clusters semantically equivalent claims (batched, max 80 each).
      3. Within each cluster the highest-ranked fact is kept; the rest are
         deleted from ``fact_checks``.

    Returns: number of removed duplicate rows.
    """
    cursor = await db.execute(
        "SELECT id, claim, status, sources_count, evidence, checked_at "
        "FROM fact_checks WHERE incident_id = ? ORDER BY checked_at DESC",
        (incident_id,),
    )
    facts = [dict(row) for row in await cursor.fetchall()]
    if len(facts) < 2:
        return 0

    # Step 1: fuzzy pre-filter.
    shortlist = _fuzzy_prefilter(facts)
    if len(shortlist) < 2:
        return 0

    # Step 2: Haiku clustering, batched to at most 80 facts per call.
    clusters = []
    step = 80
    for offset in range(0, len(shortlist), step):
        found = await _haiku_find_duplicate_clusters(
            shortlist[offset:offset + step], incident_title
        )
        clusters.extend(found)

    if not clusters:
        logger.info("QC Fakten: Haiku fand keine Duplikate")
        return 0

    # Step 3: keep the best fact per cluster, mark the rest for deletion.
    by_id = {f["id"]: f for f in facts}

    def _rank(fact):
        # Higher status priority wins, then more sources, then most recent.
        return (
            STATUS_PRIORITY.get(fact["status"], 0),
            fact.get("sources_count", 0),
            fact.get("checked_at", ""),
        )

    doomed = set()
    for member_ids in clusters:
        known_ids = [cid for cid in member_ids if cid in by_id]
        if len(known_ids) <= 1:
            continue

        members = [by_id[cid] for cid in known_ids]
        keeper = max(members, key=_rank)

        for entry in members:
            if entry["id"] == keeper["id"]:
                continue
            doomed.add(entry["id"])
            logger.info(
                "QC Duplikat: ID %d entfernt, behalte ID %d ('%s')",
                entry["id"], keeper["id"], keeper["claim"][:60],
            )

    if doomed:
        marks = ",".join("?" * len(doomed))
        await db.execute(
            f"DELETE FROM fact_checks WHERE id IN ({marks})",
            list(doomed),
        )
        logger.info(
            "QC: %d Faktencheck-Duplikate entfernt fuer Incident %d",
            len(doomed), incident_id,
        )

    return len(doomed)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# 2. Karten-Location-Kategorien
# ---------------------------------------------------------------------------

# Prompt template for the Haiku location-category check. Placeholders filled
# via str.format():
#   {incident_title} / {incident_desc}: Lage context shown to the model
#   {labels_context}: optional "KATEGORIE-LABELS: ..." line (may be empty)
#   {label_primary} / {label_secondary} / {label_tertiary} / {label_mentioned}:
#       per-Lage category labels (fall back to generic defaults)
#   {locations_text}: one "ID=... | name (lat, lon)" line per unique location
# Doubled braces ({{ }}) render as literal JSON braces after formatting.
_LOCATION_PROMPT = """\
Du bist ein Geopolitik-Experte fuer einen OSINT-Monitor.

LAGE: {incident_title}
BESCHREIBUNG: {incident_desc}
{labels_context}
Unten stehen Orte, die auf der Karte als "primary" (Hauptgeschehen) markiert sind.
Pruefe fuer jeden Ort, ob die Kategorie "primary" korrekt ist.

KATEGORIEN:
- primary: {label_primary} — Wo das Hauptgeschehen stattfindet
- secondary: {label_secondary} — Direkte Reaktionen/Gegenmassnahmen
- tertiary: {label_tertiary} — Entscheidungstraeger/Beteiligte
- mentioned: {label_mentioned} — Nur erwaehnt

REGELN:
- Nur Orte die DIREKT vom Hauptgeschehen betroffen sind = "primary"
- Orte mit Reaktionen/Gegenmassnahmen = "secondary"
- Orte von Entscheidungstraegern (z.B. Hauptstaedte) = "tertiary"
- Nur erwaehnte Orte = "mentioned"
- Im Zweifel: "mentioned"

Antworte als JSON-Array mit Korrekturen. Nur Eintraege die GEAENDERT werden muessen:
[{{"id": 123, "category": "mentioned"}}, {{"id": 456, "category": "tertiary"}}]

Wenn alle Kategorien korrekt sind: antworte mit []

ORTE (aktuell alle als "primary" markiert):
{locations_text}"""
|
||||
|
||||
|
||||
async def check_location_categories(
    db, incident_id: int, incident_title: str, incident_desc: str
) -> int:
    """Check and correct map-location categories via Haiku.

    Loads all locations currently marked "primary" for the incident, asks
    Haiku (using the incident's context-specific category labels, if stored)
    which of them belong in a different category, and applies each correction
    to every row sharing the same location name.

    Returns: number of corrected rows.
    """
    cursor = await db.execute(
        "SELECT id, location_name, latitude, longitude, category "
        "FROM article_locations WHERE incident_id = ? AND category = 'primary'",
        (incident_id,),
    )
    targets = [dict(row) for row in await cursor.fetchall()]

    if not targets:
        return 0

    # Category-Labels aus DB laden (fuer kontextabhaengige Prompt-Beschreibungen)
    cursor = await db.execute(
        "SELECT category_labels FROM incidents WHERE id = ?", (incident_id,)
    )
    inc_row = await cursor.fetchone()
    labels = {}
    if inc_row and inc_row["category_labels"]:
        try:
            labels = json.loads(inc_row["category_labels"])
        except (json.JSONDecodeError, TypeError):
            pass
    # Guard against valid JSON that is not an object (e.g. a list) -- labels
    # must support .get() below.
    if not isinstance(labels, dict):
        labels = {}

    # "or" (not .get default) so that stored null/empty values also fall back.
    label_primary = labels.get("primary") or "Hauptgeschehen"
    label_secondary = labels.get("secondary") or "Reaktionen"
    label_tertiary = labels.get("tertiary") or "Beteiligte"
    label_mentioned = labels.get("mentioned") or "Erwaehnt"

    labels_context = ""
    if labels:
        labels_context = f"KATEGORIE-LABELS: primary={label_primary}, secondary={label_secondary}, tertiary={label_tertiary}, mentioned={label_mentioned}\n"

    # Dedupliziere nach location_name fuer den Prompt (spart Tokens)
    unique_names = {}
    ids_by_name = {}
    for loc in targets:
        name = loc["location_name"]
        if name not in unique_names:
            unique_names[name] = loc
            ids_by_name[name] = []
        ids_by_name[name].append(loc["id"])

    locations_text = "\n".join(
        f'ID={loc["id"]} | {loc["location_name"]} ({loc["latitude"]:.2f}, {loc["longitude"]:.2f})'
        for loc in unique_names.values()
    )

    prompt = _LOCATION_PROMPT.format(
        incident_title=incident_title,
        incident_desc=incident_desc[:500] if incident_desc else "(keine Beschreibung)",
        labels_context=labels_context,
        label_primary=label_primary,
        label_secondary=label_secondary,
        label_tertiary=label_tertiary,
        label_mentioned=label_mentioned,
        locations_text=locations_text,
    )

    fixes = []
    try:
        result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
        data = json.loads(result)
        if isinstance(data, list):
            fixes = data
    except json.JSONDecodeError:
        # Haiku sometimes wraps the JSON in prose -- salvage the first array.
        match = re.search(r'\[.*\]', result, re.DOTALL)
        if match:
            try:
                data = json.loads(match.group())
                if isinstance(data, list):
                    fixes = data
            except json.JSONDecodeError:
                pass
    except Exception as e:
        logger.warning("Haiku Location-Check fehlgeschlagen: %s", e)
        return 0

    if not fixes:
        logger.info("QC Locations: Haiku fand keine falschen Kategorien")
        return 0

    # Korrekturen anwenden (auch auf alle IDs mit gleichem Namen)
    total_fixed = 0
    representative_ids = {loc["id"]: name for name, loc in unique_names.items()}

    for fix in fixes:
        # Skip malformed entries (Haiku may emit non-object array items);
        # previously these raised AttributeError out of the function.
        if not isinstance(fix, dict):
            continue
        fix_id = fix.get("id")
        new_cat = fix.get("category")
        if not fix_id or not new_cat:
            continue
        if new_cat not in ("primary", "secondary", "tertiary", "mentioned"):
            continue

        # Finde den location_name fuer diese ID
        loc_name = representative_ids.get(fix_id)
        if not loc_name:
            continue

        # Korrigiere ALLE Eintraege mit diesem Namen
        all_ids = ids_by_name.get(loc_name, [fix_id])
        placeholders = ",".join("?" * len(all_ids))
        await db.execute(
            f"UPDATE article_locations SET category = ? "
            f"WHERE id IN ({placeholders}) AND category = 'primary'",
            [new_cat] + all_ids,
        )
        total_fixed += len(all_ids)
        logger.info(
            "QC Location: '%s' (%d Eintraege): primary -> %s",
            loc_name, len(all_ids), new_cat,
        )

    if total_fixed > 0:
        logger.info(
            "QC: %d Karten-Location-Kategorien korrigiert fuer Incident %d",
            total_fixed, incident_id,
        )

    return total_fixed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hauptfunktion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def run_post_refresh_qc(db, incident_id: int) -> dict:
    """Run the complete post-refresh quality check (Haiku-based).

    Executes the fact-duplicate check and the location-category check in
    sequence, committing only when at least one of them changed the database.
    Failures are logged and reported in the result instead of being raised.

    Returns: dict with {"facts_removed": int, "locations_fixed": int}
    (plus an "error" key on failure).
    """
    try:
        # Fetch Lage title/description used as context for both checks.
        cursor = await db.execute(
            "SELECT title, description FROM incidents WHERE id = ?",
            (incident_id,),
        )
        record = await cursor.fetchone()
        if not record:
            return {"facts_removed": 0, "locations_fixed": 0}

        title = record["title"] or ""
        description = record["description"] or ""

        removed = await check_fact_duplicates(db, incident_id, title)
        fixed = await check_location_categories(
            db, incident_id, title, description
        )

        # Persist only when something was actually modified.
        if removed > 0 or fixed > 0:
            await db.commit()
            logger.info(
                "Post-Refresh QC fuer Incident %d: %d Duplikate entfernt, %d Locations korrigiert",
                incident_id, removed, fixed,
            )

        return {"facts_removed": removed, "locations_fixed": fixed}

    except Exception as e:
        logger.error(
            "Post-Refresh QC Fehler fuer Incident %d: %s",
            incident_id, e, exc_info=True,
        )
        return {"facts_removed": 0, "locations_fixed": 0, "error": str(e)}
|
||||
|
||||
@@ -698,7 +698,7 @@ const App = {
|
||||
|
||||
async loadIncidentDetail(id) {
|
||||
try {
|
||||
const [incident, articles, factchecks, snapshots, locations] = await Promise.all([
|
||||
const [incident, articles, factchecks, snapshots, locationsResponse] = await Promise.all([
|
||||
API.getIncident(id),
|
||||
API.getArticles(id),
|
||||
API.getFactChecks(id),
|
||||
@@ -706,14 +706,27 @@ const App = {
|
||||
API.getLocations(id).catch(() => []),
|
||||
]);
|
||||
|
||||
this.renderIncidentDetail(incident, articles, factchecks, snapshots, locations);
|
||||
// Locations-API gibt jetzt {category_labels, locations} oder Array (Rückwärtskompatibel)
|
||||
let locations, categoryLabels;
|
||||
if (Array.isArray(locationsResponse)) {
|
||||
locations = locationsResponse;
|
||||
categoryLabels = null;
|
||||
} else if (locationsResponse && locationsResponse.locations) {
|
||||
locations = locationsResponse.locations;
|
||||
categoryLabels = locationsResponse.category_labels || null;
|
||||
} else {
|
||||
locations = [];
|
||||
categoryLabels = null;
|
||||
}
|
||||
|
||||
this.renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels);
|
||||
} catch (err) {
|
||||
console.error('loadIncidentDetail Fehler:', err);
|
||||
UI.showToast('Fehler beim Laden: ' + err.message, 'error');
|
||||
}
|
||||
},
|
||||
|
||||
renderIncidentDetail(incident, articles, factchecks, snapshots, locations) {
|
||||
renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels) {
|
||||
// Header Strip
|
||||
{ const _e = document.getElementById('incident-title'); if (_e) _e.textContent = incident.title; }
|
||||
{ const _e = document.getElementById('incident-description'); if (_e) _e.textContent = incident.description || ''; }
|
||||
@@ -845,7 +858,7 @@ const App = {
|
||||
this._resizeTimelineTile();
|
||||
|
||||
// Karte rendern
|
||||
UI.renderMap(locations || []);
|
||||
UI.renderMap(locations || [], categoryLabels);
|
||||
},
|
||||
|
||||
_collectEntries(filterType, searchTerm, range) {
|
||||
@@ -1617,8 +1630,12 @@ const App = {
|
||||
if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; }
|
||||
if (st.status === 'done' && st.locations > 0) {
|
||||
UI.showToast(`${st.locations} Orte aus ${st.processed} Artikeln erkannt`, 'success');
|
||||
const locations = await API.getLocations(incidentId).catch(() => []);
|
||||
UI.renderMap(locations);
|
||||
const locResp = await API.getLocations(incidentId).catch(() => []);
|
||||
let locs, catLabels;
|
||||
if (Array.isArray(locResp)) { locs = locResp; catLabels = null; }
|
||||
else if (locResp && locResp.locations) { locs = locResp.locations; catLabels = locResp.category_labels || null; }
|
||||
else { locs = []; catLabels = null; }
|
||||
UI.renderMap(locs, catLabels);
|
||||
} else if (st.status === 'done') {
|
||||
UI.showToast('Keine neuen Orte gefunden', 'info');
|
||||
} else if (st.status === 'error') {
|
||||
|
||||
@@ -639,30 +639,29 @@ const UI = {
|
||||
_initMarkerIcons() {
|
||||
if (this._markerIcons || typeof L === 'undefined') return;
|
||||
this._markerIcons = {
|
||||
target: this._createSvgIcon('#dc3545', '#a71d2a'),
|
||||
retaliation: this._createSvgIcon('#f39c12', '#c47d0a'),
|
||||
response: this._createSvgIcon('#f39c12', '#c47d0a'),
|
||||
actor: this._createSvgIcon('#2a81cb', '#1a5c8f'),
|
||||
primary: this._createSvgIcon('#dc3545', '#a71d2a'),
|
||||
secondary: this._createSvgIcon('#f39c12', '#c47d0a'),
|
||||
tertiary: this._createSvgIcon('#2a81cb', '#1a5c8f'),
|
||||
mentioned: this._createSvgIcon('#7b7b7b', '#555555'),
|
||||
};
|
||||
},
|
||||
|
||||
_categoryLabels: {
|
||||
target: 'Angegriffene Ziele',
|
||||
retaliation: 'Vergeltung / Eskalation',
|
||||
response: 'Reaktion / Gegenmassnahmen',
|
||||
actor: 'Strategische Akteure',
|
||||
_defaultCategoryLabels: {
|
||||
primary: 'Hauptgeschehen',
|
||||
secondary: 'Reaktionen',
|
||||
tertiary: 'Beteiligte',
|
||||
mentioned: 'Erwaehnt',
|
||||
},
|
||||
_categoryColors: {
|
||||
target: '#cb2b3e',
|
||||
retaliation: '#f39c12',
|
||||
response: '#f39c12',
|
||||
actor: '#2a81cb',
|
||||
primary: '#cb2b3e',
|
||||
secondary: '#f39c12',
|
||||
tertiary: '#2a81cb',
|
||||
mentioned: '#7b7b7b',
|
||||
},
|
||||
|
||||
renderMap(locations) {
|
||||
_activeCategoryLabels: null,
|
||||
|
||||
renderMap(locations, categoryLabels) {
|
||||
const container = document.getElementById('map-container');
|
||||
const emptyEl = document.getElementById('map-empty');
|
||||
const statsEl = document.getElementById('map-stats');
|
||||
@@ -741,6 +740,9 @@ const UI = {
|
||||
// Marker hinzufuegen
|
||||
const bounds = [];
|
||||
this._initMarkerIcons();
|
||||
// Dynamische Labels verwenden (API > Default)
|
||||
const catLabels = categoryLabels || this._activeCategoryLabels || this._defaultCategoryLabels;
|
||||
this._activeCategoryLabels = catLabels;
|
||||
const usedCategories = new Set();
|
||||
|
||||
locations.forEach(loc => {
|
||||
@@ -751,7 +753,7 @@ const UI = {
|
||||
const marker = L.marker([loc.lat, loc.lon], markerOpts);
|
||||
|
||||
// Popup-Inhalt
|
||||
const catLabel = this._categoryLabels[cat] || cat;
|
||||
const catLabel = catLabels[cat] || this._defaultCategoryLabels[cat] || cat;
|
||||
const catColor = this._categoryColors[cat] || '#7b7b7b';
|
||||
let popupHtml = `<div class="map-popup">`;
|
||||
popupHtml += `<div class="map-popup-title">${this.escape(loc.location_name)}`;
|
||||
@@ -798,12 +800,13 @@ const UI = {
|
||||
|
||||
const legend = L.control({ position: 'bottomright' });
|
||||
const self2 = this;
|
||||
const legendLabels = catLabels;
|
||||
legend.onAdd = function() {
|
||||
const div = L.DomUtil.create('div', 'map-legend-ctrl');
|
||||
let html = '<strong style="display:block;margin-bottom:6px;">Legende</strong>';
|
||||
['target', 'retaliation', 'response', 'actor', 'mentioned'].forEach(cat => {
|
||||
if (usedCategories.has(cat)) {
|
||||
html += `<div style="display:flex;align-items:center;gap:6px;margin:3px 0;"><span style="width:10px;height:10px;border-radius:50%;background:${self2._categoryColors[cat]};flex-shrink:0;"></span><span>${self2._categoryLabels[cat]}</span></div>`;
|
||||
['primary', 'secondary', 'tertiary', 'mentioned'].forEach(cat => {
|
||||
if (usedCategories.has(cat) && legendLabels[cat]) {
|
||||
html += `<div style="display:flex;align-items:center;gap:6px;margin:3px 0;"><span style="width:10px;height:10px;border-radius:50%;background:${self2._categoryColors[cat]};flex-shrink:0;"></span><span>${legendLabels[cat]}</span></div>`;
|
||||
}
|
||||
});
|
||||
div.innerHTML = html;
|
||||
@@ -853,7 +856,7 @@ const UI = {
|
||||
if (this._pendingLocations && typeof L !== 'undefined') {
|
||||
const locs = this._pendingLocations;
|
||||
this._pendingLocations = null;
|
||||
this.renderMap(locs);
|
||||
this.renderMap(locs, this._activeCategoryLabels);
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren