From 19da0995832c0a353242d4c313004bf8f6e36b98 Mon Sep 17 00:00:00 2001 From: Claude Dev Date: Sun, 15 Mar 2026 15:04:02 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20Kontextabh=C3=A4ngige=20Karten-Kategori?= =?UTF-8?q?en?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4 feste Farbstufen (primary/secondary/tertiary/mentioned) mit variablen Labels pro Lage, die von Haiku generiert werden. - DB: category_labels Spalte in incidents, alte Kategorien migriert (target->primary, response/retaliation->secondary, actor->tertiary) - Geoparsing: generate_category_labels() + neuer Prompt mit neuen Keys - QC: Kategorieprüfung auf neue Keys umgestellt - Orchestrator: Tuple-Rückgabe + Labels in DB speichern - API: category_labels im Locations- und Lagebild-Response - Frontend: Dynamische Legende aus API-Labels mit Fallback-Defaults - Migrationsskript für bestehende Lagen Co-Authored-By: Claude Opus 4.6 (1M context) --- migrate_category_labels.py | 73 ++ src/agents/geoparsing.py | 139 +++- src/agents/orchestrator.py | 11 +- src/database.py | 1183 ++++++++++++++++--------------- src/routers/incidents.py | 38 +- src/routers/public_api.py | 9 + src/services/post_refresh_qc.py | 804 +++++++++++---------- src/static/js/app.js | 29 +- src/static/js/components.js | 41 +- 9 files changed, 1315 insertions(+), 1012 deletions(-) create mode 100644 migrate_category_labels.py diff --git a/migrate_category_labels.py b/migrate_category_labels.py new file mode 100644 index 0000000..6e70825 --- /dev/null +++ b/migrate_category_labels.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +"""Einmaliges Migrationsskript: Generiert Haiku-Labels fuer alle bestehenden Lagen. + +Ausfuehrung auf dem Monitor-Server: + cd /home/claude-dev/AegisSight-Monitor + .venvs_run: /home/claude-dev/.venvs/osint/bin/python migrate_category_labels.py +""" +import asyncio +import json +import logging +import os +import sys + +# Projektpfad setzen damit imports funktionieren +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s [%(name)s] %(levelname)s: %(message)s', +) +logger = logging.getLogger("migrate_labels") + + +async def main(): + from database import get_db + from agents.geoparsing import generate_category_labels + + db = await get_db() + try: + # Alle Incidents ohne category_labels laden + cursor = await db.execute( + "SELECT id, title, description FROM incidents WHERE category_labels IS NULL" + ) + incidents = [dict(row) for row in await cursor.fetchall()] + + if not incidents: + logger.info("Keine Incidents ohne Labels gefunden. Nichts zu tun.") + return + + logger.info(f"{len(incidents)} Incidents ohne Labels gefunden. Starte Generierung...") + + success = 0 + for inc in incidents: + incident_id = inc["id"] + context = f"{inc['title']} - {inc.get('description') or ''}" + logger.info(f"Generiere Labels fuer Incident {incident_id}: {inc['title'][:60]}...") + + try: + labels = await generate_category_labels(context) + if labels: + await db.execute( + "UPDATE incidents SET category_labels = ? WHERE id = ?", + (json.dumps(labels, ensure_ascii=False), incident_id), + ) + await db.commit() + success += 1 + logger.info(f" -> Labels: {labels}") + else: + logger.warning(f" -> Keine Labels generiert") + except Exception as e: + logger.error(f" -> Fehler: {e}") + + # Kurze Pause um Rate-Limits zu vermeiden + await asyncio.sleep(0.5) + + logger.info(f"\nMigration abgeschlossen: {success}/{len(incidents)} Incidents mit Labels versehen.") + + finally: + await db.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/agents/geoparsing.py b/src/agents/geoparsing.py index 31b1c68..999a213 100644 --- a/src/agents/geoparsing.py +++ b/src/agents/geoparsing.py @@ -209,6 +209,90 @@ def _geocode_location(name: str, country_code: str = "", haiku_coords: Optional[ return result +# Default-Labels (Fallback wenn Haiku keine generiert) +DEFAULT_CATEGORY_LABELS = { + "primary": "Hauptgeschehen", + "secondary": "Reaktionen", + "tertiary": "Beteiligte", + "mentioned": "Erwaehnt", +} + +CATEGORY_LABELS_PROMPT = """Generiere kurze, praegnante Kategorie-Labels fuer Karten-Pins zu dieser Nachrichtenlage. + +Lage: "{incident_context}" + +Es gibt 4 Farbstufen fuer Orte auf der Karte: +1. primary (Rot): Wo das Hauptgeschehen stattfindet +2. secondary (Orange): Direkte Reaktionen/Gegenmassnahmen +3. tertiary (Blau): Entscheidungstraeger/Beteiligte +4. mentioned (Grau): Nur erwaehnt + +Generiere fuer jede Stufe ein kurzes Label (1-3 Woerter), das zum Thema passt. +Wenn eine Stufe fuer dieses Thema nicht sinnvoll ist, setze null. + +Beispiele: +- Militaerkonflikt Iran: {{"primary": "Kampfschauplätze", "secondary": "Vergeltungsschläge", "tertiary": "Strategische Akteure", "mentioned": "Erwähnt"}} +- Erdbeben Tuerkei: {{"primary": "Katastrophenzone", "secondary": "Hilfsoperationen", "tertiary": "Geberländer", "mentioned": "Erwähnt"}} +- Bundestagswahl: {{"primary": "Wahlkreise", "secondary": "Koalitionspartner", "tertiary": "Internationale Reaktionen", "mentioned": "Erwähnt"}} + +Antworte NUR als JSON-Objekt:""" + + +async def generate_category_labels(incident_context: str) -> dict[str, str | None]: + """Generiert kontextabhaengige Kategorie-Labels via Haiku. + + Args: + incident_context: Lage-Titel + Beschreibung + + Returns: + Dict mit Labels fuer primary/secondary/tertiary/mentioned (oder None wenn nicht passend) + """ + if not incident_context or not incident_context.strip(): + return dict(DEFAULT_CATEGORY_LABELS) + + prompt = CATEGORY_LABELS_PROMPT.format(incident_context=incident_context[:500]) + + try: + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + parsed = None + try: + parsed = json.loads(result_text) + except json.JSONDecodeError: + match = re.search(r'\{.*\}', result_text, re.DOTALL) + if match: + try: + parsed = json.loads(match.group()) + except json.JSONDecodeError: + pass + + if not parsed or not isinstance(parsed, dict): + logger.warning("generate_category_labels: Kein gueltiges JSON erhalten") + return dict(DEFAULT_CATEGORY_LABELS) + + # Validierung: Nur erlaubte Keys, Werte muessen str oder None sein + valid_keys = {"primary", "secondary", "tertiary", "mentioned"} + labels = {} + for key in valid_keys: + val = parsed.get(key) + if val is None or val == "null": + labels[key] = None + elif isinstance(val, str) and val.strip(): + labels[key] = val.strip() + else: + labels[key] = DEFAULT_CATEGORY_LABELS.get(key) + + # mentioned sollte immer einen Wert haben + if not labels.get("mentioned"): + labels["mentioned"] = "Erwaehnt" + + logger.info(f"Kategorie-Labels generiert: {labels}") + return labels + + except Exception as e: + logger.error(f"generate_category_labels fehlgeschlagen: {e}") + return dict(DEFAULT_CATEGORY_LABELS) + + HAIKU_GEOPARSE_PROMPT = """Extrahiere alle geographischen Orte aus diesen Nachrichten-Headlines. Kontext der Lage: "{incident_context}" @@ -222,9 +306,9 @@ Regeln: - Regionen wie "Middle East", "Gulf", "Naher Osten" NICHT extrahieren (kein einzelner Punkt auf der Karte) Klassifiziere basierend auf dem Lage-Kontext: -- "target": Wo das Ereignis passiert / Schaden entsteht -- "response": Wo Reaktionen / Gegenmassnahmen stattfinden -- "actor": Wo Entscheidungen getroffen werden / Entscheider sitzen +- "primary": Wo das Hauptgeschehen stattfindet (z.B. Angriffsziele, Katastrophenzone, Wahlkreise) +- "secondary": Direkte Reaktionen oder Gegenmassnahmen (z.B. Vergeltung, Hilfsoperationen) +- "tertiary": Entscheidungstraeger, Beteiligte (z.B. wo Entscheidungen getroffen werden) - "mentioned": Nur erwaehnt, kein direkter Bezug Headlines: @@ -233,7 +317,7 @@ Headlines: Antwort NUR als JSON-Array, kein anderer Text: [{{"headline_idx": 0, "locations": [ {{"name": "Teheran", "normalized": "Tehran", "country_code": "IR", - "type": "city", "category": "target", + "type": "city", "category": "primary", "lat": 35.69, "lon": 51.42}} ]}}]""" @@ -314,12 +398,19 @@ async def _extract_locations_haiku( if not name: continue + raw_cat = loc.get("category", "mentioned") + # Alte Kategorien mappen (falls Haiku sie noch generiert) + cat_map = {"target": "primary", "response": "secondary", "retaliation": "secondary", "actor": "tertiary", "context": "tertiary"} + category = cat_map.get(raw_cat, raw_cat) + if category not in ("primary", "secondary", "tertiary", "mentioned"): + category = "mentioned" + article_locs.append({ "name": name, "normalized": loc.get("normalized", name), "country_code": loc.get("country_code", ""), "type": loc_type, - "category": loc.get("category", "mentioned"), + "category": category, "lat": loc.get("lat"), "lon": loc.get("lon"), }) @@ -333,7 +424,7 @@ async def _extract_locations_haiku( async def geoparse_articles( articles: list[dict], incident_context: str = "", -) -> dict[int, list[dict]]: +) -> tuple[dict[int, list[dict]], dict[str, str | None] | None]: """Geoparsing fuer eine Liste von Artikeln via Haiku + geonamescache. Args: @@ -341,11 +432,15 @@ async def geoparse_articles( incident_context: Lage-Kontext (Titel + Beschreibung) fuer kontextbewusste Klassifizierung Returns: - dict[article_id -> list[{location_name, location_name_normalized, country_code, - lat, lon, confidence, source_text, category}]] + Tuple von (dict[article_id -> list[locations]], category_labels oder None) """ if not articles: - return {} + return {}, None + + # Labels parallel zum Geoparsing generieren (nur wenn Kontext vorhanden) + labels_task = None + if incident_context: + labels_task = asyncio.create_task(generate_category_labels(incident_context)) # Headlines sammeln headlines = [] @@ -363,7 +458,13 @@ async def geoparse_articles( headlines.append({"idx": article_id, "text": headline}) if not headlines: - return {} + category_labels = None + if labels_task: + try: + category_labels = await labels_task + except Exception: + pass + return {}, category_labels # Batches bilden (max 50 Headlines pro Haiku-Call) batch_size = 50 @@ -374,7 +475,13 @@ async def geoparse_articles( all_haiku_results.update(batch_results) if not all_haiku_results: - return {} + category_labels = None + if labels_task: + try: + category_labels = await labels_task + except Exception: + pass + return {}, category_labels # Geocoding via geonamescache (mit Haiku-Koordinaten als Fallback) result = {} @@ -406,4 +513,12 @@ async def geoparse_articles( if locations: result[article_id] = locations - return result + # Category-Labels abwarten + category_labels = None + if labels_task: + try: + category_labels = await labels_task + except Exception as e: + logger.warning(f"Category-Labels konnten nicht generiert werden: {e}") + + return result, category_labels diff --git a/src/agents/orchestrator.py b/src/agents/orchestrator.py index bb57668..9fbfb43 100644 --- a/src/agents/orchestrator.py +++ b/src/agents/orchestrator.py @@ -782,7 +782,7 @@ class AgentOrchestrator: from agents.geoparsing import geoparse_articles incident_context = f"{title} - {description}" logger.info(f"Geoparsing fuer {len(new_articles_for_analysis)} neue Artikel...") - geo_results = await geoparse_articles(new_articles_for_analysis, incident_context) + geo_results, category_labels = await geoparse_articles(new_articles_for_analysis, incident_context) geo_count = 0 for art_id, locations in geo_results.items(): for loc in locations: @@ -799,6 +799,15 @@ class AgentOrchestrator: if geo_count > 0: await db.commit() logger.info(f"Geoparsing: {geo_count} Orte aus {len(geo_results)} Artikeln gespeichert") + # Category-Labels in Incident speichern (nur wenn neu generiert) + if category_labels: + import json as _json + await db.execute( + "UPDATE incidents SET category_labels = ? WHERE id = ? AND category_labels IS NULL", + (_json.dumps(category_labels, ensure_ascii=False), incident_id), + ) + await db.commit() + logger.info(f"Category-Labels gespeichert fuer Incident {incident_id}: {category_labels}") except Exception as e: logger.warning(f"Geoparsing fehlgeschlagen (Pipeline laeuft weiter): {e}") diff --git a/src/database.py b/src/database.py index fcb9fc0..9944eb7 100644 --- a/src/database.py +++ b/src/database.py @@ -1,580 +1,603 @@ -"""SQLite Datenbank-Setup und Zugriff.""" -import aiosqlite -import logging -import os -from config import DB_PATH, DATA_DIR - -logger = logging.getLogger("osint.database") - -SCHEMA = """ -CREATE TABLE IF NOT EXISTS organizations ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL, - slug TEXT UNIQUE NOT NULL, - is_active INTEGER DEFAULT 1, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE TABLE IF NOT EXISTS licenses ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - organization_id INTEGER NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, - license_type TEXT NOT NULL DEFAULT 'trial', - max_users INTEGER NOT NULL DEFAULT 5, - valid_from TIMESTAMP NOT NULL, - valid_until TIMESTAMP, - status TEXT NOT NULL DEFAULT 'active', - notes TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE TABLE IF NOT EXISTS users ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - email TEXT UNIQUE NOT NULL, - username TEXT NOT NULL, - password_hash TEXT, - organization_id INTEGER NOT NULL REFERENCES organizations(id), - role TEXT NOT NULL DEFAULT 'member', - is_active INTEGER DEFAULT 1, - last_login_at TIMESTAMP, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); -CREATE TABLE IF NOT EXISTS magic_links ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - email TEXT NOT NULL, - token TEXT UNIQUE NOT NULL, - code TEXT NOT NULL, - purpose TEXT NOT NULL DEFAULT 'login', - user_id INTEGER REFERENCES users(id), - is_used INTEGER DEFAULT 0, - expires_at TIMESTAMP NOT NULL, - ip_address TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - --- Hinweis: portal_admins wird von der Verwaltungs-App (Admin-Portal) genutzt, die dieselbe DB teilt. -CREATE TABLE IF NOT EXISTS portal_admins ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - username TEXT UNIQUE NOT NULL, - password_hash TEXT NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE TABLE IF NOT EXISTS incidents ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - title TEXT NOT NULL, - description TEXT, - status TEXT DEFAULT 'active', - type TEXT DEFAULT 'adhoc', - refresh_mode TEXT DEFAULT 'manual', - refresh_interval INTEGER DEFAULT 15, - retention_days INTEGER DEFAULT 0, - visibility TEXT DEFAULT 'public', - summary TEXT, - sources_json TEXT, - international_sources INTEGER DEFAULT 1, - tenant_id INTEGER REFERENCES organizations(id), - created_by INTEGER REFERENCES users(id), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); -CREATE TABLE IF NOT EXISTS articles ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, - headline TEXT NOT NULL, - headline_de TEXT, - source TEXT NOT NULL, - source_url TEXT, - content_original TEXT, - content_de TEXT, - language TEXT DEFAULT 'de', - published_at TIMESTAMP, - collected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - verification_status TEXT DEFAULT 'unverified', - tenant_id INTEGER REFERENCES organizations(id) -); -CREATE TABLE IF NOT EXISTS fact_checks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, - claim TEXT NOT NULL, - status TEXT DEFAULT 'developing', - sources_count INTEGER DEFAULT 0, - evidence TEXT, - is_notification INTEGER DEFAULT 0, - checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - tenant_id INTEGER REFERENCES organizations(id) -); - -CREATE TABLE IF NOT EXISTS refresh_log ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, - started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - completed_at TIMESTAMP, - articles_found INTEGER DEFAULT 0, - status TEXT DEFAULT 'running', - tenant_id INTEGER REFERENCES organizations(id) -); - -CREATE TABLE IF NOT EXISTS incident_snapshots ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, - summary TEXT, - sources_json TEXT, - article_count INTEGER DEFAULT 0, - fact_check_count INTEGER DEFAULT 0, - refresh_log_id INTEGER REFERENCES refresh_log(id), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - tenant_id INTEGER REFERENCES organizations(id) -); - -CREATE TABLE IF NOT EXISTS sources ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL, - url TEXT, - domain TEXT, - source_type TEXT NOT NULL DEFAULT 'rss_feed', - category TEXT NOT NULL DEFAULT 'sonstige', - status TEXT NOT NULL DEFAULT 'active', - notes TEXT, - added_by TEXT, - article_count INTEGER DEFAULT 0, - last_seen_at TIMESTAMP, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - tenant_id INTEGER REFERENCES organizations(id) -); - -CREATE TABLE IF NOT EXISTS notifications ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, - incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, - type TEXT NOT NULL DEFAULT 'refresh_summary', - title TEXT NOT NULL, - text TEXT NOT NULL, - icon TEXT DEFAULT 'info', - is_read INTEGER DEFAULT 0, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - tenant_id INTEGER REFERENCES organizations(id) -); -CREATE INDEX IF NOT EXISTS idx_notifications_user_read ON notifications(user_id, is_read); - -CREATE TABLE IF NOT EXISTS incident_subscriptions ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, - incident_id INTEGER NOT NULL REFERENCES incidents(id) ON DELETE CASCADE, - notify_email_summary INTEGER DEFAULT 0, - notify_email_new_articles INTEGER DEFAULT 0, - notify_email_status_change INTEGER DEFAULT 0, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - UNIQUE(user_id, incident_id) -); - -CREATE TABLE IF NOT EXISTS article_locations ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - article_id INTEGER REFERENCES articles(id) ON DELETE CASCADE, - incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, - location_name TEXT NOT NULL, - location_name_normalized TEXT, - country_code TEXT, - latitude REAL NOT NULL, - longitude REAL NOT NULL, - confidence REAL DEFAULT 0.0, - source_text TEXT, - tenant_id INTEGER REFERENCES organizations(id) -); -CREATE INDEX IF NOT EXISTS idx_article_locations_incident ON article_locations(incident_id); -CREATE INDEX IF NOT EXISTS idx_article_locations_article ON article_locations(article_id); - - -CREATE TABLE IF NOT EXISTS source_health_checks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE, - check_type TEXT NOT NULL, - status TEXT NOT NULL, - message TEXT, - details TEXT, - checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); -CREATE INDEX IF NOT EXISTS idx_source_health_source ON source_health_checks(source_id); - -CREATE TABLE IF NOT EXISTS source_suggestions ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - suggestion_type TEXT NOT NULL, - title TEXT NOT NULL, - description TEXT, - source_id INTEGER REFERENCES sources(id) ON DELETE SET NULL, - suggested_data TEXT, - priority TEXT DEFAULT 'medium', - status TEXT DEFAULT 'pending', - reviewed_at TIMESTAMP, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); -CREATE TABLE IF NOT EXISTS user_excluded_domains ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, - domain TEXT NOT NULL, - notes TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - UNIQUE(user_id, domain) -); -""" - - -async def get_db() -> aiosqlite.Connection: - """Erstellt eine neue Datenbankverbindung.""" - os.makedirs(DATA_DIR, exist_ok=True) - db = await aiosqlite.connect(DB_PATH) - db.row_factory = aiosqlite.Row - await db.execute("PRAGMA journal_mode=WAL") - await db.execute("PRAGMA foreign_keys=ON") - await db.execute("PRAGMA busy_timeout=5000") - return db - - -async def init_db(): - """Initialisiert die Datenbank mit dem Schema.""" - db = await get_db() - try: - await db.executescript(SCHEMA) - await db.commit() - - # --- Migrationen fuer bestehende Datenbanken --- - - # Incidents-Spalten pruefen - cursor = await db.execute("PRAGMA table_info(incidents)") - columns = [row[1] for row in await cursor.fetchall()] - - if "type" not in columns: - await db.execute("ALTER TABLE incidents ADD COLUMN type TEXT DEFAULT 'adhoc'") - await db.commit() - - if "sources_json" not in columns: - await db.execute("ALTER TABLE incidents ADD COLUMN sources_json TEXT") - await db.commit() - - if "international_sources" not in columns: - await db.execute("ALTER TABLE incidents ADD COLUMN international_sources INTEGER DEFAULT 1") - await db.commit() - - if "visibility" not in columns: - await db.execute("ALTER TABLE incidents ADD COLUMN visibility TEXT DEFAULT 'public'") - await db.commit() - - if "include_telegram" not in columns: - await db.execute("ALTER TABLE incidents ADD COLUMN include_telegram INTEGER DEFAULT 0") - await db.commit() - logger.info("Migration: include_telegram zu incidents hinzugefuegt") - - if "telegram_categories" not in columns: - await db.execute("ALTER TABLE incidents ADD COLUMN telegram_categories TEXT DEFAULT NULL") - await db.commit() - logger.info("Migration: telegram_categories zu incidents hinzugefuegt") - - - if "tenant_id" not in columns: - await db.execute("ALTER TABLE incidents ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") - await db.commit() - logger.info("Migration: tenant_id zu incidents hinzugefuegt") - - # Migration: Token-Spalten fuer refresh_log - cursor = await db.execute("PRAGMA table_info(refresh_log)") - rl_columns = [row[1] for row in await cursor.fetchall()] - if "input_tokens" not in rl_columns: - await db.execute("ALTER TABLE refresh_log ADD COLUMN input_tokens INTEGER DEFAULT 0") - await db.execute("ALTER TABLE refresh_log ADD COLUMN output_tokens INTEGER DEFAULT 0") - await db.execute("ALTER TABLE refresh_log ADD COLUMN cache_creation_tokens INTEGER DEFAULT 0") - await db.execute("ALTER TABLE refresh_log ADD COLUMN cache_read_tokens INTEGER DEFAULT 0") - await db.execute("ALTER TABLE refresh_log ADD COLUMN total_cost_usd REAL DEFAULT 0.0") - await db.execute("ALTER TABLE refresh_log ADD COLUMN api_calls INTEGER DEFAULT 0") - await db.commit() - - if "trigger_type" not in rl_columns: - await db.execute("ALTER TABLE refresh_log ADD COLUMN trigger_type TEXT DEFAULT 'manual'") - await db.commit() - - if "retry_count" not in rl_columns: - await db.execute("ALTER TABLE refresh_log ADD COLUMN retry_count INTEGER DEFAULT 0") - await db.execute("ALTER TABLE refresh_log ADD COLUMN error_message TEXT") - await db.commit() - - if "tenant_id" not in rl_columns: - await db.execute("ALTER TABLE refresh_log ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") - await db.commit() - - # Migration: notifications-Tabelle (fuer bestehende DBs) - cursor = await db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='notifications'") - if not await cursor.fetchone(): - await db.executescript(""" - CREATE TABLE IF NOT EXISTS notifications ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, - incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, - type TEXT NOT NULL DEFAULT 'refresh_summary', - title TEXT NOT NULL, - text TEXT NOT NULL, - icon TEXT DEFAULT 'info', - is_read INTEGER DEFAULT 0, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - tenant_id INTEGER REFERENCES organizations(id) - ); - CREATE INDEX IF NOT EXISTS idx_notifications_user_read ON notifications(user_id, is_read); - """) - await db.commit() - - # Migration: incident_subscriptions-Tabelle (fuer bestehende DBs) - cursor = await db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='incident_subscriptions'") - if not await cursor.fetchone(): - await db.executescript(""" - CREATE TABLE IF NOT EXISTS incident_subscriptions ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, - incident_id INTEGER NOT NULL REFERENCES incidents(id) ON DELETE CASCADE, - notify_email_summary INTEGER DEFAULT 0, - notify_email_new_articles INTEGER DEFAULT 0, - notify_email_status_change INTEGER DEFAULT 0, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - UNIQUE(user_id, incident_id) - ); - """) - await db.commit() - logger.info("Migration: incident_subscriptions-Tabelle erstellt") - else: - # Migration: Spalte umbenennen contradiction -> new_articles - cursor = await db.execute("PRAGMA table_info(incident_subscriptions)") - sub_columns = [row[1] for row in await cursor.fetchall()] - if "notify_email_contradiction" in sub_columns: - await db.execute("ALTER TABLE incident_subscriptions RENAME COLUMN notify_email_contradiction TO notify_email_new_articles") - await db.commit() - logger.info("Migration: notify_email_contradiction -> notify_email_new_articles umbenannt") - - # Migration: role-Spalte fuer users - cursor = await db.execute("PRAGMA table_info(users)") - user_columns = [row[1] for row in await cursor.fetchall()] - if "role" not in user_columns: - await db.execute("ALTER TABLE users ADD COLUMN role TEXT DEFAULT 'member'") - await db.execute("UPDATE users SET role = 'org_admin'") - await db.commit() - logger.info("Migration: role-Spalte zu users hinzugefuegt") - - # Migration: email, organization_id, is_active, last_login_at fuer users - if "email" not in user_columns: - await db.execute("ALTER TABLE users ADD COLUMN email TEXT") - await db.commit() - logger.info("Migration: email zu users hinzugefuegt") - - if "organization_id" not in user_columns: - await db.execute("ALTER TABLE users ADD COLUMN organization_id INTEGER REFERENCES organizations(id)") - await db.commit() - logger.info("Migration: organization_id zu users hinzugefuegt") - - # Index erst nach Spalten-Migration erstellen - try: - await db.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_users_org_username ON users(organization_id, username)") - await db.commit() - except Exception: - pass # Index existiert bereits oder Spalte fehlt noch - - if "is_active" not in user_columns: - await db.execute("ALTER TABLE users ADD COLUMN is_active INTEGER DEFAULT 1") - await db.commit() - - if "last_login_at" not in user_columns: - await db.execute("ALTER TABLE users ADD COLUMN last_login_at TIMESTAMP") - await db.commit() - - # Migration: tenant_id fuer articles - cursor = await db.execute("PRAGMA table_info(articles)") - art_columns = [row[1] for row in await cursor.fetchall()] - if "tenant_id" not in art_columns: - await db.execute("ALTER TABLE articles ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") - await db.commit() - - # Migration: tenant_id fuer fact_checks - cursor = await db.execute("PRAGMA table_info(fact_checks)") - fc_columns = [row[1] for row in await cursor.fetchall()] - if "tenant_id" not in fc_columns: - await db.execute("ALTER TABLE fact_checks ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") - await db.commit() - - # Migration: status_history fuer fact_checks (Faktencheck-Verlauf) - if "status_history" not in fc_columns: - await db.execute("ALTER TABLE fact_checks ADD COLUMN status_history TEXT DEFAULT '[]'") - # Bestehende Eintraege initialisieren - cursor2 = await db.execute("SELECT id, status, checked_at FROM fact_checks") - for row2 in await cursor2.fetchall(): - import json as _json - initial_history = _json.dumps([{"status": row2[1], "at": str(row2[2])}]) - await db.execute("UPDATE fact_checks SET status_history = ? WHERE id = ?", (initial_history, row2[0])) - await db.commit() - logger.info("Migration: status_history zu fact_checks hinzugefuegt") - - # Migration: category fuer article_locations (Marker-Klassifizierung) - cursor = await db.execute("PRAGMA table_info(article_locations)") - al_columns = [row[1] for row in await cursor.fetchall()] - if "category" not in al_columns: - await db.execute("ALTER TABLE article_locations ADD COLUMN category TEXT DEFAULT 'mentioned'") - await db.commit() - logger.info("Migration: category zu article_locations hinzugefuegt") - - # Migration: tenant_id fuer incident_snapshots - cursor = await db.execute("PRAGMA table_info(incident_snapshots)") - snap_columns2 = [row[1] for row in await cursor.fetchall()] - if "tenant_id" not in snap_columns2: - await db.execute("ALTER TABLE incident_snapshots ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") - await db.commit() - - # Migration: tenant_id fuer sources - cursor = await db.execute("PRAGMA table_info(sources)") - src_columns = [row[1] for row in await cursor.fetchall()] - if "tenant_id" not in src_columns: - await db.execute("ALTER TABLE sources ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") - await db.commit() - - # Migration: tenant_id fuer notifications - cursor = await db.execute("PRAGMA table_info(notifications)") - notif_columns = [row[1] for row in await cursor.fetchall()] - if "tenant_id" not in notif_columns: - await db.execute("ALTER TABLE notifications ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") - await db.commit() - - # Indexes erstellen (nach Spalten-Migrationen) - for idx_sql in [ - "CREATE INDEX IF NOT EXISTS idx_incidents_tenant_status ON incidents(tenant_id, status)", - "CREATE INDEX IF NOT EXISTS idx_articles_tenant_incident ON articles(tenant_id, incident_id)", - ]: - try: - await db.execute(idx_sql) - await db.commit() - except Exception: - pass - - # Migration: article_locations-Tabelle (fuer bestehende DBs) - cursor = await db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='article_locations'") - if not await cursor.fetchone(): - await db.executescript(""" - CREATE TABLE IF NOT EXISTS article_locations ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - article_id INTEGER REFERENCES articles(id) ON DELETE CASCADE, - incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, - location_name TEXT NOT NULL, - location_name_normalized TEXT, - country_code TEXT, - latitude REAL NOT NULL, - longitude REAL NOT NULL, - confidence REAL DEFAULT 0.0, - source_text TEXT, - tenant_id INTEGER REFERENCES organizations(id) - ); - CREATE INDEX IF NOT EXISTS idx_article_locations_incident ON article_locations(incident_id); - CREATE INDEX IF NOT EXISTS idx_article_locations_article ON article_locations(article_id); - """) - await db.commit() - logger.info("Migration: article_locations-Tabelle erstellt") - - # Verwaiste running-Eintraege beim Start als error markieren (aelter als 15 Min) - await db.execute( - """UPDATE refresh_log SET status = 'error', error_message = 'Verwaist beim Neustart', - completed_at = CURRENT_TIMESTAMP - WHERE status = 'running' - AND started_at < datetime('now', '-15 minutes')""" - ) - await db.commit() - - # Sources-Tabelle seeden (nur wenn leer) - cursor = await db.execute("SELECT COUNT(*) as cnt FROM sources") - row = await cursor.fetchone() - if row["cnt"] == 0: - await _seed_sources(db) - - finally: - await db.close() - - -async def _seed_sources(db: aiosqlite.Connection): - """Befuellt die sources-Tabelle aus der config.py-Konfiguration.""" - from config import RSS_FEEDS, EXCLUDED_SOURCES - - category_map = { - "tagesschau": "oeffentlich-rechtlich", - "ZDF heute": "oeffentlich-rechtlich", - "Deutsche Welle": "oeffentlich-rechtlich", - "Spiegel": "qualitaetszeitung", - "Zeit": "qualitaetszeitung", - "FAZ": "qualitaetszeitung", - "Süddeutsche": "qualitaetszeitung", - "NZZ": "qualitaetszeitung", - "Reuters": "nachrichtenagentur", - "AP News": "nachrichtenagentur", - "BBC World": "international", - "Al Jazeera": "international", - "France24": "international", - "BMI": "behoerde", - "Europol": "behoerde", - } - - for _rss_category, feeds in RSS_FEEDS.items(): - for feed in feeds: - name = feed["name"] - url = feed["url"] - try: - from urllib.parse import urlparse - domain = urlparse(url).netloc.lower().replace("www.", "") - except Exception: - domain = "" - - category = category_map.get(name, "sonstige") - await db.execute( - """INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id) - VALUES (?, ?, ?, 'rss_feed', ?, 'active', 'system', NULL)""", - (name, url, domain, category), - ) - - for excl in EXCLUDED_SOURCES: - await db.execute( - """INSERT INTO sources (name, domain, source_type, category, status, added_by, tenant_id) - VALUES (?, ?, 'excluded', 'sonstige', 'active', 'system', NULL)""", - (excl, excl), - ) - - await db.commit() - await refresh_source_counts(db) - - logger.info(f"Sources-Tabelle geseeded: {len(RSS_FEEDS.get('deutsch', []))+len(RSS_FEEDS.get('international', []))+len(RSS_FEEDS.get('behoerden', []))} RSS-Feeds, {len(EXCLUDED_SOURCES)} ausgeschlossene Quellen") - - -async def refresh_source_counts(db: aiosqlite.Connection): - """Berechnet Artikelzaehler und last_seen_at fuer alle Quellen neu.""" - cursor = await db.execute("SELECT id, name, domain FROM sources WHERE source_type != 'excluded'") - sources = await cursor.fetchall() - - for source in sources: - sid = source["id"] - name = source["name"] - domain = source["domain"] or "" - - if domain: - cursor = await db.execute( - """SELECT COUNT(*) as cnt, MAX(collected_at) as last_seen - FROM articles WHERE source = ? OR source_url LIKE ?""", - (name, f"%{domain}%"), - ) - else: - cursor = await db.execute( - "SELECT COUNT(*) as cnt, MAX(collected_at) as last_seen FROM articles WHERE source = ?", - (name,), - ) - row = await cursor.fetchone() - await db.execute( - "UPDATE sources SET article_count = ?, last_seen_at = ? WHERE id = ?", - (row["cnt"], row["last_seen"], sid), - ) - - await db.commit() - - -async def db_dependency(): - """FastAPI Dependency fuer Datenbankverbindungen.""" - db = await get_db() - try: - yield db - finally: - await db.close() +"""SQLite Datenbank-Setup und Zugriff.""" +import aiosqlite +import logging +import os +from config import DB_PATH, DATA_DIR + +logger = logging.getLogger("osint.database") + +SCHEMA = """ +CREATE TABLE IF NOT EXISTS organizations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + slug TEXT UNIQUE NOT NULL, + is_active INTEGER DEFAULT 1, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS licenses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + organization_id INTEGER NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + license_type TEXT NOT NULL DEFAULT 'trial', + max_users INTEGER NOT NULL DEFAULT 5, + valid_from TIMESTAMP NOT NULL, + valid_until TIMESTAMP, + status TEXT NOT NULL DEFAULT 'active', + notes TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + email TEXT UNIQUE NOT NULL, + username TEXT NOT NULL, + password_hash TEXT, + organization_id INTEGER NOT NULL REFERENCES organizations(id), + role TEXT NOT NULL DEFAULT 'member', + is_active INTEGER DEFAULT 1, + last_login_at TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +CREATE TABLE IF NOT EXISTS magic_links ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + email TEXT NOT NULL, + token TEXT UNIQUE NOT NULL, + code TEXT NOT NULL, + purpose TEXT NOT NULL DEFAULT 'login', + user_id INTEGER REFERENCES users(id), + is_used INTEGER DEFAULT 0, + expires_at TIMESTAMP NOT NULL, + ip_address TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Hinweis: portal_admins wird von der Verwaltungs-App (Admin-Portal) genutzt, die dieselbe DB teilt. +CREATE TABLE IF NOT EXISTS portal_admins ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT UNIQUE NOT NULL, + password_hash TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS incidents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT NOT NULL, + description TEXT, + status TEXT DEFAULT 'active', + type TEXT DEFAULT 'adhoc', + refresh_mode TEXT DEFAULT 'manual', + refresh_interval INTEGER DEFAULT 15, + retention_days INTEGER DEFAULT 0, + visibility TEXT DEFAULT 'public', + summary TEXT, + sources_json TEXT, + international_sources INTEGER DEFAULT 1, + category_labels TEXT, + tenant_id INTEGER REFERENCES organizations(id), + created_by INTEGER REFERENCES users(id), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +CREATE TABLE IF NOT EXISTS articles ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, + headline TEXT NOT NULL, + headline_de TEXT, + source TEXT NOT NULL, + source_url TEXT, + content_original TEXT, + content_de TEXT, + language TEXT DEFAULT 'de', + published_at TIMESTAMP, + collected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + verification_status TEXT DEFAULT 'unverified', + tenant_id INTEGER REFERENCES organizations(id) +); +CREATE TABLE IF NOT EXISTS fact_checks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, + claim TEXT NOT NULL, + status TEXT DEFAULT 'developing', + sources_count INTEGER DEFAULT 0, + evidence TEXT, + is_notification INTEGER DEFAULT 0, + checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + tenant_id INTEGER REFERENCES organizations(id) +); + +CREATE TABLE IF NOT EXISTS refresh_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, + started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + completed_at TIMESTAMP, + articles_found INTEGER DEFAULT 0, + status TEXT DEFAULT 'running', + tenant_id INTEGER REFERENCES organizations(id) +); + +CREATE TABLE IF NOT EXISTS incident_snapshots ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, + summary TEXT, + sources_json TEXT, + article_count INTEGER DEFAULT 0, + fact_check_count INTEGER DEFAULT 0, + refresh_log_id INTEGER REFERENCES refresh_log(id), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + tenant_id INTEGER REFERENCES organizations(id) +); + +CREATE TABLE IF NOT EXISTS sources ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + url TEXT, + domain TEXT, + source_type TEXT NOT NULL DEFAULT 'rss_feed', + category TEXT NOT NULL DEFAULT 'sonstige', + status TEXT NOT NULL DEFAULT 'active', + notes TEXT, + added_by TEXT, + article_count INTEGER DEFAULT 0, + last_seen_at TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + tenant_id INTEGER REFERENCES organizations(id) +); + +CREATE TABLE IF NOT EXISTS notifications ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, + incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, + type TEXT NOT NULL DEFAULT 'refresh_summary', + title TEXT NOT NULL, + text TEXT NOT NULL, + icon TEXT DEFAULT 'info', + is_read INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + tenant_id INTEGER REFERENCES organizations(id) +); +CREATE INDEX IF NOT EXISTS idx_notifications_user_read ON notifications(user_id, is_read); + +CREATE TABLE IF NOT EXISTS incident_subscriptions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, + incident_id INTEGER NOT NULL REFERENCES incidents(id) ON DELETE CASCADE, + notify_email_summary INTEGER DEFAULT 0, + notify_email_new_articles INTEGER DEFAULT 0, + notify_email_status_change INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, incident_id) +); + +CREATE TABLE IF NOT EXISTS article_locations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + article_id INTEGER REFERENCES articles(id) ON DELETE CASCADE, + incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, + location_name TEXT NOT NULL, + location_name_normalized TEXT, + country_code TEXT, + latitude REAL NOT NULL, + longitude REAL NOT NULL, + confidence REAL DEFAULT 0.0, + source_text TEXT, + tenant_id INTEGER REFERENCES organizations(id) +); +CREATE INDEX IF NOT EXISTS idx_article_locations_incident ON article_locations(incident_id); +CREATE INDEX IF NOT EXISTS idx_article_locations_article ON article_locations(article_id); + + +CREATE TABLE IF NOT EXISTS source_health_checks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE, + check_type TEXT NOT NULL, + status TEXT NOT NULL, + message TEXT, + details TEXT, + checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +CREATE INDEX IF NOT EXISTS idx_source_health_source ON source_health_checks(source_id); + +CREATE TABLE IF NOT EXISTS source_suggestions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + suggestion_type TEXT NOT NULL, + title TEXT NOT NULL, + description TEXT, + source_id INTEGER REFERENCES sources(id) ON DELETE SET NULL, + suggested_data TEXT, + priority TEXT DEFAULT 'medium', + status TEXT DEFAULT 'pending', + reviewed_at TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +CREATE TABLE IF NOT EXISTS user_excluded_domains ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, + domain TEXT NOT NULL, + notes TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, domain) +); +""" + + +async def get_db() -> aiosqlite.Connection: + """Erstellt eine neue Datenbankverbindung.""" + os.makedirs(DATA_DIR, exist_ok=True) + db = await aiosqlite.connect(DB_PATH) + db.row_factory = aiosqlite.Row + await db.execute("PRAGMA journal_mode=WAL") + await db.execute("PRAGMA foreign_keys=ON") + await db.execute("PRAGMA busy_timeout=5000") + return db + + +async def init_db(): + """Initialisiert die Datenbank mit dem Schema.""" + db = await get_db() + try: + await db.executescript(SCHEMA) + await db.commit() + + # --- Migrationen fuer bestehende Datenbanken --- + + # Incidents-Spalten pruefen + cursor = await db.execute("PRAGMA table_info(incidents)") + columns = [row[1] for row in await cursor.fetchall()] + + if "type" not in columns: + await db.execute("ALTER TABLE incidents ADD COLUMN type TEXT DEFAULT 'adhoc'") + await db.commit() + + if "sources_json" not in columns: + await db.execute("ALTER TABLE incidents ADD COLUMN sources_json TEXT") + await db.commit() + + if "international_sources" not in columns: + await db.execute("ALTER TABLE incidents ADD COLUMN international_sources INTEGER DEFAULT 1") + await db.commit() + + if "visibility" not in columns: + await db.execute("ALTER TABLE incidents ADD COLUMN visibility TEXT DEFAULT 'public'") + await db.commit() + + if "include_telegram" not in columns: + await db.execute("ALTER TABLE incidents ADD COLUMN include_telegram INTEGER DEFAULT 0") + await db.commit() + logger.info("Migration: include_telegram zu incidents hinzugefuegt") + + if "telegram_categories" not in columns: + await db.execute("ALTER TABLE incidents ADD COLUMN telegram_categories TEXT DEFAULT NULL") + await db.commit() + logger.info("Migration: telegram_categories zu incidents hinzugefuegt") + + if "category_labels" not in columns: + await db.execute("ALTER TABLE incidents ADD COLUMN category_labels TEXT") + await db.commit() + logger.info("Migration: category_labels zu incidents hinzugefuegt") + + if "tenant_id" not in columns: + await db.execute("ALTER TABLE incidents ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") + await db.commit() + logger.info("Migration: tenant_id zu incidents hinzugefuegt") + + # Migration: Token-Spalten fuer refresh_log + cursor = await db.execute("PRAGMA table_info(refresh_log)") + rl_columns = [row[1] for row in await cursor.fetchall()] + if "input_tokens" not in rl_columns: + await db.execute("ALTER TABLE refresh_log ADD COLUMN input_tokens INTEGER DEFAULT 0") + await db.execute("ALTER TABLE refresh_log ADD COLUMN output_tokens INTEGER DEFAULT 0") + await db.execute("ALTER TABLE refresh_log ADD COLUMN cache_creation_tokens INTEGER DEFAULT 0") + await db.execute("ALTER TABLE refresh_log ADD COLUMN cache_read_tokens INTEGER DEFAULT 0") + await db.execute("ALTER TABLE refresh_log ADD COLUMN total_cost_usd REAL DEFAULT 0.0") + await db.execute("ALTER TABLE refresh_log ADD COLUMN api_calls INTEGER DEFAULT 0") + await db.commit() + + if "trigger_type" not in rl_columns: + await db.execute("ALTER TABLE refresh_log ADD COLUMN trigger_type TEXT DEFAULT 'manual'") + await db.commit() + + if "retry_count" not in rl_columns: + await db.execute("ALTER TABLE refresh_log ADD COLUMN retry_count INTEGER DEFAULT 0") + await db.execute("ALTER TABLE refresh_log ADD COLUMN error_message TEXT") + await db.commit() + + if "tenant_id" not in rl_columns: + await db.execute("ALTER TABLE refresh_log ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") + await db.commit() + + # Migration: notifications-Tabelle (fuer bestehende DBs) + cursor = await db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='notifications'") + if not await cursor.fetchone(): + await db.executescript(""" + CREATE TABLE IF NOT EXISTS notifications ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, + incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, + type TEXT NOT NULL DEFAULT 'refresh_summary', + title TEXT NOT NULL, + text TEXT NOT NULL, + icon TEXT DEFAULT 'info', + is_read INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + tenant_id INTEGER REFERENCES organizations(id) + ); + CREATE INDEX IF NOT EXISTS idx_notifications_user_read ON notifications(user_id, is_read); + """) + await db.commit() + + # Migration: incident_subscriptions-Tabelle (fuer bestehende DBs) + cursor = await db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='incident_subscriptions'") + if not await cursor.fetchone(): + await db.executescript(""" + CREATE TABLE IF NOT EXISTS incident_subscriptions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, + incident_id INTEGER NOT NULL REFERENCES incidents(id) ON DELETE CASCADE, + notify_email_summary INTEGER DEFAULT 0, + notify_email_new_articles INTEGER DEFAULT 0, + notify_email_status_change INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, incident_id) + ); + """) + await db.commit() + logger.info("Migration: incident_subscriptions-Tabelle erstellt") + else: + # Migration: Spalte umbenennen contradiction -> new_articles + cursor = await db.execute("PRAGMA table_info(incident_subscriptions)") + sub_columns = [row[1] for row in await cursor.fetchall()] + if "notify_email_contradiction" in sub_columns: + await db.execute("ALTER TABLE incident_subscriptions RENAME COLUMN notify_email_contradiction TO notify_email_new_articles") + await db.commit() + logger.info("Migration: notify_email_contradiction -> notify_email_new_articles umbenannt") + + # Migration: role-Spalte fuer users + cursor = await db.execute("PRAGMA table_info(users)") + user_columns = [row[1] for row in await cursor.fetchall()] + if "role" not in user_columns: + await db.execute("ALTER TABLE users ADD COLUMN role TEXT DEFAULT 'member'") + await db.execute("UPDATE users SET role = 'org_admin'") + await db.commit() + logger.info("Migration: role-Spalte zu users hinzugefuegt") + + # Migration: email, organization_id, is_active, last_login_at fuer users + if "email" not in user_columns: + await db.execute("ALTER TABLE users ADD COLUMN email TEXT") + await db.commit() + logger.info("Migration: email zu users hinzugefuegt") + + if "organization_id" not in user_columns: + await db.execute("ALTER TABLE users ADD COLUMN organization_id INTEGER REFERENCES organizations(id)") + await db.commit() + logger.info("Migration: organization_id zu users hinzugefuegt") + + # Index erst nach Spalten-Migration erstellen + try: + await db.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_users_org_username ON users(organization_id, username)") + await db.commit() + except Exception: + pass # Index existiert bereits oder Spalte fehlt noch + + if "is_active" not in user_columns: + await db.execute("ALTER TABLE users ADD COLUMN is_active INTEGER DEFAULT 1") + await db.commit() + + if "last_login_at" not in user_columns: + await db.execute("ALTER TABLE users ADD COLUMN last_login_at TIMESTAMP") + await db.commit() + + # Migration: tenant_id fuer articles + cursor = await db.execute("PRAGMA table_info(articles)") + art_columns = [row[1] for row in await cursor.fetchall()] + if "tenant_id" not in art_columns: + await db.execute("ALTER TABLE articles ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") + await db.commit() + + # Migration: tenant_id fuer fact_checks + cursor = await db.execute("PRAGMA table_info(fact_checks)") + fc_columns = [row[1] for row in await cursor.fetchall()] + if "tenant_id" not in fc_columns: + await db.execute("ALTER TABLE fact_checks ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") + await db.commit() + + # Migration: status_history fuer fact_checks (Faktencheck-Verlauf) + if "status_history" not in fc_columns: + await db.execute("ALTER TABLE fact_checks ADD COLUMN status_history TEXT DEFAULT '[]'") + # Bestehende Eintraege initialisieren + cursor2 = await db.execute("SELECT id, status, checked_at FROM fact_checks") + for row2 in await cursor2.fetchall(): + import json as _json + initial_history = _json.dumps([{"status": row2[1], "at": str(row2[2])}]) + await db.execute("UPDATE fact_checks SET status_history = ? WHERE id = ?", (initial_history, row2[0])) + await db.commit() + logger.info("Migration: status_history zu fact_checks hinzugefuegt") + + # Migration: category fuer article_locations (Marker-Klassifizierung) + cursor = await db.execute("PRAGMA table_info(article_locations)") + al_columns = [row[1] for row in await cursor.fetchall()] + if "category" not in al_columns: + await db.execute("ALTER TABLE article_locations ADD COLUMN category TEXT DEFAULT 'mentioned'") + await db.commit() + logger.info("Migration: category zu article_locations hinzugefuegt") + + # Migration: Alte Kategorie-Werte auf neue Keys umbenennen + try: + await db.execute( + "UPDATE article_locations SET category = 'primary' WHERE category = 'target'" + ) + await db.execute( + "UPDATE article_locations SET category = 'secondary' WHERE category IN ('response', 'retaliation')" + ) + await db.execute( + "UPDATE article_locations SET category = 'tertiary' WHERE category IN ('actor', 'context')" + ) + changed = db.total_changes + await db.commit() + if changed > 0: + logger.info("Migration: article_locations Kategorien umbenannt (target->primary, response/retaliation->secondary, actor->tertiary)") + except Exception: + pass # Bereits migriert oder keine Daten + + # Migration: tenant_id fuer incident_snapshots + cursor = await db.execute("PRAGMA table_info(incident_snapshots)") + snap_columns2 = [row[1] for row in await cursor.fetchall()] + if "tenant_id" not in snap_columns2: + await db.execute("ALTER TABLE incident_snapshots ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") + await db.commit() + + # Migration: tenant_id fuer sources + cursor = await db.execute("PRAGMA table_info(sources)") + src_columns = [row[1] for row in await cursor.fetchall()] + if "tenant_id" not in src_columns: + await db.execute("ALTER TABLE sources ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") + await db.commit() + + # Migration: tenant_id fuer notifications + cursor = await db.execute("PRAGMA table_info(notifications)") + notif_columns = [row[1] for row in await cursor.fetchall()] + if "tenant_id" not in notif_columns: + await db.execute("ALTER TABLE notifications ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") + await db.commit() + + # Indexes erstellen (nach Spalten-Migrationen) + for idx_sql in [ + "CREATE INDEX IF NOT EXISTS idx_incidents_tenant_status ON incidents(tenant_id, status)", + "CREATE INDEX IF NOT EXISTS idx_articles_tenant_incident ON articles(tenant_id, incident_id)", + ]: + try: + await db.execute(idx_sql) + await db.commit() + except Exception: + pass + + # Migration: article_locations-Tabelle (fuer bestehende DBs) + cursor = await db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='article_locations'") + if not await cursor.fetchone(): + await db.executescript(""" + CREATE TABLE IF NOT EXISTS article_locations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + article_id INTEGER REFERENCES articles(id) ON DELETE CASCADE, + incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE, + location_name TEXT NOT NULL, + location_name_normalized TEXT, + country_code TEXT, + latitude REAL NOT NULL, + longitude REAL NOT NULL, + confidence REAL DEFAULT 0.0, + source_text TEXT, + tenant_id INTEGER REFERENCES organizations(id) + ); + CREATE INDEX IF NOT EXISTS idx_article_locations_incident ON article_locations(incident_id); + CREATE INDEX IF NOT EXISTS idx_article_locations_article ON article_locations(article_id); + """) + await db.commit() + logger.info("Migration: article_locations-Tabelle erstellt") + + # Verwaiste running-Eintraege beim Start als error markieren (aelter als 15 Min) + await db.execute( + """UPDATE refresh_log SET status = 'error', error_message = 'Verwaist beim Neustart', + completed_at = CURRENT_TIMESTAMP + WHERE status = 'running' + AND started_at < datetime('now', '-15 minutes')""" + ) + await db.commit() + + # Sources-Tabelle seeden (nur wenn leer) + cursor = await db.execute("SELECT COUNT(*) as cnt FROM sources") + row = await cursor.fetchone() + if row["cnt"] == 0: + await _seed_sources(db) + + finally: + await db.close() + + +async def _seed_sources(db: aiosqlite.Connection): + """Befuellt die sources-Tabelle aus der config.py-Konfiguration.""" + from config import RSS_FEEDS, EXCLUDED_SOURCES + + category_map = { + "tagesschau": "oeffentlich-rechtlich", + "ZDF heute": "oeffentlich-rechtlich", + "Deutsche Welle": "oeffentlich-rechtlich", + "Spiegel": "qualitaetszeitung", + "Zeit": "qualitaetszeitung", + "FAZ": "qualitaetszeitung", + "Süddeutsche": "qualitaetszeitung", + "NZZ": "qualitaetszeitung", + "Reuters": "nachrichtenagentur", + "AP News": "nachrichtenagentur", + "BBC World": "international", + "Al Jazeera": "international", + "France24": "international", + "BMI": "behoerde", + "Europol": "behoerde", + } + + for _rss_category, feeds in RSS_FEEDS.items(): + for feed in feeds: + name = feed["name"] + url = feed["url"] + try: + from urllib.parse import urlparse + domain = urlparse(url).netloc.lower().replace("www.", "") + except Exception: + domain = "" + + category = category_map.get(name, "sonstige") + await db.execute( + """INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id) + VALUES (?, ?, ?, 'rss_feed', ?, 'active', 'system', NULL)""", + (name, url, domain, category), + ) + + for excl in EXCLUDED_SOURCES: + await db.execute( + """INSERT INTO sources (name, domain, source_type, category, status, added_by, tenant_id) + VALUES (?, ?, 'excluded', 'sonstige', 'active', 'system', NULL)""", + (excl, excl), + ) + + await db.commit() + await refresh_source_counts(db) + + logger.info(f"Sources-Tabelle geseeded: {len(RSS_FEEDS.get('deutsch', []))+len(RSS_FEEDS.get('international', []))+len(RSS_FEEDS.get('behoerden', []))} RSS-Feeds, {len(EXCLUDED_SOURCES)} ausgeschlossene Quellen") + + +async def refresh_source_counts(db: aiosqlite.Connection): + """Berechnet Artikelzaehler und last_seen_at fuer alle Quellen neu.""" + cursor = await db.execute("SELECT id, name, domain FROM sources WHERE source_type != 'excluded'") + sources = await cursor.fetchall() + + for source in sources: + sid = source["id"] + name = source["name"] + domain = source["domain"] or "" + + if domain: + cursor = await db.execute( + """SELECT COUNT(*) as cnt, MAX(collected_at) as last_seen + FROM articles WHERE source = ? OR source_url LIKE ?""", + (name, f"%{domain}%"), + ) + else: + cursor = await db.execute( + "SELECT COUNT(*) as cnt, MAX(collected_at) as last_seen FROM articles WHERE source = ?", + (name,), + ) + row = await cursor.fetchone() + await db.execute( + "UPDATE sources SET article_count = ?, last_seen_at = ? WHERE id = ?", + (row["cnt"], row["last_seen"], sid), + ) + + await db.commit() + + +async def db_dependency(): + """FastAPI Dependency fuer Datenbankverbindungen.""" + db = await get_db() + try: + yield db + finally: + await db.close() diff --git a/src/routers/incidents.py b/src/routers/incidents.py index daef06f..32a81c8 100644 --- a/src/routers/incidents.py +++ b/src/routers/incidents.py @@ -338,8 +338,8 @@ async def get_locations( "source_url": row["source_url"], }) - # Dominanteste Kategorie pro Ort bestimmen (Prioritaet: target > retaliation > actor > mentioned) - priority = {"target": 4, "retaliation": 3, "actor": 2, "mentioned": 1} + # Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned) + priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1} result = [] for loc in loc_map.values(): cats = loc.pop("categories") @@ -349,7 +349,20 @@ async def get_locations( best_cat = "mentioned" loc["category"] = best_cat result.append(loc) - return result + + # Category-Labels aus Incident laden + cursor = await db.execute( + "SELECT category_labels FROM incidents WHERE id = ?", (incident_id,) + ) + inc_row = await cursor.fetchone() + category_labels = None + if inc_row and inc_row["category_labels"]: + try: + category_labels = json.loads(inc_row["category_labels"]) + except (json.JSONDecodeError, TypeError): + pass + + return {"category_labels": category_labels, "locations": result} # Geoparse-Status pro Incident (in-memory) @@ -395,8 +408,23 @@ async def _run_geoparse_background(incident_id: int, tenant_id: int | None): processed = 0 for i in range(0, total, batch_size): batch = articles[i:i + batch_size] - geo_results = await geoparse_articles(batch, incident_context) - for art_id, locations in geo_results.items(): + geo_result = await geoparse_articles(batch, incident_context) + # Tuple-Rückgabe: (locations_dict, category_labels) + if isinstance(geo_result, tuple): + batch_geo_results, batch_labels = geo_result + # Labels beim ersten Batch speichern + if batch_labels and i == 0: + try: + await db.execute( + "UPDATE incidents SET category_labels = ? WHERE id = ? AND category_labels IS NULL", + (json.dumps(batch_labels, ensure_ascii=False), incident_id), + ) + await db.commit() + except Exception: + pass + else: + batch_geo_results = geo_result + for art_id, locations in batch_geo_results.items(): for loc in locations: await db.execute( """INSERT INTO article_locations diff --git a/src/routers/public_api.py b/src/routers/public_api.py index 61a995d..d8c4427 100644 --- a/src/routers/public_api.py +++ b/src/routers/public_api.py @@ -64,6 +64,14 @@ async def get_lagebild(db=Depends(db_dependency)): raise HTTPException(status_code=404, detail="Incident not found") incident = dict(incident) + # Category-Labels laden + category_labels = None + if incident.get("category_labels"): + try: + category_labels = json.loads(incident["category_labels"]) + except (json.JSONDecodeError, TypeError): + pass + # Alle Artikel aus allen Iran-Incidents laden cursor = await db.execute( f"""SELECT id, headline, headline_de, source, source_url, language, @@ -148,6 +156,7 @@ async def get_lagebild(db=Depends(db_dependency)): "fact_checks": fact_checks, "available_snapshots": available_snapshots, "locations": locations, + "category_labels": category_labels, } diff --git a/src/services/post_refresh_qc.py b/src/services/post_refresh_qc.py index 83a7b90..58032d4 100644 --- a/src/services/post_refresh_qc.py +++ b/src/services/post_refresh_qc.py @@ -1,389 +1,415 @@ -"""Post-Refresh Quality Check via Haiku. - -Prueft nach jedem Refresh: -1. Semantische Faktencheck-Duplikate (Haiku-Clustering mit Fuzzy-Vorfilter) -2. Falsch kategorisierte Karten-Locations (Haiku bewertet Kontext der Lage) - -Regelbasierte Listen dienen als Fallback falls Haiku fehlschlaegt. -""" -import json -import logging -import re -from difflib import SequenceMatcher - -from agents.claude_client import call_claude -from config import CLAUDE_MODEL_FAST - -logger = logging.getLogger("osint.post_refresh_qc") - -STATUS_PRIORITY = { - "confirmed": 5, "established": 5, - "contradicted": 4, "disputed": 4, - "unconfirmed": 3, "unverified": 3, - "developing": 1, -} - -# --------------------------------------------------------------------------- -# 1. Faktencheck-Duplikate -# --------------------------------------------------------------------------- - -_DEDUP_PROMPT = """\ -Du bist ein Deduplizierungs-Agent fuer Faktenchecks eines OSINT-Monitors. - -LAGE: {incident_title} - -Unten stehen Faktenchecks (ID + Status + Claim). Finde Gruppen von Fakten, -die INHALTLICH DASSELBE aussagen, auch wenn sie unterschiedlich formuliert sind. - -REGELN: -- Gleicher Sachverhalt = gleiche Gruppe - (z.B. "Trump fordert Kapitulation" und "US-Praesident verlangt bedingungslose Aufgabe") -- Unterschiedliche Detailtiefe zum SELBEN Fakt = gleiche Gruppe -- VERSCHIEDENE Sachverhalte = VERSCHIEDENE Gruppen - (z.B. "Angriff auf Isfahan" vs "Angriff auf Teheran" sind NICHT dasselbe) -- Eine Gruppe muss mindestens 2 Eintraege haben - -Antworte NUR als JSON-Array von Gruppen. Jede Gruppe ist ein Array von IDs: -[[1,5,12], [3,8]] - -Wenn keine Duplikate: antworte mit [] - -FAKTEN: -{facts_text}""" - - -async def _haiku_find_duplicate_clusters( - facts: list[dict], incident_title: str -) -> list[list[int]]: - """Fragt Haiku welche Fakten semantische Duplikate sind.""" - facts_text = "\n".join( - f'ID={f["id"]} [{f["status"]}]: {f["claim"]}' - for f in facts - ) - prompt = _DEDUP_PROMPT.format( - incident_title=incident_title, facts_text=facts_text - ) - try: - result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) - data = json.loads(result) - if isinstance(data, list) and all(isinstance(g, list) for g in data): - return data - except json.JSONDecodeError: - match = re.search(r'\[.*\]', result, re.DOTALL) - if match: - try: - data = json.loads(match.group()) - if isinstance(data, list): - return data - except json.JSONDecodeError: - pass - except Exception as e: - logger.warning("Haiku Duplikat-Clustering fehlgeschlagen: %s", e) - return [] - - -def _fuzzy_prefilter(all_facts: list[dict], max_candidates: int = 80) -> list[dict]: - """Waehlt Kandidaten fuer Haiku-Check per Fuzzy-Vorfilter aus. - - Findet Paare mit Aehnlichkeit >= 0.60 und gibt die betroffenen Fakten zurueck. - Begrenzt auf max_candidates um Haiku-Tokens zu sparen. - """ - from agents.factchecker import normalize_claim, _keyword_set - - if len(all_facts) <= max_candidates: - return all_facts - - normalized = [] - for f in all_facts: - nc = normalize_claim(f["claim"]) - kw = _keyword_set(f["claim"]) - normalized.append((f, nc, kw)) - - candidate_ids = set() - recent = normalized[:60] - - for i, (fact_a, norm_a, kw_a) in enumerate(recent): - for j, (fact_b, norm_b, kw_b) in enumerate(normalized): - if i >= j or fact_b["id"] == fact_a["id"]: - continue - if not norm_a or not norm_b: - continue - - len_ratio = len(norm_a) / len(norm_b) if norm_b else 0 - if len_ratio > 2.5 or len_ratio < 0.4: - continue - - seq_ratio = SequenceMatcher(None, norm_a, norm_b).ratio() - kw_union = kw_a | kw_b - jaccard = len(kw_a & kw_b) / len(kw_union) if kw_union else 0.0 - combined = 0.7 * seq_ratio + 0.3 * jaccard - - if combined >= 0.60: - candidate_ids.add(fact_a["id"]) - candidate_ids.add(fact_b["id"]) - - if len(candidate_ids) >= max_candidates: - break - if len(candidate_ids) >= max_candidates: - break - - candidates = [f for f in all_facts if f["id"] in candidate_ids] - logger.info( - "Fuzzy-Vorfilter: %d/%d Fakten als Duplikat-Kandidaten identifiziert", - len(candidates), len(all_facts), - ) - return candidates - - -async def check_fact_duplicates(db, incident_id: int, incident_title: str) -> int: - """Prueft auf semantische Faktencheck-Duplikate via Haiku. - - 1. Fuzzy-Vorfilter reduziert auf relevante Kandidaten - 2. Haiku clustert semantische Duplikate - 3. Pro Cluster: behalte besten Fakt, loesche Rest - - Returns: Anzahl entfernter Duplikate. - """ - cursor = await db.execute( - "SELECT id, claim, status, sources_count, evidence, checked_at " - "FROM fact_checks WHERE incident_id = ? ORDER BY checked_at DESC", - (incident_id,), - ) - all_facts = [dict(row) for row in await cursor.fetchall()] - - if len(all_facts) < 2: - return 0 - - # Schritt 1: Fuzzy-Vorfilter - candidates = _fuzzy_prefilter(all_facts) - if len(candidates) < 2: - return 0 - - # Schritt 2: Haiku-Clustering (in Batches von max 80) - all_clusters = [] - batch_size = 80 - for i in range(0, len(candidates), batch_size): - batch = candidates[i:i + batch_size] - clusters = await _haiku_find_duplicate_clusters(batch, incident_title) - all_clusters.extend(clusters) - - if not all_clusters: - logger.info("QC Fakten: Haiku fand keine Duplikate") - return 0 - - # Schritt 3: Pro Cluster besten behalten, Rest loeschen - facts_by_id = {f["id"]: f for f in all_facts} - ids_to_delete = set() - - for cluster_ids in all_clusters: - valid_ids = [cid for cid in cluster_ids if cid in facts_by_id] - if len(valid_ids) <= 1: - continue - - cluster_facts = [facts_by_id[cid] for cid in valid_ids] - best = max(cluster_facts, key=lambda f: ( - STATUS_PRIORITY.get(f["status"], 0), - f.get("sources_count", 0), - f.get("checked_at", ""), - )) - - for fact in cluster_facts: - if fact["id"] != best["id"]: - ids_to_delete.add(fact["id"]) - logger.info( - "QC Duplikat: ID %d entfernt, behalte ID %d ('%s')", - fact["id"], best["id"], best["claim"][:60], - ) - - if ids_to_delete: - placeholders = ",".join("?" * len(ids_to_delete)) - await db.execute( - f"DELETE FROM fact_checks WHERE id IN ({placeholders})", - list(ids_to_delete), - ) - logger.info( - "QC: %d Faktencheck-Duplikate entfernt fuer Incident %d", - len(ids_to_delete), incident_id, - ) - - return len(ids_to_delete) - - -# --------------------------------------------------------------------------- -# 2. Karten-Location-Kategorien -# --------------------------------------------------------------------------- - -_LOCATION_PROMPT = """\ -Du bist ein Geopolitik-Experte fuer einen OSINT-Monitor. - -LAGE: {incident_title} -BESCHREIBUNG: {incident_desc} - -Unten stehen Orte, die auf der Karte als "target" (Angriffsziel) markiert sind. -Pruefe fuer jeden Ort, ob die Kategorie "target" korrekt ist. - -KATEGORIEN: -- target: Ort wurde tatsaechlich militaerisch angegriffen oder bombardiert -- actor: Ort gehoert zu einer Konfliktpartei (z.B. Hauptstadt des Angreifers) -- response: Ort reagiert auf den Konflikt (z.B. diplomatische Reaktion, Sanktionen) -- mentioned: Ort wird nur im Kontext erwaehnt (z.B. wirtschaftliche Auswirkungen) - -REGELN: -- Nur Orte die TATSAECHLICH physisch angegriffen/bombardiert wurden = "target" -- Hauptstaedte von Angreiferlaendern (z.B. Washington DC) = "actor" -- Laender die nur wirtschaftlich betroffen sind (z.B. steigende Oelpreise) = "mentioned" -- Laender die diplomatisch reagieren = "response" -- Im Zweifel: "mentioned" - -Antworte als JSON-Array mit Korrekturen. Nur Eintraege die GEAENDERT werden muessen: -[{{"id": 123, "category": "mentioned"}}, {{"id": 456, "category": "actor"}}] - -Wenn alle Kategorien korrekt sind: antworte mit [] - -ORTE (aktuell alle als "target" markiert): -{locations_text}""" - - -async def check_location_categories( - db, incident_id: int, incident_title: str, incident_desc: str -) -> int: - """Prueft Karten-Location-Kategorien via Haiku. - - Returns: Anzahl korrigierter Eintraege. - """ - cursor = await db.execute( - "SELECT id, location_name, latitude, longitude, category " - "FROM article_locations WHERE incident_id = ? AND category = 'target'", - (incident_id,), - ) - targets = [dict(row) for row in await cursor.fetchall()] - - if not targets: - return 0 - - # Dedupliziere nach location_name fuer den Prompt (spart Tokens) - unique_names = {} - ids_by_name = {} - for loc in targets: - name = loc["location_name"] - if name not in unique_names: - unique_names[name] = loc - ids_by_name[name] = [] - ids_by_name[name].append(loc["id"]) - - locations_text = "\n".join( - f'ID={loc["id"]} | {loc["location_name"]} ({loc["latitude"]:.2f}, {loc["longitude"]:.2f})' - for loc in unique_names.values() - ) - - prompt = _LOCATION_PROMPT.format( - incident_title=incident_title, - incident_desc=incident_desc[:500] if incident_desc else "(keine Beschreibung)", - locations_text=locations_text, - ) - - fixes = [] - try: - result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) - data = json.loads(result) - if isinstance(data, list): - fixes = data - except json.JSONDecodeError: - match = re.search(r'\[.*\]', result, re.DOTALL) - if match: - try: - data = json.loads(match.group()) - if isinstance(data, list): - fixes = data - except json.JSONDecodeError: - pass - except Exception as e: - logger.warning("Haiku Location-Check fehlgeschlagen: %s", e) - return 0 - - if not fixes: - logger.info("QC Locations: Haiku fand keine falschen Kategorien") - return 0 - - # Korrekturen anwenden (auch auf alle IDs mit gleichem Namen) - total_fixed = 0 - representative_ids = {loc["id"]: name for name, loc in unique_names.items()} - - for fix in fixes: - fix_id = fix.get("id") - new_cat = fix.get("category") - if not fix_id or not new_cat: - continue - if new_cat not in ("target", "actor", "response", "mentioned"): - continue - - # Finde den location_name fuer diese ID - loc_name = representative_ids.get(fix_id) - if not loc_name: - continue - - # Korrigiere ALLE Eintraege mit diesem Namen - all_ids = ids_by_name.get(loc_name, [fix_id]) - placeholders = ",".join("?" * len(all_ids)) - await db.execute( - f"UPDATE article_locations SET category = ? " - f"WHERE id IN ({placeholders}) AND category = 'target'", - [new_cat] + all_ids, - ) - total_fixed += len(all_ids) - logger.info( - "QC Location: '%s' (%d Eintraege): target -> %s", - loc_name, len(all_ids), new_cat, - ) - - if total_fixed > 0: - logger.info( - "QC: %d Karten-Location-Kategorien korrigiert fuer Incident %d", - total_fixed, incident_id, - ) - - return total_fixed - - -# --------------------------------------------------------------------------- -# 3. Hauptfunktion -# --------------------------------------------------------------------------- - -async def run_post_refresh_qc(db, incident_id: int) -> dict: - """Fuehrt den kompletten Post-Refresh Quality Check via Haiku durch. - - Returns: Dict mit Ergebnissen {facts_removed, locations_fixed}. - """ - try: - # Lage-Titel und Beschreibung laden - cursor = await db.execute( - "SELECT title, description FROM incidents WHERE id = ?", - (incident_id,), - ) - row = await cursor.fetchone() - if not row: - return {"facts_removed": 0, "locations_fixed": 0} - - incident_title = row["title"] or "" - incident_desc = row["description"] or "" - - facts_removed = await check_fact_duplicates(db, incident_id, incident_title) - locations_fixed = await check_location_categories( - db, incident_id, incident_title, incident_desc - ) - - if facts_removed > 0 or locations_fixed > 0: - await db.commit() - logger.info( - "Post-Refresh QC fuer Incident %d: %d Duplikate entfernt, %d Locations korrigiert", - incident_id, facts_removed, locations_fixed, - ) - - return {"facts_removed": facts_removed, "locations_fixed": locations_fixed} - - except Exception as e: - logger.error( - "Post-Refresh QC Fehler fuer Incident %d: %s", - incident_id, e, exc_info=True, - ) - return {"facts_removed": 0, "locations_fixed": 0, "error": str(e)} +"""Post-Refresh Quality Check via Haiku. + +Prueft nach jedem Refresh: +1. Semantische Faktencheck-Duplikate (Haiku-Clustering mit Fuzzy-Vorfilter) +2. Falsch kategorisierte Karten-Locations (Haiku bewertet Kontext der Lage) + +Regelbasierte Listen dienen als Fallback falls Haiku fehlschlaegt. +""" +import json +import logging +import re +from difflib import SequenceMatcher + +from agents.claude_client import call_claude +from config import CLAUDE_MODEL_FAST + +logger = logging.getLogger("osint.post_refresh_qc") + +STATUS_PRIORITY = { + "confirmed": 5, "established": 5, + "contradicted": 4, "disputed": 4, + "unconfirmed": 3, "unverified": 3, + "developing": 1, +} + +# --------------------------------------------------------------------------- +# 1. Faktencheck-Duplikate +# --------------------------------------------------------------------------- + +_DEDUP_PROMPT = """\ +Du bist ein Deduplizierungs-Agent fuer Faktenchecks eines OSINT-Monitors. + +LAGE: {incident_title} + +Unten stehen Faktenchecks (ID + Status + Claim). Finde Gruppen von Fakten, +die INHALTLICH DASSELBE aussagen, auch wenn sie unterschiedlich formuliert sind. + +REGELN: +- Gleicher Sachverhalt = gleiche Gruppe + (z.B. "Trump fordert Kapitulation" und "US-Praesident verlangt bedingungslose Aufgabe") +- Unterschiedliche Detailtiefe zum SELBEN Fakt = gleiche Gruppe +- VERSCHIEDENE Sachverhalte = VERSCHIEDENE Gruppen + (z.B. "Angriff auf Isfahan" vs "Angriff auf Teheran" sind NICHT dasselbe) +- Eine Gruppe muss mindestens 2 Eintraege haben + +Antworte NUR als JSON-Array von Gruppen. Jede Gruppe ist ein Array von IDs: +[[1,5,12], [3,8]] + +Wenn keine Duplikate: antworte mit [] + +FAKTEN: +{facts_text}""" + + +async def _haiku_find_duplicate_clusters( + facts: list[dict], incident_title: str +) -> list[list[int]]: + """Fragt Haiku welche Fakten semantische Duplikate sind.""" + facts_text = "\n".join( + f'ID={f["id"]} [{f["status"]}]: {f["claim"]}' + for f in facts + ) + prompt = _DEDUP_PROMPT.format( + incident_title=incident_title, facts_text=facts_text + ) + try: + result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + data = json.loads(result) + if isinstance(data, list) and all(isinstance(g, list) for g in data): + return data + except json.JSONDecodeError: + match = re.search(r'\[.*\]', result, re.DOTALL) + if match: + try: + data = json.loads(match.group()) + if isinstance(data, list): + return data + except json.JSONDecodeError: + pass + except Exception as e: + logger.warning("Haiku Duplikat-Clustering fehlgeschlagen: %s", e) + return [] + + +def _fuzzy_prefilter(all_facts: list[dict], max_candidates: int = 80) -> list[dict]: + """Waehlt Kandidaten fuer Haiku-Check per Fuzzy-Vorfilter aus. + + Findet Paare mit Aehnlichkeit >= 0.60 und gibt die betroffenen Fakten zurueck. + Begrenzt auf max_candidates um Haiku-Tokens zu sparen. + """ + from agents.factchecker import normalize_claim, _keyword_set + + if len(all_facts) <= max_candidates: + return all_facts + + normalized = [] + for f in all_facts: + nc = normalize_claim(f["claim"]) + kw = _keyword_set(f["claim"]) + normalized.append((f, nc, kw)) + + candidate_ids = set() + recent = normalized[:60] + + for i, (fact_a, norm_a, kw_a) in enumerate(recent): + for j, (fact_b, norm_b, kw_b) in enumerate(normalized): + if i >= j or fact_b["id"] == fact_a["id"]: + continue + if not norm_a or not norm_b: + continue + + len_ratio = len(norm_a) / len(norm_b) if norm_b else 0 + if len_ratio > 2.5 or len_ratio < 0.4: + continue + + seq_ratio = SequenceMatcher(None, norm_a, norm_b).ratio() + kw_union = kw_a | kw_b + jaccard = len(kw_a & kw_b) / len(kw_union) if kw_union else 0.0 + combined = 0.7 * seq_ratio + 0.3 * jaccard + + if combined >= 0.60: + candidate_ids.add(fact_a["id"]) + candidate_ids.add(fact_b["id"]) + + if len(candidate_ids) >= max_candidates: + break + if len(candidate_ids) >= max_candidates: + break + + candidates = [f for f in all_facts if f["id"] in candidate_ids] + logger.info( + "Fuzzy-Vorfilter: %d/%d Fakten als Duplikat-Kandidaten identifiziert", + len(candidates), len(all_facts), + ) + return candidates + + +async def check_fact_duplicates(db, incident_id: int, incident_title: str) -> int: + """Prueft auf semantische Faktencheck-Duplikate via Haiku. + + 1. Fuzzy-Vorfilter reduziert auf relevante Kandidaten + 2. Haiku clustert semantische Duplikate + 3. Pro Cluster: behalte besten Fakt, loesche Rest + + Returns: Anzahl entfernter Duplikate. + """ + cursor = await db.execute( + "SELECT id, claim, status, sources_count, evidence, checked_at " + "FROM fact_checks WHERE incident_id = ? ORDER BY checked_at DESC", + (incident_id,), + ) + all_facts = [dict(row) for row in await cursor.fetchall()] + + if len(all_facts) < 2: + return 0 + + # Schritt 1: Fuzzy-Vorfilter + candidates = _fuzzy_prefilter(all_facts) + if len(candidates) < 2: + return 0 + + # Schritt 2: Haiku-Clustering (in Batches von max 80) + all_clusters = [] + batch_size = 80 + for i in range(0, len(candidates), batch_size): + batch = candidates[i:i + batch_size] + clusters = await _haiku_find_duplicate_clusters(batch, incident_title) + all_clusters.extend(clusters) + + if not all_clusters: + logger.info("QC Fakten: Haiku fand keine Duplikate") + return 0 + + # Schritt 3: Pro Cluster besten behalten, Rest loeschen + facts_by_id = {f["id"]: f for f in all_facts} + ids_to_delete = set() + + for cluster_ids in all_clusters: + valid_ids = [cid for cid in cluster_ids if cid in facts_by_id] + if len(valid_ids) <= 1: + continue + + cluster_facts = [facts_by_id[cid] for cid in valid_ids] + best = max(cluster_facts, key=lambda f: ( + STATUS_PRIORITY.get(f["status"], 0), + f.get("sources_count", 0), + f.get("checked_at", ""), + )) + + for fact in cluster_facts: + if fact["id"] != best["id"]: + ids_to_delete.add(fact["id"]) + logger.info( + "QC Duplikat: ID %d entfernt, behalte ID %d ('%s')", + fact["id"], best["id"], best["claim"][:60], + ) + + if ids_to_delete: + placeholders = ",".join("?" * len(ids_to_delete)) + await db.execute( + f"DELETE FROM fact_checks WHERE id IN ({placeholders})", + list(ids_to_delete), + ) + logger.info( + "QC: %d Faktencheck-Duplikate entfernt fuer Incident %d", + len(ids_to_delete), incident_id, + ) + + return len(ids_to_delete) + + +# --------------------------------------------------------------------------- +# 2. Karten-Location-Kategorien +# --------------------------------------------------------------------------- + +_LOCATION_PROMPT = """\ +Du bist ein Geopolitik-Experte fuer einen OSINT-Monitor. + +LAGE: {incident_title} +BESCHREIBUNG: {incident_desc} +{labels_context} +Unten stehen Orte, die auf der Karte als "primary" (Hauptgeschehen) markiert sind. +Pruefe fuer jeden Ort, ob die Kategorie "primary" korrekt ist. + +KATEGORIEN: +- primary: {label_primary} — Wo das Hauptgeschehen stattfindet +- secondary: {label_secondary} — Direkte Reaktionen/Gegenmassnahmen +- tertiary: {label_tertiary} — Entscheidungstraeger/Beteiligte +- mentioned: {label_mentioned} — Nur erwaehnt + +REGELN: +- Nur Orte die DIREKT vom Hauptgeschehen betroffen sind = "primary" +- Orte mit Reaktionen/Gegenmassnahmen = "secondary" +- Orte von Entscheidungstraegern (z.B. Hauptstaedte) = "tertiary" +- Nur erwaehnte Orte = "mentioned" +- Im Zweifel: "mentioned" + +Antworte als JSON-Array mit Korrekturen. Nur Eintraege die GEAENDERT werden muessen: +[{{"id": 123, "category": "mentioned"}}, {{"id": 456, "category": "tertiary"}}] + +Wenn alle Kategorien korrekt sind: antworte mit [] + +ORTE (aktuell alle als "primary" markiert): +{locations_text}""" + + +async def check_location_categories( + db, incident_id: int, incident_title: str, incident_desc: str +) -> int: + """Prueft Karten-Location-Kategorien via Haiku. + + Returns: Anzahl korrigierter Eintraege. + """ + cursor = await db.execute( + "SELECT id, location_name, latitude, longitude, category " + "FROM article_locations WHERE incident_id = ? AND category = 'primary'", + (incident_id,), + ) + targets = [dict(row) for row in await cursor.fetchall()] + + if not targets: + return 0 + + # Category-Labels aus DB laden (fuer kontextabhaengige Prompt-Beschreibungen) + cursor = await db.execute( + "SELECT category_labels FROM incidents WHERE id = ?", (incident_id,) + ) + inc_row = await cursor.fetchone() + labels = {} + if inc_row and inc_row["category_labels"]: + try: + labels = json.loads(inc_row["category_labels"]) + except (json.JSONDecodeError, TypeError): + pass + + label_primary = labels.get("primary") or "Hauptgeschehen" + label_secondary = labels.get("secondary") or "Reaktionen" + label_tertiary = labels.get("tertiary") or "Beteiligte" + label_mentioned = labels.get("mentioned") or "Erwaehnt" + + labels_context = "" + if labels: + labels_context = f"KATEGORIE-LABELS: primary={label_primary}, secondary={label_secondary}, tertiary={label_tertiary}, mentioned={label_mentioned}\n" + + # Dedupliziere nach location_name fuer den Prompt (spart Tokens) + unique_names = {} + ids_by_name = {} + for loc in targets: + name = loc["location_name"] + if name not in unique_names: + unique_names[name] = loc + ids_by_name[name] = [] + ids_by_name[name].append(loc["id"]) + + locations_text = "\n".join( + f'ID={loc["id"]} | {loc["location_name"]} ({loc["latitude"]:.2f}, {loc["longitude"]:.2f})' + for loc in unique_names.values() + ) + + prompt = _LOCATION_PROMPT.format( + incident_title=incident_title, + incident_desc=incident_desc[:500] if incident_desc else "(keine Beschreibung)", + labels_context=labels_context, + label_primary=label_primary, + label_secondary=label_secondary, + label_tertiary=label_tertiary, + label_mentioned=label_mentioned, + locations_text=locations_text, + ) + + fixes = [] + try: + result, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + data = json.loads(result) + if isinstance(data, list): + fixes = data + except json.JSONDecodeError: + match = re.search(r'\[.*\]', result, re.DOTALL) + if match: + try: + data = json.loads(match.group()) + if isinstance(data, list): + fixes = data + except json.JSONDecodeError: + pass + except Exception as e: + logger.warning("Haiku Location-Check fehlgeschlagen: %s", e) + return 0 + + if not fixes: + logger.info("QC Locations: Haiku fand keine falschen Kategorien") + return 0 + + # Korrekturen anwenden (auch auf alle IDs mit gleichem Namen) + total_fixed = 0 + representative_ids = {loc["id"]: name for name, loc in unique_names.items()} + + for fix in fixes: + fix_id = fix.get("id") + new_cat = fix.get("category") + if not fix_id or not new_cat: + continue + if new_cat not in ("primary", "secondary", "tertiary", "mentioned"): + continue + + # Finde den location_name fuer diese ID + loc_name = representative_ids.get(fix_id) + if not loc_name: + continue + + # Korrigiere ALLE Eintraege mit diesem Namen + all_ids = ids_by_name.get(loc_name, [fix_id]) + placeholders = ",".join("?" * len(all_ids)) + await db.execute( + f"UPDATE article_locations SET category = ? " + f"WHERE id IN ({placeholders}) AND category = 'primary'", + [new_cat] + all_ids, + ) + total_fixed += len(all_ids) + logger.info( + "QC Location: '%s' (%d Eintraege): primary -> %s", + loc_name, len(all_ids), new_cat, + ) + + if total_fixed > 0: + logger.info( + "QC: %d Karten-Location-Kategorien korrigiert fuer Incident %d", + total_fixed, incident_id, + ) + + return total_fixed + + +# --------------------------------------------------------------------------- +# Hauptfunktion +# --------------------------------------------------------------------------- + +async def run_post_refresh_qc(db, incident_id: int) -> dict: + """Fuehrt den kompletten Post-Refresh Quality Check via Haiku durch. + + Returns: Dict mit Ergebnissen {facts_removed, locations_fixed}. + """ + try: + # Lage-Titel und Beschreibung laden + cursor = await db.execute( + "SELECT title, description FROM incidents WHERE id = ?", + (incident_id,), + ) + row = await cursor.fetchone() + if not row: + return {"facts_removed": 0, "locations_fixed": 0} + + incident_title = row["title"] or "" + incident_desc = row["description"] or "" + + facts_removed = await check_fact_duplicates(db, incident_id, incident_title) + locations_fixed = await check_location_categories( + db, incident_id, incident_title, incident_desc + ) + + if facts_removed > 0 or locations_fixed > 0: + await db.commit() + logger.info( + "Post-Refresh QC fuer Incident %d: %d Duplikate entfernt, %d Locations korrigiert", + incident_id, facts_removed, locations_fixed, + ) + + return {"facts_removed": facts_removed, "locations_fixed": locations_fixed} + + except Exception as e: + logger.error( + "Post-Refresh QC Fehler fuer Incident %d: %s", + incident_id, e, exc_info=True, + ) + return {"facts_removed": 0, "locations_fixed": 0, "error": str(e)} diff --git a/src/static/js/app.js b/src/static/js/app.js index 8ba4063..27efad4 100644 --- a/src/static/js/app.js +++ b/src/static/js/app.js @@ -698,7 +698,7 @@ const App = { async loadIncidentDetail(id) { try { - const [incident, articles, factchecks, snapshots, locations] = await Promise.all([ + const [incident, articles, factchecks, snapshots, locationsResponse] = await Promise.all([ API.getIncident(id), API.getArticles(id), API.getFactChecks(id), @@ -706,14 +706,27 @@ const App = { API.getLocations(id).catch(() => []), ]); - this.renderIncidentDetail(incident, articles, factchecks, snapshots, locations); + // Locations-API gibt jetzt {category_labels, locations} oder Array (Rückwärtskompatibel) + let locations, categoryLabels; + if (Array.isArray(locationsResponse)) { + locations = locationsResponse; + categoryLabels = null; + } else if (locationsResponse && locationsResponse.locations) { + locations = locationsResponse.locations; + categoryLabels = locationsResponse.category_labels || null; + } else { + locations = []; + categoryLabels = null; + } + + this.renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels); } catch (err) { console.error('loadIncidentDetail Fehler:', err); UI.showToast('Fehler beim Laden: ' + err.message, 'error'); } }, - renderIncidentDetail(incident, articles, factchecks, snapshots, locations) { + renderIncidentDetail(incident, articles, factchecks, snapshots, locations, categoryLabels) { // Header Strip { const _e = document.getElementById('incident-title'); if (_e) _e.textContent = incident.title; } { const _e = document.getElementById('incident-description'); if (_e) _e.textContent = incident.description || ''; } @@ -845,7 +858,7 @@ const App = { this._resizeTimelineTile(); // Karte rendern - UI.renderMap(locations || []); + UI.renderMap(locations || [], categoryLabels); }, _collectEntries(filterType, searchTerm, range) { @@ -1617,8 +1630,12 @@ const App = { if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; } if (st.status === 'done' && st.locations > 0) { UI.showToast(`${st.locations} Orte aus ${st.processed} Artikeln erkannt`, 'success'); - const locations = await API.getLocations(incidentId).catch(() => []); - UI.renderMap(locations); + const locResp = await API.getLocations(incidentId).catch(() => []); + let locs, catLabels; + if (Array.isArray(locResp)) { locs = locResp; catLabels = null; } + else if (locResp && locResp.locations) { locs = locResp.locations; catLabels = locResp.category_labels || null; } + else { locs = []; catLabels = null; } + UI.renderMap(locs, catLabels); } else if (st.status === 'done') { UI.showToast('Keine neuen Orte gefunden', 'info'); } else if (st.status === 'error') { diff --git a/src/static/js/components.js b/src/static/js/components.js index cdc7c55..51cc757 100644 --- a/src/static/js/components.js +++ b/src/static/js/components.js @@ -639,30 +639,29 @@ const UI = { _initMarkerIcons() { if (this._markerIcons || typeof L === 'undefined') return; this._markerIcons = { - target: this._createSvgIcon('#dc3545', '#a71d2a'), - retaliation: this._createSvgIcon('#f39c12', '#c47d0a'), - response: this._createSvgIcon('#f39c12', '#c47d0a'), - actor: this._createSvgIcon('#2a81cb', '#1a5c8f'), + primary: this._createSvgIcon('#dc3545', '#a71d2a'), + secondary: this._createSvgIcon('#f39c12', '#c47d0a'), + tertiary: this._createSvgIcon('#2a81cb', '#1a5c8f'), mentioned: this._createSvgIcon('#7b7b7b', '#555555'), }; }, - _categoryLabels: { - target: 'Angegriffene Ziele', - retaliation: 'Vergeltung / Eskalation', - response: 'Reaktion / Gegenmassnahmen', - actor: 'Strategische Akteure', + _defaultCategoryLabels: { + primary: 'Hauptgeschehen', + secondary: 'Reaktionen', + tertiary: 'Beteiligte', mentioned: 'Erwaehnt', }, _categoryColors: { - target: '#cb2b3e', - retaliation: '#f39c12', - response: '#f39c12', - actor: '#2a81cb', + primary: '#cb2b3e', + secondary: '#f39c12', + tertiary: '#2a81cb', mentioned: '#7b7b7b', }, - renderMap(locations) { + _activeCategoryLabels: null, + + renderMap(locations, categoryLabels) { const container = document.getElementById('map-container'); const emptyEl = document.getElementById('map-empty'); const statsEl = document.getElementById('map-stats'); @@ -741,6 +740,9 @@ const UI = { // Marker hinzufuegen const bounds = []; this._initMarkerIcons(); + // Dynamische Labels verwenden (API > Default) + const catLabels = categoryLabels || this._activeCategoryLabels || this._defaultCategoryLabels; + this._activeCategoryLabels = catLabels; const usedCategories = new Set(); locations.forEach(loc => { @@ -751,7 +753,7 @@ const UI = { const marker = L.marker([loc.lat, loc.lon], markerOpts); // Popup-Inhalt - const catLabel = this._categoryLabels[cat] || cat; + const catLabel = catLabels[cat] || this._defaultCategoryLabels[cat] || cat; const catColor = this._categoryColors[cat] || '#7b7b7b'; let popupHtml = `
`; popupHtml += `
${this.escape(loc.location_name)}`; @@ -798,12 +800,13 @@ const UI = { const legend = L.control({ position: 'bottomright' }); const self2 = this; + const legendLabels = catLabels; legend.onAdd = function() { const div = L.DomUtil.create('div', 'map-legend-ctrl'); let html = 'Legende'; - ['target', 'retaliation', 'response', 'actor', 'mentioned'].forEach(cat => { - if (usedCategories.has(cat)) { - html += `
${self2._categoryLabels[cat]}
`; + ['primary', 'secondary', 'tertiary', 'mentioned'].forEach(cat => { + if (usedCategories.has(cat) && legendLabels[cat]) { + html += `
${legendLabels[cat]}
`; } }); div.innerHTML = html; @@ -853,7 +856,7 @@ const UI = { if (this._pendingLocations && typeof L !== 'undefined') { const locs = this._pendingLocations; this._pendingLocations = null; - this.renderMap(locs); + this.renderMap(locs, this._activeCategoryLabels); } },