diff --git a/src/agents/entity_extractor.py b/src/agents/entity_extractor.py
index b3186ac..b41fef5 100644
--- a/src/agents/entity_extractor.py
+++ b/src/agents/entity_extractor.py
@@ -1,4 +1,4 @@
-"""Netzwerkanalyse: Entity-Extraktion (Haiku) + Beziehungsanalyse (Batched)."""
+"""Netzwerkanalyse: Entity-Extraktion (Sonnet) + Beziehungsanalyse (Batched) mit Artikel-Deduplizierung."""
 import asyncio
 import hashlib
 import json
@@ -9,7 +9,7 @@ from datetime import datetime
 from typing import Optional

 from agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator
-from config import CLAUDE_MODEL_FAST, TIMEZONE
+from config import CLAUDE_MODEL_FAST, CLAUDE_MODEL_MEDIUM, TIMEZONE

 logger = logging.getLogger("osint.entity_extractor")

@@ -194,6 +194,114 @@ def _compute_data_hash(article_ids, factcheck_ids, article_ts, factcheck_ts) ->
     return hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest()


+# ---------------------------------------------------------------------------
+# Artikel-Deduplizierung
+# ---------------------------------------------------------------------------
+
+def _normalize_headline(headline: str) -> str:
+    """Normalisiert eine Headline fuer Vergleiche."""
+    h = headline.lower().strip()
+    h = re.sub(r"[^a-z0-9\s]", "", h)
+    h = re.sub(r"\s+", " ", h).strip()
+    return h
+
+
+def _headline_tokens(headline: str) -> set[str]:
+    """Extrahiert bedeutungstragende Tokens aus einer Headline."""
+    tokens = set()
+    for word in _normalize_headline(headline).split():
+        if len(word) >= 3 and word not in _STOP_WORDS:
+            tokens.add(word)
+    return tokens
+
+
+def _jaccard_similarity(set_a: set, set_b: set) -> float:
+    """Jaccard-Aehnlichkeit zweier Mengen."""
+    if not set_a or not set_b:
+        return 0.0
+    intersection = set_a & set_b
+    union = set_a | set_b
+    return len(intersection) / len(union) if union else 0.0
+
+
+def _content_fingerprint(text: str) -> str:
+    """Kurzer Hash des Textinhalts fuer Near-Duplicate-Erkennung."""
+    normalized = re.sub(r"\s+", " ", text.lower().strip())[:500]
+    return hashlib.md5(normalized.encode("utf-8")).hexdigest()
+
+
+def _deduplicate_articles(articles: list[dict], factchecks: list[dict]) -> tuple[list[dict], list[dict]]:
+    """Entfernt redundante Artikel basierend auf Headline-Similarity und Content-Hash.
+
+    Behaelt pro Duplikat-Gruppe den Artikel mit dem laengsten Content.
+    Faktenchecks werden nicht dedupliziert (sind bereits einzigartig).
+
+    Returns:
+        Tuple von (deduplizierte_artikel, factchecks_unveraendert)
+    """
+    if len(articles) <= 50:
+        return articles, factchecks
+
+    logger.info(f"Artikel-Dedup: {len(articles)} Artikel pruefen")
+
+    # Phase A: Exakte Content-Fingerprint-Dedup
+    seen_fingerprints: dict[str, int] = {}
+
+    for i, art in enumerate(articles):
+        content = art.get("content_de") or art.get("content_original") or ""
+        headline = art.get("headline_de") or art.get("headline") or ""
+
+        if not content and not headline:
+            continue
+
+        fp = _content_fingerprint(headline + " " + content)
+
+        if fp in seen_fingerprints:
+            existing_idx = seen_fingerprints[fp]
+            existing_content = articles[existing_idx].get("content_de") or articles[existing_idx].get("content_original") or ""
+            if len(content) > len(existing_content):
+                seen_fingerprints[fp] = i
+        else:
+            seen_fingerprints[fp] = i
+
+    after_fp = list(seen_fingerprints.values())
+    fp_removed = len(articles) - len(after_fp)
+
+    # Phase B: Headline-Similarity-Dedup (Jaccard >= 0.7)
+    remaining = [articles[i] for i in sorted(after_fp)]
+
+    token_sets = []
+    for art in remaining:
+        headline = art.get("headline_de") or art.get("headline") or ""
+        token_sets.append(_headline_tokens(headline))
+
+    keep_mask = [True] * len(remaining)
+
+    for i in range(len(remaining)):
+        if not keep_mask[i]:
+            continue
+        for j in range(i + 1, len(remaining)):
+            if not keep_mask[j]:
+                continue
+            if _jaccard_similarity(token_sets[i], token_sets[j]) >= 0.7:
+                content_i = remaining[i].get("content_de") or remaining[i].get("content_original") or ""
+                content_j = remaining[j].get("content_de") or remaining[j].get("content_original") or ""
+                if len(content_j) > len(content_i):
+                    keep_mask[i] = False
+                    break
+                else:
+                    keep_mask[j] = False
+
+    deduped = [art for art, keep in zip(remaining, keep_mask) if keep]
+    headline_removed = len(remaining) - len(deduped)
+
+    logger.info(
+        f"Artikel-Dedup abgeschlossen: {len(articles)} -> {len(deduped)} "
+        f"({fp_removed} Content-Duplikate, {headline_removed} Headline-Duplikate entfernt)"
+    )
+
+    return deduped, factchecks
+
 # ---------------------------------------------------------------------------
 # Entity-Merge Helper
 # ---------------------------------------------------------------------------
@@ -279,8 +387,8 @@ async def _phase1_extract_entities(
         headline = art.get("headline_de") or art.get("headline") or ""
         content = art.get("content_de") or art.get("content_original") or ""
         source = art.get("source") or ""
-        if len(content) > 2000:
-            content = content[:2000] + "..."
+        if len(content) > 800:
+            content = content[:800] + "..."
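
For reference, a minimal sketch of how the two deduplication phases added above classify a pair of articles. It assumes the module is importable as agents.entity_extractor (matching the src/ layout used elsewhere in this diff) and uses invented headlines; note that the full _deduplicate_articles pass only runs once an analysis holds more than 50 articles.

# Hypothetical usage sketch -- headlines and texts are invented examples.
from agents.entity_extractor import (
    _content_fingerprint,
    _headline_tokens,
    _jaccard_similarity,
)

a = "Putin und Xi vereinbaren engere militaerische Zusammenarbeit"
b = "Putin, Xi vereinbaren engere militaerische Zusammenarbeit bei Treffen"

# Phase B compares stop-word-filtered headline tokens; at >= 0.7 Jaccard the
# shorter article of the pair is dropped.
tokens_a = _headline_tokens(a)
tokens_b = _headline_tokens(b)
sim = _jaccard_similarity(tokens_a, tokens_b)
print(f"Jaccard {sim:.2f} -> {'Duplikat' if sim >= 0.7 else 'behalten'}")

# Phase A hashes the first 500 normalized characters, so case and whitespace
# differences still collapse to the same fingerprint.
print(_content_fingerprint("Gleicher  Fliesstext zum Test") ==
      _content_fingerprint("gleicher fliesstext zum Test"))  # True

Phase A thus removes near-identical bodies independent of the headline, while Phase B catches rephrased headlines whose token overlap reaches the 0.7 threshold, in both cases keeping the article with the longer content.
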
all_texts.append(f"[{source}] {headline}\n{content}") for fc in factchecks: @@ -293,7 +401,7 @@ async def _phase1_extract_entities( logger.warning(f"Analyse {analysis_id}: Keine Texte vorhanden") return [] - batch_size = 30 + batch_size = 50 batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)] logger.info(f"{len(all_texts)} Texte in {len(batches)} Batches") @@ -304,10 +412,10 @@ async def _phase1_extract_entities( prompt = ENTITY_EXTRACTION_PROMPT.format(articles_text=articles_text) try: - result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_MEDIUM) usage_acc.add(usage) except Exception as e: - logger.error(f"Haiku Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") + logger.error(f"Sonnet Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") continue parsed = _parse_json_response(result_text) @@ -500,8 +608,8 @@ async def _phase2_analyze_relationships( headline = art.get("headline_de") or art.get("headline") or "" content = art.get("content_de") or art.get("content_original") or "" source = art.get("source") or "" - if len(content) > 2000: - content = content[:2000] + "..." + if len(content) > 800: + content = content[:800] + "..." all_texts.append(f"[{source}] {headline}\n{content}") for fc in factchecks: @@ -514,7 +622,7 @@ async def _phase2_analyze_relationships( return [] # --- Stufe A: Per-Batch Beziehungsextraktion --- - batch_size = 30 + batch_size = 50 batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)] logger.info(f"Stufe A: {len(batches)} Batches für Beziehungsextraktion") @@ -545,7 +653,7 @@ async def _phase2_analyze_relationships( ) try: - result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_MEDIUM) usage_acc.add(usage) except Exception as e: logger.error(f"Relationship Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") @@ -1067,6 +1175,9 @@ async def extract_and_relate_entities(analysis_id: int, tenant_id: int, ws_manag logger.info(f"Analyse {analysis_id}: {len(articles)} Artikel, " f"{len(factchecks)} Faktenchecks aus {len(incident_ids)} Lagen") + # Artikel-Deduplizierung vor KI-Pipeline + articles, factchecks = _deduplicate_articles(articles, factchecks) + # Phase 1: Entity-Extraktion if not await _check_analysis_exists(db, analysis_id): return diff --git a/src/agents/entity_extractor.py.bak b/src/agents/entity_extractor.py.bak new file mode 100644 index 0000000..b3186ac --- /dev/null +++ b/src/agents/entity_extractor.py.bak @@ -0,0 +1,1144 @@ +"""Netzwerkanalyse: Entity-Extraktion (Haiku) + Beziehungsanalyse (Batched).""" +import asyncio +import hashlib +import json +import logging +import re +from collections import defaultdict +from datetime import datetime +from typing import Optional + +from agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator +from config import CLAUDE_MODEL_FAST, TIMEZONE + +logger = logging.getLogger("osint.entity_extractor") + +# --------------------------------------------------------------------------- +# Konstanten +# --------------------------------------------------------------------------- + +TYPE_PRIORITY = {"location": 5, "organisation": 4, "military": 3, "event": 2, "person": 1} + +_STOP_WORDS = frozenset({ + "the", "of", "and", "for", "in", "on", "at", "to", "by", + "von", "der", "die", "das", "und", "für", "des", 
"den", "dem", + "ein", "eine", "zur", "zum", "bei", "mit", "aus", "nach", +}) + +# --------------------------------------------------------------------------- +# Prompts +# --------------------------------------------------------------------------- + +ENTITY_EXTRACTION_PROMPT = """Du bist ein OSINT-Analyst für ein Lagemonitoring-System. +AUFGABE: Extrahiere ALLE relevanten Entitäten aus den folgenden Nachrichtenartikeln. + +ARTIKEL: +{articles_text} + +REGELN: +- Extrahiere JEDE genannte Person, Organisation, Ort, Ereignis und militärische Einheit +- Normalisiere Namen: "Wladimir Putin", "Putin", "V. Putin" -> eine Entität +- Aliase erfassen: Alle Namensvarianten einer Entität als aliases[] +- mention_count: Wie oft wird die Entität insgesamt in allen Artikeln erwähnt? +- Beschreibung: Kurze Einordnung, wer/was die Entität ist (1 Satz) +- KEINE Duplikate: Gleiche Entitäten zusammenfassen + +ENTITY-TYPEN: +- "person": Individuelle Personen (Politiker, Militärs, Journalisten etc.) +- "organisation": Organisationen, Parteien, Behörden, Unternehmen, NGOs +- "location": Länder, Städte, Regionen, Gebiete +- "event": Konkrete Ereignisse (Wahlen, Anschläge, Konferenzen etc.) +- "military": Militärische Einheiten, Waffensysteme, Operationen + +AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON: +{{ + "entities": [ + {{ + "name": "Vollständiger Name", + "name_normalized": "vollständiger name", + "type": "person|organisation|location|event|military", + "description": "Kurze Einordnung (1 Satz)", + "aliases": ["Alias1", "Alias2"], + "mention_count": 5 + }} + ] +}}""" + + +RELATIONSHIP_BATCH_PROMPT = """Du bist ein Senior OSINT-Analyst für ein Lagemonitoring-System. +AUFGABE: Analysiere die Beziehungen zwischen den Entitäten basierend auf den Artikeln. + +BEKANNTE ENTITÄTEN (aus dem Gesamtdatensatz): +{entities_json} + +ARTIKEL: +{articles_text} + +AUFGABE: +Identifiziere ALLE Beziehungen zwischen den oben genannten Entitäten, die sich aus den Artikeln ergeben. +- Nur Beziehungen nennen, die im Artikeltext belegt sind +- source und target: Exakt die Namen aus der Entitäten-Liste verwenden +- Wenn eine Entität im Artikel vorkommt aber nicht in der Liste, verwende den Namen wie er in der Liste steht + +BEZIEHUNGS-KATEGORIEN: +- "alliance": Bündnis, Kooperation, Unterstützung, Partnerschaft +- "conflict": Konflikt, Krieg, Feindschaft, Sanktionen, Opposition +- "diplomacy": Diplomatische Beziehungen, Verhandlungen, Abkommen +- "economic": Wirtschaftsbeziehungen, Handel, Investitionen +- "legal": Rechtliche Beziehungen, Klagen, Verurteilungen +- "neutral": Sonstige Beziehung, Erwähnung, Verbindung + +REGELN: +- weight: 1 (schwach/indirekt) bis 5 (stark/direkt) +- evidence[]: 1-2 kurze Stichpunkte aus dem Artikeltext als Beleg +- status: "active" (aktuell), "historical" (vergangen), "emerging" (sich entwickelnd) + +AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON: +{{ + "relations": [ + {{ + "source": "Entität A", + "target": "Entität B", + "category": "alliance|conflict|diplomacy|economic|legal|neutral", + "label": "Kurzes Label (2-4 Wörter)", + "description": "Beschreibung (1-2 Sätze)", + "weight": 3, + "status": "active|historical|emerging", + "evidence": ["Beleg 1", "Beleg 2"] + }} + ] +}}""" + + +SEMANTIC_DEDUP_PROMPT = """Du bist ein OSINT-Analyst. Prüfe diese Entitäten auf Duplikate. + +ENTITÄTEN: +{entity_list} + +AUFGABE: Welche Entitäten bezeichnen DASSELBE reale Objekt? + +Typische Duplikate: +- Abkürzung vs. 
Vollname: "IRGC" = "Iranian Revolutionary Guard Corps" +- Sprachvarianten: "European Union" = "Europäische Union" +- Schreibvarianten: "Strait of Hormuz" = "Straße von Hormus" + +REGELN: +- NUR echte Duplikate zusammenführen (gleiche reale Entität) +- NICHT zusammenführen: Unterorganisationen (IRGC ≠ IRGC Navy), verschiedene Personen +- "keep": Nummer der Haupt-Entität (bevorzuge: mehr Erwähnungen, vollständiger Name) +- "merge": Nummern der Duplikate die in die Haupt-Entität zusammengeführt werden + +AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON: +{{ + "merges": [ + {{"keep": 1, "merge": [3, 5]}}, + {{"keep": 2, "merge": [4]}} + ] +}} + +Falls KEINE Duplikate: {{"merges": []}}""" + + +# --------------------------------------------------------------------------- +# Hilfsfunktionen +# --------------------------------------------------------------------------- + +async def _broadcast(ws_manager, msg_type: str, data: dict): + """Sendet eine WebSocket-Nachricht, falls ws_manager vorhanden.""" + if ws_manager: + try: + await ws_manager.broadcast({"type": msg_type, **data}) + except Exception: + pass + + +def _parse_json_response(text: str) -> Optional[dict]: + """Parst JSON aus Claude-Antwort. Handhabt Markdown-Fences.""" + if not text: + return None + try: + return json.loads(text) + except json.JSONDecodeError: + pass + fence_match = re.search(r'```(?:json)?\s*\n?(.*?)\n?\s*```', text, re.DOTALL) + if fence_match: + try: + return json.loads(fence_match.group(1)) + except json.JSONDecodeError: + pass + obj_match = re.search(r'\{.*\}', text, re.DOTALL) + if obj_match: + try: + return json.loads(obj_match.group()) + except json.JSONDecodeError: + pass + logger.warning("JSON-Parse fehlgeschlagen") + return None + + +async def _check_analysis_exists(db, analysis_id: int) -> bool: + """Prüft ob die Analyse noch existiert.""" + cursor = await db.execute( + "SELECT id FROM network_analyses WHERE id = ?", (analysis_id,) + ) + return await cursor.fetchone() is not None + + +def _compute_data_hash(article_ids, factcheck_ids, article_ts, factcheck_ts) -> str: + """SHA256-Hash über sortierte IDs und Timestamps.""" + parts = [] + for aid, ats in sorted(zip(article_ids, article_ts)): + parts.append(f"a:{aid}:{ats}") + for fid, fts in sorted(zip(factcheck_ids, factcheck_ts)): + parts.append(f"f:{fid}:{fts}") + return hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest() + + +# --------------------------------------------------------------------------- +# Entity-Merge Helper +# --------------------------------------------------------------------------- + +async def _merge_entity_in_db( + db, analysis_id: int, keep_ent: dict, merge_ent: dict, entities: list[dict], +): + """Führt merge_ent in keep_ent zusammen (DB + In-Memory).""" + keep_id = keep_ent["db_id"] + merge_id = merge_ent["db_id"] + + # Aliases vereinen + aliases = set(keep_ent.get("aliases", [])) + aliases.add(merge_ent["name"]) + for a in merge_ent.get("aliases", []): + if a and a.strip(): + aliases.add(a.strip()) + aliases.discard(keep_ent["name"]) + keep_ent["aliases"] = list(aliases) + + # Mention count addieren + keep_ent["mention_count"] = keep_ent.get("mention_count", 0) + merge_ent.get("mention_count", 0) + + # Längere Description behalten + if len(merge_ent.get("description", "")) > len(keep_ent.get("description", "")): + keep_ent["description"] = merge_ent["description"] + + # Entity-Mentions umhängen + await db.execute( + "UPDATE network_entity_mentions SET entity_id = ? 
WHERE entity_id = ?", + (keep_id, merge_id), + ) + + # Relations umhängen + await db.execute( + "UPDATE network_relations SET source_entity_id = ? WHERE source_entity_id = ? AND network_analysis_id = ?", + (keep_id, merge_id, analysis_id), + ) + await db.execute( + "UPDATE network_relations SET target_entity_id = ? WHERE target_entity_id = ? AND network_analysis_id = ?", + (keep_id, merge_id, analysis_id), + ) + + # Self-Loops entfernen die durch den Merge entstanden sein könnten + await db.execute( + "DELETE FROM network_relations WHERE source_entity_id = ? AND target_entity_id = ? AND network_analysis_id = ?", + (keep_id, keep_id, analysis_id), + ) + + # Keep-Entity in DB aktualisieren + await db.execute( + """UPDATE network_entities + SET aliases = ?, mention_count = ?, description = ? + WHERE id = ?""", + (json.dumps(keep_ent["aliases"], ensure_ascii=False), + keep_ent["mention_count"], keep_ent.get("description", ""), keep_id), + ) + + # Merge-Entity löschen + await db.execute("DELETE FROM network_entities WHERE id = ?", (merge_id,)) + + # Aus entities-Liste entfernen + try: + entities.remove(merge_ent) + except ValueError: + pass + + +# --------------------------------------------------------------------------- +# Phase 1: Entity-Extraktion (Haiku) +# --------------------------------------------------------------------------- + +async def _phase1_extract_entities( + db, analysis_id: int, tenant_id: int, + articles: list[dict], factchecks: list[dict], + usage_acc: UsageAccumulator, ws_manager=None, +) -> list[dict]: + """Extrahiert Entitäten aus Artikeln via Haiku in Batches.""" + logger.info(f"Phase 1: {len(articles)} Artikel, {len(factchecks)} Faktenchecks") + + all_texts = [] + for art in articles: + headline = art.get("headline_de") or art.get("headline") or "" + content = art.get("content_de") or art.get("content_original") or "" + source = art.get("source") or "" + if len(content) > 2000: + content = content[:2000] + "..." 
+ all_texts.append(f"[{source}] {headline}\n{content}") + + for fc in factchecks: + claim = fc.get("claim") or "" + evidence = fc.get("evidence") or "" + status = fc.get("status") or "" + all_texts.append(f"[Faktencheck] {claim} (Status: {status})\n{evidence}") + + if not all_texts: + logger.warning(f"Analyse {analysis_id}: Keine Texte vorhanden") + return [] + + batch_size = 30 + batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)] + logger.info(f"{len(all_texts)} Texte in {len(batches)} Batches") + + entity_map: dict[str, dict] = {} + + for batch_idx, batch in enumerate(batches): + articles_text = "\n\n---\n\n".join(batch) + prompt = ENTITY_EXTRACTION_PROMPT.format(articles_text=articles_text) + + try: + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + usage_acc.add(usage) + except Exception as e: + logger.error(f"Haiku Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") + continue + + parsed = _parse_json_response(result_text) + if not parsed or "entities" not in parsed: + logger.warning(f"Batch {batch_idx + 1}: Kein gültiges JSON") + continue + + entities = parsed["entities"] + if not isinstance(entities, list): + continue + + for ent in entities: + if not isinstance(ent, dict): + continue + name = (ent.get("name") or "").strip() + if not name: + continue + + name_normalized = (ent.get("name_normalized") or name.lower()).strip().lower() + entity_type = (ent.get("type") or "organisation").lower().strip() + valid_types = {"person", "organisation", "location", "event", "military"} + if entity_type not in valid_types: + entity_type = "organisation" + + key = name_normalized + + if key in entity_map: + existing = entity_map[key] + aliases = set(existing.get("aliases", [])) + for alias in ent.get("aliases", []): + if alias and alias.strip(): + aliases.add(alias.strip()) + if name != existing["name"]: + aliases.add(name) + existing["aliases"] = list(aliases) + existing["mention_count"] = existing.get("mention_count", 1) + ent.get("mention_count", 1) + new_desc = ent.get("description", "") + if len(new_desc) > len(existing.get("description", "")): + existing["description"] = new_desc + # Typ-Priorität: höherwertigen Typ behalten + if TYPE_PRIORITY.get(entity_type, 0) > TYPE_PRIORITY.get(existing["type"], 0): + existing["type"] = entity_type + else: + entity_map[key] = { + "name": name, + "name_normalized": name_normalized, + "type": entity_type, + "description": ent.get("description", ""), + "aliases": [a.strip() for a in ent.get("aliases", []) if a and a.strip()], + "mention_count": ent.get("mention_count", 1), + } + + logger.info(f"Batch {batch_idx + 1}/{len(batches)}: {len(entity_map)} Entitäten gesamt") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "entity_extraction", + "progress": int((batch_idx + 1) / len(batches) * 100), + }) + + all_entities = list(entity_map.values()) + + for ent in all_entities: + try: + cursor = await db.execute( + """INSERT OR IGNORE INTO network_entities + (network_analysis_id, name, name_normalized, entity_type, + description, aliases, mention_count, tenant_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)""", + ( + analysis_id, ent["name"], ent["name_normalized"], ent["type"], + ent.get("description", ""), + json.dumps(ent.get("aliases", []), ensure_ascii=False), + ent.get("mention_count", 1), + tenant_id, + ), + ) + ent["db_id"] = cursor.lastrowid + except Exception as e: + logger.warning(f"Entity speichern fehlgeschlagen '{ent['name']}': {e}") + + 
await db.commit() + logger.info(f"Phase 1 abgeschlossen: {len(all_entities)} Entitäten gespeichert") + return all_entities + + +# --------------------------------------------------------------------------- +# Phase 2a: Entity-Deduplication nach name_normalized +# --------------------------------------------------------------------------- + +async def _phase2a_deduplicate_entities( + db, analysis_id: int, entities: list[dict], ws_manager=None, +) -> None: + """Dedupliziert Entities mit gleichem name_normalized (unabhängig vom Typ).""" + logger.info(f"Phase 2a: Prüfe {len(entities)} Entitäten auf name_normalized-Duplikate") + + groups = defaultdict(list) + for ent in entities: + if ent.get("db_id"): + groups[ent["name_normalized"]].append(ent) + + merge_count = 0 + merged_ids = set() + + for nn, group in groups.items(): + if len(group) < 2: + continue + # Sortierung: höchste Typ-Priorität, dann meiste Erwähnungen + group.sort( + key=lambda e: (TYPE_PRIORITY.get(e["type"], 0), e.get("mention_count", 0)), + reverse=True, + ) + keep = group[0] + for merge in group[1:]: + if merge["db_id"] in merged_ids: + continue + await _merge_entity_in_db(db, analysis_id, keep, merge, entities) + merged_ids.add(merge["db_id"]) + merge_count += 1 + + if merge_count > 0: + await db.commit() + + logger.info( + f"Phase 2a abgeschlossen: {merge_count} Duplikate zusammengeführt, " + f"{len(entities)} Entitäten verbleiben" + ) + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "entity_dedup", + "progress": 100, + }) + + +# --------------------------------------------------------------------------- +# Phase 2: Beziehungsanalyse (Batched — pro Artikel-Batch) +# --------------------------------------------------------------------------- + +def _build_entity_name_map(entities: list[dict]) -> dict[str, int]: + """Mapping: normalisierter Name/Alias -> DB-ID.""" + name_to_id: dict[str, int] = {} + for ent in entities: + db_id = ent.get("db_id") + if not db_id: + continue + name_to_id[ent["name"].lower()] = db_id + name_to_id[ent["name_normalized"]] = db_id + for alias in ent.get("aliases", []): + if alias and alias.strip(): + name_to_id[alias.strip().lower()] = db_id + return name_to_id + + +def _find_relevant_entities(batch_texts: list[str], entities: list[dict]) -> list[dict]: + """Findet Entitäten, die in den Batch-Texten vorkommen.""" + combined_text = " ".join(batch_texts).lower() + relevant = [] + for ent in entities: + if ent["name"].lower() in combined_text or ent["name_normalized"] in combined_text: + relevant.append(ent) + continue + for alias in ent.get("aliases", []): + if alias and alias.strip().lower() in combined_text: + relevant.append(ent) + break + return relevant + + +async def _phase2_analyze_relationships( + db, analysis_id: int, tenant_id: int, + entities: list[dict], articles: list[dict], factchecks: list[dict], + usage_acc: UsageAccumulator, ws_manager=None, +) -> list[dict]: + """Analysiert Beziehungen batch-weise und merged die Ergebnisse.""" + if not entities: + return [] + + logger.info(f"Phase 2: {len(entities)} Entitäten, batched Beziehungsanalyse") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": 0, + }) + + # --- Texte vorbereiten (gleiche Logik wie Phase 1) --- + all_texts = [] + for art in articles: + headline = art.get("headline_de") or art.get("headline") or "" + content = art.get("content_de") or art.get("content_original") or "" + source = art.get("source") 
or "" + if len(content) > 2000: + content = content[:2000] + "..." + all_texts.append(f"[{source}] {headline}\n{content}") + + for fc in factchecks: + claim = fc.get("claim") or "" + evidence = fc.get("evidence") or "" + status = fc.get("status") or "" + all_texts.append(f"[Faktencheck] {claim} (Status: {status})\n{evidence}") + + if not all_texts: + return [] + + # --- Stufe A: Per-Batch Beziehungsextraktion --- + batch_size = 30 + batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)] + logger.info(f"Stufe A: {len(batches)} Batches für Beziehungsextraktion") + + all_raw_relations: list[dict] = [] + name_to_id = _build_entity_name_map(entities) + + for batch_idx, batch in enumerate(batches): + relevant = _find_relevant_entities(batch, entities) + if len(relevant) < 2: + logger.debug(f"Batch {batch_idx + 1}: Weniger als 2 Entitäten, überspringe") + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": int((batch_idx + 1) / len(batches) * 70), + }) + continue + + entities_for_prompt = [ + {"name": e["name"], "type": e["type"]} + for e in relevant + ] + entities_json = json.dumps(entities_for_prompt, ensure_ascii=False) + articles_text = "\n\n---\n\n".join(batch) + + prompt = RELATIONSHIP_BATCH_PROMPT.format( + entities_json=entities_json, + articles_text=articles_text, + ) + + try: + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + usage_acc.add(usage) + except Exception as e: + logger.error(f"Relationship Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") + continue + + parsed = _parse_json_response(result_text) + if not parsed: + logger.warning(f"Batch {batch_idx + 1}: Kein gültiges JSON") + continue + + relations = parsed.get("relations", []) + if not isinstance(relations, list): + continue + + batch_count = 0 + for rel in relations: + if not isinstance(rel, dict): + continue + source_name = (rel.get("source") or "").strip() + target_name = (rel.get("target") or "").strip() + if not source_name or not target_name: + continue + rel["_batch"] = batch_idx + all_raw_relations.append(rel) + batch_count += 1 + + logger.info(f"Batch {batch_idx + 1}/{len(batches)}: {batch_count} Beziehungen, {len(relevant)} Entitäten") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": int((batch_idx + 1) / len(batches) * 70), + }) + + logger.info(f"Stufe A abgeschlossen: {len(all_raw_relations)} rohe Beziehungen aus {len(batches)} Batches") + + # --- Stufe B: Merge + Deduplizierung --- + logger.info("Stufe B: Merge und Deduplizierung") + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": 75, + }) + + valid_categories = {"alliance", "conflict", "diplomacy", "economic", "legal", "neutral"} + merged: dict[tuple[int, int, str], dict] = {} + + for rel in all_raw_relations: + source_name = (rel.get("source") or "").strip() + target_name = (rel.get("target") or "").strip() + + source_id = name_to_id.get(source_name.lower()) + target_id = name_to_id.get(target_name.lower()) + if not source_id or not target_id or source_id == target_id: + continue + + # Normalisiere Richtung um A->B und B->A zu mergen + if source_id > target_id: + source_id, target_id = target_id, source_id + source_name, target_name = target_name, source_name + + category = (rel.get("category") or "neutral").lower().strip() + if category not 
in valid_categories: + category = "neutral" + + key = (source_id, target_id, category) + + weight = rel.get("weight", 3) + try: + weight = max(1, min(5, int(weight))) + except (ValueError, TypeError): + weight = 3 + + status = (rel.get("status") or "active").lower().strip() + if status not in {"active", "historical", "emerging"}: + status = "active" + + evidence = rel.get("evidence", []) + if not isinstance(evidence, list): + evidence = [] + + if key in merged: + existing = merged[key] + existing["weight"] = max(existing["weight"], weight) + existing_evidence = set(existing["evidence"]) + for ev in evidence: + if isinstance(ev, str) and ev.strip() and ev.strip() not in existing_evidence: + existing["evidence"].append(ev.strip()) + existing_evidence.add(ev.strip()) + if len(existing["evidence"]) > 10: + existing["evidence"] = existing["evidence"][:10] + status_priority = {"active": 3, "emerging": 2, "historical": 1} + if status_priority.get(status, 0) > status_priority.get(existing["status"], 0): + existing["status"] = status + new_desc = rel.get("description", "") + if len(new_desc) > len(existing.get("description", "")): + existing["description"] = new_desc + existing["label"] = rel.get("label", existing["label"]) + existing["_count"] = existing.get("_count", 1) + 1 + else: + merged[key] = { + "source_id": source_id, + "target_id": target_id, + "source_name": source_name, + "target_name": target_name, + "category": category, + "label": rel.get("label", ""), + "description": rel.get("description", ""), + "weight": weight, + "status": status, + "evidence": [ev.strip() for ev in evidence if isinstance(ev, str) and ev.strip()][:10], + "_count": 1, + } + + logger.info(f"Stufe B abgeschlossen: {len(all_raw_relations)} roh -> {len(merged)} gemerged") + + # Gewichts-Boost für mehrfach belegte Beziehungen + for m in merged.values(): + if m["_count"] >= 3 and m["weight"] < 5: + m["weight"] = min(5, m["weight"] + 1) + + # --- In DB speichern --- + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": 85, + }) + + saved_relations = [] + for m in merged.values(): + try: + cursor = await db.execute( + """INSERT INTO network_relations + (network_analysis_id, source_entity_id, target_entity_id, + category, label, description, weight, status, evidence, tenant_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + analysis_id, m["source_id"], m["target_id"], m["category"], + m["label"], m["description"], m["weight"], m["status"], + json.dumps(m["evidence"], ensure_ascii=False), + tenant_id, + ), + ) + saved_relations.append({"id": cursor.lastrowid}) + except Exception as e: + logger.warning(f"Beziehung speichern fehlgeschlagen: {e}") + + await db.commit() + logger.info(f"Phase 2 abgeschlossen: {len(saved_relations)} Beziehungen gespeichert") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": 100, + }) + + return saved_relations + + +# --------------------------------------------------------------------------- +# Phase 2c: Semantische Deduplication (Opus) +# --------------------------------------------------------------------------- + +async def _phase2c_semantic_dedup( + db, analysis_id: int, tenant_id: int, + entities: list[dict], usage_acc: UsageAccumulator, ws_manager=None, +) -> None: + """Semantische Deduplizierung via Opus — erkennt Synonyme, Abkürzungen, Sprachvarianten.""" + if len(entities) < 10: + return + + logger.info(f"Phase 2c: 
Semantische Dedup für {len(entities)} Entitäten") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "semantic_dedup", + "progress": 0, + }) + + # --- Clustering: Token-basiert + Abbreviation-Matching --- + token_to_ids = defaultdict(set) + db_id_map = {e["db_id"]: e for e in entities if e.get("db_id")} + + for ent in entities: + db_id = ent.get("db_id") + if not db_id: + continue + + all_names = [ent["name_normalized"]] + for a in ent.get("aliases", []): + if a: + all_names.append(a.lower()) + + # Token-basiertes Clustering + for name in all_names: + for word in re.split(r'[\s\-_/(),.;:]+', name): + word = word.strip() + if len(word) >= 3 and word not in _STOP_WORDS: + token_to_ids[word].add(db_id) + + # Abbreviation-Matching + all_display_names = [ent["name"]] + [a for a in ent.get("aliases", []) if a] + for name in all_display_names: + words = [w for w in re.split(r'[\s\-_/(),.;:]+', name) if w and len(w) >= 2] + if len(words) >= 2: + abbr = "".join(w[0] for w in words).lower() + if len(abbr) >= 2: + token_to_ids[f"_abbr_{abbr}"].add(db_id) + # Kurze Namen als potenzielle Abkürzungen + name_clean = re.sub(r'[^a-zA-Z]', '', name).lower() + if 2 <= len(name_clean) <= 6: + token_to_ids[f"_abbr_{name_clean}"].add(db_id) + + # Eindeutige Cluster filtern (≥ 2 Entities, ≤ 40 Entities) + seen_clusters = set() + candidate_clusters = [] + for token, db_ids in sorted(token_to_ids.items(), key=lambda x: len(x[1])): + if len(db_ids) < 2 or len(db_ids) > 40: + continue + key = frozenset(db_ids) + if key in seen_clusters: + continue + seen_clusters.add(key) + candidate_clusters.append(list(db_ids)) + + logger.info(f"Phase 2c: {len(candidate_clusters)} Kandidaten-Cluster gefunden") + + # --- Opus-Calls für jeden Cluster --- + merged_away = set() + total_merges = 0 + opus_calls = 0 + max_calls = 50 + + for ci, cluster_ids in enumerate(candidate_clusters): + if opus_calls >= max_calls: + logger.warning(f"Phase 2c: Max {max_calls} Opus-Calls erreicht, stoppe") + break + + # Bereits gemergte Entities filtern + active_ids = [did for did in cluster_ids if did not in merged_away and did in db_id_map] + if len(active_ids) < 2: + continue + + active_ents = [db_id_map[did] for did in active_ids] + + # Prompt bauen + lines = [] + for i, ent in enumerate(active_ents, 1): + aliases_str = ", ".join((ent.get("aliases") or [])[:5]) + line = f"{i}. 
{ent['name']} ({ent['type']}, {ent.get('mention_count', 0)} Erwähnungen)" + if aliases_str: + line += f" [Aliases: {aliases_str}]" + lines.append(line) + + prompt = SEMANTIC_DEDUP_PROMPT.format(entity_list="\n".join(lines)) + + try: + result_text, usage = await call_claude(prompt, tools=None, model=None) + usage_acc.add(usage) + opus_calls += 1 + except Exception as e: + logger.error(f"Phase 2c Opus-Call {opus_calls + 1} fehlgeschlagen: {e}") + continue + + parsed = _parse_json_response(result_text) + if not parsed: + continue + + for merge_group in parsed.get("merges", []): + keep_idx = merge_group.get("keep") + merge_indices = merge_group.get("merge", []) + if keep_idx is None or not merge_indices: + continue + if not isinstance(merge_indices, list): + merge_indices = [merge_indices] + + keep_idx -= 1 # 1-indexed → 0-indexed + if keep_idx < 0 or keep_idx >= len(active_ents): + continue + keep_ent = active_ents[keep_idx] + if keep_ent["db_id"] in merged_away: + continue + + for mi in merge_indices: + mi -= 1 + if mi < 0 or mi >= len(active_ents) or mi == keep_idx: + continue + merge_ent = active_ents[mi] + if merge_ent["db_id"] in merged_away: + continue + + logger.info(f"Semantic Merge: '{merge_ent['name']}' → '{keep_ent['name']}'") + await _merge_entity_in_db(db, analysis_id, keep_ent, merge_ent, entities) + merged_away.add(merge_ent["db_id"]) + db_id_map.pop(merge_ent["db_id"], None) + total_merges += 1 + + # Progress + progress = int((ci + 1) / len(candidate_clusters) * 100) if candidate_clusters else 100 + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "semantic_dedup", + "progress": min(progress, 100), + }) + + if total_merges > 0: + await db.commit() + + logger.info( + f"Phase 2c abgeschlossen: {total_merges} Merges, {opus_calls} Opus-Calls, " + f"{len(entities)} Entitäten verbleiben" + ) + + +# --------------------------------------------------------------------------- +# Phase 2d: Cleanup +# --------------------------------------------------------------------------- + +async def _phase2d_cleanup( + db, analysis_id: int, entities: list[dict], ws_manager=None, +) -> None: + """Cleanup: Self-Loops, Richtungsnormalisierung, Duplikat-Relations, verwaiste Entities.""" + logger.info("Phase 2d: Cleanup") + + # 1. Self-Loops entfernen + cursor = await db.execute( + "DELETE FROM network_relations WHERE network_analysis_id = ? AND source_entity_id = target_entity_id", + (analysis_id,), + ) + self_loops = cursor.rowcount + + # 2. Richtungsnormalisierung (nach Merges können Richtungen inkonsistent sein) + await db.execute( + """UPDATE network_relations + SET source_entity_id = target_entity_id, target_entity_id = source_entity_id + WHERE source_entity_id > target_entity_id AND network_analysis_id = ?""", + (analysis_id,), + ) + + # 3. Duplikat-Relations entfernen (gleiche source+target+category nach Normalisierung) + cursor = await db.execute( + """DELETE FROM network_relations + WHERE network_analysis_id = ? AND id NOT IN ( + SELECT MIN(id) FROM network_relations + WHERE network_analysis_id = ? + GROUP BY source_entity_id, target_entity_id, category + )""", + (analysis_id, analysis_id), + ) + dup_relations = cursor.rowcount + + # 4. Verwaiste Entities entfernen (keine Verbindungen) + cursor = await db.execute( + """DELETE FROM network_entities + WHERE network_analysis_id = ? AND id NOT IN ( + SELECT source_entity_id FROM network_relations WHERE network_analysis_id = ? 
+ UNION + SELECT target_entity_id FROM network_relations WHERE network_analysis_id = ? + )""", + (analysis_id, analysis_id, analysis_id), + ) + orphans = cursor.rowcount + + # Entities-Liste aktualisieren + remaining_ids = set() + cursor = await db.execute( + "SELECT id FROM network_entities WHERE network_analysis_id = ?", (analysis_id,) + ) + for row in await cursor.fetchall(): + remaining_ids.add(row["id"]) + entities[:] = [e for e in entities if e.get("db_id") in remaining_ids] + + await db.commit() + + logger.info( + f"Phase 2d abgeschlossen: {self_loops} Self-Loops, {dup_relations} Duplikat-Relations, " + f"{orphans} verwaiste Entities entfernt, {len(entities)} Entitäten verbleiben" + ) + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "cleanup", + "progress": 100, + }) + + +# --------------------------------------------------------------------------- +# Phase 3: Finalisierung +# --------------------------------------------------------------------------- + +async def _phase3_finalize( + db, analysis_id, tenant_id, entity_count, relation_count, + article_ids, factcheck_ids, article_ts, factcheck_ts, + usage_acc, ws_manager=None, +): + """Finalisiert: Zähler, Hash, Log, Status.""" + data_hash = _compute_data_hash(article_ids, factcheck_ids, article_ts, factcheck_ts) + now = datetime.now(TIMEZONE).strftime("%Y-%m-%d %H:%M:%S") + + await db.execute( + """UPDATE network_analyses + SET entity_count = ?, relation_count = ?, status = 'ready', + last_generated_at = ?, data_hash = ? + WHERE id = ?""", + (entity_count, relation_count, now, data_hash, analysis_id), + ) + + await db.execute( + """INSERT INTO network_generation_log + (network_analysis_id, completed_at, status, input_tokens, output_tokens, + cache_creation_tokens, cache_read_tokens, total_cost_usd, api_calls, + entity_count, relation_count, tenant_id) + VALUES (?, ?, 'completed', ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + (analysis_id, now, usage_acc.input_tokens, usage_acc.output_tokens, + usage_acc.cache_creation_tokens, usage_acc.cache_read_tokens, + usage_acc.total_cost_usd, usage_acc.call_count, + entity_count, relation_count, tenant_id), + ) + + await db.commit() + + logger.info(f"Analyse {analysis_id} finalisiert: {entity_count} Entitäten, " + f"{relation_count} Beziehungen, ${usage_acc.total_cost_usd:.4f}") + + await _broadcast(ws_manager, "network_complete", { + "analysis_id": analysis_id, + "entity_count": entity_count, + "relation_count": relation_count, + "cost_usd": round(usage_acc.total_cost_usd, 4), + }) + + +# --------------------------------------------------------------------------- +# Hauptfunktion +# --------------------------------------------------------------------------- + +async def extract_and_relate_entities(analysis_id: int, tenant_id: int, ws_manager=None): + """Hauptfunktion: Entity-Extraktion + Beziehungsanalyse. 
+ + Phase 1: Haiku extrahiert Entitäten aus Artikeln (in Batches) + Phase 2a: Entity-Dedup nach name_normalized (Code, kein API) + Phase 2: Haiku extrahiert Beziehungen pro Batch, dann Merge + Deduplizierung + Phase 2c: Semantische Dedup via Opus (Cluster-weise) + Phase 2d: Cleanup (Self-Loops, Verwaiste, Duplikat-Relations) + Phase 3: Finalisierung (Zähler, Hash, Log) + """ + from database import get_db + + db = await get_db() + usage_acc = UsageAccumulator() + + try: + if not await _check_analysis_exists(db, analysis_id): + logger.warning(f"Analyse {analysis_id} existiert nicht") + return + + await db.execute( + "UPDATE network_analyses SET status = 'generating' WHERE id = ?", + (analysis_id,), + ) + await db.commit() + + # Incident-IDs laden + cursor = await db.execute( + "SELECT incident_id FROM network_analysis_incidents WHERE network_analysis_id = ?", + (analysis_id,), + ) + incident_ids = [row["incident_id"] for row in await cursor.fetchall()] + + if not incident_ids: + logger.warning(f"Analyse {analysis_id}: Keine Lagen verknüpft") + await db.execute("UPDATE network_analyses SET status = 'error' WHERE id = ?", (analysis_id,)) + await db.commit() + await _broadcast(ws_manager, "network_error", { + "analysis_id": analysis_id, "error": "Keine Lagen verknüpft", + }) + return + + # Artikel laden + placeholders = ",".join("?" * len(incident_ids)) + cursor = await db.execute( + f"""SELECT id, incident_id, headline, headline_de, source, source_url, + content_original, content_de, collected_at + FROM articles WHERE incident_id IN ({placeholders})""", + incident_ids, + ) + article_rows = await cursor.fetchall() + articles = [] + article_ids = [] + article_ts = [] + for r in article_rows: + articles.append({ + "id": r["id"], "incident_id": r["incident_id"], + "headline": r["headline"], "headline_de": r["headline_de"], + "source": r["source"], "source_url": r["source_url"], + "content_original": r["content_original"], "content_de": r["content_de"], + }) + article_ids.append(r["id"]) + article_ts.append(r["collected_at"] or "") + + # Faktenchecks laden + cursor = await db.execute( + f"""SELECT id, incident_id, claim, status, evidence, checked_at + FROM fact_checks WHERE incident_id IN ({placeholders})""", + incident_ids, + ) + fc_rows = await cursor.fetchall() + factchecks = [] + factcheck_ids = [] + factcheck_ts = [] + for r in fc_rows: + factchecks.append({ + "id": r["id"], "incident_id": r["incident_id"], + "claim": r["claim"], "status": r["status"], "evidence": r["evidence"], + }) + factcheck_ids.append(r["id"]) + factcheck_ts.append(r["checked_at"] or "") + + logger.info(f"Analyse {analysis_id}: {len(articles)} Artikel, " + f"{len(factchecks)} Faktenchecks aus {len(incident_ids)} Lagen") + + # Phase 1: Entity-Extraktion + if not await _check_analysis_exists(db, analysis_id): + return + + entities = await _phase1_extract_entities( + db, analysis_id, tenant_id, articles, factchecks, usage_acc, ws_manager, + ) + + # Phase 2a: Entity-Deduplication + if not await _check_analysis_exists(db, analysis_id): + return + + await _phase2a_deduplicate_entities(db, analysis_id, entities, ws_manager) + + # Phase 2: Beziehungsextraktion + if not await _check_analysis_exists(db, analysis_id): + return + + relations = await _phase2_analyze_relationships( + db, analysis_id, tenant_id, entities, articles, factchecks, usage_acc, ws_manager, + ) + + # Phase 2c: Semantische Deduplication + if not await _check_analysis_exists(db, analysis_id): + return + + await _phase2c_semantic_dedup( + db, analysis_id, 
tenant_id, entities, usage_acc, ws_manager, + ) + + # Phase 2d: Cleanup + if not await _check_analysis_exists(db, analysis_id): + return + + await _phase2d_cleanup(db, analysis_id, entities, ws_manager) + + # Finale Zähler aus DB (nach allen Cleanups) + cursor = await db.execute( + "SELECT COUNT(*) as cnt FROM network_entities WHERE network_analysis_id = ?", + (analysis_id,), + ) + row = await cursor.fetchone() + final_entity_count = row["cnt"] if row else len(entities) + + cursor = await db.execute( + "SELECT COUNT(*) as cnt FROM network_relations WHERE network_analysis_id = ?", + (analysis_id,), + ) + row = await cursor.fetchone() + final_relation_count = row["cnt"] if row else len(relations) + + # Phase 3: Finalisierung + if not await _check_analysis_exists(db, analysis_id): + return + + await _phase3_finalize( + db, analysis_id, tenant_id, + entity_count=final_entity_count, relation_count=final_relation_count, + article_ids=article_ids, factcheck_ids=factcheck_ids, + article_ts=article_ts, factcheck_ts=factcheck_ts, + usage_acc=usage_acc, ws_manager=ws_manager, + ) + + except Exception as e: + logger.error(f"Entity-Extraktion fehlgeschlagen (Analyse {analysis_id}): {e}", exc_info=True) + try: + await db.execute("UPDATE network_analyses SET status = 'error' WHERE id = ?", (analysis_id,)) + await db.commit() + except Exception: + pass + await _broadcast(ws_manager, "network_error", { + "analysis_id": analysis_id, "error": str(e), + }) + finally: + await db.close() diff --git a/src/config.py b/src/config.py index a7cd097..b620a24 100644 --- a/src/config.py +++ b/src/config.py @@ -24,6 +24,7 @@ CLAUDE_PATH = os.environ.get("CLAUDE_PATH", "/usr/bin/claude") CLAUDE_TIMEOUT = 1800 # Sekunden (30 Min - Lage-Updates mit vielen Artikeln brauchen mehr Zeit) # Claude Modelle CLAUDE_MODEL_FAST = "claude-haiku-4-5-20251001" # Für einfache Aufgaben (Feed-Selektion) +CLAUDE_MODEL_MEDIUM = "claude-sonnet-4-6" # Für qualitätskritische Aufgaben (Netzwerkanalyse) # Ausgabesprache (Lagebilder, Faktenchecks, Zusammenfassungen) OUTPUT_LANGUAGE = "Deutsch" diff --git a/src/main.py b/src/main.py index 138b857..de19369 100644 --- a/src/main.py +++ b/src/main.py @@ -298,11 +298,11 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): response = await call_next(request) response.headers["Content-Security-Policy"] = ( "default-src 'self'; " - "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net https://unpkg.com; " + "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; " "style-src 'self' 'unsafe-inline' https://fonts.googleapis.com https://cdn.jsdelivr.net; " "font-src 'self' https://fonts.gstatic.com; " - "img-src 'self' data: https://tile.openstreetmap.de https://server.arcgisonline.com; " - "connect-src 'self' wss: ws: https://earthquake.usgs.gov https://api.gdeltproject.org; " + "img-src 'self' data: https://tile.openstreetmap.de; " + "connect-src 'self' wss: ws:; " "frame-ancestors 'none'" ) response.headers["Permissions-Policy"] = ( @@ -334,7 +334,6 @@ from routers.public_api import router as public_api_router from routers.chat import router as chat_router from routers.network_analysis import router as network_analysis_router from routers.tutorial import router as tutorial_router -from routers.geoint import router as geoint_router app.include_router(auth_router) app.include_router(incidents_router) @@ -345,16 +344,6 @@ app.include_router(public_api_router) app.include_router(chat_router, prefix="/api/chat") app.include_router(network_analysis_router) app.include_router(tutorial_router) 
-app.include_router(geoint_router, prefix="/api/geoint") - -@app.on_event("startup") -@app.on_event("startup") -async def _start_aisstream_on_startup(): - import asyncio - from routers.geoint import _start_aisstream - await asyncio.sleep(3) - _start_aisstream() - @app.websocket("/api/ws") diff --git a/src/routers/geoint.py b/src/routers/geoint.py deleted file mode 100644 index f1e36ea..0000000 --- a/src/routers/geoint.py +++ /dev/null @@ -1,300 +0,0 @@ -"""GEOINT-Router: Proxy fuer externe Echtzeit-Datenquellen (Flugverkehr, Schiffsverkehr, GDELT).""" -import asyncio -import json as _json -import logging -import time -from typing import Optional - -import httpx -import websockets -from fastapi import APIRouter, Depends, Query - -from auth import get_current_user - -logger = logging.getLogger("osint.geoint") - -router = APIRouter(tags=["geoint"]) - -# --------------------------------------------------------------------------- -# Einfacher In-Memory-Cache -# --------------------------------------------------------------------------- -_cache: dict[str, tuple[float, dict]] = {} - - -def _get_cached(key: str, ttl: float) -> Optional[dict]: - if key in _cache: - ts, data = _cache[key] - if time.time() - ts < ttl: - return data - return None - - -def _set_cache(key: str, data: dict): - _cache[key] = (time.time(), data) - if len(_cache) > 50: - oldest = min(_cache, key=lambda k: _cache[k][0]) - del _cache[oldest] - - -# --------------------------------------------------------------------------- -# Flugverkehr: Globaler Snapshot (airplanes.live) -# --------------------------------------------------------------------------- - -_FLIGHT_GRID = [ - # Europa - (48.0, 2.0), # Westeuropa (Paris) - (48.0, 16.0), # Mitteleuropa (Wien) - (55.0, 10.0), # Nordeuropa (Daenemark) - (40.0, -4.0), # Iberische Halbinsel - (41.0, 12.0), # Suedeuropa (Rom) - (38.0, 24.0), # Suedosteuropa (Griechenland) - (55.0, 25.0), # Baltikum - (60.0, 25.0), # Skandinavien-Ost - (52.0, 30.0), # Osteuropa - (45.0, 37.0), # Schwarzes Meer / Tuerkei Ost - # UK / Island - (54.0, -2.0), # UK - (63.0, -19.0), # Island - # Naher Osten (erweitert) - (33.0, 36.0), # Levante (Syrien/Libanon/Israel) - (30.0, 31.0), # Aegypten / Kairo - (25.0, 45.0), # Saudi-Arabien Zentral - (26.5, 56.0), # Strasse von Hormuz / VAE - (25.0, 51.5), # Katar / Bahrain - (33.0, 44.0), # Irak (Bagdad) - (33.0, 52.0), # Iran (Teheran) - (15.0, 45.0), # Jemen / Rotes Meer - (21.0, 40.0), # Saudi-Arabien West (Dschidda) - # Nordafrika - (34.0, 2.0), # Maghreb (Algier) - (33.0, -7.0), # Marokko (Casablanca) - (32.0, 13.0), # Libyen (Tripolis) - # Zentralasien - (41.0, 69.0), # Usbekistan (Taschkent) - (39.0, 63.0), # Turkmenistan - # Nordamerika Ostkueste - (40.0, -74.0), # New York - (33.0, -84.0), # Atlanta - (42.0, -88.0), # Chicago - (26.0, -80.0), # Florida (Miami) - (45.0, -74.0), # Montreal - # Nordamerika Westkueste - (34.0, -118.0), # Los Angeles - (47.0, -122.0), # Seattle - (37.0, -122.0), # San Francisco - # Nordamerika Zentral - (30.0, -97.0), # Texas (Austin) - (39.0, -105.0), # Denver - # Ostasien - (35.0, 140.0), # Japan (Tokio) - (37.0, 127.0), # Korea (Seoul) - (31.0, 121.0), # Shanghai - (40.0, 117.0), # Peking - (22.0, 114.0), # Hongkong - (25.0, 121.0), # Taiwan - # Suedasien - (19.0, 73.0), # Mumbai - (28.0, 77.0), # Delhi - (13.0, 80.0), # Chennai - (7.0, 80.0), # Sri Lanka - # Suedostasien - (1.0, 104.0), # Singapur - (14.0, 101.0), # Bangkok - (-6.0, 107.0), # Jakarta - (10.0, 107.0), # Ho-Chi-Minh - # Ozeanien - (-34.0, 151.0), # Sydney - 
(-37.0, 175.0), # Neuseeland - # Afrika - (-1.0, 37.0), # Nairobi - (-34.0, 18.0), # Kapstadt - (6.0, 3.0), # Lagos - (9.0, 39.0), # Addis Abeba - # Suedamerika - (-23.0, -43.0), # Rio de Janeiro - (-34.0, -58.0), # Buenos Aires - (-12.0, -77.0), # Lima - (4.0, -74.0), # Bogota -] - -_flight_lock = asyncio.Lock() - - -async def _fetch_global_flights() -> dict: - """Holt Flugdaten fuer alle Stuetzpunkte parallel.""" - cached = _get_cached("flights_global", ttl=30) - if cached: - return cached - - async with _flight_lock: - cached = _get_cached("flights_global", ttl=30) - if cached: - return cached - - seen: dict[str, dict] = {} - errors = 0 - - async with httpx.AsyncClient(timeout=10) as client: - for i in range(0, len(_FLIGHT_GRID), 8): - batch = _FLIGHT_GRID[i:i + 8] - tasks = [client.get(f"https://api.airplanes.live/v2/point/{lat:.2f}/{lon:.2f}/250") - for lat, lon in batch] - results = await asyncio.gather(*tasks, return_exceptions=True) - for r in results: - if isinstance(r, Exception): - errors += 1 - continue - try: - data = r.json() - for ac in data.get("ac", []): - hex_id = ac.get("hex") - if hex_id and hex_id not in seen: - seen[hex_id] = ac - except Exception: - errors += 1 - if i + 8 < len(_FLIGHT_GRID): - await asyncio.sleep(0.3) - - result = {"ac": list(seen.values()), "total": len(seen), "errors": errors} - logger.info(f"GEOINT Flights: {len(seen)} Flugzeuge ({errors} Fehler)") - _set_cache("flights_global", result) - return result - - -@router.get("/flights") -async def get_flights(_user: dict = Depends(get_current_user)): - """Globaler Flugverkehr-Snapshot. 30s Cache.""" - return await _fetch_global_flights() - - -# --------------------------------------------------------------------------- -# Schiffsverkehr: AISStream.io (globales Echtzeit-AIS via WebSocket) -# --------------------------------------------------------------------------- - -_AISSTREAM_KEY = "1a56b078db829727abd4d617937bae51c6f9973e" -_AISSTREAM_URL = "wss://stream.aisstream.io/v0/stream" - -# Globaler Schiffs-Store: {mmsi: {lat, lon, sog, cog, heading, name, ship_type, ts}} -_ships_store: dict[int, dict] = {} -_ships_lock = asyncio.Lock() -_ships_ws_task: Optional[asyncio.Task] = None -_ships_connected = False - - -async def _aisstream_listener(): - """Dauerhafter WebSocket-Client fuer AISStream. 
Sammelt Schiffspositionen.""" - global _ships_connected - while True: - try: - logger.info("AISStream: Verbinde...") - async with websockets.connect(_AISSTREAM_URL, ping_interval=30, ping_timeout=10, - close_timeout=5) as ws: - # Subscription: globale BoundingBox, nur Positionsberichte - sub = { - "APIKey": _AISSTREAM_KEY, - "BoundingBoxes": [[[-90, -180], [90, 180]]], - "FilterMessageTypes": ["PositionReport"], - } - await ws.send(_json.dumps(sub)) - _ships_connected = True - logger.info("AISStream: Verbunden, empfange Schiffsdaten...") - - async for raw in ws: - try: - text = raw.decode("utf-8") if isinstance(raw, bytes) else raw - msg = _json.loads(text) - meta = msg.get("MetaData", {}) - mmsi = meta.get("MMSI") - if not mmsi: - continue - - pos = msg.get("Message", {}).get("PositionReport", {}) - lat = meta.get("latitude") or pos.get("Latitude") - lon = meta.get("longitude") or pos.get("Longitude") - if not lat or not lon or not (-90 <= lat <= 90 and -180 <= lon <= 180): - continue - - _ships_store[mmsi] = { - "mmsi": mmsi, - "lat": round(lat, 5), - "lon": round(lon, 5), - "sog": round(pos.get("Sog", 0), 1), - "cog": round(pos.get("Cog", 0), 1), - "heading": pos.get("TrueHeading", 0), - "name": (meta.get("ShipName") or "").strip(), - "ts": time.time(), - } - if len(_ships_store) % 1000 == 0: - logger.info(f"AISStream: {len(_ships_store)} Schiffe gesammelt") - - # Alte Eintraege alle 60s bereinigen (>15 Min alt) - if len(_ships_store) % 500 == 0: - cutoff = time.time() - 900 - stale = [k for k, v in _ships_store.items() if v["ts"] < cutoff] - for k in stale: - del _ships_store[k] - - except Exception as parse_err: - if len(_ships_store) < 5: - logger.warning(f"AISStream Parse-Fehler: {parse_err}, raw type: {type(raw)}, first 100: {str(raw)[:100]}") - continue - - except Exception as e: - _ships_connected = False - logger.warning(f"AISStream Fehler: {e}. Reconnect in 10s...") - await asyncio.sleep(10) - - -def _start_aisstream(): - """Startet den AISStream-Listener als Background-Task.""" - global _ships_ws_task - if _ships_ws_task is None or _ships_ws_task.done(): - _ships_ws_task = asyncio.create_task(_aisstream_listener()) - logger.info("AISStream Background-Task gestartet") - - -@router.get("/ships") -async def get_ships(_user: dict = Depends(get_current_user)): - """Globaler Schiffsverkehr aus AISStream. Echtzeit-Positionen.""" - # Lazy-Start: WebSocket-Listener beim ersten Abruf starten - _start_aisstream() - - ships = list(_ships_store.values()) - return { - "ships": ships, - "total": len(ships), - "connected": _ships_connected, - } - - -# --------------------------------------------------------------------------- -# GDELT Nachrichten -# --------------------------------------------------------------------------- -@router.get("/gdelt") -async def get_gdelt( - query: str = Query("conflict", max_length=200), - _user: dict = Depends(get_current_user), -): - """Proxy fuer GDELT GEO 2.0 API. 
60s Cache.""" - cache_key = f"gdelt:{query[:50]}" - cached = _get_cached(cache_key, ttl=60) - if cached: - return cached - - url = ( - "https://api.gdeltproject.org/api/v2/geo/geo" - f"?query={query}&mode=PointData&format=GeoJSON" - "×pan=24h&maxrows=200" - ) - try: - async with httpx.AsyncClient(timeout=12) as client: - resp = await client.get(url) - resp.raise_for_status() - data = resp.json() - except Exception as e: - logger.warning(f"GDELT Fehler: {e}") - return {"type": "FeatureCollection", "features": []} - - _set_cache(cache_key, data) - return data - diff --git a/src/static/css/geoint.css b/src/static/css/geoint.css deleted file mode 100644 index b8d9c2c..0000000 --- a/src/static/css/geoint.css +++ /dev/null @@ -1,321 +0,0 @@ -/* ===================================================================== - GEOINT-Modus: Taktische Kartenansicht mit Echtzeit-Datenlayern - ===================================================================== */ - -/* --- Toggle-Checkbox im Card-Header --- */ -.geoint-toggle { - display: inline-flex; - align-items: center; - gap: 5px; - cursor: pointer; - user-select: none; - margin-right: 8px; -} -.geoint-toggle input[type="checkbox"] { - accent-color: #00ff88; - width: 13px; - height: 13px; - cursor: pointer; -} -.geoint-toggle-label { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 12px; - font-weight: 700; - font-weight: 700; - letter-spacing: 1.5px; - text-transform: uppercase; - color: var(--text-secondary); - transition: color 0.2s; -} -.geoint-toggle input:checked + .geoint-toggle-label { - color: #00ff88; - text-shadow: 0 0 6px rgba(0, 255, 136, 0.4); -} - -/* --- Taktisches Styling (aktiv) --- */ -.geoint-active .leaflet-tile-pane { - filter: brightness(0.88) contrast(1.08) saturate(0.85); - transition: filter 0.4s ease; -} -/* Scanline-Overlay (subtiler Effekt, kein Blocking) */ -.geoint-active .map-empty { display: none !important; } - -/* Gruener Akzent am Card-Header wenn aktiv */ -.map-card.geoint-card-active .card-header { - border-bottom: 2px solid rgba(0, 255, 136, 0.25); -} - -/* --- Sub-Layer Control Panel --- */ -.geoint-sub-control { - background: rgba(11, 17, 33, 0.92); - border: 1px solid rgba(0, 255, 136, 0.2); - border-radius: 6px; - padding: 10px 12px; - min-width: 170px; - backdrop-filter: blur(8px); - box-shadow: 0 4px 16px rgba(0, 0, 0, 0.4); -} -.geoint-sub-control h4 { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 9px; - font-weight: 700; - letter-spacing: 2px; - text-transform: uppercase; - color: #00ff88; - margin: 0 0 8px 0; - padding-bottom: 6px; - border-bottom: 1px solid rgba(0, 255, 136, 0.15); -} -.geoint-sub-item { - display: flex; - align-items: center; - gap: 6px; - padding: 3px 0; -} -.geoint-sub-item input[type="checkbox"] { - accent-color: #00ff88; - width: 12px; - height: 12px; - cursor: pointer; - flex-shrink: 0; -} -.geoint-sub-item label { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 11px; - color: rgba(255, 255, 255, 0.8); - cursor: pointer; - white-space: nowrap; -} -.geoint-sub-item label .geoint-dot { - display: inline-block; - width: 7px; - height: 7px; - border-radius: 50%; - margin-right: 4px; - vertical-align: middle; -} -.geoint-dot-flights { background: #00ff88; } -.geoint-dot-ships { background: #4499ff; } -.geoint-dot-quakes { background: #ff4444; } -.geoint-dot-gdelt { background: #44aaff; } -.geoint-dot-heatmap { background: #ff8800; } -.geoint-dot-coords { background: #aaaaaa; } -.geoint-dot-distance { background: 
#ff2222; } -.geoint-sub-separator { - height: 1px; - background: rgba(0, 255, 136, 0.1); - margin: 5px 0; -} - -/* --- Flugzeug-Icons --- */ -.geoint-aircraft { - display: flex; - align-items: center; - justify-content: center; - transition: filter 0.15s; -} -.geoint-aircraft:hover { - filter: drop-shadow(0 0 6px #00ff88); -} -.geoint-aircraft svg { - width: 14px; - height: 14px; -} - -/* --- Schiffs-Icons --- */ -.geoint-ship { - display: flex; - align-items: center; - justify-content: center; - transition: filter 0.15s; -} -.geoint-ship:hover { - filter: drop-shadow(0 0 4px #4499ff); -} - -/* --- Erdbeben Puls-Animation --- */ -.geoint-quake-marker { - animation: geoint-pulse 2.5s ease-in-out infinite; -} -@keyframes geoint-pulse { - 0%, 100% { opacity: 0.7; } - 50% { opacity: 1; } -} - -/* --- GDELT Nachrichtenmarker --- */ -.geoint-gdelt-icon { - display: flex; - align-items: center; - justify-content: center; - width: 18px; - height: 18px; - background: rgba(68, 170, 255, 0.85); - border: 1.5px solid rgba(68, 170, 255, 1); - border-radius: 50%; - font-size: 12px; - font-weight: 700; - color: #fff; - font-weight: 700; - box-shadow: 0 0 4px rgba(68, 170, 255, 0.5); -} - -/* --- Koordinatenanzeige --- */ -.geoint-coord-display { - background: rgba(11, 17, 33, 0.88); - border: 1px solid rgba(0, 255, 136, 0.2); - border-radius: 4px; - padding: 4px 8px; - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 11px; - color: #00ff88; - letter-spacing: 0.5px; - white-space: nowrap; - backdrop-filter: blur(4px); -} - -/* --- Distanzmessung --- */ -.geoint-distance-label { - box-shadow: 0 2px 8px rgba(0,0,0,0.6); - background: rgba(11, 17, 33, 0.9); - border: 1px solid rgba(255, 34, 34, 0.6); - border-radius: 3px; - padding: 2px 6px; - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 12px; - font-weight: 700; - color: #ffffff; - white-space: nowrap; -} - -/* --- Timeline-Slider --- */ -.geoint-timeline { - display: none; - padding: 6px 12px 8px; - background: rgba(11, 17, 33, 0.6); - border-top: 1px solid rgba(0, 255, 136, 0.1); -} -.geoint-active .geoint-timeline { - display: flex; - align-items: center; - gap: 10px; -} -.geoint-timeline input[type="range"] { - flex: 1; - height: 4px; - -webkit-appearance: none; - appearance: none; - background: rgba(255, 255, 255, 0.12); - border-radius: 2px; - outline: none; - cursor: pointer; -} -.geoint-timeline input[type="range"]::-webkit-slider-thumb { - -webkit-appearance: none; - width: 14px; - height: 14px; - background: #00ff88; - border-radius: 50%; - border: 2px solid rgba(11, 17, 33, 0.8); - cursor: pointer; - box-shadow: 0 0 6px rgba(0, 255, 136, 0.5); -} -.geoint-timeline input[type="range"]::-moz-range-thumb { - width: 14px; - height: 14px; - background: #00ff88; - border-radius: 50%; - border: 2px solid rgba(11, 17, 33, 0.8); - cursor: pointer; -} -.geoint-timeline-label { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 12px; - font-weight: 700; - color: #00ff88; - min-width: 90px; - text-align: center; -} -.geoint-timeline-btn { - background: none; - border: 1px solid rgba(0, 255, 136, 0.3); - border-radius: 3px; - color: #00ff88; - font-size: 11px; - padding: 2px 6px; - cursor: pointer; - font-family: var(--font-mono, 'Courier New', monospace); -} -.geoint-timeline-btn:hover { - background: rgba(0, 255, 136, 0.1); -} - -/* --- Popup-Styling fuer GEOINT-Layer --- */ -/* Dunkler Popup-Hintergrund fuer GEOINT-Layer */ -.geoint-leaflet-popup .leaflet-popup-content-wrapper { - 
background: rgba(11, 17, 33, 0.95); - border: 1px solid rgba(0, 255, 136, 0.25); - border-radius: 6px; - box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5); -} -.geoint-leaflet-popup .leaflet-popup-tip { - background: rgba(11, 17, 33, 0.95); -} -.geoint-leaflet-popup .leaflet-popup-close-button { - color: rgba(255, 255, 255, 0.6); -} -.geoint-leaflet-popup .leaflet-popup-close-button:hover { - color: #00ff88; -} -.geoint-popup { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 12px; - line-height: 1.6; - color: #ffffff; -} -.geoint-popup strong { - color: #00ff88; - font-size: 13px; -} -.geoint-popup .geoint-popup-row { - display: flex; - gap: 8px; -} -.geoint-popup .geoint-popup-key { - color: rgba(255, 255, 255, 0.55); - min-width: 40px; -} - -/* --- Light Theme Overrides --- */ -[data-theme="light"] .geoint-sub-control { - background: rgba(240, 243, 248, 0.95); - border-color: rgba(0, 160, 80, 0.25); -} -[data-theme="light"] .geoint-sub-control h4 { - color: #008844; -} -[data-theme="light"] .geoint-sub-item label { - color: rgba(0, 0, 0, 0.75); -} -[data-theme="light"] .geoint-coord-display { - background: rgba(240, 243, 248, 0.92); - color: #006633; - border-color: rgba(0, 160, 80, 0.25); -} -[data-theme="light"] .geoint-timeline { - background: rgba(240, 243, 248, 0.7); - border-top-color: rgba(0, 160, 80, 0.15); -} -[data-theme="light"] .geoint-timeline input[type="range"]::-webkit-slider-thumb { - background: #008844; -} -[data-theme="light"] .geoint-timeline-label { - color: #006633; -} -[data-theme="light"] .geoint-toggle input:checked + .geoint-toggle-label { - color: #008844; - text-shadow: none; -} -[data-theme="light"] .geoint-active .leaflet-tile-pane { - filter: brightness(0.95) contrast(1.05) saturate(0.9); -} -/* Light theme scanline overlay disabled */ diff --git a/src/static/css/network-cluster.css b/src/static/css/network-cluster.css new file mode 100644 index 0000000..c7244bc --- /dev/null +++ b/src/static/css/network-cluster.css @@ -0,0 +1,188 @@ +/* ================================================================= + AegisSight OSINT Monitor - Cluster Graph Styles + Hierarchical country-based network visualization + ================================================================= */ + +/* ---- Breadcrumb ---- */ + +.cluster-breadcrumb { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 16px; + background: rgba(15, 23, 42, 0.6); + border-bottom: 1px solid var(--border, #1e293b); + font-size: 13px; + min-height: 36px; + flex-shrink: 0; +} + +.breadcrumb-item { + color: #94a3b8; + font-size: 13px; +} + +.breadcrumb-item.active { + color: #f1f5f9; + font-weight: 600; +} + +.breadcrumb-item.clickable { + cursor: pointer; + color: #60a5fa; + transition: color 0.15s; +} + +.breadcrumb-item.clickable:hover { + color: #93bbfc; + text-decoration: underline; +} + +.breadcrumb-separator { + color: #475569; + font-size: 14px; + user-select: none; +} + +.cluster-back-btn { + display: inline-flex; + align-items: center; + gap: 4px; + background: transparent; + border: 1px solid #334155; + color: #94a3b8; + padding: 3px 10px; + border-radius: 4px; + cursor: pointer; + font-size: 12px; + font-family: inherit; + transition: all 0.15s; + white-space: nowrap; +} + +.cluster-back-btn:hover { + border-color: #60a5fa; + color: #60a5fa; + background: rgba(96, 165, 250, 0.08); +} + +/* ---- View Toggle Button ---- */ + +.network-view-toggle { + display: inline-flex; + align-items: center; + gap: 0; + background: rgba(30, 41, 59, 0.6); + border: 1px solid 
#334155; + border-radius: 6px; + padding: 2px; + margin-right: 8px; +} + +.network-view-toggle-btn { + padding: 5px 12px; + background: transparent; + border: none; + color: #94a3b8; + font-size: 12px; + font-family: inherit; + cursor: pointer; + border-radius: 4px; + transition: all 0.2s; + white-space: nowrap; +} + +.network-view-toggle-btn.active { + background: #334155; + color: #f1f5f9; + font-weight: 600; +} + +.network-view-toggle-btn:hover:not(.active) { + color: #e2e8f0; + background: rgba(51, 65, 85, 0.4); +} + +/* ---- Cluster Graph SVG ---- */ + +.cg-zoom-layer { + /* Smooth transitions handled by d3 */ +} + +/* Country nodes */ +.cg-country-node { + transition: filter 0.2s; +} + +.cg-country-circle { + transition: stroke-width 0.2s, opacity 0.2s; + filter: drop-shadow(0 2px 8px rgba(0, 0, 0, 0.3)); +} + +.cg-country-node:hover .cg-country-circle { + filter: drop-shadow(0 4px 16px rgba(241, 245, 249, 0.15)); +} + +.cg-country-label { + text-shadow: 0 1px 3px rgba(0, 0, 0, 0.7); + user-select: none; +} + +.cg-country-count { + user-select: none; +} + +/* Detail nodes */ +.cg-detail-node circle { + transition: stroke 0.15s, stroke-width 0.15s, opacity 0.15s; +} + +.cg-detail-node text { + text-shadow: 0 1px 2px rgba(0, 0, 0, 0.8); + user-select: none; +} + +/* Links */ +.cg-links line, +.cg-detail-links line { + pointer-events: none; +} + +/* Legend */ +.cg-legend text { + user-select: none; +} + +/* ---- Tooltip ---- */ + +.cg-tooltip { + pointer-events: none; + backdrop-filter: blur(8px); + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4); + line-height: 1.5; +} + +.cg-tooltip hr { + border: none; + border-top: 1px solid #334155; + margin: 4px 0; +} + +/* ---- Responsive ---- */ + +@media (max-width: 768px) { + .cluster-breadcrumb { + padding: 6px 12px; + font-size: 12px; + } + + .cluster-back-btn { + font-size: 11px; + padding: 2px 8px; + } + + .network-view-toggle-btn { + padding: 4px 8px; + font-size: 11px; + } +} diff --git a/src/static/dashboard.html b/src/static/dashboard.html index 5e56ae5..0400388 100644 --- a/src/static/dashboard.html +++ b/src/static/dashboard.html @@ -19,7 +19,6 @@ -
Zum Hauptinhalt springen @@ -407,10 +406,6 @@' + this._esc(entity.description) + '
'; + html += '' + - this._esc(entity.description) + '
'; - } - - // Aliases - if (entity.aliases && entity.aliases.length > 0) { - html += 'Klicke auf einen Knoten, um Details anzuzeigen.
'; - } - }, - - // ---- tooltip helpers ------------------------------------------------------ - - _showTooltip(event, html) { - if (!this._tooltip) return; - this._tooltip - .style('display', 'block') - .html(html); - this._moveTooltip(event); - }, - - _moveTooltip(event) { - if (!this._tooltip) return; - this._tooltip - .style('left', (event.offsetX + 14) + 'px') - .style('top', (event.offsetY - 10) + 'px'); - }, - - _hideTooltip() { - if (!this._tooltip) return; - this._tooltip.style('display', 'none'); - }, - - // ---- string helpers ------------------------------------------------------- - - _esc(str) { - if (!str) return ''; - const div = document.createElement('div'); - div.appendChild(document.createTextNode(str)); - return div.innerHTML; - }, - - _csvField(val) { - const s = String(val == null ? '' : val); - if (s.includes(',') || s.includes('"') || s.includes('\n')) { - return '"' + s.replace(/"/g, '""') + '"'; - } - return s; - }, -}; +/** + * AegisSight OSINT Monitor - Network Graph Visualization + * + * Force-directed graph powered by d3.js v7. + * Expects d3 to be loaded globally from CDN before this script runs. + * + * Usage: + * NetworkGraph.init('network-graph-area', data); + * NetworkGraph.filterByType(new Set(['person', 'organisation'])); + * NetworkGraph.search('Russland'); + * NetworkGraph.destroy(); + */ + +/* global d3 */ + +const NetworkGraph = { + + // ---- internal state ------------------------------------------------------- + _svg: null, + _simulation: null, + _data: null, // raw data as received + _filtered: null, // currently visible subset + _container: null, //' + + this._esc(entity.description) + '
'; + } + + // Aliases + if (entity.aliases && entity.aliases.length > 0) { + html += 'Klicke auf einen Knoten, um Details anzuzeigen.
'; + } + }, + + // ---- tooltip helpers ------------------------------------------------------ + + _showTooltip(event, html) { + if (!this._tooltip) return; + this._tooltip + .style('display', 'block') + .html(html); + this._moveTooltip(event); + }, + + _moveTooltip(event) { + if (!this._tooltip) return; + this._tooltip + .style('left', (event.offsetX + 14) + 'px') + .style('top', (event.offsetY - 10) + 'px'); + }, + + _hideTooltip() { + if (!this._tooltip) return; + this._tooltip.style('display', 'none'); + }, + + // ---- string helpers ------------------------------------------------------- + + _esc(str) { + if (!str) return ''; + const div = document.createElement('div'); + div.appendChild(document.createTextNode(str)); + return div.innerHTML; + }, + + _csvField(val) { + const s = String(val == null ? '' : val); + if (s.includes(',') || s.includes('"') || s.includes('\n')) { + return '"' + s.replace(/"/g, '""') + '"'; + } + return s; + }, +};
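The rewritten network-graph.js documents its public surface in the header comment: NetworkGraph.init(containerId, data), filterByType(Set), search(term) and destroy(). A minimal sketch of how a dashboard controller might wire that surface up; the /api/network/graph endpoint and the { nodes, links } response shape are assumptions, only the NetworkGraph calls themselves come from the documented usage.

// Sketch: driving the NetworkGraph module from the dashboard.
// Assumption: the backend serves graph JSON at /api/network/graph
// as { nodes: [...], links: [...] } -- the real endpoint may differ.
async function showNetworkGraph() {
  const resp = await fetch('/api/network/graph', { credentials: 'same-origin' });
  if (!resp.ok) {
    console.warn('Network data not available:', resp.status);
    return;
  }
  const data = await resp.json();

  // Documented usage from the file header: container id + data object.
  NetworkGraph.init('network-graph-area', data);

  // Restrict the view to persons and organisations, then highlight matches.
  NetworkGraph.filterByType(new Set(['person', 'organisation']));
  NetworkGraph.search('Russland');
}

// Tear down the simulation before the page goes away.
window.addEventListener('beforeunload', () => NetworkGraph.destroy());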
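The header also states that the graph is force-directed and built on d3.js v7, with a _simulation handle kept in internal state. A sketch of the kind of simulation such a module typically constructs; the node/link field names (id, source, target) and the force parameters are assumptions, not values taken from the source.

// Sketch: a d3 v7 force simulation of the sort NetworkGraph presumably builds.
// Assumption: nodes carry an `id` field and links reference nodes by that id.
function createSimulation(nodes, links, width, height) {
  return d3.forceSimulation(nodes)
    .force('link', d3.forceLink(links).id(d => d.id).distance(80))  // keep linked nodes close
    .force('charge', d3.forceManyBody().strength(-200))             // push unrelated nodes apart
    .force('center', d3.forceCenter(width / 2, height / 2))         // centre the layout
    .force('collide', d3.forceCollide(24));                         // avoid label overlap
}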
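The _csvField helper quotes a field only when it contains a comma, a double quote, or a newline, and doubles any embedded quotes, i.e. RFC 4180-style quoting. A standalone sketch of how an export row could be assembled under that rule; the exportNodesCsv wrapper and its column set are illustrative and not part of the module.

// Sketch: building a CSV export with the same quoting rule as _csvField.
// Assumption: the exportNodesCsv wrapper and the column set are hypothetical.
function csvField(val) {
  const s = String(val == null ? '' : val);
  if (s.includes(',') || s.includes('"') || s.includes('\n')) {
    return '"' + s.replace(/"/g, '""') + '"';   // quote and double embedded quotes
  }
  return s;
}

function exportNodesCsv(nodes) {
  const header = ['id', 'label', 'type'].join(',');
  const rows = nodes.map(n => [n.id, n.label, n.type].map(csvField).join(','));
  return [header, ...rows].join('\n');
}

// exportNodesCsv([{ id: 1, label: 'Wagner "PMC", RU', type: 'organisation' }])
// -> 'id,label,type\n1,"Wagner ""PMC"", RU",organisation'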
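network-cluster.css introduces a two-button view toggle (.network-view-toggle-btn with an .active state) plus a breadcrumb for moving between the country-level cluster view and the detail view. A sketch of how that toggle could be wired, assuming a ClusterGraph module with an init(containerId, data) method and a data-view attribute on the buttons; both are hypothetical, only the class names come from the stylesheet.

// Sketch: switching between the force-directed view and the country cluster view.
// Assumption: ClusterGraph and the data-view attribute are hypothetical;
// the CSS classes .network-view-toggle-btn / .active come from network-cluster.css.
function initViewToggle(data) {
  const buttons = document.querySelectorAll('.network-view-toggle-btn');
  buttons.forEach(btn => {
    btn.addEventListener('click', () => {
      buttons.forEach(b => b.classList.toggle('active', b === btn));
      NetworkGraph.destroy();                               // drop the previous simulation
      if (btn.dataset.view === 'cluster') {
        ClusterGraph.init('network-graph-area', data);      // hypothetical cluster module
      } else {
        NetworkGraph.init('network-graph-area', data);
      }
    });
  });
}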