From e64447ab7feea7a8f0837e7fcb2db9342528b990 Mon Sep 17 00:00:00 2001 From: Claude Dev Date: Tue, 24 Mar 2026 11:06:19 +0100 Subject: [PATCH] GEOINT-Modus aus Monitor entfernt Wird als eigenstaendige Anwendung auf separater Subdomain neu aufgebaut. Alle GEOINT-Dateien entfernt, dashboard.html/components.js/main.py auf pre-GEOINT Stand zurueckgesetzt. --- src/agents/entity_extractor.py | 133 ++- src/agents/entity_extractor.py.bak | 1144 +++++++++++++++++++ src/config.py | 1 + src/main.py | 17 +- src/routers/geoint.py | 300 ----- src/static/css/geoint.css | 321 ------ src/static/css/network-cluster.css | 188 ++++ src/static/dashboard.html | 16 - src/static/js/app_network.js | 108 ++ src/static/js/cluster-data.js | 721 ++++++++++++ src/static/js/components.js | 14 - src/static/js/geoint.js | 492 -------- src/static/js/network-cluster.js | 993 +++++++++++++++++ src/static/js/network-graph.js | 1663 ++++++++++++++-------------- 14 files changed, 4112 insertions(+), 1999 deletions(-) create mode 100644 src/agents/entity_extractor.py.bak delete mode 100644 src/routers/geoint.py delete mode 100644 src/static/css/geoint.css create mode 100644 src/static/css/network-cluster.css create mode 100644 src/static/js/cluster-data.js delete mode 100644 src/static/js/geoint.js create mode 100644 src/static/js/network-cluster.js diff --git a/src/agents/entity_extractor.py b/src/agents/entity_extractor.py index b3186ac..b41fef5 100644 --- a/src/agents/entity_extractor.py +++ b/src/agents/entity_extractor.py @@ -1,4 +1,4 @@ -"""Netzwerkanalyse: Entity-Extraktion (Haiku) + Beziehungsanalyse (Batched).""" +"""Netzwerkanalyse: Entity-Extraktion (Sonnet) + Beziehungsanalyse (Batched) mit Artikel-Deduplizierung.""" import asyncio import hashlib import json @@ -9,7 +9,7 @@ from datetime import datetime from typing import Optional from agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator -from config import CLAUDE_MODEL_FAST, TIMEZONE +from config import CLAUDE_MODEL_FAST, CLAUDE_MODEL_MEDIUM, TIMEZONE logger = logging.getLogger("osint.entity_extractor") @@ -194,6 +194,114 @@ def _compute_data_hash(article_ids, factcheck_ids, article_ts, factcheck_ts) -> return hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest() +# --------------------------------------------------------------------------- +# Artikel-Deduplizierung +# --------------------------------------------------------------------------- + +def _normalize_headline(headline: str) -> str: + """Normalisiert eine Headline fuer Vergleiche.""" + h = headline.lower().strip() + h = re.sub(r"[^a-z0-9\s]", "", h) + h = re.sub(r"\s+", " ", h).strip() + return h + + +def _headline_tokens(headline: str) -> set[str]: + """Extrahiert bedeutungstragende Tokens aus einer Headline.""" + tokens = set() + for word in _normalize_headline(headline).split(): + if len(word) >= 3 and word not in _STOP_WORDS: + tokens.add(word) + return tokens + + +def _jaccard_similarity(set_a: set, set_b: set) -> float: + """Jaccard-Aehnlichkeit zweier Mengen.""" + if not set_a or not set_b: + return 0.0 + intersection = set_a & set_b + union = set_a | set_b + return len(intersection) / len(union) if union else 0.0 + + +def _content_fingerprint(text: str) -> str: + """Kurzer Hash des Textinhalts fuer Near-Duplicate-Erkennung.""" + normalized = re.sub(r"\s+", " ", text.lower().strip())[:500] + return hashlib.md5(normalized.encode("utf-8")).hexdigest() + + +def _deduplicate_articles(articles: list[dict], factchecks: list[dict]) -> tuple[list[dict], list[dict]]: + """Entfernt redundante Artikel basierend auf Headline-Similarity und Content-Hash. + + Behaelt pro Duplikat-Gruppe den Artikel mit dem laengsten Content. + Faktenchecks werden nicht dedupliziert (sind bereits einzigartig). + + Returns: + Tuple von (deduplizierte_artikel, factchecks_unveraendert) + """ + if len(articles) <= 50: + return articles, factchecks + + logger.info(f"Artikel-Dedup: {len(articles)} Artikel pruefen") + + # Phase A: Exakte Content-Fingerprint-Dedup + seen_fingerprints: dict[str, int] = {} + + for i, art in enumerate(articles): + content = art.get("content_de") or art.get("content_original") or "" + headline = art.get("headline_de") or art.get("headline") or "" + + if not content and not headline: + continue + + fp = _content_fingerprint(headline + " " + content) + + if fp in seen_fingerprints: + existing_idx = seen_fingerprints[fp] + existing_content = articles[existing_idx].get("content_de") or articles[existing_idx].get("content_original") or "" + if len(content) > len(existing_content): + seen_fingerprints[fp] = i + else: + seen_fingerprints[fp] = i + + after_fp = list(seen_fingerprints.values()) + fp_removed = len(articles) - len(after_fp) + + # Phase B: Headline-Similarity-Dedup (Jaccard >= 0.7) + remaining = [articles[i] for i in sorted(after_fp)] + + token_sets = [] + for art in remaining: + headline = art.get("headline_de") or art.get("headline") or "" + token_sets.append(_headline_tokens(headline)) + + keep_mask = [True] * len(remaining) + + for i in range(len(remaining)): + if not keep_mask[i]: + continue + for j in range(i + 1, len(remaining)): + if not keep_mask[j]: + continue + if _jaccard_similarity(token_sets[i], token_sets[j]) >= 0.7: + content_i = remaining[i].get("content_de") or remaining[i].get("content_original") or "" + content_j = remaining[j].get("content_de") or remaining[j].get("content_original") or "" + if len(content_j) > len(content_i): + keep_mask[i] = False + break + else: + keep_mask[j] = False + + deduped = [art for art, keep in zip(remaining, keep_mask) if keep] + headline_removed = len(remaining) - len(deduped) + + logger.info( + f"Artikel-Dedup abgeschlossen: {len(articles)} -> {len(deduped)} " + f"({fp_removed} Content-Duplikate, {headline_removed} Headline-Duplikate entfernt)" + ) + + return deduped, factchecks + # --------------------------------------------------------------------------- # Entity-Merge Helper # --------------------------------------------------------------------------- @@ -279,8 +387,8 @@ async def _phase1_extract_entities( headline = art.get("headline_de") or art.get("headline") or "" content = art.get("content_de") or art.get("content_original") or "" source = art.get("source") or "" - if len(content) > 2000: - content = content[:2000] + "..." + if len(content) > 800: + content = content[:800] + "..." all_texts.append(f"[{source}] {headline}\n{content}") for fc in factchecks: @@ -293,7 +401,7 @@ async def _phase1_extract_entities( logger.warning(f"Analyse {analysis_id}: Keine Texte vorhanden") return [] - batch_size = 30 + batch_size = 50 batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)] logger.info(f"{len(all_texts)} Texte in {len(batches)} Batches") @@ -304,10 +412,10 @@ async def _phase1_extract_entities( prompt = ENTITY_EXTRACTION_PROMPT.format(articles_text=articles_text) try: - result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_MEDIUM) usage_acc.add(usage) except Exception as e: - logger.error(f"Haiku Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") + logger.error(f"Sonnet Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") continue parsed = _parse_json_response(result_text) @@ -500,8 +608,8 @@ async def _phase2_analyze_relationships( headline = art.get("headline_de") or art.get("headline") or "" content = art.get("content_de") or art.get("content_original") or "" source = art.get("source") or "" - if len(content) > 2000: - content = content[:2000] + "..." + if len(content) > 800: + content = content[:800] + "..." all_texts.append(f"[{source}] {headline}\n{content}") for fc in factchecks: @@ -514,7 +622,7 @@ async def _phase2_analyze_relationships( return [] # --- Stufe A: Per-Batch Beziehungsextraktion --- - batch_size = 30 + batch_size = 50 batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)] logger.info(f"Stufe A: {len(batches)} Batches für Beziehungsextraktion") @@ -545,7 +653,7 @@ async def _phase2_analyze_relationships( ) try: - result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_MEDIUM) usage_acc.add(usage) except Exception as e: logger.error(f"Relationship Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") @@ -1067,6 +1175,9 @@ async def extract_and_relate_entities(analysis_id: int, tenant_id: int, ws_manag logger.info(f"Analyse {analysis_id}: {len(articles)} Artikel, " f"{len(factchecks)} Faktenchecks aus {len(incident_ids)} Lagen") + # Artikel-Deduplizierung vor KI-Pipeline + articles, factchecks = _deduplicate_articles(articles, factchecks) + # Phase 1: Entity-Extraktion if not await _check_analysis_exists(db, analysis_id): return diff --git a/src/agents/entity_extractor.py.bak b/src/agents/entity_extractor.py.bak new file mode 100644 index 0000000..b3186ac --- /dev/null +++ b/src/agents/entity_extractor.py.bak @@ -0,0 +1,1144 @@ +"""Netzwerkanalyse: Entity-Extraktion (Haiku) + Beziehungsanalyse (Batched).""" +import asyncio +import hashlib +import json +import logging +import re +from collections import defaultdict +from datetime import datetime +from typing import Optional + +from agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator +from config import CLAUDE_MODEL_FAST, TIMEZONE + +logger = logging.getLogger("osint.entity_extractor") + +# --------------------------------------------------------------------------- +# Konstanten +# --------------------------------------------------------------------------- + +TYPE_PRIORITY = {"location": 5, "organisation": 4, "military": 3, "event": 2, "person": 1} + +_STOP_WORDS = frozenset({ + "the", "of", "and", "for", "in", "on", "at", "to", "by", + "von", "der", "die", "das", "und", "für", "des", "den", "dem", + "ein", "eine", "zur", "zum", "bei", "mit", "aus", "nach", +}) + +# --------------------------------------------------------------------------- +# Prompts +# --------------------------------------------------------------------------- + +ENTITY_EXTRACTION_PROMPT = """Du bist ein OSINT-Analyst für ein Lagemonitoring-System. +AUFGABE: Extrahiere ALLE relevanten Entitäten aus den folgenden Nachrichtenartikeln. + +ARTIKEL: +{articles_text} + +REGELN: +- Extrahiere JEDE genannte Person, Organisation, Ort, Ereignis und militärische Einheit +- Normalisiere Namen: "Wladimir Putin", "Putin", "V. Putin" -> eine Entität +- Aliase erfassen: Alle Namensvarianten einer Entität als aliases[] +- mention_count: Wie oft wird die Entität insgesamt in allen Artikeln erwähnt? +- Beschreibung: Kurze Einordnung, wer/was die Entität ist (1 Satz) +- KEINE Duplikate: Gleiche Entitäten zusammenfassen + +ENTITY-TYPEN: +- "person": Individuelle Personen (Politiker, Militärs, Journalisten etc.) +- "organisation": Organisationen, Parteien, Behörden, Unternehmen, NGOs +- "location": Länder, Städte, Regionen, Gebiete +- "event": Konkrete Ereignisse (Wahlen, Anschläge, Konferenzen etc.) +- "military": Militärische Einheiten, Waffensysteme, Operationen + +AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON: +{{ + "entities": [ + {{ + "name": "Vollständiger Name", + "name_normalized": "vollständiger name", + "type": "person|organisation|location|event|military", + "description": "Kurze Einordnung (1 Satz)", + "aliases": ["Alias1", "Alias2"], + "mention_count": 5 + }} + ] +}}""" + + +RELATIONSHIP_BATCH_PROMPT = """Du bist ein Senior OSINT-Analyst für ein Lagemonitoring-System. +AUFGABE: Analysiere die Beziehungen zwischen den Entitäten basierend auf den Artikeln. + +BEKANNTE ENTITÄTEN (aus dem Gesamtdatensatz): +{entities_json} + +ARTIKEL: +{articles_text} + +AUFGABE: +Identifiziere ALLE Beziehungen zwischen den oben genannten Entitäten, die sich aus den Artikeln ergeben. +- Nur Beziehungen nennen, die im Artikeltext belegt sind +- source und target: Exakt die Namen aus der Entitäten-Liste verwenden +- Wenn eine Entität im Artikel vorkommt aber nicht in der Liste, verwende den Namen wie er in der Liste steht + +BEZIEHUNGS-KATEGORIEN: +- "alliance": Bündnis, Kooperation, Unterstützung, Partnerschaft +- "conflict": Konflikt, Krieg, Feindschaft, Sanktionen, Opposition +- "diplomacy": Diplomatische Beziehungen, Verhandlungen, Abkommen +- "economic": Wirtschaftsbeziehungen, Handel, Investitionen +- "legal": Rechtliche Beziehungen, Klagen, Verurteilungen +- "neutral": Sonstige Beziehung, Erwähnung, Verbindung + +REGELN: +- weight: 1 (schwach/indirekt) bis 5 (stark/direkt) +- evidence[]: 1-2 kurze Stichpunkte aus dem Artikeltext als Beleg +- status: "active" (aktuell), "historical" (vergangen), "emerging" (sich entwickelnd) + +AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON: +{{ + "relations": [ + {{ + "source": "Entität A", + "target": "Entität B", + "category": "alliance|conflict|diplomacy|economic|legal|neutral", + "label": "Kurzes Label (2-4 Wörter)", + "description": "Beschreibung (1-2 Sätze)", + "weight": 3, + "status": "active|historical|emerging", + "evidence": ["Beleg 1", "Beleg 2"] + }} + ] +}}""" + + +SEMANTIC_DEDUP_PROMPT = """Du bist ein OSINT-Analyst. Prüfe diese Entitäten auf Duplikate. + +ENTITÄTEN: +{entity_list} + +AUFGABE: Welche Entitäten bezeichnen DASSELBE reale Objekt? + +Typische Duplikate: +- Abkürzung vs. Vollname: "IRGC" = "Iranian Revolutionary Guard Corps" +- Sprachvarianten: "European Union" = "Europäische Union" +- Schreibvarianten: "Strait of Hormuz" = "Straße von Hormus" + +REGELN: +- NUR echte Duplikate zusammenführen (gleiche reale Entität) +- NICHT zusammenführen: Unterorganisationen (IRGC ≠ IRGC Navy), verschiedene Personen +- "keep": Nummer der Haupt-Entität (bevorzuge: mehr Erwähnungen, vollständiger Name) +- "merge": Nummern der Duplikate die in die Haupt-Entität zusammengeführt werden + +AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON: +{{ + "merges": [ + {{"keep": 1, "merge": [3, 5]}}, + {{"keep": 2, "merge": [4]}} + ] +}} + +Falls KEINE Duplikate: {{"merges": []}}""" + + +# --------------------------------------------------------------------------- +# Hilfsfunktionen +# --------------------------------------------------------------------------- + +async def _broadcast(ws_manager, msg_type: str, data: dict): + """Sendet eine WebSocket-Nachricht, falls ws_manager vorhanden.""" + if ws_manager: + try: + await ws_manager.broadcast({"type": msg_type, **data}) + except Exception: + pass + + +def _parse_json_response(text: str) -> Optional[dict]: + """Parst JSON aus Claude-Antwort. Handhabt Markdown-Fences.""" + if not text: + return None + try: + return json.loads(text) + except json.JSONDecodeError: + pass + fence_match = re.search(r'```(?:json)?\s*\n?(.*?)\n?\s*```', text, re.DOTALL) + if fence_match: + try: + return json.loads(fence_match.group(1)) + except json.JSONDecodeError: + pass + obj_match = re.search(r'\{.*\}', text, re.DOTALL) + if obj_match: + try: + return json.loads(obj_match.group()) + except json.JSONDecodeError: + pass + logger.warning("JSON-Parse fehlgeschlagen") + return None + + +async def _check_analysis_exists(db, analysis_id: int) -> bool: + """Prüft ob die Analyse noch existiert.""" + cursor = await db.execute( + "SELECT id FROM network_analyses WHERE id = ?", (analysis_id,) + ) + return await cursor.fetchone() is not None + + +def _compute_data_hash(article_ids, factcheck_ids, article_ts, factcheck_ts) -> str: + """SHA256-Hash über sortierte IDs und Timestamps.""" + parts = [] + for aid, ats in sorted(zip(article_ids, article_ts)): + parts.append(f"a:{aid}:{ats}") + for fid, fts in sorted(zip(factcheck_ids, factcheck_ts)): + parts.append(f"f:{fid}:{fts}") + return hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest() + + +# --------------------------------------------------------------------------- +# Entity-Merge Helper +# --------------------------------------------------------------------------- + +async def _merge_entity_in_db( + db, analysis_id: int, keep_ent: dict, merge_ent: dict, entities: list[dict], +): + """Führt merge_ent in keep_ent zusammen (DB + In-Memory).""" + keep_id = keep_ent["db_id"] + merge_id = merge_ent["db_id"] + + # Aliases vereinen + aliases = set(keep_ent.get("aliases", [])) + aliases.add(merge_ent["name"]) + for a in merge_ent.get("aliases", []): + if a and a.strip(): + aliases.add(a.strip()) + aliases.discard(keep_ent["name"]) + keep_ent["aliases"] = list(aliases) + + # Mention count addieren + keep_ent["mention_count"] = keep_ent.get("mention_count", 0) + merge_ent.get("mention_count", 0) + + # Längere Description behalten + if len(merge_ent.get("description", "")) > len(keep_ent.get("description", "")): + keep_ent["description"] = merge_ent["description"] + + # Entity-Mentions umhängen + await db.execute( + "UPDATE network_entity_mentions SET entity_id = ? WHERE entity_id = ?", + (keep_id, merge_id), + ) + + # Relations umhängen + await db.execute( + "UPDATE network_relations SET source_entity_id = ? WHERE source_entity_id = ? AND network_analysis_id = ?", + (keep_id, merge_id, analysis_id), + ) + await db.execute( + "UPDATE network_relations SET target_entity_id = ? WHERE target_entity_id = ? AND network_analysis_id = ?", + (keep_id, merge_id, analysis_id), + ) + + # Self-Loops entfernen die durch den Merge entstanden sein könnten + await db.execute( + "DELETE FROM network_relations WHERE source_entity_id = ? AND target_entity_id = ? AND network_analysis_id = ?", + (keep_id, keep_id, analysis_id), + ) + + # Keep-Entity in DB aktualisieren + await db.execute( + """UPDATE network_entities + SET aliases = ?, mention_count = ?, description = ? + WHERE id = ?""", + (json.dumps(keep_ent["aliases"], ensure_ascii=False), + keep_ent["mention_count"], keep_ent.get("description", ""), keep_id), + ) + + # Merge-Entity löschen + await db.execute("DELETE FROM network_entities WHERE id = ?", (merge_id,)) + + # Aus entities-Liste entfernen + try: + entities.remove(merge_ent) + except ValueError: + pass + + +# --------------------------------------------------------------------------- +# Phase 1: Entity-Extraktion (Haiku) +# --------------------------------------------------------------------------- + +async def _phase1_extract_entities( + db, analysis_id: int, tenant_id: int, + articles: list[dict], factchecks: list[dict], + usage_acc: UsageAccumulator, ws_manager=None, +) -> list[dict]: + """Extrahiert Entitäten aus Artikeln via Haiku in Batches.""" + logger.info(f"Phase 1: {len(articles)} Artikel, {len(factchecks)} Faktenchecks") + + all_texts = [] + for art in articles: + headline = art.get("headline_de") or art.get("headline") or "" + content = art.get("content_de") or art.get("content_original") or "" + source = art.get("source") or "" + if len(content) > 2000: + content = content[:2000] + "..." + all_texts.append(f"[{source}] {headline}\n{content}") + + for fc in factchecks: + claim = fc.get("claim") or "" + evidence = fc.get("evidence") or "" + status = fc.get("status") or "" + all_texts.append(f"[Faktencheck] {claim} (Status: {status})\n{evidence}") + + if not all_texts: + logger.warning(f"Analyse {analysis_id}: Keine Texte vorhanden") + return [] + + batch_size = 30 + batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)] + logger.info(f"{len(all_texts)} Texte in {len(batches)} Batches") + + entity_map: dict[str, dict] = {} + + for batch_idx, batch in enumerate(batches): + articles_text = "\n\n---\n\n".join(batch) + prompt = ENTITY_EXTRACTION_PROMPT.format(articles_text=articles_text) + + try: + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + usage_acc.add(usage) + except Exception as e: + logger.error(f"Haiku Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") + continue + + parsed = _parse_json_response(result_text) + if not parsed or "entities" not in parsed: + logger.warning(f"Batch {batch_idx + 1}: Kein gültiges JSON") + continue + + entities = parsed["entities"] + if not isinstance(entities, list): + continue + + for ent in entities: + if not isinstance(ent, dict): + continue + name = (ent.get("name") or "").strip() + if not name: + continue + + name_normalized = (ent.get("name_normalized") or name.lower()).strip().lower() + entity_type = (ent.get("type") or "organisation").lower().strip() + valid_types = {"person", "organisation", "location", "event", "military"} + if entity_type not in valid_types: + entity_type = "organisation" + + key = name_normalized + + if key in entity_map: + existing = entity_map[key] + aliases = set(existing.get("aliases", [])) + for alias in ent.get("aliases", []): + if alias and alias.strip(): + aliases.add(alias.strip()) + if name != existing["name"]: + aliases.add(name) + existing["aliases"] = list(aliases) + existing["mention_count"] = existing.get("mention_count", 1) + ent.get("mention_count", 1) + new_desc = ent.get("description", "") + if len(new_desc) > len(existing.get("description", "")): + existing["description"] = new_desc + # Typ-Priorität: höherwertigen Typ behalten + if TYPE_PRIORITY.get(entity_type, 0) > TYPE_PRIORITY.get(existing["type"], 0): + existing["type"] = entity_type + else: + entity_map[key] = { + "name": name, + "name_normalized": name_normalized, + "type": entity_type, + "description": ent.get("description", ""), + "aliases": [a.strip() for a in ent.get("aliases", []) if a and a.strip()], + "mention_count": ent.get("mention_count", 1), + } + + logger.info(f"Batch {batch_idx + 1}/{len(batches)}: {len(entity_map)} Entitäten gesamt") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "entity_extraction", + "progress": int((batch_idx + 1) / len(batches) * 100), + }) + + all_entities = list(entity_map.values()) + + for ent in all_entities: + try: + cursor = await db.execute( + """INSERT OR IGNORE INTO network_entities + (network_analysis_id, name, name_normalized, entity_type, + description, aliases, mention_count, tenant_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)""", + ( + analysis_id, ent["name"], ent["name_normalized"], ent["type"], + ent.get("description", ""), + json.dumps(ent.get("aliases", []), ensure_ascii=False), + ent.get("mention_count", 1), + tenant_id, + ), + ) + ent["db_id"] = cursor.lastrowid + except Exception as e: + logger.warning(f"Entity speichern fehlgeschlagen '{ent['name']}': {e}") + + await db.commit() + logger.info(f"Phase 1 abgeschlossen: {len(all_entities)} Entitäten gespeichert") + return all_entities + + +# --------------------------------------------------------------------------- +# Phase 2a: Entity-Deduplication nach name_normalized +# --------------------------------------------------------------------------- + +async def _phase2a_deduplicate_entities( + db, analysis_id: int, entities: list[dict], ws_manager=None, +) -> None: + """Dedupliziert Entities mit gleichem name_normalized (unabhängig vom Typ).""" + logger.info(f"Phase 2a: Prüfe {len(entities)} Entitäten auf name_normalized-Duplikate") + + groups = defaultdict(list) + for ent in entities: + if ent.get("db_id"): + groups[ent["name_normalized"]].append(ent) + + merge_count = 0 + merged_ids = set() + + for nn, group in groups.items(): + if len(group) < 2: + continue + # Sortierung: höchste Typ-Priorität, dann meiste Erwähnungen + group.sort( + key=lambda e: (TYPE_PRIORITY.get(e["type"], 0), e.get("mention_count", 0)), + reverse=True, + ) + keep = group[0] + for merge in group[1:]: + if merge["db_id"] in merged_ids: + continue + await _merge_entity_in_db(db, analysis_id, keep, merge, entities) + merged_ids.add(merge["db_id"]) + merge_count += 1 + + if merge_count > 0: + await db.commit() + + logger.info( + f"Phase 2a abgeschlossen: {merge_count} Duplikate zusammengeführt, " + f"{len(entities)} Entitäten verbleiben" + ) + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "entity_dedup", + "progress": 100, + }) + + +# --------------------------------------------------------------------------- +# Phase 2: Beziehungsanalyse (Batched — pro Artikel-Batch) +# --------------------------------------------------------------------------- + +def _build_entity_name_map(entities: list[dict]) -> dict[str, int]: + """Mapping: normalisierter Name/Alias -> DB-ID.""" + name_to_id: dict[str, int] = {} + for ent in entities: + db_id = ent.get("db_id") + if not db_id: + continue + name_to_id[ent["name"].lower()] = db_id + name_to_id[ent["name_normalized"]] = db_id + for alias in ent.get("aliases", []): + if alias and alias.strip(): + name_to_id[alias.strip().lower()] = db_id + return name_to_id + + +def _find_relevant_entities(batch_texts: list[str], entities: list[dict]) -> list[dict]: + """Findet Entitäten, die in den Batch-Texten vorkommen.""" + combined_text = " ".join(batch_texts).lower() + relevant = [] + for ent in entities: + if ent["name"].lower() in combined_text or ent["name_normalized"] in combined_text: + relevant.append(ent) + continue + for alias in ent.get("aliases", []): + if alias and alias.strip().lower() in combined_text: + relevant.append(ent) + break + return relevant + + +async def _phase2_analyze_relationships( + db, analysis_id: int, tenant_id: int, + entities: list[dict], articles: list[dict], factchecks: list[dict], + usage_acc: UsageAccumulator, ws_manager=None, +) -> list[dict]: + """Analysiert Beziehungen batch-weise und merged die Ergebnisse.""" + if not entities: + return [] + + logger.info(f"Phase 2: {len(entities)} Entitäten, batched Beziehungsanalyse") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": 0, + }) + + # --- Texte vorbereiten (gleiche Logik wie Phase 1) --- + all_texts = [] + for art in articles: + headline = art.get("headline_de") or art.get("headline") or "" + content = art.get("content_de") or art.get("content_original") or "" + source = art.get("source") or "" + if len(content) > 2000: + content = content[:2000] + "..." + all_texts.append(f"[{source}] {headline}\n{content}") + + for fc in factchecks: + claim = fc.get("claim") or "" + evidence = fc.get("evidence") or "" + status = fc.get("status") or "" + all_texts.append(f"[Faktencheck] {claim} (Status: {status})\n{evidence}") + + if not all_texts: + return [] + + # --- Stufe A: Per-Batch Beziehungsextraktion --- + batch_size = 30 + batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)] + logger.info(f"Stufe A: {len(batches)} Batches für Beziehungsextraktion") + + all_raw_relations: list[dict] = [] + name_to_id = _build_entity_name_map(entities) + + for batch_idx, batch in enumerate(batches): + relevant = _find_relevant_entities(batch, entities) + if len(relevant) < 2: + logger.debug(f"Batch {batch_idx + 1}: Weniger als 2 Entitäten, überspringe") + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": int((batch_idx + 1) / len(batches) * 70), + }) + continue + + entities_for_prompt = [ + {"name": e["name"], "type": e["type"]} + for e in relevant + ] + entities_json = json.dumps(entities_for_prompt, ensure_ascii=False) + articles_text = "\n\n---\n\n".join(batch) + + prompt = RELATIONSHIP_BATCH_PROMPT.format( + entities_json=entities_json, + articles_text=articles_text, + ) + + try: + result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) + usage_acc.add(usage) + except Exception as e: + logger.error(f"Relationship Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}") + continue + + parsed = _parse_json_response(result_text) + if not parsed: + logger.warning(f"Batch {batch_idx + 1}: Kein gültiges JSON") + continue + + relations = parsed.get("relations", []) + if not isinstance(relations, list): + continue + + batch_count = 0 + for rel in relations: + if not isinstance(rel, dict): + continue + source_name = (rel.get("source") or "").strip() + target_name = (rel.get("target") or "").strip() + if not source_name or not target_name: + continue + rel["_batch"] = batch_idx + all_raw_relations.append(rel) + batch_count += 1 + + logger.info(f"Batch {batch_idx + 1}/{len(batches)}: {batch_count} Beziehungen, {len(relevant)} Entitäten") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": int((batch_idx + 1) / len(batches) * 70), + }) + + logger.info(f"Stufe A abgeschlossen: {len(all_raw_relations)} rohe Beziehungen aus {len(batches)} Batches") + + # --- Stufe B: Merge + Deduplizierung --- + logger.info("Stufe B: Merge und Deduplizierung") + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": 75, + }) + + valid_categories = {"alliance", "conflict", "diplomacy", "economic", "legal", "neutral"} + merged: dict[tuple[int, int, str], dict] = {} + + for rel in all_raw_relations: + source_name = (rel.get("source") or "").strip() + target_name = (rel.get("target") or "").strip() + + source_id = name_to_id.get(source_name.lower()) + target_id = name_to_id.get(target_name.lower()) + if not source_id or not target_id or source_id == target_id: + continue + + # Normalisiere Richtung um A->B und B->A zu mergen + if source_id > target_id: + source_id, target_id = target_id, source_id + source_name, target_name = target_name, source_name + + category = (rel.get("category") or "neutral").lower().strip() + if category not in valid_categories: + category = "neutral" + + key = (source_id, target_id, category) + + weight = rel.get("weight", 3) + try: + weight = max(1, min(5, int(weight))) + except (ValueError, TypeError): + weight = 3 + + status = (rel.get("status") or "active").lower().strip() + if status not in {"active", "historical", "emerging"}: + status = "active" + + evidence = rel.get("evidence", []) + if not isinstance(evidence, list): + evidence = [] + + if key in merged: + existing = merged[key] + existing["weight"] = max(existing["weight"], weight) + existing_evidence = set(existing["evidence"]) + for ev in evidence: + if isinstance(ev, str) and ev.strip() and ev.strip() not in existing_evidence: + existing["evidence"].append(ev.strip()) + existing_evidence.add(ev.strip()) + if len(existing["evidence"]) > 10: + existing["evidence"] = existing["evidence"][:10] + status_priority = {"active": 3, "emerging": 2, "historical": 1} + if status_priority.get(status, 0) > status_priority.get(existing["status"], 0): + existing["status"] = status + new_desc = rel.get("description", "") + if len(new_desc) > len(existing.get("description", "")): + existing["description"] = new_desc + existing["label"] = rel.get("label", existing["label"]) + existing["_count"] = existing.get("_count", 1) + 1 + else: + merged[key] = { + "source_id": source_id, + "target_id": target_id, + "source_name": source_name, + "target_name": target_name, + "category": category, + "label": rel.get("label", ""), + "description": rel.get("description", ""), + "weight": weight, + "status": status, + "evidence": [ev.strip() for ev in evidence if isinstance(ev, str) and ev.strip()][:10], + "_count": 1, + } + + logger.info(f"Stufe B abgeschlossen: {len(all_raw_relations)} roh -> {len(merged)} gemerged") + + # Gewichts-Boost für mehrfach belegte Beziehungen + for m in merged.values(): + if m["_count"] >= 3 and m["weight"] < 5: + m["weight"] = min(5, m["weight"] + 1) + + # --- In DB speichern --- + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": 85, + }) + + saved_relations = [] + for m in merged.values(): + try: + cursor = await db.execute( + """INSERT INTO network_relations + (network_analysis_id, source_entity_id, target_entity_id, + category, label, description, weight, status, evidence, tenant_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + analysis_id, m["source_id"], m["target_id"], m["category"], + m["label"], m["description"], m["weight"], m["status"], + json.dumps(m["evidence"], ensure_ascii=False), + tenant_id, + ), + ) + saved_relations.append({"id": cursor.lastrowid}) + except Exception as e: + logger.warning(f"Beziehung speichern fehlgeschlagen: {e}") + + await db.commit() + logger.info(f"Phase 2 abgeschlossen: {len(saved_relations)} Beziehungen gespeichert") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "relationship_extraction", + "progress": 100, + }) + + return saved_relations + + +# --------------------------------------------------------------------------- +# Phase 2c: Semantische Deduplication (Opus) +# --------------------------------------------------------------------------- + +async def _phase2c_semantic_dedup( + db, analysis_id: int, tenant_id: int, + entities: list[dict], usage_acc: UsageAccumulator, ws_manager=None, +) -> None: + """Semantische Deduplizierung via Opus — erkennt Synonyme, Abkürzungen, Sprachvarianten.""" + if len(entities) < 10: + return + + logger.info(f"Phase 2c: Semantische Dedup für {len(entities)} Entitäten") + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "semantic_dedup", + "progress": 0, + }) + + # --- Clustering: Token-basiert + Abbreviation-Matching --- + token_to_ids = defaultdict(set) + db_id_map = {e["db_id"]: e for e in entities if e.get("db_id")} + + for ent in entities: + db_id = ent.get("db_id") + if not db_id: + continue + + all_names = [ent["name_normalized"]] + for a in ent.get("aliases", []): + if a: + all_names.append(a.lower()) + + # Token-basiertes Clustering + for name in all_names: + for word in re.split(r'[\s\-_/(),.;:]+', name): + word = word.strip() + if len(word) >= 3 and word not in _STOP_WORDS: + token_to_ids[word].add(db_id) + + # Abbreviation-Matching + all_display_names = [ent["name"]] + [a for a in ent.get("aliases", []) if a] + for name in all_display_names: + words = [w for w in re.split(r'[\s\-_/(),.;:]+', name) if w and len(w) >= 2] + if len(words) >= 2: + abbr = "".join(w[0] for w in words).lower() + if len(abbr) >= 2: + token_to_ids[f"_abbr_{abbr}"].add(db_id) + # Kurze Namen als potenzielle Abkürzungen + name_clean = re.sub(r'[^a-zA-Z]', '', name).lower() + if 2 <= len(name_clean) <= 6: + token_to_ids[f"_abbr_{name_clean}"].add(db_id) + + # Eindeutige Cluster filtern (≥ 2 Entities, ≤ 40 Entities) + seen_clusters = set() + candidate_clusters = [] + for token, db_ids in sorted(token_to_ids.items(), key=lambda x: len(x[1])): + if len(db_ids) < 2 or len(db_ids) > 40: + continue + key = frozenset(db_ids) + if key in seen_clusters: + continue + seen_clusters.add(key) + candidate_clusters.append(list(db_ids)) + + logger.info(f"Phase 2c: {len(candidate_clusters)} Kandidaten-Cluster gefunden") + + # --- Opus-Calls für jeden Cluster --- + merged_away = set() + total_merges = 0 + opus_calls = 0 + max_calls = 50 + + for ci, cluster_ids in enumerate(candidate_clusters): + if opus_calls >= max_calls: + logger.warning(f"Phase 2c: Max {max_calls} Opus-Calls erreicht, stoppe") + break + + # Bereits gemergte Entities filtern + active_ids = [did for did in cluster_ids if did not in merged_away and did in db_id_map] + if len(active_ids) < 2: + continue + + active_ents = [db_id_map[did] for did in active_ids] + + # Prompt bauen + lines = [] + for i, ent in enumerate(active_ents, 1): + aliases_str = ", ".join((ent.get("aliases") or [])[:5]) + line = f"{i}. {ent['name']} ({ent['type']}, {ent.get('mention_count', 0)} Erwähnungen)" + if aliases_str: + line += f" [Aliases: {aliases_str}]" + lines.append(line) + + prompt = SEMANTIC_DEDUP_PROMPT.format(entity_list="\n".join(lines)) + + try: + result_text, usage = await call_claude(prompt, tools=None, model=None) + usage_acc.add(usage) + opus_calls += 1 + except Exception as e: + logger.error(f"Phase 2c Opus-Call {opus_calls + 1} fehlgeschlagen: {e}") + continue + + parsed = _parse_json_response(result_text) + if not parsed: + continue + + for merge_group in parsed.get("merges", []): + keep_idx = merge_group.get("keep") + merge_indices = merge_group.get("merge", []) + if keep_idx is None or not merge_indices: + continue + if not isinstance(merge_indices, list): + merge_indices = [merge_indices] + + keep_idx -= 1 # 1-indexed → 0-indexed + if keep_idx < 0 or keep_idx >= len(active_ents): + continue + keep_ent = active_ents[keep_idx] + if keep_ent["db_id"] in merged_away: + continue + + for mi in merge_indices: + mi -= 1 + if mi < 0 or mi >= len(active_ents) or mi == keep_idx: + continue + merge_ent = active_ents[mi] + if merge_ent["db_id"] in merged_away: + continue + + logger.info(f"Semantic Merge: '{merge_ent['name']}' → '{keep_ent['name']}'") + await _merge_entity_in_db(db, analysis_id, keep_ent, merge_ent, entities) + merged_away.add(merge_ent["db_id"]) + db_id_map.pop(merge_ent["db_id"], None) + total_merges += 1 + + # Progress + progress = int((ci + 1) / len(candidate_clusters) * 100) if candidate_clusters else 100 + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "semantic_dedup", + "progress": min(progress, 100), + }) + + if total_merges > 0: + await db.commit() + + logger.info( + f"Phase 2c abgeschlossen: {total_merges} Merges, {opus_calls} Opus-Calls, " + f"{len(entities)} Entitäten verbleiben" + ) + + +# --------------------------------------------------------------------------- +# Phase 2d: Cleanup +# --------------------------------------------------------------------------- + +async def _phase2d_cleanup( + db, analysis_id: int, entities: list[dict], ws_manager=None, +) -> None: + """Cleanup: Self-Loops, Richtungsnormalisierung, Duplikat-Relations, verwaiste Entities.""" + logger.info("Phase 2d: Cleanup") + + # 1. Self-Loops entfernen + cursor = await db.execute( + "DELETE FROM network_relations WHERE network_analysis_id = ? AND source_entity_id = target_entity_id", + (analysis_id,), + ) + self_loops = cursor.rowcount + + # 2. Richtungsnormalisierung (nach Merges können Richtungen inkonsistent sein) + await db.execute( + """UPDATE network_relations + SET source_entity_id = target_entity_id, target_entity_id = source_entity_id + WHERE source_entity_id > target_entity_id AND network_analysis_id = ?""", + (analysis_id,), + ) + + # 3. Duplikat-Relations entfernen (gleiche source+target+category nach Normalisierung) + cursor = await db.execute( + """DELETE FROM network_relations + WHERE network_analysis_id = ? AND id NOT IN ( + SELECT MIN(id) FROM network_relations + WHERE network_analysis_id = ? + GROUP BY source_entity_id, target_entity_id, category + )""", + (analysis_id, analysis_id), + ) + dup_relations = cursor.rowcount + + # 4. Verwaiste Entities entfernen (keine Verbindungen) + cursor = await db.execute( + """DELETE FROM network_entities + WHERE network_analysis_id = ? AND id NOT IN ( + SELECT source_entity_id FROM network_relations WHERE network_analysis_id = ? + UNION + SELECT target_entity_id FROM network_relations WHERE network_analysis_id = ? + )""", + (analysis_id, analysis_id, analysis_id), + ) + orphans = cursor.rowcount + + # Entities-Liste aktualisieren + remaining_ids = set() + cursor = await db.execute( + "SELECT id FROM network_entities WHERE network_analysis_id = ?", (analysis_id,) + ) + for row in await cursor.fetchall(): + remaining_ids.add(row["id"]) + entities[:] = [e for e in entities if e.get("db_id") in remaining_ids] + + await db.commit() + + logger.info( + f"Phase 2d abgeschlossen: {self_loops} Self-Loops, {dup_relations} Duplikat-Relations, " + f"{orphans} verwaiste Entities entfernt, {len(entities)} Entitäten verbleiben" + ) + + await _broadcast(ws_manager, "network_status", { + "analysis_id": analysis_id, + "phase": "cleanup", + "progress": 100, + }) + + +# --------------------------------------------------------------------------- +# Phase 3: Finalisierung +# --------------------------------------------------------------------------- + +async def _phase3_finalize( + db, analysis_id, tenant_id, entity_count, relation_count, + article_ids, factcheck_ids, article_ts, factcheck_ts, + usage_acc, ws_manager=None, +): + """Finalisiert: Zähler, Hash, Log, Status.""" + data_hash = _compute_data_hash(article_ids, factcheck_ids, article_ts, factcheck_ts) + now = datetime.now(TIMEZONE).strftime("%Y-%m-%d %H:%M:%S") + + await db.execute( + """UPDATE network_analyses + SET entity_count = ?, relation_count = ?, status = 'ready', + last_generated_at = ?, data_hash = ? + WHERE id = ?""", + (entity_count, relation_count, now, data_hash, analysis_id), + ) + + await db.execute( + """INSERT INTO network_generation_log + (network_analysis_id, completed_at, status, input_tokens, output_tokens, + cache_creation_tokens, cache_read_tokens, total_cost_usd, api_calls, + entity_count, relation_count, tenant_id) + VALUES (?, ?, 'completed', ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + (analysis_id, now, usage_acc.input_tokens, usage_acc.output_tokens, + usage_acc.cache_creation_tokens, usage_acc.cache_read_tokens, + usage_acc.total_cost_usd, usage_acc.call_count, + entity_count, relation_count, tenant_id), + ) + + await db.commit() + + logger.info(f"Analyse {analysis_id} finalisiert: {entity_count} Entitäten, " + f"{relation_count} Beziehungen, ${usage_acc.total_cost_usd:.4f}") + + await _broadcast(ws_manager, "network_complete", { + "analysis_id": analysis_id, + "entity_count": entity_count, + "relation_count": relation_count, + "cost_usd": round(usage_acc.total_cost_usd, 4), + }) + + +# --------------------------------------------------------------------------- +# Hauptfunktion +# --------------------------------------------------------------------------- + +async def extract_and_relate_entities(analysis_id: int, tenant_id: int, ws_manager=None): + """Hauptfunktion: Entity-Extraktion + Beziehungsanalyse. + + Phase 1: Haiku extrahiert Entitäten aus Artikeln (in Batches) + Phase 2a: Entity-Dedup nach name_normalized (Code, kein API) + Phase 2: Haiku extrahiert Beziehungen pro Batch, dann Merge + Deduplizierung + Phase 2c: Semantische Dedup via Opus (Cluster-weise) + Phase 2d: Cleanup (Self-Loops, Verwaiste, Duplikat-Relations) + Phase 3: Finalisierung (Zähler, Hash, Log) + """ + from database import get_db + + db = await get_db() + usage_acc = UsageAccumulator() + + try: + if not await _check_analysis_exists(db, analysis_id): + logger.warning(f"Analyse {analysis_id} existiert nicht") + return + + await db.execute( + "UPDATE network_analyses SET status = 'generating' WHERE id = ?", + (analysis_id,), + ) + await db.commit() + + # Incident-IDs laden + cursor = await db.execute( + "SELECT incident_id FROM network_analysis_incidents WHERE network_analysis_id = ?", + (analysis_id,), + ) + incident_ids = [row["incident_id"] for row in await cursor.fetchall()] + + if not incident_ids: + logger.warning(f"Analyse {analysis_id}: Keine Lagen verknüpft") + await db.execute("UPDATE network_analyses SET status = 'error' WHERE id = ?", (analysis_id,)) + await db.commit() + await _broadcast(ws_manager, "network_error", { + "analysis_id": analysis_id, "error": "Keine Lagen verknüpft", + }) + return + + # Artikel laden + placeholders = ",".join("?" * len(incident_ids)) + cursor = await db.execute( + f"""SELECT id, incident_id, headline, headline_de, source, source_url, + content_original, content_de, collected_at + FROM articles WHERE incident_id IN ({placeholders})""", + incident_ids, + ) + article_rows = await cursor.fetchall() + articles = [] + article_ids = [] + article_ts = [] + for r in article_rows: + articles.append({ + "id": r["id"], "incident_id": r["incident_id"], + "headline": r["headline"], "headline_de": r["headline_de"], + "source": r["source"], "source_url": r["source_url"], + "content_original": r["content_original"], "content_de": r["content_de"], + }) + article_ids.append(r["id"]) + article_ts.append(r["collected_at"] or "") + + # Faktenchecks laden + cursor = await db.execute( + f"""SELECT id, incident_id, claim, status, evidence, checked_at + FROM fact_checks WHERE incident_id IN ({placeholders})""", + incident_ids, + ) + fc_rows = await cursor.fetchall() + factchecks = [] + factcheck_ids = [] + factcheck_ts = [] + for r in fc_rows: + factchecks.append({ + "id": r["id"], "incident_id": r["incident_id"], + "claim": r["claim"], "status": r["status"], "evidence": r["evidence"], + }) + factcheck_ids.append(r["id"]) + factcheck_ts.append(r["checked_at"] or "") + + logger.info(f"Analyse {analysis_id}: {len(articles)} Artikel, " + f"{len(factchecks)} Faktenchecks aus {len(incident_ids)} Lagen") + + # Phase 1: Entity-Extraktion + if not await _check_analysis_exists(db, analysis_id): + return + + entities = await _phase1_extract_entities( + db, analysis_id, tenant_id, articles, factchecks, usage_acc, ws_manager, + ) + + # Phase 2a: Entity-Deduplication + if not await _check_analysis_exists(db, analysis_id): + return + + await _phase2a_deduplicate_entities(db, analysis_id, entities, ws_manager) + + # Phase 2: Beziehungsextraktion + if not await _check_analysis_exists(db, analysis_id): + return + + relations = await _phase2_analyze_relationships( + db, analysis_id, tenant_id, entities, articles, factchecks, usage_acc, ws_manager, + ) + + # Phase 2c: Semantische Deduplication + if not await _check_analysis_exists(db, analysis_id): + return + + await _phase2c_semantic_dedup( + db, analysis_id, tenant_id, entities, usage_acc, ws_manager, + ) + + # Phase 2d: Cleanup + if not await _check_analysis_exists(db, analysis_id): + return + + await _phase2d_cleanup(db, analysis_id, entities, ws_manager) + + # Finale Zähler aus DB (nach allen Cleanups) + cursor = await db.execute( + "SELECT COUNT(*) as cnt FROM network_entities WHERE network_analysis_id = ?", + (analysis_id,), + ) + row = await cursor.fetchone() + final_entity_count = row["cnt"] if row else len(entities) + + cursor = await db.execute( + "SELECT COUNT(*) as cnt FROM network_relations WHERE network_analysis_id = ?", + (analysis_id,), + ) + row = await cursor.fetchone() + final_relation_count = row["cnt"] if row else len(relations) + + # Phase 3: Finalisierung + if not await _check_analysis_exists(db, analysis_id): + return + + await _phase3_finalize( + db, analysis_id, tenant_id, + entity_count=final_entity_count, relation_count=final_relation_count, + article_ids=article_ids, factcheck_ids=factcheck_ids, + article_ts=article_ts, factcheck_ts=factcheck_ts, + usage_acc=usage_acc, ws_manager=ws_manager, + ) + + except Exception as e: + logger.error(f"Entity-Extraktion fehlgeschlagen (Analyse {analysis_id}): {e}", exc_info=True) + try: + await db.execute("UPDATE network_analyses SET status = 'error' WHERE id = ?", (analysis_id,)) + await db.commit() + except Exception: + pass + await _broadcast(ws_manager, "network_error", { + "analysis_id": analysis_id, "error": str(e), + }) + finally: + await db.close() diff --git a/src/config.py b/src/config.py index a7cd097..b620a24 100644 --- a/src/config.py +++ b/src/config.py @@ -24,6 +24,7 @@ CLAUDE_PATH = os.environ.get("CLAUDE_PATH", "/usr/bin/claude") CLAUDE_TIMEOUT = 1800 # Sekunden (30 Min - Lage-Updates mit vielen Artikeln brauchen mehr Zeit) # Claude Modelle CLAUDE_MODEL_FAST = "claude-haiku-4-5-20251001" # Für einfache Aufgaben (Feed-Selektion) +CLAUDE_MODEL_MEDIUM = "claude-sonnet-4-6" # Für qualitätskritische Aufgaben (Netzwerkanalyse) # Ausgabesprache (Lagebilder, Faktenchecks, Zusammenfassungen) OUTPUT_LANGUAGE = "Deutsch" diff --git a/src/main.py b/src/main.py index 138b857..de19369 100644 --- a/src/main.py +++ b/src/main.py @@ -298,11 +298,11 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): response = await call_next(request) response.headers["Content-Security-Policy"] = ( "default-src 'self'; " - "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net https://unpkg.com; " + "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; " "style-src 'self' 'unsafe-inline' https://fonts.googleapis.com https://cdn.jsdelivr.net; " "font-src 'self' https://fonts.gstatic.com; " - "img-src 'self' data: https://tile.openstreetmap.de https://server.arcgisonline.com; " - "connect-src 'self' wss: ws: https://earthquake.usgs.gov https://api.gdeltproject.org; " + "img-src 'self' data: https://tile.openstreetmap.de; " + "connect-src 'self' wss: ws:; " "frame-ancestors 'none'" ) response.headers["Permissions-Policy"] = ( @@ -334,7 +334,6 @@ from routers.public_api import router as public_api_router from routers.chat import router as chat_router from routers.network_analysis import router as network_analysis_router from routers.tutorial import router as tutorial_router -from routers.geoint import router as geoint_router app.include_router(auth_router) app.include_router(incidents_router) @@ -345,16 +344,6 @@ app.include_router(public_api_router) app.include_router(chat_router, prefix="/api/chat") app.include_router(network_analysis_router) app.include_router(tutorial_router) -app.include_router(geoint_router, prefix="/api/geoint") - -@app.on_event("startup") -@app.on_event("startup") -async def _start_aisstream_on_startup(): - import asyncio - from routers.geoint import _start_aisstream - await asyncio.sleep(3) - _start_aisstream() - @app.websocket("/api/ws") diff --git a/src/routers/geoint.py b/src/routers/geoint.py deleted file mode 100644 index f1e36ea..0000000 --- a/src/routers/geoint.py +++ /dev/null @@ -1,300 +0,0 @@ -"""GEOINT-Router: Proxy fuer externe Echtzeit-Datenquellen (Flugverkehr, Schiffsverkehr, GDELT).""" -import asyncio -import json as _json -import logging -import time -from typing import Optional - -import httpx -import websockets -from fastapi import APIRouter, Depends, Query - -from auth import get_current_user - -logger = logging.getLogger("osint.geoint") - -router = APIRouter(tags=["geoint"]) - -# --------------------------------------------------------------------------- -# Einfacher In-Memory-Cache -# --------------------------------------------------------------------------- -_cache: dict[str, tuple[float, dict]] = {} - - -def _get_cached(key: str, ttl: float) -> Optional[dict]: - if key in _cache: - ts, data = _cache[key] - if time.time() - ts < ttl: - return data - return None - - -def _set_cache(key: str, data: dict): - _cache[key] = (time.time(), data) - if len(_cache) > 50: - oldest = min(_cache, key=lambda k: _cache[k][0]) - del _cache[oldest] - - -# --------------------------------------------------------------------------- -# Flugverkehr: Globaler Snapshot (airplanes.live) -# --------------------------------------------------------------------------- - -_FLIGHT_GRID = [ - # Europa - (48.0, 2.0), # Westeuropa (Paris) - (48.0, 16.0), # Mitteleuropa (Wien) - (55.0, 10.0), # Nordeuropa (Daenemark) - (40.0, -4.0), # Iberische Halbinsel - (41.0, 12.0), # Suedeuropa (Rom) - (38.0, 24.0), # Suedosteuropa (Griechenland) - (55.0, 25.0), # Baltikum - (60.0, 25.0), # Skandinavien-Ost - (52.0, 30.0), # Osteuropa - (45.0, 37.0), # Schwarzes Meer / Tuerkei Ost - # UK / Island - (54.0, -2.0), # UK - (63.0, -19.0), # Island - # Naher Osten (erweitert) - (33.0, 36.0), # Levante (Syrien/Libanon/Israel) - (30.0, 31.0), # Aegypten / Kairo - (25.0, 45.0), # Saudi-Arabien Zentral - (26.5, 56.0), # Strasse von Hormuz / VAE - (25.0, 51.5), # Katar / Bahrain - (33.0, 44.0), # Irak (Bagdad) - (33.0, 52.0), # Iran (Teheran) - (15.0, 45.0), # Jemen / Rotes Meer - (21.0, 40.0), # Saudi-Arabien West (Dschidda) - # Nordafrika - (34.0, 2.0), # Maghreb (Algier) - (33.0, -7.0), # Marokko (Casablanca) - (32.0, 13.0), # Libyen (Tripolis) - # Zentralasien - (41.0, 69.0), # Usbekistan (Taschkent) - (39.0, 63.0), # Turkmenistan - # Nordamerika Ostkueste - (40.0, -74.0), # New York - (33.0, -84.0), # Atlanta - (42.0, -88.0), # Chicago - (26.0, -80.0), # Florida (Miami) - (45.0, -74.0), # Montreal - # Nordamerika Westkueste - (34.0, -118.0), # Los Angeles - (47.0, -122.0), # Seattle - (37.0, -122.0), # San Francisco - # Nordamerika Zentral - (30.0, -97.0), # Texas (Austin) - (39.0, -105.0), # Denver - # Ostasien - (35.0, 140.0), # Japan (Tokio) - (37.0, 127.0), # Korea (Seoul) - (31.0, 121.0), # Shanghai - (40.0, 117.0), # Peking - (22.0, 114.0), # Hongkong - (25.0, 121.0), # Taiwan - # Suedasien - (19.0, 73.0), # Mumbai - (28.0, 77.0), # Delhi - (13.0, 80.0), # Chennai - (7.0, 80.0), # Sri Lanka - # Suedostasien - (1.0, 104.0), # Singapur - (14.0, 101.0), # Bangkok - (-6.0, 107.0), # Jakarta - (10.0, 107.0), # Ho-Chi-Minh - # Ozeanien - (-34.0, 151.0), # Sydney - (-37.0, 175.0), # Neuseeland - # Afrika - (-1.0, 37.0), # Nairobi - (-34.0, 18.0), # Kapstadt - (6.0, 3.0), # Lagos - (9.0, 39.0), # Addis Abeba - # Suedamerika - (-23.0, -43.0), # Rio de Janeiro - (-34.0, -58.0), # Buenos Aires - (-12.0, -77.0), # Lima - (4.0, -74.0), # Bogota -] - -_flight_lock = asyncio.Lock() - - -async def _fetch_global_flights() -> dict: - """Holt Flugdaten fuer alle Stuetzpunkte parallel.""" - cached = _get_cached("flights_global", ttl=30) - if cached: - return cached - - async with _flight_lock: - cached = _get_cached("flights_global", ttl=30) - if cached: - return cached - - seen: dict[str, dict] = {} - errors = 0 - - async with httpx.AsyncClient(timeout=10) as client: - for i in range(0, len(_FLIGHT_GRID), 8): - batch = _FLIGHT_GRID[i:i + 8] - tasks = [client.get(f"https://api.airplanes.live/v2/point/{lat:.2f}/{lon:.2f}/250") - for lat, lon in batch] - results = await asyncio.gather(*tasks, return_exceptions=True) - for r in results: - if isinstance(r, Exception): - errors += 1 - continue - try: - data = r.json() - for ac in data.get("ac", []): - hex_id = ac.get("hex") - if hex_id and hex_id not in seen: - seen[hex_id] = ac - except Exception: - errors += 1 - if i + 8 < len(_FLIGHT_GRID): - await asyncio.sleep(0.3) - - result = {"ac": list(seen.values()), "total": len(seen), "errors": errors} - logger.info(f"GEOINT Flights: {len(seen)} Flugzeuge ({errors} Fehler)") - _set_cache("flights_global", result) - return result - - -@router.get("/flights") -async def get_flights(_user: dict = Depends(get_current_user)): - """Globaler Flugverkehr-Snapshot. 30s Cache.""" - return await _fetch_global_flights() - - -# --------------------------------------------------------------------------- -# Schiffsverkehr: AISStream.io (globales Echtzeit-AIS via WebSocket) -# --------------------------------------------------------------------------- - -_AISSTREAM_KEY = "1a56b078db829727abd4d617937bae51c6f9973e" -_AISSTREAM_URL = "wss://stream.aisstream.io/v0/stream" - -# Globaler Schiffs-Store: {mmsi: {lat, lon, sog, cog, heading, name, ship_type, ts}} -_ships_store: dict[int, dict] = {} -_ships_lock = asyncio.Lock() -_ships_ws_task: Optional[asyncio.Task] = None -_ships_connected = False - - -async def _aisstream_listener(): - """Dauerhafter WebSocket-Client fuer AISStream. Sammelt Schiffspositionen.""" - global _ships_connected - while True: - try: - logger.info("AISStream: Verbinde...") - async with websockets.connect(_AISSTREAM_URL, ping_interval=30, ping_timeout=10, - close_timeout=5) as ws: - # Subscription: globale BoundingBox, nur Positionsberichte - sub = { - "APIKey": _AISSTREAM_KEY, - "BoundingBoxes": [[[-90, -180], [90, 180]]], - "FilterMessageTypes": ["PositionReport"], - } - await ws.send(_json.dumps(sub)) - _ships_connected = True - logger.info("AISStream: Verbunden, empfange Schiffsdaten...") - - async for raw in ws: - try: - text = raw.decode("utf-8") if isinstance(raw, bytes) else raw - msg = _json.loads(text) - meta = msg.get("MetaData", {}) - mmsi = meta.get("MMSI") - if not mmsi: - continue - - pos = msg.get("Message", {}).get("PositionReport", {}) - lat = meta.get("latitude") or pos.get("Latitude") - lon = meta.get("longitude") or pos.get("Longitude") - if not lat or not lon or not (-90 <= lat <= 90 and -180 <= lon <= 180): - continue - - _ships_store[mmsi] = { - "mmsi": mmsi, - "lat": round(lat, 5), - "lon": round(lon, 5), - "sog": round(pos.get("Sog", 0), 1), - "cog": round(pos.get("Cog", 0), 1), - "heading": pos.get("TrueHeading", 0), - "name": (meta.get("ShipName") or "").strip(), - "ts": time.time(), - } - if len(_ships_store) % 1000 == 0: - logger.info(f"AISStream: {len(_ships_store)} Schiffe gesammelt") - - # Alte Eintraege alle 60s bereinigen (>15 Min alt) - if len(_ships_store) % 500 == 0: - cutoff = time.time() - 900 - stale = [k for k, v in _ships_store.items() if v["ts"] < cutoff] - for k in stale: - del _ships_store[k] - - except Exception as parse_err: - if len(_ships_store) < 5: - logger.warning(f"AISStream Parse-Fehler: {parse_err}, raw type: {type(raw)}, first 100: {str(raw)[:100]}") - continue - - except Exception as e: - _ships_connected = False - logger.warning(f"AISStream Fehler: {e}. Reconnect in 10s...") - await asyncio.sleep(10) - - -def _start_aisstream(): - """Startet den AISStream-Listener als Background-Task.""" - global _ships_ws_task - if _ships_ws_task is None or _ships_ws_task.done(): - _ships_ws_task = asyncio.create_task(_aisstream_listener()) - logger.info("AISStream Background-Task gestartet") - - -@router.get("/ships") -async def get_ships(_user: dict = Depends(get_current_user)): - """Globaler Schiffsverkehr aus AISStream. Echtzeit-Positionen.""" - # Lazy-Start: WebSocket-Listener beim ersten Abruf starten - _start_aisstream() - - ships = list(_ships_store.values()) - return { - "ships": ships, - "total": len(ships), - "connected": _ships_connected, - } - - -# --------------------------------------------------------------------------- -# GDELT Nachrichten -# --------------------------------------------------------------------------- -@router.get("/gdelt") -async def get_gdelt( - query: str = Query("conflict", max_length=200), - _user: dict = Depends(get_current_user), -): - """Proxy fuer GDELT GEO 2.0 API. 60s Cache.""" - cache_key = f"gdelt:{query[:50]}" - cached = _get_cached(cache_key, ttl=60) - if cached: - return cached - - url = ( - "https://api.gdeltproject.org/api/v2/geo/geo" - f"?query={query}&mode=PointData&format=GeoJSON" - "×pan=24h&maxrows=200" - ) - try: - async with httpx.AsyncClient(timeout=12) as client: - resp = await client.get(url) - resp.raise_for_status() - data = resp.json() - except Exception as e: - logger.warning(f"GDELT Fehler: {e}") - return {"type": "FeatureCollection", "features": []} - - _set_cache(cache_key, data) - return data - diff --git a/src/static/css/geoint.css b/src/static/css/geoint.css deleted file mode 100644 index b8d9c2c..0000000 --- a/src/static/css/geoint.css +++ /dev/null @@ -1,321 +0,0 @@ -/* ===================================================================== - GEOINT-Modus: Taktische Kartenansicht mit Echtzeit-Datenlayern - ===================================================================== */ - -/* --- Toggle-Checkbox im Card-Header --- */ -.geoint-toggle { - display: inline-flex; - align-items: center; - gap: 5px; - cursor: pointer; - user-select: none; - margin-right: 8px; -} -.geoint-toggle input[type="checkbox"] { - accent-color: #00ff88; - width: 13px; - height: 13px; - cursor: pointer; -} -.geoint-toggle-label { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 12px; - font-weight: 700; - font-weight: 700; - letter-spacing: 1.5px; - text-transform: uppercase; - color: var(--text-secondary); - transition: color 0.2s; -} -.geoint-toggle input:checked + .geoint-toggle-label { - color: #00ff88; - text-shadow: 0 0 6px rgba(0, 255, 136, 0.4); -} - -/* --- Taktisches Styling (aktiv) --- */ -.geoint-active .leaflet-tile-pane { - filter: brightness(0.88) contrast(1.08) saturate(0.85); - transition: filter 0.4s ease; -} -/* Scanline-Overlay (subtiler Effekt, kein Blocking) */ -.geoint-active .map-empty { display: none !important; } - -/* Gruener Akzent am Card-Header wenn aktiv */ -.map-card.geoint-card-active .card-header { - border-bottom: 2px solid rgba(0, 255, 136, 0.25); -} - -/* --- Sub-Layer Control Panel --- */ -.geoint-sub-control { - background: rgba(11, 17, 33, 0.92); - border: 1px solid rgba(0, 255, 136, 0.2); - border-radius: 6px; - padding: 10px 12px; - min-width: 170px; - backdrop-filter: blur(8px); - box-shadow: 0 4px 16px rgba(0, 0, 0, 0.4); -} -.geoint-sub-control h4 { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 9px; - font-weight: 700; - letter-spacing: 2px; - text-transform: uppercase; - color: #00ff88; - margin: 0 0 8px 0; - padding-bottom: 6px; - border-bottom: 1px solid rgba(0, 255, 136, 0.15); -} -.geoint-sub-item { - display: flex; - align-items: center; - gap: 6px; - padding: 3px 0; -} -.geoint-sub-item input[type="checkbox"] { - accent-color: #00ff88; - width: 12px; - height: 12px; - cursor: pointer; - flex-shrink: 0; -} -.geoint-sub-item label { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 11px; - color: rgba(255, 255, 255, 0.8); - cursor: pointer; - white-space: nowrap; -} -.geoint-sub-item label .geoint-dot { - display: inline-block; - width: 7px; - height: 7px; - border-radius: 50%; - margin-right: 4px; - vertical-align: middle; -} -.geoint-dot-flights { background: #00ff88; } -.geoint-dot-ships { background: #4499ff; } -.geoint-dot-quakes { background: #ff4444; } -.geoint-dot-gdelt { background: #44aaff; } -.geoint-dot-heatmap { background: #ff8800; } -.geoint-dot-coords { background: #aaaaaa; } -.geoint-dot-distance { background: #ff2222; } -.geoint-sub-separator { - height: 1px; - background: rgba(0, 255, 136, 0.1); - margin: 5px 0; -} - -/* --- Flugzeug-Icons --- */ -.geoint-aircraft { - display: flex; - align-items: center; - justify-content: center; - transition: filter 0.15s; -} -.geoint-aircraft:hover { - filter: drop-shadow(0 0 6px #00ff88); -} -.geoint-aircraft svg { - width: 14px; - height: 14px; -} - -/* --- Schiffs-Icons --- */ -.geoint-ship { - display: flex; - align-items: center; - justify-content: center; - transition: filter 0.15s; -} -.geoint-ship:hover { - filter: drop-shadow(0 0 4px #4499ff); -} - -/* --- Erdbeben Puls-Animation --- */ -.geoint-quake-marker { - animation: geoint-pulse 2.5s ease-in-out infinite; -} -@keyframes geoint-pulse { - 0%, 100% { opacity: 0.7; } - 50% { opacity: 1; } -} - -/* --- GDELT Nachrichtenmarker --- */ -.geoint-gdelt-icon { - display: flex; - align-items: center; - justify-content: center; - width: 18px; - height: 18px; - background: rgba(68, 170, 255, 0.85); - border: 1.5px solid rgba(68, 170, 255, 1); - border-radius: 50%; - font-size: 12px; - font-weight: 700; - color: #fff; - font-weight: 700; - box-shadow: 0 0 4px rgba(68, 170, 255, 0.5); -} - -/* --- Koordinatenanzeige --- */ -.geoint-coord-display { - background: rgba(11, 17, 33, 0.88); - border: 1px solid rgba(0, 255, 136, 0.2); - border-radius: 4px; - padding: 4px 8px; - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 11px; - color: #00ff88; - letter-spacing: 0.5px; - white-space: nowrap; - backdrop-filter: blur(4px); -} - -/* --- Distanzmessung --- */ -.geoint-distance-label { - box-shadow: 0 2px 8px rgba(0,0,0,0.6); - background: rgba(11, 17, 33, 0.9); - border: 1px solid rgba(255, 34, 34, 0.6); - border-radius: 3px; - padding: 2px 6px; - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 12px; - font-weight: 700; - color: #ffffff; - white-space: nowrap; -} - -/* --- Timeline-Slider --- */ -.geoint-timeline { - display: none; - padding: 6px 12px 8px; - background: rgba(11, 17, 33, 0.6); - border-top: 1px solid rgba(0, 255, 136, 0.1); -} -.geoint-active .geoint-timeline { - display: flex; - align-items: center; - gap: 10px; -} -.geoint-timeline input[type="range"] { - flex: 1; - height: 4px; - -webkit-appearance: none; - appearance: none; - background: rgba(255, 255, 255, 0.12); - border-radius: 2px; - outline: none; - cursor: pointer; -} -.geoint-timeline input[type="range"]::-webkit-slider-thumb { - -webkit-appearance: none; - width: 14px; - height: 14px; - background: #00ff88; - border-radius: 50%; - border: 2px solid rgba(11, 17, 33, 0.8); - cursor: pointer; - box-shadow: 0 0 6px rgba(0, 255, 136, 0.5); -} -.geoint-timeline input[type="range"]::-moz-range-thumb { - width: 14px; - height: 14px; - background: #00ff88; - border-radius: 50%; - border: 2px solid rgba(11, 17, 33, 0.8); - cursor: pointer; -} -.geoint-timeline-label { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 12px; - font-weight: 700; - color: #00ff88; - min-width: 90px; - text-align: center; -} -.geoint-timeline-btn { - background: none; - border: 1px solid rgba(0, 255, 136, 0.3); - border-radius: 3px; - color: #00ff88; - font-size: 11px; - padding: 2px 6px; - cursor: pointer; - font-family: var(--font-mono, 'Courier New', monospace); -} -.geoint-timeline-btn:hover { - background: rgba(0, 255, 136, 0.1); -} - -/* --- Popup-Styling fuer GEOINT-Layer --- */ -/* Dunkler Popup-Hintergrund fuer GEOINT-Layer */ -.geoint-leaflet-popup .leaflet-popup-content-wrapper { - background: rgba(11, 17, 33, 0.95); - border: 1px solid rgba(0, 255, 136, 0.25); - border-radius: 6px; - box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5); -} -.geoint-leaflet-popup .leaflet-popup-tip { - background: rgba(11, 17, 33, 0.95); -} -.geoint-leaflet-popup .leaflet-popup-close-button { - color: rgba(255, 255, 255, 0.6); -} -.geoint-leaflet-popup .leaflet-popup-close-button:hover { - color: #00ff88; -} -.geoint-popup { - font-family: var(--font-mono, 'Courier New', monospace); - font-size: 12px; - line-height: 1.6; - color: #ffffff; -} -.geoint-popup strong { - color: #00ff88; - font-size: 13px; -} -.geoint-popup .geoint-popup-row { - display: flex; - gap: 8px; -} -.geoint-popup .geoint-popup-key { - color: rgba(255, 255, 255, 0.55); - min-width: 40px; -} - -/* --- Light Theme Overrides --- */ -[data-theme="light"] .geoint-sub-control { - background: rgba(240, 243, 248, 0.95); - border-color: rgba(0, 160, 80, 0.25); -} -[data-theme="light"] .geoint-sub-control h4 { - color: #008844; -} -[data-theme="light"] .geoint-sub-item label { - color: rgba(0, 0, 0, 0.75); -} -[data-theme="light"] .geoint-coord-display { - background: rgba(240, 243, 248, 0.92); - color: #006633; - border-color: rgba(0, 160, 80, 0.25); -} -[data-theme="light"] .geoint-timeline { - background: rgba(240, 243, 248, 0.7); - border-top-color: rgba(0, 160, 80, 0.15); -} -[data-theme="light"] .geoint-timeline input[type="range"]::-webkit-slider-thumb { - background: #008844; -} -[data-theme="light"] .geoint-timeline-label { - color: #006633; -} -[data-theme="light"] .geoint-toggle input:checked + .geoint-toggle-label { - color: #008844; - text-shadow: none; -} -[data-theme="light"] .geoint-active .leaflet-tile-pane { - filter: brightness(0.95) contrast(1.05) saturate(0.9); -} -/* Light theme scanline overlay disabled */ diff --git a/src/static/css/network-cluster.css b/src/static/css/network-cluster.css new file mode 100644 index 0000000..c7244bc --- /dev/null +++ b/src/static/css/network-cluster.css @@ -0,0 +1,188 @@ +/* ================================================================= + AegisSight OSINT Monitor - Cluster Graph Styles + Hierarchical country-based network visualization + ================================================================= */ + +/* ---- Breadcrumb ---- */ + +.cluster-breadcrumb { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 16px; + background: rgba(15, 23, 42, 0.6); + border-bottom: 1px solid var(--border, #1e293b); + font-size: 13px; + min-height: 36px; + flex-shrink: 0; +} + +.breadcrumb-item { + color: #94a3b8; + font-size: 13px; +} + +.breadcrumb-item.active { + color: #f1f5f9; + font-weight: 600; +} + +.breadcrumb-item.clickable { + cursor: pointer; + color: #60a5fa; + transition: color 0.15s; +} + +.breadcrumb-item.clickable:hover { + color: #93bbfc; + text-decoration: underline; +} + +.breadcrumb-separator { + color: #475569; + font-size: 14px; + user-select: none; +} + +.cluster-back-btn { + display: inline-flex; + align-items: center; + gap: 4px; + background: transparent; + border: 1px solid #334155; + color: #94a3b8; + padding: 3px 10px; + border-radius: 4px; + cursor: pointer; + font-size: 12px; + font-family: inherit; + transition: all 0.15s; + white-space: nowrap; +} + +.cluster-back-btn:hover { + border-color: #60a5fa; + color: #60a5fa; + background: rgba(96, 165, 250, 0.08); +} + +/* ---- View Toggle Button ---- */ + +.network-view-toggle { + display: inline-flex; + align-items: center; + gap: 0; + background: rgba(30, 41, 59, 0.6); + border: 1px solid #334155; + border-radius: 6px; + padding: 2px; + margin-right: 8px; +} + +.network-view-toggle-btn { + padding: 5px 12px; + background: transparent; + border: none; + color: #94a3b8; + font-size: 12px; + font-family: inherit; + cursor: pointer; + border-radius: 4px; + transition: all 0.2s; + white-space: nowrap; +} + +.network-view-toggle-btn.active { + background: #334155; + color: #f1f5f9; + font-weight: 600; +} + +.network-view-toggle-btn:hover:not(.active) { + color: #e2e8f0; + background: rgba(51, 65, 85, 0.4); +} + +/* ---- Cluster Graph SVG ---- */ + +.cg-zoom-layer { + /* Smooth transitions handled by d3 */ +} + +/* Country nodes */ +.cg-country-node { + transition: filter 0.2s; +} + +.cg-country-circle { + transition: stroke-width 0.2s, opacity 0.2s; + filter: drop-shadow(0 2px 8px rgba(0, 0, 0, 0.3)); +} + +.cg-country-node:hover .cg-country-circle { + filter: drop-shadow(0 4px 16px rgba(241, 245, 249, 0.15)); +} + +.cg-country-label { + text-shadow: 0 1px 3px rgba(0, 0, 0, 0.7); + user-select: none; +} + +.cg-country-count { + user-select: none; +} + +/* Detail nodes */ +.cg-detail-node circle { + transition: stroke 0.15s, stroke-width 0.15s, opacity 0.15s; +} + +.cg-detail-node text { + text-shadow: 0 1px 2px rgba(0, 0, 0, 0.8); + user-select: none; +} + +/* Links */ +.cg-links line, +.cg-detail-links line { + pointer-events: none; +} + +/* Legend */ +.cg-legend text { + user-select: none; +} + +/* ---- Tooltip ---- */ + +.cg-tooltip { + pointer-events: none; + backdrop-filter: blur(8px); + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4); + line-height: 1.5; +} + +.cg-tooltip hr { + border: none; + border-top: 1px solid #334155; + margin: 4px 0; +} + +/* ---- Responsive ---- */ + +@media (max-width: 768px) { + .cluster-breadcrumb { + padding: 6px 12px; + font-size: 12px; + } + + .cluster-back-btn { + font-size: 11px; + padding: 2px 8px; + } + + .network-view-toggle-btn { + padding: 4px 8px; + font-size: 11px; + } +} diff --git a/src/static/dashboard.html b/src/static/dashboard.html index 5e56ae5..0400388 100644 --- a/src/static/dashboard.html +++ b/src/static/dashboard.html @@ -19,7 +19,6 @@ - @@ -407,10 +406,6 @@
Geografische Verteilung
- - - -- -
@@ -786,8 +776,6 @@ - - @@ -804,10 +792,6 @@
Geografische Verteilung
- diff --git a/src/static/js/app_network.js b/src/static/js/app_network.js index 607ecec..3c70a92 100644 --- a/src/static/js/app_network.js +++ b/src/static/js/app_network.js @@ -76,6 +76,7 @@ App.selectNetworkAnalysis = async function(id) { if (analysis.status === 'ready') { this._hideNetworkProgress(); var graphData = await API.getNetworkGraph(id); + document.getElementById('network-graph-area').innerHTML = ''; NetworkGraph.init('network-graph-area', graphData); this._setupNetworkFilters(graphData); @@ -394,6 +395,7 @@ App._handleNetworkComplete = async function(msg) { // Graph neu laden try { var graphData = await API.getNetworkGraph(msg.analysis_id); + document.getElementById('network-graph-area').innerHTML = ''; NetworkGraph.init('network-graph-area', graphData); this._setupNetworkFilters(graphData); @@ -452,3 +454,109 @@ function _escHtml(text) { d.textContent = text || ''; return d.innerHTML; } + + +// ========================================================================== +// Cluster View Integration + + +// ========================================================================== +// Cluster Graph Integration (replaces flat NetworkGraph view) +// ========================================================================== + +App._cachedGraphData = null; + +/** + * Hide sidebar filter controls that dont apply to cluster view. + */ +App._hideNetworkSidebarFilters = function() { + var sidebar = document.querySelector('.network-sidebar'); + if (!sidebar) return; + var sections = sidebar.querySelectorAll('.network-sidebar-section'); + // Hide ALL old filter sections — ClusterGraph uses the detail panel directly + for (var i = 0; i < sections.length; i++) { + sections[i].style.display = 'none'; + } +}; + +// Override selectNetworkAnalysis to use ClusterGraph +(function() { + App.selectNetworkAnalysis = async function(id) { + this.currentNetworkId = id; + this.currentIncidentId = null; + localStorage.removeItem('selectedIncidentId'); + localStorage.setItem('selectedNetworkId', id); + + document.getElementById('empty-state').style.display = 'none'; + document.getElementById('incident-view').style.display = 'none'; + document.getElementById('network-view').style.display = 'flex'; + + this.renderSidebar(); + this.renderNetworkSidebar(); + + try { + var analysis = await API.getNetworkAnalysis(id); + this._renderNetworkHeader(analysis); + + if (analysis.status === 'ready') { + this._hideNetworkProgress(); + var graphData = await API.getNetworkGraph(id); + this._cachedGraphData = graphData; + + var graphArea = document.getElementById('network-graph-area'); + graphArea.innerHTML = ''; + + var breadcrumb = document.getElementById('cluster-breadcrumb'); + if (breadcrumb) breadcrumb.style.display = 'flex'; + + ClusterGraph.init('network-graph-area', graphData.entities, graphData.relations); + this._hideNetworkSidebarFilters(); + + try { + var updateCheck = await API.checkNetworkUpdate(id); + var badge = document.getElementById('network-update-badge'); + if (badge) badge.style.display = updateCheck.has_update ? 'inline-flex' : 'none'; + } catch (e) { /* ignorieren */ } + } else if (analysis.status === 'generating') { + this._showNetworkProgress('entity_extraction', 0); + } else if (analysis.status === 'error') { + this._hideNetworkProgress(); + var errArea = document.getElementById('network-graph-area'); + if (errArea) errArea.innerHTML = '
Fehler bei der Generierung. Versuche es erneut.
'; + } + } catch (err) { + UI.showToast('Fehler beim Laden der Netzwerkanalyse: ' + err.message, 'error'); + } + }; +})(); + +// Override _handleNetworkComplete to use ClusterGraph +(function() { + App._handleNetworkComplete = async function(msg) { + this._networkGenerating.delete(msg.analysis_id); + + if (msg.analysis_id === this.currentNetworkId) { + this._hideNetworkProgress(); + try { + var graphData = await API.getNetworkGraph(msg.analysis_id); + this._cachedGraphData = graphData; + var graphArea = document.getElementById('network-graph-area'); + graphArea.innerHTML = ''; + + var breadcrumb = document.getElementById('cluster-breadcrumb'); + if (breadcrumb) breadcrumb.style.display = 'flex'; + + ClusterGraph.init('network-graph-area', graphData.entities, graphData.relations); + this._hideNetworkSidebarFilters(); + + var analysis = await API.getNetworkAnalysis(msg.analysis_id); + this._renderNetworkHeader(analysis); + } catch (e) { + console.error('Graph nach Generierung laden fehlgeschlagen:', e); + } + UI.showToast('Netzwerkanalyse fertig: ' + (msg.entity_count || 0) + ' Entitaeten, ' + (msg.relation_count || 0) + ' Beziehungen', 'success'); + } + + await this.loadNetworkAnalyses(); + }; +})(); diff --git a/src/static/js/cluster-data.js b/src/static/js/cluster-data.js new file mode 100644 index 0000000..c5ec7cf --- /dev/null +++ b/src/static/js/cluster-data.js @@ -0,0 +1,721 @@ +/** + * AegisSight OSINT Monitor - Cluster Data Transformation + * + * Transforms flat entity/relation data into hierarchical country-based clusters. + * Used by ClusterGraph for the hierarchical network visualization. + * + * Usage: + * const result = ClusterData.buildClusterData(entities, relations); + * // result = { countries: [...], edges: [...], assignments: Map, entityToCountry: Map } + */ + +/* exported ClusterData */ + +const ClusterData = { + + /** + * Canonical country names with all known aliases (lowercase). + * Maps alias -> canonical name (German UI labels). + */ + COUNTRY_ALIASES: { + // Hauptakteure Irankonflikt + 'iran': 'Iran', + 'islamic republic of iran': 'Iran', + 'islamische republik iran': 'Iran', + 'persia': 'Iran', + 'persien': 'Iran', + + 'israel': 'Israel', + 'state of israel': 'Israel', + 'staat israel': 'Israel', + + 'united states': 'USA', + 'united states of america': 'USA', + 'usa': 'USA', + 'us': 'USA', + 'u.s.': 'USA', + 'u.s.a.': 'USA', + 'amerika': 'USA', + 'vereinigte staaten': 'USA', + + // Naher Osten + 'lebanon': 'Libanon', + 'libanon': 'Libanon', + 'lebanese republic': 'Libanon', + + 'syria': 'Syrien', + 'syrien': 'Syrien', + 'syrian arab republic': 'Syrien', + + 'iraq': 'Irak', + 'irak': 'Irak', + 'republic of iraq': 'Irak', + + 'yemen': 'Jemen', + 'jemen': 'Jemen', + 'republic of yemen': 'Jemen', + + 'saudi arabia': 'Saudi-Arabien', + 'saudi-arabien': 'Saudi-Arabien', + 'kingdom of saudi arabia': 'Saudi-Arabien', + 'ksa': 'Saudi-Arabien', + + 'united arab emirates': 'VAE', + 'uae': 'VAE', + 'vae': 'VAE', + 'vereinigte arabische emirate': 'VAE', + + 'jordan': 'Jordanien', + 'jordanien': 'Jordanien', + + 'egypt': 'Ägypten', + 'ägypten': 'Ägypten', + 'aegypten': 'Ägypten', + + 'bahrain': 'Bahrain', + 'kingdom of bahrain': 'Bahrain', + + 'kuwait': 'Kuwait', + 'state of kuwait': 'Kuwait', + + 'qatar': 'Katar', + 'katar': 'Katar', + + 'oman': 'Oman', + 'sultanate of oman': 'Oman', + + 'palestine': 'Palästina', + 'palästina': 'Palästina', + 'palestinian territories': 'Palästina', + 'state of palestine': 'Palästina', + 'gaza': 'Palästina', + 'gaza strip': 'Palästina', + 'west bank': 'Palästina', + + // Großmächte + 'russia': 'Russland', + 'russland': 'Russland', + 'russian federation': 'Russland', + 'russische föderation': 'Russland', + + 'china': 'China', + 'people\'s republic of china': 'China', + 'volksrepublik china': 'China', + 'prc': 'China', + + 'united kingdom': 'Großbritannien', + 'uk': 'Großbritannien', + 'großbritannien': 'Großbritannien', + 'grossbritannien': 'Großbritannien', + 'great britain': 'Großbritannien', + 'britain': 'Großbritannien', + 'england': 'Großbritannien', + + 'france': 'Frankreich', + 'frankreich': 'Frankreich', + 'french republic': 'Frankreich', + + 'germany': 'Deutschland', + 'deutschland': 'Deutschland', + 'federal republic of germany': 'Deutschland', + 'bundesrepublik deutschland': 'Deutschland', + + // Weitere relevante Staaten + 'turkey': 'Türkei', + 'türkei': 'Türkei', + 'turkei': 'Türkei', + 'republic of turkey': 'Türkei', + 'türkiye': 'Türkei', + + 'india': 'Indien', + 'indien': 'Indien', + 'republic of india': 'Indien', + + 'pakistan': 'Pakistan', + 'islamic republic of pakistan': 'Pakistan', + + 'afghanistan': 'Afghanistan', + + 'ukraine': 'Ukraine', + + 'north korea': 'Nordkorea', + 'nordkorea': 'Nordkorea', + 'dprk': 'Nordkorea', + + 'south korea': 'Südkorea', + 'südkorea': 'Südkorea', + 'republic of korea': 'Südkorea', + + 'japan': 'Japan', + + 'italy': 'Italien', + 'italien': 'Italien', + + 'spain': 'Spanien', + 'spanien': 'Spanien', + + 'netherlands': 'Niederlande', + 'niederlande': 'Niederlande', + 'holland': 'Niederlande', + + 'poland': 'Polen', + 'polen': 'Polen', + + 'canada': 'Kanada', + 'kanada': 'Kanada', + + 'australia': 'Australien', + 'australien': 'Australien', + + 'brazil': 'Brasilien', + 'brasilien': 'Brasilien', + + 'mexico': 'Mexiko', + 'mexiko': 'Mexiko', + + 'south africa': 'Südafrika', + 'südafrika': 'Südafrika', + + 'nigeria': 'Nigeria', + + 'ethiopia': 'Äthiopien', + 'äthiopien': 'Äthiopien', + + 'somalia': 'Somalia', + + 'sudan': 'Sudan', + + 'libya': 'Libyen', + 'libyen': 'Libyen', + + 'tunisia': 'Tunesien', + 'tunesien': 'Tunesien', + + 'morocco': 'Marokko', + 'marokko': 'Marokko', + + 'algeria': 'Algerien', + 'algerien': 'Algerien', + + 'sweden': 'Schweden', + 'schweden': 'Schweden', + + 'norway': 'Norwegen', + 'norwegen': 'Norwegen', + + 'switzerland': 'Schweiz', + 'schweiz': 'Schweiz', + + 'austria': 'Österreich', + 'österreich': 'Österreich', + 'oesterreich': 'Österreich', + }, + + /** + * Country keyword patterns for name/description matching. + * Each entry: [regex, canonical country name] + * Order matters: more specific patterns first. + */ + COUNTRY_PATTERNS: [ + [/\biran/i, 'Iran'], + [/\bpersi/i, 'Iran'], + [/\bisrael/i, 'Israel'], + [/\bjewish state/i, 'Israel'], + [/\bunited states/i, 'USA'], + [/\bamerican?\b/i, 'USA'], + [/\bu\.?s\.?\b(?![\w-])/i, 'USA'], + [/\bpentagon/i, 'USA'], + [/\bwhite house/i, 'USA'], + [/\bcongress\b/i, 'USA'], + [/\bleban/i, 'Libanon'], + [/\bhezbollah/i, 'Libanon'], + [/\bhisbollah/i, 'Libanon'], + [/\bsyri/i, 'Syrien'], + [/\biraq/i, 'Irak'], + [/\birak/i, 'Irak'], + [/\byemen/i, 'Jemen'], + [/\bjemen/i, 'Jemen'], + [/\bhouthi/i, 'Jemen'], + [/\bsaudi/i, 'Saudi-Arabien'], + [/\bemira/i, 'VAE'], + [/\bdubai/i, 'VAE'], + [/\bjordan/i, 'Jordanien'], + [/\begypt/i, 'Ägypten'], + [/\bägypt/i, 'Ägypten'], + [/\bbahrain/i, 'Bahrain'], + [/\bkuwait/i, 'Kuwait'], + [/\bqatar/i, 'Katar'], + [/\bkatar/i, 'Katar'], + [/\bpalesti/i, 'Palästina'], + [/\bgaza/i, 'Palästina'], + [/\bhamas\b/i, 'Palästina'], + [/\brussi/i, 'Russland'], + [/\bkreml/i, 'Russland'], + [/\bputin/i, 'Russland'], + [/\bmoscow/i, 'Russland'], + [/\bmoskau/i, 'Russland'], + [/\bchines/i, 'China'], + [/\bchinai/i, 'China'], + [/\bchina/i, 'China'], + [/\bbeijing/i, 'China'], + [/\bpeking/i, 'China'], + [/\bbriti/i, 'Großbritannien'], + [/\bengland/i, 'Großbritannien'], + [/\blondon\b/i, 'Großbritannien'], + [/\bfrench/i, 'Frankreich'], + [/\bfranz/i, 'Frankreich'], + [/\bfrance/i, 'Frankreich'], + [/\bgerman/i, 'Deutschland'], + [/\bdeutsch/i, 'Deutschland'], + [/\bturk/i, 'Türkei'], + [/\btürk/i, 'Türkei'], + [/\bankara/i, 'Türkei'], + [/\bindia/i, 'Indien'], + [/\bindisch/i, 'Indien'], + [/\bpakistan/i, 'Pakistan'], + [/\bafghan/i, 'Afghanistan'], + [/\bukrain/i, 'Ukraine'], + [/\bnorth.?korea/i, 'Nordkorea'], + [/\bnordkorea/i, 'Nordkorea'], + [/\bpjöngjang/i, 'Nordkorea'], + [/\bpyongyang/i, 'Nordkorea'], + [/\bjapan/i, 'Japan'], + [/\boman\b/i, 'Oman'], + ], + + /** + * Main entry: transform flat entity/relation data into clustered structure. + * + * @param {Array} entities - All entities from getNetworkGraph + * @param {Array} relations - All relations from getNetworkGraph + * @returns {Object} { countries, edges, assignments, entityToCountry } + */ + buildClusterData(entities, relations) { + // 1. Identify which entities are countries and merge duplicates + var countryMap = this._identifyCountries(entities); + + // 2. Build adjacency for fast lookup + var adjacency = this._buildAdjacency(relations); + + // 3. Multi-strategy assignment: + // a) Relation-based (direct country connections) + // b) Name/Description keyword matching + // c) Propagation through assigned neighbors (multiple passes) + var result = this._assignEntities(entities, relations, countryMap, adjacency); + + // 4. Aggregate cross-country relations + var edges = this._aggregateEdges(relations, result.entityToCountry); + + // 5. Build country node objects for rendering + var countries = this._buildCountryNodes(countryMap, result.assignments, entities); + + return { + countries: countries, + edges: edges, + assignments: result.assignments, + entityToCountry: result.entityToCountry + }; + }, + + // ---- Step 1: Identify countries ------------------------------------------ + + _identifyCountries(entities) { + // Map: canonical country name -> [entity_id, ...] + var countryMap = new Map(); + + for (var i = 0; i < entities.length; i++) { + var entity = entities[i]; + + var normalized = (entity.name_normalized || entity.name || '') + .toLowerCase().trim(); + + // Strip common suffixes/brackets for matching + var cleaned = normalized + .replace(/\s*\(als organisation\)/i, '') + .replace(/\s*\(organisation\)/i, '') + .replace(/^the\s+/, '') + .replace(/\s+republic$/, '') + .replace(/\s+federation$/, ''); + + // Try direct alias match first (exact match in COUNTRY_ALIASES) + var directMatch = this.COUNTRY_ALIASES[normalized]; + var cleanedMatch = !directMatch ? this.COUNTRY_ALIASES[cleaned] : null; + var canonical = directMatch || cleanedMatch; + + // For non-location entities: only accept direct alias matches + // (prevents "Iranian Drones" from being a country, but allows + // "Islamic Republic of Iran" which is a direct alias) + if (canonical && entity.entity_type !== 'location' && !directMatch) { + // Match came from cleaning — apply length check + if (cleaned.length > canonical.length + 15) continue; + } + + if (canonical) { + if (!countryMap.has(canonical)) { + countryMap.set(canonical, []); + } + countryMap.get(canonical).push(entity.id); + } + } + + return countryMap; + }, + + // ---- Step 2: Build adjacency --------------------------------------------- + + _buildAdjacency(relations) { + var adj = new Map(); + for (var i = 0; i < relations.length; i++) { + var r = relations[i]; + var src = r.source_entity_id; + var tgt = r.target_entity_id; + + if (!adj.has(src)) adj.set(src, []); + if (!adj.has(tgt)) adj.set(tgt, []); + adj.get(src).push(r); + adj.get(tgt).push(r); + } + return adj; + }, + + // ---- Step 3: Assign entities to countries (multi-strategy) ---------------- + + _assignEntities(entities, relations, countryMap, adjacency) { + var self = this; + var entityToCountry = new Map(); + var countryEntityIds = new Set(); + + // Build entity lookup + var entityMap = new Map(); + for (var i = 0; i < entities.length; i++) { + entityMap.set(entities[i].id, entities[i]); + } + + // Mark all country entity IDs + countryMap.forEach(function(ids, canonical) { + for (var i = 0; i < ids.length; i++) { + entityToCountry.set(ids[i], canonical); + countryEntityIds.add(ids[i]); + } + }); + + // Ensure all country keys exist in assignments + var assignments = new Map(); + countryMap.forEach(function(_, canonical) { + assignments.set(canonical, []); + }); + assignments.set('__unassigned__', []); + + // Collect unassigned entity IDs + var unassigned = []; + for (var i = 0; i < entities.length; i++) { + if (!countryEntityIds.has(entities[i].id)) { + unassigned.push(entities[i].id); + } + } + + // --- Strategy A: Relation-based (direct connection to country entity) --- + var stillUnassigned = []; + for (var a = 0; a < unassigned.length; a++) { + var eid = unassigned[a]; + var country = this._findByRelation(eid, adjacency, entityToCountry, countryEntityIds); + if (country) { + entityToCountry.set(eid, country); + if (!assignments.has(country)) assignments.set(country, []); + assignments.get(country).push(eid); + } else { + stillUnassigned.push(eid); + } + } + + // --- Strategy B: Name + Description keyword matching --- + var afterKeyword = []; + for (var b = 0; b < stillUnassigned.length; b++) { + var eid2 = stillUnassigned[b]; + var entity = entityMap.get(eid2); + var country2 = this._findByKeywords(entity); + if (country2) { + entityToCountry.set(eid2, country2); + if (!assignments.has(country2)) assignments.set(country2, []); + assignments.get(country2).push(eid2); + } else { + afterKeyword.push(eid2); + } + } + + // --- Strategy C: Propagation through assigned neighbors (max 5 passes) --- + var remaining = afterKeyword; + for (var pass = 0; pass < 5 && remaining.length > 0; pass++) { + var nextRemaining = []; + for (var c = 0; c < remaining.length; c++) { + var eid3 = remaining[c]; + var country3 = this._findByNeighborPropagation(eid3, adjacency, entityToCountry); + if (country3) { + entityToCountry.set(eid3, country3); + if (!assignments.has(country3)) assignments.set(country3, []); + assignments.get(country3).push(eid3); + } else { + nextRemaining.push(eid3); + } + } + if (nextRemaining.length === remaining.length) break; // No progress + remaining = nextRemaining; + } + + // Everything still unassigned goes to "Sonstige" + for (var u = 0; u < remaining.length; u++) { + assignments.get('__unassigned__').push(remaining[u]); + } + + return { entityToCountry: entityToCountry, assignments: assignments }; + }, + + /** + * Strategy A: Direct relation to a country entity. + */ + _findByRelation: function(entityId, adjacency, entityToCountry, countryEntityIds) { + var rels = adjacency.get(entityId); + if (!rels || rels.length === 0) return null; + + var scores = new Map(); + for (var i = 0; i < rels.length; i++) { + var r = rels[i]; + var otherId = r.source_entity_id === entityId + ? r.target_entity_id : r.source_entity_id; + + if (countryEntityIds.has(otherId)) { + var country = entityToCountry.get(otherId); + scores.set(country, (scores.get(country) || 0) + (r.weight || 1)); + } + } + + return this._bestFromScores(scores); + }, + + /** + * Strategy B: Match country keywords in entity name, aliases and description. + * For events mentioning multiple countries, uses first-mentioned country in name + * with a bonus, so "Iran-Israel-US War" → Iran. + */ + _findByKeywords: function(entity) { + if (!entity) return null; + + var scores = new Map(); + var patterns = this.COUNTRY_PATTERNS; + var name = entity.name || ''; + var desc = entity.description || ''; + + // For name matches: track position to boost first-mentioned country + var firstMatchPos = Infinity; + var firstMatchCountry = null; + + for (var i = 0; i < patterns.length; i++) { + var pattern = patterns[i][0]; + var country = patterns[i][1]; + + // Check name (stronger signal) + var nameMatch = pattern.exec(name); + if (nameMatch) { + scores.set(country, (scores.get(country) || 0) + 3); + // Track first-mentioned country by position in name + if (nameMatch.index < firstMatchPos) { + firstMatchPos = nameMatch.index; + firstMatchCountry = country; + } + } + // Reset regex lastIndex (stateless) + pattern.lastIndex = 0; + + // Check description (weaker signal) + if (desc && pattern.test(desc)) { + scores.set(country, (scores.get(country) || 0) + 1); + } + pattern.lastIndex = 0; + } + + // Check aliases + if (entity.aliases && entity.aliases.length > 0) { + var aliasText = entity.aliases.join(' '); + for (var j = 0; j < patterns.length; j++) { + if (patterns[j][0].test(aliasText)) { + var c = patterns[j][1]; + scores.set(c, (scores.get(c) || 0) + 1); + } + patterns[j][0].lastIndex = 0; + } + } + + // Boost first-mentioned country in name (important for multi-country events) + if (firstMatchCountry && scores.size > 1) { + scores.set(firstMatchCountry, (scores.get(firstMatchCountry) || 0) + 2); + } + + return this._bestFromScores(scores); + }, + + /** + * Strategy C: Propagate from already-assigned neighbors. + */ + _findByNeighborPropagation: function(entityId, adjacency, entityToCountry) { + var rels = adjacency.get(entityId); + if (!rels || rels.length === 0) return null; + + var scores = new Map(); + for (var i = 0; i < rels.length; i++) { + var r = rels[i]; + var otherId = r.source_entity_id === entityId + ? r.target_entity_id : r.source_entity_id; + + if (entityToCountry.has(otherId)) { + var country = entityToCountry.get(otherId); + scores.set(country, (scores.get(country) || 0) + (r.weight || 1)); + } + } + + return this._bestFromScores(scores); + }, + + /** + * Helper: return country with highest score, or null. + */ + _bestFromScores: function(scores) { + if (scores.size === 0) return null; + var best = null; + var bestScore = 0; + scores.forEach(function(score, country) { + if (score > bestScore) { + best = country; + bestScore = score; + } + }); + return best; + }, + + // ---- Step 4: Aggregate cross-country edges ------------------------------- + + _aggregateEdges(relations, entityToCountry) { + var edgeMap = new Map(); // "A|B" -> { source, target, count, categories, totalWeight } + + for (var i = 0; i < relations.length; i++) { + var r = relations[i]; + var c1 = entityToCountry.get(r.source_entity_id); + var c2 = entityToCountry.get(r.target_entity_id); + + // Skip if same country, or either entity unassigned + if (!c1 || !c2 || c1 === c2) continue; + + var key = c1 < c2 ? c1 + '|' + c2 : c2 + '|' + c1; + + if (!edgeMap.has(key)) { + edgeMap.set(key, { + source: c1 < c2 ? c1 : c2, + target: c1 < c2 ? c2 : c1, + count: 0, + totalWeight: 0, + categories: {} + }); + } + + var edge = edgeMap.get(key); + edge.count += 1; + edge.totalWeight += (r.weight || 1); + var cat = r.category || 'neutral'; + edge.categories[cat] = (edge.categories[cat] || 0) + 1; + } + + // Determine dominant category per edge + var edges = []; + edgeMap.forEach(function(edge) { + var bestCat = 'neutral'; + var bestCount = 0; + for (var cat in edge.categories) { + if (edge.categories[cat] > bestCount) { + bestCat = cat; + bestCount = edge.categories[cat]; + } + } + edge.dominantCategory = bestCat; + edges.push(edge); + }); + + // Sort by count descending + edges.sort(function(a, b) { return b.count - a.count; }); + + return edges; + }, + + // ---- Step 5: Build country node objects ----------------------------------- + + _buildCountryNodes(countryMap, assignments, entities) { + var entityMap = new Map(); + for (var i = 0; i < entities.length; i++) { + entityMap.set(entities[i].id, entities[i]); + } + + var countries = []; + + assignments.forEach(function(entityIds, countryName) { + if (countryName === '__unassigned__') { + if (entityIds.length > 0) { + countries.push({ + name: 'Sonstige', + canonicalName: '__unassigned__', + entityCount: entityIds.length, + isUnassigned: true, + typeCounts: ClusterData._countTypes(entityIds, entityMap), + topEntities: ClusterData._getTopEntities(entityIds, entityMap, 5) + }); + } + return; + } + + // Count includes the country entity IDs themselves? No — only affiliated entities + var totalCount = entityIds.length; + if (totalCount === 0) return; // Skip countries with no affiliated entities + + countries.push({ + name: countryName, + canonicalName: countryName, + entityCount: totalCount, + isUnassigned: false, + countryEntityIds: countryMap.get(countryName) || [], + typeCounts: ClusterData._countTypes(entityIds, entityMap), + topEntities: ClusterData._getTopEntities(entityIds, entityMap, 5) + }); + }); + + // Sort by entity count descending + countries.sort(function(a, b) { return b.entityCount - a.entityCount; }); + + return countries; + }, + + /** + * Count entities by type within a set of IDs. + */ + _countTypes(entityIds, entityMap) { + var counts = { person: 0, organisation: 0, location: 0, event: 0, military: 0 }; + for (var i = 0; i < entityIds.length; i++) { + var e = entityMap.get(entityIds[i]); + if (e && counts.hasOwnProperty(e.entity_type)) { + counts[e.entity_type]++; + } + } + return counts; + }, + + /** + * Get top N entities by mention_count from a set of IDs. + */ + _getTopEntities(entityIds, entityMap, n) { + var ents = []; + for (var i = 0; i < entityIds.length; i++) { + var e = entityMap.get(entityIds[i]); + if (e) ents.push(e); + } + ents.sort(function(a, b) { + return (b.mention_count || 0) - (a.mention_count || 0); + }); + return ents.slice(0, n); + } +}; diff --git a/src/static/js/components.js b/src/static/js/components.js index cfffa0a..801f84b 100644 --- a/src/static/js/components.js +++ b/src/static/js/components.js @@ -716,9 +716,6 @@ const UI = { if (emptyEl) emptyEl.style.display = 'none'; - // Locations fuer GEOINT merken - this._lastLocations = locations; - // Statistik const totalArticles = locations.reduce((s, l) => s + l.article_count, 0); if (statsEl) statsEl.textContent = `${locations.length} Orte / ${totalArticles} Artikel`; @@ -745,10 +742,6 @@ const UI = { }).setView([51.1657, 10.4515], 5); // Deutschland-Zentrum this._applyMapTiles(); - - // GEOINT-Modus wiederherstellen - if (typeof GEOINT !== 'undefined') GEOINT.restoreState(this._map); - this._mapCluster = L.markerClusterGroup({ maxClusterRadius: 40, iconCreateFunction: function(cluster) { @@ -860,11 +853,6 @@ const UI = { this._mapLegendControl = legend; } - // GEOINT: Timeline mit Artikel-Daten initialisieren - if (typeof GEOINT !== 'undefined' && typeof App !== 'undefined') { - GEOINT.initTimeline(App._currentArticles || []); - } - // Resize-Fix fuer gridstack (mehrere Versuche, da Container-Hoehe erst spaeter steht) const self = this; [100, 300, 800].forEach(delay => { @@ -882,8 +870,6 @@ const UI = { _applyMapTiles() { if (!this._map) return; - // Im GEOINT-Modus Tiles nicht ueberschreiben - if (typeof GEOINT !== 'undefined' && GEOINT._active) return; // Alte Tile-Layer entfernen this._map.eachLayer(layer => { if (layer instanceof L.TileLayer) this._map.removeLayer(layer); diff --git a/src/static/js/geoint.js b/src/static/js/geoint.js deleted file mode 100644 index ecfb49e..0000000 --- a/src/static/js/geoint.js +++ /dev/null @@ -1,492 +0,0 @@ -/** - * GEOINT-Modus: Taktische Kartenansicht mit Echtzeit-Datenlayern. - */ -const GEOINT = { - _active: false, - _map: null, - _sublayers: {}, - _canvasRenderer: null, - // Layer references - _flightLayer: null, _quakeLayer: null, _gdeltLayer: null, - _heatLayer: null, _shipsLayer: null, - // Data caches - _flightsData: null, _shipsData: null, - // Intervals - _flightInterval: null, _quakeInterval: null, _gdeltInterval: null, _shipsInterval: null, - _flightFetching: false, - // UI controls - _moveHandler: null, _subControl: null, - _coordControl: null, _coordHandler: null, - _distanceLayers: null, _distancePoints: [], _distanceHandler: null, - _satTileLayer: null, _satLabelLayer: null, - _timelineData: null, - - // === HAUPTSCHALTER ===================================================== - toggle(enabled, map) { - if (!map) map = this._map; - if (!map && typeof UI !== 'undefined') map = UI._map; - if (!map) return; - this._active = enabled; - this._map = map; - - var container = document.getElementById('map-container'); - if (container) container.classList.toggle('geoint-active', enabled); - var fsContainer = document.getElementById('map-fullscreen-container'); - if (fsContainer) fsContainer.classList.toggle('geoint-active', enabled); - var card = container ? container.closest('.map-card') : null; - if (card) card.classList.toggle('geoint-card-active', enabled); - - document.querySelectorAll('#geoint-mode-cb, #geoint-mode-cb-fs').forEach(function(cb) { cb.checked = enabled; }); - - if (enabled) { - if (!this._canvasRenderer) this._canvasRenderer = L.canvas({ padding: 0.5 }); - this._applySatelliteTiles(map); - this._createSubControl(map); - this._restoreSublayers(map); - } else { - this.cleanup(); - this._restoreOsmTiles(map); - } - this._saveState(); - }, - - // === TILES ============================================================== - _applySatelliteTiles(map) { - if (!map) return; - var toRemove = []; - map.eachLayer(function(l) { if (l instanceof L.TileLayer) toRemove.push(l); }); - toRemove.forEach(function(l) { map.removeLayer(l); }); - this._satTileLayer = L.tileLayer( - 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}', - { attribution: 'Tiles © Esri', maxZoom: 19, noWrap: true } - ).addTo(map); - this._satLabelLayer = L.tileLayer( - 'https://server.arcgisonline.com/ArcGIS/rest/services/Reference/World_Boundaries_and_Places/MapServer/tile/{z}/{y}/{x}', - { maxZoom: 19, noWrap: true } - ).addTo(map); - if (this._satTileLayer.bringToBack) this._satTileLayer.bringToBack(); - }, - - _restoreOsmTiles(map) { - if (!map) return; - var toRemove = []; - map.eachLayer(function(l) { if (l instanceof L.TileLayer) toRemove.push(l); }); - toRemove.forEach(function(l) { map.removeLayer(l); }); - if (typeof UI !== 'undefined' && UI._applyMapTiles) { - UI._applyMapTiles(); - } else { - L.tileLayer('https://tile.openstreetmap.de/{z}/{x}/{y}.png', { - attribution: '© OpenStreetMap', maxZoom: 18, noWrap: true - }).addTo(map); - } - }, - - // === SUB-LAYER CONTROL ================================================== - _createSubControl(map) { - if (this._subControl) return; - var self = this; - var items = [ - ['flights', 'Flugverkehr', 'flights'], - ['ships', 'Schiffsverkehr', 'ships'], - ['quakes', 'Erdbeben', 'quakes'], - ['gdelt', 'Nachrichten', 'gdelt'], - ['_sep'], - ['heatmap', 'Heatmap', 'heatmap'], - ['coords', 'Koordinaten', 'coords'], - ['distance', 'Distanz', 'distance'], - ]; - var SubCtrl = L.Control.extend({ - options: { position: 'topright' }, - onAdd: function() { - var div = L.DomUtil.create('div', 'geoint-sub-control'); - L.DomEvent.disableClickPropagation(div); - L.DomEvent.disableScrollPropagation(div); - var html = '

GEOINT Layer

'; - items.forEach(function(it) { - if (it[0] === '_sep') { html += '
'; return; } - var checked = self._sublayers[it[0]] ? ' checked' : ''; - html += '
' + - '' + - '
'; - }); - div.innerHTML = html; - return div; - } - }); - this._subControl = new SubCtrl(); - map.addControl(this._subControl); - items.forEach(function(it) { - if (it[0] === '_sep') return; - var cb = document.getElementById('geoint-sub-' + it[0]); - if (cb) cb.addEventListener('change', function() { self._toggleSub(it[0], this.checked, map); }); - }); - }, - - _removeSubControl() { - if (this._subControl && this._map) { this._map.removeControl(this._subControl); this._subControl = null; } - }, - - _toggleSub(id, on, map) { - this._sublayers[id] = on; - this._saveState(); - var m = { flights: ['_startFlights','_stopFlights'], ships: ['_startShips','_stopShips'], - quakes: ['_startQuakes','_stopQuakes'], gdelt: ['_startGdelt','_stopGdelt'], - heatmap: ['_startHeatmap','_stopHeatmap'], coords: ['_startCoords','_stopCoords'], - distance: ['_startDistance','_stopDistance'] }; - if (m[id]) this[m[id][on ? 0 : 1]](map); - }, - - _restoreSublayers(map) { - var self = this; - Object.keys(this._sublayers).forEach(function(id) { - if (self._sublayers[id]) self._toggleSub(id, true, map); - }); - }, - - // === FLUGVERKEHR ======================================================== - _startFlights(map) { - if (this._flightLayer) return; - this._flightLayer = L.layerGroup().addTo(map); - var self = this; - this._fetchFlights(map); - this._flightInterval = setInterval(function() { self._fetchFlights(map); }, 30000); - this._moveHandler = function() { - clearTimeout(self._moveDebounce); - self._moveDebounce = setTimeout(function() { - self._renderFlights(map); - self._renderShips(map); - }, 500); - }; - map.on('moveend', this._moveHandler); - }, - - _stopFlights() { - if (this._flightInterval) { clearInterval(this._flightInterval); this._flightInterval = null; } - if (this._moveHandler && this._map) { this._map.off('moveend', this._moveHandler); this._moveHandler = null; } - if (this._flightLayer && this._map) { this._map.removeLayer(this._flightLayer); this._flightLayer = null; } - }, - - _fetchFlights(map) { - if (this._flightFetching || !map) return; - this._flightFetching = true; - var self = this; - var token = localStorage.getItem('osint_token') || ''; - fetch('/api/geoint/flights', { headers: token ? { 'Authorization': 'Bearer ' + token } : {} }) - .then(function(r) { return r.ok ? r.json() : { ac: [] }; }) - .then(function(data) { - self._flightsData = data.ac || data.aircraft || []; - self._renderFlights(map); - }) - .catch(function() {}) - .finally(function() { self._flightFetching = false; }); - }, - - _renderFlights(map) { - if (!map || !this._flightLayer || !this._flightsData) return; - var newLayer = L.layerGroup(); - var bounds = map.getBounds(); - var zoom = map.getZoom(); - var max = zoom >= 10 ? 600 : zoom >= 7 ? 400 : zoom >= 5 ? 200 : 80; - var r = zoom >= 10 ? 4 : zoom >= 7 ? 3 : 2; - var count = 0; - for (var i = 0; i < this._flightsData.length && count < max; i++) { - var a = this._flightsData[i]; - if (!a.lat || !a.lon || !bounds.contains([a.lat, a.lon])) continue; - count++; - var cs = (a.flight || a.callsign || a.hex || '???').trim(); - var alt = a.alt_baro || a.altitude || '?'; - var spd = a.gs || a.ground_speed || '?'; - var popup = '
' + cs + '' + - '
ALT ' + (typeof alt === 'number' ? alt.toLocaleString() + ' ft' : alt) + - '
SPD ' + (typeof spd === 'number' ? Math.round(spd) + ' kts' : spd) + '
'; - L.circleMarker([a.lat, a.lon], { - radius: r, fillColor: '#00ff88', color: '#004422', - fillOpacity: 0.9, weight: 1, renderer: this._canvasRenderer - }).bindPopup(popup, { className: 'geoint-leaflet-popup' }).addTo(newLayer); - } - this._map.removeLayer(this._flightLayer); - this._flightLayer = newLayer.addTo(this._map); - }, - - // === SCHIFFSVERKEHR ===================================================== - _startShips(map) { - if (this._shipsLayer) return; - this._shipsLayer = L.layerGroup().addTo(map); - var self = this; - this._fetchShips(map); - this._shipsInterval = setInterval(function() { self._fetchShips(map); }, 60000); - }, - - _stopShips() { - if (this._shipsInterval) { clearInterval(this._shipsInterval); this._shipsInterval = null; } - if (this._shipsLayer && this._map) { this._map.removeLayer(this._shipsLayer); this._shipsLayer = null; } - }, - - _fetchShips(map) { - var self = this; - var token = localStorage.getItem('osint_token') || ''; - fetch('/api/geoint/ships', { headers: token ? { 'Authorization': 'Bearer ' + token } : {} }) - .then(function(r) { return r.ok ? r.json() : { ships: [] }; }) - .then(function(data) { - self._shipsData = data.ships || []; - self._renderShips(map); - }) - .catch(function() {}); - }, - - _renderShips(map) { - if (!map || !this._shipsLayer || !this._shipsData) return; - var newLayer = L.layerGroup(); - var bounds = map.getBounds(); - var zoom = map.getZoom(); - var max = zoom >= 10 ? 800 : zoom >= 7 ? 400 : zoom >= 5 ? 150 : 50; - var minSog = zoom >= 8 ? 0 : zoom >= 5 ? 0.3 : 1.0; - var r = zoom >= 10 ? 3.5 : zoom >= 7 ? 2.5 : 2; - var count = 0; - for (var i = 0; i < this._shipsData.length && count < max; i++) { - var s = this._shipsData[i]; - if (!s.lat || !s.lon || !bounds.contains([s.lat, s.lon])) continue; - if ((s.sog || 0) < minSog) continue; - count++; - var color = (s.sog || 0) > 0.5 ? '#4499ff' : '#556688'; - var navLabels = {0:'Motor',1:'Anker',2:'N.steuerb.',3:'Eingeschr.',5:'Festgemacht',7:'Fischfang',8:'Segel'}; - var shipName = s.name || ('MMSI ' + (s.mmsi||'?')); - var popup = '
' + shipName + '' + - (s.name ? '
MMSI ' + (s.mmsi||'?') : '') + - '
SOG ' + (s.sog||0).toFixed(1) + ' kn' + - '
COG ' + Math.round(s.cog||0) + '\u00b0' + '
'; - L.circleMarker([s.lat, s.lon], { - radius: r, fillColor: color, color: '#223355', - fillOpacity: 0.85, weight: 0.5, renderer: this._canvasRenderer - }).bindPopup(popup, { className: 'geoint-leaflet-popup' }).addTo(newLayer); - } - this._map.removeLayer(this._shipsLayer); - this._shipsLayer = newLayer.addTo(this._map); - }, - - // === ERDBEBEN ============================================================ - _startQuakes(map) { - if (this._quakeLayer) return; - this._quakeLayer = L.layerGroup().addTo(map); - this._fetchQuakes(map); - var self = this; - this._quakeInterval = setInterval(function() { self._fetchQuakes(map); }, 300000); - }, - - _stopQuakes() { - if (this._quakeInterval) { clearInterval(this._quakeInterval); this._quakeInterval = null; } - if (this._quakeLayer && this._map) { this._map.removeLayer(this._quakeLayer); this._quakeLayer = null; } - }, - - _fetchQuakes() { - if (!this._quakeLayer) return; - var self = this; - fetch('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson') - .then(function(r) { return r.json(); }) - .then(function(data) { - if (!self._quakeLayer) return; - self._quakeLayer.clearLayers(); - var now = Date.now(); - (data.features || []).forEach(function(f) { - var c = f.geometry.coordinates, p = f.properties; - var mag = p.mag || 1; - var ageH = (now - p.time) / 3600000; - var color = ageH < 1 ? '#ff0000' : ageH < 6 ? '#ff6600' : ageH < 12 ? '#ffaa00' : '#ffdd00'; - var cls = ageH < 2 ? 'geoint-quake-marker' : ''; - L.circleMarker([c[1], c[0]], { - radius: Math.max(mag * 3.5, 5), fillColor: color, color: color, - weight: 1.5, fillOpacity: 0.6, className: cls - }).bindPopup('
M' + mag.toFixed(1) + ' ' + (p.place||'') + - '
TIEFE ' + (c[2]||'?') + ' km
', - { className: 'geoint-leaflet-popup' } - ).addTo(self._quakeLayer); - }); - }) - .catch(function() {}); - }, - - // === GDELT NACHRICHTEN =================================================== - _startGdelt(map) { - if (this._gdeltLayer) return; - this._gdeltLayer = L.markerClusterGroup({ - maxClusterRadius: 30, - iconCreateFunction: function(cluster) { - var n = cluster.getChildCount(); - return L.divIcon({ html: '
' + (n > 99 ? '99+' : n) + '
', className: '', iconSize: [22, 22] }); - } - }).addTo(map); - this._fetchGdelt(map); - var self = this; - this._gdeltInterval = setInterval(function() { self._fetchGdelt(map); }, 600000); - }, - - _stopGdelt() { - if (this._gdeltInterval) { clearInterval(this._gdeltInterval); this._gdeltInterval = null; } - if (this._gdeltLayer && this._map) { this._map.removeLayer(this._gdeltLayer); this._gdeltLayer = null; } - }, - - _fetchGdelt() { - if (!this._gdeltLayer) return; - var self = this; - var query = 'conflict OR crisis OR disaster'; - if (typeof App !== 'undefined' && App.currentIncidentId) { - var inc = (App.incidents || []).find(function(i) { return i.id === App.currentIncidentId; }); - if (inc && inc.title) query = encodeURIComponent(inc.title.substring(0, 80)); - } - var token = localStorage.getItem('osint_token') || ''; - fetch('/api/geoint/gdelt?query=' + query, { headers: token ? { 'Authorization': 'Bearer ' + token } : {} }) - .then(function(r) { return r.ok ? r.json() : { features: [] }; }) - .then(function(data) { - if (!self._gdeltLayer) return; - self._gdeltLayer.clearLayers(); - (data.features || []).slice(0, 200).forEach(function(f) { - var c = f.geometry.coordinates, p = f.properties || {}; - var icon = L.divIcon({ className: '', html: '
N
', iconSize: [18, 18], iconAnchor: [9, 9] }); - var popup = '
' + (p.name || p.title || 'Nachricht').substring(0, 100) + '' + - (p.url ? '
Quelle' : '') + '
'; - L.marker([c[1], c[0]], { icon: icon }).bindPopup(popup, { className: 'geoint-leaflet-popup' }).addTo(self._gdeltLayer); - }); - }) - .catch(function() {}); - }, - - // === HEATMAP ============================================================= - _startHeatmap(map) { - if (this._heatLayer || typeof L.heatLayer === 'undefined') return; - var locs = (typeof UI !== 'undefined' && UI._lastLocations) ? UI._lastLocations : []; - if (!locs.length) return; - var maxC = Math.max.apply(null, locs.map(function(l) { return l.article_count || 1; })); - var pts = locs.map(function(l) { return [l.lat, l.lon, (l.article_count || 1) / maxC]; }); - this._heatLayer = L.heatLayer(pts, { - radius: 30, blur: 20, maxZoom: 12, - gradient: { 0.2: '#004400', 0.4: '#00ff88', 0.6: '#ffaa00', 0.8: '#ff4400', 1.0: '#ff0000' } - }).addTo(map); - }, - - _stopHeatmap() { - if (this._heatLayer && this._map) { this._map.removeLayer(this._heatLayer); this._heatLayer = null; } - }, - - // === KOORDINATENANZEIGE ================================================== - _startCoords(map) { - if (this._coordControl) return; - var Ctrl = L.Control.extend({ - options: { position: 'bottomleft' }, - onAdd: function() { - var div = L.DomUtil.create('div', 'geoint-coord-display'); - div.id = 'geoint-coord-text'; - div.textContent = 'LAT: -- LON: --'; - return div; - } - }); - this._coordControl = new Ctrl(); - map.addControl(this._coordControl); - var el = document.getElementById('geoint-coord-text'); - this._coordHandler = function(e) { - if (el) el.textContent = 'LAT: ' + e.latlng.lat.toFixed(4) + ' LON: ' + e.latlng.lng.toFixed(4); - }; - map.on('mousemove', this._coordHandler); - }, - - _stopCoords() { - if (this._coordHandler && this._map) { this._map.off('mousemove', this._coordHandler); this._coordHandler = null; } - if (this._coordControl && this._map) { this._map.removeControl(this._coordControl); this._coordControl = null; } - }, - - // === DISTANZMESSUNG ====================================================== - _startDistance(map) { - if (this._distanceLayers) return; - this._distanceLayers = L.layerGroup().addTo(map); - this._distancePoints = []; - map.getContainer().style.cursor = 'crosshair'; - var self = this; - this._distanceHandler = function(e) { - self._distancePoints.push(e.latlng); - L.circleMarker(e.latlng, { radius: 6, fillColor: '#ff2222', color: '#ffffff', fillOpacity: 1, weight: 2 }).addTo(self._distanceLayers); - if (self._distancePoints.length >= 2) { - var p1 = self._distancePoints[self._distancePoints.length - 2]; - var p2 = self._distancePoints[self._distancePoints.length - 1]; - L.polyline([p1, p2], { color: '#000000', weight: 5, opacity: 0.5 }).addTo(self._distanceLayers); - L.polyline([p1, p2], { color: '#ff2222', weight: 3, dashArray: '8 5' }).addTo(self._distanceLayers); - var dist = p1.distanceTo(p2); - var total = 0; - for (var i = 1; i < self._distancePoints.length; i++) total += self._distancePoints[i-1].distanceTo(self._distancePoints[i]); - var label = dist >= 1000 ? (dist/1000).toFixed(1) + ' km' : Math.round(dist) + ' m'; - var tLabel = total >= 1000 ? (total/1000).toFixed(1) + ' km' : Math.round(total) + ' m'; - var text = self._distancePoints.length > 2 ? label + ' (\u03a3 ' + tLabel + ')' : label; - L.marker([(p1.lat+p2.lat)/2, (p1.lng+p2.lng)/2], { - icon: L.divIcon({ className: '', html: '
' + text + '
', iconSize: [0,0], iconAnchor: [0,12] }) - }).addTo(self._distanceLayers); - } - }; - map.on('click', this._distanceHandler); - }, - - _stopDistance() { - this._distancePoints = []; - if (this._distanceHandler && this._map) { this._map.off('click', this._distanceHandler); this._distanceHandler = null; this._map.getContainer().style.cursor = ''; } - if (this._distanceLayers && this._map) { this._map.removeLayer(this._distanceLayers); this._distanceLayers = null; } - }, - - // === TIMELINE ============================================================ - initTimeline(articles) { - if (!articles || !articles.length) return; - var dates = articles.map(function(a) { return a.collected_at || a.published_at; }) - .filter(Boolean).map(function(d) { return new Date(d).getTime(); }) - .filter(function(t) { return !isNaN(t); }).sort(function(a,b) { return a-b; }); - if (dates.length < 2) return; - this._timelineData = { min: dates[0], max: dates[dates.length-1], articles: articles }; - var slider = document.getElementById('geoint-timeline-slider'); - if (slider) { slider.min = dates[0]; slider.max = dates[dates.length-1]; slider.value = dates[dates.length-1]; } - var label = document.getElementById('geoint-timeline-label'); - if (label) label.textContent = this._fmtDate(dates[dates.length-1]); - }, - - _onTimelineChange(val) { - var label = document.getElementById('geoint-timeline-label'); - if (label) label.textContent = this._fmtDate(parseInt(val)); - if (!this._map || !UI._mapCluster || !this._timelineData) return; - var maxT = parseInt(val), arts = this._timelineData.articles; - var vis = new Set(); - arts.forEach(function(a) { if (new Date(a.collected_at || a.published_at || 0).getTime() <= maxT) vis.add(a.id); }); - UI._mapCluster.eachLayer(function(m) { - if (m._articleIds) m.setOpacity(m._articleIds.some(function(id) { return vis.has(id); }) ? 1 : 0.08); - }); - }, - - _resetTimeline() { - if (this._timelineData) { - var slider = document.getElementById('geoint-timeline-slider'); - if (slider) { slider.value = this._timelineData.max; this._onTimelineChange(this._timelineData.max); } - } - }, - - _fmtDate(ts) { - var d = new Date(ts); - return d.toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit' }) + ' ' + - d.toLocaleTimeString('de-DE', { hour: '2-digit', minute: '2-digit' }); - }, - - // === CLEANUP ============================================================= - cleanup() { - this._stopFlights(); this._stopShips(); this._stopQuakes(); - this._stopGdelt(); this._stopHeatmap(); this._stopCoords(); this._stopDistance(); - this._removeSubControl(); - document.querySelectorAll('.geoint-active').forEach(function(el) { el.classList.remove('geoint-active'); }); - }, - - // === STATE =============================================================== - _saveState() { - try { - localStorage.setItem('geoint_mode', this._active ? 'true' : 'false'); - localStorage.setItem('geoint_sublayers', JSON.stringify(this._sublayers)); - } catch(e) {} - }, - - restoreState(map) { - if (!map) return; - this._map = map; - try { var s = localStorage.getItem('geoint_sublayers'); if (s) this._sublayers = JSON.parse(s); } catch(e) { this._sublayers = {}; } - if (localStorage.getItem('geoint_mode') === 'true') this.toggle(true, map); - }, -}; diff --git a/src/static/js/network-cluster.js b/src/static/js/network-cluster.js new file mode 100644 index 0000000..403a272 --- /dev/null +++ b/src/static/js/network-cluster.js @@ -0,0 +1,993 @@ +/** + * AegisSight OSINT Monitor - Cluster Graph Visualization v2 + * + * Hierarchical country-based network visualization powered by d3.js v7. + * Level 1: Country overview with prominent inter-country edges + * Level 2: Country drill-down (entities within a country) + * + * Requires: d3 (global), ClusterData (cluster-data.js) + */ + +/* global d3, ClusterData, NetworkGraph */ + +var ClusterGraph = { + + _svg: null, + _g: null, + _zoom: null, + _simulation: null, + _tooltip: null, + _container: null, + _allEntities: null, + _allRelations: null, + _clusterData: null, + _entityMap: null, + _currentLevel: 'overview', + _currentCountry: null, + _width: 960, + _height: 640, + + _categoryColors: { + conflict: '#EF4444', + alliance: '#22C55E', + diplomacy: '#3B82F6', + economic: '#FBBF24', + neutral: '#6B7280', + legal: '#A855F7' + }, + + _entityTypeColors: { + person: '#60A5FA', + organisation: '#C084FC', + location: '#34D399', + event: '#FBBF24', + military: '#F87171' + }, + + _categoryLabels: { + conflict: 'Konflikt', alliance: 'Allianz', diplomacy: 'Diplomatie', + economic: 'Ökonomie', neutral: 'Neutral', legal: 'Recht' + }, + + _typeLabels: { + person: 'Personen', organisation: 'Organisationen', + location: 'Orte', event: 'Ereignisse', military: 'Militär' + }, + + // ---- public API ----------------------------------------------------------- + + init: function(containerId, entities, relations) { + this.destroy(); + var wrapper = document.getElementById(containerId); + if (!wrapper) return; + wrapper.innerHTML = ''; + this._container = wrapper; + this._allEntities = entities; + this._allRelations = relations; + + this._entityMap = new Map(); + for (var i = 0; i < entities.length; i++) { + this._entityMap.set(entities[i].id, entities[i]); + } + + var rect = wrapper.getBoundingClientRect(); + this._width = rect.width || 960; + this._height = rect.height || 640; + + this._svg = d3.select(wrapper) + .append('svg') + .attr('width', '100%') + .attr('height', '100%') + .attr('viewBox', '0 0 ' + this._width + ' ' + this._height) + .attr('preserveAspectRatio', 'xMidYMid meet') + .style('background', 'transparent'); + + this._createDefs(); + this._g = this._svg.append('g').attr('class', 'cg-zoom-layer'); + + this._zoom = d3.zoom() + .scaleExtent([0.2, 6]) + .on('zoom', function(event) { + ClusterGraph._g.attr('transform', event.transform); + }); + this._svg.call(this._zoom); + this._svg.on('dblclick.zoom', null); + + this._tooltip = d3.select(wrapper) + .append('div') + .attr('class', 'cg-tooltip') + .style('position', 'absolute') + .style('pointer-events', 'none') + .style('background', 'rgba(15,23,42,0.95)') + .style('color', '#e2e8f0') + .style('border', '1px solid #334155') + .style('border-radius', '8px') + .style('padding', '10px 14px') + .style('font-size', '12px') + .style('max-width', '320px') + .style('z-index', '1000') + .style('display', 'none') + .style('line-height', '1.6'); + + this._clusterData = ClusterData.buildClusterData(entities, relations); + this._currentLevel = 'overview'; + this._currentCountry = null; + this._renderOverview(); + this._updateBreadcrumb(); + this._renderCountrySidebar(); + }, + + destroy: function() { + if (this._simulation) { this._simulation.stop(); this._simulation = null; } + if (this._svg) { this._svg.remove(); this._svg = null; } + if (this._tooltip) { this._tooltip.remove(); this._tooltip = null; } + this._g = null; + this._clusterData = null; + this._allEntities = null; + this._allRelations = null; + this._entityMap = null; + this._currentLevel = 'overview'; + this._currentCountry = null; + }, + + // ---- LEVEL 1: Country Overview ------------------------------------------- + + _renderOverview: function() { + var self = this; + if (this._simulation) this._simulation.stop(); + this._g.selectAll('*').remove(); + + // Filter: no "Sonstige", no empty, minimum 10 entities + var countries = this._clusterData.countries.filter(function(c) { + return c.entityCount >= 10 && !c.isUnassigned; + }); + + var edges = this._clusterData.edges.slice(); + + // Radius scale + var maxCount = 1; + for (var i = 0; i < countries.length; i++) { + if (countries[i].entityCount > maxCount) maxCount = countries[i].entityCount; + } + var rScale = d3.scaleSqrt().domain([0, maxCount]).range([22, 65]); + + for (var ci = 0; ci < countries.length; ci++) { + countries[ci]._radius = rScale(countries[ci].entityCount); + countries[ci].id = countries[ci].canonicalName; + } + + // Visible edges only + var countryNames = new Set(countries.map(function(c) { return c.canonicalName; })); + var visibleEdges = edges.filter(function(e) { + return countryNames.has(e.source) && countryNames.has(e.target) && e.count >= 3; + }); + + // Edge scale + var maxEdgeCount = 1; + for (var ei = 0; ei < visibleEdges.length; ei++) { + if (visibleEdges[ei].count > maxEdgeCount) maxEdgeCount = visibleEdges[ei].count; + } + var edgeScale = d3.scaleSqrt().domain([1, maxEdgeCount]).range([2, 18]); + + // ---- EDGES (drawn first = behind nodes) ---- + var linkGroup = this._g.append('g').attr('class', 'cg-links'); + var linkSel = linkGroup.selectAll('line') + .data(visibleEdges) + .join('line') + .attr('stroke', function(d) { + return self._categoryColors[d.dominantCategory] || '#6B7280'; + }) + .attr('stroke-width', function(d) { return edgeScale(d.count); }) + .attr('stroke-opacity', 0.6) + .attr('stroke-linecap', 'round') + .style('cursor', 'pointer') + .on('mouseover', function(event, d) { + d3.select(this).attr('stroke-opacity', 1); + var lines = ['' + self._esc(d.source) + ' \u2194 ' + self._esc(d.target) + '']; + lines.push('' + d.count + ' Beziehungen'); + var cats = Object.keys(d.categories).sort(function(a, b) { + return d.categories[b] - d.categories[a]; + }); + for (var ci = 0; ci < Math.min(cats.length, 4); ci++) { + var c = cats[ci]; + var color = self._categoryColors[c] || '#6B7280'; + lines.push('\u25CF ' + + (self._categoryLabels[c] || c) + ': ' + d.categories[c]); + } + self._showTooltip(event, lines.join('
')); + }) + .on('mousemove', function(event) { self._moveTooltip(event); }) + .on('mouseout', function() { + d3.select(this).attr('stroke-opacity', 0.6); + self._hideTooltip(); + }); + + // Edge labels (count) on top edges + var topEdges = visibleEdges.filter(function(e) { return e.count >= 10; }); + var edgeLabelGroup = this._g.append('g').attr('class', 'cg-edge-labels'); + var edgeLabelSel = edgeLabelGroup.selectAll('text') + .data(topEdges) + .join('text') + .attr('text-anchor', 'middle') + .attr('fill', function(d) { + return self._categoryColors[d.dominantCategory] || '#94a3b8'; + }) + .attr('font-size', '11px') + .attr('font-weight', '700') + .attr('pointer-events', 'none') + .text(function(d) { return d.count; }); + + // ---- NODES ---- + var nodeGroup = this._g.append('g').attr('class', 'cg-nodes'); + var nodeSel = nodeGroup.selectAll('g') + .data(countries) + .join('g') + .attr('class', 'cg-country-node') + .style('cursor', 'pointer') + .call(this._drag()); + + // Main circle + nodeSel.append('circle') + .attr('class', 'cg-country-circle') + .attr('r', function(d) { return d._radius; }) + .attr('fill', function(d) { return self._getCountryFill(d); }) + .attr('stroke', '#e2e8f0') + .attr('stroke-width', 2) + .attr('opacity', 0.9); + + // Mini donut + nodeSel.each(function(d) { + self._renderMiniDonut(d3.select(this), d); + }); + + // Country name + nodeSel.append('text') + .attr('class', 'cg-country-label') + .text(function(d) { return d.name; }) + .attr('text-anchor', 'middle') + .attr('dy', -6) + .attr('fill', '#f1f5f9') + .attr('font-size', function(d) { + return Math.max(10, Math.min(15, d._radius / 3.5)) + 'px'; + }) + .attr('font-weight', '700') + .attr('pointer-events', 'none'); + + // Entity count + nodeSel.append('text') + .attr('text-anchor', 'middle') + .attr('dy', 8) + .attr('fill', '#cbd5e1') + .attr('font-size', '10px') + .attr('pointer-events', 'none') + .text(function(d) { return d.entityCount; }); + + // Top actor name below circle + nodeSel.append('text') + .attr('text-anchor', 'middle') + .attr('dy', function(d) { return d._radius + 16; }) + .attr('fill', '#94a3b8') + .attr('font-size', '9px') + .attr('font-style', 'italic') + .attr('pointer-events', 'none') + .text(function(d) { + if (!d.topEntities || d.topEntities.length === 0) return ''; + var top = d.topEntities[0]; + var name = top.name.length > 22 ? top.name.slice(0, 20) + '\u2026' : top.name; + return name; + }); + + // Click -> drill down + nodeSel.on('click', function(event, d) { + event.stopPropagation(); + self._drillDown(d.canonicalName); + }); + + // Hover + nodeSel.on('mouseover', function(event, d) { + d3.select(this).select('.cg-country-circle') + .transition().duration(150) + .attr('stroke-width', 4).attr('opacity', 1); + + // Highlight connected edges + linkSel.attr('stroke-opacity', function(e) { + return (e.source === d.canonicalName || e.target === d.canonicalName || + (e.source.id && e.source.id === d.canonicalName) || + (e.target.id && e.target.id === d.canonicalName)) ? 0.9 : 0.15; + }); + + var lines = ['' + self._esc(d.name) + '']; + lines.push(d.entityCount + ' Entitäten'); + var tc = d.typeCounts; + var parts = []; + if (tc.person) parts.push(tc.person + ' Pers.'); + if (tc.organisation) parts.push(tc.organisation + ' Org.'); + if (tc.military) parts.push(tc.military + ' Mil.'); + if (tc.event) parts.push(tc.event + ' Ereig.'); + if (parts.length) lines.push(parts.join(' \u00B7 ')); + if (d.topEntities && d.topEntities.length > 0) { + lines.push('
'); + for (var ti = 0; ti < Math.min(d.topEntities.length, 4); ti++) { + var te = d.topEntities[ti]; + var typeColor = self._entityTypeColors[te.entity_type] || '#94a3b8'; + lines.push('\u25CF ' + + self._esc(te.name)); + } + } + self._showTooltip(event, lines.join('
')); + }); + nodeSel.on('mousemove', function(event) { self._moveTooltip(event); }); + nodeSel.on('mouseout', function(event, d) { + d3.select(this).select('.cg-country-circle') + .transition().duration(150) + .attr('stroke-width', 2).attr('opacity', 0.9); + linkSel.attr('stroke-opacity', 0.6); + self._hideTooltip(); + }); + + // ---- Force simulation ---- + var simLinks = visibleEdges.map(function(e) { + return { source: e.source, target: e.target, count: e.count }; + }); + + this._simulation = d3.forceSimulation(countries) + .force('link', d3.forceLink(simLinks) + .id(function(d) { return d.id; }) + .distance(function(d) { return 180; }) + .strength(0.5)) + .force('charge', d3.forceManyBody() + .strength(function(d) { return -400 - d._radius * 6; })) + .force('center', d3.forceCenter(self._width / 2, self._height / 2)) + .force('collide', d3.forceCollide() + .radius(function(d) { return d._radius + 30; }) + .strength(0.9)) + .alphaDecay(0.025); + + this._simulation.on('tick', function() { + linkSel + .attr('x1', function(d) { return d.source.x; }) + .attr('y1', function(d) { return d.source.y; }) + .attr('x2', function(d) { return d.target.x; }) + .attr('y2', function(d) { return d.target.y; }); + + edgeLabelSel + .attr('x', function(d) { return (d.source.x + d.target.x) / 2; }) + .attr('y', function(d) { return (d.source.y + d.target.y) / 2 - 4; }); + + nodeSel.attr('transform', function(d) { + return 'translate(' + d.x + ',' + d.y + ')'; + }); + }); + + // Background click + this._svg.on('click', function() { + linkSel.attr('stroke-opacity', 0.6); + }); + + // Zoom-to-fit after simulation stabilizes + var tickCount = 0; + this._simulation.on('tick.zoomfit', function() { + tickCount++; + if (tickCount === 120) { + self._zoomToFit(countries, 40); + self._simulation.on('tick.zoomfit', null); // Remove this listener + } + }); + }, + + _zoomToFit: function(nodes, padding) { + if (!nodes || nodes.length === 0 || !this._svg || !this._zoom) return; + + var minX = Infinity, minY = Infinity, maxX = -Infinity, maxY = -Infinity; + for (var i = 0; i < nodes.length; i++) { + var n = nodes[i]; + if (n.x === undefined) continue; + var r = n._radius || 30; + if (n.x - r < minX) minX = n.x - r; + if (n.y - r < minY) minY = n.y - r; + if (n.x + r > maxX) maxX = n.x + r; + if (n.y + r > maxY) maxY = n.y + r; + } + + var graphWidth = maxX - minX + padding * 2; + var graphHeight = maxY - minY + padding * 2; + var scale = Math.min( + this._width / graphWidth, + this._height / graphHeight, + 1.5 // Max zoom + ); + scale = Math.max(scale, 0.3); // Min zoom + + var cx = (minX + maxX) / 2; + var cy = (minY + maxY) / 2; + var tx = this._width / 2 - cx * scale; + var ty = this._height / 2 - cy * scale; + + this._svg.transition().duration(600).call( + this._zoom.transform, + d3.zoomIdentity.translate(tx, ty).scale(scale) + ); + }, + + // ---- LEVEL 2: Country Drill-down ----------------------------------------- + + _drillDown: function(countryName) { + var self = this; + this._currentLevel = 'country'; + this._currentCountry = countryName; + this._updateBreadcrumb(); + this._renderCountrySidebar(); + + this._g.transition().duration(350).style('opacity', 0) + .on('end', function() { + if (self._simulation) self._simulation.stop(); + self._g.selectAll('*').remove(); + self._renderCountryDetail(countryName); + self._g.style('opacity', 0) + .transition().duration(350).style('opacity', 1); + }); + + this._svg.transition().duration(350).call( + this._zoom.transform, d3.zoomIdentity + ); + }, + + _renderCountryDetail: function(countryName) { + var self = this; + var entityIds = this._clusterData.assignments.get(countryName) || []; + if (entityIds.length === 0) { + this._g.append('text') + .attr('x', this._width / 2).attr('y', this._height / 2) + .attr('text-anchor', 'middle').attr('fill', '#94a3b8') + .attr('font-size', '16px') + .text('Keine Entitäten für ' + countryName); + return; + } + + var idSet = new Set(entityIds); + var entities = []; + for (var i = 0; i < entityIds.length; i++) { + var e = this._entityMap.get(entityIds[i]); + if (e) entities.push({ ...e }); + } + + var internalRelations = []; + for (var ri = 0; ri < this._allRelations.length; ri++) { + var r = this._allRelations[ri]; + if (idSet.has(r.source_entity_id) && idSet.has(r.target_entity_id)) { + internalRelations.push(r); + } + } + + // Connection counts for sizing + var connCounts = {}; + for (var ii = 0; ii < internalRelations.length; ii++) { + var ir = internalRelations[ii]; + connCounts[ir.source_entity_id] = (connCounts[ir.source_entity_id] || 0) + 1; + connCounts[ir.target_entity_id] = (connCounts[ir.target_entity_id] || 0) + 1; + } + + var maxConn = 1; + for (var k in connCounts) { + if (connCounts[k] > maxConn) maxConn = connCounts[k]; + } + var rScale = d3.scaleSqrt().domain([0, maxConn]).range([4, 26]); + + entities.forEach(function(n) { + n._connections = connCounts[n.id] || 0; + n._radius = rScale(n._connections); + }); + + // Show labels for top 30 or nodes with radius >= 10 + var sorted = entities.slice().sort(function(a, b) { return b._connections - a._connections; }); + var labelThreshold = sorted.length > 30 ? sorted[29]._connections : 0; + + // Links + var linkGroup = this._g.append('g'); + var simLinks = internalRelations.map(function(r) { + return { source: r.source_entity_id, target: r.target_entity_id, + category: r.category, weight: r.weight || 1 }; + }); + + var linkSel = linkGroup.selectAll('line') + .data(simLinks).join('line') + .attr('stroke', function(d) { return self._categoryColors[d.category] || '#6B7280'; }) + .attr('stroke-width', function(d) { return Math.max(0.5, Math.min(3, d.weight * 0.6)); }) + .attr('stroke-opacity', 0.25); + + // Nodes + var nodeGroup = this._g.append('g'); + var nodeSel = nodeGroup.selectAll('g') + .data(entities, function(d) { return d.id; }) + .join('g').style('cursor', 'pointer').call(this._drag()); + + nodeSel.append('circle') + .attr('r', function(d) { return d._radius; }) + .attr('fill', function(d) { return self._entityTypeColors[d.entity_type] || '#94A3B8'; }) + .attr('stroke', '#0f172a').attr('stroke-width', 1.5).attr('opacity', 0.85); + + nodeSel.filter(function(d) { + return d._connections >= labelThreshold || d._radius >= 10; + }).append('text') + .text(function(d) { return d.name.length > 20 ? d.name.slice(0, 18) + '\u2026' : d.name; }) + .attr('dy', function(d) { return d._radius + 13; }) + .attr('text-anchor', 'middle').attr('fill', '#cbd5e1') + .attr('font-size', '10px').attr('pointer-events', 'none'); + + // Hover + nodeSel.on('mouseover', function(event, d) { + d3.select(this).select('circle') + .transition().duration(100) + .attr('stroke', '#FBBF24').attr('stroke-width', 3).attr('opacity', 1); + var lines = ['' + self._esc(d.name) + '']; + lines.push(self._typeLabels[d.entity_type] || d.entity_type); + if (d.description) { + lines.push('' + + self._esc(d.description.length > 100 ? d.description.slice(0, 97) + '...' : d.description) + + ''); + } + lines.push('Verbindungen: ' + d._connections); + self._showTooltip(event, lines.join('
')); + }); + nodeSel.on('mousemove', function(event) { self._moveTooltip(event); }); + nodeSel.on('mouseout', function() { + d3.select(this).select('circle') + .transition().duration(100) + .attr('stroke', '#0f172a').attr('stroke-width', 1.5).attr('opacity', 0.85); + self._hideTooltip(); + }); + + // Click: highlight neighborhood + nodeSel.on('click', function(event, d) { + event.stopPropagation(); + var connIds = new Set([d.id]); + linkSel.each(function(l) { + var s = typeof l.source === 'object' ? l.source.id : l.source; + var t = typeof l.target === 'object' ? l.target.id : l.target; + if (s === d.id || t === d.id) { connIds.add(s); connIds.add(t); } + }); + linkSel.attr('stroke-opacity', function(l) { + var s = typeof l.source === 'object' ? l.source.id : l.source; + var t = typeof l.target === 'object' ? l.target.id : l.target; + return (s === d.id || t === d.id) ? 0.8 : 0.04; + }); + nodeSel.select('circle').attr('opacity', function(n) { return connIds.has(n.id) ? 1 : 0.12; }); + nodeSel.select('text').attr('opacity', function(n) { return connIds.has(n.id) ? 1 : 0.08; }); + self._updateDetailPanel(d); + }); + + this._svg.on('click', function() { + nodeSel.select('circle').attr('stroke', '#0f172a').attr('stroke-width', 1.5).attr('opacity', 0.85); + linkSel.attr('stroke-opacity', 0.25); + self._clearDetailPanel(); + }); + + // Force + this._simulation = d3.forceSimulation(entities) + .force('link', d3.forceLink(simLinks).id(function(d) { return d.id; }) + .distance(function(d) { return Math.max(30, 100 - d.weight * 10); })) + .force('charge', d3.forceManyBody().strength(function(d) { return -60 - d._radius * 3; })) + .force('center', d3.forceCenter(self._width / 2, self._height / 2)) + .force('collide', d3.forceCollide().radius(function(d) { return d._radius + 3; })) + .alphaDecay(0.02); + + this._simulation.on('tick', function() { + linkSel.attr('x1', function(d) { return d.source.x; }).attr('y1', function(d) { return d.source.y; }) + .attr('x2', function(d) { return d.target.x; }).attr('y2', function(d) { return d.target.y; }); + nodeSel.attr('transform', function(d) { return 'translate(' + d.x + ',' + d.y + ')'; }); + }); + + // Zoom-to-fit for detail view + var detailTickCount = 0; + this._simulation.on('tick.zoomfit', function() { + detailTickCount++; + if (detailTickCount === 100) { + self._zoomToFit(entities, 30); + self._simulation.on('tick.zoomfit', null); + } + }); + }, + + // ---- Sidebar: Country List ----------------------------------------------- + + // ---- Filter state -------------------------------------------------------- + + _activeCategories: null, // null = all active + _searchTerm: '', + + _initFilters: function() { + this._activeCategories = new Set(['conflict', 'alliance', 'diplomacy', 'economic', 'neutral', 'legal']); + this._searchTerm = ''; + }, + + _applyEdgeFilter: function() { + if (!this._g) return; + var active = this._activeCategories; + this._g.selectAll('.cg-links line').attr('display', function(d) { + return active.has(d.dominantCategory) ? null : 'none'; + }); + this._g.selectAll('.cg-edge-labels text').attr('display', function(d) { + return active.has(d.dominantCategory) ? null : 'none'; + }); + }, + + _applySearch: function(term) { + this._searchTerm = (term || '').toLowerCase().trim(); + if (!this._g || !this._clusterData) return; + + if (!this._searchTerm) { + // Reset all nodes + this._g.selectAll('.cg-country-node').select('.cg-country-circle') + .attr('opacity', 0.9).attr('stroke-width', 2).attr('stroke', '#e2e8f0'); + this._g.selectAll('.cg-links line').attr('stroke-opacity', 0.6); + return; + } + + // Find which countries contain matching entities + var matchingCountries = new Set(); + var self = this; + this._allEntities.forEach(function(e) { + var text = (e.name || '') + ' ' + (e.description || ''); + if (e.aliases) text += ' ' + e.aliases.join(' '); + if (text.toLowerCase().indexOf(self._searchTerm) !== -1) { + var country = self._clusterData.entityToCountry.get(e.id); + if (country) matchingCountries.add(country); + } + }); + + // Highlight matching country nodes + this._g.selectAll('.cg-country-node').select('.cg-country-circle') + .attr('opacity', function(d) { + return matchingCountries.has(d.canonicalName) ? 1 : 0.15; + }) + .attr('stroke', function(d) { + return matchingCountries.has(d.canonicalName) ? '#FBBF24' : '#e2e8f0'; + }) + .attr('stroke-width', function(d) { + return matchingCountries.has(d.canonicalName) ? 4 : 2; + }); + + this._g.selectAll('.cg-links line').attr('stroke-opacity', 0.15); + }, + + toggleCategory: function(cat) { + if (!this._activeCategories) this._initFilters(); + if (this._activeCategories.has(cat)) { + this._activeCategories.delete(cat); + } else { + this._activeCategories.add(cat); + } + this._applyEdgeFilter(); + // Update button inline styles + var btn = document.querySelector('.cg-cat-btn[data-cat="' + cat + '"]'); + if (btn) { + var isActive = this._activeCategories.has(cat); + var color = this._categoryColors[cat] || '#6B7280'; + btn.style.border = '1px solid ' + (isActive ? color : '#334155'); + btn.style.background = isActive ? color + '22' : 'transparent'; + btn.style.color = isActive ? color : '#64748b'; + } + }, + + // ---- Sidebar: Country List ----------------------------------------------- + + _renderCountrySidebar: function() { + var panel = document.getElementById('network-detail-panel'); + if (!panel) return; + var self = this; + + if (!this._activeCategories) this._initFilters(); + + if (this._currentLevel === 'overview') { + var countries = this._clusterData.countries.filter(function(c) { + return c.entityCount >= 10 && !c.isUnassigned; + }); + + var html = ''; + + // Search + html += '
'; + html += ''; + html += '
'; + + // Category filter + html += '
'; + html += '
Beziehungsfilter
'; + html += '
'; + var cats = ['conflict', 'alliance', 'diplomacy', 'economic', 'neutral', 'legal']; + for (var fi = 0; fi < cats.length; fi++) { + var cat = cats[fi]; + var color = self._categoryColors[cat]; + var label = self._categoryLabels[cat]; + var isActive = self._activeCategories.has(cat); + html += ''; + } + html += '
'; + + // Summary + html += '
'; + html += '

' + + countries.length + ' Akteure

'; + var unassigned = this._clusterData.countries.find(function(c) { return c.isUnassigned; }); + if (unassigned && unassigned.entityCount > 0) { + html += '
' + + unassigned.entityCount + ' ohne Zuordnung
'; + } + html += '
'; + + // Top edges + var topEdges = this._clusterData.edges.slice(0, 6); + if (topEdges.length > 0) { + html += '
'; + html += '
Top-Beziehungen
'; + for (var ei = 0; ei < topEdges.length; ei++) { + var edge = topEdges[ei]; + var eColor = self._categoryColors[edge.dominantCategory] || '#6B7280'; + html += '
'; + html += '\u25CF'; + html += '' + + self._esc(edge.source) + ' \u2194 ' + self._esc(edge.target) + ''; + html += '' + edge.count + ''; + html += '
'; + } + html += '
'; + } + + // Country list + html += '
Akteure
'; + for (var ci = 0; ci < countries.length; ci++) { + var c = countries[ci]; + html += '
'; + html += '' + self._esc(c.name) + ''; + html += '' + c.entityCount + ''; + html += '
'; + } + + panel.innerHTML = html; + panel.style.display = 'block'; + } else if (this._currentLevel === 'country') { + // Show type legend for detail view + var countryData = null; + for (var fi = 0; fi < this._clusterData.countries.length; fi++) { + if (this._clusterData.countries[fi].canonicalName === this._currentCountry) { + countryData = this._clusterData.countries[fi]; break; + } + } + + var html2 = ''; + if (countryData) { + html2 += '

' + + self._esc(countryData.name) + '

'; + html2 += '
' + + countryData.entityCount + ' Entitäten
'; + + var tc = countryData.typeCounts; + var types = ['person', 'organisation', 'military', 'event', 'location']; + html2 += '
'; + for (var ti = 0; ti < types.length; ti++) { + var t = types[ti]; + var cnt = tc[t] || 0; + if (cnt === 0) continue; + var tColor = self._entityTypeColors[t]; + html2 += '
'; + html2 += ''; + html2 += '' + (self._typeLabels[t] || t) + ''; + html2 += '' + cnt + ''; + html2 += '
'; + } + html2 += '
'; + + // Top entities + if (countryData.topEntities && countryData.topEntities.length > 0) { + html2 += '
Top-Akteure
'; + for (var tei = 0; tei < countryData.topEntities.length; tei++) { + var te = countryData.topEntities[tei]; + var teColor = self._entityTypeColors[te.entity_type] || '#94a3b8'; + html2 += '
'; + html2 += '\u25CF '; + html2 += '' + self._esc(te.name) + ''; + html2 += '
'; + } + } + } + + html2 += '
Klicke auf einen Knoten für Details.
'; + panel.innerHTML = html2; + panel.style.display = 'block'; + } + }, + + // ---- Detail panel for entity click --------------------------------------- + + _updateDetailPanel: function(entity) { + if (typeof NetworkGraph !== 'undefined' && NetworkGraph._updateDetailPanel) { + var tempData = NetworkGraph._data; + NetworkGraph._data = { entities: this._allEntities, relations: this._allRelations }; + NetworkGraph._updateDetailPanel(entity); + NetworkGraph._data = tempData; + return; + } + var panel = document.getElementById('network-detail-panel'); + if (!panel) return; + var typeColor = this._entityTypeColors[entity.entity_type] || '#94A3B8'; + var html = '

' + this._esc(entity.name) + '

'; + html += '' + this._esc(entity.entity_type) + ''; + if (entity.description) html += '

' + this._esc(entity.description) + '

'; + html += '
Verbindungen: ' + (entity._connections || 0) + '
'; + panel.innerHTML = html; + }, + + _clearDetailPanel: function() { + this._renderCountrySidebar(); + }, + + // ---- Navigation ---------------------------------------------------------- + + goBack: function() { + var self = this; + if (this._currentLevel !== 'country') return; + this._currentLevel = 'overview'; + this._currentCountry = null; + this._updateBreadcrumb(); + + this._g.transition().duration(300).style('opacity', 0) + .on('end', function() { + if (self._simulation) self._simulation.stop(); + self._g.selectAll('*').remove(); + self._clusterData = ClusterData.buildClusterData(self._allEntities, self._allRelations); + self._renderOverview(); + self._renderCountrySidebar(); + self._g.style('opacity', 0).transition().duration(300).style('opacity', 1); + }); + this._svg.transition().duration(300).call(this._zoom.transform, d3.zoomIdentity); + }, + + _updateBreadcrumb: function() { + var container = document.getElementById('cluster-breadcrumb'); + if (!container) return; + var self = this; + container.innerHTML = ''; + container.style.display = 'flex'; + + if (this._currentLevel === 'country') { + var backBtn = document.createElement('button'); + backBtn.className = 'cluster-back-btn'; + backBtn.innerHTML = '\u2190 Zurück'; + backBtn.onclick = function() { self.goBack(); }; + container.appendChild(backBtn); + + var sep = document.createElement('span'); + sep.className = 'breadcrumb-separator'; + sep.textContent = ' / '; + container.appendChild(sep); + } + + var overviewSpan = document.createElement('span'); + overviewSpan.textContent = 'Länder-Übersicht'; + overviewSpan.className = 'breadcrumb-item' + (this._currentLevel === 'overview' ? ' active' : ' clickable'); + if (this._currentLevel !== 'overview') overviewSpan.onclick = function() { self.goBack(); }; + container.appendChild(overviewSpan); + + if (this._currentCountry) { + var sep2 = document.createElement('span'); + sep2.className = 'breadcrumb-separator'; + sep2.textContent = ' \u203A '; + container.appendChild(sep2); + + var cd = null; + for (var i = 0; i < this._clusterData.countries.length; i++) { + if (this._clusterData.countries[i].canonicalName === this._currentCountry) { cd = this._clusterData.countries[i]; break; } + } + var cs = document.createElement('span'); + cs.className = 'breadcrumb-item active'; + cs.textContent = this._currentCountry + (cd ? ' (' + cd.entityCount + ')' : ''); + container.appendChild(cs); + } + }, + + // ---- Visual helpers ------------------------------------------------------- + + _getCountryFill: function(d) { + // Subtle gradient based on dominant relationship + var edges = this._clusterData.edges; + var catCounts = {}; + for (var i = 0; i < edges.length; i++) { + var e = edges[i]; + if (e.source === d.canonicalName || e.target === d.canonicalName) { + for (var cat in e.categories) catCounts[cat] = (catCounts[cat] || 0) + e.categories[cat]; + } + } + var bestCat = 'neutral', bestCount = 0; + for (var c in catCounts) { if (catCounts[c] > bestCount) { bestCat = c; bestCount = catCounts[c]; } } + return this._darken(this._categoryColors[bestCat] || '#6B7280', 0.45); + }, + + _renderMiniDonut: function(gSel, d) { + var types = ['person', 'organisation', 'military', 'event', 'location']; + var counts = [], colors = []; + for (var i = 0; i < types.length; i++) { + var c = d.typeCounts[types[i]] || 0; + if (c > 0) { counts.push(c); colors.push(this._entityTypeColors[types[i]]); } + } + if (counts.length === 0) return; + var outerR = d._radius + 5, innerR = d._radius + 1; + var arc = d3.arc().innerRadius(innerR).outerRadius(outerR); + var pie = d3.pie().sort(null).value(function(v) { return v; }); + var arcs = pie(counts); + for (var ai = 0; ai < arcs.length; ai++) { + gSel.append('path').attr('d', arc(arcs[ai])).attr('fill', colors[ai]) + .attr('opacity', 0.8).attr('pointer-events', 'none'); + } + }, + + _createDefs: function() { + var defs = this._svg.append('defs'); + var filter = defs.append('filter') + .attr('id', 'cg-glow').attr('x', '-50%').attr('y', '-50%') + .attr('width', '200%').attr('height', '200%'); + filter.append('feGaussianBlur').attr('in', 'SourceGraphic').attr('stdDeviation', 6).attr('result', 'blur'); + filter.append('feColorMatrix').attr('in', 'blur').attr('type', 'matrix') + .attr('values', '0 0 0 0 0.24 0 0 0 0 0.51 0 0 0 0 0.96 0 0 0 0.5 0').attr('result', 'glow'); + var merge = filter.append('feMerge'); + merge.append('feMergeNode').attr('in', 'glow'); + merge.append('feMergeNode').attr('in', 'SourceGraphic'); + }, + + _drag: function() { + var self = this; + return d3.drag() + .on('start', function(event, d) { + if (!event.active && self._simulation) self._simulation.alphaTarget(0.3).restart(); + d.fx = d.x; d.fy = d.y; + }) + .on('drag', function(event, d) { d.fx = event.x; d.fy = event.y; }) + .on('end', function(event, d) { + if (!event.active && self._simulation) self._simulation.alphaTarget(0); + d.fx = null; d.fy = null; + }); + }, + + _showTooltip: function(event, html) { + if (!this._tooltip) return; + this._tooltip.style('display', 'block').html(html); + this._moveTooltip(event); + }, + _moveTooltip: function(event) { + if (!this._tooltip) return; + this._tooltip.style('left', (event.offsetX + 16) + 'px').style('top', (event.offsetY - 10) + 'px'); + }, + _hideTooltip: function() { + if (this._tooltip) this._tooltip.style('display', 'none'); + }, + + _esc: function(str) { + if (!str) return ''; + var div = document.createElement('div'); + div.appendChild(document.createTextNode(str)); + return div.innerHTML; + }, + + _darken: function(hex, amount) { + var r = parseInt(hex.slice(1, 3), 16); + var g = parseInt(hex.slice(3, 5), 16); + var b = parseInt(hex.slice(5, 7), 16); + r = Math.round(r * (1 - amount)); + g = Math.round(g * (1 - amount)); + b = Math.round(b * (1 - amount)); + return '#' + ((1 << 24) + (r << 16) + (g << 8) + b).toString(16).slice(1); + } +}; diff --git a/src/static/js/network-graph.js b/src/static/js/network-graph.js index b9e788d..b14a7c5 100644 --- a/src/static/js/network-graph.js +++ b/src/static/js/network-graph.js @@ -1,831 +1,832 @@ -/** - * AegisSight OSINT Monitor - Network Graph Visualization - * - * Force-directed graph powered by d3.js v7. - * Expects d3 to be loaded globally from CDN before this script runs. - * - * Usage: - * NetworkGraph.init('network-graph-area', data); - * NetworkGraph.filterByType(new Set(['person', 'organisation'])); - * NetworkGraph.search('Russland'); - * NetworkGraph.destroy(); - */ - -/* global d3 */ - -const NetworkGraph = { - - // ---- internal state ------------------------------------------------------- - _svg: null, - _simulation: null, - _data: null, // raw data as received - _filtered: null, // currently visible subset - _container: null, // inside SVG that receives zoom transforms - _zoom: null, - _selectedNode: null, - _tooltip: null, - - _filters: { - types: new Set(), // empty = all visible - minWeight: 1, - searchTerm: '', - }, - - _colorMap: { - node: { - person: '#60A5FA', - organisation: '#C084FC', - location: '#34D399', - event: '#FBBF24', - military: '#F87171', - }, - edge: { - alliance: '#34D399', - conflict: '#EF4444', - diplomacy: '#FBBF24', - economic: '#60A5FA', - legal: '#C084FC', - neutral: '#6B7280', - }, - }, - - // ---- public API ----------------------------------------------------------- - - /** - * Initialise the graph inside the given container element. - * @param {string} containerId – DOM id of the wrapper element - * @param {object} data – { entities: [], relations: [] } - */ - init(containerId, data) { - this.destroy(); - - const wrapper = document.getElementById(containerId); - if (!wrapper) { - console.error('[NetworkGraph] Container #' + containerId + ' not found.'); - return; - } - - this._data = this._prepareData(data); - this._filters = { types: new Set(), minWeight: 1, searchTerm: '' }; - this._selectedNode = null; - - const rect = wrapper.getBoundingClientRect(); - const width = rect.width || 960; - const height = rect.height || 640; - - // SVG - this._svg = d3.select(wrapper) - .append('svg') - .attr('width', '100%') - .attr('height', '100%') - .attr('viewBox', [0, 0, width, height].join(' ')) - .attr('preserveAspectRatio', 'xMidYMid meet') - .style('background', 'transparent'); - - // Defs: arrow markers per category - this._createMarkers(); - - // Defs: glow filter for top-connected nodes - this._createGlowFilter(); - - // Zoom container - this._container = this._svg.append('g').attr('class', 'ng-zoom-layer'); - - // Zoom behaviour - this._zoom = d3.zoom() - .scaleExtent([0.1, 8]) - .on('zoom', (event) => { - this._container.attr('transform', event.transform); - }); - - this._svg.call(this._zoom); - - // Double-click resets zoom - this._svg.on('dblclick.zoom', null); - this._svg.on('dblclick', () => this.resetView()); - - // Tooltip - this._tooltip = d3.select(wrapper) - .append('div') - .attr('class', 'ng-tooltip') - .style('position', 'absolute') - .style('pointer-events', 'none') - .style('background', 'rgba(15,23,42,0.92)') - .style('color', '#e2e8f0') - .style('border', '1px solid #334155') - .style('border-radius', '6px') - .style('padding', '6px 10px') - .style('font-size', '12px') - .style('max-width', '260px') - .style('z-index', '1000') - .style('display', 'none'); - - // Simulation - this._simulation = d3.forceSimulation() - .force('link', d3.forceLink().id(d => d.id).distance(d => { - // Inverse weight: higher weight -> closer - return Math.max(40, 200 - d.weight * 25); - })) - .force('charge', d3.forceManyBody().strength(-300)) - .force('center', d3.forceCenter(width / 2, height / 2)) - .force('collide', d3.forceCollide().radius(d => d._radius + 6)) - .alphaDecay(0.02); - - this.render(); - }, - - /** - * Tear down the graph completely. - */ - destroy() { - if (this._simulation) { - this._simulation.stop(); - this._simulation = null; - } - if (this._svg) { - this._svg.remove(); - this._svg = null; - } - if (this._tooltip) { - this._tooltip.remove(); - this._tooltip = null; - } - this._container = null; - this._data = null; - this._filtered = null; - this._selectedNode = null; - }, - - /** - * Full re-render based on current filters. - */ - render() { - if (!this._data || !this._container) return; - - this._applyFilters(); - - const nodes = this._filtered.entities; - const links = this._filtered.relations; - - // Clear previous drawing - this._container.selectAll('*').remove(); - - // Determine top-5 most connected node IDs - const connectionCounts = {}; - this._data.relations.forEach(r => { - connectionCounts[r.source_entity_id] = (connectionCounts[r.source_entity_id] || 0) + 1; - connectionCounts[r.target_entity_id] = (connectionCounts[r.target_entity_id] || 0) + 1; - }); - const top5Ids = new Set( - Object.entries(connectionCounts) - .sort((a, b) => b[1] - a[1]) - .slice(0, 5) - .map(e => e[0]) - ); - - // Radius scale (sqrt of connection count) - const maxConn = Math.max(1, ...Object.values(connectionCounts)); - const rScale = d3.scaleSqrt().domain([0, maxConn]).range([8, 40]); - - nodes.forEach(n => { - n._connections = connectionCounts[n.id] || 0; - n._radius = rScale(n._connections); - n._isTop5 = top5Ids.has(n.id); - }); - - // ---- edges ------------------------------------------------------------ - const linkGroup = this._container.append('g').attr('class', 'ng-links'); - - const linkSel = linkGroup.selectAll('line') - .data(links, d => d.id) - .join('line') - .attr('stroke', d => this._colorMap.edge[d.category] || this._colorMap.edge.neutral) - .attr('stroke-width', d => Math.max(1, d.weight * 0.8)) - .attr('stroke-opacity', d => Math.min(1, 0.3 + d.weight * 0.14)) - .attr('marker-end', d => 'url(#ng-arrow-' + (d.category || 'neutral') + ')') - .style('cursor', 'pointer') - .on('mouseover', (event, d) => { - const lines = []; - if (d.label) lines.push('' + this._esc(d.label) + ''); - if (d.description) lines.push(this._esc(d.description)); - lines.push('Kategorie: ' + this._esc(d.category) + ' | Gewicht: ' + d.weight); - this._showTooltip(event, lines.join('
')); - }) - .on('mousemove', (event) => this._moveTooltip(event)) - .on('mouseout', () => this._hideTooltip()); - - // ---- nodes ------------------------------------------------------------ - const nodeGroup = this._container.append('g').attr('class', 'ng-nodes'); - - const nodeSel = nodeGroup.selectAll('g') - .data(nodes, d => d.id) - .join('g') - .attr('class', 'ng-node') - .style('cursor', 'pointer') - .call(this._drag(this._simulation)) - .on('mouseover', (event, d) => { - this._showTooltip(event, '' + this._esc(d.name) + '
' + - this._esc(d.entity_type) + ' | Verbindungen: ' + d._connections); - }) - .on('mousemove', (event) => this._moveTooltip(event)) - .on('mouseout', () => this._hideTooltip()) - .on('click', (event, d) => { - event.stopPropagation(); - this._onNodeClick(d, linkSel, nodeSel); - }); - - // Circle - nodeSel.append('circle') - .attr('r', d => d._radius) - .attr('fill', d => this._colorMap.node[d.entity_type] || '#94A3B8') - .attr('stroke', '#0f172a') - .attr('stroke-width', 1.5) - .attr('filter', d => d._isTop5 ? 'url(#ng-glow)' : null); - - // Label - nodeSel.append('text') - .text(d => d.name.length > 15 ? d.name.slice(0, 14) + '\u2026' : d.name) - .attr('dy', d => d._radius + 14) - .attr('text-anchor', 'middle') - .attr('fill', '#cbd5e1') - .attr('font-size', '10px') - .attr('pointer-events', 'none'); - - // ---- simulation ------------------------------------------------------- - // Build link data with object references (d3 expects id strings or objects) - const simNodes = nodes; - const simLinks = links.map(l => ({ - ...l, - source: typeof l.source === 'object' ? l.source.id : l.source_entity_id, - target: typeof l.target === 'object' ? l.target.id : l.target_entity_id, - })); - - this._simulation.nodes(simNodes); - this._simulation.force('link').links(simLinks); - this._simulation.force('collide').radius(d => d._radius + 6); - this._simulation.alpha(1).restart(); - - this._simulation.on('tick', () => { - linkSel - .attr('x1', d => d.source.x) - .attr('y1', d => d.source.y) - .attr('x2', d => { - // Shorten line so arrow doesn't overlap circle - const target = d.target; - const dx = target.x - d.source.x; - const dy = target.y - d.source.y; - const dist = Math.sqrt(dx * dx + dy * dy) || 1; - return target.x - (dx / dist) * (target._radius + 4); - }) - .attr('y2', d => { - const target = d.target; - const dx = target.x - d.source.x; - const dy = target.y - d.source.y; - const dist = Math.sqrt(dx * dx + dy * dy) || 1; - return target.y - (dy / dist) * (target._radius + 4); - }); - - nodeSel.attr('transform', d => 'translate(' + d.x + ',' + d.y + ')'); - }); - - // Click on background to deselect - this._svg.on('click', () => { - this._selectedNode = null; - nodeSel.select('circle').attr('stroke', '#0f172a').attr('stroke-width', 1.5); - linkSel.attr('stroke-opacity', d => Math.min(1, 0.3 + d.weight * 0.14)); - this._clearDetailPanel(); - }); - - // Apply search highlight if active - if (this._filters.searchTerm) { - this._applySearchHighlight(nodeSel); - } - }, - - // ---- filtering ------------------------------------------------------------ - - /** - * Compute the visible subset from raw data + current filters. - */ - _applyFilters() { - let entities = this._data.entities.slice(); - let relations = this._data.relations.slice(); - - // Type filter - if (this._filters.types.size > 0) { - const allowed = this._filters.types; - entities = entities.filter(e => allowed.has(e.entity_type)); - const visibleIds = new Set(entities.map(e => e.id)); - relations = relations.filter(r => - visibleIds.has(r.source_entity_id) && visibleIds.has(r.target_entity_id) - ); - } - - // Weight filter - if (this._filters.minWeight > 1) { - relations = relations.filter(r => r.weight >= this._filters.minWeight); - } - - // Cluster isolation - if (this._filters._isolateId) { - const centerId = this._filters._isolateId; - const connectedIds = new Set([centerId]); - relations.forEach(r => { - if (r.source_entity_id === centerId) connectedIds.add(r.target_entity_id); - if (r.target_entity_id === centerId) connectedIds.add(r.source_entity_id); - }); - entities = entities.filter(e => connectedIds.has(e.id)); - relations = relations.filter(r => - connectedIds.has(r.source_entity_id) && connectedIds.has(r.target_entity_id) - ); - } - - this._filtered = { entities, relations }; - }, - - /** - * Populate the detail panel (#network-detail-panel) with entity info. - * @param {object} entity - */ - _updateDetailPanel(entity) { - const panel = document.getElementById('network-detail-panel'); - if (!panel) return; - - const typeColor = this._colorMap.node[entity.entity_type] || '#94A3B8'; - - // Connected relations - const connected = this._data.relations.filter( - r => r.source_entity_id === entity.id || r.target_entity_id === entity.id - ); - - // Group by category - const grouped = {}; - connected.forEach(r => { - const cat = r.category || 'neutral'; - if (!grouped[cat]) grouped[cat] = []; - // Determine the "other" entity - const otherId = r.source_entity_id === entity.id ? r.target_entity_id : r.source_entity_id; - const other = this._data.entities.find(e => e.id === otherId); - grouped[cat].push({ relation: r, other }); - }); - - let html = ''; - - // Header - html += '
'; - html += '

' + this._esc(entity.name) + '

'; - html += '' + - this._esc(entity.entity_type) + ''; - if (entity.corrected_by_opus) { - html += ' Corrected by Opus'; - } - html += '
'; - - // Description - if (entity.description) { - html += '

' + - this._esc(entity.description) + '

'; - } - - // Aliases - if (entity.aliases && entity.aliases.length > 0) { - html += '
'; - html += 'Aliase:
'; - entity.aliases.forEach(a => { - html += '' + - this._esc(a) + ''; - }); - html += '
'; - } - - // Mention count - html += '
'; - html += 'Erw\u00e4hnungen: ' + - (entity.mention_count || 0) + ''; - html += '
'; - - // Relations grouped by category - const categoryLabels = { - alliance: 'Allianz', conflict: 'Konflikt', diplomacy: 'Diplomatie', - economic: '\u00d6konomie', legal: 'Recht', neutral: 'Neutral', - }; - - if (Object.keys(grouped).length > 0) { - html += '
'; - html += 'Verbindungen (' + connected.length + '):'; - - Object.keys(grouped).sort().forEach(cat => { - const catColor = this._colorMap.edge[cat] || this._colorMap.edge.neutral; - const catLabel = categoryLabels[cat] || cat; - html += '
'; - html += '' + - this._esc(catLabel) + ''; - grouped[cat].forEach(item => { - const r = item.relation; - const otherName = item.other ? item.other.name : '?'; - const direction = r.source_entity_id === entity.id ? '\u2192' : '\u2190'; - html += '
'; - html += direction + ' ' + this._esc(otherName) + ''; - if (r.label) html += ' — ' + this._esc(r.label); - html += ' (G:' + r.weight + ')'; - html += '
'; - }); - html += '
'; - }); - - html += '
'; - } - - panel.innerHTML = html; - panel.style.display = 'block'; - }, - - /** - * Filter nodes by entity type. - * @param {Set|Array} types – entity_type values to show. Empty = all. - */ - filterByType(types) { - this._filters.types = types instanceof Set ? types : new Set(types); - this._filters._isolateId = null; - this.render(); - }, - - /** - * Filter edges by minimum weight. - * @param {number} minWeight - */ - filterByWeight(minWeight) { - this._filters.minWeight = minWeight; - this.render(); - }, - - /** - * Highlight nodes matching the search term (name, aliases, description). - * @param {string} term - */ - search(term) { - this._filters.searchTerm = (term || '').trim().toLowerCase(); - this.render(); - }, - - /** - * Show only the 1-hop neighbourhood of the given entity. - * @param {string} entityId - */ - isolateCluster(entityId) { - this._filters._isolateId = entityId; - this.render(); - }, - - /** - * Reset zoom, filters and selection to initial state. - */ - resetView() { - this._filters = { types: new Set(), minWeight: 1, searchTerm: '' }; - this._selectedNode = null; - this._clearDetailPanel(); - - if (this._svg && this._zoom) { - this._svg.transition().duration(500).call( - this._zoom.transform, d3.zoomIdentity - ); - } - - this.render(); - }, - - // ---- export --------------------------------------------------------------- - - /** - * Export the current graph as a PNG image. - */ - exportPNG() { - if (!this._svg) return; - - const svgNode = this._svg.node(); - const serializer = new XMLSerializer(); - const svgString = serializer.serializeToString(svgNode); - const svgBlob = new Blob([svgString], { type: 'image/svg+xml;charset=utf-8' }); - const url = URL.createObjectURL(svgBlob); - - const img = new Image(); - img.onload = function () { - const canvas = document.createElement('canvas'); - const bbox = svgNode.getBoundingClientRect(); - canvas.width = bbox.width * 2; // 2x for retina - canvas.height = bbox.height * 2; - const ctx = canvas.getContext('2d'); - ctx.scale(2, 2); - ctx.fillStyle = '#0f172a'; - ctx.fillRect(0, 0, bbox.width, bbox.height); - ctx.drawImage(img, 0, 0, bbox.width, bbox.height); - URL.revokeObjectURL(url); - - canvas.toBlob(function (blob) { - if (!blob) return; - const a = document.createElement('a'); - a.href = URL.createObjectURL(blob); - a.download = 'aegis-network-' + Date.now() + '.png'; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(a.href); - }, 'image/png'); - }; - img.src = url; - }, - - /** - * Export the current relations as CSV. - */ - exportCSV() { - if (!this._data) return; - - const entityMap = {}; - this._data.entities.forEach(e => { entityMap[e.id] = e.name; }); - - const rows = [['source', 'target', 'category', 'label', 'weight', 'description'].join(',')]; - this._data.relations.forEach(r => { - rows.push([ - this._csvField(entityMap[r.source_entity_id] || r.source_entity_id), - this._csvField(entityMap[r.target_entity_id] || r.target_entity_id), - this._csvField(r.category), - this._csvField(r.label), - r.weight, - this._csvField(r.description || ''), - ].join(',')); - }); - - const blob = new Blob([rows.join('\n')], { type: 'text/csv;charset=utf-8' }); - const a = document.createElement('a'); - a.href = URL.createObjectURL(blob); - a.download = 'aegis-network-' + Date.now() + '.csv'; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(a.href); - }, - - /** - * Export the full data as JSON. - */ - exportJSON() { - if (!this._data) return; - - const exportData = { - entities: this._data.entities.map(e => ({ - id: e.id, - name: e.name, - name_normalized: e.name_normalized, - entity_type: e.entity_type, - description: e.description, - aliases: e.aliases, - mention_count: e.mention_count, - corrected_by_opus: e.corrected_by_opus, - metadata: e.metadata, - })), - relations: this._data.relations.map(r => ({ - id: r.id, - source_entity_id: r.source_entity_id, - target_entity_id: r.target_entity_id, - category: r.category, - label: r.label, - description: r.description, - weight: r.weight, - status: r.status, - evidence: r.evidence, - })), - }; - - const blob = new Blob( - [JSON.stringify(exportData, null, 2)], - { type: 'application/json;charset=utf-8' } - ); - const a = document.createElement('a'); - a.href = URL.createObjectURL(blob); - a.download = 'aegis-network-' + Date.now() + '.json'; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(a.href); - }, - - // ---- internal helpers ----------------------------------------------------- - - /** - * Prepare / clone data so we do not mutate the original. - */ - _prepareData(raw) { - return { - entities: (raw.entities || []).map(e => ({ ...e })), - relations: (raw.relations || []).map(r => ({ ...r })), - }; - }, - - /** - * Create SVG arrow markers for each edge category. - */ - _createMarkers() { - const defs = this._svg.append('defs'); - const categories = Object.keys(this._colorMap.edge); - - categories.forEach(cat => { - defs.append('marker') - .attr('id', 'ng-arrow-' + cat) - .attr('viewBox', '0 -5 10 10') - .attr('refX', 10) - .attr('refY', 0) - .attr('markerWidth', 8) - .attr('markerHeight', 8) - .attr('orient', 'auto') - .append('path') - .attr('d', 'M0,-4L10,0L0,4') - .attr('fill', this._colorMap.edge[cat]); - }); - }, - - /** - * Create SVG glow filter for top-5 nodes. - */ - _createGlowFilter() { - const defs = this._svg.select('defs'); - const filter = defs.append('filter') - .attr('id', 'ng-glow') - .attr('x', '-50%') - .attr('y', '-50%') - .attr('width', '200%') - .attr('height', '200%'); - - filter.append('feGaussianBlur') - .attr('in', 'SourceGraphic') - .attr('stdDeviation', 4) - .attr('result', 'blur'); - - filter.append('feColorMatrix') - .attr('in', 'blur') - .attr('type', 'matrix') - .attr('values', '0 0 0 0 0.98 0 0 0 0 0.75 0 0 0 0 0.14 0 0 0 0.7 0') - .attr('result', 'glow'); - - const merge = filter.append('feMerge'); - merge.append('feMergeNode').attr('in', 'glow'); - merge.append('feMergeNode').attr('in', 'SourceGraphic'); - }, - - /** - * d3 drag behaviour. - */ - _drag(simulation) { - function dragstarted(event, d) { - if (!event.active) simulation.alphaTarget(0.3).restart(); - d.fx = d.x; - d.fy = d.y; - } - - function dragged(event, d) { - d.fx = event.x; - d.fy = event.y; - } - - function dragended(event, d) { - if (!event.active) simulation.alphaTarget(0); - d.fx = null; - d.fy = null; - } - - return d3.drag() - .on('start', dragstarted) - .on('drag', dragged) - .on('end', dragended); - }, - - /** - * Handle node click – highlight edges, show detail panel. - */ - _onNodeClick(d, linkSel, nodeSel) { - this._selectedNode = d; - - // Highlight selected node - nodeSel.select('circle') - .attr('stroke', n => n.id === d.id ? '#FBBF24' : '#0f172a') - .attr('stroke-width', n => n.id === d.id ? 3 : 1.5); - - // Highlight connected edges - const connectedNodeIds = new Set([d.id]); - linkSel.each(function (l) { - const srcId = typeof l.source === 'object' ? l.source.id : l.source; - const tgtId = typeof l.target === 'object' ? l.target.id : l.target; - if (srcId === d.id || tgtId === d.id) { - connectedNodeIds.add(srcId); - connectedNodeIds.add(tgtId); - } - }); - - linkSel.attr('stroke-opacity', l => { - const srcId = typeof l.source === 'object' ? l.source.id : l.source; - const tgtId = typeof l.target === 'object' ? l.target.id : l.target; - if (srcId === d.id || tgtId === d.id) { - return Math.min(1, 0.3 + l.weight * 0.14) + 0.3; - } - return 0.08; - }); - - nodeSel.select('circle').attr('opacity', n => - connectedNodeIds.has(n.id) ? 1 : 0.25 - ); - nodeSel.select('text').attr('opacity', n => - connectedNodeIds.has(n.id) ? 1 : 0.2 - ); - - // Detail panel - const entity = this._data.entities.find(e => e.id === d.id); - if (entity) { - this._updateDetailPanel(entity); - } - }, - - /** - * Apply search highlighting (glow matching, dim rest). - */ - _applySearchHighlight(nodeSel) { - const term = this._filters.searchTerm; - if (!term) return; - - nodeSel.each(function (d) { - const matches = NetworkGraph._matchesSearch(d, term); - d3.select(this).select('circle') - .attr('opacity', matches ? 1 : 0.15) - .attr('filter', matches ? 'url(#ng-glow)' : null); - d3.select(this).select('text') - .attr('opacity', matches ? 1 : 0.1); - }); - }, - - /** - * Check if entity matches the search term. - */ - _matchesSearch(entity, term) { - if (!term) return true; - if (entity.name && entity.name.toLowerCase().includes(term)) return true; - if (entity.name_normalized && entity.name_normalized.toLowerCase().includes(term)) return true; - if (entity.description && entity.description.toLowerCase().includes(term)) return true; - if (entity.aliases) { - for (let i = 0; i < entity.aliases.length; i++) { - if (entity.aliases[i].toLowerCase().includes(term)) return true; - } - } - return false; - }, - - /** - * Clear the detail panel. - */ - _clearDetailPanel() { - const panel = document.getElementById('network-detail-panel'); - if (panel) { - panel.innerHTML = '

Klicke auf einen Knoten, um Details anzuzeigen.

'; - } - }, - - // ---- tooltip helpers ------------------------------------------------------ - - _showTooltip(event, html) { - if (!this._tooltip) return; - this._tooltip - .style('display', 'block') - .html(html); - this._moveTooltip(event); - }, - - _moveTooltip(event) { - if (!this._tooltip) return; - this._tooltip - .style('left', (event.offsetX + 14) + 'px') - .style('top', (event.offsetY - 10) + 'px'); - }, - - _hideTooltip() { - if (!this._tooltip) return; - this._tooltip.style('display', 'none'); - }, - - // ---- string helpers ------------------------------------------------------- - - _esc(str) { - if (!str) return ''; - const div = document.createElement('div'); - div.appendChild(document.createTextNode(str)); - return div.innerHTML; - }, - - _csvField(val) { - const s = String(val == null ? '' : val); - if (s.includes(',') || s.includes('"') || s.includes('\n')) { - return '"' + s.replace(/"/g, '""') + '"'; - } - return s; - }, -}; +/** + * AegisSight OSINT Monitor - Network Graph Visualization + * + * Force-directed graph powered by d3.js v7. + * Expects d3 to be loaded globally from CDN before this script runs. + * + * Usage: + * NetworkGraph.init('network-graph-area', data); + * NetworkGraph.filterByType(new Set(['person', 'organisation'])); + * NetworkGraph.search('Russland'); + * NetworkGraph.destroy(); + */ + +/* global d3 */ + +const NetworkGraph = { + + // ---- internal state ------------------------------------------------------- + _svg: null, + _simulation: null, + _data: null, // raw data as received + _filtered: null, // currently visible subset + _container: null, // inside SVG that receives zoom transforms + _zoom: null, + _selectedNode: null, + _tooltip: null, + + _filters: { + types: new Set(), // empty = all visible + minWeight: 1, + searchTerm: '', + }, + + _colorMap: { + node: { + person: '#60A5FA', + organisation: '#C084FC', + location: '#34D399', + event: '#FBBF24', + military: '#F87171', + }, + edge: { + alliance: '#34D399', + conflict: '#EF4444', + diplomacy: '#FBBF24', + economic: '#60A5FA', + legal: '#C084FC', + neutral: '#6B7280', + }, + }, + + // ---- public API ----------------------------------------------------------- + + /** + * Initialise the graph inside the given container element. + * @param {string} containerId – DOM id of the wrapper element + * @param {object} data – { entities: [], relations: [] } + */ + init(containerId, data) { + this.destroy(); + + const wrapper = document.getElementById(containerId); + if (!wrapper) { + console.error('[NetworkGraph] Container #' + containerId + ' not found.'); + return; + } + wrapper.innerHTML = ''; + + this._data = this._prepareData(data); + this._filters = { types: new Set(), minWeight: 1, searchTerm: '' }; + this._selectedNode = null; + + const rect = wrapper.getBoundingClientRect(); + const width = rect.width || 960; + const height = rect.height || 640; + + // SVG + this._svg = d3.select(wrapper) + .append('svg') + .attr('width', '100%') + .attr('height', '100%') + .attr('viewBox', [0, 0, width, height].join(' ')) + .attr('preserveAspectRatio', 'xMidYMid meet') + .style('background', 'transparent'); + + // Defs: arrow markers per category + this._createMarkers(); + + // Defs: glow filter for top-connected nodes + this._createGlowFilter(); + + // Zoom container + this._container = this._svg.append('g').attr('class', 'ng-zoom-layer'); + + // Zoom behaviour + this._zoom = d3.zoom() + .scaleExtent([0.1, 8]) + .on('zoom', (event) => { + this._container.attr('transform', event.transform); + }); + + this._svg.call(this._zoom); + + // Double-click resets zoom + this._svg.on('dblclick.zoom', null); + this._svg.on('dblclick', () => this.resetView()); + + // Tooltip + this._tooltip = d3.select(wrapper) + .append('div') + .attr('class', 'ng-tooltip') + .style('position', 'absolute') + .style('pointer-events', 'none') + .style('background', 'rgba(15,23,42,0.92)') + .style('color', '#e2e8f0') + .style('border', '1px solid #334155') + .style('border-radius', '6px') + .style('padding', '6px 10px') + .style('font-size', '12px') + .style('max-width', '260px') + .style('z-index', '1000') + .style('display', 'none'); + + // Simulation + this._simulation = d3.forceSimulation() + .force('link', d3.forceLink().id(d => d.id).distance(d => { + // Inverse weight: higher weight -> closer + return Math.max(40, 200 - d.weight * 25); + })) + .force('charge', d3.forceManyBody().strength(-300)) + .force('center', d3.forceCenter(width / 2, height / 2)) + .force('collide', d3.forceCollide().radius(d => d._radius + 6)) + .alphaDecay(0.02); + + this.render(); + }, + + /** + * Tear down the graph completely. + */ + destroy() { + if (this._simulation) { + this._simulation.stop(); + this._simulation = null; + } + if (this._svg) { + this._svg.remove(); + this._svg = null; + } + if (this._tooltip) { + this._tooltip.remove(); + this._tooltip = null; + } + this._container = null; + this._data = null; + this._filtered = null; + this._selectedNode = null; + }, + + /** + * Full re-render based on current filters. + */ + render() { + if (!this._data || !this._container) return; + + this._applyFilters(); + + const nodes = this._filtered.entities; + const links = this._filtered.relations; + + // Clear previous drawing + this._container.selectAll('*').remove(); + + // Determine top-5 most connected node IDs + const connectionCounts = {}; + this._data.relations.forEach(r => { + connectionCounts[r.source_entity_id] = (connectionCounts[r.source_entity_id] || 0) + 1; + connectionCounts[r.target_entity_id] = (connectionCounts[r.target_entity_id] || 0) + 1; + }); + const top5Ids = new Set( + Object.entries(connectionCounts) + .sort((a, b) => b[1] - a[1]) + .slice(0, 5) + .map(e => e[0]) + ); + + // Radius scale (sqrt of connection count) + const maxConn = Math.max(1, ...Object.values(connectionCounts)); + const rScale = d3.scaleSqrt().domain([0, maxConn]).range([8, 40]); + + nodes.forEach(n => { + n._connections = connectionCounts[n.id] || 0; + n._radius = rScale(n._connections); + n._isTop5 = top5Ids.has(n.id); + }); + + // ---- edges ------------------------------------------------------------ + const linkGroup = this._container.append('g').attr('class', 'ng-links'); + + const linkSel = linkGroup.selectAll('line') + .data(links, d => d.id) + .join('line') + .attr('stroke', d => this._colorMap.edge[d.category] || this._colorMap.edge.neutral) + .attr('stroke-width', d => Math.max(1, d.weight * 0.8)) + .attr('stroke-opacity', d => Math.min(1, 0.3 + d.weight * 0.14)) + .attr('marker-end', d => 'url(#ng-arrow-' + (d.category || 'neutral') + ')') + .style('cursor', 'pointer') + .on('mouseover', (event, d) => { + const lines = []; + if (d.label) lines.push('' + this._esc(d.label) + ''); + if (d.description) lines.push(this._esc(d.description)); + lines.push('Kategorie: ' + this._esc(d.category) + ' | Gewicht: ' + d.weight); + this._showTooltip(event, lines.join('
')); + }) + .on('mousemove', (event) => this._moveTooltip(event)) + .on('mouseout', () => this._hideTooltip()); + + // ---- nodes ------------------------------------------------------------ + const nodeGroup = this._container.append('g').attr('class', 'ng-nodes'); + + const nodeSel = nodeGroup.selectAll('g') + .data(nodes, d => d.id) + .join('g') + .attr('class', 'ng-node') + .style('cursor', 'pointer') + .call(this._drag(this._simulation)) + .on('mouseover', (event, d) => { + this._showTooltip(event, '' + this._esc(d.name) + '
' + + this._esc(d.entity_type) + ' | Verbindungen: ' + d._connections); + }) + .on('mousemove', (event) => this._moveTooltip(event)) + .on('mouseout', () => this._hideTooltip()) + .on('click', (event, d) => { + event.stopPropagation(); + this._onNodeClick(d, linkSel, nodeSel); + }); + + // Circle + nodeSel.append('circle') + .attr('r', d => d._radius) + .attr('fill', d => this._colorMap.node[d.entity_type] || '#94A3B8') + .attr('stroke', '#0f172a') + .attr('stroke-width', 1.5) + .attr('filter', d => d._isTop5 ? 'url(#ng-glow)' : null); + + // Label + nodeSel.append('text') + .text(d => d.name.length > 15 ? d.name.slice(0, 14) + '\u2026' : d.name) + .attr('dy', d => d._radius + 14) + .attr('text-anchor', 'middle') + .attr('fill', '#cbd5e1') + .attr('font-size', '10px') + .attr('pointer-events', 'none'); + + // ---- simulation ------------------------------------------------------- + // Build link data with object references (d3 expects id strings or objects) + const simNodes = nodes; + const simLinks = links.map(l => ({ + ...l, + source: typeof l.source === 'object' ? l.source.id : l.source_entity_id, + target: typeof l.target === 'object' ? l.target.id : l.target_entity_id, + })); + + this._simulation.nodes(simNodes); + this._simulation.force('link').links(simLinks); + this._simulation.force('collide').radius(d => d._radius + 6); + this._simulation.alpha(1).restart(); + + this._simulation.on('tick', () => { + linkSel + .attr('x1', d => d.source.x) + .attr('y1', d => d.source.y) + .attr('x2', d => { + // Shorten line so arrow doesn't overlap circle + const target = d.target; + const dx = target.x - d.source.x; + const dy = target.y - d.source.y; + const dist = Math.sqrt(dx * dx + dy * dy) || 1; + return target.x - (dx / dist) * (target._radius + 4); + }) + .attr('y2', d => { + const target = d.target; + const dx = target.x - d.source.x; + const dy = target.y - d.source.y; + const dist = Math.sqrt(dx * dx + dy * dy) || 1; + return target.y - (dy / dist) * (target._radius + 4); + }); + + nodeSel.attr('transform', d => 'translate(' + d.x + ',' + d.y + ')'); + }); + + // Click on background to deselect + this._svg.on('click', () => { + this._selectedNode = null; + nodeSel.select('circle').attr('stroke', '#0f172a').attr('stroke-width', 1.5); + linkSel.attr('stroke-opacity', d => Math.min(1, 0.3 + d.weight * 0.14)); + this._clearDetailPanel(); + }); + + // Apply search highlight if active + if (this._filters.searchTerm) { + this._applySearchHighlight(nodeSel); + } + }, + + // ---- filtering ------------------------------------------------------------ + + /** + * Compute the visible subset from raw data + current filters. + */ + _applyFilters() { + let entities = this._data.entities.slice(); + let relations = this._data.relations.slice(); + + // Type filter + if (this._filters.types.size > 0) { + const allowed = this._filters.types; + entities = entities.filter(e => allowed.has(e.entity_type)); + const visibleIds = new Set(entities.map(e => e.id)); + relations = relations.filter(r => + visibleIds.has(r.source_entity_id) && visibleIds.has(r.target_entity_id) + ); + } + + // Weight filter + if (this._filters.minWeight > 1) { + relations = relations.filter(r => r.weight >= this._filters.minWeight); + } + + // Cluster isolation + if (this._filters._isolateId) { + const centerId = this._filters._isolateId; + const connectedIds = new Set([centerId]); + relations.forEach(r => { + if (r.source_entity_id === centerId) connectedIds.add(r.target_entity_id); + if (r.target_entity_id === centerId) connectedIds.add(r.source_entity_id); + }); + entities = entities.filter(e => connectedIds.has(e.id)); + relations = relations.filter(r => + connectedIds.has(r.source_entity_id) && connectedIds.has(r.target_entity_id) + ); + } + + this._filtered = { entities, relations }; + }, + + /** + * Populate the detail panel (#network-detail-panel) with entity info. + * @param {object} entity + */ + _updateDetailPanel(entity) { + const panel = document.getElementById('network-detail-panel'); + if (!panel) return; + + const typeColor = this._colorMap.node[entity.entity_type] || '#94A3B8'; + + // Connected relations + const connected = this._data.relations.filter( + r => r.source_entity_id === entity.id || r.target_entity_id === entity.id + ); + + // Group by category + const grouped = {}; + connected.forEach(r => { + const cat = r.category || 'neutral'; + if (!grouped[cat]) grouped[cat] = []; + // Determine the "other" entity + const otherId = r.source_entity_id === entity.id ? r.target_entity_id : r.source_entity_id; + const other = this._data.entities.find(e => e.id === otherId); + grouped[cat].push({ relation: r, other }); + }); + + let html = ''; + + // Header + html += '
'; + html += '

' + this._esc(entity.name) + '

'; + html += '' + + this._esc(entity.entity_type) + ''; + if (entity.corrected_by_opus) { + html += ' Corrected by Opus'; + } + html += '
'; + + // Description + if (entity.description) { + html += '

' + + this._esc(entity.description) + '

'; + } + + // Aliases + if (entity.aliases && entity.aliases.length > 0) { + html += '
'; + html += 'Aliase:
'; + entity.aliases.forEach(a => { + html += '' + + this._esc(a) + ''; + }); + html += '
'; + } + + // Mention count + html += '
'; + html += 'Erw\u00e4hnungen: ' + + (entity.mention_count || 0) + ''; + html += '
'; + + // Relations grouped by category + const categoryLabels = { + alliance: 'Allianz', conflict: 'Konflikt', diplomacy: 'Diplomatie', + economic: '\u00d6konomie', legal: 'Recht', neutral: 'Neutral', + }; + + if (Object.keys(grouped).length > 0) { + html += '
'; + html += 'Verbindungen (' + connected.length + '):'; + + Object.keys(grouped).sort().forEach(cat => { + const catColor = this._colorMap.edge[cat] || this._colorMap.edge.neutral; + const catLabel = categoryLabels[cat] || cat; + html += '
'; + html += '' + + this._esc(catLabel) + ''; + grouped[cat].forEach(item => { + const r = item.relation; + const otherName = item.other ? item.other.name : '?'; + const direction = r.source_entity_id === entity.id ? '\u2192' : '\u2190'; + html += '
'; + html += direction + ' ' + this._esc(otherName) + ''; + if (r.label) html += ' — ' + this._esc(r.label); + html += ' (G:' + r.weight + ')'; + html += '
'; + }); + html += '
'; + }); + + html += '
'; + } + + panel.innerHTML = html; + panel.style.display = 'block'; + }, + + /** + * Filter nodes by entity type. + * @param {Set|Array} types – entity_type values to show. Empty = all. + */ + filterByType(types) { + this._filters.types = types instanceof Set ? types : new Set(types); + this._filters._isolateId = null; + this.render(); + }, + + /** + * Filter edges by minimum weight. + * @param {number} minWeight + */ + filterByWeight(minWeight) { + this._filters.minWeight = minWeight; + this.render(); + }, + + /** + * Highlight nodes matching the search term (name, aliases, description). + * @param {string} term + */ + search(term) { + this._filters.searchTerm = (term || '').trim().toLowerCase(); + this.render(); + }, + + /** + * Show only the 1-hop neighbourhood of the given entity. + * @param {string} entityId + */ + isolateCluster(entityId) { + this._filters._isolateId = entityId; + this.render(); + }, + + /** + * Reset zoom, filters and selection to initial state. + */ + resetView() { + this._filters = { types: new Set(), minWeight: 1, searchTerm: '' }; + this._selectedNode = null; + this._clearDetailPanel(); + + if (this._svg && this._zoom) { + this._svg.transition().duration(500).call( + this._zoom.transform, d3.zoomIdentity + ); + } + + this.render(); + }, + + // ---- export --------------------------------------------------------------- + + /** + * Export the current graph as a PNG image. + */ + exportPNG() { + if (!this._svg) return; + + const svgNode = this._svg.node(); + const serializer = new XMLSerializer(); + const svgString = serializer.serializeToString(svgNode); + const svgBlob = new Blob([svgString], { type: 'image/svg+xml;charset=utf-8' }); + const url = URL.createObjectURL(svgBlob); + + const img = new Image(); + img.onload = function () { + const canvas = document.createElement('canvas'); + const bbox = svgNode.getBoundingClientRect(); + canvas.width = bbox.width * 2; // 2x for retina + canvas.height = bbox.height * 2; + const ctx = canvas.getContext('2d'); + ctx.scale(2, 2); + ctx.fillStyle = '#0f172a'; + ctx.fillRect(0, 0, bbox.width, bbox.height); + ctx.drawImage(img, 0, 0, bbox.width, bbox.height); + URL.revokeObjectURL(url); + + canvas.toBlob(function (blob) { + if (!blob) return; + const a = document.createElement('a'); + a.href = URL.createObjectURL(blob); + a.download = 'aegis-network-' + Date.now() + '.png'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(a.href); + }, 'image/png'); + }; + img.src = url; + }, + + /** + * Export the current relations as CSV. + */ + exportCSV() { + if (!this._data) return; + + const entityMap = {}; + this._data.entities.forEach(e => { entityMap[e.id] = e.name; }); + + const rows = [['source', 'target', 'category', 'label', 'weight', 'description'].join(',')]; + this._data.relations.forEach(r => { + rows.push([ + this._csvField(entityMap[r.source_entity_id] || r.source_entity_id), + this._csvField(entityMap[r.target_entity_id] || r.target_entity_id), + this._csvField(r.category), + this._csvField(r.label), + r.weight, + this._csvField(r.description || ''), + ].join(',')); + }); + + const blob = new Blob([rows.join('\n')], { type: 'text/csv;charset=utf-8' }); + const a = document.createElement('a'); + a.href = URL.createObjectURL(blob); + a.download = 'aegis-network-' + Date.now() + '.csv'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(a.href); + }, + + /** + * Export the full data as JSON. + */ + exportJSON() { + if (!this._data) return; + + const exportData = { + entities: this._data.entities.map(e => ({ + id: e.id, + name: e.name, + name_normalized: e.name_normalized, + entity_type: e.entity_type, + description: e.description, + aliases: e.aliases, + mention_count: e.mention_count, + corrected_by_opus: e.corrected_by_opus, + metadata: e.metadata, + })), + relations: this._data.relations.map(r => ({ + id: r.id, + source_entity_id: r.source_entity_id, + target_entity_id: r.target_entity_id, + category: r.category, + label: r.label, + description: r.description, + weight: r.weight, + status: r.status, + evidence: r.evidence, + })), + }; + + const blob = new Blob( + [JSON.stringify(exportData, null, 2)], + { type: 'application/json;charset=utf-8' } + ); + const a = document.createElement('a'); + a.href = URL.createObjectURL(blob); + a.download = 'aegis-network-' + Date.now() + '.json'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(a.href); + }, + + // ---- internal helpers ----------------------------------------------------- + + /** + * Prepare / clone data so we do not mutate the original. + */ + _prepareData(raw) { + return { + entities: (raw.entities || []).map(e => ({ ...e })), + relations: (raw.relations || []).map(r => ({ ...r })), + }; + }, + + /** + * Create SVG arrow markers for each edge category. + */ + _createMarkers() { + const defs = this._svg.append('defs'); + const categories = Object.keys(this._colorMap.edge); + + categories.forEach(cat => { + defs.append('marker') + .attr('id', 'ng-arrow-' + cat) + .attr('viewBox', '0 -5 10 10') + .attr('refX', 10) + .attr('refY', 0) + .attr('markerWidth', 8) + .attr('markerHeight', 8) + .attr('orient', 'auto') + .append('path') + .attr('d', 'M0,-4L10,0L0,4') + .attr('fill', this._colorMap.edge[cat]); + }); + }, + + /** + * Create SVG glow filter for top-5 nodes. + */ + _createGlowFilter() { + const defs = this._svg.select('defs'); + const filter = defs.append('filter') + .attr('id', 'ng-glow') + .attr('x', '-50%') + .attr('y', '-50%') + .attr('width', '200%') + .attr('height', '200%'); + + filter.append('feGaussianBlur') + .attr('in', 'SourceGraphic') + .attr('stdDeviation', 4) + .attr('result', 'blur'); + + filter.append('feColorMatrix') + .attr('in', 'blur') + .attr('type', 'matrix') + .attr('values', '0 0 0 0 0.98 0 0 0 0 0.75 0 0 0 0 0.14 0 0 0 0.7 0') + .attr('result', 'glow'); + + const merge = filter.append('feMerge'); + merge.append('feMergeNode').attr('in', 'glow'); + merge.append('feMergeNode').attr('in', 'SourceGraphic'); + }, + + /** + * d3 drag behaviour. + */ + _drag(simulation) { + function dragstarted(event, d) { + if (!event.active) simulation.alphaTarget(0.3).restart(); + d.fx = d.x; + d.fy = d.y; + } + + function dragged(event, d) { + d.fx = event.x; + d.fy = event.y; + } + + function dragended(event, d) { + if (!event.active) simulation.alphaTarget(0); + d.fx = null; + d.fy = null; + } + + return d3.drag() + .on('start', dragstarted) + .on('drag', dragged) + .on('end', dragended); + }, + + /** + * Handle node click – highlight edges, show detail panel. + */ + _onNodeClick(d, linkSel, nodeSel) { + this._selectedNode = d; + + // Highlight selected node + nodeSel.select('circle') + .attr('stroke', n => n.id === d.id ? '#FBBF24' : '#0f172a') + .attr('stroke-width', n => n.id === d.id ? 3 : 1.5); + + // Highlight connected edges + const connectedNodeIds = new Set([d.id]); + linkSel.each(function (l) { + const srcId = typeof l.source === 'object' ? l.source.id : l.source; + const tgtId = typeof l.target === 'object' ? l.target.id : l.target; + if (srcId === d.id || tgtId === d.id) { + connectedNodeIds.add(srcId); + connectedNodeIds.add(tgtId); + } + }); + + linkSel.attr('stroke-opacity', l => { + const srcId = typeof l.source === 'object' ? l.source.id : l.source; + const tgtId = typeof l.target === 'object' ? l.target.id : l.target; + if (srcId === d.id || tgtId === d.id) { + return Math.min(1, 0.3 + l.weight * 0.14) + 0.3; + } + return 0.08; + }); + + nodeSel.select('circle').attr('opacity', n => + connectedNodeIds.has(n.id) ? 1 : 0.25 + ); + nodeSel.select('text').attr('opacity', n => + connectedNodeIds.has(n.id) ? 1 : 0.2 + ); + + // Detail panel + const entity = this._data.entities.find(e => e.id === d.id); + if (entity) { + this._updateDetailPanel(entity); + } + }, + + /** + * Apply search highlighting (glow matching, dim rest). + */ + _applySearchHighlight(nodeSel) { + const term = this._filters.searchTerm; + if (!term) return; + + nodeSel.each(function (d) { + const matches = NetworkGraph._matchesSearch(d, term); + d3.select(this).select('circle') + .attr('opacity', matches ? 1 : 0.15) + .attr('filter', matches ? 'url(#ng-glow)' : null); + d3.select(this).select('text') + .attr('opacity', matches ? 1 : 0.1); + }); + }, + + /** + * Check if entity matches the search term. + */ + _matchesSearch(entity, term) { + if (!term) return true; + if (entity.name && entity.name.toLowerCase().includes(term)) return true; + if (entity.name_normalized && entity.name_normalized.toLowerCase().includes(term)) return true; + if (entity.description && entity.description.toLowerCase().includes(term)) return true; + if (entity.aliases) { + for (let i = 0; i < entity.aliases.length; i++) { + if (entity.aliases[i].toLowerCase().includes(term)) return true; + } + } + return false; + }, + + /** + * Clear the detail panel. + */ + _clearDetailPanel() { + const panel = document.getElementById('network-detail-panel'); + if (panel) { + panel.innerHTML = '

Klicke auf einen Knoten, um Details anzuzeigen.

'; + } + }, + + // ---- tooltip helpers ------------------------------------------------------ + + _showTooltip(event, html) { + if (!this._tooltip) return; + this._tooltip + .style('display', 'block') + .html(html); + this._moveTooltip(event); + }, + + _moveTooltip(event) { + if (!this._tooltip) return; + this._tooltip + .style('left', (event.offsetX + 14) + 'px') + .style('top', (event.offsetY - 10) + 'px'); + }, + + _hideTooltip() { + if (!this._tooltip) return; + this._tooltip.style('display', 'none'); + }, + + // ---- string helpers ------------------------------------------------------- + + _esc(str) { + if (!str) return ''; + const div = document.createElement('div'); + div.appendChild(document.createTextNode(str)); + return div.innerHTML; + }, + + _csvField(val) { + const s = String(val == null ? '' : val); + if (s.includes(',') || s.includes('"') || s.includes('\n')) { + return '"' + s.replace(/"/g, '""') + '"'; + } + return s; + }, +};