refactor: Netzwerkanalyse Phase 2 auf batched Extraktion umgestellt

- Statt einem Mega-Opus-Call: Haiku extrahiert Beziehungen pro Artikel-Batch - Stufe A: Per-Batch Extraktion mit nur den relevanten Entitäten (~20-50 statt 3.463) - Stufe B: Globaler Merge + Deduplizierung (Richtungsnormalisierung, Gewichts-Boost) - Phase 2b: Separater Opus-Korrekturpass (name_fix, merge, add) in 500er-Batches - Löst das Problem: 0 Relations bei großen Analysen (3.463 Entitäten -> 1.791 Relations) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
feat: Tutorial-Fortschritt serverseitig persistieren (Resume/Restart)
2026-03-17 22:51:17 +01:00 · 2026-03-17 22:51:06 +01:00
--- a/src/agents/entity_extractor.py
+++ b/src/agents/entity_extractor.py
@@ -1,4 +1,4 @@
-"""Netzwerkanalyse: Entity-Extraktion (Haiku) + Beziehungsanalyse (Opus)."""
+"""Netzwerkanalyse: Entity-Extraktion (Haiku) + Beziehungsanalyse (Batched)."""
 import asyncio
 import hashlib
 import json
@@ -52,23 +52,20 @@ AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON:
 }}"""


-RELATIONSHIP_ANALYSIS_PROMPT = """Du bist ein Senior OSINT-Analyst für ein Lagemonitoring-System.
-AUFGABE: Analysiere die Beziehungen zwischen den extrahierten Entitäten und korrigiere ggf. Fehler.
+RELATIONSHIP_BATCH_PROMPT = """Du bist ein Senior OSINT-Analyst für ein Lagemonitoring-System.
+AUFGABE: Analysiere die Beziehungen zwischen den Entitäten basierend auf den Artikeln.

-EXTRAHIERTE ENTITÄTEN:
+BEKANNTE ENTITÄTEN (aus dem Gesamtdatensatz):
 {entities_json}

-QUELLMATERIAL:
-{source_texts}
+ARTIKEL:
+{articles_text}

-TEIL 1 — KORREKTUREN (optional):
-Prüfe die extrahierten Entitäten auf Fehler:
- "name_fix": Name ist falsch geschrieben oder unvollständig
- "merge": Zwei Entitäten sind dieselbe -> zusammenführen
- "add": Wichtige Entität fehlt komplett
-
-TEIL 2 — BEZIEHUNGEN:
-Identifiziere ALLE relevanten Beziehungen zwischen den Entitäten.
+AUFGABE:
+Identifiziere ALLE Beziehungen zwischen den oben genannten Entitäten, die sich aus den Artikeln ergeben.
+- Nur Beziehungen nennen, die im Artikeltext belegt sind
+- source und target: Exakt die Namen aus der Entitäten-Liste verwenden
+- Wenn eine Entität im Artikel vorkommt aber nicht in der Liste, verwende den Namen wie er in der Liste steht

 BEZIEHUNGS-KATEGORIEN:
 - "alliance": Bündnis, Kooperation, Unterstützung, Partnerschaft
@@ -80,17 +77,11 @@ BEZIEHUNGS-KATEGORIEN:

 REGELN:
 - weight: 1 (schwach/indirekt) bis 5 (stark/direkt)
- evidence[]: Stichpunkte aus dem Quellmaterial die die Beziehung belegen
+- evidence[]: 1-2 kurze Stichpunkte aus dem Artikeltext als Beleg
 - status: "active" (aktuell), "historical" (vergangen), "emerging" (sich entwickelnd)
- source und target: Exakt die Namen aus der Entitäten-Liste verwenden

 AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON:
 {{
-  "corrections": [
-    {{"type": "name_fix", "entity_name": "Falscher Name", "corrected_name": "Korrekter Name"}},
-    {{"type": "merge", "entity_name": "Behalten", "merge_with": "Duplikat löschen"}},
-    {{"type": "add", "entity_name": "Neuer Name", "entity_type": "person", "description": "Einordnung"}}
-  ],
  "relations": [
    {{
      "source": "Entität A",
@@ -106,6 +97,28 @@ AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON:
 }}"""


+CORRECTION_PROMPT = """Du bist ein Senior OSINT-Analyst. Prüfe die extrahierten Entitäten auf Fehler.
+
+ENTITÄTEN:
+{entities_json}
+
+AUFGABE: Prüfe auf folgende Fehler und gib NUR Korrekturen zurück:
+1. "name_fix": Name ist falsch geschrieben oder unvollständig
+2. "merge": Zwei Entitäten bezeichnen dasselbe -> zusammenführen (z.B. "USA" als Organisation und "USA" als Location)
+3. "add": Wichtige Entität fehlt komplett (nur bei offensichtlichen Lücken)
+
+WICHTIG: Nur echte Fehler korrigieren. Im Zweifel KEINE Korrektur.
+
+AUSGABEFORMAT — Antworte AUSSCHLIESSLICH mit diesem JSON:
+{{
+  "corrections": [
+    {{"type": "name_fix", "entity_name": "Falscher Name", "corrected_name": "Korrekter Name"}},
+    {{"type": "merge", "entity_name": "Behalten", "merge_with": "Duplikat löschen"}},
+    {{"type": "add", "entity_name": "Neuer Name", "entity_type": "person", "description": "Einordnung"}}
+  ]
+}}"""
+
+
 # ---------------------------------------------------------------------------
 # Hilfsfunktionen
 # ---------------------------------------------------------------------------
@@ -123,19 +136,16 @@ def _parse_json_response(text: str) -> Optional[dict]:
    """Parst JSON aus Claude-Antwort. Handhabt Markdown-Fences."""
    if not text:
        return None
-    # Direkt
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass
-    # Markdown-Fences
    fence_match = re.search(r'```(?:json)?\s*\n?(.*?)\n?\s*```', text, re.DOTALL)
    if fence_match:
        try:
            return json.loads(fence_match.group(1))
        except json.JSONDecodeError:
            pass
-    # Erstes JSON-Objekt
    obj_match = re.search(r'\{.*\}', text, re.DOTALL)
    if obj_match:
        try:
@@ -181,7 +191,6 @@ async def _phase1_extract_entities(
        headline = art.get("headline_de") or art.get("headline") or ""
        content = art.get("content_de") or art.get("content_original") or ""
        source = art.get("source") or ""
-        # Inhalt kürzen falls sehr lang
        if len(content) > 2000:
            content = content[:2000] + "..."
        all_texts.append(f"[{source}] {headline}\n{content}")
@@ -196,7 +205,6 @@ async def _phase1_extract_entities(
        logger.warning(f"Analyse {analysis_id}: Keine Texte vorhanden")
        return []

-    # Batching
    batch_size = 30
    batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)]
    logger.info(f"{len(all_texts)} Texte in {len(batches)} Batches")
@@ -269,7 +277,6 @@ async def _phase1_extract_entities(
            "progress": int((batch_idx + 1) / len(batches) * 100),
        })

-    # In DB speichern
    all_entities = list(entity_map.values())

    for ent in all_entities:
@@ -297,19 +304,49 @@ async def _phase1_extract_entities(


 # ---------------------------------------------------------------------------
-# Phase 2: Beziehungsanalyse (Opus)
+# Phase 2: Beziehungsanalyse (Batched — pro Artikel-Batch)
 # ---------------------------------------------------------------------------

+def _build_entity_name_map(entities: list[dict]) -> dict[str, int]:
+    """Mapping: normalisierter Name/Alias -> DB-ID."""
+    name_to_id: dict[str, int] = {}
+    for ent in entities:
+        db_id = ent.get("db_id")
+        if not db_id:
+            continue
+        name_to_id[ent["name"].lower()] = db_id
+        name_to_id[ent["name_normalized"]] = db_id
+        for alias in ent.get("aliases", []):
+            if alias and alias.strip():
+                name_to_id[alias.strip().lower()] = db_id
+    return name_to_id
+
+
+def _find_relevant_entities(batch_texts: list[str], entities: list[dict]) -> list[dict]:
+    """Findet Entitäten, die in den Batch-Texten vorkommen."""
+    combined_text = " ".join(batch_texts).lower()
+    relevant = []
+    for ent in entities:
+        if ent["name"].lower() in combined_text or ent["name_normalized"] in combined_text:
+            relevant.append(ent)
+            continue
+        for alias in ent.get("aliases", []):
+            if alias and alias.strip().lower() in combined_text:
+                relevant.append(ent)
+                break
+    return relevant
+
+
 async def _phase2_analyze_relationships(
    db, analysis_id: int, tenant_id: int,
    entities: list[dict], articles: list[dict], factchecks: list[dict],
    usage_acc: UsageAccumulator, ws_manager=None,
 ) -> list[dict]:
-    """Analysiert Beziehungen via Opus und wendet Korrekturen an."""
+    """Analysiert Beziehungen batch-weise und merged die Ergebnisse."""
    if not entities:
        return []

-    logger.info(f"Phase 2: {len(entities)} Entitäten, Beziehungsanalyse")
+    logger.info(f"Phase 2: {len(entities)} Entitäten, batched Beziehungsanalyse")

    await _broadcast(ws_manager, "network_status", {
        "analysis_id": analysis_id,
@@ -317,95 +354,125 @@ async def _phase2_analyze_relationships(
        "progress": 0,
    })

-    # Entitäten für Prompt
-    entities_for_prompt = [
-        {"name": e["name"], "type": e["type"],
-         "description": e.get("description", ""), "aliases": e.get("aliases", [])}
-        for e in entities
-    ]
-    entities_json = json.dumps(entities_for_prompt, ensure_ascii=False, indent=2)
-
-    # Quelltexte
-    source_parts = []
+    # --- Texte vorbereiten (gleiche Logik wie Phase 1) ---
+    all_texts = []
    for art in articles:
        headline = art.get("headline_de") or art.get("headline") or ""
        content = art.get("content_de") or art.get("content_original") or ""
        source = art.get("source") or ""
-        source_parts.append(f"[{source}] {headline}\n{content}")
+        if len(content) > 2000:
+            content = content[:2000] + "..."
+        all_texts.append(f"[{source}] {headline}\n{content}")

    for fc in factchecks:
        claim = fc.get("claim") or ""
        evidence = fc.get("evidence") or ""
-        source_parts.append(f"[Faktencheck] {claim}\n{evidence}")
+        status = fc.get("status") or ""
+        all_texts.append(f"[Faktencheck] {claim} (Status: {status})\n{evidence}")

-    source_texts = "\n\n---\n\n".join(source_parts)
+    if not all_texts:
+        return []

-    # Kürzen falls zu lang
-    if len(source_texts) > 150_000:
-        logger.info(f"Quelltexte zu lang ({len(source_texts)} Zeichen), kürze")
-        short_parts = []
-        for art in articles:
-            headline = art.get("headline_de") or art.get("headline") or ""
-            content = art.get("content_de") or art.get("content_original") or ""
-            short = content[:500] + "..." if len(content) > 500 else content
-            short_parts.append(f"[{art.get('source', '')}] {headline}: {short}")
-        for fc in factchecks:
-            short_parts.append(f"[Faktencheck] {fc.get('claim', '')} (Status: {fc.get('status', '')})")
-        source_texts = "\n\n".join(short_parts)
+    # --- Stufe A: Per-Batch Beziehungsextraktion ---
+    batch_size = 30
+    batches = [all_texts[i:i + batch_size] for i in range(0, len(all_texts), batch_size)]
+    logger.info(f"Stufe A: {len(batches)} Batches für Beziehungsextraktion")

-    prompt = RELATIONSHIP_ANALYSIS_PROMPT.format(
-        entities_json=entities_json, source_texts=source_texts,
+    all_raw_relations: list[dict] = []
+    name_to_id = _build_entity_name_map(entities)
+
+    for batch_idx, batch in enumerate(batches):
+        relevant = _find_relevant_entities(batch, entities)
+        if len(relevant) < 2:
+            logger.debug(f"Batch {batch_idx + 1}: Weniger als 2 Entitäten, überspringe")
+            await _broadcast(ws_manager, "network_status", {
+                "analysis_id": analysis_id,
+                "phase": "relationship_extraction",
+                "progress": int((batch_idx + 1) / len(batches) * 70),
+            })
+            continue
+
+        entities_for_prompt = [
+            {"name": e["name"], "type": e["type"]}
+            for e in relevant
+        ]
+        entities_json = json.dumps(entities_for_prompt, ensure_ascii=False)
+        articles_text = "\n\n---\n\n".join(batch)
+
+        prompt = RELATIONSHIP_BATCH_PROMPT.format(
+            entities_json=entities_json,
+            articles_text=articles_text,
        )

        try:
-        result_text, usage = await call_claude(prompt, tools=None, model=None)
+            result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
            usage_acc.add(usage)
        except Exception as e:
-        logger.error(f"Opus Beziehungsanalyse fehlgeschlagen: {e}")
-        return []
+            logger.error(f"Relationship Batch {batch_idx + 1}/{len(batches)} fehlgeschlagen: {e}")
+            continue

        parsed = _parse_json_response(result_text)
        if not parsed:
-        logger.warning("Kein gültiges JSON von Opus")
-        return []
+            logger.warning(f"Batch {batch_idx + 1}: Kein gültiges JSON")
+            continue

-    # Korrekturen anwenden
-    corrections = parsed.get("corrections", [])
-    if corrections and isinstance(corrections, list):
-        await _broadcast(ws_manager, "network_status", {
-            "analysis_id": analysis_id,
-            "phase": "correction",
-            "progress": 0,
-        })
-        await _apply_corrections(db, analysis_id, tenant_id, entities, corrections)
-
-    # Beziehungen speichern
        relations = parsed.get("relations", [])
        if not isinstance(relations, list):
-        return []
-
-    name_to_id = _build_entity_name_map(entities)
-    valid_categories = {"alliance", "conflict", "diplomacy", "economic", "legal", "neutral"}
-    saved_relations = []
+            continue

+        batch_count = 0
        for rel in relations:
            if not isinstance(rel, dict):
                continue
-
            source_name = (rel.get("source") or "").strip()
            target_name = (rel.get("target") or "").strip()
            if not source_name or not target_name:
                continue
+            rel["_batch"] = batch_idx
+            all_raw_relations.append(rel)
+            batch_count += 1
+
+        logger.info(f"Batch {batch_idx + 1}/{len(batches)}: {batch_count} Beziehungen, {len(relevant)} Entitäten")
+
+        await _broadcast(ws_manager, "network_status", {
+            "analysis_id": analysis_id,
+            "phase": "relationship_extraction",
+            "progress": int((batch_idx + 1) / len(batches) * 70),
+        })
+
+    logger.info(f"Stufe A abgeschlossen: {len(all_raw_relations)} rohe Beziehungen aus {len(batches)} Batches")
+
+    # --- Stufe B: Merge + Deduplizierung ---
+    logger.info("Stufe B: Merge und Deduplizierung")
+    await _broadcast(ws_manager, "network_status", {
+        "analysis_id": analysis_id,
+        "phase": "relationship_extraction",
+        "progress": 75,
+    })
+
+    valid_categories = {"alliance", "conflict", "diplomacy", "economic", "legal", "neutral"}
+    merged: dict[tuple[int, int, str], dict] = {}
+
+    for rel in all_raw_relations:
+        source_name = (rel.get("source") or "").strip()
+        target_name = (rel.get("target") or "").strip()

        source_id = name_to_id.get(source_name.lower())
        target_id = name_to_id.get(target_name.lower())
        if not source_id or not target_id or source_id == target_id:
            continue

+        # Normalisiere Richtung um A->B und B->A zu mergen
+        if source_id > target_id:
+            source_id, target_id = target_id, source_id
+            source_name, target_name = target_name, source_name
+
        category = (rel.get("category") or "neutral").lower().strip()
        if category not in valid_categories:
            category = "neutral"

+        key = (source_id, target_id, category)
+
        weight = rel.get("weight", 3)
        try:
            weight = max(1, min(5, int(weight)))
@@ -420,6 +487,55 @@ async def _phase2_analyze_relationships(
        if not isinstance(evidence, list):
            evidence = []

+        if key in merged:
+            existing = merged[key]
+            existing["weight"] = max(existing["weight"], weight)
+            existing_evidence = set(existing["evidence"])
+            for ev in evidence:
+                if isinstance(ev, str) and ev.strip() and ev.strip() not in existing_evidence:
+                    existing["evidence"].append(ev.strip())
+                    existing_evidence.add(ev.strip())
+            if len(existing["evidence"]) > 10:
+                existing["evidence"] = existing["evidence"][:10]
+            status_priority = {"active": 3, "emerging": 2, "historical": 1}
+            if status_priority.get(status, 0) > status_priority.get(existing["status"], 0):
+                existing["status"] = status
+            new_desc = rel.get("description", "")
+            if len(new_desc) > len(existing.get("description", "")):
+                existing["description"] = new_desc
+                existing["label"] = rel.get("label", existing["label"])
+            existing["_count"] = existing.get("_count", 1) + 1
+        else:
+            merged[key] = {
+                "source_id": source_id,
+                "target_id": target_id,
+                "source_name": source_name,
+                "target_name": target_name,
+                "category": category,
+                "label": rel.get("label", ""),
+                "description": rel.get("description", ""),
+                "weight": weight,
+                "status": status,
+                "evidence": [ev.strip() for ev in evidence if isinstance(ev, str) and ev.strip()][:10],
+                "_count": 1,
+            }
+
+    logger.info(f"Stufe B abgeschlossen: {len(all_raw_relations)} roh -> {len(merged)} gemerged")
+
+    # Gewichts-Boost für mehrfach belegte Beziehungen
+    for m in merged.values():
+        if m["_count"] >= 3 and m["weight"] < 5:
+            m["weight"] = min(5, m["weight"] + 1)
+
+    # --- In DB speichern ---
+    await _broadcast(ws_manager, "network_status", {
+        "analysis_id": analysis_id,
+        "phase": "relationship_extraction",
+        "progress": 85,
+    })
+
+    saved_relations = []
+    for m in merged.values():
        try:
            cursor = await db.execute(
                """INSERT INTO network_relations
@@ -427,10 +543,9 @@ async def _phase2_analyze_relationships(
                    category, label, description, weight, status, evidence, tenant_id)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
-                    analysis_id, source_id, target_id, category,
-                    rel.get("label", ""), rel.get("description", ""),
-                    weight, status,
-                    json.dumps(evidence, ensure_ascii=False),
+                    analysis_id, m["source_id"], m["target_id"], m["category"],
+                    m["label"], m["description"], m["weight"], m["status"],
+                    json.dumps(m["evidence"], ensure_ascii=False),
                    tenant_id,
                ),
            )
@@ -439,7 +554,7 @@ async def _phase2_analyze_relationships(
            logger.warning(f"Beziehung speichern fehlgeschlagen: {e}")

    await db.commit()
-    logger.info(f"Phase 2 abgeschlossen: {len(corrections)} Korrekturen, {len(saved_relations)} Beziehungen")
+    logger.info(f"Phase 2 abgeschlossen: {len(saved_relations)} Beziehungen gespeichert")

    await _broadcast(ws_manager, "network_status", {
        "analysis_id": analysis_id,
@@ -450,19 +565,71 @@ async def _phase2_analyze_relationships(
    return saved_relations


-def _build_entity_name_map(entities: list[dict]) -> dict[str, int]:
-    """Mapping: normalisierter Name/Alias -> DB-ID."""
-    name_to_id: dict[str, int] = {}
-    for ent in entities:
-        db_id = ent.get("db_id")
-        if not db_id:
+# ---------------------------------------------------------------------------
+# Phase 2b: Korrekturen (Opus)
+# ---------------------------------------------------------------------------
+
+async def _phase2b_corrections(
+    db, analysis_id: int, tenant_id: int,
+    entities: list[dict], relation_count: int,
+    usage_acc: UsageAccumulator, ws_manager=None,
+) -> None:
+    """Opus prüft die Entitäten auf Fehler (Merge, Name-Fix, Add)."""
+    if len(entities) < 10:
+        return
+
+    logger.info(f"Phase 2b: Opus-Korrekturpass für {len(entities)} Entitäten")
+
+    await _broadcast(ws_manager, "network_status", {
+        "analysis_id": analysis_id,
+        "phase": "correction",
+        "progress": 0,
+    })
+
+    entities_for_prompt = []
+    for e in entities:
+        entities_for_prompt.append({
+            "name": e["name"],
+            "type": e["type"],
+            "description": e.get("description", ""),
+            "aliases": e.get("aliases", []),
+        })
+
+    batch_size = 500
+    entity_batches = [entities_for_prompt[i:i + batch_size]
+                      for i in range(0, len(entities_for_prompt), batch_size)]
+
+    all_corrections = []
+    for bi, eb in enumerate(entity_batches):
+        entities_json = json.dumps(eb, ensure_ascii=False, indent=1)
+        prompt = CORRECTION_PROMPT.format(entities_json=entities_json)
+
+        try:
+            result_text, usage = await call_claude(prompt, tools=None, model=None)
+            usage_acc.add(usage)
+        except Exception as e:
+            logger.error(f"Opus Korrektur-Batch {bi + 1} fehlgeschlagen: {e}")
            continue
-        name_to_id[ent["name"].lower()] = db_id
-        name_to_id[ent["name_normalized"]] = db_id
-        for alias in ent.get("aliases", []):
-            if alias and alias.strip():
-                name_to_id[alias.strip().lower()] = db_id
-    return name_to_id
+
+        parsed = _parse_json_response(result_text)
+        if not parsed:
+            continue
+
+        corrections = parsed.get("corrections", [])
+        if isinstance(corrections, list):
+            all_corrections.extend(corrections)
+
+        logger.info(f"Korrektur-Batch {bi + 1}/{len(entity_batches)}: {len(corrections)} Korrekturen")
+
+    if all_corrections:
+        await _apply_corrections(db, analysis_id, tenant_id, entities, all_corrections)
+        logger.info(f"Phase 2b abgeschlossen: {len(all_corrections)} Korrekturen angewendet")
+
+    await _broadcast(ws_manager, "network_status", {
+        "analysis_id": analysis_id,
+        "phase": "correction",
+        "progress": 100,
+    })


 async def _apply_corrections(db, analysis_id, tenant_id, entities, corrections):
@@ -523,11 +690,18 @@ async def _apply_corrections(db, analysis_id, tenant_id, entities, corrections):
                    keep_ent["aliases"] = list(aliases)
                    keep_ent["mention_count"] = keep_ent.get("mention_count", 0) + merge_ent.get("mention_count", 0)

-                    # Mentions übertragen
                    await db.execute(
                        "UPDATE network_entity_mentions SET entity_id = ? WHERE entity_id = ?",
                        (keep_ent["db_id"], merge_ent["db_id"]),
                    )
+                    await db.execute(
+                        "UPDATE network_relations SET source_entity_id = ? WHERE source_entity_id = ? AND network_analysis_id = ?",
+                        (keep_ent["db_id"], merge_ent["db_id"], analysis_id),
+                    )
+                    await db.execute(
+                        "UPDATE network_relations SET target_entity_id = ? WHERE target_entity_id = ? AND network_analysis_id = ?",
+                        (keep_ent["db_id"], merge_ent["db_id"], analysis_id),
+                    )
                    await db.execute(
                        """UPDATE network_entities SET aliases = ?, mention_count = ?, corrected_by_opus = 1
                           WHERE id = ?""",
@@ -625,8 +799,9 @@ async def _phase3_finalize(
 async def extract_and_relate_entities(analysis_id: int, tenant_id: int, ws_manager=None):
    """Hauptfunktion: Entity-Extraktion + Beziehungsanalyse.

-    Phase 1: Haiku extrahiert Entitäten aus Artikeln
-    Phase 2: Opus analysiert Beziehungen und korrigiert Haiku-Fehler
+    Phase 1: Haiku extrahiert Entitäten aus Artikeln (in Batches)
+    Phase 2: Haiku extrahiert Beziehungen pro Batch, dann Merge + Deduplizierung
+    Phase 2b: Opus korrigiert Entitäten (Name-Fix, Merge, Add)
    Phase 3: Finalisierung (Zähler, Hash, Log)
    """
    from database import get_db
@@ -720,13 +895,29 @@ async def extract_and_relate_entities(analysis_id: int, tenant_id: int, ws_manag
            db, analysis_id, tenant_id, entities, articles, factchecks, usage_acc, ws_manager,
        )

+        # Phase 2b: Korrekturen
+        if not await _check_analysis_exists(db, analysis_id):
+            return
+
+        await _phase2b_corrections(
+            db, analysis_id, tenant_id, entities, len(relations), usage_acc, ws_manager,
+        )
+
+        # Entity-Count nach Korrekturen aktualisieren
+        cursor = await db.execute(
+            "SELECT COUNT(*) as cnt FROM network_entities WHERE network_analysis_id = ?",
+            (analysis_id,),
+        )
+        row = await cursor.fetchone()
+        final_entity_count = row["cnt"] if row else len(entities)
+
        # Phase 3
        if not await _check_analysis_exists(db, analysis_id):
            return

        await _phase3_finalize(
            db, analysis_id, tenant_id,
-            entity_count=len(entities), relation_count=len(relations),
+            entity_count=final_entity_count, relation_count=len(relations),
            article_ids=article_ids, factcheck_ids=factcheck_ids,
            article_ts=article_ts, factcheck_ts=factcheck_ts,
            usage_acc=usage_acc, ws_manager=ws_manager,
--- a/src/database.py
+++ b/src/database.py
@@ -464,6 +464,13 @@ async def init_db():
            await db.execute("ALTER TABLE users ADD COLUMN is_active INTEGER DEFAULT 1")
            await db.commit()

+        # Migration: Tutorial-Fortschritt pro User
+        if "tutorial_step" not in user_columns:
+            await db.execute("ALTER TABLE users ADD COLUMN tutorial_step INTEGER DEFAULT NULL")
+            await db.execute("ALTER TABLE users ADD COLUMN tutorial_completed INTEGER DEFAULT 0")
+            await db.commit()
+            logger.info("Migration: tutorial_step + tutorial_completed zu users hinzugefuegt")
+
        if "last_login_at" not in user_columns:
            await db.execute("ALTER TABLE users ADD COLUMN last_login_at TIMESTAMP")
            await db.commit()
--- a/src/main.py
+++ b/src/main.py
@@ -333,6 +333,7 @@ from routers.feedback import router as feedback_router
 from routers.public_api import router as public_api_router
 from routers.chat import router as chat_router
 from routers.network_analysis import router as network_analysis_router
+from routers.tutorial import router as tutorial_router

 app.include_router(auth_router)
 app.include_router(incidents_router)
@@ -342,6 +343,7 @@ app.include_router(feedback_router)
 app.include_router(public_api_router)
 app.include_router(chat_router, prefix="/api/chat")
 app.include_router(network_analysis_router)
+app.include_router(tutorial_router)


@app.websocket("/api/ws")
--- a/src/routers/tutorial.py
+++ b/src/routers/tutorial.py
@@ -0,0 +1,77 @@
+"""Tutorial-Router: Fortschritt serverseitig pro User speichern."""
+import logging
+from fastapi import APIRouter, Depends
+from auth import get_current_user
+from database import db_dependency
+import aiosqlite
+
+logger = logging.getLogger("osint.tutorial")
+
+router = APIRouter(prefix="/api/tutorial", tags=["tutorial"])
+
+
+@router.get("/state")
+async def get_tutorial_state(
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Tutorial-Fortschritt des aktuellen Nutzers abrufen."""
+    cursor = await db.execute(
+        "SELECT tutorial_step, tutorial_completed FROM users WHERE id = ?",
+        (current_user["id"],),
+    )
+    row = await cursor.fetchone()
+    if not row:
+        return {"current_step": None, "completed": False}
+    return {
+        "current_step": row["tutorial_step"],
+        "completed": bool(row["tutorial_completed"]),
+    }
+
+
+@router.put("/state")
+async def save_tutorial_state(
+    body: dict,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Tutorial-Fortschritt speichern (current_step und/oder completed)."""
+    updates = []
+    params = []
+
+    if "current_step" in body:
+        step = body["current_step"]
+        if step is not None and (not isinstance(step, int) or step < 0 or step > 31):
+            from fastapi import HTTPException
+            raise HTTPException(status_code=422, detail="current_step muss 0-31 oder null sein")
+        updates.append("tutorial_step = ?")
+        params.append(step)
+
+    if "completed" in body:
+        updates.append("tutorial_completed = ?")
+        params.append(1 if body["completed"] else 0)
+
+    if not updates:
+        return {"ok": True}
+
+    params.append(current_user["id"])
+    await db.execute(
+        f"UPDATE users SET {', '.join(updates)} WHERE id = ?",
+        params,
+    )
+    await db.commit()
+    return {"ok": True}
+
+
+@router.delete("/state")
+async def reset_tutorial_state(
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Tutorial-Fortschritt zuruecksetzen (fuer Neustart)."""
+    await db.execute(
+        "UPDATE users SET tutorial_step = NULL, tutorial_completed = 0 WHERE id = ?",
+        (current_user["id"],),
+    )
+    await db.commit()
+    return {"ok": True}
--- a/src/static/js/api.js
+++ b/src/static/js/api.js
@@ -215,6 +215,19 @@ const API = {
    },

    // Export
+
+    // Tutorial-Fortschritt
+    getTutorialState() {
+        return this._request('GET', '/tutorial/state');
+    },
+
+    saveTutorialState(data) {
+        return this._request('PUT', '/tutorial/state', data);
+    },
+
+    resetTutorialState() {
+        return this._request('DELETE', '/tutorial/state');
+    },
    exportIncident(id, format, scope) {
        const token = localStorage.getItem('osint_token');
        return fetch(`${this.baseUrl}/incidents/${id}/export?format=${format}&scope=${scope}`, {
--- a/src/static/js/chat.js
+++ b/src/static/js/chat.js
@@ -6,6 +6,7 @@ const Chat = {
    _isOpen: false,
    _isLoading: false,
    _hasGreeted: false,
+    _tutorialHintDismissed: false,
    _isFullscreen: false,

    init() {
@@ -64,10 +65,13 @@ const Chat = {
        if (!this._hasGreeted) {
            this._hasGreeted = true;
            this.addMessage('assistant', 'Hallo! Ich bin der AegisSight Assistent. Stell mir gerne jede Frage rund um die Bedienung des Monitors, ich helfe dir weiter.');
-            // Tutorial-Hinweis beim ersten Chat-Oeffnen der Session
-            if (typeof Tutorial !== 'undefined' && !sessionStorage.getItem('osint_tutorial_hint_dismissed')) {
-                this._showTutorialHint();
        }
+
+        // Tutorial-Hinweis bei jedem Oeffnen aktualisieren (wenn nicht dismissed)
+        if (typeof Tutorial !== 'undefined' && !this._tutorialHintDismissed) {
+            var oldHint = document.getElementById('chat-tutorial-hint');
+            if (oldHint) oldHint.remove();
+            this._showTutorialHint();
        }

        // Focus auf Input
@@ -288,29 +292,57 @@ const Chat = {
        }
    },

-    _showTutorialHint() {
+    async _showTutorialHint() {
        var container = document.getElementById('chat-messages');
        if (!container) return;
+
+        // API-State laden (Fallback: Standard-Hint)
+        var state = null;
+        try { state = await API.getTutorialState(); } catch(e) {}
+
        var hint = document.createElement('div');
        hint.className = 'chat-tutorial-hint';
        hint.id = 'chat-tutorial-hint';
        var textDiv = document.createElement('div');
        textDiv.className = 'chat-tutorial-hint-text';
-        textDiv.innerHTML = '<strong>Tipp:</strong> Kennen Sie schon den interaktiven Rundgang? Er zeigt Ihnen Schritt f\u00fcr Schritt alle Funktionen des Monitors. Klicken Sie hier, um ihn zu starten.';
        textDiv.style.cursor = 'pointer';
+
+        if (state && !state.completed && state.current_step !== null && state.current_step > 0) {
+            // Mittendrin abgebrochen
+            var totalSteps = (typeof Tutorial !== 'undefined') ? Tutorial._steps.length : 32;
+            textDiv.innerHTML = '<strong>Tipp:</strong> Sie haben den Rundgang bei Schritt ' + (state.current_step + 1) + '/' + totalSteps + ' unterbrochen. Klicken Sie hier, um fortzusetzen.';
            textDiv.addEventListener('click', function() {
                Chat.close();
-            sessionStorage.setItem('osint_tutorial_hint_dismissed', '1');
+                Chat._tutorialHintDismissed = true;
                if (typeof Tutorial !== 'undefined') Tutorial.start();
            });
+        } else if (state && state.completed) {
+            // Bereits abgeschlossen
+            textDiv.innerHTML = '<strong>Tipp:</strong> Sie haben den Rundgang bereits abgeschlossen. <span style="text-decoration:underline;">Erneut starten?</span>';
+            textDiv.addEventListener('click', async function() {
+                Chat.close();
+                Chat._tutorialHintDismissed = true;
+                try { await API.resetTutorialState(); } catch(e) {}
+                if (typeof Tutorial !== 'undefined') Tutorial.start(true);
+            });
+        } else {
+            // Nie gestartet
+            textDiv.innerHTML = '<strong>Tipp:</strong> Kennen Sie schon den interaktiven Rundgang? Er zeigt Ihnen Schritt für Schritt alle Funktionen des Monitors. Klicken Sie hier, um ihn zu starten.';
+            textDiv.addEventListener('click', function() {
+                Chat.close();
+                Chat._tutorialHintDismissed = true;
+                if (typeof Tutorial !== 'undefined') Tutorial.start();
+            });
+        }
+
        var closeBtn = document.createElement('button');
        closeBtn.className = 'chat-tutorial-hint-close';
-        closeBtn.title = 'Schlie\u00dfen';
+        closeBtn.title = 'Schließen';
        closeBtn.innerHTML = '&times;';
        closeBtn.addEventListener('click', function(e) {
            e.stopPropagation();
            hint.remove();
-            sessionStorage.setItem('osint_tutorial_hint_dismissed', '1');
+            Chat._tutorialHintDismissed = true;
        });
        hint.appendChild(textDiv);
        hint.appendChild(closeBtn);
--- a/src/static/js/tutorial.js
+++ b/src/static/js/tutorial.js
@@ -12,6 +12,7 @@ const Tutorial = {
    _resizeHandler: null,
    _demoRunning: false,
    _lastExitedStep: -1,
+    _highestStep: -1,
    _stepTimers: [],      // setTimeout-IDs fuer den aktuellen Step
    _savedState: null,   // Dashboard-Zustand vor dem Tutorial

@@ -1152,14 +1153,52 @@ const Tutorial = {
    // -----------------------------------------------------------------------
    // Lifecycle
    // -----------------------------------------------------------------------
-    start() {
+    async start(forceRestart) {
        if (this._isActive) return;
-        this._isActive = true;
-        this._currentStep = -1;

-        // Chat schließen falls offen
+        // Chat schliessen falls offen
        if (typeof Chat !== 'undefined' && Chat._isOpen) Chat.close();

+        // Server-State laden (Fallback: direkt starten)
+        var state = null;
+        try { state = await API.getTutorialState(); } catch(e) {}
+
+        // Resume-Dialog wenn mittendrin abgebrochen
+        if (!forceRestart && state && !state.completed && state.current_step !== null && state.current_step > 0) {
+            this._showResumeDialog(state.current_step);
+            return;
+        }
+
+        this._startInternal(forceRestart ? 0 : null);
+    },
+
+    _showResumeDialog(step) {
+        var self = this;
+        var overlay = document.createElement('div');
+        overlay.className = 'tutorial-resume-overlay';
+        overlay.innerHTML = '<div class=tutorial-resume-dialog>'
+            + '<p>Sie haben den Rundgang bei <strong>Schritt ' + (step + 1) + '/' + this._steps.length + '</strong> unterbrochen.</p>'
+            + '<div class=tutorial-resume-actions>'
+            + '<button class=tutorial-btn tutorial-btn-next id=tutorial-resume-btn>Fortsetzen</button>'
+            + '<button class=tutorial-btn tutorial-btn-secondary id=tutorial-restart-btn>Neu starten</button>'
+            + '</div></div>';
+        document.body.appendChild(overlay);
+        document.getElementById('tutorial-resume-btn').addEventListener('click', function() {
+            overlay.remove();
+            self._startInternal(step);
+        });
+        document.getElementById('tutorial-restart-btn').addEventListener('click', async function() {
+            overlay.remove();
+            try { await API.resetTutorialState(); } catch(e) {}
+            self._startInternal(0);
+        });
+    },
+
+    _startInternal(resumeStep) {
+        this._isActive = true;
+        this._highestStep = -1;
+        this._currentStep = -1;
+
        // Overlay einblenden + Klicks blockieren
        this._els.overlay.classList.add('active');
        document.body.classList.add('tutorial-active');
@@ -1172,7 +1211,11 @@ const Tutorial = {
        this._resizeHandler = this._onResize.bind(this);
        window.addEventListener('resize', this._resizeHandler);

+        if (resumeStep && resumeStep > 0) {
+            this.goToStep(resumeStep);
+        } else {
            this.next();
+        }
    },

    stop() {
@@ -1230,7 +1273,14 @@ const Tutorial = {
            this._resizeHandler = null;
        }

+        // Fortschritt serverseitig speichern
+        if (this._lastExitedStep >= 0 && this._lastExitedStep < this._steps.length - 1) {
+            // Mittendrin abgebrochen — Schritt speichern
+            API.saveTutorialState({ current_step: this._lastExitedStep }).catch(function() {});
+        } else {
+            // Komplett durchlaufen oder letzter Schritt
            this._markSeen();
+        }
    },

    // -----------------------------------------------------------------------
@@ -1258,6 +1308,7 @@ const Tutorial = {

        if (this._currentStep >= 0) this._exitStep(this._currentStep);
        this._currentStep = index;
+        if (index > this._highestStep) this._highestStep = index;
        this._enterStep(index);
    },

@@ -2231,6 +2282,7 @@ const Tutorial = {
    // -----------------------------------------------------------------------
    _markSeen() {
        try { localStorage.setItem('osint_tutorial_seen', '1'); } catch(e) {}
+        API.saveTutorialState({ completed: true, current_step: null }).catch(function() {});
    },

    _hasSeen() {