AegisSight-Monitor/src/agents/orchestrator.py

"""Agenten-Orchestrierung: Queue und Steuerung der Claude-Agenten."""
import asyncio
import json
import logging
import re
from datetime import datetime
from config import TIMEZONE
from typing import Optional
from urllib.parse import urlparse, urlunparse, quote_plus

import httpx

from agents.claude_client import UsageAccumulator, _cancel_event_var
from agents.factchecker import find_matching_claim, deduplicate_new_facts, TWOPHASE_MIN_FACTS
from source_rules import (
    _detect_category,
    _extract_domain,
    discover_source,
    domain_to_display_name,
)

logger = logging.getLogger("osint.orchestrator")

# Reputation score per source category (used by the relevance scoring).
# Keys must match the actual DB values stored in sources.category
# (see DOMAIN_CATEGORY_MAP in source_rules.py).
CATEGORY_REPUTATION = {
    "nachrichtenagentur":    1.0,   # Reuters, AP, dpa, AFP — primary sources
    "behoerde":              1.0,   # BMI, BSI, Europol — official sources
    "oeffentlich-rechtlich": 0.95,  # tagesschau, ZDF, ARD, BBC, ORF
    "qualitaetszeitung":     0.85,  # Spiegel, Zeit, FAZ, NZZ, Süddeutsche
    "think-tank":            0.85,  # SWP, IISS, Brookings, Chatham House
    "fachmedien":            0.8,   # heise, golem, netzpolitik, Handelsblatt
    "international":         0.75,  # CNN, Guardian, NYT, Al Jazeera, France24
    "regional":              0.65,  # regional daily newspapers
    "telegram":              0.5,   # OSINT channels — mixed quality
    "sonstige":              0.4,   # uncategorised
    "boulevard":             0.3,   # Bild, Sun etc.
}

# Research mode: automatically run 3 passes for best results.
RESEARCH_MULTI_PASS_COUNT = 3
# Human-readable (German, runtime) label for each research pass number.
RESEARCH_PASS_LABELS = {1: "Breite Erfassung", 2: "Vertiefung", 3: "Konsolidierung"}


def _normalize_url(url: str) -> str:
    """Return a canonical form of *url* for duplicate detection.

    The canonical form has a lower-cased scheme (defaulting to ``https``),
    a lower-cased host without a leading ``www.``, the path without trailing
    slashes, and no params, query string or fragment. An empty/falsy input
    yields ``""``. If parsing fails, a lower-cased, stripped copy of the
    input without trailing slashes is returned instead.
    """
    if not url:
        return ""

    cleaned = url.strip()
    try:
        parts = urlparse(cleaned)
        host = parts.netloc.lower().removeprefix("www.")
        canonical = (
            parts.scheme.lower() or "https",
            host,
            parts.path.rstrip("/"),
            "",  # params dropped
            "",  # query dropped (tracking parameters etc.)
            "",  # fragment dropped
        )
        return urlunparse(canonical)
    except Exception:
        return cleaned.lower().strip().rstrip("/")


def _normalize_headline(headline: str) -> str:
    """Return a simplified form of *headline* for similarity comparison.

    The text is lower-cased and stripped, German umlauts and ``ß`` are
    transliterated to ASCII digraphs, every character that is neither a word
    character nor whitespace is removed, and whitespace runs are collapsed to
    single spaces. An empty/falsy input yields ``""``.
    """
    if not headline:
        return ""

    transliteration = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
    text = headline.lower().strip().translate(transliteration)
    without_punctuation = re.sub(r"[^\w\s]", "", text)
    return re.sub(r"\s+", " ", without_punctuation).strip()


def _is_duplicate(article: dict, seen_urls: set, seen_headlines: set) -> bool:
    """Tell whether *article* was already seen, by URL or by headline.

    Both sets are updated in place: the normalized URL is recorded as soon as
    it turns out to be new, and the normalized headline is recorded when the
    article is not a duplicate. Headlines of 20 characters or fewer are not
    used for matching.
    """
    raw_url = article.get("source_url", "")
    raw_headline = article.get("headline", "")

    # URL check
    if raw_url:
        url_key = _normalize_url(raw_url)
        if url_key in seen_urls:
            return True
        seen_urls.add(url_key)

    # Headline check (only for sufficiently long headlines)
    if not raw_headline or len(raw_headline) <= 20:
        return False

    headline_key = _normalize_headline(raw_headline)
    if not headline_key:
        return False
    if headline_key in seen_headlines:
        return True

    seen_headlines.add(headline_key)
    return False


def _score_relevance(article: dict, search_words: Optional[list[str]] = None) -> float:
    """Compute a relevance score in the range 0.0-1.0 for an article.

    Weighting:
    - 40% topical fit: the article's own ``relevance_score`` when it is
      positive, otherwise the share of *search_words* found in headline and
      original content
    - 30% source reputation (by category of the source domain)
    - 20% content depth (length of the article text)
    - 10% bonus from ``relevance_score`` (if present)

    Args:
        article: Article dict. Reads ``relevance_score``, ``headline``,
            ``content_original``, ``content_de`` and ``source_url``; missing
            keys and ``None`` values are treated as absent.
        search_words: Search terms matched by substring against the
            lower-cased text. Terms are compared as given, so callers are
            expected to pass them lower-cased.

    Returns:
        The score, clamped to ``[0.0, 1.0]``.
    """
    # A key that exists with value None (e.g. a NULL column) must count as
    # "no score" instead of raising on the comparison below.
    rss_score = article.get("relevance_score") or 0.0
    score = 0.0

    # 1. Topical fit (40%)
    if rss_score > 0:
        score += 0.4 * rss_score
    elif search_words:
        # "or ''" keeps a None field from leaking the literal "None" into the text.
        headline = article.get("headline") or ""
        body = article.get("content_original") or ""
        text = f"{headline} {body}".lower()
        match_count = sum(1 for word in search_words if word in text)
        score += 0.4 * (match_count / len(search_words))

    # 2. Source reputation (30%)
    source_url = article.get("source_url", "")
    if source_url:
        domain = _extract_domain(source_url)
        category = _detect_category(domain)
        score += 0.3 * CATEGORY_REPUTATION.get(category, 0.4)
    else:
        score += 0.3 * 0.4  # unknown source

    # 3. Content depth (20%)
    content = article.get("content_original") or article.get("content_de") or ""
    content_length = len(content)
    if content_length > 500:
        score += 0.2
    elif content_length > 200:
        score += 0.1
    elif content_length > 50:
        score += 0.05

    # 4. RSS score bonus (10%)
    score += 0.1 * rss_score

    return max(0.0, min(1.0, score))


async def _verify_article_urls(
    articles: list[dict],
    concurrency: int = 10,
    timeout: float = 8.0,
) -> list[dict]:
    """Check WebSearch article URLs for reachability.

    Each URL is probed with HEAD (falling back to a one-byte ranged GET when
    the server answers 405). Articles whose URL answers with a status in
    200-399 are kept unchanged. For any other status or a request error the
    ``source_url`` is replaced, on a copy of the article, by a Google search
    link built from the headline and the URL's host, and the copy is flagged
    with ``_url_repaired``. Articles without a URL, or unreachable articles
    without a headline, are kept as they are.

    Args:
        articles: Article dicts; reads ``source_url``, ``headline``, ``source``.
        concurrency: Maximum number of simultaneous HTTP probes.
        timeout: Timeout passed to the HTTP client.

    Returns:
        One entry per input article, in input order.
    """
    if not articles:
        return []

    sem = asyncio.Semaphore(concurrency)
    results: list[dict | None] = [None] * len(articles)

    async def _check(idx: int, article: dict, client: httpx.AsyncClient):
        # "or ''" so a source_url that is present but None does not raise and
        # abort the whole gather().
        url = (article.get("source_url") or "").strip()
        if not url:
            results[idx] = article  # no URL -> keep (will not be linked anyway)
            return

        async with sem:
            try:
                resp = await client.head(url)
                if resp.status_code == 405:
                    # Some servers do not support HEAD
                    resp = await client.get(url, headers={"Range": "bytes=0-0"})
                if 200 <= resp.status_code < 400:
                    results[idx] = article
                    return
                # 404 or another error status -> fall through to the search link
                logger.info(f"URL-Verifizierung: {resp.status_code} fuer {url}")
            except Exception as e:
                # Best effort: any request failure counts as "unreachable".
                logger.debug(f"URL-Verifizierung fehlgeschlagen fuer {url}: {e}")

        # Fallback: Google search link from headline + source domain
        headline = article.get("headline") or ""
        if not headline:
            results[idx] = article  # no headline -> keep the original
            return

        try:
            domain = urlparse(url).netloc
        except ValueError:
            # urlparse rejects some malformed hosts (e.g. broken IPv6 literals)
            domain = ""
        source = article.get("source") or ""

        search_query = f"site:{domain} {headline}" if domain else f"{source} {headline}"
        article_copy = dict(article)
        article_copy["source_url"] = f"https://www.google.com/search?q={quote_plus(search_query)}"
        article_copy["_url_repaired"] = True
        results[idx] = article_copy
        logger.info(f"URL-Fallback: {url} -> Google-Suche fuer \"{headline[:60]}...\"")

    async with httpx.AsyncClient(
        timeout=timeout,
        follow_redirects=True,
        headers={"User-Agent": "Mozilla/5.0 (compatible; AegisSight-Monitor/1.0)"},
    ) as client:
        await asyncio.gather(*[_check(i, a, client) for i, a in enumerate(articles)])

    verified = [r for r in results if r is not None]
    repaired = sum(1 for r in verified if r.get("_url_repaired"))
    ok = len(verified) - repaired

    if repaired > 0:
        logger.warning(
            f"URL-Verifizierung: {ok} OK, {repaired} durch Suchlinks ersetzt "
            f"(von {len(articles)} WebSearch-Artikeln)"
        )
    else:
        logger.info(f"URL-Verifizierung: Alle {len(articles)} WebSearch-URLs erreichbar")

    return verified


async def _background_discover_sources(articles: list[dict]):
    """Background task: register reputable, not-yet-known sources found in research results.

    For every distinct article domain whose detected category is not
    ``"sonstige"`` and which has no row in ``sources`` yet, the source is
    probed via ``discover_source`` and inserted as an active, system-added
    source. A single commit is issued at the end when at least one source
    was inserted. Errors are logged and never propagated.
    """
    from database import get_db

    db = await get_db()
    try:
        # 1. + 2. Collect each domain once; keep only categorised (reputable) ones.
        visited: set[str] = set()
        candidates: dict[str, tuple[str, str]] = {}
        for art in articles:
            link = art.get("source_url")
            if not link:
                continue
            dom = _extract_domain(link)
            if not dom or dom in visited:
                continue
            visited.add(dom)

            cat = _detect_category(dom)
            if cat != "sonstige":
                candidates[dom] = (link, cat)

        if not candidates:
            return

        # 3. Skip domains that already exist in the DB.
        registered = 0
        for dom, (link, cat) in candidates.items():
            lookup = await db.execute(
                "SELECT id FROM sources WHERE LOWER(domain) = ?",
                (dom.lower(),),
            )
            already_known = await lookup.fetchone()
            if already_known:
                continue

            # 4. RSS feed detection and registration.
            try:
                found = await discover_source(link)
                display_name = domain_to_display_name(dom)
                kind = found["source_type"]  # "rss_feed" or "web_source"
                rss_url = found.get("rss_url")

                await db.execute(
                    """INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by)
                       VALUES (?, ?, ?, ?, ?, 'active', 'Auto-entdeckt via Recherche', 'system')""",
                    (display_name, rss_url or f"https://{dom}", dom, kind, cat),
                )
                registered += 1
                logger.info(f"Neue Quelle registriert: {display_name} ({dom}) als {kind}")
            except Exception as e:
                logger.debug(f"Discovery fehlgeschlagen für {dom}: {e}")

        if registered > 0:
            await db.commit()
            logger.info(f"Background-Discovery: {registered} neue Quellen registriert")
    except Exception as e:
        logger.warning(f"Background-Discovery Fehler: {e}")
    finally:
        await db.close()


async def _create_notifications_for_incident(
    db, incident_id: int, visibility: str, created_by: int, tenant_id: int, notifications: list[dict]
):
    """Insert DB notifications for every affected user and commit.

    - Public incidents with a tenant -> all active users of that organisation
      that have logged in at least once
    - Otherwise -> only the creator

    Does nothing when *notifications* is empty. Each notification dict must
    provide ``title`` and ``text``; ``type`` and ``icon`` are optional.
    """
    if not notifications:
        return

    now = datetime.now(TIMEZONE).strftime('%Y-%m-%d %H:%M:%S')

    recipients = [created_by]
    if visibility == "public" and tenant_id:
        cursor = await db.execute(
            "SELECT id FROM users WHERE organization_id = ? AND is_active = 1 AND last_login_at IS NOT NULL",
            (tenant_id,),
        )
        recipients = [row["id"] for row in await cursor.fetchall()]

    # One row per (user, notification); values are built lazily per row.
    deliveries = ((uid, entry) for uid in recipients for entry in notifications)
    for uid, entry in deliveries:
        values = (
            uid,
            incident_id,
            entry.get("type", "refresh_summary"),
            entry["title"],
            entry["text"],
            entry.get("icon", "info"),
            tenant_id,
            now,
        )
        await db.execute(
            """INSERT INTO notifications (user_id, incident_id, type, title, text, icon, tenant_id, created_at)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
            values,
        )

    await db.commit()
    logger.info(f"Notifications erstellt: {len(notifications)} x {len(recipients)} Nutzer fuer Lage {incident_id}")


async def _send_email_notifications_for_incident(
    db, incident_id: int, incident_title: str, visibility: str,
    created_by: int, tenant_id: int, notifications: list[dict],
    incident_type: str = "adhoc",
):
    """Send e-mail notifications according to each user's subscription.

    Every user has individual e-mail preferences per incident
    (``incident_subscriptions``). Only active users that have logged in at
    least once (``last_login_at IS NOT NULL``) are considered. Each subscriber
    receives one mail containing the notifications whose type they opted
    into; send failures are logged per recipient and do not stop the loop.
    Does nothing when *notifications* is empty.
    """
    if not notifications:
        return

    from email_utils.sender import send_email
    from email_utils.templates import incident_notification_email
    from config import MAGIC_LINK_BASE_URL
    from services.org_settings import get_org_language

    # Language of the organisation (an incident belongs to exactly one org)
    if tenant_id:
        org_lang_iso = await get_org_language(db, tenant_id)
    else:
        org_lang_iso = "de"

    # Load all users with at least one active e-mail option for this incident
    cursor = await db.execute(
        """SELECT s.notify_email_summary, s.notify_email_new_articles,
                  s.notify_email_status_change, u.id, u.email, u.username
           FROM incident_subscriptions s
           JOIN users u ON u.id = s.user_id
           WHERE s.incident_id = ?
             AND u.is_active = 1
             AND u.last_login_at IS NOT NULL
             AND (s.notify_email_summary = 1
                  OR s.notify_email_new_articles = 1
                  OR s.notify_email_status_change = 1)""",
        (incident_id,),
    )
    subscribers = await cursor.fetchall()
    if not subscribers:
        return

    dashboard_url = f"{MAGIC_LINK_BASE_URL}/dashboard"

    # Notification type -> subscription column that enables it
    type_to_pref = (
        ("refresh_summary", "notify_email_summary"),
        ("new_articles", "notify_email_new_articles"),
        ("status_change", "notify_email_status_change"),
    )

    def _is_wanted(prefs: dict, notification: dict) -> bool:
        ntype = notification.get("type", "refresh_summary")
        return any(ntype == name and prefs.get(column) for name, column in type_to_pref)

    for subscriber in subscribers:
        prefs = dict(subscriber)

        selected = [n for n in notifications if _is_wanted(prefs, n)]
        if not selected:
            continue

        recipient = prefs["email"]
        subject, html = incident_notification_email(
            username=recipient.split("@")[0],
            incident_title=incident_title,
            notifications=selected,
            dashboard_url=dashboard_url,
            incident_type=incident_type,
            lang=org_lang_iso,
        )
        try:
            await send_email(prefs["email"], subject, html)
            logger.info(f"E-Mail-Benachrichtigung gesendet an {prefs['email']} fuer Lage {incident_id} ({len(selected)} Items)")
        except Exception as e:
            logger.error(f"E-Mail-Benachrichtigung fehlgeschlagen fuer {prefs['email']}: {e}")


class AgentOrchestrator:
    """Verwaltet die Claude-Agenten-Queue und koordiniert Recherche-Zyklen."""

    def __init__(self):
        self._queue: asyncio.Queue = asyncio.Queue()
        self._running = False
        self._current_task: Optional[int] = None
        # Session-Start des aktuellen Tasks (UTC ISO mit 'Z'). Ueberspannt Multi-Pass
        # und Retries innerhalb derselben Queue-Abarbeitung — verhindert, dass der
        # Frontend-Timer beim Seiten-Reload auf den Pass/Retry-Start zurueckspringt.
        self._current_task_started_at: Optional[str] = None
        self._ws_manager = None
        self._queued_ids: set[int] = set()
        self._cancel_requested: set[int] = set()
        self._cancel_event: asyncio.Event | None = None

    def set_ws_manager(self, ws_manager):
        """Attach the WebSocket manager used for real-time status updates.

        While no manager is set, the methods of this class that check
        ``self._ws_manager`` skip their broadcasts.
        """
        self._ws_manager = ws_manager

    async def start(self):
        """Start the queue worker.

        Sets the running flag and schedules ``_worker`` as a task on the
        running event loop. The task is stored on the instance because the
        event loop itself only keeps weak references to tasks; without a
        strong reference the worker could be garbage-collected while running.
        Calling ``start`` while a worker task is still alive does not spawn a
        second worker, so refreshes keep being processed sequentially.
        """
        # Set the flag first: a worker that is still looping after stop()
        # re-reads it and simply keeps going.
        self._running = True

        existing = getattr(self, "_worker_task", None)
        if existing is not None and not existing.done():
            logger.info("Agenten-Orchestrator laeuft bereits")
            return

        self._worker_task = asyncio.create_task(self._worker())
        logger.info("Agenten-Orchestrator gestartet")

    async def stop(self):
        """Ask the queue worker to stop.

        Only clears the running flag; the worker loop checks that flag at the
        top of each iteration, so it exits after its current wait or job.
        """
        self._running = False
        logger.info("Agenten-Orchestrator gestoppt")

    async def enqueue_refresh(self, incident_id: int, trigger_type: str = "manual", user_id: Optional[int] = None) -> bool:
        """Put a refresh job for an incident into the queue.

        Args:
            incident_id: Incident to refresh.
            trigger_type: Origin of the request; stored with the queue item.
            user_id: Requesting user, if any; stored with the queue item.

        Returns:
            ``True`` when the job was queued. ``False`` when the incident is
            already queued or being processed, or when the request was
            cancelled while it was being queued.
        """
        if incident_id in self._queued_ids or self._current_task == incident_id:
            logger.info(f"Refresh fuer Lage {incident_id} uebersprungen: bereits aktiv/in Queue")
            return False

        # Reserve the ID *before* the first await. Otherwise a second call for
        # the same incident could pass the check above while this one is
        # waiting for the DB, and the incident would be queued twice.
        self._queued_ids.add(incident_id)
        try:
            visibility, created_by, tenant_id = await self._get_incident_visibility(incident_id)
        except BaseException:
            # Lookup failed or was cancelled: release the reservation again.
            self._queued_ids.discard(incident_id)
            raise

        if incident_id not in self._queued_ids:
            # cancel_refresh() dropped the reservation while we were waiting.
            logger.info(f"Refresh fuer Lage {incident_id} verworfen: waehrend des Einreihens abgebrochen")
            return False

        await self._queue.put((incident_id, trigger_type, user_id))
        queue_size = self._queue.qsize()
        logger.info(f"Refresh fuer Lage {incident_id} eingereiht (Queue: {queue_size}, Trigger: {trigger_type})")

        if self._ws_manager:
            await self._ws_manager.broadcast_for_incident({
                "type": "status_update",
                "incident_id": incident_id,
                "data": {"status": "queued", "queue_position": queue_size},
            }, visibility, created_by, tenant_id)

        return True

    async def cancel_refresh(self, incident_id: int) -> bool:
        """Fordert Abbruch eines laufenden oder wartenden Refreshes an."""
        # Check if it's the currently running task
        if self._current_task == incident_id:
            self._cancel_requested.add(incident_id)
            if self._cancel_event:
                self._cancel_event.set()
            logger.info(f"Cancel angefordert fuer laufende Lage {incident_id}")
            if self._ws_manager:
                try:
                    vis, cb, tid = await self._get_incident_visibility(incident_id)
                except Exception:
                    vis, cb, tid = "public", None, None
                await self._ws_manager.broadcast_for_incident({
                    "type": "status_update",
                    "incident_id": incident_id,
                    "data": {"status": "cancelling", "detail": "Wird abgebrochen..."},
                }, vis, cb, tid)
            return True

        # Check if it's in the queue (not yet started)
        if incident_id in self._queued_ids:
            self._queued_ids.discard(incident_id)
            # Remove from asyncio queue (rebuild without this ID)
            removed = False
            new_items = []
            while not self._queue.empty():
                try:
                    item = self._queue.get_nowait()
                    iid = item[0] if isinstance(item, tuple) else item
                    if iid == incident_id:
                        removed = True
                        self._queue.task_done()
                    else:
                        new_items.append(item)
                except Exception:
                    break
            for item in new_items:
                self._queue.put_nowait(item)

            logger.info(f"Lage {incident_id} aus Warteschlange entfernt (removed={removed})")

            # refresh_log-Eintrag schreiben, damit Auto-Refresh nicht im naechsten Tick erneut einreiht
            await self._log_queued_cancellation(incident_id)

            # Send cancelled event
            if self._ws_manager:
                try:
                    vis, cb, tid = await self._get_incident_visibility(incident_id)
                except Exception:
                    vis, cb, tid = "public", None, None
                await self._ws_manager.broadcast_for_incident({
                    "type": "refresh_cancelled",
                    "incident_id": incident_id,
                    "data": {"status": "cancelled"},
                }, vis, cb, tid)
            return True

        return False

    def _check_cancelled(self, incident_id: int):
        """Prüft ob Abbruch angefordert wurde und wirft CancelledError."""
        if incident_id in self._cancel_requested:
            self._cancel_requested.discard(incident_id)
            raise asyncio.CancelledError("Vom Nutzer abgebrochen")

    async def _worker(self):
        """Process queued refresh jobs one at a time until the running flag is cleared.

        For each job the worker:

        - marks the incident as current task and installs a fresh cancel event,
        - runs the multi-pass research (research incidents without a summary)
          or a normal refresh, with up to 3 attempts for transient errors
          (delays of 2 and 5 minutes between attempts),
        - treats ``asyncio.CancelledError`` as a user abort, marks the refresh
          log accordingly and broadcasts ``refresh_cancelled``,
        - marks the refresh as failed and broadcasts ``refresh_error`` on
          permanent errors or after the last failed attempt.

        Unexpected exceptions outside the refresh itself (incident lookup,
        broadcasts) are logged and the worker continues with the next job, so
        a single failure cannot stop queue processing for good.
        """
        from datetime import timezone
        from agents.claude_client import ClaudeCliError

        RETRY_DELAYS = [0, 120, 300]  # seconds: immediately, 2 min, 5 min
        TRANSIENT_ERRORS = (asyncio.TimeoutError, TimeoutError, ConnectionError, OSError)

        def _is_transient_cli(err: Exception) -> bool:
            return isinstance(err, ClaudeCliError) and err.error_type in ("rate_limit", "timeout")

        async def _broadcast(target_id: int, message: dict) -> None:
            """Send *message* to the clients of an incident (no-op without WS manager)."""
            if not self._ws_manager:
                return
            try:
                vis, cb, tid = await self._get_incident_visibility(target_id)
            except Exception:
                # Same fallback the rest of the class uses when the lookup fails
                vis, cb, tid = "public", None, None
            await self._ws_manager.broadcast_for_incident(message, vis, cb, tid)

        while self._running:
            try:
                # Time out periodically so a cleared running flag is noticed
                item = await asyncio.wait_for(self._queue.get(), timeout=5.0)
            except asyncio.TimeoutError:
                continue

            if len(item) == 3:
                incident_id, trigger_type, user_id = item
            else:
                incident_id, trigger_type = item
                user_id = None
            self._queued_ids.discard(incident_id)
            self._current_task = incident_id
            # Set the session start ONCE — it stays stable across multi-pass/retries
            self._current_task_started_at = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
            self._cancel_event = asyncio.Event()
            _cancel_event_var.set(self._cancel_event)
            logger.info(f"Starte Refresh für Lage {incident_id} (Trigger: {trigger_type})")

            last_error = None

            try:
                # Research incidents: automatic 3 passes, only on the first refresh
                incident_type, has_summary = await self._get_incident_info(incident_id)
                use_multi_pass = incident_type == "research" and not has_summary

                for attempt in range(3):
                    try:
                        if use_multi_pass:
                            await self._run_research_multi_pass(incident_id, trigger_type=trigger_type, user_id=user_id)
                        else:
                            await self._run_refresh(incident_id, trigger_type=trigger_type, retry_count=attempt, user_id=user_id)
                        last_error = None
                        break  # success
                    except asyncio.CancelledError:
                        logger.info(f"Refresh fuer Lage {incident_id} abgebrochen")
                        await self._mark_refresh_cancelled(incident_id)
                        await _broadcast(incident_id, {
                            "type": "refresh_cancelled",
                            "incident_id": incident_id,
                            "data": {"status": "cancelled"},
                        })
                        last_error = None
                        break
                    except Exception as e:
                        # Auth/CLI errors: abort immediately, a retry would not help
                        if isinstance(e, ClaudeCliError) and e.error_type in ("auth_error", "cli_error"):
                            last_error = e
                            logger.error(f"Permanenter Claude-Fehler [{e.error_type}] bei Lage {incident_id}: {e}")
                            await self._mark_refresh_failed(incident_id, str(e))
                            break

                        # Transient errors: retry, up to 3 attempts in total
                        if isinstance(e, TRANSIENT_ERRORS) or _is_transient_cli(e):
                            last_error = e
                            kind = e.error_type if isinstance(e, ClaudeCliError) else type(e).__name__
                            logger.warning(f"Transienter Fehler [{kind}] bei Lage {incident_id} (Versuch {attempt + 1}/3): {e}")
                            if attempt < 2:
                                await self._mark_refresh_failed(incident_id, str(e))
                                delay = RETRY_DELAYS[attempt + 1]
                                logger.info(f"Retry in {delay}s für Lage {incident_id}")
                                await _broadcast(incident_id, {
                                    "type": "status_update",
                                    "incident_id": incident_id,
                                    "data": {"status": "retrying", "attempt": attempt + 1, "delay": delay},
                                })
                                await asyncio.sleep(delay)
                                continue
                            else:
                                await self._mark_refresh_failed(incident_id, f"Endgültig fehlgeschlagen nach 3 Versuchen: {e}")
                                break

                        # Everything else: permanent
                        last_error = e
                        logger.error(f"Permanenter Fehler bei Refresh für Lage {incident_id}: {e}")
                        await self._mark_refresh_failed(incident_id, str(e))
                        break

                if last_error:
                    await _broadcast(incident_id, {
                        "type": "refresh_error",
                        "incident_id": incident_id,
                        "data": {"error": str(last_error)},
                    })
            except Exception as e:
                # Failure outside the refresh itself (incident lookup, broadcast, ...).
                # Log it and keep the worker alive for the remaining jobs.
                logger.exception(f"Unerwarteter Fehler im Queue-Worker bei Lage {incident_id}: {e}")
                try:
                    await _broadcast(incident_id, {
                        "type": "refresh_error",
                        "incident_id": incident_id,
                        "data": {"error": str(e)},
                    })
                except Exception as notify_err:
                    logger.debug(f"Fehler-Broadcast fehlgeschlagen fuer Lage {incident_id}: {notify_err}")
            finally:
                self._current_task = None
                self._current_task_started_at = None
                self._cancel_event = None
                _cancel_event_var.set(None)
                # Drop a cancel request that was never consumed by
                # _check_cancelled(); otherwise it would abort the NEXT refresh
                # of this incident right away.
                self._cancel_requested.discard(incident_id)
                self._queue.task_done()

    async def _mark_refresh_cancelled(self, incident_id: int):
        """Mark the running refresh-log entry as cancelled and close its active pipeline steps.

        Closing the steps matters: without it the last active step entry stays
        orphaned and the frontend keeps showing 'step X running', because
        /api/incidents/<id>/pipeline reads from refresh_pipeline_steps.
        Errors are logged as warnings and not propagated.
        """
        from database import get_db
        from services.pipeline_tracker import cancel_active_steps

        db = await get_db()
        try:
            cancelled_at = datetime.now(TIMEZONE).strftime('%Y-%m-%d %H:%M:%S')

            # Remember the running entry's ID before its status changes.
            lookup = await db.execute(
                "SELECT id FROM refresh_log WHERE incident_id = ? AND status = 'running'",
                (incident_id,),
            )
            running_entry = await lookup.fetchone()
            running_log_id = None if not running_entry else running_entry["id"]

            await db.execute(
                """UPDATE refresh_log SET status = 'cancelled', error_message = 'Vom Nutzer abgebrochen',
                   completed_at = ? WHERE incident_id = ? AND status = 'running'""",
                (cancelled_at, incident_id),
            )
            await db.commit()

            if running_log_id is None:
                return
            await cancel_active_steps(db, refresh_log_id=running_log_id)
        except Exception as e:
            logger.warning(f"Konnte Refresh-Log nicht als abgebrochen markieren: {e}")
        finally:
            await db.close()

    async def _log_queued_cancellation(self, incident_id: int):
        """Write a cancelled refresh-log entry for a queue cancellation (incident had not started yet).

        Keeps the auto-refresh scheduler from re-queueing the incident right
        away on its next tick. Errors are logged as warnings and not propagated.
        """
        from database import get_db

        db = await get_db()
        try:
            lookup = await db.execute("SELECT tenant_id FROM incidents WHERE id = ?", (incident_id,))
            incident_row = await lookup.fetchone()
            tenant = None if not incident_row else incident_row["tenant_id"]

            # Start and completion share one timestamp: the job never ran.
            stamp = datetime.now(TIMEZONE).strftime("%Y-%m-%d %H:%M:%S")
            await db.execute(
                """INSERT INTO refresh_log (incident_id, started_at, completed_at, status,
                   trigger_type, error_message, tenant_id)
                   VALUES (?, ?, ?, 'cancelled', 'manual', 'Aus Warteschlange entfernt', ?)""",
                (incident_id, stamp, stamp, tenant),
            )
            await db.commit()
        except Exception as e:
            logger.warning(f"Konnte Queue-Cancel nicht in refresh_log loggen: {e}")
        finally:
            await db.close()

    async def _mark_refresh_failed(self, incident_id: int, error: str):
        """Mark the running refresh-log entry of an incident as ``error``.

        The error text is truncated to 500 characters. Failures while writing
        are logged as warnings and not propagated.
        """
        from database import get_db

        db = await get_db()
        try:
            message = error[:500]
            finished_at = datetime.now(TIMEZONE).strftime('%Y-%m-%d %H:%M:%S')
            await db.execute(
                """UPDATE refresh_log SET status = 'error', error_message = ?,
                   completed_at = ? WHERE incident_id = ? AND status = 'running'""",
                (message, finished_at, incident_id),
            )
            await db.commit()
        except Exception as e:
            logger.warning(f"Konnte Refresh-Log nicht als fehlgeschlagen markieren: {e}")
        finally:
            await db.close()

    async def _get_incident_visibility(self, incident_id: int) -> tuple[str, Optional[int], Optional[int]]:
        """Load visibility, created_by and tenant_id of an incident.

        Returns:
            ``(visibility, created_by, tenant_id)``. An unknown incident yields
            ``("public", None, None)``; an empty visibility value is reported
            as ``"public"``.
        """
        from database import get_db

        result = ("public", None, None)
        db = await get_db()
        try:
            cursor = await db.execute(
                "SELECT visibility, created_by, tenant_id FROM incidents WHERE id = ?", (incident_id,)
            )
            record = await cursor.fetchone()
            if record:
                result = (
                    record["visibility"] or "public",
                    record["created_by"],
                    record["tenant_id"],
                )
        finally:
            await db.close()
        return result

    async def _run_refresh(self, incident_id: int, trigger_type: str = "manual", retry_count: int = 0, user_id: int = None, _suppress_complete: bool = False, _pass_info: dict = None):
        """Führt einen kompletten Refresh-Zyklus durch."""
        import aiosqlite
        from database import get_db
        from agents.researcher import ResearcherAgent
        from agents.analyzer import AnalyzerAgent
        from agents.factchecker import FactCheckerAgent
        from feeds.rss_parser import RSSParser
        from services import pipeline_tracker as _pipe

        db = await get_db()
        try:
            # Lage laden
            cursor = await db.execute("SELECT * FROM incidents WHERE id = ?", (incident_id,))
            incident = await cursor.fetchone()
            if not incident:
                logger.warning(f"Lage {incident_id} nicht gefunden")
                return

            title = incident["title"]
            description = incident["description"] or ""
            incident_type = incident["type"] or "adhoc"
            international = bool(incident["international_sources"]) if "international_sources" in incident.keys() else True
            include_telegram = bool(incident["include_telegram"]) if "include_telegram" in incident.keys() else False
            visibility = incident["visibility"] if "visibility" in incident.keys() else "public"
            created_by = incident["created_by"] if "created_by" in incident.keys() else None
            tenant_id = incident["tenant_id"] if "tenant_id" in incident.keys() else None
            # Org-Sprache fuer alle KI-Agenten (Lagebild, Faktencheck, Recherche)
            from services.org_settings import get_org_language, language_display
            output_language_iso = await get_org_language(db, tenant_id) if tenant_id else "de"
            output_language = language_display(output_language_iso)
            previous_summary = incident["summary"] or ""
            previous_sources_json = incident["sources_json"] if "sources_json" in incident.keys() else None
            previous_developments = incident["latest_developments"] if "latest_developments" in incident.keys() else None

            # Bei Retry: vorherigen running-Eintrag als error markieren
            if retry_count > 0:
                await db.execute(
                    """UPDATE refresh_log SET status = 'error', error_message = 'Retry gestartet',
                       completed_at = ? WHERE incident_id = ? AND status = 'running'""",
                    (datetime.now(TIMEZONE).strftime('%Y-%m-%d %H:%M:%S'), incident_id),
                )
                await db.commit()

            # Refresh-Log starten
            now = datetime.now(TIMEZONE).strftime('%Y-%m-%d %H:%M:%S')
            now_utc = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
            cursor = await db.execute(
                "INSERT INTO refresh_log (incident_id, started_at, status, trigger_type, retry_count, tenant_id) VALUES (?, ?, 'running', ?, ?, ?)",
                (incident_id, now, trigger_type, retry_count, tenant_id),
            )
            await db.commit()
            log_id = cursor.lastrowid
            usage_acc = UsageAccumulator()

            # --- Pipeline-Tracking (Analysepipeline-Visualisierung) ---
            _pass_nr = (_pass_info or {}).get("nr", 1)
            _step_ids: dict[str, Optional[int]] = {}

            async def _pipe_start(step_key: str):
                try:
                    sid = await _pipe.start_step(
                        db, self._ws_manager,
                        refresh_log_id=log_id, incident_id=incident_id, step_key=step_key,
                        pass_number=_pass_nr, tenant_id=tenant_id,
                        visibility=visibility, created_by=created_by,
                    )
                    _step_ids[step_key] = sid
                    return sid
                except Exception as _e:
                    logger.debug(f"_pipe_start({step_key}) ignoriert: {_e}")
                    return None

            async def _pipe_done(step_key: str, count_value=None, count_secondary=None):
                try:
                    sid = _step_ids.pop(step_key, None)
                    await _pipe.complete_step(
                        db, self._ws_manager, step_id=sid,
                        refresh_log_id=log_id, incident_id=incident_id, step_key=step_key,
                        pass_number=_pass_nr, count_value=count_value, count_secondary=count_secondary,
                        tenant_id=tenant_id, visibility=visibility, created_by=created_by,
                    )
                except Exception as _e:
                    logger.debug(f"_pipe_done({step_key}) ignoriert: {_e}")

            async def _pipe_skip(step_key: str):
                try:
                    await _pipe.skip_step(
                        db, self._ws_manager,
                        refresh_log_id=log_id, incident_id=incident_id, step_key=step_key,
                        pass_number=_pass_nr, tenant_id=tenant_id,
                        visibility=visibility, created_by=created_by,
                    )
                except Exception as _e:
                    logger.debug(f"_pipe_skip({step_key}) ignoriert: {_e}")

            research_status = "deep_researching" if incident_type == "research" else "researching"
            research_detail = "Hintergrundrecherche im Web läuft..." if incident_type == "research" else "RSS-Feeds und Web werden durchsucht..."
            # Multi-Pass: Detail-Text mit Durchlauf-Info versehen
            _ws_extra = {}
            if _pass_info:
                _pnr, _ptotal, _plabel = _pass_info["nr"], _pass_info["total"], _pass_info["label"]
                research_detail = f"Recherche {_pnr}/{_ptotal}: {_plabel}..."
                _ws_extra = {"research_pass": _pnr, "research_total_passes": _ptotal}
            if self._ws_manager:
                await self._ws_manager.broadcast_for_incident({
                    "type": "status_update",
                    "incident_id": incident_id,
                    "data": {"status": research_status, "detail": research_detail, "started_at": now_utc, **_ws_extra},
                }, visibility, created_by, tenant_id)

            # Bestehende Artikel vorladen (für Dedup UND Kontext)
            cursor = await db.execute(
                "SELECT id, source_url, headline, source FROM articles WHERE incident_id = ?",
                (incident_id,),
            )
            existing_db_articles_full = await cursor.fetchall()

            # Pipeline-Schritt 1: Quellen sichten (vorbereitet)
            await _pipe_start("sources_review")
            try:
                if incident_type == "adhoc":
                    _src_cursor = await db.execute(
                        "SELECT COUNT(*) AS cnt FROM sources "
                        "WHERE status = 'active' "
                        "AND (tenant_id IS NULL OR tenant_id = ?)",
                        (tenant_id,),
                    )
                    _src_row = await _src_cursor.fetchone()
                    _src_total = _src_row["cnt"] if _src_row else 0
                else:
                    _src_total = None
            except Exception:
                _src_total = None
            # secondary wird später mit der Anzahl tatsächlich liefernder Quellen ergänzt
            await _pipe_done("sources_review", count_value=_src_total, count_secondary=None)

            # Schritt 1+2: RSS-Feeds und Claude-Recherche parallel ausführen
            async def _rss_pipeline():
                """RSS-Feed-Suche (Feed-Selektion + dynamische Keywords + Parsing)."""
                if incident_type != "adhoc":
                    logger.info("Recherche-Modus: RSS-Feeds übersprungen")
                    return [], None

                rss_researcher = ResearcherAgent()
                rss_parser = RSSParser()

                from source_rules import get_feeds_with_metadata
                all_feeds = await get_feeds_with_metadata(tenant_id=tenant_id)

                # Dynamische Keywords aus den letzten Headlines extrahieren
                cursor_hl = await db.execute(
                    """SELECT COALESCE(headline_de, headline) as hl
                       FROM articles WHERE incident_id = ?
                       AND COALESCE(headline_de, headline) IS NOT NULL
                       ORDER BY collected_at DESC LIMIT 30""",
                    (incident_id,),
                )
                recent_headlines = [row["hl"] for row in await cursor_hl.fetchall() if row["hl"]]
                dynamic_keywords, kw_usage = await rss_researcher.extract_dynamic_keywords(title, recent_headlines)
                if kw_usage:
                    usage_acc.add(kw_usage)

                feed_usage = None
                keywords = dynamic_keywords  # Dynamische Keywords bevorzugen
                if len(all_feeds) > 20:
                    selected_feeds, feed_sel_keywords, feed_usage = await rss_researcher.select_relevant_feeds(
                        title, description, international, all_feeds
                    )
                    logger.info(f"Feed-Selektion: {len(selected_feeds)} von {len(all_feeds)} Feeds ausgewählt")
                    # Feed-Selektion-Keywords nur als Fallback wenn dynamische fehlen
                    if not keywords:
                        keywords = feed_sel_keywords
                    articles = await rss_parser.search_feeds_selective(title, selected_feeds, keywords=keywords)
                else:
                    articles = await rss_parser.search_feeds(title, international=international, tenant_id=tenant_id, keywords=keywords, user_id=user_id)

                logger.info(f"RSS: {len(articles)} relevante Artikel gefunden (international={international})")
                return articles, feed_usage

            async def _web_search_pipeline():
                """Claude WebSearch-Recherche mit Vorselektion eingetragener Web-Quellen."""
                researcher = ResearcherAgent()
                # Bestehende Artikel als Kontext mitgeben (Research + Adhoc)
                existing_for_context = None
                if existing_db_articles_full:
                    existing_for_context = [
                        {"source": row["source"] if "source" in row.keys() else "",
                         "headline": row["headline"],
                         "source_url": row["source_url"]}
                        for row in existing_db_articles_full
                    ]

                # Web-Quellen vorselektieren (Haiku) — nur thematisch passende werden Claude im Prompt empfohlen
                preferred_sources = []
                try:
                    from source_rules import get_feeds_with_metadata
                    web_sources = await get_feeds_with_metadata(tenant_id=tenant_id, source_type="web_source")
                    if web_sources:
                        preferred_sources, web_sel_usage = await researcher.select_relevant_web_sources(
                            title, description, web_sources,
                        )
                        if web_sel_usage:
                            usage_acc.add(web_sel_usage)
                except Exception as e:
                    logger.warning(f"Web-Source-Vorselektion fehlgeschlagen (Pipeline laeuft weiter): {e}")
                    preferred_sources = []

                results, usage, parse_failed = await researcher.search(
                    title, description, incident_type,
                    international=international, user_id=user_id,
                    existing_articles=existing_for_context,
                    preferred_sources=preferred_sources,
                    output_language=output_language,
                    output_language_iso=output_language_iso,
                )
                logger.info(
                    f"Claude-Recherche: {len(results)} Ergebnisse"
                    + (f" (mit {len(preferred_sources)} Web-Quellen-Hinweis)" if preferred_sources else "")
                    + (" (Parser fehlgeschlagen)" if parse_failed else "")
                )
                return results, usage, parse_failed

            async def _podcast_pipeline():
                """Podcast-Episoden-Suche (nur adhoc-Lagen, nur mit vorhandenen Transkripten)."""
                if incident_type != "adhoc":
                    logger.info("Recherche-Modus: Podcasts uebersprungen")
                    return [], None

                from source_rules import get_feeds_with_metadata
                podcast_feeds = await get_feeds_with_metadata(tenant_id=tenant_id, source_type="podcast_feed")
                if not podcast_feeds:
                    return [], None

                from feeds.podcast_parser import PodcastFeedParser
                pd_parser = PodcastFeedParser()
                pd_researcher = ResearcherAgent()

                # Dynamische Keywords (eigener Haiku-Call, parallel zu RSS —
                # billig und hält Pipelines unabhaengig)
                cursor_pd_hl = await db.execute(
                    """SELECT COALESCE(headline_de, headline) as hl
                       FROM articles WHERE incident_id = ?
                       AND COALESCE(headline_de, headline) IS NOT NULL
                       ORDER BY collected_at DESC LIMIT 30""",
                    (incident_id,),
                )
                pd_headlines = [row["hl"] for row in await cursor_pd_hl.fetchall() if row["hl"]]
                pd_keywords, pd_kw_usage = await pd_researcher.extract_dynamic_keywords(title, pd_headlines)
                if pd_kw_usage:
                    usage_acc.add(pd_kw_usage)

                # Podcast-Parser erwartet (noch) eine flache Liste – Podcasts sind
                # primaer deutschsprachig, daher reicht das gemeinsame Flatten.
                from agents.researcher import flatten_keywords
                pd_keywords_flat = flatten_keywords(pd_keywords)
                articles = await pd_parser.search_feeds_selective(title, podcast_feeds, keywords=pd_keywords_flat or None)
                logger.info(f"Podcast-Pipeline: {len(articles)} Episoden gefunden")
                return articles, None

            async def _telegram_pipeline():
                """Telegram-Kanal-Suche mit KI-basierter Kanal-Selektion."""
                from feeds.telegram_parser import TelegramParser
                tg_parser = TelegramParser()

                # Alle Telegram-Kanaele laden
                all_channels = await tg_parser._get_telegram_channels(tenant_id=tenant_id)
                if not all_channels:
                    logger.info("Keine Telegram-Kanaele konfiguriert")
                    return [], None

                # KI waehlt relevante Kanaele aus
                tg_researcher = ResearcherAgent()
                selected_channels, tg_sel_usage = await tg_researcher.select_relevant_telegram_channels(
                    title, description, all_channels
                )
                if tg_sel_usage:
                    usage_acc.add(tg_sel_usage)

                selected_ids = [ch["id"] for ch in selected_channels]
                logger.info(f"Telegram-Selektion: {len(selected_ids)} von {len(all_channels)} Kanaelen")

                # Dynamische Keywords fuer Telegram (eigener Aufruf, da parallel zu RSS)
                cursor_tg_hl = await db.execute(
                    """SELECT COALESCE(headline_de, headline) as hl
                       FROM articles WHERE incident_id = ?
                       AND COALESCE(headline_de, headline) IS NOT NULL
                       ORDER BY collected_at DESC LIMIT 30""",
                    (incident_id,),
                )
                tg_headlines = [row["hl"] for row in await cursor_tg_hl.fetchall() if row["hl"]]
                tg_keywords, tg_kw_usage = await tg_researcher.extract_dynamic_keywords(title, tg_headlines)
                if tg_kw_usage:
                    usage_acc.add(tg_kw_usage)
                if isinstance(tg_keywords, dict):
                    logger.info(f"Telegram-Keywords (Sprachen): { {k: len(v) for k, v in tg_keywords.items()} }")
                else:
                    logger.info(f"Telegram-Keywords: {tg_keywords}")

                articles = await tg_parser.search_channels(title, tenant_id=tenant_id, keywords=tg_keywords, channel_ids=selected_ids)
                logger.info(f"Telegram-Pipeline: {len(articles)} Nachrichten")
                return articles, None

            # Pipeline-Schritt 2: Nachrichten sammeln (Start)
            await _pipe_start("collect")

            # Pipelines parallel starten (RSS + WebSearch + Podcasts + optional Telegram)
            pipelines = [_rss_pipeline(), _web_search_pipeline(), _podcast_pipeline()]
            if include_telegram:
                pipelines.append(_telegram_pipeline())

            pipeline_results = await asyncio.gather(*pipelines)

            (rss_articles, rss_feed_usage) = pipeline_results[0]
            (search_results, search_usage, search_parse_failed) = pipeline_results[1]
            (podcast_articles, _podcast_usage) = pipeline_results[2]
            telegram_articles = pipeline_results[3][0] if include_telegram else []

            # Podcast-Artikel in die RSS-Liste einfuegen (gleicher Downstream-Pfad)
            if podcast_articles:
                rss_articles = (rss_articles or []) + podcast_articles

            # URL-Verifizierung nur fuer WebSearch-Ergebnisse (RSS-URLs sind bereits verifiziert)
            if search_results:
                search_results = await _verify_article_urls(search_results)

            if rss_feed_usage:
                usage_acc.add(rss_feed_usage)
            if search_usage:
                usage_acc.add(search_usage)

            # Checkpoint 1: Cancel prüfen nach RSS/WebSearch
            self._check_cancelled(incident_id)

            # Alle Ergebnisse zusammenführen
            all_results = rss_articles + search_results + telegram_articles
            # Pipeline-Schritt 2: Nachrichten sammeln (fertig)
            try:
                _delivering_sources = len({a.get("source", "") for a in all_results if a.get("source")})
            except Exception:
                _delivering_sources = None
            await _pipe_done("collect", count_value=len(all_results), count_secondary=_delivering_sources)

            # Pipeline-Schritt 3: Doppeltes filtern (Start)
            await _pipe_start("dedup")

            # Duplikate entfernen (normalisierte URL + Headline-Ähnlichkeit)
            seen_urls = set()
            seen_headlines = set()
            unique_results = []
            for article in all_results:
                if not _is_duplicate(article, seen_urls, seen_headlines):
                    unique_results.append(article)

            dupes_removed = len(all_results) - len(unique_results)
            if dupes_removed > 0:
                logger.info(f"Deduplizierung: {dupes_removed} Duplikate entfernt, {len(unique_results)} verbleibend")
            await _pipe_done("dedup", count_value=dupes_removed, count_secondary=len(unique_results))

            # Relevanz-Scoring und Sortierung
            for article in unique_results:
                if "relevance_score" not in article or article["relevance_score"] == 0:
                    article["relevance_score"] = _score_relevance(article)
            unique_results.sort(key=lambda a: a.get("relevance_score", 0), reverse=True)

            source_count = len(set(a.get("source", "") for a in unique_results))
            _analyze_detail = f"Analysiert {len(unique_results)} Meldungen aus {source_count} Quellen..."
            if _pass_info:
                _analyze_detail = f"[{_pass_info['nr']}/{_pass_info['total']}] {_analyze_detail}"
            if self._ws_manager:
                await self._ws_manager.broadcast_for_incident({
                    "type": "status_update",
                    "incident_id": incident_id,
                    "data": {
                        "status": "analyzing",
                        "detail": _analyze_detail,
                        "started_at": now,
                        **_ws_extra,
                    },
                }, visibility, created_by, tenant_id)

            # --- Set-basierte DB-Deduplizierung (statt N×M Queries) ---
            # existing_db_articles_full wurde bereits oben geladen
            existing_urls = set()
            existing_headlines = set()
            for row in existing_db_articles_full:
                if row["source_url"]:
                    existing_urls.add(_normalize_url(row["source_url"]))
                if row["headline"] and len(row["headline"]) > 20:
                    norm_h = _normalize_headline(row["headline"])
                    if norm_h:
                        existing_headlines.add(norm_h)

            logger.info(f"DB-Dedup: {len(existing_urls)} URLs, {len(existing_headlines)} Headlines im Bestand")

            # --- Dedup gegen Bestand: nur neue (noch nicht gespeicherte) Kandidaten behalten ---
            new_candidates = []
            for article in unique_results:
                if article.get("source_url"):
                    norm_url = _normalize_url(article["source_url"])
                    if norm_url in existing_urls:
                        continue
                    existing_urls.add(norm_url)

                headline = article.get("headline", "")
                if headline and len(headline) > 20:
                    norm_h = _normalize_headline(headline)
                    if norm_h and norm_h in existing_headlines:
                        continue
                    if norm_h:
                        existing_headlines.add(norm_h)

                new_candidates.append(article)

            # Pipeline-Schritt 4: Relevanz bewerten (Start)
            await _pipe_start("relevance")
            _candidates_before_topic = len(new_candidates)

            # --- Pre-Topic-Übersetzung: fremdsprachige Headlines ins Englische ---
            # Damit der nachgelagerte Topic-Filter (Haiku) auch CJK/Arabisch/
            # Hebräisch/Kyrillisch-Headlines fair beurteilen kann statt sie aus
            # Sicherheit zu verwerfen.
            if new_candidates:
                try:
                    from agents.translator import translate_headlines_for_topic_filter
                    _pt_count, _pt_usage = await translate_headlines_for_topic_filter(new_candidates)
                    if _pt_usage:
                        usage_acc.add(_pt_usage)
                    if _pt_count:
                        logger.info(
                            f"Pre-Topic-Translate: {_pt_count} fremdsprachige Headlines übersetzt"
                        )
                except Exception as e:
                    logger.warning(
                        f"Pre-Topic-Translate fehlgeschlagen (Pipeline laeuft weiter): {e}"
                    )

            # --- Semantischer Topic-Filter (Haiku) ---
            # Wirft Artikel raus, die zwar Keyword-Treffer hatten, aber das Kernthema
            # der Lage nicht inhaltlich behandeln. Bei Fehler Fallback auf alle Kandidaten.
            if new_candidates:
                _tf_agent = AnalyzerAgent()
                new_candidates, _tf_usage = await _tf_agent.filter_relevant_articles(
                    title, description, new_candidates,
                )
                if _tf_usage:
                    usage_acc.add(_tf_usage)
            await _pipe_done("relevance", count_value=len(new_candidates), count_secondary=_candidates_before_topic)

            # --- Neue (thematisch gefilterte) Artikel speichern und für Analyse tracken ---
            new_count = 0
            new_articles_for_analysis = []
            for article in new_candidates:
                cursor = await db.execute(
                    """INSERT INTO articles (incident_id, headline, headline_de, source,
                       source_url, content_original, content_de, language, published_at, tenant_id)
                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                    (
                        incident_id,
                        article.get("headline", ""),
                        article.get("headline_de"),
                        article.get("source", "Unbekannt"),
                        article.get("source_url"),
                        article.get("content_original"),
                        article.get("content_de"),
                        article.get("language", "de"),
                        article.get("published_at"),
                        tenant_id,
                    ),
                )
                new_count += 1
                article_with_id = dict(article)
                article_with_id["id"] = cursor.lastrowid
                new_articles_for_analysis.append(article_with_id)

            await db.commit()

            # Geoparsing: Orte aus neuen Artikeln extrahieren und speichern
            if new_articles_for_analysis:
                # Pipeline-Schritt 5: Orte erkennen (Start)
                await _pipe_start("geoparsing")
                try:
                    from agents.geoparsing import geoparse_articles
                    incident_context = f"{title} - {description}"
                    logger.info(f"Geoparsing fuer {len(new_articles_for_analysis)} neue Artikel...")
                    geo_results, category_labels = await geoparse_articles(new_articles_for_analysis, incident_context)
                    geo_count = 0
                    for art_id, locations in geo_results.items():
                        for loc in locations:
                            await db.execute(
                                """INSERT INTO article_locations
                                   (article_id, incident_id, location_name, location_name_normalized,
                                    country_code, latitude, longitude, confidence, source_text, tenant_id, category)
                                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                                (art_id, incident_id, loc["location_name"], loc["location_name_normalized"],
                                 loc["country_code"], loc["lat"], loc["lon"], loc["confidence"],
                                 loc.get("source_text", ""), tenant_id, loc.get("category", "mentioned")),
                            )
                            geo_count += 1
                    if geo_count > 0:
                        await db.commit()
                        logger.info(f"Geoparsing: {geo_count} Orte aus {len(geo_results)} Artikeln gespeichert")
                    # Category-Labels in Incident speichern (nur wenn neu generiert)
                    if category_labels:
                        import json as _json
                        await db.execute(
                            "UPDATE incidents SET category_labels = ? WHERE id = ? AND category_labels IS NULL",
                            (_json.dumps(category_labels, ensure_ascii=False), incident_id),
                        )
                        await db.commit()
                        logger.info(f"Category-Labels gespeichert fuer Incident {incident_id}: {category_labels}")
                    await _pipe_done("geoparsing", count_value=geo_count, count_secondary=len(geo_results) if geo_results else 0)
                except Exception as e:
                    logger.warning(f"Geoparsing fehlgeschlagen (Pipeline laeuft weiter): {e}")
                    await _pipe_done("geoparsing", count_value=0, count_secondary=0)
            else:
                await _pipe_skip("geoparsing")

            # Quellen-Statistiken aktualisieren
            if new_count > 0:
                try:
                    from database import refresh_source_counts
                    await refresh_source_counts(db)
                except Exception as e:
                    logger.warning(f"Quellen-Statistiken konnten nicht aktualisiert werden: {e}")

            # Schritt 3+4: Analyse und Faktencheck PARALLEL
            if new_count > 0 or not previous_summary:
                is_first_summary = not previous_summary

                # Snapshot des alten Lagebilds sichern BEVOR parallele Verarbeitung startet
                if previous_summary:
                    cursor = await db.execute(
                        "SELECT COUNT(*) as cnt FROM articles WHERE incident_id = ?",
                        (incident_id,),
                    )
                    snap_articles = (await cursor.fetchone())["cnt"]
                    cursor = await db.execute(
                        "SELECT COUNT(*) as cnt FROM fact_checks WHERE incident_id = ?",
                        (incident_id,),
                    )
                    snap_fcs = (await cursor.fetchone())["cnt"]
                    await db.execute(
                        """INSERT INTO incident_snapshots
                           (incident_id, summary, sources_json,
                            article_count, fact_check_count, refresh_log_id, created_at, tenant_id)
                           VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                        (incident_id, previous_summary, previous_sources_json,
                         snap_articles, snap_fcs, log_id, now, tenant_id),
                    )
                    await db.commit()

                # Bestehende Fakten und alle Artikel vorladen (für parallele Tasks)
                cursor = await db.execute(
                    "SELECT id, claim, status, sources_count, evidence FROM fact_checks WHERE incident_id = ?",
                    (incident_id,),
                )
                existing_facts = [dict(row) for row in await cursor.fetchall()]

                # Alle Artikel vorladen für Erstanalyse/Erstcheck
                all_articles_preloaded = None
                if not previous_summary or new_count == 0 or not existing_facts:
                    cursor = await db.execute(
                        "SELECT * FROM articles WHERE incident_id = ? ORDER BY collected_at DESC",
                        (incident_id,),
                    )
                    all_articles_preloaded = [dict(row) for row in await cursor.fetchall()]

                _parallel_detail = "Analyse und Faktencheck laufen parallel..."
                if _pass_info:
                    _parallel_detail = f"[{_pass_info['nr']}/{_pass_info['total']}] {_parallel_detail}"
                if self._ws_manager:
                    await self._ws_manager.broadcast_for_incident({
                        "type": "status_update",
                        "incident_id": incident_id,
                        "data": {"status": "analyzing", "detail": _parallel_detail, "started_at": now_utc, **_ws_extra},
                    }, visibility, created_by, tenant_id)

                # Quelleneinordnung (Bias) an Artikel anhaengen
                try:
                    cursor = await db.execute(
                        "SELECT name, domain, bias FROM sources WHERE bias IS NOT NULL"
                    )
                    _bias_rows = await cursor.fetchall()
                    _bias_by_domain = {}
                    _bias_by_name = {}
                    for br in _bias_rows:
                        brd = dict(br)
                        if brd.get("domain"):
                            _bias_by_domain[brd["domain"].lower()] = brd["bias"]
                        if brd.get("name"):
                            _bias_by_name[brd["name"].lower()] = brd["bias"]

                    def _enrich_bias(articles_list):
                        if not articles_list:
                            return
                        for art in articles_list:
                            if art.get("source_bias"):
                                continue
                            src = (art.get("source") or "").lower()
                            url = (art.get("source_url") or "").lower()
                            # Match by name
                            bias = _bias_by_name.get(src)
                            if not bias:
                                # Match by domain in URL
                                for dom, b in _bias_by_domain.items():
                                    if dom and dom in url:
                                        bias = b
                                        break
                            if bias:
                                art["source_bias"] = bias

                    _enrich_bias(new_articles_for_analysis)
                    _enrich_bias(all_articles_preloaded)
                except Exception as e:
                    logger.warning("Bias-Anreicherung fehlgeschlagen (Pipeline laeuft weiter): %s", e)

                # --- Analyse-Task (wird nach _do_factcheck mit fact_context_block aufgerufen) ---
                async def _do_analysis(fact_context_block: str = ""):
                    """Run the analyzer and return its result.

                    When a previous summary exists and this refresh produced new
                    articles, only the new articles are analysed incrementally on top
                    of the previous summary and sources. Otherwise all preloaded
                    articles are analysed from scratch.

                    Args:
                        fact_context_block: Fact-check context text that is passed
                            through unchanged to the analyzer call.

                    Returns:
                        Whatever the selected analyzer method returns (the caller
                        unpacks it into an analysis and a usage value).
                    """
                    agent = AnalyzerAgent()
                    # Keyword arguments shared by both analyzer entry points.
                    shared_kwargs = {
                        "fact_context_block": fact_context_block,
                        "output_language": output_language,
                    }

                    if not (previous_summary and new_count > 0):
                        logger.info("Erstanalyse: Alle Artikel werden analysiert")
                        return await agent.analyze(
                            title, description, all_articles_preloaded, incident_type,
                            **shared_kwargs,
                        )

                    logger.info(f"Inkrementelle Analyse: {new_count} neue Artikel zum bestehenden Lagebild")
                    return await agent.analyze_incremental(
                        title, description, new_articles_for_analysis,
                        previous_summary, previous_sources_json, incident_type,
                        **shared_kwargs,
                    )

                # --- Faktencheck-Task ---
                async def _do_factcheck():
                    """Run the fact checker in the mode that fits the current state.

                    * No existing facts, or no new articles: full check over all
                      articles (loaded from the DB if they were not preloaded).
                    * Existing facts and new articles: incremental check; the
                      two-phase variant is used once the number of existing facts
                      reaches ``TWOPHASE_MIN_FACTS``.

                    Returns:
                        Whatever the selected fact-checker method returns (the caller
                        unpacks it into the fact checks and a usage value).
                    """
                    checker = FactCheckerAgent()

                    if not (existing_facts and new_count > 0):
                        # Chicken-and-egg case: a summary may exist although no fact
                        # checks do yet, in which case the articles were not
                        # preloaded. Fetch all of them here.
                        article_pool = all_articles_preloaded
                        if not article_pool:
                            rows_cursor = await db.execute(
                                "SELECT * FROM articles WHERE incident_id = ? ORDER BY collected_at DESC",
                                (incident_id,),
                            )
                            article_pool = [dict(rec) for rec in await rows_cursor.fetchall()]
                        return await checker.check(
                            title, article_pool, incident_type, output_language=output_language,
                        )

                    if len(existing_facts) < TWOPHASE_MIN_FACTS:
                        logger.info(
                            f"Inkrementeller Faktencheck: {new_count} neue Artikel, "
                            f"{len(existing_facts)} bestehende Fakten"
                        )
                        return await checker.check_incremental(
                            title, new_articles_for_analysis, existing_facts, incident_type,
                            output_language=output_language,
                        )

                    logger.info(
                        f"Zwei-Phasen-Faktencheck: {new_count} neue Artikel, "
                        f"{len(existing_facts)} bestehende Fakten"
                    )
                    return await checker.check_incremental_twophase(
                        title, new_articles_for_analysis, existing_facts, incident_type,
                        output_language=output_language,
                    )

                # Pipeline-Schritt 6: Faktencheck zuerst (sequenziell). Liefert den
                # Faktenkontext fuer das Lagebild, damit dieses auf geprueftem Stand
                # schreibt und Unklarheiten explizit benennt. Variante 1: bei
                # Faktencheck-Fehler faellt das Lagebild auf den alten Pfad ohne
                # Faktenkontext zurueck (Refresh bricht NICHT ab).
                await _pipe_start("factcheck")
                factcheck_result: tuple = ([], None)
                fact_context_block = ""
                factcheck_failed_reason: str | None = None
                try:
                    factcheck_result = await _do_factcheck()
                except Exception as fc_err:
                    factcheck_failed_reason = str(fc_err)
                    logger.warning(
                        "Faktencheck fehlgeschlagen, Lagebild laeuft ohne Faktenkontext: %s",
                        fc_err, exc_info=True,
                    )

                fact_checks, fc_usage = factcheck_result if factcheck_result else ([], None)

                # Pipeline-Schritt 6 done direkt nach dem Aufruf — die finale
                # DB-Persistierung passiert weiter unten, aber fuer die UI ist
                # der Faktencheck-Aufruf hier abgeschlossen. Der count_value
                # ist eine Schaetzung (echte Zahl steht spaeter in der DB).
                _fc_estimated_new = max(0, len(fact_checks or []) - len(existing_facts or []))
                await _pipe_done(
                    "factcheck",
                    count_value=_fc_estimated_new,
                    count_secondary=len(fact_checks) if fact_checks else 0,
                )

                # Faktenkontext fuer das Lagebild bauen.
                try:
                    from agents.analyzer import build_fact_context_block as _build_fc_ctx
                    fact_context_block = _build_fc_ctx(
                        existing_facts or [], fact_checks or [], incident_type,
                    )
                    if fact_context_block:
                        logger.info(
                            "Faktenkontext fuer Lagebild: %d Zeichen, basierend auf %d alten + %d neuen Fakten",
                            len(fact_context_block), len(existing_facts or []), len(fact_checks or []),
                        )
                except Exception as ctx_err:
                    logger.warning("build_fact_context_block fehlgeschlagen: %s", ctx_err, exc_info=True)
                    fact_context_block = ""

                # Pipeline-Schritt 7: Lagebild verfassen (jetzt mit Faktenkontext)
                await _pipe_start("summary")
                logger.info(
                    "Starte Lagebild (sequenziell nach Faktencheck%s)",
                    " — OHNE Faktenkontext (Fallback)" if factcheck_failed_reason else "",
                )
                analysis_result = await _do_analysis(fact_context_block)

                analysis, analysis_usage = analysis_result
                await _pipe_done("summary", count_value=None, count_secondary=None)

                # --- Analyse-Ergebnisse verarbeiten ---
                if analysis_usage:
                    usage_acc.add(analysis_usage)

                if analysis:
                    sources = analysis.get("sources", [])
                    new_summary = analysis.get("summary", "") or previous_summary

                    # Validierung: Fehlende Quellennummern im Summary erkennen und reparieren
                    if sources and new_summary:
                        import re as _re
                        # Auch alphanumerische Refs wie [389a] erkennen
                        referenced_raw = set(_re.findall(r'\[(\d+[a-z]?)\]', new_summary))
                        referenced_nrs = set()
                        for r in referenced_raw:
                            try:
                                referenced_nrs.add(int(r))
                            except ValueError:
                                referenced_nrs.add(r)  # Keep alphanumeric as string
                        defined_nrs = set()
                        for s in sources:
                            nr = s.get("nr", 0)
                            if isinstance(nr, int):
                                defined_nrs.add(nr)
                            elif isinstance(nr, str):
                                try:
                                    defined_nrs.add(int(nr))
                                except ValueError:
                                    defined_nrs.add(nr)  # Keep alphanumeric like '389a'
                        missing_nrs = sorted(referenced_nrs - defined_nrs)
                        if missing_nrs:
                            truly_missing = []
                            for nr in missing_nrs:
                                # Buchstaben-Suffix (z.B. "22b") -> Basisnummer (22) aufloesen
                                if isinstance(nr, str) and _re.match(r"^\d+[a-z]$", nr):
                                    base_nr = int(nr[:-1])
                                    if base_nr in defined_nrs:
                                        new_summary = new_summary.replace(f"[{nr}]", f"[{base_nr}]")
                                        logger.info(
                                            "Incident %d: Suffix-Ref [%s] auf Basisquelle [%d] aufgeloest",
                                            incident_id, nr, base_nr
                                        )
                                        continue
                                truly_missing.append(nr)
                            if truly_missing:
                                logger.warning(
                                    "Incident %d: %d Quellennummern im Summary ohne Eintrag in sources: %s",
                                    incident_id, len(truly_missing), truly_missing[:20]
                                )
                                for nr in truly_missing:
                                    sources.append({"nr": nr, "name": "Quelle", "url": ""})
                                    logger.info("Platzhalter fuer fehlende Quelle [%s] eingefuegt", nr)
                                sources.sort(key=lambda s: int(s.get("nr", 0)) if isinstance(s.get("nr"), int) or (isinstance(s.get("nr"), str) and str(s.get("nr", "")).isdigit()) else 9999)

                    # Sicherstellen dass alle nr-Werte Integer sind (Claude liefert manchmal Strings)
                    if sources:
                        for s in sources:
                            nr = s.get("nr")
                            if isinstance(nr, str):
                                try:
                                    s["nr"] = int(nr)
                                except ValueError:
                                    pass
                    sources_json = json.dumps(sources, ensure_ascii=False) if sources else previous_sources_json

                    await db.execute(
                        "UPDATE incidents SET summary = ?, sources_json = ?, executive_summary = NULL, updated_at = ? WHERE id = ?",
                        (new_summary, sources_json, now, incident_id),
                    )

                    # Beim ersten Refresh: Snapshot des neuen Lagebilds erstellen
                    if is_first_summary and new_summary:
                        cursor = await db.execute(
                            "SELECT COUNT(*) as cnt FROM articles WHERE incident_id = ?",
                            (incident_id,),
                        )
                        snap_articles = (await cursor.fetchone())["cnt"]
                        cursor = await db.execute(
                            "SELECT COUNT(*) as cnt FROM fact_checks WHERE incident_id = ?",
                            (incident_id,),
                        )
                        snap_fcs = (await cursor.fetchone())["cnt"]
                        await db.execute(
                            """INSERT INTO incident_snapshots
                               (incident_id, summary, sources_json,
                                article_count, fact_check_count, refresh_log_id, created_at, tenant_id)
                               VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                            (incident_id, new_summary, sources_json,
                             snap_articles, snap_fcs, log_id, now, tenant_id),
                        )

                    # Translations werden vom dedizierten Translator-Agent unten
                    # erzeugt (frueher inline im Analyzer-Output, das war token-
                    # instabil und schaetzte regelmaessig content_de aus).

                    await db.commit()

                # Cancel check after the fact check and the situation analysis (both run sequentially above)
                self._check_cancelled(incident_id)

                # --- Translator (Haiku) fuer fremdsprachige Artikel ohne DE-Texte ---
                # Idempotent: nur Artikel ohne headline_de/content_de werden geholt.
                # Lauft nach der Analyse (Lagebild ist schon committed) und vor QC
                # (damit normalize_umlaut_articles auch die frischen DE-Texte fasst).
                try:
                    tr_cursor = await db.execute(
                        """SELECT id, headline, content_original, language
                           FROM articles
                           WHERE incident_id = ?
                             AND language IS NOT NULL AND LOWER(language) != 'de'
                             AND (headline_de IS NULL OR headline_de = ''
                                  OR content_de IS NULL OR content_de = '')""",
                        (incident_id,),
                    )
                    pending_translations = [dict(r) for r in await tr_cursor.fetchall()]
                    if pending_translations:
                        logger.info(
                            "Translator fuer Incident %d: %d Artikel ohne DE-Uebersetzung",
                            incident_id, len(pending_translations),
                        )
                        from agents.translator import translate_articles
                        from services.post_refresh_qc import normalize_german_umlauts as _norm_de2
                        translations = await translate_articles(
                            pending_translations,
                            output_lang="de",
                            usage_accumulator=usage_acc,
                        )
                        for t in translations:
                            hd = t.get("headline_de")
                            cd = t.get("content_de")
                            if hd:
                                hd, _ = _norm_de2(hd)
                            if cd:
                                cd, _ = _norm_de2(cd)
                            if hd or cd:
                                await db.execute(
                                    "UPDATE articles SET headline_de = COALESCE(?, headline_de), "
                                    "content_de = COALESCE(?, content_de) WHERE id = ? AND incident_id = ?",
                                    (hd, cd, t["id"], incident_id),
                                )
                        await db.commit()
                        logger.info(
                            "Translator fuer Incident %d: %d/%d Artikel uebersetzt",
                            incident_id, len(translations), len(pending_translations),
                        )
                except Exception as e:
                    logger.error("Translator-Fehler fuer Incident %d: %s", incident_id, e, exc_info=True)
                    # Refresh trotz Translator-Fehler weiterlaufen lassen

                # --- Neueste Entwicklungen (nur Live-Monitoring / adhoc) ---
                # Basis ist jetzt das frisch generierte Lagebild (autoritativ, thematisch sauber).
                # Zeitstempel und Quellen kommen aus den jüngsten belegenden Artikeln.
                dev_summary_source = (locals().get("new_summary") or previous_summary or "").strip()
                if incident_type == "adhoc" and dev_summary_source:
                    try:
                        # Top-60 neueste Artikel mit Publikationsdatum als Beleg-Pool.
                        dev_cursor = await db.execute(
                            """SELECT id, headline, headline_de, source, source_url, published_at
                               FROM articles
                               WHERE incident_id = ? AND published_at IS NOT NULL
                               ORDER BY published_at DESC LIMIT 60""",
                            (incident_id,),
                        )
                        dev_articles = [dict(row) for row in await dev_cursor.fetchall()]

                        dev_analyzer = AnalyzerAgent()
                        dev_text, dev_usage = await dev_analyzer.generate_latest_developments(
                            title, description, dev_summary_source, dev_articles, previous_developments,
                            output_language=output_language,
                        )
                        if dev_usage:
                            usage_acc.add(dev_usage)
                        if dev_text is not None:
                            await db.execute(
                                "UPDATE incidents SET latest_developments = ? WHERE id = ?",
                                (dev_text, incident_id),
                            )
                            await db.commit()
                            previous_developments = dev_text
                    except Exception as e:
                        logger.warning(f"Latest-Developments-Generator fehlgeschlagen: {e}")

                # Cancel-Check nach Analyse+Faktencheck
                self._check_cancelled(incident_id)

                # --- Faktencheck-Ergebnisse verarbeiten ---
                # Pre-Dedup: Duplikate aus LLM-Antwort entfernen
                fact_checks = deduplicate_new_facts(fact_checks)

                if fc_usage:
                    usage_acc.add(fc_usage)

                # Prüfen ob dies der erste Refresh ist (keine vorherigen Faktenchecks)
                is_first_refresh = len(existing_facts) == 0

                # Notification-Summary sammeln
                confirmed_count = 0
                contradicted_count = 0
                status_changes = []

                # --- Schutz gegen Massen-Downgrades ---
                # Wenn >50% der established/confirmed Fakten auf unverified/unconfirmed
                # herabgestuft wuerden, verwerfe die FC-Ergebnisse komplett.
                established_ids = {ef["id"] for ef in existing_facts if ef.get("status") in ("established", "confirmed")}
                if established_ids and fact_checks:
                    _downgrade_count = 0
                    _remaining_tmp = list(existing_facts)
                    for _fc in fact_checks:
                        _matched = find_matching_claim(_fc.get("claim", ""), _remaining_tmp)
                        if _matched and _matched["id"] in established_ids:
                            _new_st = _fc.get("status", "developing")
                            if _new_st in ("unverified", "unconfirmed", "developing"):
                                _downgrade_count += 1
                            _remaining_tmp = [ef for ef in _remaining_tmp if ef["id"] != _matched["id"]]
                    _downgrade_ratio = _downgrade_count / len(established_ids) if established_ids else 0
                    if _downgrade_ratio > 0.5:
                        logger.warning(
                            f"Faktencheck-Ergebnisse verworfen: {_downgrade_count}/{len(established_ids)} "
                            f"established Fakten wuerden herabgestuft ({_downgrade_ratio:.0%}). "
                            f"Bestehende Fakten bleiben unveraendert."
                        )
                        fact_checks = []

                # Mutable Kopie für Fuzzy-Matching
                remaining_existing = list(existing_facts)

                for fc in fact_checks:
                    new_claim = fc.get("claim", "")
                    new_status = fc.get("status", "developing")

                    # Fuzzy-Matching gegen bestehende Claims
                    matched = find_matching_claim(new_claim, remaining_existing)

                    if matched:
                        old_status = matched.get("status")
                        # status_history aktualisieren bei Statusaenderung
                        history_update = ""
                        if old_status and old_status != new_status:
                            import json as _json
                            cursor_hist = await db.execute(
                                "SELECT status_history FROM fact_checks WHERE id = ?",
                                (matched["id"],),
                            )
                            hist_row = await cursor_hist.fetchone()
                            try:
                                history = _json.loads(hist_row[0] or "[]") if hist_row else []
                            except (ValueError, TypeError):
                                history = []
                            history.append({"status": new_status, "at": now})
                            history_update = _json.dumps(history)
                        # Evidence: Alte URLs beibehalten wenn neue keine hat
                        new_evidence = fc.get("evidence") or ""
                        import re as _re
                        if not _re.search(r"https?://", new_evidence) and matched.get("evidence"):
                            old_evidence = matched["evidence"] or ""
                            if _re.search(r"https?://", old_evidence):
                                bracket_match = _re.search(r"\[(?:Quellen|Weitere Quellen|Ursprungsquellen):.*?\]", old_evidence)
                                if bracket_match:
                                    new_evidence = new_evidence.rstrip(". ") + ". " + bracket_match.group()
                                else:
                                    new_evidence = old_evidence

                        await db.execute(
                            "UPDATE fact_checks SET claim = ?, status = ?, sources_count = ?, evidence = ?, is_notification = ?, checked_at = ?"
                            + (", status_history = ?" if history_update else "")
                            + " WHERE id = ?",
                            (new_claim, new_status, fc.get("sources_count", 0), new_evidence, fc.get("is_notification", 0), now)
                            + ((history_update,) if history_update else ())
                            + (matched["id"],),
                        )
                        # Aus der Liste entfernen damit nicht doppelt gematcht wird
                        remaining_existing = [ef for ef in remaining_existing if ef["id"] != matched["id"]]

                        # Status-Änderung tracken
                        if not is_first_refresh and old_status and old_status != new_status:
                            status_changes.append({
                                "claim": new_claim,
                                "old_status": old_status,
                                "new_status": new_status,
                            })
                    else:
                        import json as _json
                        initial_history = _json.dumps([{"status": new_status, "at": now}])
                        await db.execute(
                            """INSERT INTO fact_checks (incident_id, claim, status, sources_count, evidence, is_notification, tenant_id, status_history)
                               VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                            (incident_id, new_claim, new_status, fc.get("sources_count", 0), fc.get("evidence"), fc.get("is_notification", 0), tenant_id, initial_history),
                        )

                    # Status-Statistik sammeln
                    if new_status in ("confirmed", "established"):
                        confirmed_count += 1
                    elif new_status in ("contradicted", "disputed"):
                        contradicted_count += 1

                await db.commit()

                # Pipeline step 6 (fact check) was already marked as done earlier
                # (see above: right after the _do_factcheck call, before the
                # situation summary was generated). Only the DB persistence of the
                # facts happens here; the step is not closed a second time.

                # Pipeline-Schritt 8: Qualitätscheck (Start, ohne Zahlen)
                await _pipe_start("qc")

                # Post-Refresh Quality Check: Duplikate und Karten-Kategorien pruefen
                try:
                    from services.post_refresh_qc import run_post_refresh_qc
                    qc_result = await run_post_refresh_qc(db, incident_id)
                    if qc_result.get("facts_removed", 0) > 0 or qc_result.get("locations_fixed", 0) > 0:
                        logger.info(
                            f"QC: {qc_result['facts_removed']} Duplikate, "
                            f"{qc_result['locations_fixed']} Location-Fixes"
                        )
                except Exception as qc_err:
                    logger.warning(f"Post-Refresh QC fehlgeschlagen: {qc_err}")
                await _pipe_done("qc", count_value=None, count_secondary=None)

                # Pipeline-Schritt 9: Benachrichtigen (Start)
                await _pipe_start("notify")
                _notify_count = 0

                # Gebündelte Notification senden (nicht beim ersten Refresh)
                if not is_first_refresh:
                    if self._ws_manager:
                        await self._ws_manager.broadcast_for_incident({
                            "type": "refresh_summary",
                            "incident_id": incident_id,
                            "data": {
                                "new_articles": new_count,
                                "confirmed_count": confirmed_count,
                                "contradicted_count": contradicted_count,
                                "status_changes": status_changes,
                                "is_first_refresh": False,
                                "incident_title": title,
                            },
                        }, visibility, created_by, tenant_id)

                    # DB-Notifications erzeugen (Texte org-sprach-relativ)
                    is_en = output_language_iso == "en"
                    parts = []
                    if is_en:
                        if new_count > 0:
                            parts.append(f"{new_count} new article{'s' if new_count != 1 else ''}")
                        if confirmed_count > 0:
                            parts.append(f"{confirmed_count} confirmed")
                        if contradicted_count > 0:
                            parts.append(f"{contradicted_count} contradicted")
                        summary_text = ", ".join(parts) if parts else "No new developments"
                        research_prefix = "Research"
                        new_articles_msg = f"{new_count} new article{'s' if new_count != 1 else ''} found"
                    else:
                        if new_count > 0:
                            parts.append(f"{new_count} neue Meldung{'en' if new_count != 1 else ''}")
                        if confirmed_count > 0:
                            parts.append(f"{confirmed_count} bestätigt")
                        if contradicted_count > 0:
                            parts.append(f"{contradicted_count} widersprochen")
                        summary_text = ", ".join(parts) if parts else "Keine neuen Entwicklungen"
                        research_prefix = "Recherche"
                        new_articles_msg = f"{new_count} neue Meldung{'en' if new_count != 1 else ''} gefunden"

                    db_notifications = [{
                        "type": "refresh_summary",
                        "title": title,
                        "text": f"{research_prefix}: {summary_text}",
                        "icon": "warning" if contradicted_count > 0 else "success",
                    }]
                    if new_count > 0:
                        db_notifications.append({
                            "type": "new_articles",
                            "title": title,
                            "text": new_articles_msg,
                            "icon": "info",
                        })
                    for sc in status_changes:
                        db_notifications.append({
                            "type": "status_change",
                            "title": title,
                            "text": f"{sc['claim']}: {sc['old_status']} \u2192 {sc['new_status']}",
                            "icon": "error" if sc["new_status"] in ("contradicted", "disputed") else "success",
                        })

                    if created_by:
                        await _create_notifications_for_incident(
                            db, incident_id, visibility, created_by, tenant_id, db_notifications
                        )
                        # E-Mail-Benachrichtigungen versenden
                        await _send_email_notifications_for_incident(
                            db, incident_id, title, visibility, created_by, tenant_id, db_notifications,
                            incident_type=incident_type,
                        )
                        _notify_count = len(db_notifications)

                # Pipeline-Schritt 9: Benachrichtigen (fertig)
                await _pipe_done("notify", count_value=_notify_count, count_secondary=None)

            # Falls Analyse-Block übersprungen wurde (kein neuer Artikel und Summary existiert),
            # die noch offenen Pipeline-Schritte als übersprungen markieren.
            for _skipped_key in ("summary", "factcheck", "qc", "notify"):
                if _skipped_key in _step_ids or _skipped_key not in {"summary", "factcheck", "qc", "notify"}:
                    pass
            # Saubere Variante: alle noch offenen Steps am Ende skippen
            for _open_key in list(_step_ids.keys()):
                await _pipe_skip(_open_key)
            # Auch Steps die nie gestartet wurden (bei übersprungenem Outer-If)
            _started_keys = set()
            try:
                _check_cursor = await db.execute(
                    "SELECT step_key FROM refresh_pipeline_steps WHERE refresh_log_id = ? AND pass_number = ?",
                    (log_id, _pass_nr),
                )
                _started_keys = {row[0] for row in await _check_cursor.fetchall()}
            except Exception:
                pass
            for _missing_key in ("summary", "factcheck", "qc", "notify"):
                if _missing_key not in _started_keys:
                    await _pipe_skip(_missing_key)

            # Refresh-Log abschließen (mit Token-Statistiken)
            await db.execute(
                """UPDATE refresh_log SET
                    completed_at = ?, articles_found = ?, status = 'completed',
                    input_tokens = ?, output_tokens = ?,
                    cache_creation_tokens = ?, cache_read_tokens = ?,
                    total_cost_usd = ?, api_calls = ?
                WHERE id = ?""",
                (datetime.now(TIMEZONE).strftime('%Y-%m-%d %H:%M:%S'), new_count,
                 usage_acc.input_tokens, usage_acc.output_tokens,
                 usage_acc.cache_creation_tokens, usage_acc.cache_read_tokens,
                 round(usage_acc.total_cost_usd, 7), usage_acc.call_count, log_id),
            )
            await db.commit()
            logger.info(
                f"Token: {usage_acc.input_tokens} in / {usage_acc.output_tokens} out / "
                f"${usage_acc.total_cost_usd:.4f} ({usage_acc.call_count} Calls)"
            )

            # Credits-Tracking: Monatliche Aggregation + Credits abziehen
            if tenant_id and usage_acc.total_cost_usd > 0:
                from services.license_service import charge_usage_to_tenant
                await charge_usage_to_tenant(db, tenant_id, usage_acc, source="monitor")
                await db.commit()

            # Quellen-Discovery im Background starten
            if unique_results:
                asyncio.create_task(_background_discover_sources(unique_results))

            if not _suppress_complete and self._ws_manager:
                await self._ws_manager.broadcast_for_incident({
                    "type": "refresh_complete",
                    "incident_id": incident_id,
                    "data": {"new_articles": new_count, "status": "idle"},
                }, visibility, created_by, tenant_id)

            # updated_at IMMER aktualisieren wenn Refresh lief (auch bei fehlgeschlagener Analyse)
            await db.execute(
                "UPDATE incidents SET updated_at = ? WHERE id = ?",
                (now, incident_id),
            )
            await db.commit()

            logger.info(f"Refresh für Lage {incident_id} abgeschlossen: {new_count} neue Artikel")

            # Multi-Pass-Diagnose: Pass-Ergebnis zurueck an Multi-Pass-Caller geben
            if _pass_info is not None:
                _pass_info["new_count"] = new_count
                _pass_info["parse_failed"] = search_parse_failed

            # Executive Summary im Hintergrund vorab generieren (fuer schnelleren Export)
            if new_count > 0:
                async def _pregenerate_exec_summary():
                    try:
                        from report_generator import generate_executive_summary
                        from database import get_db
                        _db = await get_db()
                        try:
                            cursor = await _db.execute(
                                "SELECT summary, executive_summary FROM incidents WHERE id = ?",
                                (incident_id,),
                            )
                            _row = await cursor.fetchone()
                            if _row and _row["summary"] and not _row["executive_summary"]:
                                es = await generate_executive_summary(_row["summary"])
                                await _db.execute(
                                    "UPDATE incidents SET executive_summary = ? WHERE id = ?",
                                    (es, incident_id),
                                )
                                await _db.commit()
                                logger.info(f"Executive Summary fuer Lage {incident_id} vorberechnet")
                        finally:
                            await _db.close()
                    except Exception as e:
                        logger.warning(f"Executive Summary Vorberechnung fehlgeschlagen: {e}")
                asyncio.create_task(_pregenerate_exec_summary())

        finally:
            await db.close()


    async def _get_incident_info(self, incident_id: int) -> tuple[str, bool]:
        """Load the incident type and whether the incident already has a summary.

        Args:
            incident_id: Primary key of the row in the ``incidents`` table.

        Returns:
            A ``(incident_type, has_summary)`` tuple. ``incident_type`` falls
            back to ``"adhoc"`` when the stored type is empty; when no row
            exists for ``incident_id`` the result is ``("adhoc", False)``.
        """
        from database import get_db

        conn = await get_db()
        try:
            result = await conn.execute(
                "SELECT type, summary FROM incidents WHERE id = ?", (incident_id,)
            )
            record = await result.fetchone()
            if record:
                incident_type = record["type"] or "adhoc"
                has_summary = bool(record["summary"])
                return incident_type, has_summary
            # Unknown incident: use the same defaults as an untyped, unsummarised one.
            return "adhoc", False
        finally:
            # Always release the connection, on every return path and on errors.
            await conn.close()

    async def _run_research_multi_pass(self, incident_id: int, trigger_type: str, user_id: int = None):
        """Run the automatic sequence of research passes for a research incident.

        The number of passes is ``RESEARCH_MULTI_PASS_COUNT``; per the labels
        in ``RESEARCH_PASS_LABELS`` they are intended as:

        Pass 1: broad capture (initial 4-phase research)
        Pass 2: deepening (other sources, incremental analysis)
        Pass 3: consolidation (remaining gaps, fact upgrade)

        Each pass delegates to ``_run_refresh``. Only the final pass is run
        with ``_suppress_complete=False``.

        Args:
            incident_id: ID of the incident to research.
            trigger_type: Passed through unchanged to ``_run_refresh``.
            user_id: Passed through unchanged to ``_run_refresh``.

        Raises:
            asyncio.CancelledError: Re-raised when a pass is cancelled.
            Exception: Whatever the final pass raises; failures of earlier
                passes are logged and the sequence continues.
            RuntimeError: When no pass reported new articles and at least one
                pass reported a parse failure.
        """
        pass_count = RESEARCH_MULTI_PASS_COUNT
        collected = []

        for current in range(1, pass_count + 1):
            # Check for cancellation between passes.
            self._check_cancelled(incident_id)

            final_pass = current == pass_count
            # Handed to _run_refresh, which writes "new_count" and
            # "parse_failed" back into this dict when the pass completes.
            info = {
                "nr": current,
                "total": pass_count,
                "label": RESEARCH_PASS_LABELS.get(current, f"Durchlauf {current}"),
            }

            logger.info(
                f"Research Multi-Pass {current}/{pass_count} für Lage {incident_id}: "
                f"{info['label']}"
            )

            try:
                await self._run_refresh(
                    incident_id,
                    trigger_type=trigger_type,
                    retry_count=0,
                    user_id=user_id,
                    _suppress_complete=not final_pass,
                    _pass_info=info,
                )
            except asyncio.CancelledError:
                logger.info(
                    f"Research Multi-Pass abgebrochen in Durchlauf {current}/{pass_count} "
                    f"für Lage {incident_id}"
                )
                raise
            except Exception as exc:
                logger.error(
                    f"Research Multi-Pass {current}/{pass_count} fehlgeschlagen "
                    f"für Lage {incident_id}: {exc}"
                )
                # A failed non-final pass is tolerated: continue with the next
                # pass and keep what earlier passes produced. Only a failure of
                # the final pass propagates to the caller.
                if final_pass:
                    raise
            finally:
                # Record the pass even when it failed, so the diagnostics
                # below see every attempted pass.
                collected.append(info)

        logger.info(
            f"Research Multi-Pass abgeschlossen für Lage {incident_id}: "
            f"{pass_count} Durchläufe"
        )

        # Diagnostics: if ALL passes yielded zero new articles AND at least one
        # of them hit a parser failure, the research effectively failed (answers
        # were returned but no usable JSON). Without this the incident would
        # stay empty with no visible error message.
        found_nothing = sum(entry.get("new_count", 0) for entry in collected) == 0
        parser_broke = any(entry.get("parse_failed") for entry in collected)
        if not (found_nothing and parser_broke):
            return

        raise RuntimeError(
            "Recherche fehlgeschlagen: Claude lieferte keine verwertbaren Quellen "
            "(JSON-Parsing schlug bei mindestens einem Durchlauf fehl). "
            "Bitte Logs prüfen und Refresh erneut starten."
        )


# Module-level AgentOrchestrator instance, intended to be used as a singleton.
orchestrator = AgentOrchestrator()