feat(klassifikation): Quellen-Klassifikation aus Monitor in Verwaltung verschoben

Service-Module (source_classifier, external_reputation) liegen jetzt in shared/services/, Endpoints unter /api/sources/classification/* sind hier statt im Monitor:
- classification/{stats,queue,bulk-classify,bulk-approve}
- {id}/classification/{approve,reject,reclassify}
- external-reputation/sync

modalSource erweitert um Klassifikations-Section (Politik, Medientyp, Reliability, state-affiliated, Land, 12 Alignment-Chips). Neuer Sub-Tab Klassifikation mit Review-Queue, Pending-Counter, Bulk-Actions. Auth via get_current_admin, Audit-Logging. Begleit-Refactor: Monitor verliert die Klassifikations-UI/-Endpoints separat.
2026-05-09 21:27:55 +00:00
Commit 015255237a
--- a/src/shared/services/external_reputation.py
+++ b/src/shared/services/external_reputation.py
@@ -0,0 +1,282 @@
+"""Externe Reputations-Daten fuer Quellen.
+
+Synchronisiert Domain-Listen von oeffentlichen Reputations-/Faktencheck-Datenbanken
+und schreibt die Treffer in die sources-Spalten:
+
+- IFCN-Signatories (anerkannte Faktenchecker) -> ifcn_signatory
+- EUvsDisinfo (pro-Kreml-Desinformation, Zenodo-CSV) -> eu_disinfo_listed,
+  eu_disinfo_case_count, eu_disinfo_last_seen
+
+Anschliessend wendet apply_reputation_overrides() Override-Regeln auf die
+reliability-Spalte an:
+- ifcn_signatory=1 -> reliability='sehr_hoch'
+- eu_disinfo_case_count >= 5 -> reliability='sehr_niedrig'
+- eu_disinfo_case_count >= 1 -> reliability eine Stufe runter (max bis 'niedrig')
+"""
+import csv
+import io
+import logging
+from collections import defaultdict
+from urllib.parse import urlparse
+
+import aiosqlite
+import httpx
+
+logger = logging.getLogger("osint.external_reputation")
+
+# Upstream data sets. sync_ifcn_signatories() reads the IFCN list line by line,
+# sync_eu_disinfo() parses the EUvsDisinfo file as CSV.
+IFCN_LIST_URL = "https://raw.githubusercontent.com/IFCN/verified-signatories/main/list"
+EU_DISINFO_CSV_URL = "https://zenodo.org/records/10514307/files/euvsdisinfo_base.csv?download=1"
+
+# 60 s default for the individual timeouts, 10 s for connecting.
+HTTP_TIMEOUT = httpx.Timeout(60.0, connect=10.0)
+
+# Generic platform domains that must NOT be flagged as a source
+# (EUvsDisinfo aggregates anonymous Telegram/Twitter posts under platform domains).
+PLATFORM_DOMAINS = {
+    "t.me", "telegram.me", "telegram.org",
+    "twitter.com", "x.com", "mobile.twitter.com",
+    "youtube.com", "youtu.be", "m.youtube.com",
+    "facebook.com", "fb.com", "m.facebook.com",
+    "instagram.com", "tiktok.com", "vk.com", "ok.ru",
+    "rumble.com", "bitchute.com", "odysee.com",
+    "reddit.com", "old.reddit.com",
+    "wordpress.com", "blogspot.com", "medium.com",
+    "substack.com", "wixsite.com",
+}
+
+# Reliability scale in rank order (worst -> best)
+RELIABILITY_ORDER = ["sehr_niedrig", "niedrig", "gemischt", "hoch", "sehr_hoch"]
+
+
+def _normalize_domain(raw: str | None) -> str | None:
+    """Reduce a domain or URL to a bare, comparable host name.
+
+    The result is lowercased and stripped of scheme, userinfo, port, path,
+    query, fragment, a trailing root dot and a leading ``www.`` so that
+    ``https://www.Example.com:443/x`` and ``example.com`` compare equal.
+
+    Args:
+        raw: Domain or full URL; may be None or blank.
+
+    Returns:
+        The normalized host, or None when nothing usable remains.
+    """
+    if not raw:
+        return None
+    host = raw.strip().lower()
+    if not host:
+        return None
+    # A full URL was passed in: keep only the authority part.
+    if "://" in host:
+        try:
+            host = urlparse(host).netloc or host
+        except ValueError:
+            # urlparse rejects e.g. malformed IPv6 literals; drop the scheme
+            # by hand instead of returning "scheme:" as the domain.
+            host = host.split("://", 1)[1]
+    # Strip path / query / fragment.
+    host = host.split("/")[0].split("?")[0].split("#")[0]
+    # Strip userinfo ("user:pw@host") and port ("host:443"); neither is part
+    # of the domain and both would defeat the list lookups.
+    host = host.rpartition("@")[2]
+    if not host.startswith("["):  # leave bracketed IPv6 literals untouched
+        host = host.partition(":")[0]
+    host = host.rstrip(".")
+    if host.startswith("www."):
+        host = host[4:]
+    return host or None
+
+
+async def _fetch_text(url: str) -> str:
+    """Download *url* with a GET request and return the response body as text.
+
+    Redirects are followed and the module-level HTTP_TIMEOUT applies.
+
+    Raises:
+        httpx.HTTPStatusError: raised by ``raise_for_status()`` when the
+            server answers with a 4xx/5xx status.
+    """
+    async with httpx.AsyncClient(timeout=HTTP_TIMEOUT, follow_redirects=True) as client:
+        resp = await client.get(url)
+        resp.raise_for_status()
+        return resp.text
+
+
+async def sync_ifcn_signatories(db: aiosqlite.Connection) -> dict:
+    """Fetch the IFCN signatory list and flag matching sources.
+
+    Every source with a non-empty domain gets ``ifcn_signatory`` set to 1
+    when its normalized domain appears in the list (and is not a generic
+    platform domain), otherwise to 0. The change is committed.
+
+    Args:
+        db: Open connection; rows must be convertible with ``dict(row)``.
+
+    Returns:
+        Dict with ``list_size`` (domains in the list), ``sources_checked``
+        and ``matched``.
+    """
+    text = await _fetch_text(IFCN_LIST_URL)
+    domains: set[str] = set()
+    for line in text.splitlines():
+        d = _normalize_domain(line)
+        if d:
+            domains.add(d)
+    logger.info("IFCN-Liste geladen: %d Domains", len(domains))
+
+    # Load all sources that have a domain.
+    cursor = await db.execute(
+        "SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''"
+    )
+    sources = [dict(r) for r in await cursor.fetchall()]
+
+    updates: list[tuple[int, int]] = []
+    matched = 0
+    for s in sources:
+        nd = _normalize_domain(s["domain"])
+        is_signatory = bool(nd) and nd not in PLATFORM_DOMAINS and nd in domains
+        if is_signatory:
+            matched += 1
+        updates.append((1 if is_signatory else 0, s["id"]))
+
+    # One two-parameter statement per row instead of "id IN (?,?,...)":
+    # the IN form binds one variable per source and fails with
+    # "too many SQL variables" once the table outgrows SQLite's limit.
+    if updates:
+        await db.executemany(
+            "UPDATE sources SET ifcn_signatory = ? WHERE id = ?",
+            updates,
+        )
+    await db.commit()
+    logger.info("IFCN-Sync: %d Quellen als Faktenchecker markiert (von %d)",
+                matched, len(sources))
+    return {
+        "list_size": len(domains),
+        "sources_checked": len(sources),
+        "matched": matched,
+    }
+
+
+async def sync_eu_disinfo(db: aiosqlite.Connection) -> dict:
+    """Download the EUvsDisinfo CSV, aggregate it per domain and update sources.
+
+    Only rows whose ``class`` column equals 'disinformation' are counted.
+    For every source with a non-empty domain the following columns are written:
+
+    - eu_disinfo_listed: 1 if the domain has at least one counted row, else 0
+    - eu_disinfo_case_count: number of counted rows (0 if none)
+    - eu_disinfo_last_seen: greatest ``debunk_date`` string seen (NULL if none)
+
+    Returns:
+        Dict with ``rows_in_csv``, ``domains_with_disinfo_in_csv``,
+        ``sources_checked`` and ``matched``.
+    """
+    csv_text = await _fetch_text(EU_DISINFO_CSV_URL)
+
+    # Aggregate per domain (class == 'disinformation' only).
+    case_counts: dict[str, int] = defaultdict(int)
+    latest_debunk: dict[str, str] = {}
+    total_rows = 0
+    for total_rows, record in enumerate(csv.DictReader(io.StringIO(csv_text)), 1):
+        label = (record.get("class") or "").strip().lower()
+        if label != "disinformation":
+            continue
+        domain = _normalize_domain(record.get("article_domain"))
+        if not domain:
+            continue
+        case_counts[domain] += 1
+        seen_on = (record.get("debunk_date") or "").strip()
+        # Dates are compared as plain strings; "" sorts before any real value.
+        if seen_on and seen_on > latest_debunk.get(domain, ""):
+            latest_debunk[domain] = seen_on
+    logger.info("EUvsDisinfo-CSV: %d Zeilen, %d Domains mit Desinformation",
+                total_rows, len(case_counts))
+
+    # Load the sources and match them against the aggregate.
+    cursor = await db.execute(
+        "SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''"
+    )
+    sources = [dict(r) for r in await cursor.fetchall()]
+
+    matched = 0
+    for entry in sources:
+        key = _normalize_domain(entry["domain"])
+        if key and key not in PLATFORM_DOMAINS and key in case_counts:
+            await db.execute(
+                """UPDATE sources SET
+                    eu_disinfo_listed = 1,
+                    eu_disinfo_case_count = ?,
+                    eu_disinfo_last_seen = ?
+                WHERE id = ?""",
+                (case_counts[key], latest_debunk.get(key), entry["id"]),
+            )
+            matched += 1
+        else:
+            # Not listed (or a generic platform domain): reset all three columns.
+            await db.execute(
+                """UPDATE sources SET
+                    eu_disinfo_listed = 0,
+                    eu_disinfo_case_count = 0,
+                    eu_disinfo_last_seen = NULL
+                WHERE id = ?""",
+                (entry["id"],),
+            )
+    await db.commit()
+    logger.info("EUvsDisinfo-Sync: %d Quellen als Desinformations-Quelle markiert (von %d)",
+                matched, len(sources))
+    return {
+        "rows_in_csv": total_rows,
+        "domains_with_disinfo_in_csv": len(case_counts),
+        "sources_checked": len(sources),
+        "matched": matched,
+    }
+
+
+def _override_reliability(current: str | None, ifcn: bool, eu_count: int) -> str | None:
+    """Compute the reliability level dictated by the override rules.
+
+    Rules, in priority order (a missing level is treated as 'na'):
+
+    - ifcn set: 'sehr_hoch'.
+    - eu_count >= 5: 'sehr_niedrig'.
+    - eu_count >= 1: 'na' becomes 'niedrig'; a level from RELIABILITY_ORDER
+      moves one step down and is capped at 'niedrig', so everything above
+      'niedrig' lands on 'niedrig' and 'niedrig' itself drops to
+      'sehr_niedrig'. Levels outside RELIABILITY_ORDER are left alone.
+
+    NOTE(review): the result depends on the current level, so applying the
+    rules again to an already-downgraded value moves 'niedrig' on to
+    'sehr_niedrig' for 1-4 cases. Confirm that this compounding is intended.
+
+    Returns:
+        The new level, or None when nothing changes.
+    """
+    level = current if current else "na"
+
+    def changed(target: str) -> str | None:
+        # Report a level only when it differs from the current one.
+        return None if target == level else target
+
+    # IFCN wins: certified fact-checker -> always 'sehr_hoch'.
+    if ifcn:
+        return changed("sehr_hoch")
+
+    # EUvsDisinfo: downgrade.
+    if eu_count >= 5:
+        return changed("sehr_niedrig")
+    if eu_count >= 1:
+        if level == "na":
+            return "niedrig"
+        if level in RELIABILITY_ORDER:
+            cap = RELIABILITY_ORDER.index("niedrig")
+            one_down = max(0, RELIABILITY_ORDER.index(level) - 1)
+            return changed(RELIABILITY_ORDER[min(one_down, cap)])
+    return None
+
+
+async def apply_reputation_overrides(db: aiosqlite.Connection, source_id: int | None = None) -> dict:
+    """Apply the reliability override rules and persist every change.
+
+    Args:
+        db: Open connection; rows must be convertible with ``dict(row)``.
+        source_id: Restrict the run to this source; None processes all sources.
+
+    Returns:
+        Dict with ``checked`` (rows examined) and ``changed`` (rows updated).
+    """
+    select_sql = "SELECT id, reliability, ifcn_signatory, eu_disinfo_case_count FROM sources"
+    if source_id is None:
+        cursor = await db.execute(select_sql)
+    else:
+        cursor = await db.execute(select_sql + " WHERE id = ?", (source_id,))
+    candidates = [dict(r) for r in await cursor.fetchall()]
+
+    changed = 0
+    for cand in candidates:
+        is_signatory = bool(cand.get("ifcn_signatory"))
+        case_count = int(cand.get("eu_disinfo_case_count") or 0)
+        new_level = _override_reliability(cand.get("reliability"), is_signatory, case_count)
+        if new_level is None:
+            # Rules leave this source untouched.
+            continue
+        await db.execute(
+            "UPDATE sources SET reliability = ? WHERE id = ?",
+            (new_level, cand["id"]),
+        )
+        changed += 1
+    await db.commit()
+    logger.info("Reliability-Override: %d Quellen angepasst (von %d gepruefte)",
+                changed, len(candidates))
+    return {"checked": len(candidates), "changed": changed}
+
+
+async def sync_all(db: aiosqlite.Connection) -> dict:
+    """Run the full sync: IFCN, then EUvsDisinfo, then the reliability override.
+
+    Afterwards ``external_data_synced_at`` is set to the current timestamp
+    for every source that has a non-empty domain.
+
+    Returns:
+        Dict with the three step results under ``ifcn``, ``eu_disinfo``
+        and ``override``.
+    """
+    summary = {"ifcn": await sync_ifcn_signatories(db)}
+    summary["eu_disinfo"] = await sync_eu_disinfo(db)
+    summary["override"] = await apply_reputation_overrides(db)
+
+    stamp_sql = (
+        "UPDATE sources SET external_data_synced_at = CURRENT_TIMESTAMP "
+        "WHERE domain IS NOT NULL AND domain != ''"
+    )
+    await db.execute(stamp_sql)
+    await db.commit()
+
+    return summary
--- a/src/shared/services/source_classifier.py
+++ b/src/shared/services/source_classifier.py
@@ -0,0 +1,295 @@
+"""Klassifiziert Quellen via Claude (Haiku) nach 4 Achsen + state_affiliated + country.
+
+Schreibt Vorschlaege in die proposed_*-Spalten von sources und setzt
+classification_source='llm_pending'. Approval erfolgt ueber separate Endpoints,
+die proposed_* in die echten Spalten kopieren.
+"""
+import asyncio
+import json
+import logging
+import re
+
+import aiosqlite
+
+from shared.agents.claude_client import call_claude
+from config import CLAUDE_MODEL_FAST
+
+logger = logging.getLogger("osint.source_classifier")
+
+# Allowed enum values per classification axis. _validate() replaces any
+# LLM answer outside these sets with a fallback ('na' / 'sonstige') and
+# drops unknown alignment entries.
+POLITICAL_VALUES = {
+    "links_extrem", "links", "mitte_links", "liberal", "mitte",
+    "konservativ", "mitte_rechts", "rechts", "rechts_extrem", "na",
+}
+MEDIA_TYPE_VALUES = {
+    "tageszeitung", "wochenzeitung", "magazin", "tv_sender", "radio",
+    "oeffentlich_rechtlich", "nachrichtenagentur", "online_only", "blog",
+    "telegram_kanal", "telegram_bot", "podcast", "social_media", "imageboard",
+    "think_tank", "ngo", "behoerde", "staatsmedium", "fachmedium", "sonstige",
+}
+RELIABILITY_VALUES = {"sehr_hoch", "hoch", "gemischt", "niedrig", "sehr_niedrig", "na"}
+ALIGNMENT_VALUES = {
+    "prorussisch", "proiranisch", "prowestlich", "proukrainisch",
+    "prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch",
+    "protuerkisch", "panarabisch", "neutral", "sonstige",
+}
+
+
+def _build_prompt(src: dict, sample_articles: list[dict]) -> str:
+    """Render the classification prompt for one source.
+
+    Args:
+        src: Source row as a dict; reads name, url, domain, source_type,
+            category, language and bias.
+        sample_articles: Article dicts; the first five are considered and
+            each contributes its ``headline`` (or ``headline_de``), cut to
+            200 characters. Entries without a headline are skipped but keep
+            their position number.
+
+    Returns:
+        The complete prompt text, ending with the expected JSON schema.
+    """
+    numbered = [
+        f"{pos}. {title[:200]}"
+        for pos, art in enumerate(sample_articles[:5], 1)
+        if (title := (art.get("headline") or art.get("headline_de") or "").strip())
+    ]
+    sample_text = "\nLetzte Artikel/Headlines:\n" + "\n".join(numbered) if numbered else ""
+
+    return f"""Du bist ein OSINT-Analyst und klassifizierst Nachrichten- und Medienquellen fuer ein Lagebild-Monitoring-System (DACH-Raum).
+
+QUELLE:
+Name: {src.get('name')}
+URL: {src.get('url') or '-'}
+Domain: {src.get('domain') or '-'}
+Quellentyp: {src.get('source_type')}
+Bisherige Kategorie: {src.get('category')}
+Sprache: {src.get('language') or 'unbekannt'}
+Bisherige Notiz (Freitext): {src.get('bias') or '-'}{sample_text}
+
+AUFGABE: Klassifiziere die Quelle nach folgenden Achsen.
+
+1. political_orientation:
+   - links_extrem (z.B. linksunten.indymedia)
+   - links (klar links, z.B. junge Welt, taz)
+   - mitte_links (linksliberal/sozialdemokratisch, z.B. SZ, Spiegel)
+   - liberal (wirtschafts-/grünliberal, z.B. NZZ, Zeit)
+   - mitte (politisch neutral, Agentur, z.B. dpa, Reuters, tagesschau)
+   - konservativ (buergerlich-konservativ, z.B. FAZ, Welt)
+   - mitte_rechts (rechts-buergerlich, z.B. Tichys Einblick, Achgut)
+   - rechts (klar rechts, z.B. Junge Freiheit, EpochTimes)
+   - rechts_extrem (z.B. Compact, PI-News)
+   - na (nicht klassifizierbar: Behoerde, Fachmedium, Think Tank ohne klare politische Linie)
+
+2. media_type (genau einer):
+   tageszeitung, wochenzeitung, magazin, tv_sender, radio, oeffentlich_rechtlich,
+   nachrichtenagentur, online_only, blog, telegram_kanal, telegram_bot, podcast,
+   social_media, imageboard, think_tank, ngo, behoerde, staatsmedium, fachmedium, sonstige
+
+3. reliability:
+   - sehr_hoch (etablierte Qualitaet, Faktencheck: tagesschau, dpa, FAZ, Reuters)
+   - hoch (serioes mit gelegentlichen Schwaechen: taz, Welt, BILD bei harten News)
+   - gemischt (Mix Meinung/Einseitigkeit: Tichys Einblick, Achgut, Boulevard)
+   - niedrig (haeufig irrefuehrend, schwache Quellenarbeit: Junge Freiheit, EpochTimes)
+   - sehr_niedrig (bekannt fuer Desinformation/Verschwoerung: Compact, RT, Sputnik, PI-News)
+   - na (nicht bewertbar)
+
+4. alignments (Mehrfach, leeres Array wenn keine ausgepraegte Naehe):
+   prorussisch, proiranisch, prowestlich, proukrainisch, prochinesisch, projapanisch,
+   proisraelisch, propalaestinensisch, protuerkisch, panarabisch, neutral, sonstige
+
+5. state_affiliated (true/false): true wenn vom Staat finanziert/kontrolliert
+   (RT, Sputnik, CGTN, PressTV, Xinhua, TRT). Public Service Broadcaster
+   wie ARD/ZDF/BBC sind NICHT state_affiliated.
+
+6. country_code (ISO 3166-1 alpha-2): Heimatland (DE, AT, CH, RU, US, ...). null wenn unklar.
+
+7. confidence (0.0-1.0): 0.85+ fuer bekannte Outlets, 0.5-0.85 fuer mittelbekannt, <0.5 fuer unsicher.
+
+8. reasoning (1-2 Saetze): Kurze Begruendung der Hauptklassifikationen.
+
+WICHTIG:
+- Antworte AUSSCHLIESSLICH mit einem JSON-Objekt, kein Text drumherum.
+- Nutze ausschliesslich die genannten enum-Werte (snake_case).
+- Bei Unklarheit lieber `na` und niedrige confidence.
+
+JSON-Schema:
+{{
+  "political_orientation": "...",
+  "media_type": "...",
+  "reliability": "...",
+  "alignments": ["..."],
+  "state_affiliated": false,
+  "country_code": "DE",
+  "confidence": 0.9,
+  "reasoning": "..."
+}}"""
+
+
+async def _load_sample_articles(db: aiosqlite.Connection, name: str, domain: str | None, limit: int = 5) -> list[dict]:
+    """Load the most recent headlines of a source.
+
+    Articles are looked up by exact source name first; only when that yields
+    nothing (or no name is given) are they looked up by a substring match of
+    *domain* against ``source_url``.
+
+    Returns:
+        Up to *limit* dicts with ``headline`` and ``headline_de``, newest
+        first by ``collected_at``; empty when neither lookup finds anything.
+    """
+    lookups: list[tuple[str, tuple]] = []
+    if name:
+        lookups.append((
+            "SELECT headline, headline_de FROM articles WHERE source = ? ORDER BY collected_at DESC LIMIT ?",
+            (name, limit),
+        ))
+    if domain:
+        lookups.append((
+            "SELECT headline, headline_de FROM articles WHERE source_url LIKE ? ORDER BY collected_at DESC LIMIT ?",
+            (f"%{domain}%", limit),
+        ))
+
+    # Run the lookups in order and stop at the first one that returns rows.
+    for sql, params in lookups:
+        cursor = await db.execute(sql, params)
+        found = await cursor.fetchall()
+        if found:
+            return [dict(r) for r in found]
+    return []
+
+
+def _pick_enum(value: object, allowed: set[str], default: str) -> str:
+    """Return *value* if it is a string contained in *allowed*, else *default*."""
+    # Check the type first: a list or dict from the LLM is unhashable and
+    # would make the set membership test raise TypeError.
+    if isinstance(value, str) and value in allowed:
+        return value
+    return default
+
+
+def _validate(parsed: dict) -> dict:
+    """Validate and normalize an LLM answer against the allowed enums.
+
+    Never raises for malformed field values; every field falls back to a
+    safe default instead:
+
+    - political_orientation / reliability: 'na'; media_type: 'sonstige'
+    - alignments: sorted, de-duplicated list of known values
+    - state_affiliated: real booleans/numbers by truthiness; strings only
+      count as true when they read 'true', '1', 'yes' or 'ja'
+    - country_code: two ASCII letters, upper-cased, else None
+    - confidence: clamped to 0.0-1.0; 0.5 when missing, unparsable or NaN
+    - reasoning: text cut to 1000 characters; empty when missing/null
+
+    Args:
+        parsed: Decoded JSON object returned by the model.
+
+    Returns:
+        Dict with the eight normalized fields.
+    """
+    pol = _pick_enum(parsed.get("political_orientation", "na"), POLITICAL_VALUES, "na")
+    mt = _pick_enum(parsed.get("media_type", "sonstige"), MEDIA_TYPE_VALUES, "sonstige")
+    rel = _pick_enum(parsed.get("reliability", "na"), RELIABILITY_VALUES, "na")
+
+    aligns_raw = parsed.get("alignments") or []
+    if not isinstance(aligns_raw, list):
+        aligns_raw = []
+    aligns = sorted({a for a in aligns_raw if isinstance(a, str) and a in ALIGNMENT_VALUES})
+
+    sa_raw = parsed.get("state_affiliated", False)
+    if isinstance(sa_raw, str):
+        # bool("false") is True, so strings need explicit parsing.
+        sa = sa_raw.strip().lower() in {"true", "1", "yes", "ja"}
+    else:
+        sa = bool(sa_raw)
+
+    cc = parsed.get("country_code")
+    if isinstance(cc, str):
+        cc = cc.strip()
+    if isinstance(cc, str) and len(cc) == 2 and cc.isascii() and cc.isalpha():
+        cc = cc.upper()
+    else:
+        cc = None
+
+    try:
+        confidence = float(parsed.get("confidence", 0.5))
+    except (TypeError, ValueError, OverflowError):
+        confidence = 0.5
+    else:
+        if confidence != confidence:
+            # NaN (json.loads accepts it) would otherwise clamp to 1.0.
+            confidence = 0.5
+        else:
+            confidence = max(0.0, min(1.0, confidence))
+
+    reasoning_raw = parsed.get("reasoning")
+    # A JSON null must not end up as the literal text "None".
+    reasoning = "" if reasoning_raw is None else str(reasoning_raw)[:1000]
+
+    return {
+        "political_orientation": pol,
+        "media_type": mt,
+        "reliability": rel,
+        "alignments": aligns,
+        "state_affiliated": sa,
+        "country_code": cc,
+        "confidence": confidence,
+        "reasoning": reasoning,
+    }
+
+
+async def classify_source(
+    db: aiosqlite.Connection,
+    source_id: int,
+    sample_limit: int = 5,
+    model: str = CLAUDE_MODEL_FAST,
+) -> dict:
+    """Classify one source via the LLM and store the proposal.
+
+    The validated answer is written to the ``proposed_*`` columns and
+    committed. ``classification_source`` becomes 'llm_pending' unless it is
+    already 'manual' or 'llm_approved'.
+
+    Args:
+        db: Open connection; rows must be convertible with ``dict(row)``.
+        source_id: Primary key of the source to classify.
+        sample_limit: Number of recent headlines loaded for the prompt.
+        model: Model identifier handed to ``call_claude``.
+
+    Returns:
+        The validated classification plus ``source_id`` and a ``usage`` dict
+        (cost_usd, input_tokens, output_tokens).
+
+    Raises:
+        ValueError: if the source does not exist or the answer contains no
+            JSON object (``json.loads`` errors also derive from ValueError).
+    """
+    cursor = await db.execute(
+        "SELECT id, name, url, domain, source_type, category, language, bias, "
+        "classification_source FROM sources WHERE id = ?",
+        (source_id,),
+    )
+    row = await cursor.fetchone()
+    if not row:
+        raise ValueError(f"Quelle {source_id} nicht gefunden")
+    src = dict(row)
+
+    headlines = await _load_sample_articles(db, src["name"], src.get("domain"), sample_limit)
+    response, usage = await call_claude(_build_prompt(src, headlines), tools=None, model=model)
+
+    # Take everything from the first '{' to the last '}' as the JSON payload.
+    payload = re.search(r"\{.*\}", response, re.DOTALL)
+    if not payload:
+        raise ValueError(f"Keine JSON-Antwort von Claude fuer source_id={source_id}: {response[:200]}")
+    result = _validate(json.loads(payload.group(0)))
+
+    proposal = (
+        result["political_orientation"],
+        result["media_type"],
+        result["reliability"],
+        1 if result["state_affiliated"] else 0,
+        result["country_code"],
+        json.dumps(result["alignments"], ensure_ascii=False),
+        result["confidence"],
+        result["reasoning"],
+        source_id,
+    )
+    # Keep an existing manual/approved state; anything else becomes pending.
+    new_src = "CASE WHEN classification_source IN ('manual','llm_approved') THEN classification_source ELSE 'llm_pending' END"
+    await db.execute(
+        f"""UPDATE sources SET
+            proposed_political_orientation = ?,
+            proposed_media_type = ?,
+            proposed_reliability = ?,
+            proposed_state_affiliated = ?,
+            proposed_country_code = ?,
+            proposed_alignments_json = ?,
+            proposed_confidence = ?,
+            proposed_reasoning = ?,
+            proposed_at = CURRENT_TIMESTAMP,
+            classification_source = {new_src}
+        WHERE id = ?""",
+        proposal,
+    )
+    await db.commit()
+
+    logger.info(
+        "Klassifiziert source_id=%s '%s' -> %s/%s/%s conf=%.2f ($%.4f)",
+        source_id, src["name"], result["political_orientation"],
+        result["media_type"], result["reliability"], result["confidence"],
+        usage.cost_usd,
+    )
+
+    result["source_id"] = source_id
+    result["usage"] = {
+        field: getattr(usage, field)
+        for field in ("cost_usd", "input_tokens", "output_tokens")
+    }
+    return result
+
+
+async def bulk_classify(
+    db: aiosqlite.Connection,
+    limit: int = 50,
+    only_unclassified: bool = True,
+    model: str = CLAUDE_MODEL_FAST,
+) -> dict:
+    """Classify not-yet-classified sources one after another.
+
+    Only sources with status 'active' and a source_type other than
+    'excluded' are picked, ordered by id. A failure for one source is logged
+    and collected; the remaining sources are still processed.
+
+    Args:
+        db: Open connection whose rows support access by column name.
+        limit: Maximum number of sources per call.
+        only_unclassified: If True, only classification_source='legacy'.
+            If False, 'llm_pending' sources are classified again as well.
+        model: Model identifier passed on to ``classify_source``.
+
+    Returns:
+        Dict with ``processed``, ``success``, ``errors`` (list of
+        ``{"source_id", "error"}``) and ``total_cost_usd``.
+    """
+    where = (
+        "classification_source = 'legacy'"
+        if only_unclassified
+        else "classification_source IN ('legacy', 'llm_pending')"
+    )
+    cursor = await db.execute(
+        f"SELECT id FROM sources WHERE {where} AND status = 'active' "
+        f"AND source_type != 'excluded' ORDER BY id LIMIT ?",
+        (limit,),
+    )
+    pending_ids = [rec["id"] for rec in await cursor.fetchall()]
+
+    total_cost = 0.0
+    success = 0
+    errors: list[dict] = []
+
+    for sid in pending_ids:
+        try:
+            outcome = await classify_source(db, sid, model=model)
+            total_cost += outcome["usage"]["cost_usd"]
+            success += 1
+        except asyncio.CancelledError:
+            # Cancellation must propagate, never be recorded as a failure.
+            raise
+        except Exception as exc:
+            logger.error("Klassifikation source_id=%s fehlgeschlagen: %s", sid, exc, exc_info=True)
+            errors.append({"source_id": sid, "error": str(exc)})
+
+    logger.info(
+        "Bulk-Klassifikation fertig: %d/%d erfolgreich, $%.4f Kosten, %d Fehler",
+        success, len(pending_ids), total_cost, len(errors),
+    )
+    return {
+        "processed": len(pending_ids),
+        "success": success,
+        "errors": errors,
+        "total_cost_usd": total_cost,
+    }