From 7f220a9b65e164d1cd3d0c59aeee8a87d89be968 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 7 May 2026 00:13:39 +0000 Subject: [PATCH 01/15] feat(orchestrator): Faktencheck vor Lagebild mit Fallback (sequenziell) Bislang liefen factcheck + analyze parallel via asyncio.gather. Folge: Lagebild konnte Aussagen treffen, die der Faktencheck im selben Refresh als contradicted markiert. Inkonsistenz zwischen Lagebild-Tab und Faktencheck- Tab; im PDF/DOCX-Export schon kritisch. Variante 1 aus der Diskussion: strikt sequenziell, mit Fallback bei Faktencheck-Fail (Refresh bricht NICHT ab, Lagebild laeuft dann ohne Faktenkontext wie bisher, ein Logeintrag dokumentiert den Fallback). Aenderungen: - analyzer.build_fact_context_block(): neuer Helper, baut den GEPRUEFTE-FAKTEN-Block aus existing_facts + neuen/aktualisierten Fakten. Status-Domaenen adhoc/research vereinheitlicht zu Bestaetigt / Umstritten / Unbestaetigt / Entwicklung. Max 20 Fakten, sortiert nach Status-Prioritaet desc und sources_count desc. Bei leerer Eingabe leerer String -> Fallback-Pfad. - analyzer.analyze() / analyze_incremental(): neuer Optional-Parameter fact_context_block (default leer, Backward-Compat). 4 Prompt-Templates bekommen {fact_context_block}-Platzhalter sowie eine AUSSAGE-DISZIPLIN- Sektion: bestaetigte Fakten als Geruest, Umstrittenes explizit machen, Unbestaetigtes klar einordnen, kein Spekulieren ueber ungedecktes. - orchestrator: asyncio.gather durch sequenzielle Logik ersetzt. Faktencheck zuerst, Pipeline-Step 6 done direkt nach dem Aufruf (count_value ist Schaetzung; finale DB-Zahlen stehen spaeter). Lagebild danach (Step 7) mit fact_context_block. _do_analysis-Closure um den Parameter erweitert, kein toter Inline-Block. - spaeteres _pipe_done(factcheck) entfernt -- der Step wird jetzt frueher geschlossen, der spaetere Persistierungsblock laesst ihn unberuehrt. UI-Pipeline zeigt automatisch sequenzielle Aktivitaet statt beide Steps gleichzeitig -- keine Frontend-Aenderung noetig. Latenz pro Refresh steigt um die factcheck-Dauer. Bewusst akzeptiert: Konsistenz vor Geschwindigkeit. --- src/agents/analyzer.py | 119 +++++++++++++++++++++++++++++++++++-- src/agents/orchestrator.py | 76 ++++++++++++++++++----- 2 files changed, 175 insertions(+), 20 deletions(-) diff --git a/src/agents/analyzer.py b/src/agents/analyzer.py index 8a067af..9bb45e6 100644 --- a/src/agents/analyzer.py +++ b/src/agents/analyzer.py @@ -16,7 +16,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre VORFALL: {title} KONTEXT: {description} -VORHANDENE MELDUNGEN: +{fact_context_block}VORHANDENE MELDUNGEN: {articles_text} AUFTRAG: @@ -59,7 +59,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre THEMA: {title} KONTEXT: {description} -VORLIEGENDE QUELLEN: +{fact_context_block}VORLIEGENDE QUELLEN: {articles_text} AUFTRAG: @@ -118,7 +118,7 @@ BISHERIGES LAGEBILD: BISHERIGE QUELLEN: {previous_sources_text} -NEUE MELDUNGEN SEIT DEM LETZTEN UPDATE: +{fact_context_block}NEUE MELDUNGEN SEIT DEM LETZTEN UPDATE: {new_articles_text} AUFTRAG: @@ -165,7 +165,7 @@ BISHERIGES BRIEFING: BISHERIGE QUELLEN: {previous_sources_text} -NEUE QUELLEN SEIT DEM LETZTEN UPDATE: +{fact_context_block}NEUE QUELLEN SEIT DEM LETZTEN UPDATE: {new_articles_text} AUFTRAG: @@ -264,6 +264,112 @@ Antworte AUSSCHLIESSLICH als JSON-Objekt — KEINE Erklärung, KEINE Einleitung: {{"relevant_ids": [1, 3, 7]}}""" + + +# Status-Gruppen fuer den Fakten-Kontext im Analyse-Prompt. 
+# adhoc nutzt confirmed/unconfirmed/contradicted/developing, +# research nutzt established/unverified/disputed/developing — beide Domaenen +# werden in dieselben vier Anzeige-Gruppen abgebildet. +_FACT_STATUS_GROUPS = [ + ("Bestätigt (mehrere unabhängige Quellen oder durch Faktencheck als gesichert eingestuft):", + {"confirmed", "established"}), + ("Umstritten (Quellen widersprechen sich oder Faktencheck hat Widersprüche dokumentiert):", + {"contradicted", "disputed"}), + ("Unbestätigt (nur eine einzelne Quelle, eine unabhängige Bestätigung steht aus):", + {"unconfirmed", "unverified"}), + ("In Entwicklung (laufender Sachverhalt, Stand offen):", + {"developing"}), +] + +_FACT_STATUS_PRIORITY = { + "confirmed": 5, "established": 5, + "contradicted": 4, "disputed": 4, + "unconfirmed": 3, "unverified": 3, + "developing": 1, +} + + +def build_fact_context_block( + existing_facts: list[dict] | None, + new_or_updated_facts: list[dict] | None, + incident_type: str, + max_total: int = 20, +) -> str: + """Baut den 'GEPRUEFTE FAKTEN'-Block fuer den Analyse-Prompt. + + Wird vom Orchestrator zwischen Faktencheck und Lagebild aufgerufen, damit + das Lagebild auf gepruefter Faktenbasis schreibt und Unklarheiten explizit + benennt. Bei leerer Faktenliste wird ein leerer String zurueckgegeben — der + Prompt laeuft dann ohne Fakten-Kontext (Fallback bei Faktencheck-Fail oder + bei Lagen ohne bisherige Fakten). + """ + existing_facts = existing_facts or [] + new_or_updated_facts = new_or_updated_facts or [] + if not existing_facts and not new_or_updated_facts: + return "" + + seen_claims: set[str] = set() + merged: list[dict] = [] + # Neue/aktualisierte Fakten zuerst (Status ist aktueller Stand). + for f in new_or_updated_facts: + c = (f.get("claim") or "").strip().lower() + if not c or c in seen_claims: + continue + seen_claims.add(c) + merged.append(f) + # Dann alte unveraenderte Fakten. + for f in existing_facts: + c = (f.get("claim") or "").strip().lower() + if not c or c in seen_claims: + continue + seen_claims.add(c) + merged.append(f) + + if not merged: + return "" + + merged.sort(key=lambda f: ( + -_FACT_STATUS_PRIORITY.get((f.get("status") or "").lower(), 0), + -(f.get("sources_count") or 0), + )) + merged = merged[:max_total] + + grouped: dict[str, list[dict]] = {label: [] for label, _ in _FACT_STATUS_GROUPS} + for f in merged: + s = (f.get("status") or "").lower() + for label, codes in _FACT_STATUS_GROUPS: + if s in codes: + grouped[label].append(f) + break + + if not any(grouped.values()): + return "" + + lines: list[str] = [] + lines.append("GEPRÜFTE FAKTEN (Stand nach dem Faktencheck dieses Refresh, max. 
{n} priorisiert):".format(n=max_total)) + for label, _codes in _FACT_STATUS_GROUPS: + items = grouped[label] + if not items: + continue + lines.append("") + lines.append(label) + for f in items: + claim = (f.get("claim") or "").strip() + sc = f.get("sources_count") or 0 + sc_text = f" ({sc} {'Quellen' if sc != 1 else 'Quelle'})" if sc else "" + lines.append(f"- {claim}{sc_text}") + + lines.append("") + lines.append("AUSSAGE-DISZIPLIN für das Lagebild:") + lines.append("- Bestätigte Fakten als Grundgerüst nehmen, ohne Hedging.") + lines.append("- Umstrittene Punkte explizit als umstritten kennzeichnen, beide Seiten knapp benennen.") + lines.append("- Unbestätigtes klar einordnen ('Eine einzelne Quelle berichtet ...', 'Eine unabhängige Bestätigung steht aus.').") + lines.append("- Bei Aussagen, die durch keinen geprüften Fakt gedeckt sind und auch nicht direkt aus einer der vorliegenden Meldungen hervorgehen: NICHT spekulieren — entweder weglassen oder als unklar kennzeichnen.") + lines.append("- Triff KEINE Aussagen, die mit den oben gelisteten geprüften Fakten in Widerspruch stehen.") + lines.append("") + return "\n".join(lines) + + class AnalyzerAgent: """Analysiert und übersetzt Meldungen über Claude CLI.""" @@ -290,7 +396,7 @@ class AnalyzerAgent: articles_text += f"Inhalt: {content[:800]}\n" return articles_text - async def analyze(self, title: str, description: str, articles: list[dict], incident_type: str = "adhoc") -> tuple[dict | None, ClaudeUsage | None]: + async def analyze(self, title: str, description: str, articles: list[dict], incident_type: str = "adhoc", fact_context_block: str = "") -> tuple[dict | None, ClaudeUsage | None]: """Erstanalyse: Analysiert alle Meldungen zu einem Vorfall (erster Refresh).""" if not articles: return None, None @@ -306,6 +412,7 @@ class AnalyzerAgent: articles_text=articles_text, today=today, output_language=OUTPUT_LANGUAGE, + fact_context_block=fact_context_block, ) try: @@ -327,6 +434,7 @@ class AnalyzerAgent: previous_summary: str, previous_sources_json: str | None, incident_type: str = "adhoc", + fact_context_block: str = "", ) -> tuple[dict | None, ClaudeUsage | None]: """Inkrementelle Analyse: Aktualisiert das Lagebild mit nur den neuen Artikeln. 
@@ -369,6 +477,7 @@ class AnalyzerAgent: new_articles_text=new_articles_text, today=today, output_language=OUTPUT_LANGUAGE, + fact_context_block=fact_context_block, ) try: diff --git a/src/agents/orchestrator.py b/src/agents/orchestrator.py index 225a666..e8bb457 100644 --- a/src/agents/orchestrator.py +++ b/src/agents/orchestrator.py @@ -1299,18 +1299,22 @@ class AgentOrchestrator: except Exception as e: logger.warning("Bias-Anreicherung fehlgeschlagen (Pipeline laeuft weiter): %s", e) - # --- Analyse-Task --- - async def _do_analysis(): + # --- Analyse-Task (wird nach _do_factcheck mit fact_context_block aufgerufen) --- + async def _do_analysis(fact_context_block: str = ""): analyzer = AnalyzerAgent() if previous_summary and new_count > 0: logger.info(f"Inkrementelle Analyse: {new_count} neue Artikel zum bestehenden Lagebild") return await analyzer.analyze_incremental( title, description, new_articles_for_analysis, previous_summary, previous_sources_json, incident_type, + fact_context_block=fact_context_block, ) else: logger.info("Erstanalyse: Alle Artikel werden analysiert") - return await analyzer.analyze(title, description, all_articles_preloaded, incident_type) + return await analyzer.analyze( + title, description, all_articles_preloaded, incident_type, + fact_context_block=fact_context_block, + ) # --- Faktencheck-Task --- async def _do_factcheck(): @@ -1344,20 +1348,61 @@ class AgentOrchestrator: articles_for_check = [dict(row) for row in await cursor.fetchall()] return await factchecker.check(title, articles_for_check, incident_type) - # Pipeline-Schritte 6+7: Lagebild verfassen + Fakten prüfen (Start, parallel) - await _pipe_start("summary") + # Pipeline-Schritt 6: Faktencheck zuerst (sequenziell). Liefert den + # Faktenkontext fuer das Lagebild, damit dieses auf geprueftem Stand + # schreibt und Unklarheiten explizit benennt. Variante 1: bei + # Faktencheck-Fehler faellt das Lagebild auf den alten Pfad ohne + # Faktenkontext zurueck (Refresh bricht NICHT ab). await _pipe_start("factcheck") + factcheck_result: tuple = ([], None) + fact_context_block = "" + factcheck_failed_reason: str | None = None + try: + factcheck_result = await _do_factcheck() + except Exception as fc_err: + factcheck_failed_reason = str(fc_err) + logger.warning( + "Faktencheck fehlgeschlagen, Lagebild laeuft ohne Faktenkontext: %s", + fc_err, exc_info=True, + ) - # Beide Tasks PARALLEL starten - logger.info("Starte Analyse und Faktencheck parallel...") - analysis_result, factcheck_result = await asyncio.gather( - _do_analysis(), - _do_factcheck(), + fact_checks, fc_usage = factcheck_result if factcheck_result else ([], None) + + # Pipeline-Schritt 6 done direkt nach dem Aufruf — die finale + # DB-Persistierung passiert weiter unten, aber fuer die UI ist + # der Faktencheck-Aufruf hier abgeschlossen. Der count_value + # ist eine Schaetzung (echte Zahl steht spaeter in der DB). + _fc_estimated_new = max(0, len(fact_checks or []) - len(existing_facts or [])) + await _pipe_done( + "factcheck", + count_value=_fc_estimated_new, + count_secondary=len(fact_checks) if fact_checks else 0, ) + # Faktenkontext fuer das Lagebild bauen. 
+ try: + from agents.analyzer import build_fact_context_block as _build_fc_ctx + fact_context_block = _build_fc_ctx( + existing_facts or [], fact_checks or [], incident_type, + ) + if fact_context_block: + logger.info( + "Faktenkontext fuer Lagebild: %d Zeichen, basierend auf %d alten + %d neuen Fakten", + len(fact_context_block), len(existing_facts or []), len(fact_checks or []), + ) + except Exception as ctx_err: + logger.warning("build_fact_context_block fehlgeschlagen: %s", ctx_err, exc_info=True) + fact_context_block = "" + + # Pipeline-Schritt 7: Lagebild verfassen (jetzt mit Faktenkontext) + await _pipe_start("summary") + logger.info( + "Starte Lagebild (sequenziell nach Faktencheck%s)", + " — OHNE Faktenkontext (Fallback)" if factcheck_failed_reason else "", + ) + analysis_result = await _do_analysis(fact_context_block) + analysis, analysis_usage = analysis_result - fact_checks, fc_usage = factcheck_result - # Pipeline-Schritt 6: Lagebild verfassen (fertig, keine Zahl, nur Status) await _pipe_done("summary", count_value=None, count_secondary=None) # --- Analyse-Ergebnisse verarbeiten --- @@ -1656,9 +1701,10 @@ class AgentOrchestrator: await db.commit() - # Pipeline-Schritt 7: Fakten prüfen (fertig) - _new_facts_count = max(0, len(fact_checks) - len(existing_facts)) - await _pipe_done("factcheck", count_value=_new_facts_count, count_secondary=len(fact_checks) if fact_checks else 0) + # Pipeline-Schritt 7 (Fakten pruefen) wurde bereits frueher als done + # markiert (siehe weiter oben — direkt nach dem _do_factcheck-Aufruf, + # bevor das Lagebild generiert wurde). Hier nur noch die DB- + # Persistierung der Fakten, ohne den Step erneut zu schliessen. # Pipeline-Schritt 8: Qualitätscheck (Start, ohne Zahlen) await _pipe_start("qc") From f8e2f73bc068ae4ee4212876c4d41938ebaf49a5 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 7 May 2026 18:21:45 +0000 Subject: [PATCH 02/15] feat(sources): strukturierte Klassifikation (Politik/Medientyp/Reliability/Alignments) - Neue sources-Spalten: political_orientation (7+2 Stufen), media_type (20), reliability (5+1), state_affiliated, country_code, classification_source, classified_at sowie proposed_*-Spalten fuer LLM-Vorschlaege. - Neue source_alignments-Tabelle fuer Mehrfach-Tagging geopolitischer Naehe (prorussisch, proiranisch, prowestlich, ...). - API-Filter: ?political_orientation, ?media_type, ?reliability, ?state_affiliated, ?alignment. - create/update_source nehmen alignments[] entgegen und setzen classification_source automatisch auf 'manual' bei Klassifikations-Edits. Backwards-kompatibel: bestehendes bias/language/category bleibt unveraendert, Default fuer Bestandsquellen ist classification_source = 'legacy'. 
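Beispielhafte Nutzung der neuen Filter (nur eine Skizze: Basis-URL und
Auth-Header sind hier Annahmen, die Query-Parameter entsprechen den oben
genannten Filtern; httpx dient lediglich als Beispiel-Client):

    import httpx

    async def fetch_filtered_sources(token: str) -> list[dict]:
        # Staatsnahe Quellen mit sehr niedriger Reliability und prorussischem
        # Alignment; mehrere Filter werden serverseitig UND-verknuepft.
        params = {
            "reliability": "sehr_niedrig",
            "state_affiliated": "true",
            "alignment": "prorussisch",
        }
        async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
            resp = await client.get(
                "/api/sources",
                params=params,
                headers={"Authorization": f"Bearer {token}"},  # Annahme: Token-Auth
            )
            resp.raise_for_status()
            return resp.json()
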
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/database.py | 77 +++++++++++++++- src/models.py | 47 ++++++++-- src/routers/sources.py | 196 ++++++++++++++++++++++++++++++++++------- 3 files changed, 279 insertions(+), 41 deletions(-) diff --git a/src/database.py b/src/database.py index 19f06bf..54d6b7e 100644 --- a/src/database.py +++ b/src/database.py @@ -158,7 +158,31 @@ CREATE TABLE IF NOT EXISTS sources ( article_count INTEGER DEFAULT 0, last_seen_at TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - tenant_id INTEGER REFERENCES organizations(id) + tenant_id INTEGER REFERENCES organizations(id), + language TEXT, + bias TEXT, + political_orientation TEXT DEFAULT 'na', + media_type TEXT DEFAULT 'sonstige', + reliability TEXT DEFAULT 'na', + state_affiliated INTEGER DEFAULT 0, + country_code TEXT, + classification_source TEXT DEFAULT 'legacy', + classified_at TIMESTAMP, + proposed_political_orientation TEXT, + proposed_media_type TEXT, + proposed_reliability TEXT, + proposed_state_affiliated INTEGER, + proposed_country_code TEXT, + proposed_alignments_json TEXT, + proposed_confidence REAL, + proposed_reasoning TEXT, + proposed_at TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS source_alignments ( + source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE, + alignment TEXT NOT NULL, + PRIMARY KEY (source_id, alignment) ); CREATE TABLE IF NOT EXISTS notifications ( @@ -611,6 +635,57 @@ async def init_db(): await db.execute("ALTER TABLE sources ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") await db.commit() + # Migration: language + bias (Freitext, schon laenger im Einsatz, Schema-Lueck schliessen) + if "language" not in src_columns: + await db.execute("ALTER TABLE sources ADD COLUMN language TEXT") + await db.commit() + if "bias" not in src_columns: + await db.execute("ALTER TABLE sources ADD COLUMN bias TEXT") + await db.commit() + + # Migration: strukturierte Klassifikations-Spalten fuer sources + for col, ddl in [ + ("political_orientation", "ALTER TABLE sources ADD COLUMN political_orientation TEXT DEFAULT 'na'"), + ("media_type", "ALTER TABLE sources ADD COLUMN media_type TEXT DEFAULT 'sonstige'"), + ("reliability", "ALTER TABLE sources ADD COLUMN reliability TEXT DEFAULT 'na'"), + ("state_affiliated", "ALTER TABLE sources ADD COLUMN state_affiliated INTEGER DEFAULT 0"), + ("country_code", "ALTER TABLE sources ADD COLUMN country_code TEXT"), + ("classification_source", "ALTER TABLE sources ADD COLUMN classification_source TEXT DEFAULT 'legacy'"), + ("classified_at", "ALTER TABLE sources ADD COLUMN classified_at TIMESTAMP"), + ("proposed_political_orientation", "ALTER TABLE sources ADD COLUMN proposed_political_orientation TEXT"), + ("proposed_media_type", "ALTER TABLE sources ADD COLUMN proposed_media_type TEXT"), + ("proposed_reliability", "ALTER TABLE sources ADD COLUMN proposed_reliability TEXT"), + ("proposed_state_affiliated", "ALTER TABLE sources ADD COLUMN proposed_state_affiliated INTEGER"), + ("proposed_country_code", "ALTER TABLE sources ADD COLUMN proposed_country_code TEXT"), + ("proposed_alignments_json", "ALTER TABLE sources ADD COLUMN proposed_alignments_json TEXT"), + ("proposed_confidence", "ALTER TABLE sources ADD COLUMN proposed_confidence REAL"), + ("proposed_reasoning", "ALTER TABLE sources ADD COLUMN proposed_reasoning TEXT"), + ("proposed_at", "ALTER TABLE sources ADD COLUMN proposed_at TIMESTAMP"), + ]: + if col not in src_columns: + await db.execute(ddl) + await db.commit() + if any(c not in src_columns for c in 
("political_orientation", "media_type", "reliability")): + logger.info("Migration: Klassifikations-Spalten zu sources hinzugefuegt") + + # Migration: source_alignments-Tabelle (Mehrfach-Tags fuer geopolitische Naehe) + cursor = await db.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='source_alignments'" + ) + if not await cursor.fetchone(): + await db.executescript( + """ + CREATE TABLE source_alignments ( + source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE, + alignment TEXT NOT NULL, + PRIMARY KEY (source_id, alignment) + ); + CREATE INDEX IF NOT EXISTS idx_source_alignments_alignment ON source_alignments(alignment); + """ + ) + await db.commit() + logger.info("Migration: source_alignments-Tabelle erstellt") + # Migration: tenant_id fuer notifications cursor = await db.execute("PRAGMA table_info(notifications)") notif_columns = [row[1] for row in await cursor.fetchall()] diff --git a/src/models.py b/src/models.py index 6c1e547..32d3bb7 100644 --- a/src/models.py +++ b/src/models.py @@ -139,24 +139,51 @@ class IncidentListItem(BaseModel): # Sources (Quellenverwaltung) +SOURCE_TYPE_PATTERN = "^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$" +SOURCE_CATEGORY_PATTERN = "^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$" +SOURCE_STATUS_PATTERN = "^(active|inactive)$" +POLITICAL_ORIENTATION_PATTERN = "^(links_extrem|links|mitte_links|liberal|mitte|konservativ|mitte_rechts|rechts|rechts_extrem|na)$" +MEDIA_TYPE_PATTERN = "^(tageszeitung|wochenzeitung|magazin|tv_sender|radio|oeffentlich_rechtlich|nachrichtenagentur|online_only|blog|telegram_kanal|telegram_bot|podcast|social_media|imageboard|think_tank|ngo|behoerde|staatsmedium|fachmedium|sonstige)$" +RELIABILITY_PATTERN = "^(sehr_hoch|hoch|gemischt|niedrig|sehr_niedrig|na)$" +ALIGNMENT_PATTERN = "^(prorussisch|proiranisch|prowestlich|proukrainisch|prochinesisch|projapanisch|proisraelisch|propalaestinensisch|protuerkisch|panarabisch|neutral|sonstige)$" +COUNTRY_CODE_PATTERN = "^[A-Z]{2}$" +CLASSIFICATION_SOURCE_PATTERN = "^(manual|llm_approved|llm_pending|legacy)$" + + class SourceCreate(BaseModel): name: str = Field(min_length=1, max_length=200) url: Optional[str] = None domain: Optional[str] = None - source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$") - category: str = Field(default="sonstige", pattern="^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$") - status: str = Field(default="active", pattern="^(active|inactive)$") + source_type: str = Field(default="rss_feed", pattern=SOURCE_TYPE_PATTERN) + category: str = Field(default="sonstige", pattern=SOURCE_CATEGORY_PATTERN) + status: str = Field(default="active", pattern=SOURCE_STATUS_PATTERN) notes: Optional[str] = None + language: Optional[str] = None + bias: Optional[str] = None + political_orientation: Optional[str] = Field(default=None, pattern=POLITICAL_ORIENTATION_PATTERN) + media_type: Optional[str] = Field(default=None, pattern=MEDIA_TYPE_PATTERN) + reliability: Optional[str] = Field(default=None, pattern=RELIABILITY_PATTERN) + state_affiliated: Optional[bool] = None + country_code: Optional[str] = Field(default=None, pattern=COUNTRY_CODE_PATTERN) + alignments: Optional[list[str]] = None class SourceUpdate(BaseModel): name: Optional[str] = Field(default=None, max_length=200) url: Optional[str] = 
None domain: Optional[str] = None - source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$") - category: Optional[str] = Field(default=None, pattern="^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$") - status: Optional[str] = Field(default=None, pattern="^(active|inactive)$") + source_type: Optional[str] = Field(default=None, pattern=SOURCE_TYPE_PATTERN) + category: Optional[str] = Field(default=None, pattern=SOURCE_CATEGORY_PATTERN) + status: Optional[str] = Field(default=None, pattern=SOURCE_STATUS_PATTERN) notes: Optional[str] = None + language: Optional[str] = None + bias: Optional[str] = None + political_orientation: Optional[str] = Field(default=None, pattern=POLITICAL_ORIENTATION_PATTERN) + media_type: Optional[str] = Field(default=None, pattern=MEDIA_TYPE_PATTERN) + reliability: Optional[str] = Field(default=None, pattern=RELIABILITY_PATTERN) + state_affiliated: Optional[bool] = None + country_code: Optional[str] = Field(default=None, pattern=COUNTRY_CODE_PATTERN) + alignments: Optional[list[str]] = None class SourceResponse(BaseModel): @@ -174,6 +201,14 @@ class SourceResponse(BaseModel): created_at: str language: Optional[str] = None bias: Optional[str] = None + political_orientation: Optional[str] = None + media_type: Optional[str] = None + reliability: Optional[str] = None + state_affiliated: bool = False + country_code: Optional[str] = None + classification_source: Optional[str] = None + classified_at: Optional[str] = None + alignments: list[str] = [] is_global: bool = False diff --git a/src/routers/sources.py b/src/routers/sources.py index f6318d1..9adade2 100644 --- a/src/routers/sources.py +++ b/src/routers/sources.py @@ -12,7 +12,56 @@ logger = logging.getLogger("osint.sources") router = APIRouter(prefix="/api/sources", tags=["sources"]) -SOURCE_UPDATE_COLUMNS = {"name", "url", "domain", "source_type", "category", "status", "notes"} +SOURCE_UPDATE_COLUMNS = { + "name", "url", "domain", "source_type", "category", "status", "notes", + "language", "bias", + "political_orientation", "media_type", "reliability", + "state_affiliated", "country_code", +} +SOURCE_CLASSIFICATION_FIELDS = { + "political_orientation", "media_type", "reliability", + "state_affiliated", "country_code", +} +ALLOWED_ALIGNMENTS = { + "prorussisch", "proiranisch", "prowestlich", "proukrainisch", + "prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch", + "protuerkisch", "panarabisch", "neutral", "sonstige", +} + + +async def _load_alignments_for(db: aiosqlite.Connection, source_ids: list[int]) -> dict[int, list[str]]: + """Lädt alignments fuer mehrere Quellen in einer Query und gibt {source_id: [alignment, ...]} zurück.""" + if not source_ids: + return {} + placeholders = ",".join("?" 
for _ in source_ids) + cursor = await db.execute( + f"SELECT source_id, alignment FROM source_alignments WHERE source_id IN ({placeholders}) ORDER BY alignment", + source_ids, + ) + out: dict[int, list[str]] = {sid: [] for sid in source_ids} + for row in await cursor.fetchall(): + out.setdefault(row["source_id"], []).append(row["alignment"]) + return out + + +async def _replace_alignments(db: aiosqlite.Connection, source_id: int, alignments: list[str]): + """Ersetzt die alignments-Liste einer Quelle (DELETE + INSERT) — Aufrufer muss commit() machen.""" + await db.execute("DELETE FROM source_alignments WHERE source_id = ?", (source_id,)) + seen: set[str] = set() + for raw in alignments: + a = (raw or "").strip().lower() + if not a or a in seen: + continue + if a not in ALLOWED_ALIGNMENTS: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=f"Ungueltiger alignment-Wert: '{a}'", + ) + seen.add(a) + await db.execute( + "INSERT INTO source_alignments (source_id, alignment) VALUES (?, ?)", + (source_id, a), + ) def _check_source_ownership(source: dict, username: str): @@ -34,6 +83,11 @@ async def list_sources( source_type: str = None, category: str = None, source_status: str = None, + political_orientation: str = None, + media_type: str = None, + reliability: str = None, + state_affiliated: bool = None, + alignment: str = None, current_user: dict = Depends(get_current_user), db: aiosqlite.Connection = Depends(db_dependency), ): @@ -41,27 +95,43 @@ async def list_sources( tenant_id = current_user.get("tenant_id") # Global (tenant_id=NULL) + eigene Org - query = "SELECT * FROM sources WHERE (tenant_id IS NULL OR tenant_id = ?)" - params = [tenant_id] + query = "SELECT s.* FROM sources s WHERE (s.tenant_id IS NULL OR s.tenant_id = ?)" + params: list = [tenant_id] if source_type: - query += " AND source_type = ?" + query += " AND s.source_type = ?" params.append(source_type) if category: - query += " AND category = ?" + query += " AND s.category = ?" params.append(category) if source_status: - query += " AND status = ?" + query += " AND s.status = ?" params.append(source_status) + if political_orientation: + query += " AND s.political_orientation = ?" + params.append(political_orientation) + if media_type: + query += " AND s.media_type = ?" + params.append(media_type) + if reliability: + query += " AND s.reliability = ?" + params.append(reliability) + if state_affiliated is not None: + query += " AND s.state_affiliated = ?" + params.append(1 if state_affiliated else 0) + if alignment: + query += " AND EXISTS (SELECT 1 FROM source_alignments sa WHERE sa.source_id = s.id AND sa.alignment = ?)" + params.append(alignment.lower()) - query += " ORDER BY source_type, category, name" + query += " ORDER BY s.source_type, s.category, s.name" cursor = await db.execute(query, params) rows = await cursor.fetchall() - results = [] - for row in rows: - d = dict(row) + results = [dict(row) for row in rows] + alignments_map = await _load_alignments_for(db, [r["id"] for r in results]) + for d in results: d["is_global"] = d.get("tenant_id") is None - results.append(d) + d["state_affiliated"] = bool(d.get("state_affiliated")) + d["alignments"] = alignments_map.get(d["id"], []) return results @@ -454,26 +524,60 @@ async def create_source( detail=f"Domain '{domain}' bereits als Quelle vorhanden: {domain_existing['name']}. 
Für einen neuen RSS-Feed bitte die Feed-URL angeben.", ) + payload = data.model_dump(exclude_unset=True) + alignments = payload.pop("alignments", None) + classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & payload.keys()) or alignments is not None + + cols = ["name", "url", "domain", "source_type", "category", "status", "notes", + "language", "bias", + "political_orientation", "media_type", "reliability", + "state_affiliated", "country_code", + "added_by", "tenant_id"] + vals = [ + data.name, + data.url, + domain, + data.source_type, + data.category, + data.status, + data.notes, + payload.get("language"), + payload.get("bias"), + payload.get("political_orientation"), + payload.get("media_type"), + payload.get("reliability"), + 1 if payload.get("state_affiliated") else 0, + payload.get("country_code"), + current_user["username"], + tenant_id, + ] + if classification_touched: + cols += ["classification_source", "classified_at"] + vals += ["manual"] + ts_marker = True + else: + ts_marker = False + + placeholders = ", ".join(["?"] * len(vals) + (["CURRENT_TIMESTAMP"] if ts_marker else [])) cursor = await db.execute( - """INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by, tenant_id) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", - ( - data.name, - data.url, - domain, - data.source_type, - data.category, - data.status, - data.notes, - current_user["username"], - tenant_id, - ), + f"INSERT INTO sources ({', '.join(cols)}) VALUES ({placeholders})", + vals, ) + new_id = cursor.lastrowid + + if alignments: + await _replace_alignments(db, new_id, alignments) + await db.commit() - cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (cursor.lastrowid,)) + cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (new_id,)) row = await cursor.fetchone() - return dict(row) + result = dict(row) + result["is_global"] = result.get("tenant_id") is None + result["state_affiliated"] = bool(result.get("state_affiliated")) + alignments_map = await _load_alignments_for(db, [new_id]) + result["alignments"] = alignments_map.get(new_id, []) + return result @router.put("/{source_id}", response_model=SourceResponse) @@ -494,27 +598,51 @@ async def update_source( _check_source_ownership(dict(row), current_user["username"]) + payload = data.model_dump(exclude_unset=True) + alignments = payload.pop("alignments", None) + updates = {} - for field, value in data.model_dump(exclude_none=True).items(): + for field, value in payload.items(): if field not in SOURCE_UPDATE_COLUMNS: continue # Domain normalisieren if field == "domain" and value: value = _DOMAIN_ALIASES.get(value.lower(), value.lower()) + if field == "state_affiliated": + value = 1 if value else 0 updates[field] = value - if not updates: - return dict(row) + classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & updates.keys()) or alignments is not None + if classification_touched: + updates["classification_source"] = "manual" + updates["classified_at"] = "CURRENT_TIMESTAMP_MARKER" - set_clause = ", ".join(f"{k} = ?" 
for k in updates) - values = list(updates.values()) + [source_id] + if updates: + set_parts = [] + values = [] + for k, v in updates.items(): + if v == "CURRENT_TIMESTAMP_MARKER": + set_parts.append(f"{k} = CURRENT_TIMESTAMP") + else: + set_parts.append(f"{k} = ?") + values.append(v) + values.append(source_id) + await db.execute(f"UPDATE sources SET {', '.join(set_parts)} WHERE id = ?", values) - await db.execute(f"UPDATE sources SET {set_clause} WHERE id = ?", values) - await db.commit() + if alignments is not None: + await _replace_alignments(db, source_id, alignments) + + if updates or alignments is not None: + await db.commit() cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,)) row = await cursor.fetchone() - return dict(row) + result = dict(row) + result["is_global"] = result.get("tenant_id") is None + result["state_affiliated"] = bool(result.get("state_affiliated")) + alignments_map = await _load_alignments_for(db, [source_id]) + result["alignments"] = alignments_map.get(source_id, []) + return result @router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT) From 715af17ac3f85f95ae9ffe706a80b49f0147aa6e Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 7 May 2026 18:37:09 +0000 Subject: [PATCH 03/15] feat(sources): UI fuer Quellen-Klassifikation (Filter, Badges, Edit-Form) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Quellen-Modal: 4 neue Filter (Politik, Medientyp, Reliability, Alignment). - Edit-Form: Selects fuer political_orientation/media_type/reliability, Multi-Select-Chips fuer alignments, Toggle state_affiliated, Country-Code-Input. - renderSourceGroup: Politik-Badge mit DACH-Farbskala (rot=L, blau=R), Reliability-Punkt (gruen→rot), Alignment-Tags, state-affiliated-Indikator. Tooltip um alle 4 Achsen erweitert. - CSS-Block fuer alle neuen Badge-/Chip-Styles. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/static/css/style.css | 111 +++++++++++++++++++++++++++ src/static/dashboard.html | 147 ++++++++++++++++++++++++++++++++++++ src/static/js/app.js | 65 ++++++++++++++++ src/static/js/components.js | 112 ++++++++++++++++++++++++++- 4 files changed, 431 insertions(+), 4 deletions(-) diff --git a/src/static/css/style.css b/src/static/css/style.css index f232fac..3bc671c 100644 --- a/src/static/css/style.css +++ b/src/static/css/style.css @@ -3503,6 +3503,117 @@ a.dev-source-pill:hover { color: var(--info); } +/* Klassifikations-Badges (politisch / reliability / alignments / state) */ +.source-classification-badges { + display: inline-flex; + align-items: center; + gap: 4px; + flex-wrap: wrap; +} + +.source-political-badge { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 22px; + padding: 2px 6px; + border-radius: var(--radius); + font-size: 10px; + font-weight: 700; + letter-spacing: 0.4px; + color: #fff; + background: #9e9e9e; +} +.source-political-badge.pol-links_extrem { background: #b71c1c; } +.source-political-badge.pol-links { background: #e53935; } +.source-political-badge.pol-mitte_links { background: #ef9a9a; color: #4a0d0d; } +.source-political-badge.pol-liberal { background: #fdd835; color: #4a3700; } +.source-political-badge.pol-mitte { background: #9e9e9e; } +.source-political-badge.pol-konservativ { background: #90caf9; color: #0d2740; } +.source-political-badge.pol-mitte_rechts { background: #5c6bc0; } +.source-political-badge.pol-rechts { background: #1976d2; } +.source-political-badge.pol-rechts_extrem { background: #0d47a1; } + +.source-reliability-dot { + display: inline-block; + width: 10px; + height: 10px; + border-radius: 50%; + background: #9e9e9e; + border: 1px solid rgba(0, 0, 0, 0.15); +} +.source-reliability-dot.rel-sehr_hoch { background: #2e7d32; } +.source-reliability-dot.rel-hoch { background: #66bb6a; } +.source-reliability-dot.rel-gemischt { background: #fbc02d; } +.source-reliability-dot.rel-niedrig { background: #ef6c00; } +.source-reliability-dot.rel-sehr_niedrig { background: #c62828; } + +.source-state-badge { + display: inline-flex; + align-items: center; + justify-content: center; + width: 18px; + height: 18px; + border-radius: 50%; + background: #4a148c; + color: #fff; + font-size: 11px; + line-height: 1; +} + +.source-alignment-chip-badge { + display: inline-flex; + align-items: center; + padding: 1px 6px; + border-radius: 999px; + font-size: 10px; + font-weight: 500; + background: var(--cat-sonstige-bg, #eef); + color: var(--text-secondary, #555); + border: 1px solid rgba(0, 0, 0, 0.08); +} + +/* Edit-Form: Klassifikations-Sektion */ +.sources-classification-section { + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-color, rgba(0,0,0,0.08)); +} +.sources-classification-header { + font-size: 12px; + font-weight: 600; + color: var(--text-secondary, #555); + margin-bottom: 8px; + letter-spacing: 0.3px; + text-transform: uppercase; +} +.alignment-chips { + display: flex; + flex-wrap: wrap; + gap: 6px; +} +.alignment-chip { + display: inline-flex; + align-items: center; + padding: 4px 10px; + border-radius: 999px; + font-size: 11px; + font-weight: 500; + background: transparent; + color: var(--text-secondary, #555); + border: 1px solid var(--border-color, rgba(0,0,0,0.15)); + cursor: pointer; + transition: all 0.12s ease; +} +.alignment-chip:hover { + background: var(--cat-sonstige-bg, #eef); +} +.alignment-chip.active { + background: 
var(--primary, #2a81cb); + color: #fff; + border-color: var(--primary, #2a81cb); +} + /* Typ-Badges */ .source-type-badge { display: inline-flex; diff --git a/src/static/dashboard.html b/src/static/dashboard.html index 4737350..43a81dd 100644 --- a/src/static/dashboard.html +++ b/src/static/dashboard.html @@ -481,6 +481,70 @@ + + + + + + + + @@ -548,6 +612,89 @@ +
+              <!-- Edit-Form-Abschnitt "Einordnung" (Klassifikation): Selects
+                   #src-political, #src-mediatype, #src-reliability, Country-Code-Input
+                   #src-country, Checkbox #src-state-affiliated sowie Alignment-Chips
+                   #src-alignments-chips (.alignment-chip mit data-alignment).
+                   Das zugehörige HTML-Markup ist in dieser Fassung nicht erhalten. -->
diff --git a/src/static/js/app.js b/src/static/js/app.js index 0e65c4f..1aff794 100644 --- a/src/static/js/app.js +++ b/src/static/js/app.js @@ -2750,6 +2750,10 @@ async handleRefresh() { // Filter anwenden const typeFilter = document.getElementById('sources-filter-type')?.value || ''; const catFilter = document.getElementById('sources-filter-category')?.value || ''; + const politicalFilter = document.getElementById('sources-filter-political')?.value || ''; + const mediaTypeFilter = document.getElementById('sources-filter-mediatype')?.value || ''; + const reliabilityFilter = document.getElementById('sources-filter-reliability')?.value || ''; + const alignmentFilter = document.getElementById('sources-filter-alignment')?.value || ''; const search = (document.getElementById('sources-search')?.value || '').toLowerCase(); // Alle Quellen nach Domain gruppieren @@ -2800,6 +2804,20 @@ async handleRefresh() { if (!hasMatchingCat) continue; } + // Klassifikations-Filter + if (politicalFilter) { + if (!feeds.some(f => (f.political_orientation || 'na') === politicalFilter)) continue; + } + if (mediaTypeFilter) { + if (!feeds.some(f => (f.media_type || 'sonstige') === mediaTypeFilter)) continue; + } + if (reliabilityFilter) { + if (!feeds.some(f => (f.reliability || 'na') === reliabilityFilter)) continue; + } + if (alignmentFilter) { + if (!feeds.some(f => Array.isArray(f.alignments) && f.alignments.includes(alignmentFilter))) continue; + } + // Suche if (search) { const groupText = feeds.map(f => @@ -3054,6 +3072,13 @@ async handleRefresh() { document.getElementById('src-discover-btn').disabled = false; document.getElementById('src-discover-btn').textContent = 'Erkennen'; document.getElementById('src-type-select').value = 'rss_feed'; + // Klassifikations-Felder auf Default zurücksetzen + const polEl = document.getElementById('src-political'); if (polEl) polEl.value = 'na'; + const mtEl = document.getElementById('src-mediatype'); if (mtEl) mtEl.value = 'sonstige'; + const relEl = document.getElementById('src-reliability'); if (relEl) relEl.value = 'na'; + const ccEl = document.getElementById('src-country'); if (ccEl) ccEl.value = ''; + const saEl = document.getElementById('src-state-affiliated'); if (saEl) saEl.checked = false; + this._setAlignmentChips([]); // Save-Button Text zurücksetzen const saveBtn = document.querySelector('#src-discovery-result .sources-discovery-actions .btn-primary'); if (saveBtn) saveBtn.textContent = 'Speichern'; @@ -3235,6 +3260,19 @@ async handleRefresh() { rss_url: source.url, }; + // Klassifikations-Felder setzen + const polEl = document.getElementById('src-political'); + if (polEl) polEl.value = source.political_orientation || 'na'; + const mtEl = document.getElementById('src-mediatype'); + if (mtEl) mtEl.value = source.media_type || 'sonstige'; + const relEl = document.getElementById('src-reliability'); + if (relEl) relEl.value = source.reliability || 'na'; + const ccEl = document.getElementById('src-country'); + if (ccEl) ccEl.value = source.country_code || ''; + const saEl = document.getElementById('src-state-affiliated'); + if (saEl) saEl.checked = !!source.state_affiliated; + this._setAlignmentChips(source.alignments || []); + // Submit-Button-Text ändern const saveBtn = document.querySelector('#src-discovery-result .sources-discovery-actions .btn-primary'); if (saveBtn) saveBtn.textContent = 'Quelle speichern'; @@ -3243,6 +3281,27 @@ async handleRefresh() { if (form) form.scrollIntoView({ behavior: 'smooth', block: 'start' }); }, + _setAlignmentChips(active) { + 
const chips = document.querySelectorAll('#src-alignments-chips .alignment-chip'); + const set = new Set((active || []).map(a => (a || '').toLowerCase())); + chips.forEach(chip => { + if (set.has(chip.dataset.alignment)) chip.classList.add('active'); + else chip.classList.remove('active'); + }); + }, + + _getAlignmentChips() { + return Array.from(document.querySelectorAll('#src-alignments-chips .alignment-chip.active')) + .map(chip => chip.dataset.alignment); + }, + + handleAlignmentChipClick(e) { + const chip = e.target.closest('.alignment-chip'); + if (!chip) return; + e.preventDefault(); + chip.classList.toggle('active'); + }, + async saveSource() { const name = document.getElementById('src-name').value.trim(); if (!name) { @@ -3258,6 +3317,12 @@ async handleRefresh() { url: discovered.rss_url || (discovered.source_type === 'telegram_channel' ? (document.getElementById('src-domain').value || null) : null), domain: document.getElementById('src-domain').value.trim() || discovered.domain || null, notes: document.getElementById('src-notes').value.trim() || null, + political_orientation: document.getElementById('src-political')?.value || 'na', + media_type: document.getElementById('src-mediatype')?.value || 'sonstige', + reliability: document.getElementById('src-reliability')?.value || 'na', + country_code: (document.getElementById('src-country')?.value || '').trim().toUpperCase() || null, + state_affiliated: !!document.getElementById('src-state-affiliated')?.checked, + alignments: this._getAlignmentChips(), }; if (!data.domain && discovered.domain) { diff --git a/src/static/js/components.js b/src/static/js/components.js index b32dce0..d0a2cd8 100644 --- a/src/static/js/components.js +++ b/src/static/js/components.js @@ -1062,6 +1062,85 @@ const UI = { 'sonstige': 'Sonstige', }, + _politicalLabels: { + links_extrem: { short: 'L+', full: 'Links (extrem)' }, + links: { short: 'L', full: 'Links' }, + mitte_links: { short: 'ML', full: 'Mitte-Links' }, + liberal: { short: 'LIB', full: 'Liberal' }, + mitte: { short: 'M', full: 'Mitte' }, + konservativ: { short: 'KON', full: 'Konservativ' }, + mitte_rechts: { short: 'MR', full: 'Mitte-Rechts' }, + rechts: { short: 'R', full: 'Rechts' }, + rechts_extrem: { short: 'R+', full: 'Rechts (extrem)' }, + na: { short: '?', full: 'Nicht eingeordnet' }, + }, + _reliabilityLabels: { + sehr_hoch: 'Sehr hoch', + hoch: 'Hoch', + gemischt: 'Gemischt', + niedrig: 'Niedrig', + sehr_niedrig: 'Sehr niedrig', + na: 'Nicht eingeordnet', + }, + _mediaTypeLabels: { + tageszeitung: 'Tageszeitung', + wochenzeitung: 'Wochenzeitung', + magazin: 'Magazin', + tv_sender: 'TV-Sender', + radio: 'Radio', + oeffentlich_rechtlich: 'Öffentlich-Rechtlich', + nachrichtenagentur: 'Nachrichtenagentur', + online_only: 'Online-only', + blog: 'Blog', + telegram_kanal: 'Telegram-Kanal', + telegram_bot: 'Telegram-Bot', + podcast: 'Podcast', + social_media: 'Social Media', + imageboard: 'Imageboard', + think_tank: 'Think Tank', + ngo: 'NGO', + behoerde: 'Behörde', + staatsmedium: 'Staatsmedium', + fachmedium: 'Fachmedium', + sonstige: 'Sonstige', + }, + _alignmentLabels: { + prorussisch: 'prorussisch', + proiranisch: 'proiranisch', + prowestlich: 'prowestlich', + proukrainisch: 'proukrainisch', + prochinesisch: 'prochinesisch', + projapanisch: 'projapanisch', + proisraelisch: 'proisraelisch', + propalaestinensisch: 'propalästinensisch', + protuerkisch: 'protürkisch', + panarabisch: 'panarabisch', + neutral: 'neutral', + sonstige: 'sonstige', + }, + + _renderClassificationBadges(feed) { + const 
parts = []; + const pol = feed.political_orientation; + if (pol && pol !== 'na') { + const label = this._politicalLabels[pol] || { short: pol, full: pol }; + parts.push(`${this.escape(label.short)}`); + } + const rel = feed.reliability; + if (rel && rel !== 'na') { + parts.push(``); + } + if (feed.state_affiliated) { + parts.push(``); + } + const aligns = Array.isArray(feed.alignments) ? feed.alignments : []; + aligns.forEach(a => { + const label = this._alignmentLabels[a] || a; + parts.push(`${this.escape(label)}`); + }); + return parts.join(''); + }, + /** * Domain-Gruppe rendern (aufklappbar mit Feeds). */ @@ -1117,20 +1196,44 @@ const UI = { ? `${feedCount} Feed${feedCount !== 1 ? 's' : ''}` : ''; - // Info-Button mit Tooltip (Typ, Sprache, Ausrichtung) + // Info-Button mit Tooltip (Typ, Sprache, Ausrichtung, Klassifikation) let infoButtonHtml = ''; const firstFeed = feeds[0] || {}; - const hasInfo = firstFeed.language || firstFeed.bias; + const hasInfo = firstFeed.language || firstFeed.bias + || (firstFeed.political_orientation && firstFeed.political_orientation !== 'na') + || (firstFeed.media_type && firstFeed.media_type !== 'sonstige') + || (firstFeed.reliability && firstFeed.reliability !== 'na') + || firstFeed.state_affiliated + || firstFeed.country_code + || (Array.isArray(firstFeed.alignments) && firstFeed.alignments.length > 0); if (hasInfo) { - const typeMap = { rss_feed: 'RSS-Feed', web_source: 'Web-Quelle', telegram_channel: 'Telegram-Kanal' }; + const typeMap = { rss_feed: 'RSS-Feed', web_source: 'Web-Quelle', telegram_channel: 'Telegram-Kanal', podcast_feed: 'Podcast' }; const lines = []; lines.push('Typ: ' + (typeMap[firstFeed.source_type] || firstFeed.source_type || 'Unbekannt')); if (firstFeed.language) lines.push('Sprache: ' + firstFeed.language); - if (firstFeed.bias) lines.push('Ausrichtung: ' + firstFeed.bias); + if (firstFeed.country_code) lines.push('Land: ' + firstFeed.country_code); + if (firstFeed.media_type && firstFeed.media_type !== 'sonstige') { + lines.push('Medientyp: ' + (this._mediaTypeLabels[firstFeed.media_type] || firstFeed.media_type)); + } + if (firstFeed.political_orientation && firstFeed.political_orientation !== 'na') { + const pl = this._politicalLabels[firstFeed.political_orientation]; + lines.push('Politisch: ' + (pl ? pl.full : firstFeed.political_orientation)); + } + if (firstFeed.reliability && firstFeed.reliability !== 'na') { + lines.push('Glaubwürdigkeit: ' + (this._reliabilityLabels[firstFeed.reliability] || firstFeed.reliability)); + } + if (firstFeed.state_affiliated) lines.push('Staatsnah: ja'); + if (Array.isArray(firstFeed.alignments) && firstFeed.alignments.length > 0) { + const labels = firstFeed.alignments.map(a => this._alignmentLabels[a] || a); + lines.push('Geopolitische Nähe: ' + labels.join(', ')); + } + if (firstFeed.bias) lines.push('Notiz: ' + firstFeed.bias); const tooltipText = this.escape(lines.join('\n')); infoButtonHtml = ` `; } + const classificationBadges = this._renderClassificationBadges(firstFeed); + return `
${toggleIcon} @@ -1138,6 +1241,7 @@ const UI = { ${this.escape(displayName)}${infoButtonHtml}
${catLabel} + ${classificationBadges ? `${classificationBadges}` : ''} ${feedCountBadge}
${!isGlobal && !hasMultiple && feeds[0]?.id ? `` : ''} From 62ba38ae46acc5db8df0eaa3e36d7e70b83e9948 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 7 May 2026 18:46:54 +0000 Subject: [PATCH 04/15] feat(sources): LLM-Klassifikator + Review-API + Bulk-Migrationsskript - src/services/source_classifier.py: classify_source(db, id) ruft Haiku mit strukturiertem Prompt (4 Achsen + state_affiliated + country + Konfidenz) und schreibt Vorschlaege in proposed_*-Spalten. bulk_classify(db, limit) iteriert sequenziell ueber unklassifizierte Quellen. - API-Endpoints (alle hinter Auth, globale Quellen nur fuer org_admin): - GET /api/sources/classification/stats - GET /api/sources/classification/queue - POST /api/sources/{id}/classification/approve (proposed_* -> echte Felder) - POST /api/sources/{id}/classification/reject (proposed_* loeschen) - POST /api/sources/{id}/classification/reclassify (sofort, ~3-5s) - POST /api/sources/classification/bulk-classify (BackgroundTask) - scripts/migrate_sources_classification.py: CLI-Wrapper fuer Bulk-Migration zur einmaligen Erstbestueckung aller Bestandsquellen. Sample-Test auf Staging steht aus. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/migrate_sources_classification.py | 64 +++++ src/routers/sources.py | 241 +++++++++++++++++- src/services/source_classifier.py | 295 ++++++++++++++++++++++ 3 files changed, 598 insertions(+), 2 deletions(-) create mode 100644 scripts/migrate_sources_classification.py create mode 100644 src/services/source_classifier.py diff --git a/scripts/migrate_sources_classification.py b/scripts/migrate_sources_classification.py new file mode 100644 index 0000000..3fab3fe --- /dev/null +++ b/scripts/migrate_sources_classification.py @@ -0,0 +1,64 @@ +"""Einmalige LLM-Klassifikation aller noch unklassifizierten Quellen. + +Verwendung: + python3 scripts/migrate_sources_classification.py --limit 50 + python3 scripts/migrate_sources_classification.py --limit 500 # Alle + python3 scripts/migrate_sources_classification.py --recheck-pending # bereits Pending neu + +Schreibt Vorschlaege in proposed_*-Spalten. Approval erfolgt anschliessend +ueber das Verwaltungs-UI / API (POST /api/sources/{id}/classification/approve). +""" +import argparse +import asyncio +import logging +import sys +from pathlib import Path + +# src/ in PYTHONPATH aufnehmen, wenn Skript direkt aufgerufen wird +HERE = Path(__file__).resolve().parent +SRC = HERE.parent / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +from database import get_db # noqa: E402 +from services.source_classifier import bulk_classify # noqa: E402 + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", +) +logger = logging.getLogger("migrate_sources") + + +async def main(): + parser = argparse.ArgumentParser(description="LLM-Klassifikation aller Quellen.") + parser.add_argument("--limit", type=int, default=50, help="Max. 
Quellen pro Lauf") + parser.add_argument( + "--recheck-pending", + action="store_true", + help="Auch Quellen mit classification_source='llm_pending' neu klassifizieren", + ) + args = parser.parse_args() + + db = await get_db() + try: + result = await bulk_classify( + db, + limit=args.limit, + only_unclassified=not args.recheck_pending, + ) + finally: + await db.close() + + print(f"Verarbeitet: {result['processed']}") + print(f"Erfolgreich: {result['success']}") + print(f"Fehler: {len(result['errors'])}") + print(f"Kosten: ${result['total_cost_usd']:.4f}") + if result["errors"]: + print("\nFehler-Details:") + for e in result["errors"][:10]: + print(f" source_id={e['source_id']}: {e['error']}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/routers/sources.py b/src/routers/sources.py index 9adade2..9907e8d 100644 --- a/src/routers/sources.py +++ b/src/routers/sources.py @@ -1,10 +1,12 @@ """Sources-Router: Quellenverwaltung (Multi-Tenant).""" +import json import logging from collections import defaultdict -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest from auth import get_current_user -from database import db_dependency, refresh_source_counts +from database import db_dependency, get_db, refresh_source_counts +from services.source_classifier import bulk_classify, classify_source from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES import aiosqlite @@ -700,3 +702,238 @@ async def trigger_refresh_counts( """Artikelzaehler fuer alle Quellen neu berechnen.""" await refresh_source_counts(db) return {"status": "ok"} + + +# === Klassifikations-Review (LLM-Vorschlaege approve/reject/reclassify) === + +def _require_admin_for_global(row: dict, current_user: dict): + """Globale Quellen (tenant_id IS NULL) duerfen nur org_admins approve-en/reclassify-en.""" + if row.get("tenant_id") is None and current_user.get("role") != "org_admin": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Globale Quellen koennen nur von Admins klassifiziert werden", + ) + + +@router.get("/classification/stats") +async def classification_stats( + current_user: dict = Depends(get_current_user), + db: aiosqlite.Connection = Depends(db_dependency), +): + """Counts pro classification_source-Wert (global + eigene Org).""" + tenant_id = current_user.get("tenant_id") + cursor = await db.execute( + """SELECT classification_source, COUNT(*) as cnt + FROM sources + WHERE (tenant_id IS NULL OR tenant_id = ?) AND status = 'active' + GROUP BY classification_source""", + (tenant_id,), + ) + by_source = {row["classification_source"] or "legacy": row["cnt"] for row in await cursor.fetchall()} + cursor = await db.execute( + """SELECT COUNT(*) as cnt FROM sources + WHERE (tenant_id IS NULL OR tenant_id = ?) 
AND status = 'active' + AND proposed_political_orientation IS NOT NULL""", + (tenant_id,), + ) + pending = (await cursor.fetchone())["cnt"] + return { + "by_classification_source": by_source, + "pending_review": pending, + "total": sum(by_source.values()), + } + + +@router.get("/classification/queue") +async def classification_queue( + limit: int = 50, + min_confidence: float = 0.0, + current_user: dict = Depends(get_current_user), + db: aiosqlite.Connection = Depends(db_dependency), +): + """Liefert Quellen mit nicht-leeren proposed_*-Spalten (Review-Queue).""" + tenant_id = current_user.get("tenant_id") + cursor = await db.execute( + """SELECT s.* FROM sources s + WHERE (s.tenant_id IS NULL OR s.tenant_id = ?) + AND s.proposed_political_orientation IS NOT NULL + AND COALESCE(s.proposed_confidence, 0) >= ? + ORDER BY s.proposed_confidence DESC, s.proposed_at DESC + LIMIT ?""", + (tenant_id, min_confidence, limit), + ) + rows = [dict(r) for r in await cursor.fetchall()] + alignments_map = await _load_alignments_for(db, [r["id"] for r in rows]) + out = [] + for d in rows: + try: + proposed_aligns = json.loads(d.get("proposed_alignments_json") or "[]") + except (json.JSONDecodeError, TypeError): + proposed_aligns = [] + out.append({ + "id": d["id"], + "name": d["name"], + "url": d.get("url"), + "domain": d.get("domain"), + "source_type": d.get("source_type"), + "category": d.get("category"), + "is_global": d.get("tenant_id") is None, + "current": { + "political_orientation": d.get("political_orientation"), + "media_type": d.get("media_type"), + "reliability": d.get("reliability"), + "state_affiliated": bool(d.get("state_affiliated")), + "country_code": d.get("country_code"), + "alignments": alignments_map.get(d["id"], []), + "classification_source": d.get("classification_source"), + }, + "proposed": { + "political_orientation": d.get("proposed_political_orientation"), + "media_type": d.get("proposed_media_type"), + "reliability": d.get("proposed_reliability"), + "state_affiliated": bool(d.get("proposed_state_affiliated")), + "country_code": d.get("proposed_country_code"), + "alignments": proposed_aligns, + "confidence": d.get("proposed_confidence"), + "reasoning": d.get("proposed_reasoning"), + "proposed_at": d.get("proposed_at"), + }, + }) + return out + + +async def _clear_proposed(db: aiosqlite.Connection, source_id: int): + """Loescht die proposed_*-Felder einer Quelle (ohne commit).""" + await db.execute( + """UPDATE sources SET + proposed_political_orientation = NULL, + proposed_media_type = NULL, + proposed_reliability = NULL, + proposed_state_affiliated = NULL, + proposed_country_code = NULL, + proposed_alignments_json = NULL, + proposed_confidence = NULL, + proposed_reasoning = NULL, + proposed_at = NULL + WHERE id = ?""", + (source_id,), + ) + + +@router.post("/{source_id}/classification/approve") +async def approve_classification( + source_id: int, + current_user: dict = Depends(get_current_user), + db: aiosqlite.Connection = Depends(db_dependency), +): + """Uebernimmt proposed_* in echte Felder, setzt classification_source='llm_approved'.""" + cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,)) + row = await cursor.fetchone() + if not row: + raise HTTPException(status_code=404, detail="Quelle nicht gefunden") + src = dict(row) + _require_admin_for_global(src, current_user) + + if src.get("proposed_political_orientation") is None: + raise HTTPException(status_code=400, detail="Keine LLM-Vorschlaege fuer diese Quelle vorhanden") + + try: + proposed_aligns 
= json.loads(src.get("proposed_alignments_json") or "[]") + except (json.JSONDecodeError, TypeError): + proposed_aligns = [] + + await db.execute( + """UPDATE sources SET + political_orientation = ?, + media_type = ?, + reliability = ?, + state_affiliated = ?, + country_code = ?, + classification_source = 'llm_approved', + classified_at = CURRENT_TIMESTAMP + WHERE id = ?""", + ( + src["proposed_political_orientation"], + src["proposed_media_type"], + src["proposed_reliability"], + 1 if src.get("proposed_state_affiliated") else 0, + src.get("proposed_country_code"), + source_id, + ), + ) + await _replace_alignments(db, source_id, [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS]) + await _clear_proposed(db, source_id) + await db.commit() + return {"source_id": source_id, "status": "approved"} + + +@router.post("/{source_id}/classification/reject") +async def reject_classification( + source_id: int, + current_user: dict = Depends(get_current_user), + db: aiosqlite.Connection = Depends(db_dependency), +): + """Verwirft die LLM-Vorschlaege ohne Uebernahme. classification_source bleibt unveraendert.""" + cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,)) + row = await cursor.fetchone() + if not row: + raise HTTPException(status_code=404, detail="Quelle nicht gefunden") + src = dict(row) + _require_admin_for_global(src, current_user) + + await _clear_proposed(db, source_id) + # Wenn classification_source noch 'llm_pending' war, zurueck auf 'legacy' + if src.get("classification_source") == "llm_pending": + await db.execute( + "UPDATE sources SET classification_source = 'legacy' WHERE id = ?", + (source_id,), + ) + await db.commit() + return {"source_id": source_id, "status": "rejected"} + + +@router.post("/{source_id}/classification/reclassify") +async def reclassify_source( + source_id: int, + current_user: dict = Depends(get_current_user), + db: aiosqlite.Connection = Depends(db_dependency), +): + """Triggert eine LLM-Klassifikation einer einzelnen Quelle (synchron, ~3-5s).""" + cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,)) + row = await cursor.fetchone() + if not row: + raise HTTPException(status_code=404, detail="Quelle nicht gefunden") + src = dict(row) + _require_admin_for_global(src, current_user) + + try: + result = await classify_source(db, source_id) + except Exception as e: + logger.error("Reclassify source_id=%s fehlgeschlagen: %s", source_id, e, exc_info=True) + raise HTTPException(status_code=500, detail=f"Klassifikation fehlgeschlagen: {e}") + return result + + +async def _bulk_classify_background(limit: int, only_unclassified: bool): + """Hintergrund-Task: oeffnet eigene DB-Connection.""" + db = await get_db() + try: + await bulk_classify(db, limit=limit, only_unclassified=only_unclassified) + finally: + await db.close() + + +@router.post("/classification/bulk-classify") +async def trigger_bulk_classify( + background_tasks: BackgroundTasks, + limit: int = 50, + only_unclassified: bool = True, + current_user: dict = Depends(get_current_user), +): + """Startet eine Bulk-Klassifikation im Hintergrund (nur Admins).""" + if current_user.get("role") != "org_admin": + raise HTTPException(status_code=403, detail="Nur Admins koennen Bulk-Klassifikation starten") + if limit < 1 or limit > 500: + raise HTTPException(status_code=400, detail="limit muss zwischen 1 und 500 liegen") + background_tasks.add_task(_bulk_classify_background, limit, only_unclassified) + return {"status": "started", "limit": limit, "only_unclassified": 
only_unclassified} diff --git a/src/services/source_classifier.py b/src/services/source_classifier.py new file mode 100644 index 0000000..c965958 --- /dev/null +++ b/src/services/source_classifier.py @@ -0,0 +1,295 @@ +"""Klassifiziert Quellen via Claude (Haiku) nach 4 Achsen + state_affiliated + country. + +Schreibt Vorschlaege in die proposed_*-Spalten von sources und setzt +classification_source='llm_pending'. Approval erfolgt ueber separate Endpoints, +die proposed_* in die echten Spalten kopieren. +""" +import asyncio +import json +import logging +import re + +import aiosqlite + +from agents.claude_client import call_claude +from config import CLAUDE_MODEL_FAST + +logger = logging.getLogger("osint.source_classifier") + +POLITICAL_VALUES = { + "links_extrem", "links", "mitte_links", "liberal", "mitte", + "konservativ", "mitte_rechts", "rechts", "rechts_extrem", "na", +} +MEDIA_TYPE_VALUES = { + "tageszeitung", "wochenzeitung", "magazin", "tv_sender", "radio", + "oeffentlich_rechtlich", "nachrichtenagentur", "online_only", "blog", + "telegram_kanal", "telegram_bot", "podcast", "social_media", "imageboard", + "think_tank", "ngo", "behoerde", "staatsmedium", "fachmedium", "sonstige", +} +RELIABILITY_VALUES = {"sehr_hoch", "hoch", "gemischt", "niedrig", "sehr_niedrig", "na"} +ALIGNMENT_VALUES = { + "prorussisch", "proiranisch", "prowestlich", "proukrainisch", + "prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch", + "protuerkisch", "panarabisch", "neutral", "sonstige", +} + + +def _build_prompt(src: dict, sample_articles: list[dict]) -> str: + sample_text = "" + if sample_articles: + lines = [] + for i, art in enumerate(sample_articles[:5], 1): + headline = (art.get("headline") or art.get("headline_de") or "").strip() + if headline: + lines.append(f"{i}. {headline[:200]}") + if lines: + sample_text = "\nLetzte Artikel/Headlines:\n" + "\n".join(lines) + + return f"""Du bist ein OSINT-Analyst und klassifizierst Nachrichten- und Medienquellen fuer ein Lagebild-Monitoring-System (DACH-Raum). + +QUELLE: +Name: {src.get('name')} +URL: {src.get('url') or '-'} +Domain: {src.get('domain') or '-'} +Quellentyp: {src.get('source_type')} +Bisherige Kategorie: {src.get('category')} +Sprache: {src.get('language') or 'unbekannt'} +Bisherige Notiz (Freitext): {src.get('bias') or '-'}{sample_text} + +AUFGABE: Klassifiziere die Quelle nach folgenden Achsen. + +1. political_orientation: + - links_extrem (z.B. linksunten.indymedia) + - links (klar links, z.B. junge Welt, taz) + - mitte_links (linksliberal/sozialdemokratisch, z.B. SZ, Spiegel) + - liberal (wirtschafts-/grünliberal, z.B. NZZ, Zeit) + - mitte (politisch neutral, Agentur, z.B. dpa, Reuters, tagesschau) + - konservativ (buergerlich-konservativ, z.B. FAZ, Welt) + - mitte_rechts (rechts-buergerlich, z.B. Tichys Einblick, Achgut) + - rechts (klar rechts, z.B. Junge Freiheit, EpochTimes) + - rechts_extrem (z.B. Compact, PI-News) + - na (nicht klassifizierbar: Behoerde, Fachmedium, Think Tank ohne klare politische Linie) + +2. media_type (genau einer): + tageszeitung, wochenzeitung, magazin, tv_sender, radio, oeffentlich_rechtlich, + nachrichtenagentur, online_only, blog, telegram_kanal, telegram_bot, podcast, + social_media, imageboard, think_tank, ngo, behoerde, staatsmedium, fachmedium, sonstige + +3. 
reliability: + - sehr_hoch (etablierte Qualitaet, Faktencheck: tagesschau, dpa, FAZ, Reuters) + - hoch (serioes mit gelegentlichen Schwaechen: taz, Welt, BILD bei harten News) + - gemischt (Mix Meinung/Einseitigkeit: Tichys Einblick, Achgut, Boulevard) + - niedrig (haeufig irrefuehrend, schwache Quellenarbeit: Junge Freiheit, EpochTimes) + - sehr_niedrig (bekannt fuer Desinformation/Verschwoerung: Compact, RT, Sputnik, PI-News) + - na (nicht bewertbar) + +4. alignments (Mehrfach, leeres Array wenn keine ausgepraegte Naehe): + prorussisch, proiranisch, prowestlich, proukrainisch, prochinesisch, projapanisch, + proisraelisch, propalaestinensisch, protuerkisch, panarabisch, neutral, sonstige + +5. state_affiliated (true/false): true wenn vom Staat finanziert/kontrolliert + (RT, Sputnik, CGTN, PressTV, Xinhua, TRT). Public Service Broadcaster + wie ARD/ZDF/BBC sind NICHT state_affiliated. + +6. country_code (ISO 3166-1 alpha-2): Heimatland (DE, AT, CH, RU, US, ...). null wenn unklar. + +7. confidence (0.0-1.0): 0.85+ fuer bekannte Outlets, 0.5-0.85 fuer mittelbekannt, <0.5 fuer unsicher. + +8. reasoning (1-2 Saetze): Kurze Begruendung der Hauptklassifikationen. + +WICHTIG: +- Antworte AUSSCHLIESSLICH mit einem JSON-Objekt, kein Text drumherum. +- Nutze ausschliesslich die genannten enum-Werte (snake_case). +- Bei Unklarheit lieber `na` und niedrige confidence. + +JSON-Schema: +{{ + "political_orientation": "...", + "media_type": "...", + "reliability": "...", + "alignments": ["..."], + "state_affiliated": false, + "country_code": "DE", + "confidence": 0.9, + "reasoning": "..." +}}""" + + +async def _load_sample_articles(db: aiosqlite.Connection, name: str, domain: str | None, limit: int = 5) -> list[dict]: + """Laedt die letzten Headlines einer Quelle (per name oder Domain-Match).""" + rows: list = [] + if name: + cursor = await db.execute( + "SELECT headline, headline_de FROM articles WHERE source = ? ORDER BY collected_at DESC LIMIT ?", + (name, limit), + ) + rows = await cursor.fetchall() + if not rows and domain: + cursor = await db.execute( + "SELECT headline, headline_de FROM articles WHERE source_url LIKE ? 
ORDER BY collected_at DESC LIMIT ?", + (f"%{domain}%", limit), + ) + rows = await cursor.fetchall() + return [dict(r) for r in rows] + + +def _validate(parsed: dict) -> dict: + """Validiert + normalisiert eine LLM-Antwort gegen die Enums.""" + pol = parsed.get("political_orientation", "na") + if pol not in POLITICAL_VALUES: + pol = "na" + mt = parsed.get("media_type", "sonstige") + if mt not in MEDIA_TYPE_VALUES: + mt = "sonstige" + rel = parsed.get("reliability", "na") + if rel not in RELIABILITY_VALUES: + rel = "na" + aligns_raw = parsed.get("alignments") or [] + if not isinstance(aligns_raw, list): + aligns_raw = [] + aligns = sorted({a for a in aligns_raw if isinstance(a, str) and a in ALIGNMENT_VALUES}) + sa = bool(parsed.get("state_affiliated", False)) + cc = parsed.get("country_code") + if isinstance(cc, str) and len(cc) == 2 and cc.isalpha(): + cc = cc.upper() + else: + cc = None + try: + confidence = float(parsed.get("confidence", 0.5)) + confidence = max(0.0, min(1.0, confidence)) + except (TypeError, ValueError): + confidence = 0.5 + reasoning = str(parsed.get("reasoning", ""))[:1000] + return { + "political_orientation": pol, + "media_type": mt, + "reliability": rel, + "alignments": aligns, + "state_affiliated": sa, + "country_code": cc, + "confidence": confidence, + "reasoning": reasoning, + } + + +async def classify_source( + db: aiosqlite.Connection, + source_id: int, + sample_limit: int = 5, + model: str = CLAUDE_MODEL_FAST, +) -> dict: + """Klassifiziert eine einzelne Quelle und schreibt die Vorschlaege in proposed_*-Spalten.""" + cursor = await db.execute( + "SELECT id, name, url, domain, source_type, category, language, bias, " + "classification_source FROM sources WHERE id = ?", + (source_id,), + ) + row = await cursor.fetchone() + if not row: + raise ValueError(f"Quelle {source_id} nicht gefunden") + src = dict(row) + + sample = await _load_sample_articles(db, src["name"], src.get("domain"), sample_limit) + prompt = _build_prompt(src, sample) + response, usage = await call_claude(prompt, tools=None, model=model) + + json_match = re.search(r"\{.*\}", response, re.DOTALL) + if not json_match: + raise ValueError(f"Keine JSON-Antwort von Claude fuer source_id={source_id}: {response[:200]}") + parsed = json.loads(json_match.group(0)) + result = _validate(parsed) + + # Nur classification_source auf 'llm_pending' setzen, wenn nicht bereits manuell/approved + new_src = "CASE WHEN classification_source IN ('manual','llm_approved') THEN classification_source ELSE 'llm_pending' END" + await db.execute( + f"""UPDATE sources SET + proposed_political_orientation = ?, + proposed_media_type = ?, + proposed_reliability = ?, + proposed_state_affiliated = ?, + proposed_country_code = ?, + proposed_alignments_json = ?, + proposed_confidence = ?, + proposed_reasoning = ?, + proposed_at = CURRENT_TIMESTAMP, + classification_source = {new_src} + WHERE id = ?""", + ( + result["political_orientation"], + result["media_type"], + result["reliability"], + 1 if result["state_affiliated"] else 0, + result["country_code"], + json.dumps(result["alignments"], ensure_ascii=False), + result["confidence"], + result["reasoning"], + source_id, + ), + ) + await db.commit() + + logger.info( + "Klassifiziert source_id=%s '%s' -> %s/%s/%s conf=%.2f ($%.4f)", + source_id, src["name"], result["political_orientation"], + result["media_type"], result["reliability"], result["confidence"], + usage.cost_usd, + ) + + result["source_id"] = source_id + result["usage"] = { + "cost_usd": usage.cost_usd, + "input_tokens": 
usage.input_tokens, + "output_tokens": usage.output_tokens, + } + return result + + +async def bulk_classify( + db: aiosqlite.Connection, + limit: int = 50, + only_unclassified: bool = True, + model: str = CLAUDE_MODEL_FAST, +) -> dict: + """Klassifiziert noch unklassifizierte Quellen (sequenziell). + + Args: + limit: Maximale Anzahl Quellen pro Aufruf + only_unclassified: Wenn True, nur classification_source='legacy'. + Wenn False, auch 'llm_pending' neu klassifizieren. + """ + if only_unclassified: + where = "classification_source = 'legacy'" + else: + where = "classification_source IN ('legacy', 'llm_pending')" + cursor = await db.execute( + f"SELECT id FROM sources WHERE {where} AND status = 'active' " + f"AND source_type != 'excluded' ORDER BY id LIMIT ?", + (limit,), + ) + ids = [row["id"] for row in await cursor.fetchall()] + + total_cost = 0.0 + success = 0 + errors: list[dict] = [] + + for sid in ids: + try: + r = await classify_source(db, sid, model=model) + total_cost += r["usage"]["cost_usd"] + success += 1 + except asyncio.CancelledError: + raise + except Exception as e: + logger.error("Klassifikation source_id=%s fehlgeschlagen: %s", sid, e, exc_info=True) + errors.append({"source_id": sid, "error": str(e)}) + + logger.info( + "Bulk-Klassifikation fertig: %d/%d erfolgreich, $%.4f Kosten, %d Fehler", + success, len(ids), total_cost, len(errors), + ) + return { + "processed": len(ids), + "success": success, + "errors": errors, + "total_cost_usd": total_cost, + } From 48a60d7579a32c9eb74e2d97a34ad69671684603 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 7 May 2026 19:00:47 +0000 Subject: [PATCH 05/15] feat(sources): Review-Queue-UI fuer LLM-Klassifikations-Vorschlaege (Admin) - Tab-Schalter im Quellen-Modal: "Quellenliste" vs. "Klassifikations-Review" (Review-Tab nur fuer org_admin sichtbar, mit Pending-Counter-Badge). - Review-Karten zeigen Diff aktueller Wert -> LLM-Vorschlag pro Achse, Konfidenz-Indikator (gruen/gelb/rot), LLM-Begruendung, Buttons fuer Uebernehmen / Verwerfen / Neu klassifizieren. - Toolbar: Konfidenz-Filter, "Klassifikation starten" (Bulk im Hintergrund), "Alle >= 0.85 genehmigen" (Bulk-Approve). - API-Wrapper in api.js fuer alle 6 neuen Endpoints + erweiterte listSources-Filter. - Backend-Endpoint POST /api/sources/classification/bulk-approve (Admin-only). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/routers/sources.py | 59 +++++++++++ src/static/css/style.css | 198 ++++++++++++++++++++++++++++++++++++ src/static/dashboard.html | 38 +++++++ src/static/js/api.js | 33 ++++++ src/static/js/app.js | 112 ++++++++++++++++++++ src/static/js/components.js | 65 ++++++++++++ 6 files changed, 505 insertions(+) diff --git a/src/routers/sources.py b/src/routers/sources.py index 9907e8d..25a898f 100644 --- a/src/routers/sources.py +++ b/src/routers/sources.py @@ -937,3 +937,62 @@ async def trigger_bulk_classify( raise HTTPException(status_code=400, detail="limit muss zwischen 1 und 500 liegen") background_tasks.add_task(_bulk_classify_background, limit, only_unclassified) return {"status": "started", "limit": limit, "only_unclassified": only_unclassified} + + +@router.post("/classification/bulk-approve") +async def bulk_approve_classifications( + min_confidence: float = 0.85, + current_user: dict = Depends(get_current_user), + db: aiosqlite.Connection = Depends(db_dependency), +): + """Genehmigt alle Pending-Vorschlaege ueber dem confidence-Schwellwert (nur Admins). 
+ + Globale Quellen werden nur bearbeitet, wenn der Aufrufer org_admin ist; + Tenant-eigene Quellen sowieso. + """ + if current_user.get("role") != "org_admin": + raise HTTPException(status_code=403, detail="Nur Admins koennen Bulk-Approve nutzen") + tenant_id = current_user.get("tenant_id") + cursor = await db.execute( + """SELECT id, proposed_political_orientation, proposed_media_type, + proposed_reliability, proposed_state_affiliated, + proposed_country_code, proposed_alignments_json, tenant_id + FROM sources + WHERE proposed_political_orientation IS NOT NULL + AND COALESCE(proposed_confidence, 0) >= ? + AND (tenant_id IS NULL OR tenant_id = ?)""", + (min_confidence, tenant_id), + ) + rows = [dict(r) for r in await cursor.fetchall()] + approved_ids: list[int] = [] + for src in rows: + try: + proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]") + except (json.JSONDecodeError, TypeError): + proposed_aligns = [] + await db.execute( + """UPDATE sources SET + political_orientation = ?, + media_type = ?, + reliability = ?, + state_affiliated = ?, + country_code = ?, + classification_source = 'llm_approved', + classified_at = CURRENT_TIMESTAMP + WHERE id = ?""", + ( + src["proposed_political_orientation"], + src["proposed_media_type"], + src["proposed_reliability"], + 1 if src.get("proposed_state_affiliated") else 0, + src.get("proposed_country_code"), + src["id"], + ), + ) + await _replace_alignments( + db, src["id"], [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS] + ) + await _clear_proposed(db, src["id"]) + approved_ids.append(src["id"]) + await db.commit() + return {"approved_count": len(approved_ids), "min_confidence": min_confidence} diff --git a/src/static/css/style.css b/src/static/css/style.css index 3bc671c..777d490 100644 --- a/src/static/css/style.css +++ b/src/static/css/style.css @@ -3503,6 +3503,204 @@ a.dev-source-pill:hover { color: var(--info); } +/* Sources-Modal: Tabs */ +.sources-tabs { + display: flex; + gap: 2px; + border-bottom: 1px solid var(--border-color, rgba(0,0,0,0.1)); + margin-bottom: 12px; +} +.sources-tab { + background: transparent; + border: none; + padding: 8px 16px; + font-size: 13px; + font-weight: 500; + color: var(--text-secondary, #555); + cursor: pointer; + border-bottom: 2px solid transparent; + margin-bottom: -1px; + display: inline-flex; + align-items: center; + gap: 8px; +} +.sources-tab:hover { + color: var(--text-primary, #222); +} +.sources-tab.active { + color: var(--primary, #2a81cb); + border-bottom-color: var(--primary, #2a81cb); +} +.sources-tab-badge { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 20px; + padding: 0 6px; + height: 18px; + border-radius: 9px; + background: var(--primary, #2a81cb); + color: #fff; + font-size: 10px; + font-weight: 700; +} + +/* Review-Queue */ +.review-toolbar { + display: flex; + align-items: center; + justify-content: space-between; + padding: 8px 12px; + background: var(--cat-sonstige-bg, #f6f6fa); + border-radius: var(--radius); + margin-bottom: 12px; + flex-wrap: wrap; + gap: 12px; +} +.review-toolbar-info { + display: flex; + align-items: center; + gap: 16px; + font-size: 13px; +} +.review-conf-filter { + display: inline-flex; + align-items: center; + gap: 6px; + font-size: 12px; + color: var(--text-secondary, #555); +} +.review-conf-filter select { + padding: 2px 6px; + font-size: 12px; + border-radius: var(--radius); + border: 1px solid var(--border-color, rgba(0,0,0,0.15)); +} +.review-toolbar-actions { + display: flex; + gap: 
6px; +} + +.review-list { + display: flex; + flex-direction: column; + gap: 8px; +} +.review-card { + background: var(--surface, #fff); + border: 1px solid var(--border-color, rgba(0,0,0,0.08)); + border-radius: var(--radius); + padding: 12px 14px; +} +.review-card-header { + display: flex; + justify-content: space-between; + align-items: flex-start; + gap: 12px; + margin-bottom: 10px; +} +.review-card-title { + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 8px; +} +.review-card-name { + font-weight: 600; + font-size: 14px; +} +.review-card-domain { + font-size: 11px; + color: var(--text-disabled, #888); +} +.review-global-badge { + display: inline-flex; + align-items: center; + padding: 1px 6px; + border-radius: var(--radius); + background: #5e35b1; + color: #fff; + font-size: 9px; + font-weight: 600; + letter-spacing: 0.3px; + text-transform: uppercase; +} +.review-card-confidence { + display: inline-flex; + flex-direction: column; + align-items: center; + padding: 4px 10px; + border-radius: var(--radius); + min-width: 60px; +} +.review-card-confidence .conf-value { + font-size: 14px; + font-weight: 700; +} +.review-card-confidence .conf-label { + font-size: 9px; + text-transform: uppercase; + letter-spacing: 0.3px; + opacity: 0.8; +} +.review-card-confidence.conf-high { background: #e8f5e9; color: #2e7d32; } +.review-card-confidence.conf-medium { background: #fff8e1; color: #ef6c00; } +.review-card-confidence.conf-low { background: #ffebee; color: #c62828; } + +.review-card-diff { + display: grid; + grid-template-columns: 1fr; + gap: 4px; + font-size: 12px; + margin-bottom: 10px; +} +.review-diff-row { + display: grid; + grid-template-columns: 110px 1fr 24px 1fr; + align-items: center; + gap: 8px; + padding: 3px 6px; + border-radius: 3px; +} +.review-diff-row.changed { + background: #fff8e1; +} +.review-diff-label { + color: var(--text-secondary, #555); + font-weight: 500; +} +.review-diff-current { + color: var(--text-disabled, #888); +} +.review-diff-arrow { + text-align: center; + color: var(--text-disabled, #888); + font-weight: 600; +} +.review-diff-proposed { + color: var(--text-primary, #222); + font-weight: 500; +} +.review-diff-row.changed .review-diff-proposed { + color: #ef6c00; + font-weight: 600; +} + +.review-card-reasoning { + font-size: 12px; + color: var(--text-secondary, #555); + background: var(--cat-sonstige-bg, #f6f6fa); + padding: 8px 10px; + border-radius: var(--radius); + margin-bottom: 10px; + line-height: 1.5; +} +.review-card-actions { + display: flex; + gap: 6px; + flex-wrap: wrap; +} + /* Klassifikations-Badges (politisch / reliability / alignments / state) */ .source-classification-badges { display: inline-flex; diff --git a/src/static/dashboard.html b/src/static/dashboard.html index 43a81dd..8e73d59 100644 --- a/src/static/dashboard.html +++ b/src/static/dashboard.html @@ -456,6 +456,15 @@
+ +
+ + +
+ + +
+
@@ -706,6 +715,35 @@
Lade Quellen...
+ +
+ + + +
diff --git a/src/static/js/api.js b/src/static/js/api.js index 310476d..b2b1fd9 100644 --- a/src/static/js/api.js +++ b/src/static/js/api.js @@ -198,10 +198,43 @@ const API = { if (params.source_type) query.set('source_type', params.source_type); if (params.category) query.set('category', params.category); if (params.source_status) query.set('source_status', params.source_status); + if (params.political_orientation) query.set('political_orientation', params.political_orientation); + if (params.media_type) query.set('media_type', params.media_type); + if (params.reliability) query.set('reliability', params.reliability); + if (params.alignment) query.set('alignment', params.alignment); + if (params.state_affiliated !== undefined && params.state_affiliated !== null) { + query.set('state_affiliated', String(params.state_affiliated)); + } const qs = query.toString(); return this._request('GET', `/sources${qs ? '?' + qs : ''}`); }, + // Sources: Klassifikations-Review (LLM) + getClassificationStats() { + return this._request('GET', '/sources/classification/stats'); + }, + getClassificationQueue(limit = 50, minConfidence = 0.0) { + const qs = new URLSearchParams({ limit: String(limit), min_confidence: String(minConfidence) }).toString(); + return this._request('GET', `/sources/classification/queue?${qs}`); + }, + approveClassification(id) { + return this._request('POST', `/sources/${id}/classification/approve`); + }, + rejectClassification(id) { + return this._request('POST', `/sources/${id}/classification/reject`); + }, + reclassifySource(id) { + return this._request('POST', `/sources/${id}/classification/reclassify`); + }, + triggerBulkClassify(limit = 50, onlyUnclassified = true) { + const qs = new URLSearchParams({ limit: String(limit), only_unclassified: String(onlyUnclassified) }).toString(); + return this._request('POST', `/sources/classification/bulk-classify?${qs}`); + }, + bulkApproveClassifications(minConfidence = 0.85) { + const qs = new URLSearchParams({ min_confidence: String(minConfidence) }).toString(); + return this._request('POST', `/sources/classification/bulk-approve?${qs}`); + }, + createSource(data) { return this._request('POST', '/sources', data); }, diff --git a/src/static/js/app.js b/src/static/js/app.js index 1aff794..1f8d0b4 100644 --- a/src/static/js/app.js +++ b/src/static/js/app.js @@ -2702,6 +2702,12 @@ async handleRefresh() { async openSourceManagement() { openModal('modal-sources'); await this.loadSources(); + // Admin sieht den Review-Tab + const reviewTab = document.getElementById('sources-tab-review'); + if (reviewTab && this.user && this.user.role === 'org_admin') { + reviewTab.style.display = ''; + this._refreshReviewBadge().catch(() => {}); + } }, async loadSources() { @@ -2722,6 +2728,112 @@ async handleRefresh() { } }, + async _refreshReviewBadge() { + try { + const stats = await API.getClassificationStats(); + const badge = document.getElementById('sources-review-count'); + if (badge) badge.textContent = String(stats.pending_review || 0); + } catch (_) { /* still ok */ } + }, + + switchSourcesTab(tab) { + const listView = document.getElementById('sources-list-view'); + const reviewView = document.getElementById('sources-review-view'); + const tabList = document.getElementById('sources-tab-list'); + const tabReview = document.getElementById('sources-tab-review'); + if (!listView || !reviewView) return; + if (tab === 'review') { + listView.style.display = 'none'; + reviewView.style.display = ''; + if (tabList) { tabList.classList.remove('active'); 
tabList.setAttribute('aria-selected', 'false'); } + if (tabReview) { tabReview.classList.add('active'); tabReview.setAttribute('aria-selected', 'true'); } + this.loadClassificationQueue(); + } else { + listView.style.display = ''; + reviewView.style.display = 'none'; + if (tabList) { tabList.classList.add('active'); tabList.setAttribute('aria-selected', 'true'); } + if (tabReview) { tabReview.classList.remove('active'); tabReview.setAttribute('aria-selected', 'false'); } + } + }, + + async loadClassificationQueue() { + const list = document.getElementById('sources-review-list'); + if (!list) return; + const minConf = parseFloat(document.getElementById('review-min-confidence')?.value || '0'); + list.innerHTML = '
Lade...
'; + try { + const items = await API.getClassificationQueue(200, minConf); + this._reviewItems = items; + const countEl = document.getElementById('review-pending-count'); + if (countEl) countEl.textContent = String(items.length); + if (items.length === 0) { + list.innerHTML = '
Keine ausstehenden Vorschlaege.
'; + return; + } + list.innerHTML = items.map(item => UI.renderClassificationQueueItem(item)).join(''); + } catch (err) { + list.innerHTML = `
Fehler: ${err.message}
`; + } + }, + + async approveClassification(id) { + try { + await API.approveClassification(id); + UI.showToast('Klassifikation uebernommen.', 'success'); + await this.loadClassificationQueue(); + this._refreshReviewBadge(); + } catch (err) { + UI.showToast('Approve fehlgeschlagen: ' + err.message, 'error'); + } + }, + + async rejectClassification(id) { + try { + await API.rejectClassification(id); + UI.showToast('Vorschlag verworfen.', 'success'); + await this.loadClassificationQueue(); + this._refreshReviewBadge(); + } catch (err) { + UI.showToast('Reject fehlgeschlagen: ' + err.message, 'error'); + } + }, + + async reclassifySource(id) { + const btn = document.querySelector(`[data-reclassify-id="${id}"]`); + if (btn) { btn.disabled = true; btn.textContent = '...'; } + try { + await API.reclassifySource(id); + UI.showToast('Neu klassifiziert.', 'success'); + await this.loadClassificationQueue(); + } catch (err) { + UI.showToast('Reclassify fehlgeschlagen: ' + err.message, 'error'); + } finally { + if (btn) { btn.disabled = false; btn.textContent = 'Neu klassifizieren'; } + } + }, + + async triggerBulkClassify() { + if (!confirm('Bulk-Klassifikation aller noch nicht klassifizierten Quellen starten? Lauft im Hintergrund (~3-5 Sek pro Quelle, ~0.02 USD pro Quelle).')) return; + try { + const r = await API.triggerBulkClassify(500, true); + UI.showToast(`Bulk-Klassifikation gestartet (limit=${r.limit}). Nachschauen mit Reload.`, 'info'); + } catch (err) { + UI.showToast('Start fehlgeschlagen: ' + err.message, 'error'); + } + }, + + async bulkApproveHighConfidence() { + if (!confirm('Alle Vorschlaege mit Konfidenz >= 0.85 genehmigen?')) return; + try { + const r = await API.bulkApproveClassifications(0.85); + UI.showToast(`${r.approved_count} Vorschlaege uebernommen.`, 'success'); + await this.loadClassificationQueue(); + this._refreshReviewBadge(); + } catch (err) { + UI.showToast('Bulk-Approve fehlgeschlagen: ' + err.message, 'error'); + } + }, + renderSourceStats(stats) { const bar = document.getElementById('sources-stats-bar'); if (!bar) return; diff --git a/src/static/js/components.js b/src/static/js/components.js index d0a2cd8..338802e 100644 --- a/src/static/js/components.js +++ b/src/static/js/components.js @@ -1119,6 +1119,71 @@ const UI = { sonstige: 'sonstige', }, + /** + * Eintrag in der Klassifikations-Review-Queue. + * Zeigt Diff zwischen aktuellem Wert und LLM-Vorschlag. + */ + renderClassificationQueueItem(item) { + const cur = item.current || {}; + const prop = item.proposed || {}; + const conf = prop.confidence || 0; + const confPct = Math.round(conf * 100); + const confClass = conf >= 0.85 ? 'high' : (conf >= 0.7 ? 'medium' : 'low'); + + const diffRow = (label, currentVal, proposedVal, formatter) => { + const fmt = formatter || (v => v == null || v === '' ? '–' : String(v)); + const c = fmt(currentVal); + const p = fmt(proposedVal); + const changed = c !== p; + return `
+ ${this.escape(label)} + ${this.escape(c)} + + ${this.escape(p)} +
`; + }; + + const polFmt = v => (v && v !== 'na') ? (this._politicalLabels[v]?.full || v) : '–'; + const mtFmt = v => (v && v !== 'sonstige') ? (this._mediaTypeLabels[v] || v) : (v === 'sonstige' ? 'Sonstige' : '–'); + const relFmt = v => (v && v !== 'na') ? (this._reliabilityLabels[v] || v) : '–'; + const stateFmt = v => v ? 'ja' : 'nein'; + const ccFmt = v => v || '–'; + const alignFmt = v => (Array.isArray(v) && v.length > 0) + ? v.map(a => this._alignmentLabels[a] || a).join(', ') + : '–'; + + const globalBadge = item.is_global ? 'Grundquelle' : ''; + const reasoning = prop.reasoning ? this.escape(prop.reasoning) : ''; + + return `
+
+
+ ${this.escape(item.name)} + ${globalBadge} + ${this.escape(item.domain || '')} +
+
+ ${confPct}% + Konfidenz +
+
+
+ ${diffRow('Politik', cur.political_orientation, prop.political_orientation, polFmt)} + ${diffRow('Medientyp', cur.media_type, prop.media_type, mtFmt)} + ${diffRow('Glaubwürdigkeit', cur.reliability, prop.reliability, relFmt)} + ${diffRow('Staatsnah', cur.state_affiliated, prop.state_affiliated, stateFmt)} + ${diffRow('Land', cur.country_code, prop.country_code, ccFmt)} + ${diffRow('Geopol. Nähe', cur.alignments, prop.alignments, alignFmt)} +
+ ${reasoning ? `
Begründung: ${reasoning}
` : ''} +
+ + + +
+
`; + }, + _renderClassificationBadges(feed) { const parts = []; const pol = feed.political_orientation; From 5fc246755975368c2c0b66d4bd61268d6196bce9 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 7 May 2026 19:40:30 +0000 Subject: [PATCH 06/15] feat(sources): externer Reputations-Layer (IFCN + EUvsDisinfo) Externe Datenquellen (kostenlos, Open Data) ergaenzen die LLM-geschaetzte Reliability-Achse mit objektiven Signalen: - IFCN-Signatories (raw.githubusercontent.com/IFCN/verified-signatories): Plain-Text-Liste anerkannter Faktencheck-Organisationen. - EUvsDisinfo (Zenodo CSV): Pro-Kreml-Desinformations-Datenbank. Schema-Erweiterung: - ifcn_signatory, eu_disinfo_listed, eu_disinfo_case_count, eu_disinfo_last_seen, external_data_synced_at. Service src/services/external_reputation.py: - sync_ifcn_signatories(), sync_eu_disinfo(), apply_reputation_overrides(), sync_all() mit Domain-Normalisierung (lowercase, ohne www., ohne Schema). Reliability-Override-Regeln (laufen nach Approve und manuellem Sync): - ifcn_signatory=1 -> reliability=sehr_hoch - eu_disinfo_case_count >= 5 -> reliability=sehr_niedrig - eu_disinfo_case_count >= 1 -> Reliability eine Stufe runter (max niedrig) API: POST /api/sources/external-reputation/sync (Admin, BackgroundTask). Filter: ?ifcn_signatory=true, ?eu_disinfo_listed=true. UI: - Filter-Dropdown "Externe Reputation" im Quellen-Modal. - Badges: gruenes "IFCN" und rotes "EU-Desinfo (n)". - Tooltip macht Reliability-Quelle transparent: "(IFCN-Faktenchecker)", "(EU-Desinfo, n Faelle)" oder "(LLM-Schaetzung)". - "Externe Daten syncen"-Button im Review-Toolbar (Admin-only). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/database.py | 21 ++- src/models.py | 5 + src/routers/sources.py | 42 +++++ src/services/external_reputation.py | 268 ++++++++++++++++++++++++++++ src/static/css/style.css | 26 +++ src/static/dashboard.html | 7 + src/static/js/api.js | 3 + src/static/js/app.js | 16 ++ src/static/js/components.js | 25 ++- 9 files changed, 410 insertions(+), 3 deletions(-) create mode 100644 src/services/external_reputation.py diff --git a/src/database.py b/src/database.py index 54d6b7e..b8d9366 100644 --- a/src/database.py +++ b/src/database.py @@ -176,7 +176,12 @@ CREATE TABLE IF NOT EXISTS sources ( proposed_alignments_json TEXT, proposed_confidence REAL, proposed_reasoning TEXT, - proposed_at TIMESTAMP + proposed_at TIMESTAMP, + eu_disinfo_listed INTEGER DEFAULT 0, + eu_disinfo_case_count INTEGER DEFAULT 0, + eu_disinfo_last_seen TIMESTAMP, + ifcn_signatory INTEGER DEFAULT 0, + external_data_synced_at TIMESTAMP ); CREATE TABLE IF NOT EXISTS source_alignments ( @@ -668,6 +673,20 @@ async def init_db(): if any(c not in src_columns for c in ("political_orientation", "media_type", "reliability")): logger.info("Migration: Klassifikations-Spalten zu sources hinzugefuegt") + # Migration: externe Reputations-Daten (EUvsDisinfo + IFCN) + for col, ddl in [ + ("eu_disinfo_listed", "ALTER TABLE sources ADD COLUMN eu_disinfo_listed INTEGER DEFAULT 0"), + ("eu_disinfo_case_count", "ALTER TABLE sources ADD COLUMN eu_disinfo_case_count INTEGER DEFAULT 0"), + ("eu_disinfo_last_seen", "ALTER TABLE sources ADD COLUMN eu_disinfo_last_seen TIMESTAMP"), + ("ifcn_signatory", "ALTER TABLE sources ADD COLUMN ifcn_signatory INTEGER DEFAULT 0"), + ("external_data_synced_at", "ALTER TABLE sources ADD COLUMN external_data_synced_at TIMESTAMP"), + ]: + if col not in src_columns: + await db.execute(ddl) + await db.commit() + if any(c not in src_columns for c in ("eu_disinfo_listed", 
"ifcn_signatory")): + logger.info("Migration: externe Reputations-Spalten zu sources hinzugefuegt") + # Migration: source_alignments-Tabelle (Mehrfach-Tags fuer geopolitische Naehe) cursor = await db.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='source_alignments'" diff --git a/src/models.py b/src/models.py index 32d3bb7..7682310 100644 --- a/src/models.py +++ b/src/models.py @@ -210,6 +210,11 @@ class SourceResponse(BaseModel): classified_at: Optional[str] = None alignments: list[str] = [] is_global: bool = False + ifcn_signatory: bool = False + eu_disinfo_listed: bool = False + eu_disinfo_case_count: int = 0 + eu_disinfo_last_seen: Optional[str] = None + external_data_synced_at: Optional[str] = None # Source Discovery diff --git a/src/routers/sources.py b/src/routers/sources.py index 25a898f..e0f2014 100644 --- a/src/routers/sources.py +++ b/src/routers/sources.py @@ -6,6 +6,7 @@ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest from auth import get_current_user from database import db_dependency, get_db, refresh_source_counts +from services.external_reputation import apply_reputation_overrides, sync_all as sync_external_reputation from services.source_classifier import bulk_classify, classify_source from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES import aiosqlite @@ -90,6 +91,8 @@ async def list_sources( reliability: str = None, state_affiliated: bool = None, alignment: str = None, + ifcn_signatory: bool = None, + eu_disinfo_listed: bool = None, current_user: dict = Depends(get_current_user), db: aiosqlite.Connection = Depends(db_dependency), ): @@ -124,6 +127,12 @@ async def list_sources( if alignment: query += " AND EXISTS (SELECT 1 FROM source_alignments sa WHERE sa.source_id = s.id AND sa.alignment = ?)" params.append(alignment.lower()) + if ifcn_signatory is not None: + query += " AND s.ifcn_signatory = ?" + params.append(1 if ifcn_signatory else 0) + if eu_disinfo_listed is not None: + query += " AND s.eu_disinfo_listed = ?" 
+ params.append(1 if eu_disinfo_listed else 0) query += " ORDER BY s.source_type, s.category, s.name" cursor = await db.execute(query, params) @@ -133,6 +142,8 @@ async def list_sources( for d in results: d["is_global"] = d.get("tenant_id") is None d["state_affiliated"] = bool(d.get("state_affiliated")) + d["ifcn_signatory"] = bool(d.get("ifcn_signatory")) + d["eu_disinfo_listed"] = bool(d.get("eu_disinfo_listed")) d["alignments"] = alignments_map.get(d["id"], []) return results @@ -864,6 +875,11 @@ async def approve_classification( await _replace_alignments(db, source_id, [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS]) await _clear_proposed(db, source_id) await db.commit() + # Reliability-Override anwenden (IFCN/EUvsDisinfo) + try: + await apply_reputation_overrides(db, source_id) + except Exception as e: + logger.warning("Reputation-Override fuer source_id=%s fehlgeschlagen: %s", source_id, e) return {"source_id": source_id, "status": "approved"} @@ -939,6 +955,26 @@ async def trigger_bulk_classify( return {"status": "started", "limit": limit, "only_unclassified": only_unclassified} +@router.post("/external-reputation/sync") +async def trigger_external_reputation_sync( + background_tasks: BackgroundTasks, + current_user: dict = Depends(get_current_user), +): + """Startet Sync von IFCN- und EUvsDisinfo-Daten (Admin, Hintergrund).""" + if current_user.get("role") != "org_admin": + raise HTTPException(status_code=403, detail="Nur Admins koennen den externen Sync starten") + + async def _bg(): + db = await get_db() + try: + await sync_external_reputation(db) + finally: + await db.close() + + background_tasks.add_task(_bg) + return {"status": "started"} + + @router.post("/classification/bulk-approve") async def bulk_approve_classifications( min_confidence: float = 0.85, @@ -995,4 +1031,10 @@ async def bulk_approve_classifications( await _clear_proposed(db, src["id"]) approved_ids.append(src["id"]) await db.commit() + # Reliability-Override fuer alle gerade Approved + try: + for sid in approved_ids: + await apply_reputation_overrides(db, sid) + except Exception as e: + logger.warning("Bulk Reputation-Override fehlgeschlagen: %s", e) return {"approved_count": len(approved_ids), "min_confidence": min_confidence} diff --git a/src/services/external_reputation.py b/src/services/external_reputation.py new file mode 100644 index 0000000..1e900b0 --- /dev/null +++ b/src/services/external_reputation.py @@ -0,0 +1,268 @@ +"""Externe Reputations-Daten fuer Quellen. 
+ +Synchronisiert Domain-Listen von oeffentlichen Reputations-/Faktencheck-Datenbanken +und schreibt die Treffer in die sources-Spalten: + +- IFCN-Signatories (anerkannte Faktenchecker) -> ifcn_signatory +- EUvsDisinfo (pro-Kreml-Desinformation, Zenodo-CSV) -> eu_disinfo_listed, + eu_disinfo_case_count, eu_disinfo_last_seen + +Anschliessend wendet apply_reputation_overrides() Override-Regeln auf die +reliability-Spalte an: +- ifcn_signatory=1 -> reliability='sehr_hoch' +- eu_disinfo_case_count >= 5 -> reliability='sehr_niedrig' +- eu_disinfo_case_count >= 1 -> reliability eine Stufe runter (max bis 'niedrig') +""" +import csv +import io +import logging +from collections import defaultdict +from urllib.parse import urlparse + +import aiosqlite +import httpx + +logger = logging.getLogger("osint.external_reputation") + +IFCN_LIST_URL = "https://raw.githubusercontent.com/IFCN/verified-signatories/main/list" +EU_DISINFO_CSV_URL = "https://zenodo.org/records/10514307/files/euvsdisinfo_base.csv?download=1" + +HTTP_TIMEOUT = httpx.Timeout(60.0, connect=10.0) + +# Reliability-Skala in Stufenfolge (schlecht -> gut) +RELIABILITY_ORDER = ["sehr_niedrig", "niedrig", "gemischt", "hoch", "sehr_hoch"] + + +def _normalize_domain(raw: str | None) -> str | None: + """Normalisiert eine Domain: lowercase, ohne www., ohne Schema/Pfad.""" + if not raw: + return None + raw = raw.strip().lower() + if not raw: + return None + # Falls eine vollstaendige URL uebergeben wurde + if "://" in raw: + try: + raw = urlparse(raw).netloc or raw + except ValueError: + pass + # Pfad/Query strippen + raw = raw.split("/")[0].split("?")[0].split("#")[0] + if raw.startswith("www."): + raw = raw[4:] + return raw or None + + +async def _fetch_text(url: str) -> str: + """Laedt Text von einer URL. Wirft HTTPException bei Fehler.""" + async with httpx.AsyncClient(timeout=HTTP_TIMEOUT, follow_redirects=True) as client: + resp = await client.get(url) + resp.raise_for_status() + return resp.text + + +async def sync_ifcn_signatories(db: aiosqlite.Connection) -> dict: + """Laedt IFCN-Domain-Liste und matcht gegen sources.domain. + + Setzt ifcn_signatory=1 wo die Domain in der Liste vorkommt, sonst 0. + """ + text = await _fetch_text(IFCN_LIST_URL) + domains: set[str] = set() + for line in text.splitlines(): + d = _normalize_domain(line) + if d: + domains.add(d) + logger.info("IFCN-Liste geladen: %d Domains", len(domains)) + + # Aktuelle Quellen mit Domain laden + cursor = await db.execute( + "SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''" + ) + sources = [dict(r) for r in await cursor.fetchall()] + + matched_ids: list[int] = [] + unmatched_ids: list[int] = [] + for s in sources: + nd = _normalize_domain(s["domain"]) + if nd and nd in domains: + matched_ids.append(s["id"]) + else: + unmatched_ids.append(s["id"]) + + # Bulk-Update in zwei Statements + if matched_ids: + placeholders = ",".join("?" for _ in matched_ids) + await db.execute( + f"UPDATE sources SET ifcn_signatory = 1 WHERE id IN ({placeholders})", + matched_ids, + ) + if unmatched_ids: + placeholders = ",".join("?" 
for _ in unmatched_ids) + await db.execute( + f"UPDATE sources SET ifcn_signatory = 0 WHERE id IN ({placeholders})", + unmatched_ids, + ) + await db.commit() + logger.info("IFCN-Sync: %d Quellen als Faktenchecker markiert (von %d)", + len(matched_ids), len(sources)) + return { + "list_size": len(domains), + "sources_checked": len(sources), + "matched": len(matched_ids), + } + + +async def sync_eu_disinfo(db: aiosqlite.Connection) -> dict: + """Laedt EUvsDisinfo-CSV von Zenodo, aggregiert pro Domain, schreibt sources. + + - eu_disinfo_listed: 1 wenn Domain mindestens 1x als 'disinformation' debunkt + - eu_disinfo_case_count: Anzahl Disinformation-Faelle + - eu_disinfo_last_seen: spaetestes debunk_date + """ + text = await _fetch_text(EU_DISINFO_CSV_URL) + reader = csv.DictReader(io.StringIO(text)) + + # Per-Domain aggregieren (nur class='disinformation') + counts: dict[str, int] = defaultdict(int) + last_seen: dict[str, str] = {} + total_rows = 0 + for row in reader: + total_rows += 1 + if (row.get("class") or "").strip().lower() != "disinformation": + continue + d = _normalize_domain(row.get("article_domain")) + if not d: + continue + counts[d] += 1 + debunk_date = (row.get("debunk_date") or "").strip() + if debunk_date: + prev = last_seen.get(d) + if not prev or debunk_date > prev: + last_seen[d] = debunk_date + logger.info("EUvsDisinfo-CSV: %d Zeilen, %d Domains mit Desinformation", + total_rows, len(counts)) + + # Quellen laden + matchen + cursor = await db.execute( + "SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''" + ) + sources = [dict(r) for r in await cursor.fetchall()] + + matched = 0 + for s in sources: + nd = _normalize_domain(s["domain"]) + if nd and nd in counts: + await db.execute( + """UPDATE sources SET + eu_disinfo_listed = 1, + eu_disinfo_case_count = ?, + eu_disinfo_last_seen = ? + WHERE id = ?""", + (counts[nd], last_seen.get(nd), s["id"]), + ) + matched += 1 + else: + await db.execute( + """UPDATE sources SET + eu_disinfo_listed = 0, + eu_disinfo_case_count = 0, + eu_disinfo_last_seen = NULL + WHERE id = ?""", + (s["id"],), + ) + await db.commit() + logger.info("EUvsDisinfo-Sync: %d Quellen als Desinformations-Quelle markiert (von %d)", + matched, len(sources)) + return { + "rows_in_csv": total_rows, + "domains_with_disinfo_in_csv": len(counts), + "sources_checked": len(sources), + "matched": matched, + } + + +def _override_reliability(current: str | None, ifcn: bool, eu_count: int) -> str | None: + """Wendet Override-Regeln auf eine reliability-Stufe an. + + Rueckgabe: neue Stufe (oder None, wenn unveraendert). + """ + cur = current or "na" + + # IFCN gewinnt: zertifizierter Faktenchecker -> sehr_hoch (immer) + if ifcn: + return "sehr_hoch" if cur != "sehr_hoch" else None + + # EUvsDisinfo: Downgrade + if eu_count >= 5: + return "sehr_niedrig" if cur != "sehr_niedrig" else None + if eu_count >= 1: + # Eine Stufe runter, mindestens bis 'niedrig' + if cur == "na": + return "niedrig" + if cur in RELIABILITY_ORDER: + idx = RELIABILITY_ORDER.index(cur) + new_idx = max(0, idx - 1) + new = RELIABILITY_ORDER[new_idx] + # Mindeststufe 'niedrig' bei eu_count >= 1 + if RELIABILITY_ORDER.index(new) > RELIABILITY_ORDER.index("niedrig"): + new = "niedrig" + return new if new != cur else None + return None + + +async def apply_reputation_overrides(db: aiosqlite.Connection, source_id: int | None = None) -> dict: + """Wendet Reliability-Override-Regeln an. + + Wenn source_id angegeben ist, nur fuer diese Quelle. Sonst fuer alle Quellen. 
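+
+    Zur Verdeutlichung der Regeln aus _override_reliability ein paar Beispiele
+    (direkt aus der Logik oben abgeleitet):
+    - ifcn_signatory=1                            -> reliability='sehr_hoch'
+    - eu_disinfo_case_count >= 5                  -> 'sehr_niedrig'
+    - eu_disinfo_case_count=1, bisher 'sehr_hoch' -> 'niedrig' (Downgrade, gedeckelt auf 'niedrig')
+    - eu_disinfo_case_count=1, bisher 'niedrig'   -> 'sehr_niedrig' (eine Stufe runter)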
+ """ + if source_id is not None: + cursor = await db.execute( + "SELECT id, reliability, ifcn_signatory, eu_disinfo_case_count " + "FROM sources WHERE id = ?", + (source_id,), + ) + else: + cursor = await db.execute( + "SELECT id, reliability, ifcn_signatory, eu_disinfo_case_count FROM sources" + ) + sources = [dict(r) for r in await cursor.fetchall()] + + changed = 0 + for s in sources: + new = _override_reliability( + s.get("reliability"), + bool(s.get("ifcn_signatory")), + int(s.get("eu_disinfo_case_count") or 0), + ) + if new is not None: + await db.execute( + "UPDATE sources SET reliability = ? WHERE id = ?", + (new, s["id"]), + ) + changed += 1 + await db.commit() + logger.info("Reliability-Override: %d Quellen angepasst (von %d gepruefte)", + changed, len(sources)) + return {"checked": len(sources), "changed": changed} + + +async def sync_all(db: aiosqlite.Connection) -> dict: + """Vollstaendiger Sync: IFCN + EUvsDisinfo + Reliability-Override. + + Setzt external_data_synced_at fuer alle Quellen. + """ + ifcn_result = await sync_ifcn_signatories(db) + eu_result = await sync_eu_disinfo(db) + override_result = await apply_reputation_overrides(db) + + await db.execute( + "UPDATE sources SET external_data_synced_at = CURRENT_TIMESTAMP " + "WHERE domain IS NOT NULL AND domain != ''" + ) + await db.commit() + + return { + "ifcn": ifcn_result, + "eu_disinfo": eu_result, + "override": override_result, + } diff --git a/src/static/css/style.css b/src/static/css/style.css index 777d490..4b03934 100644 --- a/src/static/css/style.css +++ b/src/static/css/style.css @@ -3759,6 +3759,32 @@ a.dev-source-pill:hover { line-height: 1; } +.source-ifcn-badge { + display: inline-flex; + align-items: center; + padding: 1px 6px; + border-radius: var(--radius); + background: #e8f5e9; + color: #1b5e20; + border: 1px solid #66bb6a; + font-size: 10px; + font-weight: 600; + letter-spacing: 0.3px; +} + +.source-eu-disinfo-badge { + display: inline-flex; + align-items: center; + padding: 1px 6px; + border-radius: var(--radius); + background: #ffebee; + color: #b71c1c; + border: 1px solid #c62828; + font-size: 10px; + font-weight: 600; + letter-spacing: 0.3px; +} + .source-alignment-chip-badge { display: inline-flex; align-items: center; diff --git a/src/static/dashboard.html b/src/static/dashboard.html index 8e73d59..f664cf9 100644 --- a/src/static/dashboard.html +++ b/src/static/dashboard.html @@ -538,6 +538,12 @@ + + -> ohne checked (UI-Default ist jetzt unchecked, User muss bewusst aktivieren fuer internationale Lagen) - Tooltip-Text ergaenzt: "Deaktiviert (Standard): ... empfohlen fuer DACH-Lagen." Bestandslagen sind nicht betroffen - DB-Schema-Default INTEGER DEFAULT 1 bleibt unveraendert, fuer alle existierenden Lagen behaelt international seinen aktuellen Wert. 
Damit ist die Buckelwal-Diagnose komplett geloest: - Bug 1 (rss_parser min_matches adaptiv) seit a08df3d auf main - Bug 2 (Eigennamen-Pflicht-Keywords) seit e83f80d auf main - Bug 3 (international-Default) jetzt auf develop, gleich Cherry-pick auf main --- src/models.py | 2 +- src/static/dashboard.html | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/models.py b/src/models.py index 7682310..87aefa1 100644 --- a/src/models.py +++ b/src/models.py @@ -54,7 +54,7 @@ class IncidentCreate(BaseModel): refresh_interval: int = Field(default=15, ge=10, le=10080) refresh_start_time: Optional[str] = Field(default=None, pattern=r"^([01]\d|2[0-3]):[0-5]\d$") retention_days: int = Field(default=0, ge=0, le=999) - international_sources: bool = True + international_sources: bool = False include_telegram: bool = False visibility: str = Field(default="public", pattern="^(public|private)$") diff --git a/src/static/dashboard.html b/src/static/dashboard.html index f664cf9..09175d1 100644 --- a/src/static/dashboard.html +++ b/src/static/dashboard.html @@ -362,9 +362,9 @@
From 72b306d90c083a1796d3472268ba63331c633159 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sat, 9 May 2026 04:43:01 +0000 Subject: [PATCH 11/15] fix(source_health): tenant-faehig + History (Phase 2 in den Monitor ziehen) Phase 2 hatte die Verbesserungen nur in der Verwaltung (src/shared/services/source_health.py). Der Daily-Health-Check laeuft aber im Monitor-Backend (Cron 04:00 UTC) und nutzte deshalb weiter den alten Code - Folge: - Tenant-Quellen wurden NIE gecheckt (0 Eintraege in source_health_checks fuer tenant_id IS NOT NULL). - source_health_history blieb leer. Diese Aenderung holt die Phase-2-Logik in den Monitor: - services/source_health.py: Verwaltung-Version 1:1 uebernommen (tenant_id-Filter weg + History-Save vor DELETE + UA/Timeout aus config). - config.py: HEALTH_CHECK_USER_AGENT + HEALTH_CHECK_TIMEOUT_S ergaenzt. Manueller Test auf Staging-Monitor: 283 Quellen geprueft, 253 Issues, 61 davon Tenant-Quellen. History 0 -> 458 Eintraege. Damit ist die shared/-LOCKED-FILES-Markierung in der Verwaltung obsolet - beide Repos haben jetzt den gleichen Code. --- src/config.py | 6 ++++++ src/services/source_health.py | 28 ++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/config.py b/src/config.py index 21a48f2..1b39ea5 100644 --- a/src/config.py +++ b/src/config.py @@ -95,3 +95,9 @@ TELEGRAM_API_ID = int(os.environ.get("TELEGRAM_API_ID", "0")) TELEGRAM_API_HASH = os.environ.get("TELEGRAM_API_HASH", "") TELEGRAM_SESSION_PATH = os.environ.get("TELEGRAM_SESSION_PATH", "/home/claude-dev/.telegram/telegram_session") +# Health-Check (genutzt von services/source_health.py) +HEALTH_CHECK_USER_AGENT = os.environ.get( + "HEALTH_CHECK_USER_AGENT", + "Mozilla/5.0 (compatible; AegisSight-HealthCheck/1.0)", +) +HEALTH_CHECK_TIMEOUT_S = float(os.environ.get("HEALTH_CHECK_TIMEOUT_S", "15.0")) diff --git a/src/services/source_health.py b/src/services/source_health.py index 0f073c9..e6b1cdd 100644 --- a/src/services/source_health.py +++ b/src/services/source_health.py @@ -2,29 +2,45 @@ import asyncio import logging import json +import uuid from urllib.parse import urlparse import httpx import feedparser import aiosqlite +try: + from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S +except ImportError: + HEALTH_CHECK_USER_AGENT = "Mozilla/5.0 (compatible; AegisSight-HealthCheck/1.0)" + HEALTH_CHECK_TIMEOUT_S = 15.0 + logger = logging.getLogger("osint.source_health") async def run_health_checks(db: aiosqlite.Connection) -> dict: - """Führt alle Health-Checks für aktive Grundquellen durch.""" + """Führt Health-Checks für alle aktiven Quellen durch (global + Tenant).""" logger.info("Starte Quellen-Health-Check...") - # Alle aktiven Grundquellen laden + # Alle aktiven Quellen laden (global UND Tenant-spezifisch) cursor = await db.execute( "SELECT id, name, url, domain, source_type, article_count, last_seen_at " - "FROM sources WHERE status = 'active' AND tenant_id IS NULL" + "FROM sources WHERE status = 'active' " ) sources = [dict(row) for row in await cursor.fetchall()] - # Aktuelle Health-Check-Ergebnisse löschen (werden neu geschrieben) + # Bisherigen Stand in History archivieren, dann frisch starten + run_id = uuid.uuid4().hex[:12] + await db.execute( + "INSERT INTO source_health_history " + "(run_id, source_id, check_type, status, message, details, checked_at) " + "SELECT ?, source_id, check_type, status, message, details, checked_at " + "FROM source_health_checks", + (run_id,), + ) await db.execute("DELETE FROM source_health_checks") 
await db.commit() + logger.info(f"Health-Check Run {run_id}: vorigen Stand archiviert") checks_done = 0 issues_found = 0 @@ -33,9 +49,9 @@ async def run_health_checks(db: aiosqlite.Connection) -> dict: sources_with_url = [s for s in sources if s["url"]] async with httpx.AsyncClient( - timeout=15.0, + timeout=HEALTH_CHECK_TIMEOUT_S, follow_redirects=True, - headers={"User-Agent": "Mozilla/5.0 (compatible; OSINT-Monitor/1.0)"}, + headers={"User-Agent": HEALTH_CHECK_USER_AGENT}, ) as client: for i in range(0, len(sources_with_url), 5): batch = sources_with_url[i:i + 5] From 1ee6c4ddf1ebdf0f146c6c9eddb675819527da28 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sat, 9 May 2026 04:45:18 +0000 Subject: [PATCH 12/15] fix(source_health): URL-Schema vor httpx.get sicherstellen Telegram-Quellen mit url=t.me/kanal (ohne https:// Prefix) liessen httpx mit "ValueError: unknown url type" crashen. Fix: vor dem Request https:// vorne anhaengen wenn kein Schema vorhanden ist. Beobachtet auf Live: 110 Health-Errors, davon einige Telegram-Kanaele mit "ValueError: unknown url type:" als Fehlermeldung. --- src/services/source_health.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/services/source_health.py b/src/services/source_health.py index e6b1cdd..9837cda 100644 --- a/src/services/source_health.py +++ b/src/services/source_health.py @@ -112,6 +112,10 @@ async def _check_source_reachability( checks = [] url = source["url"] + # URL-Schema sicherstellen: t.me-Kanaele und andere Domains koennen ohne https:// vorkommen + if url and not url.startswith(("http://", "https://")): + url = "https://" + url.lstrip("/") + try: resp = await client.get(url) From 8af0fa07c85bdd9546692ac498b615be1eef7595 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sat, 9 May 2026 04:56:06 +0000 Subject: [PATCH 13/15] feat(source_health): fetch_strategy + Retry mit Googlebot/removepaywalls (Phase 18) Pro Quelle ein Feld sources.fetch_strategy (default | googlebot | paywall | skip): - default: normaler UA, Retry mit Googlebot bei 403/406/429. - googlebot: direkt mit Googlebot-UA (fuer SEO-freundliche Sites). - paywall: Anfrage via removepaywalls.com (fuer Spiegel+/SZ+/FT etc.). - skip: Health-Check ueberspringen (bekannte unerreichbare Quellen wie Login-only). Pre-Flagging in der Migration: FT/WSJ/NZZ/Handelsblatt/WiWo -> paywall, Rheinische Post/Verfassungsschutz -> googlebot. (Test mit den vier prominent fehlerhaften Quellen zeigt: FT/RP/Verfassungsschutz sind besonders streng, gehen auch nicht ueber Googlebot/removepaywalls durch. Fuer milder restriktive Quellen wirkt der Retry-Mechanismus.) 
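Die oben erwaehnte Migration (fetch_strategy-Spalte plus Pre-Flagging) ist nicht Teil dieses Diffs. Eine minimale Skizze, wie der Schritt im Stil der uebrigen sources-Migrationen in database.py aussehen koennte; Spaltenname laut Beschreibung, die konkreten Domain-Listen sind nur angenommene Beispiele:

    # Skizze (Annahme): laeuft im bestehenden src_columns-Migrationsblock von database.py,
    # db, src_columns und logger kommen aus diesem Kontext.
    if "fetch_strategy" not in src_columns:
        await db.execute(
            "ALTER TABLE sources ADD COLUMN fetch_strategy TEXT DEFAULT 'default'"
        )
        # Pre-Flagging bekannter Problemquellen (Domains beispielhaft angenommen)
        await db.execute(
            "UPDATE sources SET fetch_strategy = 'paywall' WHERE domain IN "
            "('ft.com', 'wsj.com', 'nzz.ch', 'handelsblatt.com', 'wiwo.de')"
        )
        await db.execute(
            "UPDATE sources SET fetch_strategy = 'googlebot' WHERE domain IN "
            "('rp-online.de', 'verfassungsschutz.de')"
        )
        await db.commit()
        logger.info("Migration: fetch_strategy zu sources hinzugefuegt")

Einzelne Quellen lassen sich spaeter per UPDATE auf eine andere Strategie setzen, ohne den Health-Check-Code anzufassen.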
--- src/services/source_health.py | 58 ++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/src/services/source_health.py b/src/services/source_health.py index 9837cda..b07b5a0 100644 --- a/src/services/source_health.py +++ b/src/services/source_health.py @@ -15,6 +15,17 @@ except ImportError: HEALTH_CHECK_USER_AGENT = "Mozilla/5.0 (compatible; AegisSight-HealthCheck/1.0)" HEALTH_CHECK_TIMEOUT_S = 15.0 +# Phase 18: alternative User-Agents fuer Bot-Block-Bypass +USER_AGENT_GOOGLEBOT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" +USER_AGENT_BROWSER = ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/120.0 Safari/537.36" +) +REMOVEPAYWALLS_PREFIX = "https://www.removepaywalls.com/search?url=" + +# HTTP-Codes, die einen Retry mit anderem UA rechtfertigen +RETRY_ON_STATUS = {403, 406, 429} + logger = logging.getLogger("osint.source_health") @@ -24,7 +35,8 @@ async def run_health_checks(db: aiosqlite.Connection) -> dict: # Alle aktiven Quellen laden (global UND Tenant-spezifisch) cursor = await db.execute( - "SELECT id, name, url, domain, source_type, article_count, last_seen_at " + "SELECT id, name, url, domain, source_type, article_count, last_seen_at, " + "COALESCE(fetch_strategy, 'default') AS fetch_strategy " "FROM sources WHERE status = 'active' " ) sources = [dict(row) for row in await cursor.fetchall()] @@ -108,16 +120,54 @@ async def run_health_checks(db: aiosqlite.Connection) -> dict: async def _check_source_reachability( client: httpx.AsyncClient, source: dict, ) -> list[dict]: - """Prüft Erreichbarkeit und Feed-Validität einer Quelle.""" + """Prüft Erreichbarkeit und Feed-Validität einer Quelle. + + Phase 18: pro Quelle eine fetch_strategy ('default' | 'googlebot' | 'paywall' | 'skip'). + Bei 'default' wird im Fehlerfall (403/406/429) ein Retry mit Googlebot-UA gemacht. + Bei 'paywall' wird auf removepaywalls.com umgeleitet. + Bei 'skip' wird kein Check ausgeführt. + """ checks = [] url = source["url"] + strategy = source.get("fetch_strategy") or "default" - # URL-Schema sicherstellen: t.me-Kanaele und andere Domains koennen ohne https:// vorkommen + # 'skip' -> kein Check (bekannte unerreichbare Quellen, z.B. 
Login-only) + if strategy == "skip": + checks.append({ + "type": "reachability", "status": "ok", + "message": "Health-Check uebersprungen (fetch_strategy=skip)", + }) + return checks + + # URL-Schema sicherstellen if url and not url.startswith(("http://", "https://")): url = "https://" + url.lstrip("/") + # Initialen UA waehlen: googlebot direkt; paywall ueber removepaywalls; default normal + initial_ua = HEALTH_CHECK_USER_AGENT + initial_url = url + if strategy == "googlebot": + initial_ua = USER_AGENT_GOOGLEBOT + elif strategy == "paywall": + initial_url = REMOVEPAYWALLS_PREFIX + url + initial_ua = USER_AGENT_BROWSER + try: - resp = await client.get(url) + resp = await client.get(initial_url, headers={"User-Agent": initial_ua}) + + # Bot-Block-Retry nur bei strategy='default' + if ( + strategy == "default" + and resp.status_code in RETRY_ON_STATUS + ): + blocked_status = resp.status_code + retry = await client.get(url, headers={"User-Agent": USER_AGENT_GOOGLEBOT}) + if retry.status_code < 400: + resp = retry # Retry hat geholfen + checks.append({ + "type": "reachability", "status": "warning", + "message": f"Erreichbar nur mit Googlebot-UA (Standard-UA bekam HTTP {blocked_status})", + }) + # Hinweis-Eintrag, aber Hauptcheck folgt unten als 'ok' weil resp jetzt die Retry-Antwort ist if resp.status_code >= 400: checks.append({ From f22c8dbc618484ec34b588e79e93132a6097844f Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sat, 9 May 2026 05:00:11 +0000 Subject: [PATCH 14/15] fix: removepaywalls.com -> removepaywall.com (Singular ist die echte Domain) User-Korrektur: die echte Service-Domain heisst removepaywall.com (Singular). removepaywalls.com (Plural) liefert HTTP 403 - vermutlich nicht der gleiche Service oder gar nicht mehr existent. Betrifft: - services/source_health.py: REMOVEPAYWALLS_PREFIX-Konstante (Phase 18) - agents/researcher.py: Claude-Prompts fuer Paywall-Hinweise (zwei Stellen) Verifiziert mit curl: removepaywall.com -> 200, removepaywalls.com -> 403. --- src/agents/researcher.py | 4 ++-- src/services/source_health.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/agents/researcher.py b/src/agents/researcher.py index 76b25dc..734b62d 100644 --- a/src/agents/researcher.py +++ b/src/agents/researcher.py @@ -77,7 +77,7 @@ REGELN: {language_instruction} - Faktenbasiert und neutral - keine Spekulationen - KRITISCH für source_url: Kopiere die EXAKTE URL aus den WebSearch-Ergebnissen. Erfinde oder konstruiere NIEMALS URLs aus Mustern oder Erinnerung. Wenn du die exakte URL eines Artikels nicht aus den Suchergebnissen hast, lass diesen Artikel komplett weg. -- Nutze removepaywalls.com für Paywall-geschützte Artikel (z.B. Spiegel+, Zeit+, SZ+): https://www.removepaywalls.com/search?url=ARTIKEL_URL +- Nutze removepaywall.com für Paywall-geschützte Artikel (z.B. Spiegel+, Zeit+, SZ+): https://www.removepaywall.com/search?url=ARTIKEL_URL - Nutze WebFetch um die 3-5 wichtigsten Artikel vollständig abzurufen und zusammenzufassen Gib die Ergebnisse AUSSCHLIESSLICH als JSON-Array zurück, ohne Erklärungen davor oder danach. @@ -124,7 +124,7 @@ Nutze spezifische Suchbegriffe für institutionelle Quellen. Ziel: 6-10 weitere PHASE 4 — VERIFIKATION UND VERTIEFUNG: Nutze WebFetch um die 6-10 wichtigsten Artikel vollständig abzurufen und ausführlich zusammenzufassen. Priorisiere dabei Primärquellen und investigative Berichte. -Nutze removepaywalls.com für Paywall-geschützte Artikel (z.B.
From a716726e36261ac916322d89ed29156776e568c2 Mon Sep 17 00:00:00 2001
From: Claude Code
Date: Sat, 9 May 2026 05:02:18 +0000
Subject: [PATCH 15/15] fix(source_health): do not route the feed URL through
 removepaywall for the paywall strategy

removepaywall.com returns HTML (an article renderer), not XML - the feed
validity check therefore failed with "Kein gueltiger RSS/Atom-Feed".

Fix:
- paywall: load the feed URL directly with the browser UA (no URL rewrite).
- paywall + 4xx: status=warning (expected), skip the feed validity check.
- removepaywall.com stays in the researcher prompt for article content
  (that is the correct use case).
---
 src/services/source_health.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)
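
[Annotation, not part of the commit message or the diff] A minimal sketch of the behaviour this patch aims for, under the same assumptions as above (importable module path, illustrative mock and source values): a paywall source whose feed answers direct requests with 403 should yield a single warning and skip the feed-validity step.

import asyncio
import httpx

from src.services.source_health import _check_source_reachability  # path assumed


async def main() -> None:
    # Stand-in for a paywalled feed host that blocks direct requests with 403.
    transport = httpx.MockTransport(lambda request: httpx.Response(403))
    async with httpx.AsyncClient(transport=transport) as client:
        source = {
            "id": 2, "name": "Paywalled Feed", "url": "https://paywalled.example/feed.xml",
            "domain": "paywalled.example", "source_type": "rss", "article_count": 0,
            "last_seen_at": None, "fetch_strategy": "paywall",
        }
        checks = await _check_source_reachability(client, source)
        # Expected after this patch: one 'warning' check and an early return,
        # i.e. no "Kein gueltiger RSS/Atom-Feed" error from the feed-validity step.
        print(checks)


asyncio.run(main())
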
diff --git a/src/services/source_health.py b/src/services/source_health.py
index ed1242c..6cc0e10 100644
--- a/src/services/source_health.py
+++ b/src/services/source_health.py
@@ -143,18 +143,28 @@ async def _check_source_reachability(
     if url and not url.startswith(("http://", "https://")):
         url = "https://" + url.lstrip("/")
 
-    # Initialen UA waehlen: googlebot direkt; paywall ueber removepaywalls; default normal
+    # Initialen UA waehlen
     initial_ua = HEALTH_CHECK_USER_AGENT
     initial_url = url
     if strategy == "googlebot":
         initial_ua = USER_AGENT_GOOGLEBOT
     elif strategy == "paywall":
-        initial_url = REMOVEPAYWALLS_PREFIX + url
+        # Paywall-Quellen: Feed-URL direkt laden, aber mit Browser-UA (versucht Bot-Detection zu umgehen).
+        # removepaywall.com ist fuer Article-URLs, NICHT fuer RSS-Feed-Validity-Checks
+        # (gibt HTML statt XML zurueck). Researcher-Pipeline nutzt removepaywall fuer Inhalte.
         initial_ua = USER_AGENT_BROWSER
 
     try:
         resp = await client.get(initial_url, headers={"User-Agent": initial_ua})
 
+        # Paywall-Quellen: 4xx ist erwartbar (Bot-Detection), als warning markieren statt error
+        if strategy == "paywall" and resp.status_code in RETRY_ON_STATUS:
+            checks.append({
+                "type": "reachability", "status": "warning",
+                "message": f"Paywall-Quelle, Direkt-Zugang HTTP {resp.status_code} (Researcher-Pipeline nutzt removepaywall.com fuer Inhalte)",
+            })
+            return checks  # Feed-Validity-Check skippen (Paywall liefert kein RSS)
+
         # Bot-Block-Retry nur bei strategy='default'
         if (
             strategy == "default"
@@ -167,7 +177,6 @@ async def _check_source_reachability(
                     "type": "reachability", "status": "warning",
                     "message": f"Erreichbar nur mit Googlebot-UA (Standard-UA bekam HTTP {blocked_status})",
                 })
-            # Hinweis-Eintrag, aber Hauptcheck folgt unten als 'ok' weil resp jetzt die Retry-Antwort ist
 
         if resp.status_code >= 400:
             checks.append({