Promote develop → main (2026-05-09 10:57 UTC)

This commit was merged in pull request #21.
Dieser Commit ist enthalten in:
2026-05-09 12:57:13 +02:00
Commit ed057fa6f5
13 geänderte Dateien mit 2409 neuen und 67 gelöschten Zeilen

Datei anzeigen

@@ -0,0 +1,64 @@
"""Einmalige LLM-Klassifikation aller noch unklassifizierten Quellen.
Verwendung:
python3 scripts/migrate_sources_classification.py --limit 50
python3 scripts/migrate_sources_classification.py --limit 500 # Alle
python3 scripts/migrate_sources_classification.py --recheck-pending # bereits Pending neu
Schreibt Vorschlaege in proposed_*-Spalten. Approval erfolgt anschliessend
ueber das Verwaltungs-UI / API (POST /api/sources/{id}/classification/approve).
"""
import argparse
import asyncio
import logging
import sys
from pathlib import Path
# src/ in PYTHONPATH aufnehmen, wenn Skript direkt aufgerufen wird
HERE = Path(__file__).resolve().parent
SRC = HERE.parent / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from database import get_db # noqa: E402
from services.source_classifier import bulk_classify # noqa: E402
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
)
logger = logging.getLogger("migrate_sources")
async def main():
parser = argparse.ArgumentParser(description="LLM-Klassifikation aller Quellen.")
parser.add_argument("--limit", type=int, default=50, help="Max. Quellen pro Lauf")
parser.add_argument(
"--recheck-pending",
action="store_true",
help="Auch Quellen mit classification_source='llm_pending' neu klassifizieren",
)
args = parser.parse_args()
db = await get_db()
try:
result = await bulk_classify(
db,
limit=args.limit,
only_unclassified=not args.recheck_pending,
)
finally:
await db.close()
print(f"Verarbeitet: {result['processed']}")
print(f"Erfolgreich: {result['success']}")
print(f"Fehler: {len(result['errors'])}")
print(f"Kosten: ${result['total_cost_usd']:.4f}")
if result["errors"]:
print("\nFehler-Details:")
for e in result["errors"][:10]:
print(f" source_id={e['source_id']}: {e['error']}")
if __name__ == "__main__":
asyncio.run(main())

Datei anzeigen

@@ -16,7 +16,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
VORFALL: {title} VORFALL: {title}
KONTEXT: {description} KONTEXT: {description}
VORHANDENE MELDUNGEN: {fact_context_block}VORHANDENE MELDUNGEN:
{articles_text} {articles_text}
AUFTRAG: AUFTRAG:
@@ -59,7 +59,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
THEMA: {title} THEMA: {title}
KONTEXT: {description} KONTEXT: {description}
VORLIEGENDE QUELLEN: {fact_context_block}VORLIEGENDE QUELLEN:
{articles_text} {articles_text}
AUFTRAG: AUFTRAG:
@@ -118,7 +118,7 @@ BISHERIGES LAGEBILD:
BISHERIGE QUELLEN: BISHERIGE QUELLEN:
{previous_sources_text} {previous_sources_text}
NEUE MELDUNGEN SEIT DEM LETZTEN UPDATE: {fact_context_block}NEUE MELDUNGEN SEIT DEM LETZTEN UPDATE:
{new_articles_text} {new_articles_text}
AUFTRAG: AUFTRAG:
@@ -165,7 +165,7 @@ BISHERIGES BRIEFING:
BISHERIGE QUELLEN: BISHERIGE QUELLEN:
{previous_sources_text} {previous_sources_text}
NEUE QUELLEN SEIT DEM LETZTEN UPDATE: {fact_context_block}NEUE QUELLEN SEIT DEM LETZTEN UPDATE:
{new_articles_text} {new_articles_text}
AUFTRAG: AUFTRAG:
@@ -264,6 +264,112 @@ Antworte AUSSCHLIESSLICH als JSON-Objekt — KEINE Erklärung, KEINE Einleitung:
{{"relevant_ids": [1, 3, 7]}}""" {{"relevant_ids": [1, 3, 7]}}"""
# Status-Gruppen fuer den Fakten-Kontext im Analyse-Prompt.
# adhoc nutzt confirmed/unconfirmed/contradicted/developing,
# research nutzt established/unverified/disputed/developing — beide Domaenen
# werden in dieselben vier Anzeige-Gruppen abgebildet.
_FACT_STATUS_GROUPS = [
("Bestätigt (mehrere unabhängige Quellen oder durch Faktencheck als gesichert eingestuft):",
{"confirmed", "established"}),
("Umstritten (Quellen widersprechen sich oder Faktencheck hat Widersprüche dokumentiert):",
{"contradicted", "disputed"}),
("Unbestätigt (nur eine einzelne Quelle, eine unabhängige Bestätigung steht aus):",
{"unconfirmed", "unverified"}),
("In Entwicklung (laufender Sachverhalt, Stand offen):",
{"developing"}),
]
_FACT_STATUS_PRIORITY = {
"confirmed": 5, "established": 5,
"contradicted": 4, "disputed": 4,
"unconfirmed": 3, "unverified": 3,
"developing": 1,
}
def build_fact_context_block(
existing_facts: list[dict] | None,
new_or_updated_facts: list[dict] | None,
incident_type: str,
max_total: int = 20,
) -> str:
"""Baut den 'GEPRUEFTE FAKTEN'-Block fuer den Analyse-Prompt.
Wird vom Orchestrator zwischen Faktencheck und Lagebild aufgerufen, damit
das Lagebild auf gepruefter Faktenbasis schreibt und Unklarheiten explizit
benennt. Bei leerer Faktenliste wird ein leerer String zurueckgegeben — der
Prompt laeuft dann ohne Fakten-Kontext (Fallback bei Faktencheck-Fail oder
bei Lagen ohne bisherige Fakten).
"""
existing_facts = existing_facts or []
new_or_updated_facts = new_or_updated_facts or []
if not existing_facts and not new_or_updated_facts:
return ""
seen_claims: set[str] = set()
merged: list[dict] = []
# Neue/aktualisierte Fakten zuerst (Status ist aktueller Stand).
for f in new_or_updated_facts:
c = (f.get("claim") or "").strip().lower()
if not c or c in seen_claims:
continue
seen_claims.add(c)
merged.append(f)
# Dann alte unveraenderte Fakten.
for f in existing_facts:
c = (f.get("claim") or "").strip().lower()
if not c or c in seen_claims:
continue
seen_claims.add(c)
merged.append(f)
if not merged:
return ""
merged.sort(key=lambda f: (
-_FACT_STATUS_PRIORITY.get((f.get("status") or "").lower(), 0),
-(f.get("sources_count") or 0),
))
merged = merged[:max_total]
grouped: dict[str, list[dict]] = {label: [] for label, _ in _FACT_STATUS_GROUPS}
for f in merged:
s = (f.get("status") or "").lower()
for label, codes in _FACT_STATUS_GROUPS:
if s in codes:
grouped[label].append(f)
break
if not any(grouped.values()):
return ""
lines: list[str] = []
lines.append("GEPRÜFTE FAKTEN (Stand nach dem Faktencheck dieses Refresh, max. {n} priorisiert):".format(n=max_total))
for label, _codes in _FACT_STATUS_GROUPS:
items = grouped[label]
if not items:
continue
lines.append("")
lines.append(label)
for f in items:
claim = (f.get("claim") or "").strip()
sc = f.get("sources_count") or 0
sc_text = f" ({sc} {'Quellen' if sc != 1 else 'Quelle'})" if sc else ""
lines.append(f"- {claim}{sc_text}")
lines.append("")
lines.append("AUSSAGE-DISZIPLIN für das Lagebild:")
lines.append("- Bestätigte Fakten als Grundgerüst nehmen, ohne Hedging.")
lines.append("- Umstrittene Punkte explizit als umstritten kennzeichnen, beide Seiten knapp benennen.")
lines.append("- Unbestätigtes klar einordnen ('Eine einzelne Quelle berichtet ...', 'Eine unabhängige Bestätigung steht aus.').")
lines.append("- Bei Aussagen, die durch keinen geprüften Fakt gedeckt sind und auch nicht direkt aus einer der vorliegenden Meldungen hervorgehen: NICHT spekulieren — entweder weglassen oder als unklar kennzeichnen.")
lines.append("- Triff KEINE Aussagen, die mit den oben gelisteten geprüften Fakten in Widerspruch stehen.")
lines.append("")
return "\n".join(lines)
class AnalyzerAgent: class AnalyzerAgent:
"""Analysiert und übersetzt Meldungen über Claude CLI.""" """Analysiert und übersetzt Meldungen über Claude CLI."""
@@ -290,7 +396,7 @@ class AnalyzerAgent:
articles_text += f"Inhalt: {content[:800]}\n" articles_text += f"Inhalt: {content[:800]}\n"
return articles_text return articles_text
async def analyze(self, title: str, description: str, articles: list[dict], incident_type: str = "adhoc") -> tuple[dict | None, ClaudeUsage | None]: async def analyze(self, title: str, description: str, articles: list[dict], incident_type: str = "adhoc", fact_context_block: str = "") -> tuple[dict | None, ClaudeUsage | None]:
"""Erstanalyse: Analysiert alle Meldungen zu einem Vorfall (erster Refresh).""" """Erstanalyse: Analysiert alle Meldungen zu einem Vorfall (erster Refresh)."""
if not articles: if not articles:
return None, None return None, None
@@ -306,6 +412,7 @@ class AnalyzerAgent:
articles_text=articles_text, articles_text=articles_text,
today=today, today=today,
output_language=OUTPUT_LANGUAGE, output_language=OUTPUT_LANGUAGE,
fact_context_block=fact_context_block,
) )
try: try:
@@ -327,6 +434,7 @@ class AnalyzerAgent:
previous_summary: str, previous_summary: str,
previous_sources_json: str | None, previous_sources_json: str | None,
incident_type: str = "adhoc", incident_type: str = "adhoc",
fact_context_block: str = "",
) -> tuple[dict | None, ClaudeUsage | None]: ) -> tuple[dict | None, ClaudeUsage | None]:
"""Inkrementelle Analyse: Aktualisiert das Lagebild mit nur den neuen Artikeln. """Inkrementelle Analyse: Aktualisiert das Lagebild mit nur den neuen Artikeln.
@@ -369,6 +477,7 @@ class AnalyzerAgent:
new_articles_text=new_articles_text, new_articles_text=new_articles_text,
today=today, today=today,
output_language=OUTPUT_LANGUAGE, output_language=OUTPUT_LANGUAGE,
fact_context_block=fact_context_block,
) )
try: try:

Datei anzeigen

@@ -1299,18 +1299,22 @@ class AgentOrchestrator:
except Exception as e: except Exception as e:
logger.warning("Bias-Anreicherung fehlgeschlagen (Pipeline laeuft weiter): %s", e) logger.warning("Bias-Anreicherung fehlgeschlagen (Pipeline laeuft weiter): %s", e)
# --- Analyse-Task --- # --- Analyse-Task (wird nach _do_factcheck mit fact_context_block aufgerufen) ---
async def _do_analysis(): async def _do_analysis(fact_context_block: str = ""):
analyzer = AnalyzerAgent() analyzer = AnalyzerAgent()
if previous_summary and new_count > 0: if previous_summary and new_count > 0:
logger.info(f"Inkrementelle Analyse: {new_count} neue Artikel zum bestehenden Lagebild") logger.info(f"Inkrementelle Analyse: {new_count} neue Artikel zum bestehenden Lagebild")
return await analyzer.analyze_incremental( return await analyzer.analyze_incremental(
title, description, new_articles_for_analysis, title, description, new_articles_for_analysis,
previous_summary, previous_sources_json, incident_type, previous_summary, previous_sources_json, incident_type,
fact_context_block=fact_context_block,
) )
else: else:
logger.info("Erstanalyse: Alle Artikel werden analysiert") logger.info("Erstanalyse: Alle Artikel werden analysiert")
return await analyzer.analyze(title, description, all_articles_preloaded, incident_type) return await analyzer.analyze(
title, description, all_articles_preloaded, incident_type,
fact_context_block=fact_context_block,
)
# --- Faktencheck-Task --- # --- Faktencheck-Task ---
async def _do_factcheck(): async def _do_factcheck():
@@ -1344,20 +1348,61 @@ class AgentOrchestrator:
articles_for_check = [dict(row) for row in await cursor.fetchall()] articles_for_check = [dict(row) for row in await cursor.fetchall()]
return await factchecker.check(title, articles_for_check, incident_type) return await factchecker.check(title, articles_for_check, incident_type)
# Pipeline-Schritte 6+7: Lagebild verfassen + Fakten prüfen (Start, parallel) # Pipeline-Schritt 6: Faktencheck zuerst (sequenziell). Liefert den
await _pipe_start("summary") # Faktenkontext fuer das Lagebild, damit dieses auf geprueftem Stand
# schreibt und Unklarheiten explizit benennt. Variante 1: bei
# Faktencheck-Fehler faellt das Lagebild auf den alten Pfad ohne
# Faktenkontext zurueck (Refresh bricht NICHT ab).
await _pipe_start("factcheck") await _pipe_start("factcheck")
factcheck_result: tuple = ([], None)
# Beide Tasks PARALLEL starten fact_context_block = ""
logger.info("Starte Analyse und Faktencheck parallel...") factcheck_failed_reason: str | None = None
analysis_result, factcheck_result = await asyncio.gather( try:
_do_analysis(), factcheck_result = await _do_factcheck()
_do_factcheck(), except Exception as fc_err:
factcheck_failed_reason = str(fc_err)
logger.warning(
"Faktencheck fehlgeschlagen, Lagebild laeuft ohne Faktenkontext: %s",
fc_err, exc_info=True,
) )
fact_checks, fc_usage = factcheck_result if factcheck_result else ([], None)
# Pipeline-Schritt 6 done direkt nach dem Aufruf — die finale
# DB-Persistierung passiert weiter unten, aber fuer die UI ist
# der Faktencheck-Aufruf hier abgeschlossen. Der count_value
# ist eine Schaetzung (echte Zahl steht spaeter in der DB).
_fc_estimated_new = max(0, len(fact_checks or []) - len(existing_facts or []))
await _pipe_done(
"factcheck",
count_value=_fc_estimated_new,
count_secondary=len(fact_checks) if fact_checks else 0,
)
# Faktenkontext fuer das Lagebild bauen.
try:
from agents.analyzer import build_fact_context_block as _build_fc_ctx
fact_context_block = _build_fc_ctx(
existing_facts or [], fact_checks or [], incident_type,
)
if fact_context_block:
logger.info(
"Faktenkontext fuer Lagebild: %d Zeichen, basierend auf %d alten + %d neuen Fakten",
len(fact_context_block), len(existing_facts or []), len(fact_checks or []),
)
except Exception as ctx_err:
logger.warning("build_fact_context_block fehlgeschlagen: %s", ctx_err, exc_info=True)
fact_context_block = ""
# Pipeline-Schritt 7: Lagebild verfassen (jetzt mit Faktenkontext)
await _pipe_start("summary")
logger.info(
"Starte Lagebild (sequenziell nach Faktencheck%s)",
" — OHNE Faktenkontext (Fallback)" if factcheck_failed_reason else "",
)
analysis_result = await _do_analysis(fact_context_block)
analysis, analysis_usage = analysis_result analysis, analysis_usage = analysis_result
fact_checks, fc_usage = factcheck_result
# Pipeline-Schritt 6: Lagebild verfassen (fertig, keine Zahl, nur Status)
await _pipe_done("summary", count_value=None, count_secondary=None) await _pipe_done("summary", count_value=None, count_secondary=None)
# --- Analyse-Ergebnisse verarbeiten --- # --- Analyse-Ergebnisse verarbeiten ---
@@ -1656,9 +1701,10 @@ class AgentOrchestrator:
await db.commit() await db.commit()
# Pipeline-Schritt 7: Fakten prüfen (fertig) # Pipeline-Schritt 7 (Fakten pruefen) wurde bereits frueher als done
_new_facts_count = max(0, len(fact_checks) - len(existing_facts)) # markiert (siehe weiter oben — direkt nach dem _do_factcheck-Aufruf,
await _pipe_done("factcheck", count_value=_new_facts_count, count_secondary=len(fact_checks) if fact_checks else 0) # bevor das Lagebild generiert wurde). Hier nur noch die DB-
# Persistierung der Fakten, ohne den Step erneut zu schliessen.
# Pipeline-Schritt 8: Qualitätscheck (Start, ohne Zahlen) # Pipeline-Schritt 8: Qualitätscheck (Start, ohne Zahlen)
await _pipe_start("qc") await _pipe_start("qc")

Datei anzeigen

@@ -158,7 +158,36 @@ CREATE TABLE IF NOT EXISTS sources (
article_count INTEGER DEFAULT 0, article_count INTEGER DEFAULT 0,
last_seen_at TIMESTAMP, last_seen_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
tenant_id INTEGER REFERENCES organizations(id) tenant_id INTEGER REFERENCES organizations(id),
language TEXT,
bias TEXT,
political_orientation TEXT DEFAULT 'na',
media_type TEXT DEFAULT 'sonstige',
reliability TEXT DEFAULT 'na',
state_affiliated INTEGER DEFAULT 0,
country_code TEXT,
classification_source TEXT DEFAULT 'legacy',
classified_at TIMESTAMP,
proposed_political_orientation TEXT,
proposed_media_type TEXT,
proposed_reliability TEXT,
proposed_state_affiliated INTEGER,
proposed_country_code TEXT,
proposed_alignments_json TEXT,
proposed_confidence REAL,
proposed_reasoning TEXT,
proposed_at TIMESTAMP,
eu_disinfo_listed INTEGER DEFAULT 0,
eu_disinfo_case_count INTEGER DEFAULT 0,
eu_disinfo_last_seen TIMESTAMP,
ifcn_signatory INTEGER DEFAULT 0,
external_data_synced_at TIMESTAMP
);
CREATE TABLE IF NOT EXISTS source_alignments (
source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
alignment TEXT NOT NULL,
PRIMARY KEY (source_id, alignment)
); );
CREATE TABLE IF NOT EXISTS notifications ( CREATE TABLE IF NOT EXISTS notifications (
@@ -611,6 +640,71 @@ async def init_db():
await db.execute("ALTER TABLE sources ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)") await db.execute("ALTER TABLE sources ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)")
await db.commit() await db.commit()
# Migration: language + bias (Freitext, schon laenger im Einsatz, Schema-Lueck schliessen)
if "language" not in src_columns:
await db.execute("ALTER TABLE sources ADD COLUMN language TEXT")
await db.commit()
if "bias" not in src_columns:
await db.execute("ALTER TABLE sources ADD COLUMN bias TEXT")
await db.commit()
# Migration: strukturierte Klassifikations-Spalten fuer sources
for col, ddl in [
("political_orientation", "ALTER TABLE sources ADD COLUMN political_orientation TEXT DEFAULT 'na'"),
("media_type", "ALTER TABLE sources ADD COLUMN media_type TEXT DEFAULT 'sonstige'"),
("reliability", "ALTER TABLE sources ADD COLUMN reliability TEXT DEFAULT 'na'"),
("state_affiliated", "ALTER TABLE sources ADD COLUMN state_affiliated INTEGER DEFAULT 0"),
("country_code", "ALTER TABLE sources ADD COLUMN country_code TEXT"),
("classification_source", "ALTER TABLE sources ADD COLUMN classification_source TEXT DEFAULT 'legacy'"),
("classified_at", "ALTER TABLE sources ADD COLUMN classified_at TIMESTAMP"),
("proposed_political_orientation", "ALTER TABLE sources ADD COLUMN proposed_political_orientation TEXT"),
("proposed_media_type", "ALTER TABLE sources ADD COLUMN proposed_media_type TEXT"),
("proposed_reliability", "ALTER TABLE sources ADD COLUMN proposed_reliability TEXT"),
("proposed_state_affiliated", "ALTER TABLE sources ADD COLUMN proposed_state_affiliated INTEGER"),
("proposed_country_code", "ALTER TABLE sources ADD COLUMN proposed_country_code TEXT"),
("proposed_alignments_json", "ALTER TABLE sources ADD COLUMN proposed_alignments_json TEXT"),
("proposed_confidence", "ALTER TABLE sources ADD COLUMN proposed_confidence REAL"),
("proposed_reasoning", "ALTER TABLE sources ADD COLUMN proposed_reasoning TEXT"),
("proposed_at", "ALTER TABLE sources ADD COLUMN proposed_at TIMESTAMP"),
]:
if col not in src_columns:
await db.execute(ddl)
await db.commit()
if any(c not in src_columns for c in ("political_orientation", "media_type", "reliability")):
logger.info("Migration: Klassifikations-Spalten zu sources hinzugefuegt")
# Migration: externe Reputations-Daten (EUvsDisinfo + IFCN)
for col, ddl in [
("eu_disinfo_listed", "ALTER TABLE sources ADD COLUMN eu_disinfo_listed INTEGER DEFAULT 0"),
("eu_disinfo_case_count", "ALTER TABLE sources ADD COLUMN eu_disinfo_case_count INTEGER DEFAULT 0"),
("eu_disinfo_last_seen", "ALTER TABLE sources ADD COLUMN eu_disinfo_last_seen TIMESTAMP"),
("ifcn_signatory", "ALTER TABLE sources ADD COLUMN ifcn_signatory INTEGER DEFAULT 0"),
("external_data_synced_at", "ALTER TABLE sources ADD COLUMN external_data_synced_at TIMESTAMP"),
]:
if col not in src_columns:
await db.execute(ddl)
await db.commit()
if any(c not in src_columns for c in ("eu_disinfo_listed", "ifcn_signatory")):
logger.info("Migration: externe Reputations-Spalten zu sources hinzugefuegt")
# Migration: source_alignments-Tabelle (Mehrfach-Tags fuer geopolitische Naehe)
cursor = await db.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='source_alignments'"
)
if not await cursor.fetchone():
await db.executescript(
"""
CREATE TABLE source_alignments (
source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
alignment TEXT NOT NULL,
PRIMARY KEY (source_id, alignment)
);
CREATE INDEX IF NOT EXISTS idx_source_alignments_alignment ON source_alignments(alignment);
"""
)
await db.commit()
logger.info("Migration: source_alignments-Tabelle erstellt")
# Migration: tenant_id fuer notifications # Migration: tenant_id fuer notifications
cursor = await db.execute("PRAGMA table_info(notifications)") cursor = await db.execute("PRAGMA table_info(notifications)")
notif_columns = [row[1] for row in await cursor.fetchall()] notif_columns = [row[1] for row in await cursor.fetchall()]

Datei anzeigen

@@ -139,24 +139,51 @@ class IncidentListItem(BaseModel):
# Sources (Quellenverwaltung) # Sources (Quellenverwaltung)
SOURCE_TYPE_PATTERN = "^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$"
SOURCE_CATEGORY_PATTERN = "^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$"
SOURCE_STATUS_PATTERN = "^(active|inactive)$"
POLITICAL_ORIENTATION_PATTERN = "^(links_extrem|links|mitte_links|liberal|mitte|konservativ|mitte_rechts|rechts|rechts_extrem|na)$"
MEDIA_TYPE_PATTERN = "^(tageszeitung|wochenzeitung|magazin|tv_sender|radio|oeffentlich_rechtlich|nachrichtenagentur|online_only|blog|telegram_kanal|telegram_bot|podcast|social_media|imageboard|think_tank|ngo|behoerde|staatsmedium|fachmedium|sonstige)$"
RELIABILITY_PATTERN = "^(sehr_hoch|hoch|gemischt|niedrig|sehr_niedrig|na)$"
ALIGNMENT_PATTERN = "^(prorussisch|proiranisch|prowestlich|proukrainisch|prochinesisch|projapanisch|proisraelisch|propalaestinensisch|protuerkisch|panarabisch|neutral|sonstige)$"
COUNTRY_CODE_PATTERN = "^[A-Z]{2}$"
CLASSIFICATION_SOURCE_PATTERN = "^(manual|llm_approved|llm_pending|legacy)$"
class SourceCreate(BaseModel): class SourceCreate(BaseModel):
name: str = Field(min_length=1, max_length=200) name: str = Field(min_length=1, max_length=200)
url: Optional[str] = None url: Optional[str] = None
domain: Optional[str] = None domain: Optional[str] = None
source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$") source_type: str = Field(default="rss_feed", pattern=SOURCE_TYPE_PATTERN)
category: str = Field(default="sonstige", pattern="^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$") category: str = Field(default="sonstige", pattern=SOURCE_CATEGORY_PATTERN)
status: str = Field(default="active", pattern="^(active|inactive)$") status: str = Field(default="active", pattern=SOURCE_STATUS_PATTERN)
notes: Optional[str] = None notes: Optional[str] = None
language: Optional[str] = None
bias: Optional[str] = None
political_orientation: Optional[str] = Field(default=None, pattern=POLITICAL_ORIENTATION_PATTERN)
media_type: Optional[str] = Field(default=None, pattern=MEDIA_TYPE_PATTERN)
reliability: Optional[str] = Field(default=None, pattern=RELIABILITY_PATTERN)
state_affiliated: Optional[bool] = None
country_code: Optional[str] = Field(default=None, pattern=COUNTRY_CODE_PATTERN)
alignments: Optional[list[str]] = None
class SourceUpdate(BaseModel): class SourceUpdate(BaseModel):
name: Optional[str] = Field(default=None, max_length=200) name: Optional[str] = Field(default=None, max_length=200)
url: Optional[str] = None url: Optional[str] = None
domain: Optional[str] = None domain: Optional[str] = None
source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$") source_type: Optional[str] = Field(default=None, pattern=SOURCE_TYPE_PATTERN)
category: Optional[str] = Field(default=None, pattern="^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$") category: Optional[str] = Field(default=None, pattern=SOURCE_CATEGORY_PATTERN)
status: Optional[str] = Field(default=None, pattern="^(active|inactive)$") status: Optional[str] = Field(default=None, pattern=SOURCE_STATUS_PATTERN)
notes: Optional[str] = None notes: Optional[str] = None
language: Optional[str] = None
bias: Optional[str] = None
political_orientation: Optional[str] = Field(default=None, pattern=POLITICAL_ORIENTATION_PATTERN)
media_type: Optional[str] = Field(default=None, pattern=MEDIA_TYPE_PATTERN)
reliability: Optional[str] = Field(default=None, pattern=RELIABILITY_PATTERN)
state_affiliated: Optional[bool] = None
country_code: Optional[str] = Field(default=None, pattern=COUNTRY_CODE_PATTERN)
alignments: Optional[list[str]] = None
class SourceResponse(BaseModel): class SourceResponse(BaseModel):
@@ -174,7 +201,20 @@ class SourceResponse(BaseModel):
created_at: str created_at: str
language: Optional[str] = None language: Optional[str] = None
bias: Optional[str] = None bias: Optional[str] = None
political_orientation: Optional[str] = None
media_type: Optional[str] = None
reliability: Optional[str] = None
state_affiliated: bool = False
country_code: Optional[str] = None
classification_source: Optional[str] = None
classified_at: Optional[str] = None
alignments: list[str] = []
is_global: bool = False is_global: bool = False
ifcn_signatory: bool = False
eu_disinfo_listed: bool = False
eu_disinfo_case_count: int = 0
eu_disinfo_last_seen: Optional[str] = None
external_data_synced_at: Optional[str] = None
# Source Discovery # Source Discovery

Datei anzeigen

@@ -1,10 +1,13 @@
"""Sources-Router: Quellenverwaltung (Multi-Tenant).""" """Sources-Router: Quellenverwaltung (Multi-Tenant)."""
import json
import logging import logging
from collections import defaultdict from collections import defaultdict
from fastapi import APIRouter, Depends, HTTPException, status from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest
from auth import get_current_user from auth import get_current_user
from database import db_dependency, refresh_source_counts from database import db_dependency, get_db, refresh_source_counts
from services.external_reputation import apply_reputation_overrides, sync_all as sync_external_reputation
from services.source_classifier import bulk_classify, classify_source
from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES
import aiosqlite import aiosqlite
@@ -12,7 +15,56 @@ logger = logging.getLogger("osint.sources")
router = APIRouter(prefix="/api/sources", tags=["sources"]) router = APIRouter(prefix="/api/sources", tags=["sources"])
SOURCE_UPDATE_COLUMNS = {"name", "url", "domain", "source_type", "category", "status", "notes"} SOURCE_UPDATE_COLUMNS = {
"name", "url", "domain", "source_type", "category", "status", "notes",
"language", "bias",
"political_orientation", "media_type", "reliability",
"state_affiliated", "country_code",
}
SOURCE_CLASSIFICATION_FIELDS = {
"political_orientation", "media_type", "reliability",
"state_affiliated", "country_code",
}
ALLOWED_ALIGNMENTS = {
"prorussisch", "proiranisch", "prowestlich", "proukrainisch",
"prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch",
"protuerkisch", "panarabisch", "neutral", "sonstige",
}
async def _load_alignments_for(db: aiosqlite.Connection, source_ids: list[int]) -> dict[int, list[str]]:
"""Lädt alignments fuer mehrere Quellen in einer Query und gibt {source_id: [alignment, ...]} zurück."""
if not source_ids:
return {}
placeholders = ",".join("?" for _ in source_ids)
cursor = await db.execute(
f"SELECT source_id, alignment FROM source_alignments WHERE source_id IN ({placeholders}) ORDER BY alignment",
source_ids,
)
out: dict[int, list[str]] = {sid: [] for sid in source_ids}
for row in await cursor.fetchall():
out.setdefault(row["source_id"], []).append(row["alignment"])
return out
async def _replace_alignments(db: aiosqlite.Connection, source_id: int, alignments: list[str]):
"""Ersetzt die alignments-Liste einer Quelle (DELETE + INSERT) — Aufrufer muss commit() machen."""
await db.execute("DELETE FROM source_alignments WHERE source_id = ?", (source_id,))
seen: set[str] = set()
for raw in alignments:
a = (raw or "").strip().lower()
if not a or a in seen:
continue
if a not in ALLOWED_ALIGNMENTS:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"Ungueltiger alignment-Wert: '{a}'",
)
seen.add(a)
await db.execute(
"INSERT INTO source_alignments (source_id, alignment) VALUES (?, ?)",
(source_id, a),
)
def _check_source_ownership(source: dict, username: str): def _check_source_ownership(source: dict, username: str):
@@ -34,6 +86,13 @@ async def list_sources(
source_type: str = None, source_type: str = None,
category: str = None, category: str = None,
source_status: str = None, source_status: str = None,
political_orientation: str = None,
media_type: str = None,
reliability: str = None,
state_affiliated: bool = None,
alignment: str = None,
ifcn_signatory: bool = None,
eu_disinfo_listed: bool = None,
current_user: dict = Depends(get_current_user), current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency), db: aiosqlite.Connection = Depends(db_dependency),
): ):
@@ -41,27 +100,51 @@ async def list_sources(
tenant_id = current_user.get("tenant_id") tenant_id = current_user.get("tenant_id")
# Global (tenant_id=NULL) + eigene Org # Global (tenant_id=NULL) + eigene Org
query = "SELECT * FROM sources WHERE (tenant_id IS NULL OR tenant_id = ?)" query = "SELECT s.* FROM sources s WHERE (s.tenant_id IS NULL OR s.tenant_id = ?)"
params = [tenant_id] params: list = [tenant_id]
if source_type: if source_type:
query += " AND source_type = ?" query += " AND s.source_type = ?"
params.append(source_type) params.append(source_type)
if category: if category:
query += " AND category = ?" query += " AND s.category = ?"
params.append(category) params.append(category)
if source_status: if source_status:
query += " AND status = ?" query += " AND s.status = ?"
params.append(source_status) params.append(source_status)
if political_orientation:
query += " AND s.political_orientation = ?"
params.append(political_orientation)
if media_type:
query += " AND s.media_type = ?"
params.append(media_type)
if reliability:
query += " AND s.reliability = ?"
params.append(reliability)
if state_affiliated is not None:
query += " AND s.state_affiliated = ?"
params.append(1 if state_affiliated else 0)
if alignment:
query += " AND EXISTS (SELECT 1 FROM source_alignments sa WHERE sa.source_id = s.id AND sa.alignment = ?)"
params.append(alignment.lower())
if ifcn_signatory is not None:
query += " AND s.ifcn_signatory = ?"
params.append(1 if ifcn_signatory else 0)
if eu_disinfo_listed is not None:
query += " AND s.eu_disinfo_listed = ?"
params.append(1 if eu_disinfo_listed else 0)
query += " ORDER BY source_type, category, name" query += " ORDER BY s.source_type, s.category, s.name"
cursor = await db.execute(query, params) cursor = await db.execute(query, params)
rows = await cursor.fetchall() rows = await cursor.fetchall()
results = [] results = [dict(row) for row in rows]
for row in rows: alignments_map = await _load_alignments_for(db, [r["id"] for r in results])
d = dict(row) for d in results:
d["is_global"] = d.get("tenant_id") is None d["is_global"] = d.get("tenant_id") is None
results.append(d) d["state_affiliated"] = bool(d.get("state_affiliated"))
d["ifcn_signatory"] = bool(d.get("ifcn_signatory"))
d["eu_disinfo_listed"] = bool(d.get("eu_disinfo_listed"))
d["alignments"] = alignments_map.get(d["id"], [])
return results return results
@@ -454,10 +537,16 @@ async def create_source(
detail=f"Domain '{domain}' bereits als Quelle vorhanden: {domain_existing['name']}. Für einen neuen RSS-Feed bitte die Feed-URL angeben.", detail=f"Domain '{domain}' bereits als Quelle vorhanden: {domain_existing['name']}. Für einen neuen RSS-Feed bitte die Feed-URL angeben.",
) )
cursor = await db.execute( payload = data.model_dump(exclude_unset=True)
"""INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by, tenant_id) alignments = payload.pop("alignments", None)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & payload.keys()) or alignments is not None
(
cols = ["name", "url", "domain", "source_type", "category", "status", "notes",
"language", "bias",
"political_orientation", "media_type", "reliability",
"state_affiliated", "country_code",
"added_by", "tenant_id"]
vals = [
data.name, data.name,
data.url, data.url,
domain, domain,
@@ -465,15 +554,43 @@ async def create_source(
data.category, data.category,
data.status, data.status,
data.notes, data.notes,
payload.get("language"),
payload.get("bias"),
payload.get("political_orientation"),
payload.get("media_type"),
payload.get("reliability"),
1 if payload.get("state_affiliated") else 0,
payload.get("country_code"),
current_user["username"], current_user["username"],
tenant_id, tenant_id,
), ]
if classification_touched:
cols += ["classification_source", "classified_at"]
vals += ["manual"]
ts_marker = True
else:
ts_marker = False
placeholders = ", ".join(["?"] * len(vals) + (["CURRENT_TIMESTAMP"] if ts_marker else []))
cursor = await db.execute(
f"INSERT INTO sources ({', '.join(cols)}) VALUES ({placeholders})",
vals,
) )
new_id = cursor.lastrowid
if alignments:
await _replace_alignments(db, new_id, alignments)
await db.commit() await db.commit()
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (cursor.lastrowid,)) cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (new_id,))
row = await cursor.fetchone() row = await cursor.fetchone()
return dict(row) result = dict(row)
result["is_global"] = result.get("tenant_id") is None
result["state_affiliated"] = bool(result.get("state_affiliated"))
alignments_map = await _load_alignments_for(db, [new_id])
result["alignments"] = alignments_map.get(new_id, [])
return result
@router.put("/{source_id}", response_model=SourceResponse) @router.put("/{source_id}", response_model=SourceResponse)
@@ -494,27 +611,51 @@ async def update_source(
_check_source_ownership(dict(row), current_user["username"]) _check_source_ownership(dict(row), current_user["username"])
payload = data.model_dump(exclude_unset=True)
alignments = payload.pop("alignments", None)
updates = {} updates = {}
for field, value in data.model_dump(exclude_none=True).items(): for field, value in payload.items():
if field not in SOURCE_UPDATE_COLUMNS: if field not in SOURCE_UPDATE_COLUMNS:
continue continue
# Domain normalisieren # Domain normalisieren
if field == "domain" and value: if field == "domain" and value:
value = _DOMAIN_ALIASES.get(value.lower(), value.lower()) value = _DOMAIN_ALIASES.get(value.lower(), value.lower())
if field == "state_affiliated":
value = 1 if value else 0
updates[field] = value updates[field] = value
if not updates: classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & updates.keys()) or alignments is not None
return dict(row) if classification_touched:
updates["classification_source"] = "manual"
updates["classified_at"] = "CURRENT_TIMESTAMP_MARKER"
set_clause = ", ".join(f"{k} = ?" for k in updates) if updates:
values = list(updates.values()) + [source_id] set_parts = []
values = []
for k, v in updates.items():
if v == "CURRENT_TIMESTAMP_MARKER":
set_parts.append(f"{k} = CURRENT_TIMESTAMP")
else:
set_parts.append(f"{k} = ?")
values.append(v)
values.append(source_id)
await db.execute(f"UPDATE sources SET {', '.join(set_parts)} WHERE id = ?", values)
await db.execute(f"UPDATE sources SET {set_clause} WHERE id = ?", values) if alignments is not None:
await _replace_alignments(db, source_id, alignments)
if updates or alignments is not None:
await db.commit() await db.commit()
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,)) cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
row = await cursor.fetchone() row = await cursor.fetchone()
return dict(row) result = dict(row)
result["is_global"] = result.get("tenant_id") is None
result["state_affiliated"] = bool(result.get("state_affiliated"))
alignments_map = await _load_alignments_for(db, [source_id])
result["alignments"] = alignments_map.get(source_id, [])
return result
@router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT) @router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
@@ -572,3 +713,328 @@ async def trigger_refresh_counts(
"""Artikelzaehler fuer alle Quellen neu berechnen.""" """Artikelzaehler fuer alle Quellen neu berechnen."""
await refresh_source_counts(db) await refresh_source_counts(db)
return {"status": "ok"} return {"status": "ok"}
# === Klassifikations-Review (LLM-Vorschlaege approve/reject/reclassify) ===
def _require_admin_for_global(row: dict, current_user: dict):
"""Globale Quellen (tenant_id IS NULL) duerfen nur org_admins approve-en/reclassify-en."""
if row.get("tenant_id") is None and current_user.get("role") != "org_admin":
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Globale Quellen koennen nur von Admins klassifiziert werden",
)
@router.get("/classification/stats")
async def classification_stats(
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Counts pro classification_source-Wert (global + eigene Org)."""
tenant_id = current_user.get("tenant_id")
cursor = await db.execute(
"""SELECT classification_source, COUNT(*) as cnt
FROM sources
WHERE (tenant_id IS NULL OR tenant_id = ?) AND status = 'active'
GROUP BY classification_source""",
(tenant_id,),
)
by_source = {row["classification_source"] or "legacy": row["cnt"] for row in await cursor.fetchall()}
cursor = await db.execute(
"""SELECT COUNT(*) as cnt FROM sources
WHERE (tenant_id IS NULL OR tenant_id = ?) AND status = 'active'
AND proposed_political_orientation IS NOT NULL""",
(tenant_id,),
)
pending = (await cursor.fetchone())["cnt"]
return {
"by_classification_source": by_source,
"pending_review": pending,
"total": sum(by_source.values()),
}
@router.get("/classification/queue")
async def classification_queue(
limit: int = 50,
min_confidence: float = 0.0,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Liefert Quellen mit nicht-leeren proposed_*-Spalten (Review-Queue)."""
tenant_id = current_user.get("tenant_id")
cursor = await db.execute(
"""SELECT s.* FROM sources s
WHERE (s.tenant_id IS NULL OR s.tenant_id = ?)
AND s.proposed_political_orientation IS NOT NULL
AND COALESCE(s.proposed_confidence, 0) >= ?
ORDER BY s.proposed_confidence DESC, s.proposed_at DESC
LIMIT ?""",
(tenant_id, min_confidence, limit),
)
rows = [dict(r) for r in await cursor.fetchall()]
alignments_map = await _load_alignments_for(db, [r["id"] for r in rows])
out = []
for d in rows:
try:
proposed_aligns = json.loads(d.get("proposed_alignments_json") or "[]")
except (json.JSONDecodeError, TypeError):
proposed_aligns = []
out.append({
"id": d["id"],
"name": d["name"],
"url": d.get("url"),
"domain": d.get("domain"),
"source_type": d.get("source_type"),
"category": d.get("category"),
"is_global": d.get("tenant_id") is None,
"current": {
"political_orientation": d.get("political_orientation"),
"media_type": d.get("media_type"),
"reliability": d.get("reliability"),
"state_affiliated": bool(d.get("state_affiliated")),
"country_code": d.get("country_code"),
"alignments": alignments_map.get(d["id"], []),
"classification_source": d.get("classification_source"),
},
"proposed": {
"political_orientation": d.get("proposed_political_orientation"),
"media_type": d.get("proposed_media_type"),
"reliability": d.get("proposed_reliability"),
"state_affiliated": bool(d.get("proposed_state_affiliated")),
"country_code": d.get("proposed_country_code"),
"alignments": proposed_aligns,
"confidence": d.get("proposed_confidence"),
"reasoning": d.get("proposed_reasoning"),
"proposed_at": d.get("proposed_at"),
},
})
return out
async def _clear_proposed(db: aiosqlite.Connection, source_id: int):
"""Loescht die proposed_*-Felder einer Quelle (ohne commit)."""
await db.execute(
"""UPDATE sources SET
proposed_political_orientation = NULL,
proposed_media_type = NULL,
proposed_reliability = NULL,
proposed_state_affiliated = NULL,
proposed_country_code = NULL,
proposed_alignments_json = NULL,
proposed_confidence = NULL,
proposed_reasoning = NULL,
proposed_at = NULL
WHERE id = ?""",
(source_id,),
)
@router.post("/{source_id}/classification/approve")
async def approve_classification(
source_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Uebernimmt proposed_* in echte Felder, setzt classification_source='llm_approved'."""
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
row = await cursor.fetchone()
if not row:
raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
src = dict(row)
_require_admin_for_global(src, current_user)
if src.get("proposed_political_orientation") is None:
raise HTTPException(status_code=400, detail="Keine LLM-Vorschlaege fuer diese Quelle vorhanden")
try:
proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]")
except (json.JSONDecodeError, TypeError):
proposed_aligns = []
await db.execute(
"""UPDATE sources SET
political_orientation = ?,
media_type = ?,
reliability = ?,
state_affiliated = ?,
country_code = ?,
classification_source = 'llm_approved',
classified_at = CURRENT_TIMESTAMP
WHERE id = ?""",
(
src["proposed_political_orientation"],
src["proposed_media_type"],
src["proposed_reliability"],
1 if src.get("proposed_state_affiliated") else 0,
src.get("proposed_country_code"),
source_id,
),
)
await _replace_alignments(db, source_id, [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS])
await _clear_proposed(db, source_id)
await db.commit()
# Reliability-Override anwenden (IFCN/EUvsDisinfo)
try:
await apply_reputation_overrides(db, source_id)
except Exception as e:
logger.warning("Reputation-Override fuer source_id=%s fehlgeschlagen: %s", source_id, e)
return {"source_id": source_id, "status": "approved"}
@router.post("/{source_id}/classification/reject")
async def reject_classification(
source_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Verwirft die LLM-Vorschlaege ohne Uebernahme. classification_source bleibt unveraendert."""
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
row = await cursor.fetchone()
if not row:
raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
src = dict(row)
_require_admin_for_global(src, current_user)
await _clear_proposed(db, source_id)
# Wenn classification_source noch 'llm_pending' war, zurueck auf 'legacy'
if src.get("classification_source") == "llm_pending":
await db.execute(
"UPDATE sources SET classification_source = 'legacy' WHERE id = ?",
(source_id,),
)
await db.commit()
return {"source_id": source_id, "status": "rejected"}
@router.post("/{source_id}/classification/reclassify")
async def reclassify_source(
source_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Triggert eine LLM-Klassifikation einer einzelnen Quelle (synchron, ~3-5s)."""
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
row = await cursor.fetchone()
if not row:
raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
src = dict(row)
_require_admin_for_global(src, current_user)
try:
result = await classify_source(db, source_id)
except Exception as e:
logger.error("Reclassify source_id=%s fehlgeschlagen: %s", source_id, e, exc_info=True)
raise HTTPException(status_code=500, detail=f"Klassifikation fehlgeschlagen: {e}")
return result
async def _bulk_classify_background(limit: int, only_unclassified: bool):
"""Hintergrund-Task: oeffnet eigene DB-Connection."""
db = await get_db()
try:
await bulk_classify(db, limit=limit, only_unclassified=only_unclassified)
finally:
await db.close()
@router.post("/classification/bulk-classify")
async def trigger_bulk_classify(
background_tasks: BackgroundTasks,
limit: int = 50,
only_unclassified: bool = True,
current_user: dict = Depends(get_current_user),
):
"""Startet eine Bulk-Klassifikation im Hintergrund (nur Admins)."""
if current_user.get("role") != "org_admin":
raise HTTPException(status_code=403, detail="Nur Admins koennen Bulk-Klassifikation starten")
if limit < 1 or limit > 500:
raise HTTPException(status_code=400, detail="limit muss zwischen 1 und 500 liegen")
background_tasks.add_task(_bulk_classify_background, limit, only_unclassified)
return {"status": "started", "limit": limit, "only_unclassified": only_unclassified}
@router.post("/external-reputation/sync")
async def trigger_external_reputation_sync(
background_tasks: BackgroundTasks,
current_user: dict = Depends(get_current_user),
):
"""Startet Sync von IFCN- und EUvsDisinfo-Daten (Admin, Hintergrund)."""
if current_user.get("role") != "org_admin":
raise HTTPException(status_code=403, detail="Nur Admins koennen den externen Sync starten")
async def _bg():
db = await get_db()
try:
await sync_external_reputation(db)
finally:
await db.close()
background_tasks.add_task(_bg)
return {"status": "started"}
@router.post("/classification/bulk-approve")
async def bulk_approve_classifications(
min_confidence: float = 0.85,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Genehmigt alle Pending-Vorschlaege ueber dem confidence-Schwellwert (nur Admins).
Globale Quellen werden nur bearbeitet, wenn der Aufrufer org_admin ist;
Tenant-eigene Quellen sowieso.
"""
if current_user.get("role") != "org_admin":
raise HTTPException(status_code=403, detail="Nur Admins koennen Bulk-Approve nutzen")
tenant_id = current_user.get("tenant_id")
cursor = await db.execute(
"""SELECT id, proposed_political_orientation, proposed_media_type,
proposed_reliability, proposed_state_affiliated,
proposed_country_code, proposed_alignments_json, tenant_id
FROM sources
WHERE proposed_political_orientation IS NOT NULL
AND COALESCE(proposed_confidence, 0) >= ?
AND (tenant_id IS NULL OR tenant_id = ?)""",
(min_confidence, tenant_id),
)
rows = [dict(r) for r in await cursor.fetchall()]
approved_ids: list[int] = []
for src in rows:
try:
proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]")
except (json.JSONDecodeError, TypeError):
proposed_aligns = []
await db.execute(
"""UPDATE sources SET
political_orientation = ?,
media_type = ?,
reliability = ?,
state_affiliated = ?,
country_code = ?,
classification_source = 'llm_approved',
classified_at = CURRENT_TIMESTAMP
WHERE id = ?""",
(
src["proposed_political_orientation"],
src["proposed_media_type"],
src["proposed_reliability"],
1 if src.get("proposed_state_affiliated") else 0,
src.get("proposed_country_code"),
src["id"],
),
)
await _replace_alignments(
db, src["id"], [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS]
)
await _clear_proposed(db, src["id"])
approved_ids.append(src["id"])
await db.commit()
# Reliability-Override fuer alle gerade Approved
try:
for sid in approved_ids:
await apply_reputation_overrides(db, sid)
except Exception as e:
logger.warning("Bulk Reputation-Override fehlgeschlagen: %s", e)
return {"approved_count": len(approved_ids), "min_confidence": min_confidence}

Datei anzeigen

@@ -0,0 +1,282 @@
"""Externe Reputations-Daten fuer Quellen.
Synchronisiert Domain-Listen von oeffentlichen Reputations-/Faktencheck-Datenbanken
und schreibt die Treffer in die sources-Spalten:
- IFCN-Signatories (anerkannte Faktenchecker) -> ifcn_signatory
- EUvsDisinfo (pro-Kreml-Desinformation, Zenodo-CSV) -> eu_disinfo_listed,
eu_disinfo_case_count, eu_disinfo_last_seen
Anschliessend wendet apply_reputation_overrides() Override-Regeln auf die
reliability-Spalte an:
- ifcn_signatory=1 -> reliability='sehr_hoch'
- eu_disinfo_case_count >= 5 -> reliability='sehr_niedrig'
- eu_disinfo_case_count >= 1 -> reliability eine Stufe runter (max bis 'niedrig')
"""
import csv
import io
import logging
from collections import defaultdict
from urllib.parse import urlparse
import aiosqlite
import httpx
logger = logging.getLogger("osint.external_reputation")
IFCN_LIST_URL = "https://raw.githubusercontent.com/IFCN/verified-signatories/main/list"
EU_DISINFO_CSV_URL = "https://zenodo.org/records/10514307/files/euvsdisinfo_base.csv?download=1"
HTTP_TIMEOUT = httpx.Timeout(60.0, connect=10.0)
# Generische Plattform-Domains, die NICHT als Quelle markiert werden duerfen
# (EUvsDisinfo aggregiert anonyme Telegram-/Twitter-Posts unter Plattform-Domains).
PLATFORM_DOMAINS = {
"t.me", "telegram.me", "telegram.org",
"twitter.com", "x.com", "mobile.twitter.com",
"youtube.com", "youtu.be", "m.youtube.com",
"facebook.com", "fb.com", "m.facebook.com",
"instagram.com", "tiktok.com", "vk.com", "ok.ru",
"rumble.com", "bitchute.com", "odysee.com",
"reddit.com", "old.reddit.com",
"wordpress.com", "blogspot.com", "medium.com",
"substack.com", "wixsite.com",
}
# Reliability-Skala in Stufenfolge (schlecht -> gut)
RELIABILITY_ORDER = ["sehr_niedrig", "niedrig", "gemischt", "hoch", "sehr_hoch"]
def _normalize_domain(raw: str | None) -> str | None:
"""Normalisiert eine Domain: lowercase, ohne www., ohne Schema/Pfad."""
if not raw:
return None
raw = raw.strip().lower()
if not raw:
return None
# Falls eine vollstaendige URL uebergeben wurde
if "://" in raw:
try:
raw = urlparse(raw).netloc or raw
except ValueError:
pass
# Pfad/Query strippen
raw = raw.split("/")[0].split("?")[0].split("#")[0]
if raw.startswith("www."):
raw = raw[4:]
return raw or None
async def _fetch_text(url: str) -> str:
"""Laedt Text von einer URL. Wirft HTTPException bei Fehler."""
async with httpx.AsyncClient(timeout=HTTP_TIMEOUT, follow_redirects=True) as client:
resp = await client.get(url)
resp.raise_for_status()
return resp.text
async def sync_ifcn_signatories(db: aiosqlite.Connection) -> dict:
"""Laedt IFCN-Domain-Liste und matcht gegen sources.domain.
Setzt ifcn_signatory=1 wo die Domain in der Liste vorkommt, sonst 0.
"""
text = await _fetch_text(IFCN_LIST_URL)
domains: set[str] = set()
for line in text.splitlines():
d = _normalize_domain(line)
if d:
domains.add(d)
logger.info("IFCN-Liste geladen: %d Domains", len(domains))
# Aktuelle Quellen mit Domain laden
cursor = await db.execute(
"SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''"
)
sources = [dict(r) for r in await cursor.fetchall()]
matched_ids: list[int] = []
unmatched_ids: list[int] = []
for s in sources:
nd = _normalize_domain(s["domain"])
if nd and nd not in PLATFORM_DOMAINS and nd in domains:
matched_ids.append(s["id"])
else:
unmatched_ids.append(s["id"])
# Bulk-Update in zwei Statements
if matched_ids:
placeholders = ",".join("?" for _ in matched_ids)
await db.execute(
f"UPDATE sources SET ifcn_signatory = 1 WHERE id IN ({placeholders})",
matched_ids,
)
if unmatched_ids:
placeholders = ",".join("?" for _ in unmatched_ids)
await db.execute(
f"UPDATE sources SET ifcn_signatory = 0 WHERE id IN ({placeholders})",
unmatched_ids,
)
await db.commit()
logger.info("IFCN-Sync: %d Quellen als Faktenchecker markiert (von %d)",
len(matched_ids), len(sources))
return {
"list_size": len(domains),
"sources_checked": len(sources),
"matched": len(matched_ids),
}
async def sync_eu_disinfo(db: aiosqlite.Connection) -> dict:
"""Laedt EUvsDisinfo-CSV von Zenodo, aggregiert pro Domain, schreibt sources.
- eu_disinfo_listed: 1 wenn Domain mindestens 1x als 'disinformation' debunkt
- eu_disinfo_case_count: Anzahl Disinformation-Faelle
- eu_disinfo_last_seen: spaetestes debunk_date
"""
text = await _fetch_text(EU_DISINFO_CSV_URL)
reader = csv.DictReader(io.StringIO(text))
# Per-Domain aggregieren (nur class='disinformation')
counts: dict[str, int] = defaultdict(int)
last_seen: dict[str, str] = {}
total_rows = 0
for row in reader:
total_rows += 1
if (row.get("class") or "").strip().lower() != "disinformation":
continue
d = _normalize_domain(row.get("article_domain"))
if not d:
continue
counts[d] += 1
debunk_date = (row.get("debunk_date") or "").strip()
if debunk_date:
prev = last_seen.get(d)
if not prev or debunk_date > prev:
last_seen[d] = debunk_date
logger.info("EUvsDisinfo-CSV: %d Zeilen, %d Domains mit Desinformation",
total_rows, len(counts))
# Quellen laden + matchen
cursor = await db.execute(
"SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''"
)
sources = [dict(r) for r in await cursor.fetchall()]
matched = 0
for s in sources:
nd = _normalize_domain(s["domain"])
if nd and nd not in PLATFORM_DOMAINS and nd in counts:
await db.execute(
"""UPDATE sources SET
eu_disinfo_listed = 1,
eu_disinfo_case_count = ?,
eu_disinfo_last_seen = ?
WHERE id = ?""",
(counts[nd], last_seen.get(nd), s["id"]),
)
matched += 1
else:
await db.execute(
"""UPDATE sources SET
eu_disinfo_listed = 0,
eu_disinfo_case_count = 0,
eu_disinfo_last_seen = NULL
WHERE id = ?""",
(s["id"],),
)
await db.commit()
logger.info("EUvsDisinfo-Sync: %d Quellen als Desinformations-Quelle markiert (von %d)",
matched, len(sources))
return {
"rows_in_csv": total_rows,
"domains_with_disinfo_in_csv": len(counts),
"sources_checked": len(sources),
"matched": matched,
}
def _override_reliability(current: str | None, ifcn: bool, eu_count: int) -> str | None:
"""Wendet Override-Regeln auf eine reliability-Stufe an.
Rueckgabe: neue Stufe (oder None, wenn unveraendert).
"""
cur = current or "na"
# IFCN gewinnt: zertifizierter Faktenchecker -> sehr_hoch (immer)
if ifcn:
return "sehr_hoch" if cur != "sehr_hoch" else None
# EUvsDisinfo: Downgrade
if eu_count >= 5:
return "sehr_niedrig" if cur != "sehr_niedrig" else None
if eu_count >= 1:
# Eine Stufe runter, mindestens bis 'niedrig'
if cur == "na":
return "niedrig"
if cur in RELIABILITY_ORDER:
idx = RELIABILITY_ORDER.index(cur)
new_idx = max(0, idx - 1)
new = RELIABILITY_ORDER[new_idx]
# Mindeststufe 'niedrig' bei eu_count >= 1
if RELIABILITY_ORDER.index(new) > RELIABILITY_ORDER.index("niedrig"):
new = "niedrig"
return new if new != cur else None
return None
async def apply_reputation_overrides(db: aiosqlite.Connection, source_id: int | None = None) -> dict:
"""Wendet Reliability-Override-Regeln an.
Wenn source_id angegeben ist, nur fuer diese Quelle. Sonst fuer alle Quellen.
"""
if source_id is not None:
cursor = await db.execute(
"SELECT id, reliability, ifcn_signatory, eu_disinfo_case_count "
"FROM sources WHERE id = ?",
(source_id,),
)
else:
cursor = await db.execute(
"SELECT id, reliability, ifcn_signatory, eu_disinfo_case_count FROM sources"
)
sources = [dict(r) for r in await cursor.fetchall()]
changed = 0
for s in sources:
new = _override_reliability(
s.get("reliability"),
bool(s.get("ifcn_signatory")),
int(s.get("eu_disinfo_case_count") or 0),
)
if new is not None:
await db.execute(
"UPDATE sources SET reliability = ? WHERE id = ?",
(new, s["id"]),
)
changed += 1
await db.commit()
logger.info("Reliability-Override: %d Quellen angepasst (von %d gepruefte)",
changed, len(sources))
return {"checked": len(sources), "changed": changed}
async def sync_all(db: aiosqlite.Connection) -> dict:
"""Vollstaendiger Sync: IFCN + EUvsDisinfo + Reliability-Override.
Setzt external_data_synced_at fuer alle Quellen.
"""
ifcn_result = await sync_ifcn_signatories(db)
eu_result = await sync_eu_disinfo(db)
override_result = await apply_reputation_overrides(db)
await db.execute(
"UPDATE sources SET external_data_synced_at = CURRENT_TIMESTAMP "
"WHERE domain IS NOT NULL AND domain != ''"
)
await db.commit()
return {
"ifcn": ifcn_result,
"eu_disinfo": eu_result,
"override": override_result,
}

Datei anzeigen

@@ -0,0 +1,295 @@
"""Klassifiziert Quellen via Claude (Haiku) nach 4 Achsen + state_affiliated + country.
Schreibt Vorschlaege in die proposed_*-Spalten von sources und setzt
classification_source='llm_pending'. Approval erfolgt ueber separate Endpoints,
die proposed_* in die echten Spalten kopieren.
"""
import asyncio
import json
import logging
import re
import aiosqlite
from agents.claude_client import call_claude
from config import CLAUDE_MODEL_FAST
logger = logging.getLogger("osint.source_classifier")
POLITICAL_VALUES = {
"links_extrem", "links", "mitte_links", "liberal", "mitte",
"konservativ", "mitte_rechts", "rechts", "rechts_extrem", "na",
}
MEDIA_TYPE_VALUES = {
"tageszeitung", "wochenzeitung", "magazin", "tv_sender", "radio",
"oeffentlich_rechtlich", "nachrichtenagentur", "online_only", "blog",
"telegram_kanal", "telegram_bot", "podcast", "social_media", "imageboard",
"think_tank", "ngo", "behoerde", "staatsmedium", "fachmedium", "sonstige",
}
RELIABILITY_VALUES = {"sehr_hoch", "hoch", "gemischt", "niedrig", "sehr_niedrig", "na"}
ALIGNMENT_VALUES = {
"prorussisch", "proiranisch", "prowestlich", "proukrainisch",
"prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch",
"protuerkisch", "panarabisch", "neutral", "sonstige",
}
def _build_prompt(src: dict, sample_articles: list[dict]) -> str:
sample_text = ""
if sample_articles:
lines = []
for i, art in enumerate(sample_articles[:5], 1):
headline = (art.get("headline") or art.get("headline_de") or "").strip()
if headline:
lines.append(f"{i}. {headline[:200]}")
if lines:
sample_text = "\nLetzte Artikel/Headlines:\n" + "\n".join(lines)
return f"""Du bist ein OSINT-Analyst und klassifizierst Nachrichten- und Medienquellen fuer ein Lagebild-Monitoring-System (DACH-Raum).
QUELLE:
Name: {src.get('name')}
URL: {src.get('url') or '-'}
Domain: {src.get('domain') or '-'}
Quellentyp: {src.get('source_type')}
Bisherige Kategorie: {src.get('category')}
Sprache: {src.get('language') or 'unbekannt'}
Bisherige Notiz (Freitext): {src.get('bias') or '-'}{sample_text}
AUFGABE: Klassifiziere die Quelle nach folgenden Achsen.
1. political_orientation:
- links_extrem (z.B. linksunten.indymedia)
- links (klar links, z.B. junge Welt, taz)
- mitte_links (linksliberal/sozialdemokratisch, z.B. SZ, Spiegel)
- liberal (wirtschafts-/grünliberal, z.B. NZZ, Zeit)
- mitte (politisch neutral, Agentur, z.B. dpa, Reuters, tagesschau)
- konservativ (buergerlich-konservativ, z.B. FAZ, Welt)
- mitte_rechts (rechts-buergerlich, z.B. Tichys Einblick, Achgut)
- rechts (klar rechts, z.B. Junge Freiheit, EpochTimes)
- rechts_extrem (z.B. Compact, PI-News)
- na (nicht klassifizierbar: Behoerde, Fachmedium, Think Tank ohne klare politische Linie)
2. media_type (genau einer):
tageszeitung, wochenzeitung, magazin, tv_sender, radio, oeffentlich_rechtlich,
nachrichtenagentur, online_only, blog, telegram_kanal, telegram_bot, podcast,
social_media, imageboard, think_tank, ngo, behoerde, staatsmedium, fachmedium, sonstige
3. reliability:
- sehr_hoch (etablierte Qualitaet, Faktencheck: tagesschau, dpa, FAZ, Reuters)
- hoch (serioes mit gelegentlichen Schwaechen: taz, Welt, BILD bei harten News)
- gemischt (Mix Meinung/Einseitigkeit: Tichys Einblick, Achgut, Boulevard)
- niedrig (haeufig irrefuehrend, schwache Quellenarbeit: Junge Freiheit, EpochTimes)
- sehr_niedrig (bekannt fuer Desinformation/Verschwoerung: Compact, RT, Sputnik, PI-News)
- na (nicht bewertbar)
4. alignments (Mehrfach, leeres Array wenn keine ausgepraegte Naehe):
prorussisch, proiranisch, prowestlich, proukrainisch, prochinesisch, projapanisch,
proisraelisch, propalaestinensisch, protuerkisch, panarabisch, neutral, sonstige
5. state_affiliated (true/false): true wenn vom Staat finanziert/kontrolliert
(RT, Sputnik, CGTN, PressTV, Xinhua, TRT). Public Service Broadcaster
wie ARD/ZDF/BBC sind NICHT state_affiliated.
6. country_code (ISO 3166-1 alpha-2): Heimatland (DE, AT, CH, RU, US, ...). null wenn unklar.
7. confidence (0.0-1.0): 0.85+ fuer bekannte Outlets, 0.5-0.85 fuer mittelbekannt, <0.5 fuer unsicher.
8. reasoning (1-2 Saetze): Kurze Begruendung der Hauptklassifikationen.
WICHTIG:
- Antworte AUSSCHLIESSLICH mit einem JSON-Objekt, kein Text drumherum.
- Nutze ausschliesslich die genannten enum-Werte (snake_case).
- Bei Unklarheit lieber `na` und niedrige confidence.
JSON-Schema:
{{
"political_orientation": "...",
"media_type": "...",
"reliability": "...",
"alignments": ["..."],
"state_affiliated": false,
"country_code": "DE",
"confidence": 0.9,
"reasoning": "..."
}}"""
async def _load_sample_articles(db: aiosqlite.Connection, name: str, domain: str | None, limit: int = 5) -> list[dict]:
"""Laedt die letzten Headlines einer Quelle (per name oder Domain-Match)."""
rows: list = []
if name:
cursor = await db.execute(
"SELECT headline, headline_de FROM articles WHERE source = ? ORDER BY collected_at DESC LIMIT ?",
(name, limit),
)
rows = await cursor.fetchall()
if not rows and domain:
cursor = await db.execute(
"SELECT headline, headline_de FROM articles WHERE source_url LIKE ? ORDER BY collected_at DESC LIMIT ?",
(f"%{domain}%", limit),
)
rows = await cursor.fetchall()
return [dict(r) for r in rows]
def _validate(parsed: dict) -> dict:
"""Validiert + normalisiert eine LLM-Antwort gegen die Enums."""
pol = parsed.get("political_orientation", "na")
if pol not in POLITICAL_VALUES:
pol = "na"
mt = parsed.get("media_type", "sonstige")
if mt not in MEDIA_TYPE_VALUES:
mt = "sonstige"
rel = parsed.get("reliability", "na")
if rel not in RELIABILITY_VALUES:
rel = "na"
aligns_raw = parsed.get("alignments") or []
if not isinstance(aligns_raw, list):
aligns_raw = []
aligns = sorted({a for a in aligns_raw if isinstance(a, str) and a in ALIGNMENT_VALUES})
sa = bool(parsed.get("state_affiliated", False))
cc = parsed.get("country_code")
if isinstance(cc, str) and len(cc) == 2 and cc.isalpha():
cc = cc.upper()
else:
cc = None
try:
confidence = float(parsed.get("confidence", 0.5))
confidence = max(0.0, min(1.0, confidence))
except (TypeError, ValueError):
confidence = 0.5
reasoning = str(parsed.get("reasoning", ""))[:1000]
return {
"political_orientation": pol,
"media_type": mt,
"reliability": rel,
"alignments": aligns,
"state_affiliated": sa,
"country_code": cc,
"confidence": confidence,
"reasoning": reasoning,
}
async def classify_source(
db: aiosqlite.Connection,
source_id: int,
sample_limit: int = 5,
model: str = CLAUDE_MODEL_FAST,
) -> dict:
"""Klassifiziert eine einzelne Quelle und schreibt die Vorschlaege in proposed_*-Spalten."""
cursor = await db.execute(
"SELECT id, name, url, domain, source_type, category, language, bias, "
"classification_source FROM sources WHERE id = ?",
(source_id,),
)
row = await cursor.fetchone()
if not row:
raise ValueError(f"Quelle {source_id} nicht gefunden")
src = dict(row)
sample = await _load_sample_articles(db, src["name"], src.get("domain"), sample_limit)
prompt = _build_prompt(src, sample)
response, usage = await call_claude(prompt, tools=None, model=model)
json_match = re.search(r"\{.*\}", response, re.DOTALL)
if not json_match:
raise ValueError(f"Keine JSON-Antwort von Claude fuer source_id={source_id}: {response[:200]}")
parsed = json.loads(json_match.group(0))
result = _validate(parsed)
# Nur classification_source auf 'llm_pending' setzen, wenn nicht bereits manuell/approved
new_src = "CASE WHEN classification_source IN ('manual','llm_approved') THEN classification_source ELSE 'llm_pending' END"
await db.execute(
f"""UPDATE sources SET
proposed_political_orientation = ?,
proposed_media_type = ?,
proposed_reliability = ?,
proposed_state_affiliated = ?,
proposed_country_code = ?,
proposed_alignments_json = ?,
proposed_confidence = ?,
proposed_reasoning = ?,
proposed_at = CURRENT_TIMESTAMP,
classification_source = {new_src}
WHERE id = ?""",
(
result["political_orientation"],
result["media_type"],
result["reliability"],
1 if result["state_affiliated"] else 0,
result["country_code"],
json.dumps(result["alignments"], ensure_ascii=False),
result["confidence"],
result["reasoning"],
source_id,
),
)
await db.commit()
logger.info(
"Klassifiziert source_id=%s '%s' -> %s/%s/%s conf=%.2f ($%.4f)",
source_id, src["name"], result["political_orientation"],
result["media_type"], result["reliability"], result["confidence"],
usage.cost_usd,
)
result["source_id"] = source_id
result["usage"] = {
"cost_usd": usage.cost_usd,
"input_tokens": usage.input_tokens,
"output_tokens": usage.output_tokens,
}
return result
async def bulk_classify(
db: aiosqlite.Connection,
limit: int = 50,
only_unclassified: bool = True,
model: str = CLAUDE_MODEL_FAST,
) -> dict:
"""Klassifiziert noch unklassifizierte Quellen (sequenziell).
Args:
limit: Maximale Anzahl Quellen pro Aufruf
only_unclassified: Wenn True, nur classification_source='legacy'.
Wenn False, auch 'llm_pending' neu klassifizieren.
"""
if only_unclassified:
where = "classification_source = 'legacy'"
else:
where = "classification_source IN ('legacy', 'llm_pending')"
cursor = await db.execute(
f"SELECT id FROM sources WHERE {where} AND status = 'active' "
f"AND source_type != 'excluded' ORDER BY id LIMIT ?",
(limit,),
)
ids = [row["id"] for row in await cursor.fetchall()]
total_cost = 0.0
success = 0
errors: list[dict] = []
for sid in ids:
try:
r = await classify_source(db, sid, model=model)
total_cost += r["usage"]["cost_usd"]
success += 1
except asyncio.CancelledError:
raise
except Exception as e:
logger.error("Klassifikation source_id=%s fehlgeschlagen: %s", sid, e, exc_info=True)
errors.append({"source_id": sid, "error": str(e)})
logger.info(
"Bulk-Klassifikation fertig: %d/%d erfolgreich, $%.4f Kosten, %d Fehler",
success, len(ids), total_cost, len(errors),
)
return {
"processed": len(ids),
"success": success,
"errors": errors,
"total_cost_usd": total_cost,
}

Datei anzeigen

@@ -3503,6 +3503,341 @@ a.dev-source-pill:hover {
color: var(--info); color: var(--info);
} }
/* Sources-Modal: Tabs */
.sources-tabs {
display: flex;
gap: 2px;
border-bottom: 1px solid var(--border-color, rgba(0,0,0,0.1));
margin-bottom: 12px;
}
.sources-tab {
background: transparent;
border: none;
padding: 8px 16px;
font-size: 13px;
font-weight: 500;
color: var(--text-secondary, #555);
cursor: pointer;
border-bottom: 2px solid transparent;
margin-bottom: -1px;
display: inline-flex;
align-items: center;
gap: 8px;
}
.sources-tab:hover {
color: var(--text-primary, #222);
}
.sources-tab.active {
color: var(--primary, #2a81cb);
border-bottom-color: var(--primary, #2a81cb);
}
.sources-tab-badge {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 20px;
padding: 0 6px;
height: 18px;
border-radius: 9px;
background: var(--primary, #2a81cb);
color: #fff;
font-size: 10px;
font-weight: 700;
}
/* Review-Queue */
.review-toolbar {
display: flex;
align-items: center;
justify-content: space-between;
padding: 8px 12px;
background: var(--cat-sonstige-bg, #f6f6fa);
border-radius: var(--radius);
margin-bottom: 12px;
flex-wrap: wrap;
gap: 12px;
}
.review-toolbar-info {
display: flex;
align-items: center;
gap: 16px;
font-size: 13px;
}
.review-conf-filter {
display: inline-flex;
align-items: center;
gap: 6px;
font-size: 12px;
color: var(--text-secondary, #555);
}
.review-conf-filter select {
padding: 2px 6px;
font-size: 12px;
border-radius: var(--radius);
border: 1px solid var(--border-color, rgba(0,0,0,0.15));
}
.review-toolbar-actions {
display: flex;
gap: 6px;
}
.review-list {
display: flex;
flex-direction: column;
gap: 8px;
}
.review-card {
background: var(--surface, #fff);
border: 1px solid var(--border-color, rgba(0,0,0,0.08));
border-radius: var(--radius);
padding: 12px 14px;
}
.review-card-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
gap: 12px;
margin-bottom: 10px;
}
.review-card-title {
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 8px;
}
.review-card-name {
font-weight: 600;
font-size: 14px;
}
.review-card-domain {
font-size: 11px;
color: var(--text-disabled, #888);
}
.review-global-badge {
display: inline-flex;
align-items: center;
padding: 1px 6px;
border-radius: var(--radius);
background: #5e35b1;
color: #fff;
font-size: 9px;
font-weight: 600;
letter-spacing: 0.3px;
text-transform: uppercase;
}
.review-card-confidence {
display: inline-flex;
flex-direction: column;
align-items: center;
padding: 4px 10px;
border-radius: var(--radius);
min-width: 60px;
}
.review-card-confidence .conf-value {
font-size: 14px;
font-weight: 700;
}
.review-card-confidence .conf-label {
font-size: 9px;
text-transform: uppercase;
letter-spacing: 0.3px;
opacity: 0.8;
}
.review-card-confidence.conf-high { background: #e8f5e9; color: #2e7d32; }
.review-card-confidence.conf-medium { background: #fff8e1; color: #ef6c00; }
.review-card-confidence.conf-low { background: #ffebee; color: #c62828; }
.review-card-diff {
display: grid;
grid-template-columns: 1fr;
gap: 4px;
font-size: 12px;
margin-bottom: 10px;
}
.review-diff-row {
display: grid;
grid-template-columns: 110px 1fr 24px 1fr;
align-items: center;
gap: 8px;
padding: 3px 6px;
border-radius: 3px;
}
.review-diff-row.changed {
background: #fff8e1;
}
.review-diff-label {
color: var(--text-secondary, #555);
font-weight: 500;
}
.review-diff-current {
color: var(--text-disabled, #888);
}
.review-diff-arrow {
text-align: center;
color: var(--text-disabled, #888);
font-weight: 600;
}
.review-diff-proposed {
color: var(--text-primary, #222);
font-weight: 500;
}
.review-diff-row.changed .review-diff-proposed {
color: #ef6c00;
font-weight: 600;
}
.review-card-reasoning {
font-size: 12px;
color: var(--text-secondary, #555);
background: var(--cat-sonstige-bg, #f6f6fa);
padding: 8px 10px;
border-radius: var(--radius);
margin-bottom: 10px;
line-height: 1.5;
}
.review-card-actions {
display: flex;
gap: 6px;
flex-wrap: wrap;
}
/* Klassifikations-Badges (politisch / reliability / alignments / state) */
.source-classification-badges {
display: inline-flex;
align-items: center;
gap: 4px;
flex-wrap: wrap;
}
.source-political-badge {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 22px;
padding: 2px 6px;
border-radius: var(--radius);
font-size: 10px;
font-weight: 700;
letter-spacing: 0.4px;
color: #fff;
background: #9e9e9e;
}
.source-political-badge.pol-links_extrem { background: #b71c1c; }
.source-political-badge.pol-links { background: #e53935; }
.source-political-badge.pol-mitte_links { background: #ef9a9a; color: #4a0d0d; }
.source-political-badge.pol-liberal { background: #fdd835; color: #4a3700; }
.source-political-badge.pol-mitte { background: #9e9e9e; }
.source-political-badge.pol-konservativ { background: #90caf9; color: #0d2740; }
.source-political-badge.pol-mitte_rechts { background: #5c6bc0; }
.source-political-badge.pol-rechts { background: #1976d2; }
.source-political-badge.pol-rechts_extrem { background: #0d47a1; }
.source-reliability-dot {
display: inline-block;
width: 10px;
height: 10px;
border-radius: 50%;
background: #9e9e9e;
border: 1px solid rgba(0, 0, 0, 0.15);
}
.source-reliability-dot.rel-sehr_hoch { background: #2e7d32; }
.source-reliability-dot.rel-hoch { background: #66bb6a; }
.source-reliability-dot.rel-gemischt { background: #fbc02d; }
.source-reliability-dot.rel-niedrig { background: #ef6c00; }
.source-reliability-dot.rel-sehr_niedrig { background: #c62828; }
.source-state-badge {
display: inline-flex;
align-items: center;
justify-content: center;
width: 18px;
height: 18px;
border-radius: 50%;
background: #4a148c;
color: #fff;
font-size: 11px;
line-height: 1;
}
.source-ifcn-badge {
display: inline-flex;
align-items: center;
padding: 1px 6px;
border-radius: var(--radius);
background: #e8f5e9;
color: #1b5e20;
border: 1px solid #66bb6a;
font-size: 10px;
font-weight: 600;
letter-spacing: 0.3px;
}
.source-eu-disinfo-badge {
display: inline-flex;
align-items: center;
padding: 1px 6px;
border-radius: var(--radius);
background: #ffebee;
color: #b71c1c;
border: 1px solid #c62828;
font-size: 10px;
font-weight: 600;
letter-spacing: 0.3px;
}
.source-alignment-chip-badge {
display: inline-flex;
align-items: center;
padding: 1px 6px;
border-radius: 999px;
font-size: 10px;
font-weight: 500;
background: var(--cat-sonstige-bg, #eef);
color: var(--text-secondary, #555);
border: 1px solid rgba(0, 0, 0, 0.08);
}
/* Edit-Form: Klassifikations-Sektion */
.sources-classification-section {
margin-top: 12px;
padding-top: 12px;
border-top: 1px solid var(--border-color, rgba(0,0,0,0.08));
}
.sources-classification-header {
font-size: 12px;
font-weight: 600;
color: var(--text-secondary, #555);
margin-bottom: 8px;
letter-spacing: 0.3px;
text-transform: uppercase;
}
.alignment-chips {
display: flex;
flex-wrap: wrap;
gap: 6px;
}
.alignment-chip {
display: inline-flex;
align-items: center;
padding: 4px 10px;
border-radius: 999px;
font-size: 11px;
font-weight: 500;
background: transparent;
color: var(--text-secondary, #555);
border: 1px solid var(--border-color, rgba(0,0,0,0.15));
cursor: pointer;
transition: all 0.12s ease;
}
.alignment-chip:hover {
background: var(--cat-sonstige-bg, #eef);
}
.alignment-chip.active {
background: var(--primary, #2a81cb);
color: #fff;
border-color: var(--primary, #2a81cb);
}
/* Typ-Badges */ /* Typ-Badges */
.source-type-badge { .source-type-badge {
display: inline-flex; display: inline-flex;

Datei anzeigen

@@ -456,6 +456,15 @@
<!-- Stats-Leiste --> <!-- Stats-Leiste -->
<div class="sources-stats-bar" id="sources-stats-bar"></div> <div class="sources-stats-bar" id="sources-stats-bar"></div>
<!-- Tabs: Liste vs. Klassifikations-Review -->
<div class="sources-tabs" role="tablist">
<button type="button" class="sources-tab active" id="sources-tab-list" role="tab" aria-selected="true" onclick="App.switchSourcesTab('list')">Quellenliste</button>
<button type="button" class="sources-tab" id="sources-tab-review" role="tab" aria-selected="false" onclick="App.switchSourcesTab('review')" style="display:none;">Klassifikations-Review <span id="sources-review-count" class="sources-tab-badge">0</span></button>
</div>
<!-- View: Quellenliste -->
<div id="sources-list-view">
<!-- Toolbar --> <!-- Toolbar -->
<div class="sources-toolbar"> <div class="sources-toolbar">
<div class="sources-filters"> <div class="sources-filters">
@@ -481,6 +490,76 @@
<option value="boulevard">Boulevard</option> <option value="boulevard">Boulevard</option>
<option value="sonstige">Sonstige</option> <option value="sonstige">Sonstige</option>
</select> </select>
<label for="sources-filter-political" class="sr-only">Politische Ausrichtung filtern</label>
<select id="sources-filter-political" class="timeline-filter-select" onchange="App.filterSources()">
<option value="">Alle Ausrichtungen</option>
<option value="links_extrem">Links (extrem)</option>
<option value="links">Links</option>
<option value="mitte_links">Mitte-Links</option>
<option value="liberal">Liberal</option>
<option value="mitte">Mitte</option>
<option value="konservativ">Konservativ</option>
<option value="mitte_rechts">Mitte-Rechts</option>
<option value="rechts">Rechts</option>
<option value="rechts_extrem">Rechts (extrem)</option>
<option value="na">Nicht eingeordnet</option>
</select>
<label for="sources-filter-mediatype" class="sr-only">Medientyp filtern</label>
<select id="sources-filter-mediatype" class="timeline-filter-select" onchange="App.filterSources()">
<option value="">Alle Medientypen</option>
<option value="tageszeitung">Tageszeitung</option>
<option value="wochenzeitung">Wochenzeitung</option>
<option value="magazin">Magazin</option>
<option value="tv_sender">TV-Sender</option>
<option value="radio">Radio</option>
<option value="oeffentlich_rechtlich">Öffentlich-Rechtlich</option>
<option value="nachrichtenagentur">Nachrichtenagentur</option>
<option value="online_only">Online-only</option>
<option value="blog">Blog</option>
<option value="telegram_kanal">Telegram-Kanal</option>
<option value="telegram_bot">Telegram-Bot</option>
<option value="podcast">Podcast</option>
<option value="social_media">Social Media</option>
<option value="imageboard">Imageboard</option>
<option value="think_tank">Think Tank</option>
<option value="ngo">NGO</option>
<option value="behoerde">Behörde</option>
<option value="staatsmedium">Staatsmedium</option>
<option value="fachmedium">Fachmedium</option>
<option value="sonstige">Sonstige</option>
</select>
<label for="sources-filter-reliability" class="sr-only">Glaubwürdigkeit filtern</label>
<select id="sources-filter-reliability" class="timeline-filter-select" onchange="App.filterSources()">
<option value="">Alle Glaubwürdigkeiten</option>
<option value="sehr_hoch">Sehr hoch</option>
<option value="hoch">Hoch</option>
<option value="gemischt">Gemischt</option>
<option value="niedrig">Niedrig</option>
<option value="sehr_niedrig">Sehr niedrig</option>
<option value="na">Nicht eingeordnet</option>
</select>
<label for="sources-filter-extern" class="sr-only">Externe Reputation filtern</label>
<select id="sources-filter-extern" class="timeline-filter-select" onchange="App.filterSources()">
<option value="">Externe Reputation: alle</option>
<option value="ifcn">IFCN-Faktenchecker</option>
<option value="eu_disinfo">EU-Desinfo gelistet</option>
</select>
<label for="sources-filter-alignment" class="sr-only">Geopolitische Nähe filtern</label>
<select id="sources-filter-alignment" class="timeline-filter-select" onchange="App.filterSources()">
<option value="">Alle Nähen</option>
<option value="prorussisch">Prorussisch</option>
<option value="proiranisch">Proiranisch</option>
<option value="prowestlich">Prowestlich</option>
<option value="proukrainisch">Proukrainisch</option>
<option value="prochinesisch">Prochinesisch</option>
<option value="projapanisch">Projapanisch</option>
<option value="proisraelisch">Proisraelisch</option>
<option value="propalaestinensisch">Propalästinensisch</option>
<option value="protuerkisch">Protürkisch</option>
<option value="panarabisch">Panarabisch</option>
<option value="neutral">Neutral</option>
<option value="sonstige">Sonstige</option>
</select>
<label for="sources-search" class="sr-only">Quellen durchsuchen</label> <label for="sources-search" class="sr-only">Quellen durchsuchen</label>
<input type="text" id="sources-search" class="timeline-filter-input sources-search-input" placeholder="Suche..." oninput="App.filterSources()"> <input type="text" id="sources-search" class="timeline-filter-input sources-search-input" placeholder="Suche..." oninput="App.filterSources()">
</div> </div>
@@ -548,6 +627,89 @@
<input type="text" id="src-notes" placeholder="Optional"> <input type="text" id="src-notes" placeholder="Optional">
</div> </div>
</div> </div>
<div class="sources-classification-section">
<div class="sources-classification-header">Einordnung</div>
<div class="sources-add-form-grid">
<div class="form-group">
<label for="src-political">Politische Ausrichtung</label>
<select id="src-political">
<option value="na">Nicht eingeordnet</option>
<option value="links_extrem">Links (extrem)</option>
<option value="links">Links</option>
<option value="mitte_links">Mitte-Links</option>
<option value="liberal">Liberal</option>
<option value="mitte">Mitte</option>
<option value="konservativ">Konservativ</option>
<option value="mitte_rechts">Mitte-Rechts</option>
<option value="rechts">Rechts</option>
<option value="rechts_extrem">Rechts (extrem)</option>
</select>
</div>
<div class="form-group">
<label for="src-mediatype">Medientyp</label>
<select id="src-mediatype">
<option value="sonstige">Sonstige</option>
<option value="tageszeitung">Tageszeitung</option>
<option value="wochenzeitung">Wochenzeitung</option>
<option value="magazin">Magazin</option>
<option value="tv_sender">TV-Sender</option>
<option value="radio">Radio</option>
<option value="oeffentlich_rechtlich">Öffentlich-Rechtlich</option>
<option value="nachrichtenagentur">Nachrichtenagentur</option>
<option value="online_only">Online-only</option>
<option value="blog">Blog</option>
<option value="telegram_kanal">Telegram-Kanal</option>
<option value="telegram_bot">Telegram-Bot</option>
<option value="podcast">Podcast</option>
<option value="social_media">Social Media</option>
<option value="imageboard">Imageboard</option>
<option value="think_tank">Think Tank</option>
<option value="ngo">NGO</option>
<option value="behoerde">Behörde</option>
<option value="staatsmedium">Staatsmedium</option>
<option value="fachmedium">Fachmedium</option>
</select>
</div>
<div class="form-group">
<label for="src-reliability">Glaubwürdigkeit</label>
<select id="src-reliability">
<option value="na">Nicht eingeordnet</option>
<option value="sehr_hoch">Sehr hoch</option>
<option value="hoch">Hoch</option>
<option value="gemischt">Gemischt</option>
<option value="niedrig">Niedrig</option>
<option value="sehr_niedrig">Sehr niedrig</option>
</select>
</div>
<div class="form-group">
<label for="src-country">Land (ISO 3166)</label>
<input type="text" id="src-country" maxlength="2" placeholder="z.B. DE, RU, US" style="text-transform:uppercase;">
</div>
<div class="form-group">
<label class="checkbox-label" style="display:flex;align-items:center;gap:8px;">
<input type="checkbox" id="src-state-affiliated">
<span>Staatsnah/-kontrolliert</span>
</label>
</div>
</div>
<div class="form-group" style="margin-top:8px;">
<label>Geopolitische Nähe (Mehrfachauswahl)</label>
<div id="src-alignments-chips" class="alignment-chips" onclick="App.handleAlignmentChipClick(event)">
<button type="button" class="alignment-chip" data-alignment="prorussisch">prorussisch</button>
<button type="button" class="alignment-chip" data-alignment="proiranisch">proiranisch</button>
<button type="button" class="alignment-chip" data-alignment="prowestlich">prowestlich</button>
<button type="button" class="alignment-chip" data-alignment="proukrainisch">proukrainisch</button>
<button type="button" class="alignment-chip" data-alignment="prochinesisch">prochinesisch</button>
<button type="button" class="alignment-chip" data-alignment="projapanisch">projapanisch</button>
<button type="button" class="alignment-chip" data-alignment="proisraelisch">proisraelisch</button>
<button type="button" class="alignment-chip" data-alignment="propalaestinensisch">propalästinensisch</button>
<button type="button" class="alignment-chip" data-alignment="protuerkisch">protürkisch</button>
<button type="button" class="alignment-chip" data-alignment="panarabisch">panarabisch</button>
<button type="button" class="alignment-chip" data-alignment="neutral">neutral</button>
<button type="button" class="alignment-chip" data-alignment="sonstige">sonstige</button>
</div>
</div>
</div>
<div class="sources-discovery-actions"> <div class="sources-discovery-actions">
<button class="btn btn-primary btn-small" onclick="App.saveSource()">Speichern</button> <button class="btn btn-primary btn-small" onclick="App.saveSource()">Speichern</button>
<button class="btn btn-secondary btn-small" onclick="App.toggleSourceForm(false)">Abbrechen</button> <button class="btn btn-secondary btn-small" onclick="App.toggleSourceForm(false)">Abbrechen</button>
@@ -559,6 +721,36 @@
<div class="sources-list" id="sources-list"> <div class="sources-list" id="sources-list">
<div class="empty-state-text" style="padding:var(--sp-3xl);text-align:center;">Lade Quellen...</div> <div class="empty-state-text" style="padding:var(--sp-3xl);text-align:center;">Lade Quellen...</div>
</div> </div>
</div>
<!-- /sources-list-view -->
<!-- View: Klassifikations-Review (Admin-only) -->
<div id="sources-review-view" style="display:none;">
<div class="review-toolbar">
<div class="review-toolbar-info">
<span><strong id="review-pending-count">0</strong> Vorschlaege ausstehend</span>
<label class="review-conf-filter">
Mindest-Konfidenz:
<select id="review-min-confidence" onchange="App.loadClassificationQueue()">
<option value="0">alle</option>
<option value="0.5">0.5+</option>
<option value="0.7">0.7+</option>
<option value="0.85">0.85+</option>
<option value="0.9">0.9+</option>
</select>
</label>
</div>
<div class="review-toolbar-actions">
<button class="btn btn-small btn-secondary" onclick="App.triggerExternalReputationSync()" title="IFCN-Faktenchecker-Liste und EUvsDisinfo-Daten synchronisieren">Externe Daten syncen</button>
<button class="btn btn-small btn-secondary" onclick="App.triggerBulkClassify()" title="LLM-Klassifikation fuer noch unklassifizierte Quellen starten">+ Klassifikation starten</button>
<button class="btn btn-small btn-primary" onclick="App.bulkApproveHighConfidence()" title="Alle Vorschlaege ueber dem Konfidenz-Schwellwert genehmigen">Alle &ge; 0.85 genehmigen</button>
</div>
</div>
<div class="review-list" id="sources-review-list">
<div class="empty-state-text" style="padding:var(--sp-3xl);text-align:center;">Lade Review-Queue...</div>
</div>
</div>
</div> </div>
</div> </div>
</div> </div>

Datei anzeigen

@@ -198,10 +198,46 @@ const API = {
if (params.source_type) query.set('source_type', params.source_type); if (params.source_type) query.set('source_type', params.source_type);
if (params.category) query.set('category', params.category); if (params.category) query.set('category', params.category);
if (params.source_status) query.set('source_status', params.source_status); if (params.source_status) query.set('source_status', params.source_status);
if (params.political_orientation) query.set('political_orientation', params.political_orientation);
if (params.media_type) query.set('media_type', params.media_type);
if (params.reliability) query.set('reliability', params.reliability);
if (params.alignment) query.set('alignment', params.alignment);
if (params.state_affiliated !== undefined && params.state_affiliated !== null) {
query.set('state_affiliated', String(params.state_affiliated));
}
const qs = query.toString(); const qs = query.toString();
return this._request('GET', `/sources${qs ? '?' + qs : ''}`); return this._request('GET', `/sources${qs ? '?' + qs : ''}`);
}, },
// Sources: Klassifikations-Review (LLM)
getClassificationStats() {
return this._request('GET', '/sources/classification/stats');
},
getClassificationQueue(limit = 50, minConfidence = 0.0) {
const qs = new URLSearchParams({ limit: String(limit), min_confidence: String(minConfidence) }).toString();
return this._request('GET', `/sources/classification/queue?${qs}`);
},
approveClassification(id) {
return this._request('POST', `/sources/${id}/classification/approve`);
},
rejectClassification(id) {
return this._request('POST', `/sources/${id}/classification/reject`);
},
reclassifySource(id) {
return this._request('POST', `/sources/${id}/classification/reclassify`);
},
triggerBulkClassify(limit = 50, onlyUnclassified = true) {
const qs = new URLSearchParams({ limit: String(limit), only_unclassified: String(onlyUnclassified) }).toString();
return this._request('POST', `/sources/classification/bulk-classify?${qs}`);
},
bulkApproveClassifications(minConfidence = 0.85) {
const qs = new URLSearchParams({ min_confidence: String(minConfidence) }).toString();
return this._request('POST', `/sources/classification/bulk-approve?${qs}`);
},
triggerExternalReputationSync() {
return this._request('POST', '/sources/external-reputation/sync');
},
createSource(data) { createSource(data) {
return this._request('POST', '/sources', data); return this._request('POST', '/sources', data);
}, },

Datei anzeigen

@@ -2702,6 +2702,12 @@ async handleRefresh() {
async openSourceManagement() { async openSourceManagement() {
openModal('modal-sources'); openModal('modal-sources');
await this.loadSources(); await this.loadSources();
// Admin sieht den Review-Tab
const reviewTab = document.getElementById('sources-tab-review');
if (reviewTab && this.user && this.user.role === 'org_admin') {
reviewTab.style.display = '';
this._refreshReviewBadge().catch(() => {});
}
}, },
async loadSources() { async loadSources() {
@@ -2722,6 +2728,122 @@ async handleRefresh() {
} }
}, },
async _refreshReviewBadge() {
try {
const stats = await API.getClassificationStats();
const badge = document.getElementById('sources-review-count');
if (badge) badge.textContent = String(stats.pending_review || 0);
} catch (_) { /* still ok */ }
},
switchSourcesTab(tab) {
const listView = document.getElementById('sources-list-view');
const reviewView = document.getElementById('sources-review-view');
const tabList = document.getElementById('sources-tab-list');
const tabReview = document.getElementById('sources-tab-review');
if (!listView || !reviewView) return;
if (tab === 'review') {
listView.style.display = 'none';
reviewView.style.display = '';
if (tabList) { tabList.classList.remove('active'); tabList.setAttribute('aria-selected', 'false'); }
if (tabReview) { tabReview.classList.add('active'); tabReview.setAttribute('aria-selected', 'true'); }
this.loadClassificationQueue();
} else {
listView.style.display = '';
reviewView.style.display = 'none';
if (tabList) { tabList.classList.add('active'); tabList.setAttribute('aria-selected', 'true'); }
if (tabReview) { tabReview.classList.remove('active'); tabReview.setAttribute('aria-selected', 'false'); }
}
},
async loadClassificationQueue() {
const list = document.getElementById('sources-review-list');
if (!list) return;
const minConf = parseFloat(document.getElementById('review-min-confidence')?.value || '0');
list.innerHTML = '<div class="empty-state-text" style="padding:var(--sp-3xl);text-align:center;">Lade...</div>';
try {
const items = await API.getClassificationQueue(200, minConf);
this._reviewItems = items;
const countEl = document.getElementById('review-pending-count');
if (countEl) countEl.textContent = String(items.length);
if (items.length === 0) {
list.innerHTML = '<div class="empty-state-text" style="padding:var(--sp-3xl);text-align:center;">Keine ausstehenden Vorschlaege.</div>';
return;
}
list.innerHTML = items.map(item => UI.renderClassificationQueueItem(item)).join('');
} catch (err) {
list.innerHTML = `<div class="empty-state-text" style="padding:var(--sp-3xl);text-align:center;color:var(--danger);">Fehler: ${err.message}</div>`;
}
},
async approveClassification(id) {
try {
await API.approveClassification(id);
UI.showToast('Klassifikation uebernommen.', 'success');
await this.loadClassificationQueue();
this._refreshReviewBadge();
} catch (err) {
UI.showToast('Approve fehlgeschlagen: ' + err.message, 'error');
}
},
async rejectClassification(id) {
try {
await API.rejectClassification(id);
UI.showToast('Vorschlag verworfen.', 'success');
await this.loadClassificationQueue();
this._refreshReviewBadge();
} catch (err) {
UI.showToast('Reject fehlgeschlagen: ' + err.message, 'error');
}
},
async reclassifySource(id) {
const btn = document.querySelector(`[data-reclassify-id="${id}"]`);
if (btn) { btn.disabled = true; btn.textContent = '...'; }
try {
await API.reclassifySource(id);
UI.showToast('Neu klassifiziert.', 'success');
await this.loadClassificationQueue();
} catch (err) {
UI.showToast('Reclassify fehlgeschlagen: ' + err.message, 'error');
} finally {
if (btn) { btn.disabled = false; btn.textContent = 'Neu klassifizieren'; }
}
},
async triggerBulkClassify() {
if (!confirm('Bulk-Klassifikation aller noch nicht klassifizierten Quellen starten? Lauft im Hintergrund (~3-5 Sek pro Quelle, ~0.02 USD pro Quelle).')) return;
try {
const r = await API.triggerBulkClassify(500, true);
UI.showToast(`Bulk-Klassifikation gestartet (limit=${r.limit}). Nachschauen mit Reload.`, 'info');
} catch (err) {
UI.showToast('Start fehlgeschlagen: ' + err.message, 'error');
}
},
async bulkApproveHighConfidence() {
if (!confirm('Alle Vorschlaege mit Konfidenz >= 0.85 genehmigen?')) return;
try {
const r = await API.bulkApproveClassifications(0.85);
UI.showToast(`${r.approved_count} Vorschlaege uebernommen.`, 'success');
await this.loadClassificationQueue();
this._refreshReviewBadge();
} catch (err) {
UI.showToast('Bulk-Approve fehlgeschlagen: ' + err.message, 'error');
}
},
async triggerExternalReputationSync() {
if (!confirm('IFCN- und EUvsDisinfo-Datenbanken jetzt syncen? Lauft im Hintergrund (~30 Sek).')) return;
try {
await API.triggerExternalReputationSync();
UI.showToast('Externer Sync gestartet. Quellenliste in 30 Sek neu laden.', 'info');
} catch (err) {
UI.showToast('Sync fehlgeschlagen: ' + err.message, 'error');
}
},
renderSourceStats(stats) { renderSourceStats(stats) {
const bar = document.getElementById('sources-stats-bar'); const bar = document.getElementById('sources-stats-bar');
if (!bar) return; if (!bar) return;
@@ -2750,6 +2872,11 @@ async handleRefresh() {
// Filter anwenden // Filter anwenden
const typeFilter = document.getElementById('sources-filter-type')?.value || ''; const typeFilter = document.getElementById('sources-filter-type')?.value || '';
const catFilter = document.getElementById('sources-filter-category')?.value || ''; const catFilter = document.getElementById('sources-filter-category')?.value || '';
const politicalFilter = document.getElementById('sources-filter-political')?.value || '';
const mediaTypeFilter = document.getElementById('sources-filter-mediatype')?.value || '';
const reliabilityFilter = document.getElementById('sources-filter-reliability')?.value || '';
const alignmentFilter = document.getElementById('sources-filter-alignment')?.value || '';
const externFilter = document.getElementById('sources-filter-extern')?.value || '';
const search = (document.getElementById('sources-search')?.value || '').toLowerCase(); const search = (document.getElementById('sources-search')?.value || '').toLowerCase();
// Alle Quellen nach Domain gruppieren // Alle Quellen nach Domain gruppieren
@@ -2800,6 +2927,25 @@ async handleRefresh() {
if (!hasMatchingCat) continue; if (!hasMatchingCat) continue;
} }
// Klassifikations-Filter
if (politicalFilter) {
if (!feeds.some(f => (f.political_orientation || 'na') === politicalFilter)) continue;
}
if (mediaTypeFilter) {
if (!feeds.some(f => (f.media_type || 'sonstige') === mediaTypeFilter)) continue;
}
if (reliabilityFilter) {
if (!feeds.some(f => (f.reliability || 'na') === reliabilityFilter)) continue;
}
if (alignmentFilter) {
if (!feeds.some(f => Array.isArray(f.alignments) && f.alignments.includes(alignmentFilter))) continue;
}
if (externFilter === 'ifcn') {
if (!feeds.some(f => f.ifcn_signatory)) continue;
} else if (externFilter === 'eu_disinfo') {
if (!feeds.some(f => f.eu_disinfo_listed)) continue;
}
// Suche // Suche
if (search) { if (search) {
const groupText = feeds.map(f => const groupText = feeds.map(f =>
@@ -3054,6 +3200,13 @@ async handleRefresh() {
document.getElementById('src-discover-btn').disabled = false; document.getElementById('src-discover-btn').disabled = false;
document.getElementById('src-discover-btn').textContent = 'Erkennen'; document.getElementById('src-discover-btn').textContent = 'Erkennen';
document.getElementById('src-type-select').value = 'rss_feed'; document.getElementById('src-type-select').value = 'rss_feed';
// Klassifikations-Felder auf Default zurücksetzen
const polEl = document.getElementById('src-political'); if (polEl) polEl.value = 'na';
const mtEl = document.getElementById('src-mediatype'); if (mtEl) mtEl.value = 'sonstige';
const relEl = document.getElementById('src-reliability'); if (relEl) relEl.value = 'na';
const ccEl = document.getElementById('src-country'); if (ccEl) ccEl.value = '';
const saEl = document.getElementById('src-state-affiliated'); if (saEl) saEl.checked = false;
this._setAlignmentChips([]);
// Save-Button Text zurücksetzen // Save-Button Text zurücksetzen
const saveBtn = document.querySelector('#src-discovery-result .sources-discovery-actions .btn-primary'); const saveBtn = document.querySelector('#src-discovery-result .sources-discovery-actions .btn-primary');
if (saveBtn) saveBtn.textContent = 'Speichern'; if (saveBtn) saveBtn.textContent = 'Speichern';
@@ -3235,6 +3388,19 @@ async handleRefresh() {
rss_url: source.url, rss_url: source.url,
}; };
// Klassifikations-Felder setzen
const polEl = document.getElementById('src-political');
if (polEl) polEl.value = source.political_orientation || 'na';
const mtEl = document.getElementById('src-mediatype');
if (mtEl) mtEl.value = source.media_type || 'sonstige';
const relEl = document.getElementById('src-reliability');
if (relEl) relEl.value = source.reliability || 'na';
const ccEl = document.getElementById('src-country');
if (ccEl) ccEl.value = source.country_code || '';
const saEl = document.getElementById('src-state-affiliated');
if (saEl) saEl.checked = !!source.state_affiliated;
this._setAlignmentChips(source.alignments || []);
// Submit-Button-Text ändern // Submit-Button-Text ändern
const saveBtn = document.querySelector('#src-discovery-result .sources-discovery-actions .btn-primary'); const saveBtn = document.querySelector('#src-discovery-result .sources-discovery-actions .btn-primary');
if (saveBtn) saveBtn.textContent = 'Quelle speichern'; if (saveBtn) saveBtn.textContent = 'Quelle speichern';
@@ -3243,6 +3409,27 @@ async handleRefresh() {
if (form) form.scrollIntoView({ behavior: 'smooth', block: 'start' }); if (form) form.scrollIntoView({ behavior: 'smooth', block: 'start' });
}, },
_setAlignmentChips(active) {
const chips = document.querySelectorAll('#src-alignments-chips .alignment-chip');
const set = new Set((active || []).map(a => (a || '').toLowerCase()));
chips.forEach(chip => {
if (set.has(chip.dataset.alignment)) chip.classList.add('active');
else chip.classList.remove('active');
});
},
_getAlignmentChips() {
return Array.from(document.querySelectorAll('#src-alignments-chips .alignment-chip.active'))
.map(chip => chip.dataset.alignment);
},
handleAlignmentChipClick(e) {
const chip = e.target.closest('.alignment-chip');
if (!chip) return;
e.preventDefault();
chip.classList.toggle('active');
},
async saveSource() { async saveSource() {
const name = document.getElementById('src-name').value.trim(); const name = document.getElementById('src-name').value.trim();
if (!name) { if (!name) {
@@ -3258,6 +3445,12 @@ async handleRefresh() {
url: discovered.rss_url || (discovered.source_type === 'telegram_channel' ? (document.getElementById('src-domain').value || null) : null), url: discovered.rss_url || (discovered.source_type === 'telegram_channel' ? (document.getElementById('src-domain').value || null) : null),
domain: document.getElementById('src-domain').value.trim() || discovered.domain || null, domain: document.getElementById('src-domain').value.trim() || discovered.domain || null,
notes: document.getElementById('src-notes').value.trim() || null, notes: document.getElementById('src-notes').value.trim() || null,
political_orientation: document.getElementById('src-political')?.value || 'na',
media_type: document.getElementById('src-mediatype')?.value || 'sonstige',
reliability: document.getElementById('src-reliability')?.value || 'na',
country_code: (document.getElementById('src-country')?.value || '').trim().toUpperCase() || null,
state_affiliated: !!document.getElementById('src-state-affiliated')?.checked,
alignments: this._getAlignmentChips(),
}; };
if (!data.domain && discovered.domain) { if (!data.domain && discovered.domain) {

Datei anzeigen

@@ -1062,6 +1062,163 @@ const UI = {
'sonstige': 'Sonstige', 'sonstige': 'Sonstige',
}, },
_politicalLabels: {
links_extrem: { short: 'L+', full: 'Links (extrem)' },
links: { short: 'L', full: 'Links' },
mitte_links: { short: 'ML', full: 'Mitte-Links' },
liberal: { short: 'LIB', full: 'Liberal' },
mitte: { short: 'M', full: 'Mitte' },
konservativ: { short: 'KON', full: 'Konservativ' },
mitte_rechts: { short: 'MR', full: 'Mitte-Rechts' },
rechts: { short: 'R', full: 'Rechts' },
rechts_extrem: { short: 'R+', full: 'Rechts (extrem)' },
na: { short: '?', full: 'Nicht eingeordnet' },
},
_reliabilityLabels: {
sehr_hoch: 'Sehr hoch',
hoch: 'Hoch',
gemischt: 'Gemischt',
niedrig: 'Niedrig',
sehr_niedrig: 'Sehr niedrig',
na: 'Nicht eingeordnet',
},
_mediaTypeLabels: {
tageszeitung: 'Tageszeitung',
wochenzeitung: 'Wochenzeitung',
magazin: 'Magazin',
tv_sender: 'TV-Sender',
radio: 'Radio',
oeffentlich_rechtlich: 'Öffentlich-Rechtlich',
nachrichtenagentur: 'Nachrichtenagentur',
online_only: 'Online-only',
blog: 'Blog',
telegram_kanal: 'Telegram-Kanal',
telegram_bot: 'Telegram-Bot',
podcast: 'Podcast',
social_media: 'Social Media',
imageboard: 'Imageboard',
think_tank: 'Think Tank',
ngo: 'NGO',
behoerde: 'Behörde',
staatsmedium: 'Staatsmedium',
fachmedium: 'Fachmedium',
sonstige: 'Sonstige',
},
_alignmentLabels: {
prorussisch: 'prorussisch',
proiranisch: 'proiranisch',
prowestlich: 'prowestlich',
proukrainisch: 'proukrainisch',
prochinesisch: 'prochinesisch',
projapanisch: 'projapanisch',
proisraelisch: 'proisraelisch',
propalaestinensisch: 'propalästinensisch',
protuerkisch: 'protürkisch',
panarabisch: 'panarabisch',
neutral: 'neutral',
sonstige: 'sonstige',
},
/**
* Eintrag in der Klassifikations-Review-Queue.
* Zeigt Diff zwischen aktuellem Wert und LLM-Vorschlag.
*/
renderClassificationQueueItem(item) {
const cur = item.current || {};
const prop = item.proposed || {};
const conf = prop.confidence || 0;
const confPct = Math.round(conf * 100);
const confClass = conf >= 0.85 ? 'high' : (conf >= 0.7 ? 'medium' : 'low');
const diffRow = (label, currentVal, proposedVal, formatter) => {
const fmt = formatter || (v => v == null || v === '' ? '–' : String(v));
const c = fmt(currentVal);
const p = fmt(proposedVal);
const changed = c !== p;
return `<div class="review-diff-row${changed ? ' changed' : ''}">
<span class="review-diff-label">${this.escape(label)}</span>
<span class="review-diff-current">${this.escape(c)}</span>
<span class="review-diff-arrow">→</span>
<span class="review-diff-proposed">${this.escape(p)}</span>
</div>`;
};
const polFmt = v => (v && v !== 'na') ? (this._politicalLabels[v]?.full || v) : '–';
const mtFmt = v => (v && v !== 'sonstige') ? (this._mediaTypeLabels[v] || v) : (v === 'sonstige' ? 'Sonstige' : '–');
const relFmt = v => (v && v !== 'na') ? (this._reliabilityLabels[v] || v) : '–';
const stateFmt = v => v ? 'ja' : 'nein';
const ccFmt = v => v || '–';
const alignFmt = v => (Array.isArray(v) && v.length > 0)
? v.map(a => this._alignmentLabels[a] || a).join(', ')
: '–';
const globalBadge = item.is_global ? '<span class="review-global-badge">Grundquelle</span>' : '';
const reasoning = prop.reasoning ? this.escape(prop.reasoning) : '';
return `<div class="review-card" data-source-id="${item.id}">
<div class="review-card-header">
<div class="review-card-title">
<span class="review-card-name">${this.escape(item.name)}</span>
${globalBadge}
<span class="review-card-domain">${this.escape(item.domain || '')}</span>
</div>
<div class="review-card-confidence conf-${confClass}" title="LLM-Konfidenz">
<span class="conf-value">${confPct}%</span>
<span class="conf-label">Konfidenz</span>
</div>
</div>
<div class="review-card-diff">
${diffRow('Politik', cur.political_orientation, prop.political_orientation, polFmt)}
${diffRow('Medientyp', cur.media_type, prop.media_type, mtFmt)}
${diffRow('Glaubwürdigkeit', cur.reliability, prop.reliability, relFmt)}
${diffRow('Staatsnah', cur.state_affiliated, prop.state_affiliated, stateFmt)}
${diffRow('Land', cur.country_code, prop.country_code, ccFmt)}
${diffRow('Geopol. Nähe', cur.alignments, prop.alignments, alignFmt)}
</div>
${reasoning ? `<div class="review-card-reasoning"><strong>Begründung:</strong> ${reasoning}</div>` : ''}
<div class="review-card-actions">
<button class="btn btn-small btn-primary" onclick="App.approveClassification(${item.id})">Übernehmen</button>
<button class="btn btn-small btn-secondary" onclick="App.rejectClassification(${item.id})">Verwerfen</button>
<button class="btn btn-small btn-secondary" data-reclassify-id="${item.id}" onclick="App.reclassifySource(${item.id})">Neu klassifizieren</button>
</div>
</div>`;
},
_renderClassificationBadges(feed) {
const parts = [];
const pol = feed.political_orientation;
if (pol && pol !== 'na') {
const label = this._politicalLabels[pol] || { short: pol, full: pol };
parts.push(`<span class="source-political-badge pol-${this.escape(pol)}" title="${this.escape(label.full)}">${this.escape(label.short)}</span>`);
}
const rel = feed.reliability;
if (rel && rel !== 'na') {
const relLabel = this._reliabilityLabels[rel] || rel;
const relSource = feed.ifcn_signatory ? '(IFCN-Faktenchecker)'
: (feed.eu_disinfo_listed ? `(EU-Desinfo, ${feed.eu_disinfo_case_count || 0} Fälle)`
: '(LLM-Schätzung)');
const relTitle = `Glaubwürdigkeit: ${relLabel} ${relSource}`;
parts.push(`<span class="source-reliability-dot rel-${this.escape(rel)}" title="${this.escape(relTitle)}" aria-label="${this.escape(relTitle)}"></span>`);
}
if (feed.ifcn_signatory) {
parts.push(`<span class="source-ifcn-badge" title="IFCN-zertifizierter Faktenchecker" aria-label="IFCN-Faktenchecker">✓ IFCN</span>`);
}
if (feed.eu_disinfo_listed) {
const cnt = feed.eu_disinfo_case_count || 0;
const title = `EUvsDisinfo: ${cnt} dokumentierte Desinformations-Fälle`;
parts.push(`<span class="source-eu-disinfo-badge" title="${this.escape(title)}" aria-label="${this.escape(title)}">⚠ EU-Desinfo (${cnt})</span>`);
}
if (feed.state_affiliated) {
parts.push(`<span class="source-state-badge" title="Staatsnah/-kontrolliert" aria-label="Staatsnah">⚑</span>`);
}
const aligns = Array.isArray(feed.alignments) ? feed.alignments : [];
aligns.forEach(a => {
const label = this._alignmentLabels[a] || a;
parts.push(`<span class="source-alignment-chip-badge align-${this.escape(a)}">${this.escape(label)}</span>`);
});
return parts.join('');
},
/** /**
* Domain-Gruppe rendern (aufklappbar mit Feeds). * Domain-Gruppe rendern (aufklappbar mit Feeds).
*/ */
@@ -1117,20 +1274,52 @@ const UI = {
? `<span class="source-feed-count">${feedCount} Feed${feedCount !== 1 ? 's' : ''}</span>` ? `<span class="source-feed-count">${feedCount} Feed${feedCount !== 1 ? 's' : ''}</span>`
: ''; : '';
// Info-Button mit Tooltip (Typ, Sprache, Ausrichtung) // Info-Button mit Tooltip (Typ, Sprache, Ausrichtung, Klassifikation)
let infoButtonHtml = ''; let infoButtonHtml = '';
const firstFeed = feeds[0] || {}; const firstFeed = feeds[0] || {};
const hasInfo = firstFeed.language || firstFeed.bias; const hasInfo = firstFeed.language || firstFeed.bias
|| (firstFeed.political_orientation && firstFeed.political_orientation !== 'na')
|| (firstFeed.media_type && firstFeed.media_type !== 'sonstige')
|| (firstFeed.reliability && firstFeed.reliability !== 'na')
|| firstFeed.state_affiliated
|| firstFeed.country_code
|| (Array.isArray(firstFeed.alignments) && firstFeed.alignments.length > 0);
if (hasInfo) { if (hasInfo) {
const typeMap = { rss_feed: 'RSS-Feed', web_source: 'Web-Quelle', telegram_channel: 'Telegram-Kanal' }; const typeMap = { rss_feed: 'RSS-Feed', web_source: 'Web-Quelle', telegram_channel: 'Telegram-Kanal', podcast_feed: 'Podcast' };
const lines = []; const lines = [];
lines.push('Typ: ' + (typeMap[firstFeed.source_type] || firstFeed.source_type || 'Unbekannt')); lines.push('Typ: ' + (typeMap[firstFeed.source_type] || firstFeed.source_type || 'Unbekannt'));
if (firstFeed.language) lines.push('Sprache: ' + firstFeed.language); if (firstFeed.language) lines.push('Sprache: ' + firstFeed.language);
if (firstFeed.bias) lines.push('Ausrichtung: ' + firstFeed.bias); if (firstFeed.country_code) lines.push('Land: ' + firstFeed.country_code);
if (firstFeed.media_type && firstFeed.media_type !== 'sonstige') {
lines.push('Medientyp: ' + (this._mediaTypeLabels[firstFeed.media_type] || firstFeed.media_type));
}
if (firstFeed.political_orientation && firstFeed.political_orientation !== 'na') {
const pl = this._politicalLabels[firstFeed.political_orientation];
lines.push('Politisch: ' + (pl ? pl.full : firstFeed.political_orientation));
}
if (firstFeed.reliability && firstFeed.reliability !== 'na') {
const relLabel = this._reliabilityLabels[firstFeed.reliability] || firstFeed.reliability;
const relSrc = firstFeed.ifcn_signatory ? ' (IFCN-Faktenchecker)'
: (firstFeed.eu_disinfo_listed ? ` (EU-Desinfo, ${firstFeed.eu_disinfo_case_count || 0} Fälle)`
: ' (LLM-Schätzung)');
lines.push('Glaubwürdigkeit: ' + relLabel + relSrc);
}
if (firstFeed.ifcn_signatory) lines.push('IFCN-Faktenchecker: ja');
if (firstFeed.eu_disinfo_listed) {
lines.push(`EUvsDisinfo: ${firstFeed.eu_disinfo_case_count || 0} Fälle` + (firstFeed.eu_disinfo_last_seen ? ` (zuletzt ${firstFeed.eu_disinfo_last_seen})` : ''));
}
if (firstFeed.state_affiliated) lines.push('Staatsnah: ja');
if (Array.isArray(firstFeed.alignments) && firstFeed.alignments.length > 0) {
const labels = firstFeed.alignments.map(a => this._alignmentLabels[a] || a);
lines.push('Geopolitische Nähe: ' + labels.join(', '));
}
if (firstFeed.bias) lines.push('Notiz: ' + firstFeed.bias);
const tooltipText = this.escape(lines.join('\n')); const tooltipText = this.escape(lines.join('\n'));
infoButtonHtml = ` <span class="info-icon tooltip-below" data-tooltip="${tooltipText}"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg></span>`; infoButtonHtml = ` <span class="info-icon tooltip-below" data-tooltip="${tooltipText}"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg></span>`;
} }
const classificationBadges = this._renderClassificationBadges(firstFeed);
return `<div class="source-group"> return `<div class="source-group">
<div class="source-group-header" ${toggleAttr}> <div class="source-group-header" ${toggleAttr}>
${toggleIcon} ${toggleIcon}
@@ -1138,6 +1327,7 @@ const UI = {
<span class="source-group-name">${this.escape(displayName)}</span>${infoButtonHtml} <span class="source-group-name">${this.escape(displayName)}</span>${infoButtonHtml}
</div> </div>
<span class="source-category-badge cat-${feeds[0]?.category || 'sonstige'}">${catLabel}</span> <span class="source-category-badge cat-${feeds[0]?.category || 'sonstige'}">${catLabel}</span>
${classificationBadges ? `<span class="source-classification-badges">${classificationBadges}</span>` : ''}
${feedCountBadge} ${feedCountBadge}
<div class="source-group-actions" onclick="event.stopPropagation()"> <div class="source-group-actions" onclick="event.stopPropagation()">
${!isGlobal && !hasMultiple && feeds[0]?.id ? `<button class="source-edit-btn" onclick="App.editSource(${feeds[0].id})" title="Bearbeiten" aria-label="Bearbeiten">&#9998;</button>` : ''} ${!isGlobal && !hasMultiple && feeds[0]?.id ? `<button class="source-edit-btn" onclick="App.editSource(${feeds[0].id})" title="Bearbeiten" aria-label="Bearbeiten">&#9998;</button>` : ''}