refactor(klassifikation): Klassifikation aus Monitor entfernt — Pflege jetzt in der Verwaltung

Endpoints unter /api/sources/classification/* weg, Service-Module (source_classifier, external_reputation) gelöscht. Quellen-Modal verliert Tab Klassifikations-Review, Klassifikations-Section in der Edit-Form, alle Bulk-Buttons (Sync, Klassifikation starten, Bulk-Approve). API-Methoden in api.js entfernt, alignment-Helper raus, saveSource entschlackt.

Read-Only bleibt: Filter-Dropdowns über der Quellenliste (Politik, Medientyp, Reliability, Externe Reputation, Alignment) und Inline-Badges (_renderClassificationBadges + Label-Maps in components.js). Kunde sieht nur freigegebene Werte.

GET /api/sources liefert weiter Klassifikations-Felder + alignments für die Anzeige; SourceCreate/SourceUpdate akzeptieren keine Klassifikations-Felder mehr.

Bulk-Klassifikations-Skripte entfernt — Pflege läuft über Verwaltungs-UI.
Dieser Commit ist enthalten in:
Claude Code
2026-05-09 22:01:20 +00:00
Ursprung 5f053a3eca
Commit b90e47ff3f
10 geänderte Dateien mit 6150 neuen und 7831 gelöschten Zeilen

Datei anzeigen

@@ -1,13 +1,11 @@
"""Sources-Router: Quellenverwaltung (Multi-Tenant)."""
"""Sources-Router: Quellenverwaltung (Multi-Tenant). Klassifikation: Read-Only — Pflege in der Verwaltung."""
import json
import logging
from collections import defaultdict
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
from fastapi import APIRouter, Depends, HTTPException, status
from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest
from auth import get_current_user
from database import db_dependency, get_db, refresh_source_counts
from services.external_reputation import apply_reputation_overrides, sync_all as sync_external_reputation
from services.source_classifier import bulk_classify, classify_source
from database import db_dependency, refresh_source_counts
from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES
import aiosqlite
@@ -18,22 +16,11 @@ router = APIRouter(prefix="/api/sources", tags=["sources"])
SOURCE_UPDATE_COLUMNS = {
"name", "url", "domain", "source_type", "category", "status", "notes",
"language", "bias",
"political_orientation", "media_type", "reliability",
"state_affiliated", "country_code",
}
SOURCE_CLASSIFICATION_FIELDS = {
"political_orientation", "media_type", "reliability",
"state_affiliated", "country_code",
}
ALLOWED_ALIGNMENTS = {
"prorussisch", "proiranisch", "prowestlich", "proukrainisch",
"prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch",
"protuerkisch", "panarabisch", "neutral", "sonstige",
}
async def _load_alignments_for(db: aiosqlite.Connection, source_ids: list[int]) -> dict[int, list[str]]:
"""Lädt alignments fuer mehrere Quellen in einer Query und gibt {source_id: [alignment, ...]} zurück."""
"""Lädt alignments fuer mehrere Quellen — Read-Only fuer Anzeige (Pflege in Verwaltung)."""
if not source_ids:
return {}
placeholders = ",".join("?" for _ in source_ids)
@@ -47,26 +34,6 @@ async def _load_alignments_for(db: aiosqlite.Connection, source_ids: list[int])
return out
async def _replace_alignments(db: aiosqlite.Connection, source_id: int, alignments: list[str]):
"""Ersetzt die alignments-Liste einer Quelle (DELETE + INSERT) — Aufrufer muss commit() machen."""
await db.execute("DELETE FROM source_alignments WHERE source_id = ?", (source_id,))
seen: set[str] = set()
for raw in alignments:
a = (raw or "").strip().lower()
if not a or a in seen:
continue
if a not in ALLOWED_ALIGNMENTS:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"Ungueltiger alignment-Wert: '{a}'",
)
seen.add(a)
await db.execute(
"INSERT INTO source_alignments (source_id, alignment) VALUES (?, ?)",
(source_id, a),
)
def _check_source_ownership(source: dict, username: str):
"""Prueft ob der Nutzer die Quelle bearbeiten/loeschen darf.
@@ -538,14 +505,9 @@ async def create_source(
)
payload = data.model_dump(exclude_unset=True)
alignments = payload.pop("alignments", None)
classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & payload.keys()) or alignments is not None
cols = ["name", "url", "domain", "source_type", "category", "status", "notes",
"language", "bias",
"political_orientation", "media_type", "reliability",
"state_affiliated", "country_code",
"added_by", "tenant_id"]
"language", "bias", "added_by", "tenant_id"]
vals = [
data.name,
data.url,
@@ -556,31 +518,16 @@ async def create_source(
data.notes,
payload.get("language"),
payload.get("bias"),
payload.get("political_orientation"),
payload.get("media_type"),
payload.get("reliability"),
1 if payload.get("state_affiliated") else 0,
payload.get("country_code"),
current_user["username"],
tenant_id,
]
if classification_touched:
cols += ["classification_source", "classified_at"]
vals += ["manual"]
ts_marker = True
else:
ts_marker = False
placeholders = ", ".join(["?"] * len(vals) + (["CURRENT_TIMESTAMP"] if ts_marker else []))
placeholders = ", ".join(["?"] * len(vals))
cursor = await db.execute(
f"INSERT INTO sources ({', '.join(cols)}) VALUES ({placeholders})",
vals,
)
new_id = cursor.lastrowid
if alignments:
await _replace_alignments(db, new_id, alignments)
await db.commit()
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (new_id,))
@@ -612,40 +559,19 @@ async def update_source(
_check_source_ownership(dict(row), current_user["username"])
payload = data.model_dump(exclude_unset=True)
alignments = payload.pop("alignments", None)
updates = {}
for field, value in payload.items():
if field not in SOURCE_UPDATE_COLUMNS:
continue
# Domain normalisieren
if field == "domain" and value:
value = _DOMAIN_ALIASES.get(value.lower(), value.lower())
if field == "state_affiliated":
value = 1 if value else 0
updates[field] = value
classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & updates.keys()) or alignments is not None
if classification_touched:
updates["classification_source"] = "manual"
updates["classified_at"] = "CURRENT_TIMESTAMP_MARKER"
if updates:
set_parts = []
values = []
for k, v in updates.items():
if v == "CURRENT_TIMESTAMP_MARKER":
set_parts.append(f"{k} = CURRENT_TIMESTAMP")
else:
set_parts.append(f"{k} = ?")
values.append(v)
values.append(source_id)
await db.execute(f"UPDATE sources SET {', '.join(set_parts)} WHERE id = ?", values)
if alignments is not None:
await _replace_alignments(db, source_id, alignments)
if updates or alignments is not None:
set_clause = ", ".join(f"{k} = ?" for k in updates)
values = list(updates.values()) + [source_id]
await db.execute(f"UPDATE sources SET {set_clause} WHERE id = ?", values)
await db.commit()
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
@@ -714,327 +640,3 @@ async def trigger_refresh_counts(
await refresh_source_counts(db)
return {"status": "ok"}
# === Klassifikations-Review (LLM-Vorschlaege approve/reject/reclassify) ===
def _require_admin_for_global(row: dict, current_user: dict):
"""Globale Quellen (tenant_id IS NULL) duerfen nur org_admins approve-en/reclassify-en."""
if row.get("tenant_id") is None and current_user.get("role") != "org_admin":
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Globale Quellen koennen nur von Admins klassifiziert werden",
)
@router.get("/classification/stats")
async def classification_stats(
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Counts pro classification_source-Wert (global + eigene Org)."""
tenant_id = current_user.get("tenant_id")
cursor = await db.execute(
"""SELECT classification_source, COUNT(*) as cnt
FROM sources
WHERE (tenant_id IS NULL OR tenant_id = ?) AND status = 'active'
GROUP BY classification_source""",
(tenant_id,),
)
by_source = {row["classification_source"] or "legacy": row["cnt"] for row in await cursor.fetchall()}
cursor = await db.execute(
"""SELECT COUNT(*) as cnt FROM sources
WHERE (tenant_id IS NULL OR tenant_id = ?) AND status = 'active'
AND proposed_political_orientation IS NOT NULL""",
(tenant_id,),
)
pending = (await cursor.fetchone())["cnt"]
return {
"by_classification_source": by_source,
"pending_review": pending,
"total": sum(by_source.values()),
}
@router.get("/classification/queue")
async def classification_queue(
limit: int = 50,
min_confidence: float = 0.0,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Liefert Quellen mit nicht-leeren proposed_*-Spalten (Review-Queue)."""
tenant_id = current_user.get("tenant_id")
cursor = await db.execute(
"""SELECT s.* FROM sources s
WHERE (s.tenant_id IS NULL OR s.tenant_id = ?)
AND s.proposed_political_orientation IS NOT NULL
AND COALESCE(s.proposed_confidence, 0) >= ?
ORDER BY s.proposed_confidence DESC, s.proposed_at DESC
LIMIT ?""",
(tenant_id, min_confidence, limit),
)
rows = [dict(r) for r in await cursor.fetchall()]
alignments_map = await _load_alignments_for(db, [r["id"] for r in rows])
out = []
for d in rows:
try:
proposed_aligns = json.loads(d.get("proposed_alignments_json") or "[]")
except (json.JSONDecodeError, TypeError):
proposed_aligns = []
out.append({
"id": d["id"],
"name": d["name"],
"url": d.get("url"),
"domain": d.get("domain"),
"source_type": d.get("source_type"),
"category": d.get("category"),
"is_global": d.get("tenant_id") is None,
"current": {
"political_orientation": d.get("political_orientation"),
"media_type": d.get("media_type"),
"reliability": d.get("reliability"),
"state_affiliated": bool(d.get("state_affiliated")),
"country_code": d.get("country_code"),
"alignments": alignments_map.get(d["id"], []),
"classification_source": d.get("classification_source"),
},
"proposed": {
"political_orientation": d.get("proposed_political_orientation"),
"media_type": d.get("proposed_media_type"),
"reliability": d.get("proposed_reliability"),
"state_affiliated": bool(d.get("proposed_state_affiliated")),
"country_code": d.get("proposed_country_code"),
"alignments": proposed_aligns,
"confidence": d.get("proposed_confidence"),
"reasoning": d.get("proposed_reasoning"),
"proposed_at": d.get("proposed_at"),
},
})
return out
async def _clear_proposed(db: aiosqlite.Connection, source_id: int):
"""Loescht die proposed_*-Felder einer Quelle (ohne commit)."""
await db.execute(
"""UPDATE sources SET
proposed_political_orientation = NULL,
proposed_media_type = NULL,
proposed_reliability = NULL,
proposed_state_affiliated = NULL,
proposed_country_code = NULL,
proposed_alignments_json = NULL,
proposed_confidence = NULL,
proposed_reasoning = NULL,
proposed_at = NULL
WHERE id = ?""",
(source_id,),
)
@router.post("/{source_id}/classification/approve")
async def approve_classification(
source_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Uebernimmt proposed_* in echte Felder, setzt classification_source='llm_approved'."""
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
row = await cursor.fetchone()
if not row:
raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
src = dict(row)
_require_admin_for_global(src, current_user)
if src.get("proposed_political_orientation") is None:
raise HTTPException(status_code=400, detail="Keine LLM-Vorschlaege fuer diese Quelle vorhanden")
try:
proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]")
except (json.JSONDecodeError, TypeError):
proposed_aligns = []
await db.execute(
"""UPDATE sources SET
political_orientation = ?,
media_type = ?,
reliability = ?,
state_affiliated = ?,
country_code = ?,
classification_source = 'llm_approved',
classified_at = CURRENT_TIMESTAMP
WHERE id = ?""",
(
src["proposed_political_orientation"],
src["proposed_media_type"],
src["proposed_reliability"],
1 if src.get("proposed_state_affiliated") else 0,
src.get("proposed_country_code"),
source_id,
),
)
await _replace_alignments(db, source_id, [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS])
await _clear_proposed(db, source_id)
await db.commit()
# Reliability-Override anwenden (IFCN/EUvsDisinfo)
try:
await apply_reputation_overrides(db, source_id)
except Exception as e:
logger.warning("Reputation-Override fuer source_id=%s fehlgeschlagen: %s", source_id, e)
return {"source_id": source_id, "status": "approved"}
@router.post("/{source_id}/classification/reject")
async def reject_classification(
source_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Verwirft die LLM-Vorschlaege ohne Uebernahme. classification_source bleibt unveraendert."""
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
row = await cursor.fetchone()
if not row:
raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
src = dict(row)
_require_admin_for_global(src, current_user)
await _clear_proposed(db, source_id)
# Wenn classification_source noch 'llm_pending' war, zurueck auf 'legacy'
if src.get("classification_source") == "llm_pending":
await db.execute(
"UPDATE sources SET classification_source = 'legacy' WHERE id = ?",
(source_id,),
)
await db.commit()
return {"source_id": source_id, "status": "rejected"}
@router.post("/{source_id}/classification/reclassify")
async def reclassify_source(
source_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Triggert eine LLM-Klassifikation einer einzelnen Quelle (synchron, ~3-5s)."""
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
row = await cursor.fetchone()
if not row:
raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
src = dict(row)
_require_admin_for_global(src, current_user)
try:
result = await classify_source(db, source_id)
except Exception as e:
logger.error("Reclassify source_id=%s fehlgeschlagen: %s", source_id, e, exc_info=True)
raise HTTPException(status_code=500, detail=f"Klassifikation fehlgeschlagen: {e}")
return result
async def _bulk_classify_background(limit: int, only_unclassified: bool):
"""Hintergrund-Task: oeffnet eigene DB-Connection."""
db = await get_db()
try:
await bulk_classify(db, limit=limit, only_unclassified=only_unclassified)
finally:
await db.close()
@router.post("/classification/bulk-classify")
async def trigger_bulk_classify(
background_tasks: BackgroundTasks,
limit: int = 50,
only_unclassified: bool = True,
current_user: dict = Depends(get_current_user),
):
"""Startet eine Bulk-Klassifikation im Hintergrund (nur Admins)."""
if current_user.get("role") != "org_admin":
raise HTTPException(status_code=403, detail="Nur Admins koennen Bulk-Klassifikation starten")
if limit < 1 or limit > 500:
raise HTTPException(status_code=400, detail="limit muss zwischen 1 und 500 liegen")
background_tasks.add_task(_bulk_classify_background, limit, only_unclassified)
return {"status": "started", "limit": limit, "only_unclassified": only_unclassified}
@router.post("/external-reputation/sync")
async def trigger_external_reputation_sync(
background_tasks: BackgroundTasks,
current_user: dict = Depends(get_current_user),
):
"""Startet Sync von IFCN- und EUvsDisinfo-Daten (Admin, Hintergrund)."""
if current_user.get("role") != "org_admin":
raise HTTPException(status_code=403, detail="Nur Admins koennen den externen Sync starten")
async def _bg():
db = await get_db()
try:
await sync_external_reputation(db)
finally:
await db.close()
background_tasks.add_task(_bg)
return {"status": "started"}
@router.post("/classification/bulk-approve")
async def bulk_approve_classifications(
min_confidence: float = 0.85,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Genehmigt alle Pending-Vorschlaege ueber dem confidence-Schwellwert (nur Admins).
Globale Quellen werden nur bearbeitet, wenn der Aufrufer org_admin ist;
Tenant-eigene Quellen sowieso.
"""
if current_user.get("role") != "org_admin":
raise HTTPException(status_code=403, detail="Nur Admins koennen Bulk-Approve nutzen")
tenant_id = current_user.get("tenant_id")
cursor = await db.execute(
"""SELECT id, proposed_political_orientation, proposed_media_type,
proposed_reliability, proposed_state_affiliated,
proposed_country_code, proposed_alignments_json, tenant_id
FROM sources
WHERE proposed_political_orientation IS NOT NULL
AND COALESCE(proposed_confidence, 0) >= ?
AND (tenant_id IS NULL OR tenant_id = ?)""",
(min_confidence, tenant_id),
)
rows = [dict(r) for r in await cursor.fetchall()]
approved_ids: list[int] = []
for src in rows:
try:
proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]")
except (json.JSONDecodeError, TypeError):
proposed_aligns = []
await db.execute(
"""UPDATE sources SET
political_orientation = ?,
media_type = ?,
reliability = ?,
state_affiliated = ?,
country_code = ?,
classification_source = 'llm_approved',
classified_at = CURRENT_TIMESTAMP
WHERE id = ?""",
(
src["proposed_political_orientation"],
src["proposed_media_type"],
src["proposed_reliability"],
1 if src.get("proposed_state_affiliated") else 0,
src.get("proposed_country_code"),
src["id"],
),
)
await _replace_alignments(
db, src["id"], [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS]
)
await _clear_proposed(db, src["id"])
approved_ids.append(src["id"])
await db.commit()
# Reliability-Override fuer alle gerade Approved
try:
for sid in approved_ids:
await apply_reputation_overrides(db, sid)
except Exception as e:
logger.warning("Bulk Reputation-Override fehlgeschlagen: %s", e)
return {"approved_count": len(approved_ids), "min_confidence": min_confidence}