feat(sources): strukturierte Klassifikation (Politik/Medientyp/Reliability/Alignments)
- Neue sources-Spalten: political_orientation (7+2 Stufen), media_type (20), reliability (5+1), state_affiliated, country_code, classification_source, classified_at sowie proposed_*-Spalten fuer LLM-Vorschlaege. - Neue source_alignments-Tabelle fuer Mehrfach-Tagging geopolitischer Naehe (prorussisch, proiranisch, prowestlich, ...). - API-Filter: ?political_orientation, ?media_type, ?reliability, ?state_affiliated, ?alignment. - create/update_source nehmen alignments[] entgegen und setzen classification_source automatisch auf 'manual' bei Klassifikations-Edits. Backwards-kompatibel: bestehendes bias/language/category bleibt unveraendert, Default fuer Bestandsquellen ist classification_source = 'legacy'. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -12,7 +12,56 @@ logger = logging.getLogger("osint.sources")
|
||||
|
||||
router = APIRouter(prefix="/api/sources", tags=["sources"])
|
||||
|
||||
SOURCE_UPDATE_COLUMNS = {"name", "url", "domain", "source_type", "category", "status", "notes"}
|
||||
SOURCE_UPDATE_COLUMNS = {
|
||||
"name", "url", "domain", "source_type", "category", "status", "notes",
|
||||
"language", "bias",
|
||||
"political_orientation", "media_type", "reliability",
|
||||
"state_affiliated", "country_code",
|
||||
}
|
||||
SOURCE_CLASSIFICATION_FIELDS = {
|
||||
"political_orientation", "media_type", "reliability",
|
||||
"state_affiliated", "country_code",
|
||||
}
|
||||
ALLOWED_ALIGNMENTS = {
|
||||
"prorussisch", "proiranisch", "prowestlich", "proukrainisch",
|
||||
"prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch",
|
||||
"protuerkisch", "panarabisch", "neutral", "sonstige",
|
||||
}
|
||||
|
||||
|
||||
async def _load_alignments_for(db: aiosqlite.Connection, source_ids: list[int]) -> dict[int, list[str]]:
|
||||
"""Lädt alignments fuer mehrere Quellen in einer Query und gibt {source_id: [alignment, ...]} zurück."""
|
||||
if not source_ids:
|
||||
return {}
|
||||
placeholders = ",".join("?" for _ in source_ids)
|
||||
cursor = await db.execute(
|
||||
f"SELECT source_id, alignment FROM source_alignments WHERE source_id IN ({placeholders}) ORDER BY alignment",
|
||||
source_ids,
|
||||
)
|
||||
out: dict[int, list[str]] = {sid: [] for sid in source_ids}
|
||||
for row in await cursor.fetchall():
|
||||
out.setdefault(row["source_id"], []).append(row["alignment"])
|
||||
return out
|
||||
|
||||
|
||||
async def _replace_alignments(db: aiosqlite.Connection, source_id: int, alignments: list[str]):
|
||||
"""Ersetzt die alignments-Liste einer Quelle (DELETE + INSERT) — Aufrufer muss commit() machen."""
|
||||
await db.execute("DELETE FROM source_alignments WHERE source_id = ?", (source_id,))
|
||||
seen: set[str] = set()
|
||||
for raw in alignments:
|
||||
a = (raw or "").strip().lower()
|
||||
if not a or a in seen:
|
||||
continue
|
||||
if a not in ALLOWED_ALIGNMENTS:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||
detail=f"Ungueltiger alignment-Wert: '{a}'",
|
||||
)
|
||||
seen.add(a)
|
||||
await db.execute(
|
||||
"INSERT INTO source_alignments (source_id, alignment) VALUES (?, ?)",
|
||||
(source_id, a),
|
||||
)
|
||||
|
||||
|
||||
def _check_source_ownership(source: dict, username: str):
|
||||
@@ -34,6 +83,11 @@ async def list_sources(
|
||||
source_type: str = None,
|
||||
category: str = None,
|
||||
source_status: str = None,
|
||||
political_orientation: str = None,
|
||||
media_type: str = None,
|
||||
reliability: str = None,
|
||||
state_affiliated: bool = None,
|
||||
alignment: str = None,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
@@ -41,27 +95,43 @@ async def list_sources(
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
|
||||
# Global (tenant_id=NULL) + eigene Org
|
||||
query = "SELECT * FROM sources WHERE (tenant_id IS NULL OR tenant_id = ?)"
|
||||
params = [tenant_id]
|
||||
query = "SELECT s.* FROM sources s WHERE (s.tenant_id IS NULL OR s.tenant_id = ?)"
|
||||
params: list = [tenant_id]
|
||||
|
||||
if source_type:
|
||||
query += " AND source_type = ?"
|
||||
query += " AND s.source_type = ?"
|
||||
params.append(source_type)
|
||||
if category:
|
||||
query += " AND category = ?"
|
||||
query += " AND s.category = ?"
|
||||
params.append(category)
|
||||
if source_status:
|
||||
query += " AND status = ?"
|
||||
query += " AND s.status = ?"
|
||||
params.append(source_status)
|
||||
if political_orientation:
|
||||
query += " AND s.political_orientation = ?"
|
||||
params.append(political_orientation)
|
||||
if media_type:
|
||||
query += " AND s.media_type = ?"
|
||||
params.append(media_type)
|
||||
if reliability:
|
||||
query += " AND s.reliability = ?"
|
||||
params.append(reliability)
|
||||
if state_affiliated is not None:
|
||||
query += " AND s.state_affiliated = ?"
|
||||
params.append(1 if state_affiliated else 0)
|
||||
if alignment:
|
||||
query += " AND EXISTS (SELECT 1 FROM source_alignments sa WHERE sa.source_id = s.id AND sa.alignment = ?)"
|
||||
params.append(alignment.lower())
|
||||
|
||||
query += " ORDER BY source_type, category, name"
|
||||
query += " ORDER BY s.source_type, s.category, s.name"
|
||||
cursor = await db.execute(query, params)
|
||||
rows = await cursor.fetchall()
|
||||
results = []
|
||||
for row in rows:
|
||||
d = dict(row)
|
||||
results = [dict(row) for row in rows]
|
||||
alignments_map = await _load_alignments_for(db, [r["id"] for r in results])
|
||||
for d in results:
|
||||
d["is_global"] = d.get("tenant_id") is None
|
||||
results.append(d)
|
||||
d["state_affiliated"] = bool(d.get("state_affiliated"))
|
||||
d["alignments"] = alignments_map.get(d["id"], [])
|
||||
return results
|
||||
|
||||
|
||||
@@ -454,26 +524,60 @@ async def create_source(
|
||||
detail=f"Domain '{domain}' bereits als Quelle vorhanden: {domain_existing['name']}. Für einen neuen RSS-Feed bitte die Feed-URL angeben.",
|
||||
)
|
||||
|
||||
payload = data.model_dump(exclude_unset=True)
|
||||
alignments = payload.pop("alignments", None)
|
||||
classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & payload.keys()) or alignments is not None
|
||||
|
||||
cols = ["name", "url", "domain", "source_type", "category", "status", "notes",
|
||||
"language", "bias",
|
||||
"political_orientation", "media_type", "reliability",
|
||||
"state_affiliated", "country_code",
|
||||
"added_by", "tenant_id"]
|
||||
vals = [
|
||||
data.name,
|
||||
data.url,
|
||||
domain,
|
||||
data.source_type,
|
||||
data.category,
|
||||
data.status,
|
||||
data.notes,
|
||||
payload.get("language"),
|
||||
payload.get("bias"),
|
||||
payload.get("political_orientation"),
|
||||
payload.get("media_type"),
|
||||
payload.get("reliability"),
|
||||
1 if payload.get("state_affiliated") else 0,
|
||||
payload.get("country_code"),
|
||||
current_user["username"],
|
||||
tenant_id,
|
||||
]
|
||||
if classification_touched:
|
||||
cols += ["classification_source", "classified_at"]
|
||||
vals += ["manual"]
|
||||
ts_marker = True
|
||||
else:
|
||||
ts_marker = False
|
||||
|
||||
placeholders = ", ".join(["?"] * len(vals) + (["CURRENT_TIMESTAMP"] if ts_marker else []))
|
||||
cursor = await db.execute(
|
||||
"""INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by, tenant_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
data.name,
|
||||
data.url,
|
||||
domain,
|
||||
data.source_type,
|
||||
data.category,
|
||||
data.status,
|
||||
data.notes,
|
||||
current_user["username"],
|
||||
tenant_id,
|
||||
),
|
||||
f"INSERT INTO sources ({', '.join(cols)}) VALUES ({placeholders})",
|
||||
vals,
|
||||
)
|
||||
new_id = cursor.lastrowid
|
||||
|
||||
if alignments:
|
||||
await _replace_alignments(db, new_id, alignments)
|
||||
|
||||
await db.commit()
|
||||
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (cursor.lastrowid,))
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (new_id,))
|
||||
row = await cursor.fetchone()
|
||||
return dict(row)
|
||||
result = dict(row)
|
||||
result["is_global"] = result.get("tenant_id") is None
|
||||
result["state_affiliated"] = bool(result.get("state_affiliated"))
|
||||
alignments_map = await _load_alignments_for(db, [new_id])
|
||||
result["alignments"] = alignments_map.get(new_id, [])
|
||||
return result
|
||||
|
||||
|
||||
@router.put("/{source_id}", response_model=SourceResponse)
|
||||
@@ -494,27 +598,51 @@ async def update_source(
|
||||
|
||||
_check_source_ownership(dict(row), current_user["username"])
|
||||
|
||||
payload = data.model_dump(exclude_unset=True)
|
||||
alignments = payload.pop("alignments", None)
|
||||
|
||||
updates = {}
|
||||
for field, value in data.model_dump(exclude_none=True).items():
|
||||
for field, value in payload.items():
|
||||
if field not in SOURCE_UPDATE_COLUMNS:
|
||||
continue
|
||||
# Domain normalisieren
|
||||
if field == "domain" and value:
|
||||
value = _DOMAIN_ALIASES.get(value.lower(), value.lower())
|
||||
if field == "state_affiliated":
|
||||
value = 1 if value else 0
|
||||
updates[field] = value
|
||||
|
||||
if not updates:
|
||||
return dict(row)
|
||||
classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & updates.keys()) or alignments is not None
|
||||
if classification_touched:
|
||||
updates["classification_source"] = "manual"
|
||||
updates["classified_at"] = "CURRENT_TIMESTAMP_MARKER"
|
||||
|
||||
set_clause = ", ".join(f"{k} = ?" for k in updates)
|
||||
values = list(updates.values()) + [source_id]
|
||||
if updates:
|
||||
set_parts = []
|
||||
values = []
|
||||
for k, v in updates.items():
|
||||
if v == "CURRENT_TIMESTAMP_MARKER":
|
||||
set_parts.append(f"{k} = CURRENT_TIMESTAMP")
|
||||
else:
|
||||
set_parts.append(f"{k} = ?")
|
||||
values.append(v)
|
||||
values.append(source_id)
|
||||
await db.execute(f"UPDATE sources SET {', '.join(set_parts)} WHERE id = ?", values)
|
||||
|
||||
await db.execute(f"UPDATE sources SET {set_clause} WHERE id = ?", values)
|
||||
await db.commit()
|
||||
if alignments is not None:
|
||||
await _replace_alignments(db, source_id, alignments)
|
||||
|
||||
if updates or alignments is not None:
|
||||
await db.commit()
|
||||
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
|
||||
row = await cursor.fetchone()
|
||||
return dict(row)
|
||||
result = dict(row)
|
||||
result["is_global"] = result.get("tenant_id") is None
|
||||
result["state_affiliated"] = bool(result.get("state_affiliated"))
|
||||
alignments_map = await _load_alignments_for(db, [source_id])
|
||||
result["alignments"] = alignments_map.get(source_id, [])
|
||||
return result
|
||||
|
||||
|
||||
@router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren