fix(external_reputation): generische Plattform-Domains (t.me, twitter.com, ...) ignorieren

False positive bei sync_eu_disinfo: t.me wurde als Quelle markiert, weil
EUvsDisinfo anonyme Telegram-Posts unter der Plattform-Domain aggregiert.
Eine Allowlist von Plattform-Domains schliesst diese Falle aus.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
Claude Code
2026-05-07 19:44:07 +00:00
Ursprung 5fc2467559
Commit ff8a0531a4

Datei anzeigen

@@ -29,6 +29,20 @@ EU_DISINFO_CSV_URL = "https://zenodo.org/records/10514307/files/euvsdisinfo_base
HTTP_TIMEOUT = httpx.Timeout(60.0, connect=10.0)
# Generische Plattform-Domains, die NICHT als Quelle markiert werden duerfen
# (EUvsDisinfo aggregiert anonyme Telegram-/Twitter-Posts unter Plattform-Domains).
PLATFORM_DOMAINS = {
"t.me", "telegram.me", "telegram.org",
"twitter.com", "x.com", "mobile.twitter.com",
"youtube.com", "youtu.be", "m.youtube.com",
"facebook.com", "fb.com", "m.facebook.com",
"instagram.com", "tiktok.com", "vk.com", "ok.ru",
"rumble.com", "bitchute.com", "odysee.com",
"reddit.com", "old.reddit.com",
"wordpress.com", "blogspot.com", "medium.com",
"substack.com", "wixsite.com",
}
# Reliability-Skala in Stufenfolge (schlecht -> gut)
RELIABILITY_ORDER = ["sehr_niedrig", "niedrig", "gemischt", "hoch", "sehr_hoch"]
@@ -84,7 +98,7 @@ async def sync_ifcn_signatories(db: aiosqlite.Connection) -> dict:
unmatched_ids: list[int] = []
for s in sources:
nd = _normalize_domain(s["domain"])
if nd and nd in domains:
if nd and nd not in PLATFORM_DOMAINS and nd in domains:
matched_ids.append(s["id"])
else:
unmatched_ids.append(s["id"])
@@ -151,7 +165,7 @@ async def sync_eu_disinfo(db: aiosqlite.Connection) -> dict:
matched = 0
for s in sources:
nd = _normalize_domain(s["domain"])
if nd and nd in counts:
if nd and nd not in PLATFORM_DOMAINS and nd in counts:
await db.execute(
"""UPDATE sources SET
eu_disinfo_listed = 1,