From ff8a0531a4ff4485c92ca2760713b6acfe04b367 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 7 May 2026 19:44:07 +0000 Subject: [PATCH] fix(external_reputation): generische Plattform-Domains (t.me, twitter.com, ...) ignorieren False positive bei sync_eu_disinfo: t.me wurde als Desinformationsquelle markiert, weil EUvsDisinfo anonyme Telegram-Posts unter der Plattform-Domain aggregiert. Eine Ausschlussliste von Plattform-Domains (PLATFORM_DOMAINS) schliesst diese Falle aus; derselbe Filter gilt auch in sync_ifcn_signatories. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/services/external_reputation.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/services/external_reputation.py b/src/services/external_reputation.py index 1e900b0..de973b3 100644 --- a/src/services/external_reputation.py +++ b/src/services/external_reputation.py @@ -29,6 +29,20 @@ EU_DISINFO_CSV_URL = "https://zenodo.org/records/10514307/files/euvsdisinfo_base HTTP_TIMEOUT = httpx.Timeout(60.0, connect=10.0) +# Generische Plattform-Domains, die NICHT als Quelle markiert werden duerfen +# (EUvsDisinfo aggregiert anonyme Telegram-/Twitter-Posts unter Plattform-Domains). 
+PLATFORM_DOMAINS = { + "t.me", "telegram.me", "telegram.org", + "twitter.com", "x.com", "mobile.twitter.com", + "youtube.com", "youtu.be", "m.youtube.com", + "facebook.com", "fb.com", "m.facebook.com", + "instagram.com", "tiktok.com", "vk.com", "ok.ru", + "rumble.com", "bitchute.com", "odysee.com", + "reddit.com", "old.reddit.com", + "wordpress.com", "blogspot.com", "medium.com", + "substack.com", "wixsite.com", +} + # Reliability-Skala in Stufenfolge (schlecht -> gut) RELIABILITY_ORDER = ["sehr_niedrig", "niedrig", "gemischt", "hoch", "sehr_hoch"] @@ -84,7 +98,7 @@ async def sync_ifcn_signatories(db: aiosqlite.Connection) -> dict: unmatched_ids: list[int] = [] for s in sources: nd = _normalize_domain(s["domain"]) - if nd and nd in domains: + if nd and nd not in PLATFORM_DOMAINS and nd in domains: matched_ids.append(s["id"]) else: unmatched_ids.append(s["id"]) @@ -151,7 +165,7 @@ async def sync_eu_disinfo(db: aiosqlite.Connection) -> dict: matched = 0 for s in sources: nd = _normalize_domain(s["domain"]) - if nd and nd in counts: + if nd and nd not in PLATFORM_DOMAINS and nd in counts: await db.execute( """UPDATE sources SET eu_disinfo_listed = 1,