Commits vergleichen
31 Commits
6a02e06887
...
main
| Autor | SHA1 | Datum | |
|---|---|---|---|
| 000b4ef526 | |||
| 52f5debe44 | |||
| 5435d0be8b | |||
|
|
8c75a70655 | ||
| 6bfff67c2f | |||
| 746b1bcd81 | |||
| 7ec153ca49 | |||
|
|
a27fe44b0b | ||
| 6c623a8ae5 | |||
| 240222cb2a | |||
|
|
bd476edb13 | ||
| ed38d68db7 | |||
| c7d6d2eedf | |||
| 031bd9e114 | |||
| c316c67294 | |||
| 430641b128 | |||
| 7c558b7cb4 | |||
|
|
c62be998d5 | ||
| 5d1d72bf3d | |||
| d0b71d82e4 | |||
|
|
c64675b266 | ||
| 1d9ce20b68 | |||
|
|
9843ff0015 | ||
|
|
27afce7c9e | ||
|
|
d3e5fa7079 | ||
|
|
521633bde9 | ||
|
|
015255237a | ||
| b56b7eeda2 | |||
|
|
2f7d967ce2 | ||
| 1d9751ef1a | |||
|
|
5e08d06784 |
46
RELEASES.json
Normale Datei
46
RELEASES.json
Normale Datei
@@ -0,0 +1,46 @@
|
||||
[
|
||||
{
|
||||
"version": "2026-05-22T12:41Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "X-Recherche-Konten im Verwaltungsportal verwalten",
|
||||
"items": [
|
||||
"Recherche-Konten für X (ehemals Twitter) können jetzt direkt im Verwaltungsportal hinzugefügt, bearbeitet und entfernt werden."
|
||||
]
|
||||
},
|
||||
{
|
||||
"version": "2026-05-22T11:13Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "Interne Verbesserungen",
|
||||
"items": []
|
||||
},
|
||||
{
|
||||
"version": "2026-05-22T11:13Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "Interne Verbesserungen",
|
||||
"items": []
|
||||
},
|
||||
{
|
||||
"version": "2026-05-22T11:09Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "X-Konten direkt im Verwaltungsportal verwalten",
|
||||
"items": [
|
||||
"X-Konten können jetzt zentral über das Verwaltungsportal angelegt und verwaltet werden."
|
||||
]
|
||||
},
|
||||
{
|
||||
"version": "2026-05-22T09:37Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "Neue Übersetzungsfunktion im Dashboard",
|
||||
"items": [
|
||||
"Texte können jetzt im Dashboard per Klick manuell übersetzt werden."
|
||||
]
|
||||
},
|
||||
{
|
||||
"version": "2026-05-17T19:19Z",
|
||||
"date": "2026-05-17",
|
||||
"title": "83 neue Quellen für Militär, Polizei-Technik & Waffen",
|
||||
"items": [
|
||||
"83 neue Quellen aus den Bereichen Militär, Polizei-Technik und Waffen sind jetzt verfügbar."
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -7,3 +7,7 @@ python-multipart
|
||||
aiosmtplib
|
||||
httpx>=0.28
|
||||
feedparser>=6.0
|
||||
# PDF-Upload-Validierung
|
||||
pypdf>=5.0
|
||||
# X-Scraper-Konten-Verwaltung (twscrape-Account-Pool)
|
||||
twscrape @ git+https://github.com/vladkens/twscrape.git@206f0942fe41149da28530399f7c772ec00be17a
|
||||
|
||||
104
scripts/seed_military_sources.json
Normale Datei
104
scripts/seed_military_sources.json
Normale Datei
@@ -0,0 +1,104 @@
|
||||
{
|
||||
"_meta": {
|
||||
"purpose": "Bulk-Seed fuer Militaer-, Polizei-Technik und internationale Waffen-Quellen",
|
||||
"created": "2026-05-17",
|
||||
"plan": "C:\\Users\\hendr\\.claude\\plans\\gleaming-inventing-fern.md"
|
||||
},
|
||||
"sources": [
|
||||
{"name": "Janes OSINT Insights", "url": "https://www.janes.com/osint-insights/defence-news", "domain": "janes.com", "source_type": "web_source", "language": "en", "country_code": "GB", "fetch_strategy": "paywall", "notes": "[militaertechnik] Goldstandard fuer Equipment-Specs und Defense-OSINT, Vollartikel paywalled"},
|
||||
{"name": "The War Zone (TWZ)", "url": "https://www.twz.com/feed", "domain": "twz.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Air/Land/Sea/Space/Cyber, sehr tiefe Equipment-Analysen, Tyler Rogoway"},
|
||||
{"name": "Defense News", "url": "https://www.defensenews.com/arc/outboundfeeds/rss/?outputType=xml", "domain": "defensenews.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Industriepolitik, Beschaffung, Programme"},
|
||||
{"name": "Breaking Defense", "url": "https://breakingdefense.com/full-rss-feed/", "domain": "breakingdefense.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Tech, Programme, Pentagon-Politik"},
|
||||
{"name": "Naval News", "url": "https://www.navalnews.com/feed/", "domain": "navalnews.com", "source_type": "rss_feed", "language": "en", "country_code": "FR", "fetch_strategy": "default", "notes": "[militaertechnik] Marine global, Schiffstechnik, Werften, U-Boote"},
|
||||
{"name": "Army Recognition", "url": "https://www.armyrecognition.com/news/army-news/feed/rss", "domain": "armyrecognition.com", "source_type": "rss_feed", "language": "en", "country_code": "BE", "fetch_strategy": "default", "notes": "[militaertechnik] Equipment-Specs Heer, sehr fahrzeugfokussiert, breite Datenbank"},
|
||||
{"name": "Navy Recognition", "url": "https://www.navyrecognition.com/index.php?option=com_acymailing&ctrl=fronturl&task=rss", "domain": "navyrecognition.com", "source_type": "rss_feed", "language": "en", "country_code": "BE", "fetch_strategy": "default", "notes": "[militaertechnik] Equipment-Specs Marine, Schwesterportal Army Recognition"},
|
||||
{"name": "Air Recognition", "url": "https://www.airrecognition.com/index.php?option=com_acymailing&ctrl=fronturl&task=rss", "domain": "airrecognition.com", "source_type": "rss_feed", "language": "en", "country_code": "BE", "fetch_strategy": "default", "notes": "[militaertechnik] Equipment-Specs Luftwaffe, Schwesterportal Army Recognition"},
|
||||
{"name": "Aviation Week Defense", "url": "https://aviationweek.com/awn-rss/feed", "domain": "aviationweek.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Luftfahrt und Defense, seit 1916, Industrie-Insider"},
|
||||
{"name": "Air & Space Forces Magazine", "url": "https://www.airandspaceforces.com/feed/", "domain": "airandspaceforces.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] USAF-Schwerpunkt, Programme + Doktrin"},
|
||||
{"name": "Shephard Media", "url": "https://www.shephardmedia.com/news/feed/", "domain": "shephardmedia.com", "source_type": "rss_feed", "language": "en", "country_code": "GB", "fetch_strategy": "default", "notes": "[militaertechnik] Defense News, Analyse + Daten, Land/Air/Sea/Training"},
|
||||
{"name": "EDR Magazine (European Defence Review)", "url": "https://www.edrmagazine.eu/feed", "domain": "edrmagazine.eu", "source_type": "rss_feed", "language": "en", "country_code": "FR", "fetch_strategy": "default", "notes": "[militaertechnik] Europaeische Defense-Perspektive, Englisch"},
|
||||
{"name": "The Defense Post", "url": "https://thedefensepost.com/feed/", "domain": "thedefensepost.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Globaler Defense-Nachrichten-Mix"},
|
||||
{"name": "Defense Brief", "url": "https://defbrief.com/feed/", "domain": "defbrief.com", "source_type": "rss_feed", "language": "en", "country_code": "MK", "fetch_strategy": "default", "notes": "[militaertechnik] Defense-News-Aggregator"},
|
||||
{"name": "Defense Update", "url": "https://defense-update.com/feed", "domain": "defense-update.com", "source_type": "rss_feed", "language": "en", "country_code": "IL", "fetch_strategy": "default", "notes": "[militaertechnik] Israel/US-Equipment-Tiefe, Tamir Eshel"},
|
||||
{"name": "Naval Technology", "url": "https://www.naval-technology.com/feed/", "domain": "naval-technology.com", "source_type": "rss_feed", "language": "en", "country_code": "GB", "fetch_strategy": "default", "notes": "[militaertechnik] Industrieperspektive Marine"},
|
||||
{"name": "Army Technology", "url": "https://www.army-technology.com/feed/", "domain": "army-technology.com", "source_type": "rss_feed", "language": "en", "country_code": "GB", "fetch_strategy": "default", "notes": "[militaertechnik] Industrieperspektive Heer"},
|
||||
{"name": "Airforce Technology", "url": "https://www.airforce-technology.com/feed/", "domain": "airforce-technology.com", "source_type": "rss_feed", "language": "en", "country_code": "GB", "fetch_strategy": "default", "notes": "[militaertechnik] Industrieperspektive Luftwaffe"},
|
||||
{"name": "The Aviationist", "url": "https://theaviationist.com/feed/", "domain": "theaviationist.com", "source_type": "rss_feed", "language": "en", "country_code": "IT", "fetch_strategy": "default", "notes": "[militaertechnik] Militaerluftfahrt-Specials, David Cenciotti"},
|
||||
{"name": "C4ISRNET", "url": "https://www.c4isrnet.com/arc/outboundfeeds/rss/?outputType=xml", "domain": "c4isrnet.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Aufklaerung, Cyber, EW, Netze"},
|
||||
{"name": "DefenseScoop", "url": "https://defensescoop.com/feed/", "domain": "defensescoop.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Pentagon-IT, Cyber, KI"},
|
||||
{"name": "Federation of American Scientists", "url": "https://fas.org/feed/", "domain": "fas.org", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik, waffen-international] Nuklear, Strategic Security, Project on Government Secrecy"},
|
||||
{"name": "Military Times", "url": "https://www.militarytimes.com/arc/outboundfeeds/rss/?outputType=xml", "domain": "militarytimes.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] US-Streitkraefte-Alltag, Beschaffung, Truppe"},
|
||||
{"name": "Stars and Stripes", "url": "https://www.stripes.com/rss", "domain": "stripes.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] US-Forces Worldwide"},
|
||||
{"name": "Defense One", "url": "https://www.defenseone.com/rss/all/", "domain": "defenseone.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Defense-Politik + Tech"},
|
||||
{"name": "Inside Defense", "url": "https://insidedefense.com", "domain": "insidedefense.com", "source_type": "web_source", "language": "en", "country_code": "US", "fetch_strategy": "paywall", "notes": "[militaertechnik] US-Pentagon-Insider, komplett paywalled"},
|
||||
{"name": "RealClearDefense", "url": "https://www.realcleardefense.com/index.xml", "domain": "realcleardefense.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Aggregator + Kommentare"},
|
||||
{"name": "War on the Rocks", "url": "https://warontherocks.com/feed/", "domain": "warontherocks.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Strategie-Essays, hochwertige Analyse"},
|
||||
{"name": "RUSI Commentary", "url": "https://www.rusi.org/rss/commentary", "domain": "rusi.org", "source_type": "rss_feed", "language": "en", "country_code": "GB", "fetch_strategy": "default", "notes": "[militaertechnik] Royal United Services Institute, Strategie"},
|
||||
{"name": "CSIS Defense & Security", "url": "https://www.csis.org/rss.xml", "domain": "csis.org", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Center for Strategic and International Studies"},
|
||||
{"name": "Soldier Systems Daily", "url": "https://soldiersystems.net/feed/", "domain": "soldiersystems.net", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Tactical Gear und Ausruestung, extrem detailreich"},
|
||||
|
||||
{"name": "ESuT - Europaeische Sicherheit & Technik", "url": "https://esut.de/feed/", "domain": "esut.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] Heer/Luft/Marine, Mittler Report, sehr Equipment-orientiert"},
|
||||
{"name": "Soldat & Technik", "url": "https://soldat-und-technik.de/feed/", "domain": "soldat-und-technik.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] Infanterie-Ausruestung, Mittler Report"},
|
||||
{"name": "hartpunkt", "url": "https://www.hartpunkt.de/feed/", "domain": "hartpunkt.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] Ruestung und Sicherheitspolitik, unabhaengig"},
|
||||
{"name": "Augen geradeaus!", "url": "https://augengeradeaus.net/feed/", "domain": "augengeradeaus.net", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] Thomas Wiegold, Bundeswehr-Insider"},
|
||||
{"name": "Bundeswehr-Journal", "url": "https://www.bundeswehr-journal.de/feed/", "domain": "bundeswehr-journal.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] Bundeswehr-Themen"},
|
||||
{"name": "Strategie & Technik (Mittler Report)", "url": "https://mittler-report.de/feed/", "domain": "mittler-report.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] Mittler-Verlag-Hauptfeed, Fachartikel"},
|
||||
{"name": "cpm Defence Network", "url": "https://www.cpm-defence.de/feed/", "domain": "cpm-defence.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] Deutsche Ruestungsbranche"},
|
||||
{"name": "Bundeswehr (offiziell)", "url": "https://www.bundeswehr.de/de/rss", "domain": "bundeswehr.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] Offizielle BMVg/BW-Meldungen"},
|
||||
|
||||
{"name": "Opex360 (Zone Militaire)", "url": "https://www.opex360.com/feed/", "domain": "opex360.com", "source_type": "rss_feed", "language": "fr", "country_code": "FR", "fetch_strategy": "default", "notes": "[militaertechnik] Sehr aktiv, Equipment + Operations FR"},
|
||||
{"name": "Mer et Marine", "url": "https://www.meretmarine.com/fr/rss.xml", "domain": "meretmarine.com", "source_type": "rss_feed", "language": "fr", "country_code": "FR", "fetch_strategy": "default", "notes": "[militaertechnik] Marine + Schiffbau FR"},
|
||||
{"name": "FOB - Forces Operations Blog", "url": "https://www.forcesoperations.com/feed/", "domain": "forcesoperations.com", "source_type": "rss_feed", "language": "fr", "country_code": "FR", "fetch_strategy": "default", "notes": "[militaertechnik] Spezialeinheiten und Ausruestung FR"},
|
||||
{"name": "Lignes de Defense", "url": "https://lignesdedefense.blogs.ouest-france.fr/index.rdf", "domain": "lignesdedefense.blogs.ouest-france.fr", "source_type": "rss_feed", "language": "fr", "country_code": "FR", "fetch_strategy": "default", "notes": "[militaertechnik] Blog Ouest-France, Defense FR"},
|
||||
{"name": "Air & Cosmos Defense", "url": "https://air-cosmos.com/category/defense/feed", "domain": "air-cosmos.com", "source_type": "rss_feed", "language": "fr", "country_code": "FR", "fetch_strategy": "default", "notes": "[militaertechnik] Luftfahrt + Defense FR"},
|
||||
|
||||
{"name": "Topwar / Voyennoye Obozreniye (EN)", "url": "https://en.topwar.ru/rss.xml", "domain": "topwar.ru", "source_type": "rss_feed", "language": "en", "country_code": "RU", "fetch_strategy": "default", "notes": "[militaertechnik] Pro-russisch (MBFC: Right Biased, Propaganda). Wert: Sicht auf eigene Technik"},
|
||||
{"name": "TASS Defense", "url": "https://tass.com/rss/v2.xml?sections=MjQ%3D", "domain": "tass.com", "source_type": "rss_feed", "language": "en", "country_code": "RU", "fetch_strategy": "default", "notes": "[militaertechnik] Russische Staatsagentur, Defense-Section"},
|
||||
{"name": "RIA Novosti Army (RU)", "url": "https://ria.ru/export/rss2/army/index.xml", "domain": "ria.ru", "source_type": "rss_feed", "language": "ru", "country_code": "RU", "fetch_strategy": "default", "notes": "[militaertechnik] Russische Staatsagentur, Army-Section"},
|
||||
{"name": "bmpd (LiveJournal)", "url": "https://bmpd.livejournal.com/data/rss", "domain": "bmpd.livejournal.com", "source_type": "rss_feed", "language": "ru", "country_code": "RU", "fetch_strategy": "default", "notes": "[militaertechnik] Blog des CAST (Centre for Analysis of Strategies and Technologies)"},
|
||||
{"name": "Zvezda TV", "url": "https://tvzvezda.ru/news.rss", "domain": "tvzvezda.ru", "source_type": "rss_feed", "language": "ru", "country_code": "RU", "fetch_strategy": "default", "notes": "[militaertechnik] TV-Sender des russischen Verteidigungsministeriums"},
|
||||
|
||||
{"name": "Defense Express (UA, EN)", "url": "https://en.defence-ua.com/rss/", "domain": "defence-ua.com", "source_type": "rss_feed", "language": "en", "country_code": "UA", "fetch_strategy": "default", "notes": "[militaertechnik] Ukrainische Industrie + Technik EN"},
|
||||
{"name": "Militarnyi (EN)", "url": "https://militarnyi.com/en/feed/", "domain": "militarnyi.com", "source_type": "rss_feed", "language": "en", "country_code": "UA", "fetch_strategy": "default", "notes": "[militaertechnik] Ukrainisches Defense-Portal EN"},
|
||||
{"name": "Defence24 (PL)", "url": "https://defence24.pl/rss", "domain": "defence24.pl", "source_type": "rss_feed", "language": "pl", "country_code": "PL", "fetch_strategy": "default", "notes": "[militaertechnik] Polens groesstes Defense-Portal PL"},
|
||||
{"name": "Defence24.com (EN)", "url": "https://defence24.com/feed", "domain": "defence24.com", "source_type": "rss_feed", "language": "en", "country_code": "PL", "fetch_strategy": "default", "notes": "[militaertechnik] Englische Ausgabe Defence24"},
|
||||
|
||||
{"name": "Israel Defense (EN)", "url": "https://www.israeldefense.co.il/en/rss.xml", "domain": "israeldefense.co.il", "source_type": "rss_feed", "language": "en", "country_code": "IL", "fetch_strategy": "default", "notes": "[militaertechnik] Israelische Industrie + IDF"},
|
||||
{"name": "IDF Spokesperson Website", "url": "https://www.idf.il/en/mini-sites/idf-spokesperson/", "domain": "idf.il", "source_type": "web_source", "language": "en", "country_code": "IL", "fetch_strategy": "default", "notes": "[militaertechnik] Offizielle IDF-Meldungen (Telegram-Kanal haben wir bereits)"},
|
||||
|
||||
{"name": "Mehr News Defense (FA)", "url": "https://www.mehrnews.com/rss/tp/12", "domain": "mehrnews.com", "source_type": "rss_feed", "language": "fa", "country_code": "IR", "fetch_strategy": "default", "notes": "[militaertechnik] Halbstaatliche iranische Agentur, Defense-Section"},
|
||||
{"name": "Tasnim News Defense (FA)", "url": "https://www.tasnimnews.com/de/rss/feed/0/8/6/1/1", "domain": "tasnimnews.com", "source_type": "rss_feed", "language": "fa", "country_code": "IR", "fetch_strategy": "default", "notes": "[militaertechnik] IRGC-nah, Defense-Section"},
|
||||
{"name": "Fars News Defense (FA)", "url": "https://www.farsnews.ir/rss?cat=8", "domain": "farsnews.ir", "source_type": "rss_feed", "language": "fa", "country_code": "IR", "fetch_strategy": "default", "notes": "[militaertechnik] IRGC-nah, Defense-Section"},
|
||||
|
||||
{"name": "China Military Online (EN)", "url": "http://eng.chinamil.com.cn/", "domain": "chinamil.com.cn", "source_type": "web_source", "language": "en", "country_code": "CN", "fetch_strategy": "default", "notes": "[militaertechnik] Offizielles PLA-Organ"},
|
||||
{"name": "Global Times Military (EN)", "url": "https://www.globaltimes.cn/rss/military.xml", "domain": "globaltimes.cn", "source_type": "rss_feed", "language": "en", "country_code": "CN", "fetch_strategy": "default", "notes": "[militaertechnik] Chinesisches Staatsmedium, Military-Section"},
|
||||
{"name": "The Diplomat - Security", "url": "https://thediplomat.com/category/security/feed/", "domain": "thediplomat.com", "source_type": "rss_feed", "language": "en", "country_code": "JP", "fetch_strategy": "default", "notes": "[militaertechnik] Asien-Pazifik-Sicherheitsanalyse, in Tokio sitzend"},
|
||||
|
||||
{"name": "ORYX (Spioenkop)", "url": "https://www.oryxspioenkop.com/feeds/posts/default", "domain": "oryxspioenkop.com", "source_type": "rss_feed", "language": "en", "country_code": "NL", "fetch_strategy": "default", "notes": "[militaertechnik, waffen-international] Visually confirmed losses, Equipment-DB Ukraine-Krieg"},
|
||||
{"name": "WarSpotting", "url": "https://warspotting.net/", "domain": "warspotting.net", "source_type": "web_source", "language": "en", "country_code": "NL", "fetch_strategy": "default", "notes": "[militaertechnik, waffen-international] ORYX-Nachfolger fuer Ukraine, OSINT-Verluste"},
|
||||
{"name": "Conflict Intelligence Team (CIT)", "url": "https://citeam.org/feed/", "domain": "citeam.org", "source_type": "rss_feed", "language": "en", "country_code": "RU", "fetch_strategy": "default", "notes": "[militaertechnik] Russisches Exil-OSINT-Kollektiv"},
|
||||
{"name": "Telegram @rybar", "url": "t.me/rybar", "domain": "t.me", "source_type": "telegram_channel", "language": "ru", "country_code": "RU", "fetch_strategy": "default", "notes": "[militaertechnik] Grosser russischer Mil-OSINT-Kanal"},
|
||||
{"name": "Telegram @osintdefender", "url": "t.me/osintdefender", "domain": "t.me", "source_type": "telegram_channel", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Pro-westliches Equipment-Tracking"},
|
||||
{"name": "Telegram @CovertCabal", "url": "t.me/CovertCabal", "domain": "t.me", "source_type": "telegram_channel", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[militaertechnik] Sat-Bild-OSINT"},
|
||||
{"name": "Telegram @Tendar", "url": "t.me/Tendar", "domain": "t.me", "source_type": "telegram_channel", "language": "en", "country_code": "DE", "fetch_strategy": "default", "notes": "[militaertechnik] UA-Konflikt-Analyse"},
|
||||
{"name": "Telegram @Osint613", "url": "t.me/Osint613", "domain": "t.me", "source_type": "telegram_channel", "language": "en", "country_code": "IL", "fetch_strategy": "default", "notes": "[militaertechnik] Nahost-OSINT"},
|
||||
|
||||
{"name": "Behoerden-Spiegel", "url": "https://www.behoerden-spiegel.de/feed/", "domain": "behoerden-spiegel.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[polizei-technik] DE-BOS-Magazin, Polizei, Fuehrungstechnik"},
|
||||
{"name": "pvt Polizei Verkehr + Technik", "url": "https://www.pvtweb.de/feed/", "domain": "pvtweb.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[polizei-technik] DE-Polizeitechnik-Fachzeitschrift"},
|
||||
{"name": "Police Magazine (US)", "url": "https://www.policemag.com/rss", "domain": "policemag.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[polizei-technik] US-Polizei + Ausruestung"},
|
||||
{"name": "Police1.com", "url": "https://www.police1.com/rss/feed", "domain": "police1.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[polizei-technik] US-Polizei-Industrie"},
|
||||
{"name": "Officer.com", "url": "https://www.officer.com/rss", "domain": "officer.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[polizei-technik] US-Polizei + Equipment"},
|
||||
|
||||
{"name": "Small Arms Survey", "url": "https://www.smallarmssurvey.org/rss.xml", "domain": "smallarmssurvey.org", "source_type": "rss_feed", "language": "en", "country_code": "CH", "fetch_strategy": "default", "notes": "[waffen-international] Genfer Forschungsinstitut, Goldstandard Kleinwaffen, Working Papers + Issue Briefs"},
|
||||
{"name": "SIPRI Publications", "url": "https://www.sipri.org/rss/publications.xml", "domain": "sipri.org", "source_type": "rss_feed", "language": "en", "country_code": "SE", "fetch_strategy": "default", "notes": "[waffen-international] Stockholm International Peace Research, Waffenexporte, Militaerausgaben, SALW"},
|
||||
{"name": "Conflict Armament Research", "url": "https://www.conflictarm.com/feed/", "domain": "conflictarm.com", "source_type": "rss_feed", "language": "en", "country_code": "GB", "fetch_strategy": "default", "notes": "[waffen-international] Field-Tracking von Waffen in Konfliktzonen, Lieferketten-Forensik"},
|
||||
{"name": "Armament Research Services (ARES)", "url": "https://armamentresearch.com/feed/", "domain": "armamentresearch.com", "source_type": "rss_feed", "language": "en", "country_code": "AU", "fetch_strategy": "default", "notes": "[militaertechnik, waffen-international] Munitions- und Waffen-Identifikation, sehr Equipment-tief"},
|
||||
{"name": "Calibre Obscura (Substack)", "url": "https://calibreobscura.substack.com/feed", "domain": "calibreobscura.substack.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[waffen-international] OSINT-Spezialist Kleinwaffen Nahost"},
|
||||
{"name": "Arms Control Association", "url": "https://www.armscontrol.org/rss.xml", "domain": "armscontrol.org", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[waffen-international] US-Think-Tank, Ruestungskontrolle + Proliferation"},
|
||||
{"name": "Arms Control Wonk", "url": "https://www.armscontrolwonk.com/feed/", "domain": "armscontrolwonk.com", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[waffen-international] Nuklear- und Raketen-Spezialisten-Blog"},
|
||||
{"name": "Action on Armed Violence (AOAV)", "url": "https://aoav.org.uk/feed/", "domain": "aoav.org.uk", "source_type": "rss_feed", "language": "en", "country_code": "GB", "fetch_strategy": "default", "notes": "[waffen-international] Explosive Waffen in besiedelten Gebieten, Opferzahlen"},
|
||||
{"name": "BICC Bonn", "url": "https://www.bicc.de/feed", "domain": "bicc.de", "source_type": "rss_feed", "language": "de", "country_code": "DE", "fetch_strategy": "default", "notes": "[waffen-international] Bonn International Centre for Conflict Studies, Konflikt + Konversion"},
|
||||
{"name": "Stimson Center", "url": "https://www.stimson.org/feed/", "domain": "stimson.org", "source_type": "rss_feed", "language": "en", "country_code": "US", "fetch_strategy": "default", "notes": "[waffen-international] US-Think-Tank, konventionelle + nukleare Ruestung"},
|
||||
{"name": "ICRC Law and Policy Blog", "url": "https://blogs.icrc.org/law-and-policy/feed/", "domain": "icrc.org", "source_type": "rss_feed", "language": "en", "country_code": "CH", "fetch_strategy": "default", "notes": "[waffen-international] Voelkerrechtliche Sicht auf Waffenwirkung"}
|
||||
]
|
||||
}
|
||||
116
scripts/seed_military_sources.py
Ausführbare Datei
116
scripts/seed_military_sources.py
Ausführbare Datei
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Bulk-Seed fuer Militaer- und Polizei-Technik-Quellen + internationale Waffen-Spezialisten.
|
||||
|
||||
Liest scripts/seed_military_sources.json und legt jede Quelle idempotent in der
|
||||
Ziel-DB an (Default: Verwaltungs-Staging-DB). Bestehende Quellen werden anhand
|
||||
der URL erkannt und uebersprungen.
|
||||
|
||||
Beispiel:
|
||||
.venv/bin/python scripts/seed_military_sources.py
|
||||
.venv/bin/python scripts/seed_military_sources.py --db /home/claude-dev/osint-data/osint.db
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Default target: the staging database of the monitor.
DEFAULT_DB = "/home/claude-dev/AegisSight-Monitor-staging/data/osint.db"
# The seed JSON lives next to this script and shares its base name.
SEED_FILE = Path(__file__).with_suffix(".json")

# Inserts one global source (tenant_id NULL) tagged with added_by='seed_military'.
INSERT_SQL = """
INSERT INTO sources (
    name, url, domain, source_type, category, status, notes,
    language, country_code, fetch_strategy, added_by, tenant_id
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'seed_military', NULL)
"""

# A source counts as already present when a global row with the same URL exists.
EXISTS_SQL = "SELECT id FROM sources WHERE url = ? AND tenant_id IS NULL"


def _insert_params(entry, name, url):
    """Build the positional parameter tuple for INSERT_SQL from one seed entry."""
    return (
        name,
        url,
        entry.get("domain"),
        entry.get("source_type", "rss_feed"),
        entry.get("category", "fachmedien"),
        entry.get("status", "active"),
        entry.get("notes"),
        entry.get("language"),
        entry.get("country_code"),
        entry.get("fetch_strategy", "default"),
    )


def main() -> int:
    """Seed the ``sources`` table from the seed JSON file.

    Command-line options:
        --db       path to the target SQLite database (default: DEFAULT_DB)
        --seed     path to the seed JSON (default: SEED_FILE)
        --dry-run  only report what would happen, write nothing

    Entries without a URL and entries whose URL already exists as a global
    source (``tenant_id IS NULL``) are skipped, so the script can be re-run
    safely. All inserts are committed together at the end; if anything
    raises, the connection is closed without a commit.

    Returns:
        0 on success, 2 if the seed file or the database file is missing or
        the seed file contains no sources.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--db", default=DEFAULT_DB, help="Pfad zur Ziel-SQLite-DB")
    ap.add_argument("--seed", default=str(SEED_FILE), help="Pfad zur Seed-JSON")
    ap.add_argument("--dry-run", action="store_true", help="Nur loggen, nichts schreiben")
    args = ap.parse_args()

    seed_path = Path(args.seed)
    if not seed_path.is_file():
        print(f"FEHLER: Seed-Datei nicht gefunden: {seed_path}", file=sys.stderr)
        return 2

    with seed_path.open("r", encoding="utf-8") as fh:
        seed = json.load(fh)
    sources = seed.get("sources", [])
    if not sources:
        print("FEHLER: Seed-Datei enthaelt keine sources", file=sys.stderr)
        return 2

    # sqlite3.connect() silently creates an empty database file for a
    # non-existent path. That would leave a stray file behind (even in
    # dry-run mode) and then fail on the missing `sources` table, so a wrong
    # path is rejected up front.
    if not Path(args.db).is_file():
        print(f"FEHLER: Datenbank nicht gefunden: {args.db}", file=sys.stderr)
        return 2

    print(f"DB: {args.db}")
    print(f"Seed: {seed_path} ({len(sources)} Eintraege)")
    print(f"Dry-Run: {args.dry_run}")
    print()

    created: list[tuple[int, str]] = []
    skipped: list[tuple[int, str]] = []
    # URLs a dry run has already "created". A real run detects duplicates
    # inside the seed file through EXISTS_SQL (the earlier insert is visible
    # on the same connection); a dry run inserts nothing and has to track
    # them itself so that both modes report the same numbers.
    planned_urls = set()

    con = sqlite3.connect(args.db)
    try:
        con.row_factory = sqlite3.Row
        cur = con.cursor()

        for entry in sources:
            url = entry.get("url")
            name = entry.get("name", "?")
            if not url:
                skipped.append((-1, f"{name}: ohne url"))
                continue

            row = cur.execute(EXISTS_SQL, (url,)).fetchone()
            if row is not None:
                skipped.append((row["id"], f"{name}: existiert bereits (id={row['id']})"))
                continue

            if args.dry_run:
                if url in planned_urls:
                    skipped.append((-1, f"{name}: existiert bereits (Duplikat in Seed-Datei)"))
                    continue
                planned_urls.add(url)
                created.append((-1, name))
                continue

            cur.execute(INSERT_SQL, _insert_params(entry, name, url))
            created.append((cur.lastrowid, name))

        if not args.dry_run:
            con.commit()
    finally:
        # Closing without a commit discards pending inserts, so a failure
        # midway never leaves a partially seeded database behind.
        con.close()

    print(f"Angelegt: {len(created)}")
    print(f"Uebersprungen:{len(skipped)}")
    print()
    if created:
        print("--- Neue IDs ---")
        for src_id, name in created:
            print(f" {src_id:>5} {name}")
    if skipped:
        print()
        print("--- Uebersprungen ---")
        for src_id, msg in skipped:
            print(f" {src_id:>5} {msg}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -8,6 +8,10 @@ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
|
||||
# Gemeinsame Datenbank (gleiche wie OSINT-Monitor)
|
||||
DB_PATH = os.environ.get("DB_PATH", "/mnt/gitea/osint-data/osint.db")
|
||||
|
||||
# twscrape-Account-Store: die X-Login-Konten, mit denen der Monitor bei X
|
||||
# recherchiert. Geteilt mit dem Monitor (gleicher Pfad-Default).
|
||||
X_ACCOUNTS_DB_PATH = os.environ.get("X_ACCOUNTS_DB_PATH", "/home/claude-dev/.x-scraper/accounts.db")
|
||||
|
||||
# JWT (eigener Secret fuer Verwaltungsportal)
|
||||
JWT_SECRET = os.environ.get("PORTAL_JWT_SECRET")
|
||||
if not JWT_SECRET:
|
||||
@@ -42,7 +46,7 @@ PORTAL_MAGIC_LINK_EXPIRE_MINUTES = int(
|
||||
)
|
||||
|
||||
# Source Discovery (geteilte Config mit OSINT-Monitor)
|
||||
CLAUDE_PATH = os.environ.get("CLAUDE_PATH", "/home/claude-dev/.claude/local/claude")
|
||||
CLAUDE_PATH = os.environ.get("CLAUDE_PATH", "/usr/local/bin/claude")
|
||||
CLAUDE_TIMEOUT = 300
|
||||
MAX_FEEDS_PER_DOMAIN = 3
|
||||
CLAUDE_MODEL_FAST = "claude-haiku-4-5-20251001"
|
||||
|
||||
@@ -11,7 +11,7 @@ from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
from config import STATIC_DIR, PORT
|
||||
from routers import auth, organizations, licenses, users, dashboard, sources, token_usage, audit
|
||||
from routers import auth, organizations, licenses, users, dashboard, sources, token_usage, audit, translation, x_scraper
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@@ -42,6 +42,8 @@ app.include_router(dashboard.router)
|
||||
app.include_router(sources.router)
|
||||
app.include_router(token_usage.router)
|
||||
app.include_router(audit.router)
|
||||
app.include_router(translation.router)
|
||||
app.include_router(x_scraper.router)
|
||||
|
||||
# --- Statische Dateien ---
|
||||
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
||||
|
||||
@@ -25,11 +25,13 @@ class TokenResponse(BaseModel):
|
||||
class OrgCreate(BaseModel):
|
||||
name: str = Field(min_length=1, max_length=200)
|
||||
slug: str = Field(min_length=1, max_length=100, pattern="^[a-z0-9-]+$")
|
||||
output_language: str = Field(default="de", pattern="^(de|en)$")
|
||||
|
||||
|
||||
class OrgUpdate(BaseModel):
|
||||
name: Optional[str] = Field(default=None, max_length=200)
|
||||
is_active: Optional[bool] = None
|
||||
output_language: Optional[str] = Field(default=None, pattern="^(de|en)$")
|
||||
|
||||
|
||||
class OrgResponse(BaseModel):
|
||||
@@ -43,6 +45,7 @@ class OrgResponse(BaseModel):
|
||||
created_at: str
|
||||
globe_access: bool = False
|
||||
network_access: bool = False
|
||||
output_language: str = "de"
|
||||
|
||||
|
||||
class LicenseCreate(BaseModel):
|
||||
|
||||
@@ -25,6 +25,15 @@ async def _enrich_org(db: aiosqlite.Connection, row: aiosqlite.Row) -> dict:
|
||||
lic = await cursor.fetchone()
|
||||
org["license_status"] = lic["status"] if lic else "none"
|
||||
org["license_type"] = lic["license_type"] if lic else ""
|
||||
|
||||
# output_language aus organization_settings (Default 'de')
|
||||
cursor = await db.execute(
|
||||
"SELECT value FROM organization_settings WHERE organization_id = ? AND key = 'output_language'",
|
||||
(org["id"],),
|
||||
)
|
||||
lang_row = await cursor.fetchone()
|
||||
org["output_language"] = lang_row["value"] if lang_row else "de"
|
||||
|
||||
return org
|
||||
|
||||
|
||||
@@ -57,6 +66,10 @@ async def create_organization(
|
||||
org_id = cursor.lastrowid
|
||||
await db.commit()
|
||||
|
||||
# output_language als organization_settings-Eintrag persistieren
|
||||
from shared.services.org_settings import set_org_setting
|
||||
await set_org_setting(db, org_id, "output_language", data.output_language)
|
||||
|
||||
cursor = await db.execute("SELECT * FROM organizations WHERE id = ?", (org_id,))
|
||||
new_row_obj = await cursor.fetchone()
|
||||
await log_action(
|
||||
@@ -105,6 +118,11 @@ async def update_organization(
|
||||
await db.execute(f"UPDATE organizations SET {set_clause} WHERE id = ?", values)
|
||||
await db.commit()
|
||||
|
||||
# output_language separat ueber organization_settings setzen
|
||||
if data.output_language is not None:
|
||||
from shared.services.org_settings import set_org_setting
|
||||
await set_org_setting(db, org_id, "output_language", data.output_language)
|
||||
|
||||
after = await row_to_dict(db, "organizations", org_id)
|
||||
await log_action(
|
||||
db, admin, get_client_ip(request),
|
||||
|
||||
@@ -1,30 +1,106 @@
|
||||
"""Grundquellen-Verwaltung und Kundenquellen-Übersicht."""
|
||||
import json
|
||||
import logging
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Request, UploadFile, status
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
import aiosqlite
|
||||
|
||||
from auth import get_current_admin
|
||||
from database import db_dependency
|
||||
from database import db_dependency, get_db
|
||||
from audit import log_action, get_client_ip
|
||||
from source_meta import get_meta
|
||||
from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S
|
||||
from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S, DB_PATH
|
||||
from shared.source_rules import (
|
||||
discover_source,
|
||||
discover_all_feeds,
|
||||
evaluate_feeds_with_claude,
|
||||
domain_to_display_name,
|
||||
)
|
||||
from shared.services.source_classifier import (
|
||||
bulk_classify,
|
||||
classify_source,
|
||||
ALIGNMENT_VALUES,
|
||||
POLITICAL_VALUES,
|
||||
MEDIA_TYPE_VALUES,
|
||||
RELIABILITY_VALUES,
|
||||
)
|
||||
from shared.services.external_reputation import (
|
||||
apply_reputation_overrides,
|
||||
sync_all as sync_external_reputation,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("verwaltung.sources")
|
||||
|
||||
router = APIRouter(prefix="/api/sources", tags=["sources"])
|
||||
|
||||
SOURCE_UPDATE_COLUMNS = {"name", "url", "domain", "source_type", "category", "status", "notes", "language", "bias", "fetch_strategy"}
|
||||
SOURCE_UPDATE_COLUMNS = {
|
||||
"name", "url", "domain", "source_type", "category", "status", "notes",
|
||||
"language", "bias", "fetch_strategy",
|
||||
"political_orientation", "media_type", "reliability",
|
||||
"state_affiliated", "country_code",
|
||||
}
|
||||
SOURCE_CLASSIFICATION_FIELDS = {
|
||||
"political_orientation", "media_type", "reliability",
|
||||
"state_affiliated", "country_code",
|
||||
}
|
||||
|
||||
|
||||
async def _load_alignments_for(db: aiosqlite.Connection, source_ids: list[int]) -> dict[int, list[str]]:
    """Load the alignment labels of several sources in bulk.

    Args:
        db: Open database connection whose rows can be indexed by column name.
        source_ids: IDs of the sources to look up (duplicates are tolerated).

    Returns:
        A dict that contains every requested source ID. Each value is the list
        of alignment labels of that source in alphabetical order; sources
        without any alignment map to an empty list. An empty input yields an
        empty dict.
    """
    if not source_ids:
        return {}

    out: dict[int, list[str]] = {sid: [] for sid in source_ids}

    # The dict keys are the de-duplicated IDs in request order. They are
    # queried in chunks so that the IN (...) list can never exceed SQLite's
    # limit on bound parameters, no matter how many sources the caller passes.
    unique_ids = list(out)
    chunk_size = 500
    for start in range(0, len(unique_ids), chunk_size):
        chunk = unique_ids[start:start + chunk_size]
        placeholders = ",".join("?" for _ in chunk)
        cursor = await db.execute(
            f"SELECT source_id, alignment FROM source_alignments WHERE source_id IN ({placeholders}) ORDER BY alignment",
            chunk,
        )
        # Each source ID lives in exactly one chunk, so the per-source lists
        # keep the ORDER BY alignment ordering.
        for row in await cursor.fetchall():
            out.setdefault(row["source_id"], []).append(row["alignment"])
    return out
|
||||
|
||||
|
||||
async def _replace_alignments(db: aiosqlite.Connection, source_id: int, alignments: list[str]):
    """Replace the alignment list of a source (DELETE + INSERT).

    Every entry is stripped and lower-cased; blank entries and duplicates are
    dropped. All entries are validated against ALIGNMENT_VALUES *before* the
    first statement is executed, so an invalid value can no longer leave a
    half-applied replacement (rows deleted, only some re-inserted) pending on
    the connection.

    The caller is responsible for calling commit().

    Args:
        db: Open database connection.
        source_id: ID of the source whose alignments are replaced.
        alignments: New alignment labels; an empty list removes all of them.

    Raises:
        HTTPException: 422 if a normalised entry is not in ALIGNMENT_VALUES.
    """
    normalized: list[str] = []
    seen: set[str] = set()
    for raw in alignments:
        a = (raw or "").strip().lower()
        if not a or a in seen:
            continue
        if a not in ALIGNMENT_VALUES:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=f"Ungueltiger alignment-Wert: '{a}'",
            )
        seen.add(a)
        normalized.append(a)

    # Only touch the database once the complete input is known to be valid.
    await db.execute("DELETE FROM source_alignments WHERE source_id = ?", (source_id,))
    for a in normalized:
        await db.execute(
            "INSERT INTO source_alignments (source_id, alignment) VALUES (?, ?)",
            (source_id, a),
        )
|
||||
|
||||
|
||||
async def _clear_proposed(db: aiosqlite.Connection, source_id: int):
    """Reset every proposed_* column of one source to NULL.

    Only executes the UPDATE; this function does not commit.
    """
    reset_sql = """UPDATE sources SET
        proposed_political_orientation = NULL,
        proposed_media_type = NULL,
        proposed_reliability = NULL,
        proposed_state_affiliated = NULL,
        proposed_country_code = NULL,
        proposed_alignments_json = NULL,
        proposed_confidence = NULL,
        proposed_reasoning = NULL,
        proposed_at = NULL
        WHERE id = ?"""
    params = (source_id,)
    await db.execute(reset_sql, params)
|
||||
|
||||
|
||||
@router.get("/meta")
|
||||
@@ -42,7 +118,7 @@ class GlobalSourceCreate(BaseModel):
|
||||
name: str = Field(min_length=1, max_length=200)
|
||||
url: Optional[str] = None
|
||||
domain: Optional[str] = None
|
||||
source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$")
|
||||
source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed|pdf_document)$")
|
||||
category: str = Field(default="sonstige")
|
||||
status: str = Field(default="active", pattern="^(active|inactive)$")
|
||||
notes: Optional[str] = None
|
||||
@@ -55,12 +131,18 @@ class GlobalSourceUpdate(BaseModel):
|
||||
name: Optional[str] = Field(default=None, max_length=200)
|
||||
url: Optional[str] = None
|
||||
domain: Optional[str] = None
|
||||
source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$")
|
||||
source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed|pdf_document)$")
|
||||
category: Optional[str] = None
|
||||
status: Optional[str] = Field(default=None, pattern="^(active|inactive)$")
|
||||
notes: Optional[str] = None
|
||||
language: Optional[str] = Field(default=None, max_length=100)
|
||||
bias: Optional[str] = Field(default=None, max_length=500)
|
||||
political_orientation: Optional[str] = None
|
||||
media_type: Optional[str] = None
|
||||
reliability: Optional[str] = None
|
||||
state_affiliated: Optional[bool] = None
|
||||
country_code: Optional[str] = Field(default=None, max_length=8)
|
||||
alignments: Optional[list[str]] = None
|
||||
|
||||
|
||||
@router.get("/global")
|
||||
@@ -120,7 +202,11 @@ async def list_global_sources(
|
||||
WHERE s.tenant_id IS NULL
|
||||
ORDER BY s.category, s.source_type, s.name
|
||||
""")
|
||||
return [dict(row) for row in await cursor.fetchall()]
|
||||
rows = [dict(row) for row in await cursor.fetchall()]
|
||||
alignments_map = await _load_alignments_for(db, [r["id"] for r in rows])
|
||||
for r in rows:
|
||||
r["alignments"] = alignments_map.get(r["id"], [])
|
||||
return rows
|
||||
|
||||
|
||||
@router.post("/global", status_code=201)
|
||||
@@ -170,7 +256,7 @@ async def update_global_source(
|
||||
admin: dict = Depends(get_current_admin),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Grundquelle bearbeiten."""
|
||||
"""Grundquelle bearbeiten — inkl. Klassifikation + alignments."""
|
||||
cursor = await db.execute(
|
||||
"SELECT * FROM sources WHERE id = ? AND tenant_id IS NULL", (source_id,)
|
||||
)
|
||||
@@ -178,21 +264,50 @@ async def update_global_source(
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Grundquelle nicht gefunden")
|
||||
before = dict(row)
|
||||
before_alignments = sorted((await _load_alignments_for(db, [source_id])).get(source_id, []))
|
||||
|
||||
updates = {}
|
||||
for field, value in data.model_dump(exclude_none=True).items():
|
||||
updates[field] = value
|
||||
payload = data.model_dump(exclude_none=True)
|
||||
alignments = payload.pop("alignments", None)
|
||||
|
||||
if not updates:
|
||||
return before
|
||||
if "political_orientation" in payload and payload["political_orientation"] not in POLITICAL_VALUES:
|
||||
raise HTTPException(status_code=422, detail=f"Ungueltige political_orientation: {payload['political_orientation']}")
|
||||
if "media_type" in payload and payload["media_type"] not in MEDIA_TYPE_VALUES:
|
||||
raise HTTPException(status_code=422, detail=f"Ungueltiger media_type: {payload['media_type']}")
|
||||
if "reliability" in payload and payload["reliability"] not in RELIABILITY_VALUES:
|
||||
raise HTTPException(status_code=422, detail=f"Ungueltige reliability: {payload['reliability']}")
|
||||
|
||||
updates = {k: v for k, v in payload.items() if k in SOURCE_UPDATE_COLUMNS}
|
||||
if "state_affiliated" in updates:
|
||||
updates["state_affiliated"] = 1 if updates["state_affiliated"] else 0
|
||||
|
||||
classification_touched = any(k in updates for k in SOURCE_CLASSIFICATION_FIELDS) or alignments is not None
|
||||
if classification_touched:
|
||||
updates["classification_source"] = "manual"
|
||||
updates["classified_at"] = None # CURRENT_TIMESTAMP via SQL — siehe unten
|
||||
|
||||
if updates:
|
||||
sets = []
|
||||
vals = []
|
||||
for k, v in updates.items():
|
||||
if k == "classified_at":
|
||||
sets.append("classified_at = CURRENT_TIMESTAMP")
|
||||
else:
|
||||
sets.append(f"{k} = ?")
|
||||
vals.append(v)
|
||||
vals.append(source_id)
|
||||
await db.execute(f"UPDATE sources SET {', '.join(sets)} WHERE id = ?", vals)
|
||||
|
||||
if alignments is not None:
|
||||
await _replace_alignments(db, source_id, alignments)
|
||||
|
||||
set_clause = ", ".join(f"{k} = ?" for k in updates)
|
||||
values = list(updates.values()) + [source_id]
|
||||
await db.execute(f"UPDATE sources SET {set_clause} WHERE id = ?", values)
|
||||
await db.commit()
|
||||
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
|
||||
after = dict(await cursor.fetchone())
|
||||
after_alignments = sorted((await _load_alignments_for(db, [source_id])).get(source_id, []))
|
||||
if before_alignments != after_alignments:
|
||||
before["alignments"] = before_alignments
|
||||
after["alignments"] = after_alignments
|
||||
await log_action(
|
||||
db, admin, get_client_ip(request),
|
||||
action="update", resource_type="source", resource_id=source_id,
|
||||
@@ -1086,3 +1201,420 @@ Nur das JSON, kein anderer Text."""
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Recherche fehlgeschlagen: {e}")
|
||||
|
||||
|
||||
# === Klassifikations-Review (LLM-Vorschlaege approve/reject/reclassify) ===
|
||||
|
||||
|
||||
@router.get("/classification/stats")
async def classification_stats(
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Return counts per classification_source value plus the number of pending reviews.

    Only sources with status 'active' are counted. Rows whose
    classification_source is NULL or empty are reported under "legacy".

    Returns:
        Dict with
        - "by_classification_source": label -> number of active sources,
        - "pending_review": active sources that carry an LLM proposal
          (proposed_political_orientation is set),
        - "total": sum of all per-label counts.
    """
    cursor = await db.execute(
        """SELECT classification_source, COUNT(*) as cnt
           FROM sources
           WHERE status = 'active'
           GROUP BY classification_source"""
    )
    by_source: dict[str, int] = {}
    for row in await cursor.fetchall():
        # NULL/empty and an explicit 'legacy' value arrive as separate groups
        # but share one label, so their counts have to be added up instead of
        # letting the later group overwrite the earlier one.
        label = row["classification_source"] or "legacy"
        by_source[label] = by_source.get(label, 0) + row["cnt"]

    cursor = await db.execute(
        """SELECT COUNT(*) as cnt FROM sources
           WHERE status = 'active' AND proposed_political_orientation IS NOT NULL"""
    )
    pending = (await cursor.fetchone())["cnt"]
    return {
        "by_classification_source": by_source,
        "pending_review": pending,
        "total": sum(by_source.values()),
    }
|
||||
|
||||
|
||||
@router.get("/classification/queue")
async def classification_queue(
    limit: int = 50,
    min_confidence: float = 0.0,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Return the review queue: sources that carry an LLM proposal.

    A source is part of the queue when proposed_political_orientation is set
    and its proposed_confidence (NULL counts as 0) is at least min_confidence.
    Entries are ordered by confidence, then by proposal time, both descending.

    Args:
        limit: Maximum number of entries to return.
        min_confidence: Lower bound for proposed_confidence.

    Returns:
        A list of dicts, each with the basic source fields, the "current"
        classification and the "proposed" classification side by side.
    """
    cursor = await db.execute(
        """SELECT s.* FROM sources s
           WHERE s.proposed_political_orientation IS NOT NULL
             AND COALESCE(s.proposed_confidence, 0) >= ?
           ORDER BY s.proposed_confidence DESC, s.proposed_at DESC
           LIMIT ?""",
        (min_confidence, limit),
    )
    rows = [dict(r) for r in await cursor.fetchall()]
    alignments_map = await _load_alignments_for(db, [r["id"] for r in rows])

    out = []
    for d in rows:
        try:
            proposed_aligns = json.loads(d.get("proposed_alignments_json") or "[]")
        except (json.JSONDecodeError, TypeError):
            proposed_aligns = []
        # Valid JSON that is not an array (null, object, string, number) must
        # not leak into the response; the field is always delivered as a list.
        if not isinstance(proposed_aligns, list):
            proposed_aligns = []

        out.append({
            "id": d["id"],
            "name": d["name"],
            "url": d.get("url"),
            "domain": d.get("domain"),
            "source_type": d.get("source_type"),
            "category": d.get("category"),
            # Sources without a tenant are global base sources.
            "is_global": d.get("tenant_id") is None,
            "current": {
                "political_orientation": d.get("political_orientation"),
                "media_type": d.get("media_type"),
                "reliability": d.get("reliability"),
                "state_affiliated": bool(d.get("state_affiliated")),
                "country_code": d.get("country_code"),
                "alignments": alignments_map.get(d["id"], []),
                "classification_source": d.get("classification_source"),
            },
            "proposed": {
                "political_orientation": d.get("proposed_political_orientation"),
                "media_type": d.get("proposed_media_type"),
                "reliability": d.get("proposed_reliability"),
                "state_affiliated": bool(d.get("proposed_state_affiliated")),
                "country_code": d.get("proposed_country_code"),
                "alignments": proposed_aligns,
                "confidence": d.get("proposed_confidence"),
                "reasoning": d.get("proposed_reasoning"),
                "proposed_at": d.get("proposed_at"),
            },
        })
    return out
|
||||
|
||||
|
||||
@router.post("/{source_id}/classification/approve")
async def approve_classification(
    source_id: int,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Copy the proposed_* values into the real fields and mark the source 'llm_approved'.

    The proposed alignments replace the current ones (values that are not in
    ALIGNMENT_VALUES are dropped), the proposed_* columns are cleared and the
    change is committed. Afterwards reputation overrides are applied on a
    best-effort basis and the change is written to the audit log.

    Raises:
        HTTPException: 404 if the source does not exist, 400 if it carries no
            LLM proposal.
    """
    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
    row = await cursor.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
    src = dict(row)
    before_alignments = sorted((await _load_alignments_for(db, [source_id])).get(source_id, []))
    before = {**src, "alignments": before_alignments}

    if src.get("proposed_political_orientation") is None:
        raise HTTPException(status_code=400, detail="Keine LLM-Vorschlaege fuer diese Quelle vorhanden")

    try:
        proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]")
    except (json.JSONDecodeError, TypeError):
        proposed_aligns = []
    # The stored JSON may be valid but not an array (e.g. null or an object);
    # treat that like "no alignments" instead of failing with a TypeError.
    if not isinstance(proposed_aligns, list):
        proposed_aligns = []
    # Non-string elements (nested lists/objects) can neither be looked up in
    # ALIGNMENT_VALUES safely nor stored, so they are dropped as well.
    accepted_aligns = [
        a for a in proposed_aligns
        if isinstance(a, str) and a in ALIGNMENT_VALUES
    ]

    await db.execute(
        """UPDATE sources SET
           political_orientation = ?,
           media_type = ?,
           reliability = ?,
           state_affiliated = ?,
           country_code = ?,
           classification_source = 'llm_approved',
           classified_at = CURRENT_TIMESTAMP
           WHERE id = ?""",
        (
            src["proposed_political_orientation"],
            src["proposed_media_type"],
            src["proposed_reliability"],
            1 if src.get("proposed_state_affiliated") else 0,
            src.get("proposed_country_code"),
            source_id,
        ),
    )
    await _replace_alignments(db, source_id, accepted_aligns)
    await _clear_proposed(db, source_id)
    await db.commit()

    # Best effort: a failing override must not undo the approval.
    try:
        await apply_reputation_overrides(db, source_id)
    except Exception as e:
        logger.warning("Reputation-Override fuer source_id=%s fehlgeschlagen: %s", source_id, e)

    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
    after_row = dict(await cursor.fetchone())
    after_alignments = sorted((await _load_alignments_for(db, [source_id])).get(source_id, []))
    after = {**after_row, "alignments": after_alignments}
    await log_action(
        db, admin, get_client_ip(request),
        action="update", resource_type="source", resource_id=source_id,
        before=before, after=after,
    )
    return {"source_id": source_id, "status": "approved"}
|
||||
|
||||
|
||||
@router.post("/{source_id}/classification/reject")
async def reject_classification(
    source_id: int,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Discard the LLM proposals of a source without applying them.

    The proposed_* columns are cleared. If the source was marked
    'llm_pending', its classification_source is set to 'legacy'. The change
    is committed and recorded in the audit log.

    Raises:
        HTTPException: 404 if the source does not exist.
    """
    lookup = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
    found = await lookup.fetchone()
    if not found:
        raise HTTPException(status_code=404, detail="Quelle nicht gefunden")

    # Snapshot of the row before any change, used for the audit entry.
    before = dict(found)
    was_pending = before.get("classification_source") == "llm_pending"

    await _clear_proposed(db, source_id)
    if was_pending:
        await db.execute(
            "UPDATE sources SET classification_source = 'legacy' WHERE id = ?",
            (source_id,),
        )
    await db.commit()

    refreshed = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
    after = dict(await refreshed.fetchone())
    await log_action(
        db, admin, get_client_ip(request),
        action="update", resource_type="source", resource_id=source_id,
        before=before, after=after,
    )
    return {"source_id": source_id, "status": "rejected"}
|
||||
|
||||
|
||||
@router.post("/{source_id}/classification/reclassify")
async def reclassify_source(
    source_id: int,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Run the LLM classification for a single source within the request.

    Returns whatever classify_source() returns.

    Raises:
        HTTPException: 404 if the source does not exist, 500 if the
            classification raises.
    """
    lookup = await db.execute("SELECT id FROM sources WHERE id = ?", (source_id,))
    if not await lookup.fetchone():
        raise HTTPException(status_code=404, detail="Quelle nicht gefunden")

    try:
        return await classify_source(db, source_id)
    except Exception as e:
        # Log with traceback, then report the failure to the client.
        logger.error("Reclassify source_id=%s fehlgeschlagen: %s", source_id, e, exc_info=True)
        raise HTTPException(status_code=500, detail=f"Klassifikation fehlgeschlagen: {e}")
|
||||
|
||||
|
||||
async def _bulk_classify_background(limit: int, only_unclassified: bool):
    """Background task: run bulk_classify() on a connection of its own.

    The connection is obtained via get_db() and closed again in all cases.
    """
    conn = await get_db()
    try:
        await bulk_classify(
            conn,
            limit=limit,
            only_unclassified=only_unclassified,
        )
    finally:
        await conn.close()
|
||||
|
||||
|
||||
@router.post("/classification/bulk-classify")
async def trigger_bulk_classify(
    background_tasks: BackgroundTasks,
    limit: int = 50,
    only_unclassified: bool = True,
    admin: dict = Depends(get_current_admin),
):
    """Schedule a bulk classification as a background task.

    Raises:
        HTTPException: 400 if limit is outside 1..500.
    """
    if not 1 <= limit <= 500:
        raise HTTPException(status_code=400, detail="limit muss zwischen 1 und 500 liegen")

    background_tasks.add_task(_bulk_classify_background, limit, only_unclassified)
    response = {
        "status": "started",
        "limit": limit,
        "only_unclassified": only_unclassified,
    }
    return response
|
||||
|
||||
|
||||
@router.post("/external-reputation/sync")
async def trigger_external_reputation_sync(
    background_tasks: BackgroundTasks,
    admin: dict = Depends(get_current_admin),
):
    """Schedule the sync of IFCN and EUvsDisinfo data as a background task."""

    async def _run_sync():
        # The task opens its own connection and always closes it again.
        conn = await get_db()
        try:
            await sync_external_reputation(conn)
        finally:
            await conn.close()

    background_tasks.add_task(_run_sync)
    return {"status": "started"}
|
||||
|
||||
|
||||
@router.post("/classification/bulk-approve")
async def bulk_approve_classifications(
    request: Request,
    min_confidence: float = 0.85,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Approve all pending proposals at or above the confidence threshold.

    For every matching source the proposed_* values are copied into the real
    fields, the alignments are replaced (values outside ALIGNMENT_VALUES are
    dropped) and the proposal columns are cleared. All sources are committed
    together. Reputation overrides are then applied per source on a
    best-effort basis, and one audit entry is written for the whole batch.

    Args:
        min_confidence: Lower bound for proposed_confidence (NULL counts as 0).

    Returns:
        Dict with the number of approved sources and their IDs.
    """
    cursor = await db.execute(
        """SELECT id, proposed_political_orientation, proposed_media_type,
                  proposed_reliability, proposed_state_affiliated,
                  proposed_country_code, proposed_alignments_json
           FROM sources
           WHERE proposed_political_orientation IS NOT NULL
             AND COALESCE(proposed_confidence, 0) >= ?""",
        (min_confidence,),
    )
    rows = [dict(r) for r in await cursor.fetchall()]
    approved_ids: list[int] = []
    for src in rows:
        try:
            proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]")
        except (json.JSONDecodeError, TypeError):
            proposed_aligns = []
        # Valid JSON that is not an array, or that contains non-string
        # elements, must not abort the whole batch with a TypeError.
        if not isinstance(proposed_aligns, list):
            proposed_aligns = []
        accepted_aligns = [
            a for a in proposed_aligns
            if isinstance(a, str) and a in ALIGNMENT_VALUES
        ]

        await db.execute(
            """UPDATE sources SET
               political_orientation = ?,
               media_type = ?,
               reliability = ?,
               state_affiliated = ?,
               country_code = ?,
               classification_source = 'llm_approved',
               classified_at = CURRENT_TIMESTAMP
               WHERE id = ?""",
            (
                src["proposed_political_orientation"],
                src["proposed_media_type"],
                src["proposed_reliability"],
                1 if src.get("proposed_state_affiliated") else 0,
                src.get("proposed_country_code"),
                src["id"],
            ),
        )
        await _replace_alignments(db, src["id"], accepted_aligns)
        await _clear_proposed(db, src["id"])
        approved_ids.append(src["id"])
    await db.commit()

    # Best effort per source: one failing override must not prevent the
    # overrides of the remaining approved sources from being applied.
    for sid in approved_ids:
        try:
            await apply_reputation_overrides(db, sid)
        except Exception as e:
            logger.warning("Reputation-Override fuer source_id=%s fehlgeschlagen: %s", sid, e)

    await log_action(
        db, admin, get_client_ip(request),
        action="update", resource_type="source", resource_id=None,
        after={"bulk_approved_ids": approved_ids, "min_confidence": min_confidence},
    )
    return {"approved": len(approved_ids), "ids": approved_ids}
|
||||
|
||||
|
||||
# --- PDF-Upload (Quelle vom Typ pdf_document) ---
|
||||
# Speicherort relativ zur DB: <dirname(DB_PATH)>/pdfs/{sha256}.pdf
|
||||
# Der Monitor pollt pdf_document-Quellen mit processed_at IS NULL und
|
||||
# extrahiert Text + Uebersetzungen (DE/EN). Dieser Endpoint legt nur die
|
||||
# Datei + den Source-Eintrag an (kein LLM-Call hier).
|
||||
|
||||
MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024 # 50 MB
|
||||
PDF_DIR = os.path.join(os.path.dirname(os.path.abspath(DB_PATH)), "pdfs")
|
||||
|
||||
|
||||
def _pdf_dir() -> str:
    """Return PDF_DIR, creating the directory first if it does not exist yet."""
    target = PDF_DIR
    os.makedirs(target, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def _discard_upload_tmp(path: str) -> None:
    """Best-effort removal of a temporary upload file; OS errors (incl. a missing file) are ignored."""
    try:
        os.unlink(path)
    except OSError:
        pass


@router.post("/global/upload-pdf", status_code=201)
async def upload_pdf_source(
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
    file: UploadFile = File(...),
    name: Optional[str] = Form(None),
    category: str = Form("sonstige"),
    language: Optional[str] = Form(None),
    notes: Optional[str] = Form(None),
):
    """Upload a PDF and register it as a global source (source_type=pdf_document).

    The upload is streamed into a temporary file inside the PDF directory
    while its SHA-256 and size are computed, then renamed to
    "<sha256>.pdf". Duplicates are detected via the SHA-256: if a global
    source with the same hash exists, the upload is discarded and 409 is
    returned.

    Args:
        file: The uploaded file; must start with the "%PDF-" magic bytes.
        name: Display name; derived from the file name when missing or blank.
        category, language, notes: Stored on the new source unchanged.

    Returns:
        The newly created source row as a dict.

    Raises:
        HTTPException: 415 if the magic bytes are missing, 413 if the file
            exceeds MAX_PDF_SIZE_BYTES, 409 for a duplicate, 500 if storing
            the file fails for any other reason.
    """
    # Magic-bytes check (a PDF starts with %PDF-)
    head = await file.read(8)
    if not head.startswith(b"%PDF-"):
        raise HTTPException(status_code=415, detail="Datei ist kein gueltiges PDF (Magic-Bytes fehlen)")

    # Stream the file into a temp file while hashing it and checking the size
    sha = hashlib.sha256()
    sha.update(head)
    total = len(head)
    tmp_path = os.path.join(_pdf_dir(), f".upload-{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp_path, "wb") as out:
            out.write(head)
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                total += len(chunk)
                if total > MAX_PDF_SIZE_BYTES:
                    raise HTTPException(status_code=413, detail=f"PDF ueberschreitet Maximum von {MAX_PDF_SIZE_BYTES // 1024 // 1024} MB")
                sha.update(chunk)
                out.write(chunk)

        sha_hex = sha.hexdigest()
        final_path = os.path.join(_pdf_dir(), f"{sha_hex}.pdf")
        # Stored in the DB relative to the data directory, not as an absolute path.
        rel_path = os.path.join("pdfs", f"{sha_hex}.pdf")

        # Duplicate check via sha256
        cursor = await db.execute(
            "SELECT id, name FROM sources WHERE pdf_sha256 = ? AND tenant_id IS NULL",
            (sha_hex,),
        )
        existing = await cursor.fetchone()
        if existing:
            # The temp file is removed by the HTTPException handler below.
            raise HTTPException(
                status_code=409,
                detail=f"PDF bereits hochgeladen als Quelle '{existing['name']}' (id={existing['id']})",
            )

        # Rename atomically
        if not os.path.exists(final_path):
            os.replace(tmp_path, final_path)
        else:
            # A file with the same sha exists on disk but has no source row -> reuse it
            _discard_upload_tmp(tmp_path)
    except HTTPException:
        _discard_upload_tmp(tmp_path)
        raise
    except Exception as e:
        _discard_upload_tmp(tmp_path)
        logger.exception("PDF-Upload fehlgeschlagen")
        raise HTTPException(status_code=500, detail=f"PDF-Upload fehlgeschlagen: {e}")

    # Derive the name from the file name if none was given. A file name that
    # consists only of the extension (".pdf") would yield an empty name, so
    # fall back to "PDF" in that case.
    display_name = (name or "").strip()
    if not display_name:
        display_name = re.sub(r"\.pdf$", "", file.filename or "PDF", flags=re.I).strip()
    display_name = display_name[:200] or "PDF"

    cursor = await db.execute(
        """INSERT INTO sources
           (name, url, domain, source_type, category, status, notes, language,
            pdf_path, pdf_sha256, added_by, tenant_id)
           VALUES (?, NULL, NULL, 'pdf_document', ?, 'active', ?, ?, ?, ?, ?, NULL)""",
        (display_name, category, notes, language, rel_path, sha_hex, admin.get("email") or "system"),
    )
    src_id = cursor.lastrowid
    await db.commit()

    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (src_id,))
    new_src = dict(await cursor.fetchone())
    await log_action(
        db, admin, get_client_ip(request),
        action="upload_pdf", resource_type="source", resource_id=src_id,
        after={"name": display_name, "pdf_sha256": sha_hex, "size_bytes": total},
    )
    return new_src
|
||||
|
||||
222
src/routers/translation.py
Normale Datei
222
src/routers/translation.py
Normale Datei
@@ -0,0 +1,222 @@
|
||||
"""Manuelle Artikel-Übersetzung.
|
||||
|
||||
Stößt die Haiku-Übersetzung fremdsprachiger Artikel an, die noch keine
|
||||
deutsche Fassung haben. Im Monitor läuft der Translator seit 2026-05-22 NICHT
|
||||
mehr automatisch (TRANSLATOR_ENABLED=false), weil ein sehr großer Lauf den
|
||||
Refresh-Worker blockierte. Dieser Endpoint ist der bewusste manuelle Ersatz:
|
||||
er läuft als entkoppelter Hintergrund-Task, blockiert keinen Request und ist
|
||||
jederzeit abbrechbar.
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
|
||||
from auth import get_current_admin
|
||||
from audit import log_action, get_client_ip
|
||||
from database import get_db
|
||||
from translation_agent import translate_articles_batch
|
||||
|
||||
logger = logging.getLogger("verwaltung.translation")
|
||||
router = APIRouter(prefix="/api/translation", tags=["Translation"])
|
||||
|
||||
# Batch-Größe wie im Translator-Agent (durch das Haiku-Output-Limit bestimmt).
|
||||
_BATCH_SIZE = 5
|
||||
|
||||
# Grobe Schätzwerte aus Produktiv-Logs (Haiku, 5 Artikel/Batch):
|
||||
# rund 17 s und rund $0.03 pro Batch.
|
||||
_SECONDS_PER_ARTICLE = 3.5
|
||||
_COST_PER_ARTICLE = 0.006
|
||||
|
||||
# Artikel ohne deutsche Fassung: fremdsprachig (language gesetzt und != de)
|
||||
# und headline_de ODER content_de fehlt.
|
||||
_PENDING_WHERE = (
|
||||
"language IS NOT NULL AND LOWER(language) != 'de' "
|
||||
"AND (headline_de IS NULL OR headline_de = '' "
|
||||
"OR content_de IS NULL OR content_de = '')"
|
||||
)
|
||||
|
||||
# Modul-globaler Job-Status. Es gibt bewusst nur EINEN Übersetzungs-Job
|
||||
# gleichzeitig, das hält Claude-Last und DB-Schreiblast kalkulierbar.
|
||||
_job: dict = {
|
||||
"running": False,
|
||||
"started_at": None,
|
||||
"finished_at": None,
|
||||
"total": 0,
|
||||
"done": 0,
|
||||
"translated": 0,
|
||||
"failed_batches": 0,
|
||||
"cancelled": False,
|
||||
"error": None,
|
||||
"started_by": None,
|
||||
}
|
||||
_job_lock = asyncio.Lock()
|
||||
_cancel_event = asyncio.Event()
|
||||
# Referenz auf den laufenden Task halten, damit der Garbage Collector ihn
|
||||
# nicht vorzeitig einsammelt.
|
||||
_job_task: asyncio.Task | None = None
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
|
||||
async def _count_pending(db) -> int:
    """Return how many rows of ``articles`` match ``_PENDING_WHERE``.

    Args:
        db: An open async DB connection offering ``execute()``.

    Returns:
        The COUNT(*) value, or 0 when the query yields no row.
    """
    query = f"SELECT COUNT(*) FROM articles WHERE {_PENDING_WHERE}"
    cursor = await db.execute(query)
    row = await cursor.fetchone()
    if not row:
        return 0
    return row[0]
|
||||
|
||||
|
||||
async def _run_translation_job(started_by: str):
    """Background task: translate all pending articles batch by batch.

    After every batch the translations are written back and committed, and
    the module-level ``_job`` progress counters are updated so that the
    status endpoint can report live progress. The loop stops between two
    batches as soon as ``_cancel_event`` is set.

    Args:
        started_by: Identifier of the admin who started the job (logging only).

    The ``finally`` block always marks the job as finished, even when opening
    the database connection itself fails. Otherwise ``_job["running"]`` would
    stay ``True`` forever and every later start request would be rejected.
    """
    db = None
    try:
        # Opened inside the try so a connection failure still resets the state.
        db = await get_db()
        # Generous lock timeout because the monitor may write to the same
        # shared database in parallel (WAL allows only one writer).
        await db.execute("PRAGMA busy_timeout=30000")
        cursor = await db.execute(
            f"SELECT id, headline, content_original, language "
            f"FROM articles WHERE {_PENDING_WHERE} ORDER BY id DESC"
        )
        articles = [dict(r) for r in await cursor.fetchall()]
        _job["total"] = len(articles)
        logger.info(
            "Übersetzungs-Job gestartet von %s: %d Artikel",
            started_by, len(articles),
        )

        for i in range(0, len(articles), _BATCH_SIZE):
            if _cancel_event.is_set():
                _job["cancelled"] = True
                logger.info(
                    "Übersetzungs-Job abgebrochen bei %d/%d",
                    _job["done"], _job["total"],
                )
                break
            batch = articles[i : i + _BATCH_SIZE]
            try:
                translations, _usage = await translate_articles_batch(batch, "de")
            except Exception as e:  # pragma: no cover - defensive
                # A failed batch must not abort the whole job: count it,
                # advance the progress counter and continue with the next one.
                _job["failed_batches"] += 1
                logger.error(
                    "Übersetzungs-Batch fehlgeschlagen: %s", e, exc_info=True
                )
                _job["done"] = min(i + _BATCH_SIZE, len(articles))
                continue
            for t in translations:
                hd = t.get("headline_de")
                cd = t.get("content_de")
                if hd or cd:
                    # Empty strings are mapped to NULL so that COALESCE keeps
                    # an already stored value instead of overwriting it with ''.
                    await db.execute(
                        "UPDATE articles SET "
                        "headline_de = COALESCE(?, headline_de), "
                        "content_de = COALESCE(?, content_de) WHERE id = ?",
                        (hd or None, cd or None, t["id"]),
                    )
                    _job["translated"] += 1
            await db.commit()
            _job["done"] = min(i + _BATCH_SIZE, len(articles))

        logger.info(
            "Übersetzungs-Job beendet: %d/%d übersetzt, %d Batch-Fehler, abgebrochen=%s",
            _job["translated"], _job["total"], _job["failed_batches"],
            _job["cancelled"],
        )
    except Exception as e:
        _job["error"] = str(e)
        logger.error(
            "Übersetzungs-Job mit Fehler beendet: %s", e, exc_info=True
        )
    finally:
        _job["running"] = False
        _job["finished_at"] = _now_iso()
        if db is not None:
            await db.close()
|
||||
|
||||
|
||||
@router.get("/status")
async def translation_status(admin=Depends(get_current_admin)):
    """Return a snapshot of the job state plus the number of pending articles.

    The response contains every key of ``_job``, a ``pending`` count and an
    ``estimate`` (seconds and USD) derived from the per-article constants.
    """
    db = await get_db()
    try:
        pending = await _count_pending(db)
    finally:
        await db.close()
    estimate = {
        "seconds": round(pending * _SECONDS_PER_ARTICLE),
        "cost_usd": round(pending * _COST_PER_ARTICLE, 2),
    }
    # Copy the shared state so the response is decoupled from later updates.
    return {**_job, "pending": pending, "estimate": estimate}
|
||||
|
||||
|
||||
@router.post("/run")
async def translation_run(request: Request, admin=Depends(get_current_admin)):
    """Start translating all pending articles as a detached background task.

    Returns ``{"status": "nothing_to_do", ...}`` when no article is pending,
    otherwise ``{"status": "started", ...}``.

    Raises:
        HTTPException: 409 when a translation job is already running.
    """
    global _job_task
    async with _job_lock:
        if _job["running"]:
            raise HTTPException(
                status_code=409, detail="Es läuft bereits eine Übersetzung."
            )

        db = await get_db()
        try:
            pending = await _count_pending(db)
            if pending == 0:
                return {"status": "nothing_to_do", "pending": 0}
            await log_action(
                db, admin, get_client_ip(request), "translation.run",
                resource_type="articles", after={"pending": pending},
            )
        finally:
            await db.close()

        # First non-empty identifier wins; the numeric id is the last resort.
        started_by = admin.get("email")
        if not started_by:
            started_by = admin.get("username") or str(admin.get("id"))

        # Reset the shared job state, then launch the task. The reference is
        # kept in _job_task so the task is not garbage-collected early.
        _cancel_event.clear()
        _job.update(
            running=True,
            started_at=_now_iso(),
            finished_at=None,
            total=pending,
            done=0,
            translated=0,
            failed_batches=0,
            cancelled=False,
            error=None,
            started_by=started_by,
        )
        _job_task = asyncio.create_task(_run_translation_job(started_by))

    logger.info(
        "Übersetzung manuell gestartet von %s (%d Artikel)", started_by, pending
    )
    return {"status": "started", "pending": pending}
|
||||
|
||||
|
||||
@router.post("/cancel")
async def translation_cancel(request: Request, admin=Depends(get_current_admin)):
    """Request cancellation of the running translation job.

    Only sets ``_cancel_event``; the job checks it between two batches, so
    the batch currently in flight still completes.

    Raises:
        HTTPException: 409 when no job is running.
    """
    running = _job["running"]
    if not running:
        raise HTTPException(
            status_code=409, detail="Es läuft keine Übersetzung."
        )
    _cancel_event.set()

    # Record the cancellation in the audit log.
    db = await get_db()
    try:
        client_ip = get_client_ip(request)
        await log_action(
            db, admin, client_ip, "translation.cancel",
            resource_type="articles",
        )
    finally:
        await db.close()
    return {"status": "cancelling"}
|
||||
224
src/routers/x_scraper.py
Normale Datei
224
src/routers/x_scraper.py
Normale Datei
@@ -0,0 +1,224 @@
|
||||
"""X-Scraper-Konten: Verwaltung des twscrape-Account-Pools.
|
||||
|
||||
Das sind die X-Login-Konten, mit denen der Monitor bei X recherchiert
|
||||
(scrapen). Sie liegen im twscrape-Account-Store (config.X_ACCOUNTS_DB_PATH),
|
||||
nicht in der Verwaltungs-Datenbank. twscrape wird lazy importiert, damit das
|
||||
Portal auch ohne installiertes twscrape startet.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
import aiosqlite
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from auth import get_current_admin
|
||||
from audit import log_action, get_client_ip
|
||||
from config import X_ACCOUNTS_DB_PATH
|
||||
from database import db_dependency
|
||||
|
||||
logger = logging.getLogger("verwaltung.x_scraper")
|
||||
|
||||
router = APIRouter(prefix="/api/x-scraper", tags=["x-scraper"])
|
||||
|
||||
|
||||
def _get_pool():
    """Open the twscrape account pool stored at ``X_ACCOUNTS_DB_PATH``.

    The parent directory of the store is created on a best-effort basis; a
    failure there is logged but does not abort, so the subsequent pool access
    reports the real problem. twscrape is imported lazily so the portal can
    start without it being installed.

    Returns:
        The ``pool`` attribute of a freshly created ``twscrape.API``.

    Raises:
        HTTPException: 503 when twscrape cannot be imported.
    """
    try:
        db_dir = os.path.dirname(X_ACCOUNTS_DB_PATH)
        # A bare filename has no directory part; makedirs("") would always fail.
        if db_dir:
            os.makedirs(db_dir, exist_ok=True)
    except Exception as exc:
        # Best effort only, but leave a trace instead of swallowing silently.
        logger.warning(
            "X-Scraper: Verzeichnis fuer Account-Store konnte nicht angelegt werden: %s",
            exc,
        )
    try:
        from twscrape import API
    except ImportError as exc:
        raise HTTPException(
            status_code=503, detail="twscrape ist nicht installiert"
        ) from exc
    return API(X_ACCOUNTS_DB_PATH).pool
|
||||
|
||||
|
||||
def _summary(acc) -> dict:
|
||||
"""Account-Objekt auf ein anzeigbares Dict reduzieren -- ohne Geheimnisse."""
|
||||
now = datetime.now(timezone.utc)
|
||||
locked = False
|
||||
locked_until = None
|
||||
for ts in (acc.locks or {}).values():
|
||||
if ts and ts > now:
|
||||
locked = True
|
||||
if locked_until is None or ts > locked_until:
|
||||
locked_until = ts
|
||||
return {
|
||||
"username": acc.username,
|
||||
"email": acc.email if acc.email and acc.email != "_" else None,
|
||||
"active": bool(acc.active),
|
||||
"locked": locked,
|
||||
"locked_until": locked_until.isoformat() if locked_until else None,
|
||||
"has_cookies": bool(acc.cookies),
|
||||
"total_requests": sum((acc.stats or {}).values()),
|
||||
"last_used": acc.last_used.isoformat() if acc.last_used else None,
|
||||
"error_msg": acc.error_msg or None,
|
||||
}
|
||||
|
||||
|
||||
class XScraperCreate(BaseModel):
    """Request body for creating a new X scraper account.

    Only ``username`` and ``cookies`` are mandatory; the remaining
    credentials default to an empty string.
    """

    username: str = Field(max_length=100, min_length=1)
    password: str = Field(max_length=200, default="")
    email: str = Field(max_length=200, default="")
    email_password: str = Field(max_length=200, default="")
    cookies: str = Field(max_length=4000, min_length=1)
|
||||
|
||||
|
||||
class XScraperCookies(BaseModel):
    """Request body carrying a fresh cookie string for an existing account."""

    cookies: str = Field(max_length=4000, min_length=1)
|
||||
|
||||
|
||||
class XScraperActive(BaseModel):
    """Request body for switching an account on or off."""

    # True activates the account, False deactivates it.
    active: bool
|
||||
|
||||
|
||||
@router.get("/accounts")
async def list_accounts(admin: dict = Depends(get_current_admin)):
    """List all X scraper accounts as secret-free summaries.

    Raises:
        HTTPException: 500 when the pool cannot be read (503 is propagated
            from ``_get_pool`` when twscrape is unavailable).
    """
    pool = _get_pool()
    try:
        accounts = await pool.get_all()
    except Exception as e:
        logger.error("X-Scraper get_all fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Konten konnten nicht geladen werden")
    summaries = []
    for account in accounts:
        summaries.append(_summary(account))
    return summaries
|
||||
|
||||
|
||||
@router.post("/accounts", status_code=201)
async def add_account(
    data: XScraperCreate,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Create a new X scraper account in the twscrape pool.

    The username is stripped of surrounding whitespace and a leading ``@``.
    Missing password / e-mail fields are stored as the placeholder ``"_"``.

    Returns:
        The secret-free summary of the stored account.

    Raises:
        HTTPException: 422 for an empty username or empty cookies, 409 when
            the account already exists, 500 when storing fails.
    """
    pool = _get_pool()
    username = data.username.strip().lstrip("@")
    if not username:
        raise HTTPException(status_code=422, detail="Benutzername ist erforderlich")
    # A whitespace-only value passes the model's min_length check but is
    # empty after stripping; reject it instead of storing an unusable account.
    cookies = data.cookies.strip()
    if not cookies:
        raise HTTPException(status_code=422, detail="Cookies sind erforderlich")
    if await pool.get_account(username) is not None:
        raise HTTPException(status_code=409, detail=f"Konto '{username}' existiert bereits")
    try:
        await pool.add_account(
            username=username,
            password=data.password or "_",
            email=data.email or "_",
            email_password=data.email_password or "_",
            cookies=cookies,
        )
    except Exception as e:
        logger.error("X-Scraper add_account fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Konto konnte nicht angelegt werden")
    # Read back to verify that the account was really persisted.
    acc = await pool.get_account(username)
    if acc is None:
        raise HTTPException(status_code=500, detail="Konto wurde nicht gespeichert, bitte Cookies pruefen")
    await log_action(
        db, admin, get_client_ip(request), action="create",
        resource_type="x_scraper_account", after={"username": username, "email": data.email},
    )
    return _summary(acc)
|
||||
|
||||
|
||||
@router.post("/accounts/{username}/cookies")
async def refresh_cookies(
    username: str,
    data: XScraperCookies,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Replace the cookies of an existing account (refresh its login).

    The account is deleted and re-created with the stored credentials and
    the new cookies. Because that sequence is not atomic, a failed re-create
    would otherwise lose the account; in that case the previous account
    object is written back on a best-effort basis.

    Returns:
        The secret-free summary of the refreshed account.

    Raises:
        HTTPException: 404 when the account does not exist, 422 for empty
            cookies, 500 when the refresh fails.
    """
    pool = _get_pool()
    previous = await pool.get_account(username)
    if previous is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
    # Validate before deleting anything: whitespace-only input passes the
    # model's min_length check but is empty after stripping.
    new_cookies = data.cookies.strip()
    if not new_cookies:
        raise HTTPException(status_code=422, detail="Cookies sind erforderlich")

    async def _restore_previous() -> None:
        """Best-effort rollback: put the old account back if it is gone."""
        try:
            if await pool.get_account(username) is None:
                # NOTE(review): assumes AccountsPool.save() upserts the given
                # Account object -- confirm against the installed twscrape
                # version. Any failure here is only logged.
                await pool.save(previous)
        except Exception as restore_err:
            logger.error(
                "X-Scraper: Wiederherstellung von '%s' fehlgeschlagen: %s",
                username, restore_err,
            )

    # Re-create the account with fresh cookies, reusing stored credentials.
    try:
        await pool.delete_accounts([username])
        await pool.add_account(
            username=username,
            password=previous.password,
            email=previous.email,
            email_password=previous.email_password,
            cookies=new_cookies,
        )
    except Exception as e:
        logger.error("X-Scraper Cookie-Refresh fehlgeschlagen: %s", e)
        await _restore_previous()
        raise HTTPException(status_code=500, detail="Cookies konnten nicht erneuert werden")
    acc = await pool.get_account(username)
    if acc is None:
        await _restore_previous()
        raise HTTPException(status_code=500, detail="Konto nach Cookie-Refresh nicht gefunden")
    await log_action(
        db, admin, get_client_ip(request), action="update",
        resource_type="x_scraper_account", after={"username": username, "change": "cookies"},
    )
    return _summary(acc)
|
||||
|
||||
|
||||
@router.post("/accounts/{username}/active")
async def set_active(
    username: str,
    data: XScraperActive,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Switch an account active or inactive and return its updated summary.

    Raises:
        HTTPException: 404 when the account does not exist, 500 when the
            pool update fails.
    """
    pool = _get_pool()
    existing = await pool.get_account(username)
    if existing is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
    try:
        await pool.set_active(username, data.active)
    except Exception as e:
        logger.error("X-Scraper set_active fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Status konnte nicht geaendert werden")
    audit_payload = {"username": username, "active": data.active}
    await log_action(
        db, admin, get_client_ip(request), action="update",
        resource_type="x_scraper_account", after=audit_payload,
    )
    # Re-read so the response reflects the stored state.
    refreshed = await pool.get_account(username)
    return _summary(refreshed)
|
||||
|
||||
|
||||
@router.delete("/accounts/{username}", status_code=204)
async def delete_account(
    username: str,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Remove an X scraper account from the pool and audit the deletion.

    Raises:
        HTTPException: 404 when the account does not exist, 500 when the
            deletion fails.
    """
    pool = _get_pool()
    existing = await pool.get_account(username)
    if existing is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
    try:
        await pool.delete_accounts([username])
    except Exception as e:
        logger.error("X-Scraper delete fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Konto konnte nicht entfernt werden")
    client_ip = get_client_ip(request)
    await log_action(
        db, admin, client_ip, action="delete",
        resource_type="x_scraper_account", before={"username": username},
    )
|
||||
|
||||
|
||||
@router.post("/reset-locks")
async def reset_locks(
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Reset the temporary locks of all accounts and audit the action.

    Raises:
        HTTPException: 500 when the pool reports an error.
    """
    pool = _get_pool()
    try:
        await pool.reset_locks()
    except Exception as e:
        logger.error("X-Scraper reset_locks fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Sperren konnten nicht zurueckgesetzt werden")
    client_ip = get_client_ip(request)
    await log_action(
        db, admin, client_ip, action="update",
        resource_type="x_scraper_account", after={"change": "reset_locks"},
    )
    return {"status": "ok"}
|
||||
282
src/shared/services/external_reputation.py
Normale Datei
282
src/shared/services/external_reputation.py
Normale Datei
@@ -0,0 +1,282 @@
|
||||
"""Externe Reputations-Daten fuer Quellen.
|
||||
|
||||
Synchronisiert Domain-Listen von oeffentlichen Reputations-/Faktencheck-Datenbanken
|
||||
und schreibt die Treffer in die sources-Spalten:
|
||||
|
||||
- IFCN-Signatories (anerkannte Faktenchecker) -> ifcn_signatory
|
||||
- EUvsDisinfo (pro-Kreml-Desinformation, Zenodo-CSV) -> eu_disinfo_listed,
|
||||
eu_disinfo_case_count, eu_disinfo_last_seen
|
||||
|
||||
Anschliessend wendet apply_reputation_overrides() Override-Regeln auf die
|
||||
reliability-Spalte an:
|
||||
- ifcn_signatory=1 -> reliability='sehr_hoch'
|
||||
- eu_disinfo_case_count >= 5 -> reliability='sehr_niedrig'
|
||||
- eu_disinfo_case_count >= 1 -> reliability eine Stufe runter, mindestens bis 'niedrig'
|
||||
"""
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import aiosqlite
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger("osint.external_reputation")
|
||||
|
||||
IFCN_LIST_URL = "https://raw.githubusercontent.com/IFCN/verified-signatories/main/list"
|
||||
EU_DISINFO_CSV_URL = "https://zenodo.org/records/10514307/files/euvsdisinfo_base.csv?download=1"
|
||||
|
||||
HTTP_TIMEOUT = httpx.Timeout(60.0, connect=10.0)
|
||||
|
||||
# Generische Plattform-Domains, die NICHT als Quelle markiert werden duerfen
|
||||
# (EUvsDisinfo aggregiert anonyme Telegram-/Twitter-Posts unter Plattform-Domains).
|
||||
PLATFORM_DOMAINS = {
|
||||
"t.me", "telegram.me", "telegram.org",
|
||||
"twitter.com", "x.com", "mobile.twitter.com",
|
||||
"youtube.com", "youtu.be", "m.youtube.com",
|
||||
"facebook.com", "fb.com", "m.facebook.com",
|
||||
"instagram.com", "tiktok.com", "vk.com", "ok.ru",
|
||||
"rumble.com", "bitchute.com", "odysee.com",
|
||||
"reddit.com", "old.reddit.com",
|
||||
"wordpress.com", "blogspot.com", "medium.com",
|
||||
"substack.com", "wixsite.com",
|
||||
}
|
||||
|
||||
# Reliability-Skala in Stufenfolge (schlecht -> gut)
|
||||
RELIABILITY_ORDER = ["sehr_niedrig", "niedrig", "gemischt", "hoch", "sehr_hoch"]
|
||||
|
||||
|
||||
def _normalize_domain(raw: str | None) -> str | None:
|
||||
"""Normalisiert eine Domain: lowercase, ohne www., ohne Schema/Pfad."""
|
||||
if not raw:
|
||||
return None
|
||||
raw = raw.strip().lower()
|
||||
if not raw:
|
||||
return None
|
||||
# Falls eine vollstaendige URL uebergeben wurde
|
||||
if "://" in raw:
|
||||
try:
|
||||
raw = urlparse(raw).netloc or raw
|
||||
except ValueError:
|
||||
pass
|
||||
# Pfad/Query strippen
|
||||
raw = raw.split("/")[0].split("?")[0].split("#")[0]
|
||||
if raw.startswith("www."):
|
||||
raw = raw[4:]
|
||||
return raw or None
|
||||
|
||||
|
||||
async def _fetch_text(url: str) -> str:
    """Download ``url`` and return the response body as text.

    Redirects are followed and ``HTTP_TIMEOUT`` applies. Errors are not
    caught here: a non-success status surfaces through
    ``raise_for_status()`` and transport errors propagate from the client.
    """
    client = httpx.AsyncClient(timeout=HTTP_TIMEOUT, follow_redirects=True)
    async with client:
        response = await client.get(url)
        response.raise_for_status()
        return response.text
|
||||
|
||||
|
||||
async def sync_ifcn_signatories(db: aiosqlite.Connection) -> dict:
    """Load the IFCN domain list and match it against ``sources.domain``.

    Sets ``ifcn_signatory`` to 1 for every source whose normalized domain is
    on the list (platform domains excluded) and to 0 for all other sources
    that have a domain.

    If the downloaded list contains no usable domain, nothing is written:
    an empty list would otherwise wipe every existing flag.

    Returns:
        A dict with ``list_size``, ``sources_checked`` and ``matched``.
    """
    text = await _fetch_text(IFCN_LIST_URL)
    domains: set[str] = {
        d for line in text.splitlines() if (d := _normalize_domain(line))
    }
    logger.info("IFCN-Liste geladen: %d Domains", len(domains))
    if not domains:
        logger.warning(
            "IFCN-Liste ist leer -- Sync uebersprungen, bestehende Markierungen bleiben erhalten"
        )
        return {"list_size": 0, "sources_checked": 0, "matched": 0}

    # Load all sources that have a domain.
    cursor = await db.execute(
        "SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''"
    )
    sources = [dict(r) for r in await cursor.fetchall()]

    # One (flag, id) pair per source.
    updates: list[tuple[int, int]] = []
    matched = 0
    for s in sources:
        nd = _normalize_domain(s["domain"])
        is_signatory = bool(nd) and nd not in PLATFORM_DOMAINS and nd in domains
        matched += is_signatory
        updates.append((int(is_signatory), s["id"]))

    # executemany binds two parameters per statement, so the update does not
    # depend on SQLite's limit of bound variables per statement the way one
    # "IN (?, ?, ...)" placeholder per source does.
    if updates:
        await db.executemany(
            "UPDATE sources SET ifcn_signatory = ? WHERE id = ?", updates
        )
    await db.commit()
    logger.info("IFCN-Sync: %d Quellen als Faktenchecker markiert (von %d)",
                matched, len(sources))
    return {
        "list_size": len(domains),
        "sources_checked": len(sources),
        "matched": matched,
    }
|
||||
|
||||
|
||||
async def sync_eu_disinfo(db: aiosqlite.Connection) -> dict:
    """Load the EUvsDisinfo CSV, aggregate it per domain and update sources.

    Columns written on ``sources``:
    - eu_disinfo_listed: 1 when the domain has at least one row with
      class 'disinformation', otherwise 0
    - eu_disinfo_case_count: number of such rows
    - eu_disinfo_last_seen: greatest ``debunk_date`` string of those rows

    Returns:
        A dict with ``rows_in_csv``, ``domains_with_disinfo_in_csv``,
        ``sources_checked`` and ``matched``.
    """
    csv_text = await _fetch_text(EU_DISINFO_CSV_URL)

    # Aggregate per domain; only rows with class 'disinformation' count.
    counts: dict[str, int] = defaultdict(int)
    last_seen: dict[str, str] = {}
    total_rows = 0
    for record in csv.DictReader(io.StringIO(csv_text)):
        total_rows += 1
        label = (record.get("class") or "").strip().lower()
        if label != "disinformation":
            continue
        domain = _normalize_domain(record.get("article_domain"))
        if not domain:
            continue
        counts[domain] += 1
        seen = (record.get("debunk_date") or "").strip()
        # Plain string comparison, exactly as stored in the CSV. An empty
        # date never beats the "" default, so it is skipped implicitly.
        if seen > last_seen.get(domain, ""):
            last_seen[domain] = seen
    logger.info("EUvsDisinfo-CSV: %d Zeilen, %d Domains mit Desinformation",
                total_rows, len(counts))

    # Load the sources and match them against the aggregate.
    cursor = await db.execute(
        "SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''"
    )
    sources = [dict(r) for r in await cursor.fetchall()]

    matched = 0
    for source in sources:
        domain = _normalize_domain(source["domain"])
        listed = bool(domain) and domain not in PLATFORM_DOMAINS and domain in counts
        if not listed:
            # Not (or no longer) listed: reset all three columns.
            await db.execute(
                """UPDATE sources SET
                       eu_disinfo_listed = 0,
                       eu_disinfo_case_count = 0,
                       eu_disinfo_last_seen = NULL
                   WHERE id = ?""",
                (source["id"],),
            )
            continue
        await db.execute(
            """UPDATE sources SET
                   eu_disinfo_listed = 1,
                   eu_disinfo_case_count = ?,
                   eu_disinfo_last_seen = ?
               WHERE id = ?""",
            (counts[domain], last_seen.get(domain), source["id"]),
        )
        matched += 1
    await db.commit()
    logger.info("EUvsDisinfo-Sync: %d Quellen als Desinformations-Quelle markiert (von %d)",
                matched, len(sources))
    return {
        "rows_in_csv": total_rows,
        "domains_with_disinfo_in_csv": len(counts),
        "sources_checked": len(sources),
        "matched": matched,
    }
|
||||
|
||||
|
||||
def _override_reliability(current: str | None, ifcn: bool, eu_count: int) -> str | None:
|
||||
"""Wendet Override-Regeln auf eine reliability-Stufe an.
|
||||
|
||||
Rueckgabe: neue Stufe (oder None, wenn unveraendert).
|
||||
"""
|
||||
cur = current or "na"
|
||||
|
||||
# IFCN gewinnt: zertifizierter Faktenchecker -> sehr_hoch (immer)
|
||||
if ifcn:
|
||||
return "sehr_hoch" if cur != "sehr_hoch" else None
|
||||
|
||||
# EUvsDisinfo: Downgrade
|
||||
if eu_count >= 5:
|
||||
return "sehr_niedrig" if cur != "sehr_niedrig" else None
|
||||
if eu_count >= 1:
|
||||
# Eine Stufe runter, mindestens bis 'niedrig'
|
||||
if cur == "na":
|
||||
return "niedrig"
|
||||
if cur in RELIABILITY_ORDER:
|
||||
idx = RELIABILITY_ORDER.index(cur)
|
||||
new_idx = max(0, idx - 1)
|
||||
new = RELIABILITY_ORDER[new_idx]
|
||||
# Mindeststufe 'niedrig' bei eu_count >= 1
|
||||
if RELIABILITY_ORDER.index(new) > RELIABILITY_ORDER.index("niedrig"):
|
||||
new = "niedrig"
|
||||
return new if new != cur else None
|
||||
return None
|
||||
|
||||
|
||||
async def apply_reputation_overrides(db: aiosqlite.Connection, source_id: int | None = None) -> dict:
    """Apply the reliability override rules to one or all sources.

    Args:
        db: Open database connection.
        source_id: Restrict processing to this source; ``None`` means all.

    Returns:
        ``{"checked": <rows examined>, "changed": <rows updated>}``.
    """
    select_sql = "SELECT id, reliability, ifcn_signatory, eu_disinfo_case_count FROM sources"
    if source_id is None:
        cursor = await db.execute(select_sql)
    else:
        cursor = await db.execute(select_sql + " WHERE id = ?", (source_id,))
    rows = [dict(r) for r in await cursor.fetchall()]

    changed = 0
    for row in rows:
        is_ifcn = bool(row.get("ifcn_signatory"))
        case_count = int(row.get("eu_disinfo_case_count") or 0)
        new_level = _override_reliability(row.get("reliability"), is_ifcn, case_count)
        if new_level is None:
            continue  # Rule set leaves this source untouched.
        await db.execute(
            "UPDATE sources SET reliability = ? WHERE id = ?",
            (new_level, row["id"]),
        )
        changed += 1
    await db.commit()
    logger.info("Reliability-Override: %d Quellen angepasst (von %d gepruefte)",
                changed, len(rows))
    return {"checked": len(rows), "changed": changed}
|
||||
|
||||
|
||||
async def sync_all(db: aiosqlite.Connection) -> dict:
    """Run the full sync: IFCN, then EUvsDisinfo, then reliability overrides.

    Afterwards ``external_data_synced_at`` is stamped on every source that
    has a domain.

    Returns:
        The three partial results under ``ifcn``, ``eu_disinfo`` and
        ``override``.
    """
    # Order matters: the overrides read the columns the two syncs write.
    results = {
        "ifcn": await sync_ifcn_signatories(db),
        "eu_disinfo": await sync_eu_disinfo(db),
        "override": await apply_reputation_overrides(db),
    }

    stamp_sql = (
        "UPDATE sources SET external_data_synced_at = CURRENT_TIMESTAMP "
        "WHERE domain IS NOT NULL AND domain != ''"
    )
    await db.execute(stamp_sql)
    await db.commit()

    return results
|
||||
104
src/shared/services/org_settings.py
Normale Datei
104
src/shared/services/org_settings.py
Normale Datei
@@ -0,0 +1,104 @@
|
||||
"""Organization-Settings-Helper.
|
||||
|
||||
KV-Store pro Organisation. Aktuell genutzt fuer output_language ('de'|'en').
|
||||
Spaeter erweiterbar (Default-Modell, Telegram-Toggle, Theme, ...).
|
||||
|
||||
Cache: TTL 60s in-memory pro (tenant_id, key). Wird bei set_org_setting()
|
||||
invalidiert.
|
||||
"""
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import aiosqlite
|
||||
|
||||
logger = logging.getLogger("osint.org_settings")
|
||||
|
||||
_CACHE: dict[tuple[int, str], tuple[float, Optional[str]]] = {}
|
||||
_TTL_SECONDS = 60.0
|
||||
|
||||
|
||||
def _cache_get(tenant_id: int, key: str) -> tuple[bool, Optional[str]]:
    """Look up ``(tenant_id, key)`` in the in-memory cache.

    Returns:
        ``(hit, value)``. ``hit`` tells whether a non-expired entry exists;
        ``value`` may be ``None`` even on a hit. Expired entries are evicted.
    """
    cache_key = (tenant_id, key)
    entry = _CACHE.get(cache_key)
    if entry is not None:
        expires_at, value = entry
        if time.monotonic() <= expires_at:
            return (True, value)
        # Entry is stale: drop it and report a miss.
        _CACHE.pop(cache_key, None)
    return (False, None)
|
||||
|
||||
|
||||
def _cache_put(tenant_id: int, key: str, value: Optional[str]) -> None:
    """Store ``value`` for ``(tenant_id, key)`` with a TTL of ``_TTL_SECONDS``."""
    expires_at = time.monotonic() + _TTL_SECONDS
    _CACHE[(tenant_id, key)] = (expires_at, value)
|
||||
|
||||
|
||||
def _cache_invalidate(tenant_id: int, key: str) -> None:
    """Drop the cache entry for ``(tenant_id, key)``; a no-op if absent."""
    cache_key = (tenant_id, key)
    _CACHE.pop(cache_key, None)
|
||||
|
||||
|
||||
async def get_org_setting(
    db: aiosqlite.Connection,
    tenant_id: int,
    key: str,
    default: Optional[str] = None,
) -> Optional[str]:
    """Read one organization setting, falling back to ``default``.

    The cache is consulted first. On a miss the value is read from
    ``organization_settings`` and cached, including a ``None`` result for
    a missing row.

    Returns:
        The stored value, or ``default`` when ``tenant_id`` is ``None`` or
        no value is stored.
    """
    if tenant_id is None:
        return default

    hit, value = _cache_get(tenant_id, key)
    if not hit:
        cursor = await db.execute(
            "SELECT value FROM organization_settings WHERE organization_id = ? AND key = ?",
            (tenant_id, key),
        )
        row = await cursor.fetchone()
        value = row["value"] if row else None
        _cache_put(tenant_id, key, value)

    if value is None:
        return default
    return value
|
||||
|
||||
|
||||
async def set_org_setting(
    db: aiosqlite.Connection,
    tenant_id: int,
    key: str,
    value: str,
) -> None:
    """Insert or update one organization setting.

    The write is committed immediately and the matching cache entry is
    invalidated afterwards.
    """
    upsert_sql = """INSERT INTO organization_settings (organization_id, key, value, updated_at)
           VALUES (?, ?, ?, CURRENT_TIMESTAMP)
           ON CONFLICT(organization_id, key) DO UPDATE SET
             value = excluded.value,
             updated_at = CURRENT_TIMESTAMP"""
    await db.execute(upsert_sql, (tenant_id, key, value))
    await db.commit()
    _cache_invalidate(tenant_id, key)
    logger.info("Org %s Setting %s='%s' gespeichert", tenant_id, key, value)
|
||||
|
||||
|
||||
# Bekannte Sprachen + Anzeigenamen fuer Prompts
|
||||
LANGUAGE_DISPLAY_NAMES = {
|
||||
"de": "Deutsch",
|
||||
"en": "English",
|
||||
}
|
||||
|
||||
|
||||
async def get_org_language(
    db: aiosqlite.Connection,
    tenant_id: int,
) -> str:
    """Return the organization's output language code (default 'de').

    A stored value that is not a key of ``LANGUAGE_DISPLAY_NAMES`` is
    logged as a warning and replaced by 'de'.
    """
    language = await get_org_setting(db, tenant_id, "output_language", default="de")
    if language in LANGUAGE_DISPLAY_NAMES:
        return language
    logger.warning("Unbekannte output_language '%s' fuer Org %s -- fallback 'de'", language, tenant_id)
    return "de"
|
||||
|
||||
|
||||
def language_display(lang_iso: str) -> str:
    """Map a language code to its display name for prompts ('de' -> 'Deutsch').

    Codes without an entry in ``LANGUAGE_DISPLAY_NAMES`` are returned unchanged.
    """
    if lang_iso in LANGUAGE_DISPLAY_NAMES:
        return LANGUAGE_DISPLAY_NAMES[lang_iso]
    return lang_iso
|
||||
295
src/shared/services/source_classifier.py
Normale Datei
295
src/shared/services/source_classifier.py
Normale Datei
@@ -0,0 +1,295 @@
|
||||
"""Klassifiziert Quellen via Claude (Haiku) nach 4 Achsen + state_affiliated + country.
|
||||
|
||||
Schreibt Vorschlaege in die proposed_*-Spalten von sources und setzt
|
||||
classification_source='llm_pending'. Approval erfolgt ueber separate Endpoints,
|
||||
die proposed_* in die echten Spalten kopieren.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
||||
import aiosqlite
|
||||
|
||||
from shared.agents.claude_client import call_claude
|
||||
from config import CLAUDE_MODEL_FAST
|
||||
|
||||
logger = logging.getLogger("osint.source_classifier")
|
||||
|
||||
POLITICAL_VALUES = {
|
||||
"links_extrem", "links", "mitte_links", "liberal", "mitte",
|
||||
"konservativ", "mitte_rechts", "rechts", "rechts_extrem", "na",
|
||||
}
|
||||
MEDIA_TYPE_VALUES = {
|
||||
"tageszeitung", "wochenzeitung", "magazin", "tv_sender", "radio",
|
||||
"oeffentlich_rechtlich", "nachrichtenagentur", "online_only", "blog",
|
||||
"telegram_kanal", "telegram_bot", "podcast", "social_media", "imageboard",
|
||||
"think_tank", "ngo", "behoerde", "staatsmedium", "fachmedium", "sonstige",
|
||||
}
|
||||
RELIABILITY_VALUES = {"sehr_hoch", "hoch", "gemischt", "niedrig", "sehr_niedrig", "na"}
|
||||
ALIGNMENT_VALUES = {
|
||||
"prorussisch", "proiranisch", "prowestlich", "proukrainisch",
|
||||
"prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch",
|
||||
"protuerkisch", "panarabisch", "neutral", "sonstige",
|
||||
}
|
||||
|
||||
|
||||
def _build_prompt(src: dict, sample_articles: list[dict]) -> str:
    """Build the classification prompt for one source.

    Args:
        src: Source record; the keys ``name``, ``url``, ``domain``,
            ``source_type``, ``category``, ``language`` and ``bias`` are read
            with ``dict.get``, so missing keys are tolerated.
        sample_articles: Recent articles of the source. Only the first five
            entries are considered; each may carry ``headline`` and/or
            ``headline_de``.

    Returns:
        The complete prompt text, ending with the expected JSON schema.
    """
    # Collect the usable headlines first and number them afterwards, so that
    # articles without a headline do not leave gaps in the numbering.
    headlines = []
    for art in sample_articles[:5]:
        headline = (art.get("headline") or art.get("headline_de") or "").strip()
        if headline:
            headlines.append(headline[:200])

    sample_text = ""
    if headlines:
        numbered = [f"{i}. {headline}" for i, headline in enumerate(headlines, 1)]
        sample_text = "\nLetzte Artikel/Headlines:\n" + "\n".join(numbered)

    return f"""Du bist ein OSINT-Analyst und klassifizierst Nachrichten- und Medienquellen fuer ein Lagebild-Monitoring-System (DACH-Raum).

QUELLE:
Name: {src.get('name')}
URL: {src.get('url') or '-'}
Domain: {src.get('domain') or '-'}
Quellentyp: {src.get('source_type')}
Bisherige Kategorie: {src.get('category')}
Sprache: {src.get('language') or 'unbekannt'}
Bisherige Notiz (Freitext): {src.get('bias') or '-'}{sample_text}

AUFGABE: Klassifiziere die Quelle nach folgenden Achsen.

1. political_orientation:
- links_extrem (z.B. linksunten.indymedia)
- links (klar links, z.B. junge Welt, taz)
- mitte_links (linksliberal/sozialdemokratisch, z.B. SZ, Spiegel)
- liberal (wirtschafts-/grünliberal, z.B. NZZ, Zeit)
- mitte (politisch neutral, Agentur, z.B. dpa, Reuters, tagesschau)
- konservativ (buergerlich-konservativ, z.B. FAZ, Welt)
- mitte_rechts (rechts-buergerlich, z.B. Tichys Einblick, Achgut)
- rechts (klar rechts, z.B. Junge Freiheit, EpochTimes)
- rechts_extrem (z.B. Compact, PI-News)
- na (nicht klassifizierbar: Behoerde, Fachmedium, Think Tank ohne klare politische Linie)

2. media_type (genau einer):
tageszeitung, wochenzeitung, magazin, tv_sender, radio, oeffentlich_rechtlich,
nachrichtenagentur, online_only, blog, telegram_kanal, telegram_bot, podcast,
social_media, imageboard, think_tank, ngo, behoerde, staatsmedium, fachmedium, sonstige

3. reliability:
- sehr_hoch (etablierte Qualitaet, Faktencheck: tagesschau, dpa, FAZ, Reuters)
- hoch (serioes mit gelegentlichen Schwaechen: taz, Welt, BILD bei harten News)
- gemischt (Mix Meinung/Einseitigkeit: Tichys Einblick, Achgut, Boulevard)
- niedrig (haeufig irrefuehrend, schwache Quellenarbeit: Junge Freiheit, EpochTimes)
- sehr_niedrig (bekannt fuer Desinformation/Verschwoerung: Compact, RT, Sputnik, PI-News)
- na (nicht bewertbar)

4. alignments (Mehrfach, leeres Array wenn keine ausgepraegte Naehe):
prorussisch, proiranisch, prowestlich, proukrainisch, prochinesisch, projapanisch,
proisraelisch, propalaestinensisch, protuerkisch, panarabisch, neutral, sonstige

5. state_affiliated (true/false): true wenn vom Staat finanziert/kontrolliert
(RT, Sputnik, CGTN, PressTV, Xinhua, TRT). Public Service Broadcaster
wie ARD/ZDF/BBC sind NICHT state_affiliated.

6. country_code (ISO 3166-1 alpha-2): Heimatland (DE, AT, CH, RU, US, ...). null wenn unklar.

7. confidence (0.0-1.0): 0.85+ fuer bekannte Outlets, 0.5-0.85 fuer mittelbekannt, <0.5 fuer unsicher.

8. reasoning (1-2 Saetze): Kurze Begruendung der Hauptklassifikationen.

WICHTIG:
- Antworte AUSSCHLIESSLICH mit einem JSON-Objekt, kein Text drumherum.
- Nutze ausschliesslich die genannten enum-Werte (snake_case).
- Bei Unklarheit lieber `na` und niedrige confidence.

JSON-Schema:
{{
"political_orientation": "...",
"media_type": "...",
"reliability": "...",
"alignments": ["..."],
"state_affiliated": false,
"country_code": "DE",
"confidence": 0.9,
"reasoning": "..."
}}"""
|
||||
|
||||
|
||||
async def _load_sample_articles(db: aiosqlite.Connection, name: str, domain: str | None, limit: int = 5) -> list[dict]:
    """Fetch the most recent headlines of a source.

    Articles are looked up by exact source name first. Only if that yields
    nothing (or no name is given) and a domain is available, articles whose
    ``source_url`` contains the domain are used instead.

    Returns:
        Up to ``limit`` rows converted to dicts, newest first.
    """

    async def _fetch(sql: str, needle: str) -> list:
        # Both lookups share the same parameter shape: (match value, limit).
        cur = await db.execute(sql, (needle, limit))
        return await cur.fetchall()

    found: list = []
    if name:
        found = await _fetch(
            "SELECT headline, headline_de FROM articles WHERE source = ? ORDER BY collected_at DESC LIMIT ?",
            name,
        )
    if domain and not found:
        found = await _fetch(
            "SELECT headline, headline_de FROM articles WHERE source_url LIKE ? ORDER BY collected_at DESC LIMIT ?",
            f"%{domain}%",
        )
    return [dict(record) for record in found]
|
||||
|
||||
|
||||
def _validate(parsed: dict) -> dict:
    """Validate and normalise an LLM answer against the allowed enum values.

    Unknown or malformed values never raise; they are replaced by neutral
    defaults instead.

    Args:
        parsed: The decoded JSON object returned by the model.

    Returns:
        A dict with the keys ``political_orientation``, ``media_type``,
        ``reliability``, ``alignments`` (sorted list without duplicates),
        ``state_affiliated`` (bool), ``country_code`` (two upper-case ASCII
        letters or ``None``), ``confidence`` (float in [0.0, 1.0]) and
        ``reasoning`` (at most 1000 characters).
    """
    import math  # local import: only needed for the NaN check below

    def _enum(key: str, allowed: set, fallback: str) -> str:
        value = parsed.get(key, fallback)
        # Only strings can be enum members. Checking the type first also
        # avoids a TypeError when the model returns an unhashable value
        # (list/dict) that the set lookup could not hash.
        if isinstance(value, str):
            value = value.strip().lower()
            if value in allowed:
                return value
        return fallback

    pol = _enum("political_orientation", POLITICAL_VALUES, "na")
    mt = _enum("media_type", MEDIA_TYPE_VALUES, "sonstige")
    rel = _enum("reliability", RELIABILITY_VALUES, "na")

    aligns_raw = parsed.get("alignments") or []
    if not isinstance(aligns_raw, list):
        aligns_raw = []
    aligns = sorted(
        {
            a.strip().lower()
            for a in aligns_raw
            if isinstance(a, str) and a.strip().lower() in ALIGNMENT_VALUES
        }
    )

    # bool("false") is True, so textual booleans need explicit handling.
    sa_raw = parsed.get("state_affiliated", False)
    if isinstance(sa_raw, str):
        sa = sa_raw.strip().lower() in {"true", "1", "yes", "ja"}
    else:
        sa = bool(sa_raw)

    cc = parsed.get("country_code")
    if isinstance(cc, str):
        cc = cc.strip()
    # isascii() keeps non-ASCII letters (which isalpha() alone would accept)
    # out of the ISO 3166-1 alpha-2 field.
    if isinstance(cc, str) and len(cc) == 2 and cc.isascii() and cc.isalpha():
        cc = cc.upper()
    else:
        cc = None

    try:
        confidence = float(parsed.get("confidence", 0.5))
    except (TypeError, ValueError):
        confidence = 0.5
    else:
        if math.isnan(confidence):
            # min(1.0, nan) evaluates to 1.0, so without this guard a NaN
            # would be stored as maximum confidence.
            confidence = 0.5
        else:
            confidence = max(0.0, min(1.0, confidence))

    reasoning_raw = parsed.get("reasoning")
    # A JSON null must not end up as the literal text "None".
    reasoning = "" if reasoning_raw is None else str(reasoning_raw)[:1000]

    return {
        "political_orientation": pol,
        "media_type": mt,
        "reliability": rel,
        "alignments": aligns,
        "state_affiliated": sa,
        "country_code": cc,
        "confidence": confidence,
        "reasoning": reasoning,
    }
|
||||
|
||||
|
||||
def _extract_json_object(response: str) -> dict | None:
    """Return the first JSON object embedded in *response*, or ``None``."""
    decoder = json.JSONDecoder()
    start = response.find("{")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(response, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        # This brace did not open a valid object (e.g. braces in prose);
        # continue with the next one.
        start = response.find("{", start + 1)
    return None


async def classify_source(
    db: aiosqlite.Connection,
    source_id: int,
    sample_limit: int = 5,
    model: str = CLAUDE_MODEL_FAST,
) -> dict:
    """Classify a single source and store the proposal in the proposed_* columns.

    Args:
        db: Open database connection; rows must be convertible with ``dict()``.
        source_id: Primary key of the source to classify.
        sample_limit: Maximum number of recent headlines loaded as context.
        model: Model identifier passed to ``call_claude``.

    Returns:
        The validated classification, extended by ``source_id`` and a
        ``usage`` dict (``cost_usd``, ``input_tokens``, ``output_tokens``).

    Raises:
        ValueError: If the source does not exist or the model answer contains
            no parseable JSON object.
    """
    cursor = await db.execute(
        "SELECT id, name, url, domain, source_type, category, language, bias, "
        "classification_source FROM sources WHERE id = ?",
        (source_id,),
    )
    row = await cursor.fetchone()
    if not row:
        raise ValueError(f"Quelle {source_id} nicht gefunden")
    src = dict(row)

    sample = await _load_sample_articles(db, src["name"], src.get("domain"), sample_limit)
    prompt = _build_prompt(src, sample)
    response, usage = await call_claude(prompt, tools=None, model=model)

    # Scan for the first decodable object instead of a greedy "{.*}" match:
    # the greedy span runs to the LAST closing brace and therefore breaks as
    # soon as the model appends any text containing braces.
    parsed = _extract_json_object(response)
    if parsed is None:
        raise ValueError(f"Keine JSON-Antwort von Claude fuer source_id={source_id}: {response[:200]}")
    result = _validate(parsed)

    # Only set classification_source to 'llm_pending' if the source has not
    # already been classified manually or approved.
    new_src = "CASE WHEN classification_source IN ('manual','llm_approved') THEN classification_source ELSE 'llm_pending' END"
    await db.execute(
        f"""UPDATE sources SET
            proposed_political_orientation = ?,
            proposed_media_type = ?,
            proposed_reliability = ?,
            proposed_state_affiliated = ?,
            proposed_country_code = ?,
            proposed_alignments_json = ?,
            proposed_confidence = ?,
            proposed_reasoning = ?,
            proposed_at = CURRENT_TIMESTAMP,
            classification_source = {new_src}
            WHERE id = ?""",
        (
            result["political_orientation"],
            result["media_type"],
            result["reliability"],
            1 if result["state_affiliated"] else 0,
            result["country_code"],
            json.dumps(result["alignments"], ensure_ascii=False),
            result["confidence"],
            result["reasoning"],
            source_id,
        ),
    )
    await db.commit()

    logger.info(
        "Klassifiziert source_id=%s '%s' -> %s/%s/%s conf=%.2f ($%.4f)",
        source_id, src["name"], result["political_orientation"],
        result["media_type"], result["reliability"], result["confidence"],
        usage.cost_usd,
    )

    result["source_id"] = source_id
    result["usage"] = {
        "cost_usd": usage.cost_usd,
        "input_tokens": usage.input_tokens,
        "output_tokens": usage.output_tokens,
    }
    return result
|
||||
|
||||
|
||||
async def bulk_classify(
    db: aiosqlite.Connection,
    limit: int = 50,
    only_unclassified: bool = True,
    model: str = CLAUDE_MODEL_FAST,
) -> dict:
    """Classify not-yet-classified sources one after another.

    Args:
        db: Open database connection.
        limit: Maximum number of sources handled per call.
        only_unclassified: If True, only sources with
            classification_source='legacy' are picked. If False, sources in
            state 'llm_pending' are classified again as well.
        model: Model identifier forwarded to ``classify_source``.

    Returns:
        Dict with ``processed`` (number of selected sources), ``success``,
        ``errors`` (list of ``{"source_id", "error"}``) and ``total_cost_usd``.
    """
    where = (
        "classification_source = 'legacy'"
        if only_unclassified
        else "classification_source IN ('legacy', 'llm_pending')"
    )
    query = (
        f"SELECT id FROM sources WHERE {where} AND status = 'active' "
        "AND source_type != 'excluded' ORDER BY id LIMIT ?"
    )
    cursor = await db.execute(query, (limit,))
    selected = await cursor.fetchall()
    ids = [record["id"] for record in selected]

    failures: list[dict] = []
    cost_sum = 0.0
    ok_count = 0

    for source_id in ids:
        try:
            outcome = await classify_source(db, source_id, model=model)
            cost_sum += outcome["usage"]["cost_usd"]
            ok_count += 1
        except asyncio.CancelledError:
            # Cancellation must always propagate to the caller.
            raise
        except Exception as exc:
            # One failing source must not abort the rest of the batch.
            logger.error("Klassifikation source_id=%s fehlgeschlagen: %s", source_id, exc, exc_info=True)
            failures.append({"source_id": source_id, "error": str(exc)})

    logger.info(
        "Bulk-Klassifikation fertig: %d/%d erfolgreich, $%.4f Kosten, %d Fehler",
        ok_count, len(ids), cost_sum, len(failures),
    )
    return {
        "processed": len(ids),
        "success": ok_count,
        "errors": failures,
        "total_cost_usd": cost_sum,
    }
|
||||
@@ -102,17 +102,98 @@ async def generate_stale_deactivation_suggestions(
|
||||
return created
|
||||
|
||||
|
||||
async def generate_strategy_escalation_suggestions(db: aiosqlite.Connection) -> int:
    """Create deactivate_source suggestions for sources whose escalated
    fetch_strategy still fails.

    A source qualifies when it is active, its fetch_strategy is 'googlebot'
    or 'paywall', and a reachability health check for it reports 'error'.
    The workaround evidently does not help, so deactivation is proposed with
    priority 'high'.

    Duplicates are avoided in two ways: sources that already have a pending
    deactivate_source suggestion are skipped, and each source gets at most
    one new suggestion per run even if the health-check join returns several
    error rows for it.

    Returns:
        Number of newly created suggestions.
    """
    cursor = await db.execute(
        """
        SELECT s.id, s.name, s.url, s.domain, s.fetch_strategy, h.message
        FROM sources s
        JOIN source_health_checks h ON h.source_id = s.id
        WHERE s.status = 'active'
        AND s.fetch_strategy IN ('googlebot', 'paywall')
        AND h.check_type = 'reachability'
        AND h.status = 'error'
        """
    )
    candidates = [dict(row) for row in await cursor.fetchall()]
    if not candidates:
        return 0

    cursor = await db.execute(
        "SELECT DISTINCT source_id FROM source_suggestions "
        "WHERE status = 'pending' AND suggestion_type = 'deactivate_source' "
        "AND source_id IS NOT NULL"
    )
    already_pending = {row["source_id"] for row in await cursor.fetchall()}

    # Source ids that received a suggestion during this run. The join above
    # yields one row per matching health check, so the same source can appear
    # more than once among the candidates.
    handled: set = set()

    created = 0
    for c in candidates:
        sid = c["id"]
        if sid in already_pending or sid in handled:
            continue
        title = f"{c['name']} (ID {sid}) - Strategie greift nicht"
        description = (
            f"Quelle: {c['name']} | URL: {c['url']} | Domain: {c['domain'] or '-'}\n"
            f"fetch_strategy='{c['fetch_strategy']}' wurde bereits zur Eskalation gesetzt, "
            f"liefert beim Health-Check aber weiter einen Fehler:\n"
            f" {c['message']}\n"
            "Vorschlag: deaktivieren oder fetch_strategy='skip' setzen, damit die Quelle "
            "den Health-Check nicht weiter verfälscht.\n"
            "Hinweis: Quelle wurde automatisch erkannt. Bitte vor Annahme prüfen."
        )
        suggested_data = json.dumps(
            {"action": "deactivate", "source_id": sid,
             "reason": "fetch_strategy_failed", "current_strategy": c["fetch_strategy"]},
            ensure_ascii=False,
        )
        await db.execute(
            "INSERT INTO source_suggestions "
            "(suggestion_type, title, description, source_id, suggested_data, "
            " priority, status) VALUES "
            "('deactivate_source', ?, ?, ?, ?, 'high', 'pending')",
            (title, description, sid, suggested_data),
        )
        handled.add(sid)
        created += 1

    if created > 0:
        await db.commit()
        logger.info(
            "Strategie-Eskalations-Heuristik: %d neue deactivate-Vorschläge "
            "(%d Kandidaten, %d bereits pending)",
            created, len(candidates), len(already_pending),
        )
    return created
|
||||
|
||||
|
||||
async def generate_suggestions(db: aiosqlite.Connection) -> int:
|
||||
"""Generiert Quellen-Vorschläge basierend auf Health-Checks und Lückenanalyse.
|
||||
|
||||
Zwei Stufen:
|
||||
1. Deterministisch: Karteileichen-Heuristik (article_count=0 oder >60d stumm)
|
||||
erzeugt sofort deactivate_source-Vorschläge ohne KI-Aufruf.
|
||||
2. KI-basiert: Haiku schaut sich Quellensammlung + Health-Probleme an
|
||||
Drei Stufen, in dieser Reihenfolge ausgeführt (spezifisch -> generisch -> KI):
|
||||
1. Deterministisch: Strategie-Eskalations-Heuristik (fetch_strategy=googlebot
|
||||
oder paywall, aber Reachability weiter error) erzeugt deactivate_source-
|
||||
Vorschläge mit Priorität 'high'. Spezifischste Diagnose: "Workaround
|
||||
greift nicht". Läuft ZUERST, damit diese Sources nicht von der
|
||||
generischeren Karteileichen-Stufe weggefangen werden.
|
||||
2. Deterministisch: Karteileichen-Heuristik (article_count=0 oder >60d stumm)
|
||||
erzeugt sofort deactivate_source-Vorschläge für alle übrigen toten
|
||||
Quellen ohne KI-Aufruf.
|
||||
3. KI-basiert: Haiku schaut sich Quellensammlung + Health-Probleme an
|
||||
und schlägt weitere Verbesserungen vor (add_source, deactivate_source,
|
||||
fix_url, ...).
|
||||
Rückgabe ist die Gesamtzahl neu erzeugter Vorschläge beider Stufen.
|
||||
Rückgabe ist die Gesamtzahl neu erzeugter Vorschläge aller Stufen.
|
||||
"""
|
||||
strategy_count = await generate_strategy_escalation_suggestions(db)
|
||||
stale_count = await generate_stale_deactivation_suggestions(db)
|
||||
|
||||
logger.info("Starte Quellen-Vorschläge via Haiku...")
|
||||
@@ -267,15 +348,15 @@ Nur das JSON-Array, kein anderer Text."""
|
||||
await db.commit()
|
||||
logger.info(
|
||||
f"Quellen-Vorschläge: {count} neue Vorschläge generiert via Haiku "
|
||||
f"(+{stale_count} aus Karteileichen-Heuristik) "
|
||||
f"(+{stale_count} Karteileichen, +{strategy_count} Strategie-Eskalation) "
|
||||
f"(Haiku: {usage.input_tokens} in / {usage.output_tokens} out / "
|
||||
f"${usage.cost_usd:.4f})"
|
||||
)
|
||||
return count + stale_count
|
||||
return count + stale_count + strategy_count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler bei Quellen-Vorschlägen: {e}", exc_info=True)
|
||||
return stale_count
|
||||
return stale_count + strategy_count
|
||||
|
||||
|
||||
async def apply_suggestion(
|
||||
|
||||
@@ -27,6 +27,7 @@ SOURCE_CATEGORIES: list[CategoryEntry] = [
|
||||
{"key": "international", "label": "International"},
|
||||
{"key": "regional", "label": "Regional"},
|
||||
{"key": "boulevard", "label": "Boulevard"},
|
||||
{"key": "stimmungsbild", "label": "Forum / Stimmungsbild"},
|
||||
{"key": "sonstige", "label": "Sonstige"},
|
||||
{"key": "cybercrime", "label": "Cybercrime / Hacktivismus"},
|
||||
{"key": "cybercrime-leaks", "label": "Cybercrime / Leaks"},
|
||||
|
||||
@@ -962,3 +962,162 @@ input[type="date"].filter-select { padding: 6px 10px; }
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
/* === Klassifikations-Review === */
|
||||
.sources-tab-badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
min-width: 20px;
|
||||
padding: 0 6px;
|
||||
height: 18px;
|
||||
border-radius: 9px;
|
||||
background: var(--accent);
|
||||
color: var(--bg-primary);
|
||||
font-size: 10px;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.review-toolbar {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 10px 14px;
|
||||
background: var(--bg-secondary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
margin-bottom: 12px;
|
||||
flex-wrap: wrap;
|
||||
gap: 12px;
|
||||
}
|
||||
.review-toolbar-info {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 16px;
|
||||
font-size: 13px;
|
||||
color: var(--text-primary);
|
||||
}
|
||||
.review-conf-filter {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
font-size: 12px;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
.review-toolbar-actions { display: flex; gap: 6px; }
|
||||
|
||||
.review-list { display: flex; flex-direction: column; gap: 8px; }
|
||||
.review-card {
|
||||
background: var(--bg-secondary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
padding: 12px 14px;
|
||||
}
|
||||
.review-card-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: flex-start;
|
||||
gap: 12px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.review-card-title {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
}
|
||||
.review-card-name { font-weight: 600; font-size: 14px; color: var(--text-primary); }
|
||||
.review-card-domain { font-size: 11px; color: var(--text-muted); }
|
||||
.review-global-badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
padding: 1px 6px;
|
||||
border-radius: var(--radius);
|
||||
background: #5e35b1;
|
||||
color: #fff;
|
||||
font-size: 9px;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.3px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.review-card-confidence {
|
||||
display: inline-flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
padding: 4px 10px;
|
||||
border-radius: var(--radius);
|
||||
min-width: 60px;
|
||||
}
|
||||
.review-card-confidence .conf-value { font-size: 14px; font-weight: 700; }
|
||||
.review-card-confidence .conf-label { font-size: 9px; text-transform: uppercase; letter-spacing: 0.3px; opacity: 0.8; }
|
||||
.review-card-confidence.conf-high { background: rgba(34,197,94,0.15); color: var(--success); }
|
||||
.review-card-confidence.conf-medium { background: rgba(245,158,11,0.15); color: var(--warning); }
|
||||
.review-card-confidence.conf-low { background: rgba(239,68,68,0.15); color: var(--danger); }
|
||||
|
||||
.review-card-diff {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr;
|
||||
gap: 4px;
|
||||
font-size: 12px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.review-diff-row {
|
||||
display: grid;
|
||||
grid-template-columns: 130px 1fr 24px 1fr;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
padding: 3px 6px;
|
||||
border-radius: 3px;
|
||||
}
|
||||
.review-diff-row.changed { background: rgba(245,158,11,0.10); }
|
||||
.review-diff-label { color: var(--text-secondary); font-weight: 500; }
|
||||
.review-diff-current { color: var(--text-muted); }
|
||||
.review-diff-arrow { text-align: center; color: var(--text-muted); font-weight: 600; }
|
||||
.review-diff-proposed { color: var(--text-primary); font-weight: 500; }
|
||||
.review-diff-row.changed .review-diff-proposed { color: var(--warning); font-weight: 600; }
|
||||
|
||||
.review-card-reasoning {
|
||||
font-size: 12px;
|
||||
color: var(--text-secondary);
|
||||
background: var(--bg-tertiary);
|
||||
padding: 8px 10px;
|
||||
border-radius: var(--radius);
|
||||
margin-bottom: 10px;
|
||||
line-height: 1.5;
|
||||
}
|
||||
.review-card-actions { display: flex; gap: 6px; flex-wrap: wrap; }
|
||||
|
||||
/* Edit-Form: Klassifikations-Sektion */
|
||||
.sources-classification-section {
|
||||
margin-top: 14px;
|
||||
padding-top: 14px;
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
.sources-classification-header {
|
||||
font-size: 12px;
|
||||
font-weight: 600;
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: 10px;
|
||||
letter-spacing: 0.3px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.alignment-chips { display: flex; flex-wrap: wrap; gap: 6px; }
|
||||
.alignment-chip {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
padding: 4px 10px;
|
||||
border-radius: 999px;
|
||||
font-size: 11px;
|
||||
font-weight: 500;
|
||||
background: transparent;
|
||||
color: var(--text-secondary);
|
||||
border: 1px solid var(--border);
|
||||
cursor: pointer;
|
||||
transition: all 0.12s ease;
|
||||
}
|
||||
.alignment-chip:hover { background: var(--bg-tertiary); color: var(--text-primary); }
|
||||
.alignment-chip.active {
|
||||
background: var(--accent);
|
||||
color: var(--bg-primary);
|
||||
border-color: var(--accent);
|
||||
}
|
||||
|
||||
|
||||
@@ -59,6 +59,32 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Artikel-Übersetzung -->
|
||||
<div class="card" id="translationCard" style="margin-top:16px;">
|
||||
<div class="card-header">
|
||||
<h2>Artikel-Übersetzung</h2>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p class="text-muted" style="margin-top:0;">
|
||||
Die automatische Übersetzung im Monitor ist deaktiviert. Hier lassen sich
|
||||
fremdsprachige Artikel ohne deutsche Fassung manuell übersetzen.
|
||||
</p>
|
||||
<p id="translationInfo" style="margin:12px 0;">Status wird geladen…</p>
|
||||
|
||||
<div id="translationProgressWrap" style="display:none; margin:12px 0;">
|
||||
<div style="background:rgba(128,128,128,0.25); border-radius:6px; height:14px; overflow:hidden;">
|
||||
<div id="translationProgressBar" style="background:#1565c0; height:100%; width:0%; transition:width .3s;"></div>
|
||||
</div>
|
||||
<p class="text-muted" id="translationProgressText" style="margin:6px 0 0;"></p>
|
||||
</div>
|
||||
|
||||
<div style="margin-top:12px; display:flex; gap:8px;">
|
||||
<button class="btn btn-primary" id="translationRunBtn">Übersetzung starten</button>
|
||||
<button class="btn btn-danger" id="translationCancelBtn" style="display:none;">Abbrechen</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Organizations Section -->
|
||||
@@ -166,6 +192,14 @@
|
||||
<option value="false">Deaktiviert</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="editOrgLanguage">Pipeline-Sprache</label>
|
||||
<select id="editOrgLanguage">
|
||||
<option value="de">Deutsch</option>
|
||||
<option value="en">English</option>
|
||||
</select>
|
||||
<small class="text-secondary">Bestimmt die Ausgabesprache der KI (Lagebild, Faktencheck, Recherche) und der sichtbarsten UI-Elemente fuer alle Nutzer dieser Organisation.</small>
|
||||
</div>
|
||||
<div style="display: flex; gap: 8px; margin-top: 16px;">
|
||||
<button type="submit" class="btn btn-primary">Speichern</button>
|
||||
<button type="button" class="btn btn-danger" id="deleteOrgBtn">Organisation löschen</button>
|
||||
@@ -294,6 +328,8 @@
|
||||
<button class="nav-tab active" data-subtab="global-sources">Grundquellen</button>
|
||||
<button class="nav-tab" data-subtab="tenant-sources">Kundenquellen</button>
|
||||
<button class="nav-tab" data-subtab="source-health">Quellen-Health</button>
|
||||
<button class="nav-tab" data-subtab="classification-review">Klassifikation <span class="sources-tab-badge" id="classificationPendingBadge">0</span></button>
|
||||
<button class="nav-tab" data-subtab="x-scraper">X-Recherche-Konten</button>
|
||||
</div>
|
||||
|
||||
<!-- Grundquellen -->
|
||||
@@ -319,6 +355,7 @@
|
||||
<span class="text-secondary" id="globalSourceCount"></span>
|
||||
</div>
|
||||
<button class="btn btn-secondary" id="discoverSourceBtn">Erkennen</button>
|
||||
<button class="btn btn-secondary" id="newPdfSourceBtn" style="margin-right:8px;">+ PDF hochladen</button>
|
||||
<button class="btn btn-primary" id="newGlobalSourceBtn">+ Neue Grundquelle</button>
|
||||
</div>
|
||||
<div class="card">
|
||||
@@ -406,6 +443,66 @@
|
||||
<div id="ht-verlauf" class="health-pane" style="display:none;"></div>
|
||||
</div>
|
||||
|
||||
<!-- Klassifikations-Review -->
|
||||
<div class="section" id="sub-classification-review">
|
||||
<div class="action-bar review-toolbar">
|
||||
<div class="review-toolbar-info">
|
||||
<span><strong id="reviewPendingCount">0</strong> Vorschläge ausstehend</span>
|
||||
<label class="review-conf-filter">
|
||||
Mindest-Konfidenz:
|
||||
<select class="filter-select" id="reviewMinConfidence" onchange="loadClassificationQueue()">
|
||||
<option value="0">alle</option>
|
||||
<option value="0.5">0.5+</option>
|
||||
<option value="0.7">0.7+</option>
|
||||
<option value="0.85">0.85+</option>
|
||||
<option value="0.9">0.9+</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
<div class="review-toolbar-actions">
|
||||
<button class="btn btn-secondary btn-small" onclick="triggerExternalReputationSync()" title="IFCN-Faktenchecker und EUvsDisinfo neu syncen">Externe Daten syncen</button>
|
||||
<button class="btn btn-secondary btn-small" onclick="triggerBulkClassify()" title="LLM-Klassifikation für noch unklassifizierte Quellen starten">+ Klassifikation starten</button>
|
||||
<button class="btn btn-primary btn-small" onclick="bulkApproveHighConfidence()" title="Alle Vorschläge ab 0.85 Konfidenz übernehmen">Alle ≥ 0.85 genehmigen</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card">
|
||||
<div class="review-list" id="classificationReviewList">
|
||||
<div class="text-muted" style="padding:24px;text-align:center;">Lade Review-Queue…</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- X-Recherche-Konten (Sub-Tab) -->
|
||||
<div class="section" id="sub-x-scraper">
|
||||
<div class="action-bar">
|
||||
<div style="display:flex;align-items:center;gap:12px;flex-wrap:wrap;">
|
||||
<span class="text-secondary" id="xScraperCount"></span>
|
||||
</div>
|
||||
<div style="display:flex;gap:8px;">
|
||||
<button class="btn btn-secondary" onclick="resetXScraperLocks()">Sperren zurücksetzen</button>
|
||||
<button class="btn btn-primary" onclick="openXScraperAddModal()">+ Konto hinzufügen</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card">
|
||||
<p class="text-secondary" style="padding:0 4px 12px;">X-Login-Konten, mit denen der Monitor bei X recherchiert. Mehr Konten bedeuten paralleleres, schnelleres Scrapen. Cookies laufen periodisch ab und müssen dann erneuert werden.</p>
|
||||
<div class="table-wrap">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Benutzername</th>
|
||||
<th>E-Mail</th>
|
||||
<th>Status</th>
|
||||
<th>Anfragen</th>
|
||||
<th>Letzte Nutzung</th>
|
||||
<th>Aktionen</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="xScraperTable"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div> <!-- /sec-sources -->
|
||||
|
||||
<!-- Audit-Log Section -->
|
||||
@@ -469,6 +566,14 @@
|
||||
<label for="newOrgSlug">Slug (URL-freundlich)</label>
|
||||
<input type="text" id="newOrgSlug" required pattern="[a-z0-9-]+" placeholder="z.B. bundespolizei">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="newOrgLanguage">Pipeline-Sprache</label>
|
||||
<select id="newOrgLanguage">
|
||||
<option value="de" selected>Deutsch</option>
|
||||
<option value="en">English</option>
|
||||
</select>
|
||||
<small class="text-secondary">Steuert die Ausgabesprache der KI-Pipeline (Lagebild, Faktencheck, Recherche) und die sichtbarsten UI-Strings im Monitor.</small>
|
||||
</div>
|
||||
<div id="newOrgError" class="error-msg" style="display:none"></div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
@@ -595,6 +700,7 @@
|
||||
<option value="telegram_channel">Telegram-Kanal</option>
|
||||
<option value="podcast_feed">Podcast-Feed</option>
|
||||
<option value="excluded">Ausgeschlossen</option>
|
||||
<option value="pdf_document" disabled>PDF-Dokument (nur Upload)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
@@ -653,6 +759,96 @@
|
||||
<label for="sourceNotes">Notizen</label>
|
||||
<input type="text" id="sourceNotes" placeholder="Optional">
|
||||
</div>
|
||||
|
||||
<div class="sources-classification-section">
|
||||
<div class="sources-classification-header">Einordnung (Klassifikation)</div>
|
||||
<div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:12px;">
|
||||
<div class="form-group">
|
||||
<label for="sourcePolitical">Politische Ausrichtung</label>
|
||||
<select id="sourcePolitical">
|
||||
<option value="">— unverändert —</option>
|
||||
<option value="na">Nicht eingeordnet</option>
|
||||
<option value="links_extrem">Links (extrem)</option>
|
||||
<option value="links">Links</option>
|
||||
<option value="mitte_links">Mitte-Links</option>
|
||||
<option value="liberal">Liberal</option>
|
||||
<option value="mitte">Mitte</option>
|
||||
<option value="konservativ">Konservativ</option>
|
||||
<option value="mitte_rechts">Mitte-Rechts</option>
|
||||
<option value="rechts">Rechts</option>
|
||||
<option value="rechts_extrem">Rechts (extrem)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="sourceMediaType">Medientyp</label>
|
||||
<select id="sourceMediaType">
|
||||
<option value="">— unverändert —</option>
|
||||
<option value="sonstige">Sonstige</option>
|
||||
<option value="tageszeitung">Tageszeitung</option>
|
||||
<option value="wochenzeitung">Wochenzeitung</option>
|
||||
<option value="magazin">Magazin</option>
|
||||
<option value="tv_sender">TV-Sender</option>
|
||||
<option value="radio">Radio</option>
|
||||
<option value="oeffentlich_rechtlich">Öffentlich-Rechtlich</option>
|
||||
<option value="nachrichtenagentur">Nachrichtenagentur</option>
|
||||
<option value="online_only">Online-only</option>
|
||||
<option value="blog">Blog</option>
|
||||
<option value="telegram_kanal">Telegram-Kanal</option>
|
||||
<option value="telegram_bot">Telegram-Bot</option>
|
||||
<option value="podcast">Podcast</option>
|
||||
<option value="social_media">Social Media</option>
|
||||
<option value="imageboard">Imageboard</option>
|
||||
<option value="think_tank">Think Tank</option>
|
||||
<option value="ngo">NGO</option>
|
||||
<option value="behoerde">Behörde</option>
|
||||
<option value="staatsmedium">Staatsmedium</option>
|
||||
<option value="fachmedium">Fachmedium</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="sourceReliability">Glaubwürdigkeit</label>
|
||||
<select id="sourceReliability">
|
||||
<option value="">— unverändert —</option>
|
||||
<option value="na">Nicht eingeordnet</option>
|
||||
<option value="sehr_hoch">Sehr hoch</option>
|
||||
<option value="hoch">Hoch</option>
|
||||
<option value="gemischt">Gemischt</option>
|
||||
<option value="niedrig">Niedrig</option>
|
||||
<option value="sehr_niedrig">Sehr niedrig</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-top:8px;">
|
||||
<div class="form-group">
|
||||
<label for="sourceCountryCode">Land (ISO 3166)</label>
|
||||
<input type="text" id="sourceCountryCode" maxlength="2" placeholder="z.B. DE, RU, US" style="text-transform:uppercase;">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label class="checkbox-label" style="display:flex;align-items:center;gap:8px;margin-top:24px;">
|
||||
<input type="checkbox" id="sourceStateAffiliated">
|
||||
<span>Staatsnah / staatlich kontrolliert</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group" style="margin-top:8px;">
|
||||
<label>Geopolitische Nähe (Mehrfachauswahl)</label>
|
||||
<div id="sourceAlignmentChips" class="alignment-chips" onclick="handleAlignmentChipClick(event)">
|
||||
<button type="button" class="alignment-chip" data-alignment="prorussisch">prorussisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="proiranisch">proiranisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="prowestlich">prowestlich</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="proukrainisch">proukrainisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="prochinesisch">prochinesisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="projapanisch">projapanisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="proisraelisch">proisraelisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="propalaestinensisch">propalästinensisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="protuerkisch">protürkisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="panarabisch">panarabisch</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="neutral">neutral</button>
|
||||
<button type="button" class="alignment-chip" data-alignment="sonstige">sonstige</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="sourceError" class="error-msg" style="display:none"></div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
@@ -663,6 +859,59 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Modal: PDF hochladen -->
|
||||
<div class="modal-overlay" id="modalPdfUpload">
|
||||
<div class="modal">
|
||||
<div class="modal-header">
|
||||
<h3>PDF als Quelle hochladen</h3>
|
||||
<button class="modal-close" onclick="closeModal('modalPdfUpload')">×</button>
|
||||
</div>
|
||||
<form id="pdfUploadForm" enctype="multipart/form-data">
|
||||
<div class="modal-body">
|
||||
<p class="text-secondary" style="margin-top:0;">
|
||||
Die PDF wird gespeichert und vom Monitor automatisch verarbeitet:
|
||||
Text extrahieren (OCR-Fallback für gescannte Dokumente),
|
||||
Übersetzung nach Deutsch und Englisch.
|
||||
</p>
|
||||
<div class="form-group">
|
||||
<label for="pdfFile">PDF-Datei (max. 50 MB)</label>
|
||||
<input type="file" id="pdfFile" accept="application/pdf,.pdf" required>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="pdfName">Anzeige-Name (optional)</label>
|
||||
<input type="text" id="pdfName" maxlength="200" placeholder="leer = Dateiname">
|
||||
</div>
|
||||
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;">
|
||||
<div class="form-group">
|
||||
<label for="pdfCategory">Kategorie</label>
|
||||
<select id="pdfCategory">
|
||||
<option value="sonstige" selected>Sonstige</option>
|
||||
<option value="behoerde">Behörde</option>
|
||||
<option value="think-tank">Think-Tank</option>
|
||||
<option value="fachmedien">Fachmedien</option>
|
||||
<option value="international">International</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="pdfLanguage">Sprache (optional)</label>
|
||||
<input type="text" id="pdfLanguage" list="languageSuggestions" placeholder="z.B. Deutsch, Englisch">
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="pdfNotes">Notizen</label>
|
||||
<input type="text" id="pdfNotes" placeholder="Optional">
|
||||
</div>
|
||||
<div id="pdfUploadError" class="error-msg" style="display:none"></div>
|
||||
<div id="pdfUploadProgress" class="text-secondary" style="display:none;margin-top:8px;">Lädt hoch …</div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-secondary" onclick="closeModal('modalPdfUpload')">Abbrechen</button>
|
||||
<button type="submit" class="btn btn-primary" id="pdfUploadSubmitBtn">Hochladen</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Modal: Discover Sources -->
|
||||
<div class="modal-overlay" id="modalDiscover">
|
||||
<div class="modal" style="max-width:600px;">
|
||||
@@ -721,9 +970,78 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/js/app.js?v=20260509j"></script>
|
||||
<script src="/static/js/sources.js?v=20260509d"></script>
|
||||
<script src="/static/js/source-health.js?v=20260509k"></script>
|
||||
<!-- Modal: add an X research account.
     The fields hold credentials of a third-party account, so every input opts
     out of browser autofill (so the signed-in user's own saved login is not
     offered for these fields) and out of spell checking. -->
<div class="modal-overlay" id="modalXScraperAdd">
  <div class="modal">
    <div class="modal-header">
      <h3>X-Recherche-Konto hinzufügen</h3>
      <button class="modal-close" onclick="closeModal('modalXScraperAdd')">×</button>
    </div>
    <form id="xScraperAddForm">
      <div class="modal-body">
        <div class="form-group">
          <label for="xsUsername">X-Benutzername</label>
          <input type="text" id="xsUsername" required autocomplete="off" autocapitalize="none" spellcheck="false" placeholder="Login-Handle des Kontos, ohne @">
        </div>
        <div class="form-group">
          <label for="xsPassword">X-Passwort</label>
          <!-- "new-password" asks the browser not to fill in a stored password -->
          <input type="password" id="xsPassword" autocomplete="new-password" placeholder="optional">
        </div>
        <div class="form-group">
          <label for="xsEmail">E-Mail</label>
          <input type="text" id="xsEmail" autocomplete="off" autocapitalize="none" spellcheck="false" placeholder="optional, z.B. konto@protonmail.com">
        </div>
        <div class="form-group">
          <label for="xsEmailPassword">E-Mail-Passwort</label>
          <input type="password" id="xsEmailPassword" autocomplete="new-password" placeholder="optional">
        </div>
        <div class="form-group">
          <label for="xsCookies">Cookies</label>
          <!-- Session cookies are secrets: no autofill, no spell checking -->
          <textarea id="xsCookies" rows="3" required autocomplete="off" spellcheck="false" placeholder="auth_token=...; ct0=..."></textarea>
          <small class="text-secondary">Aus dem eingeloggten X-Browser exportiert, mindestens auth_token und ct0.</small>
        </div>
        <div id="xScraperAddError" class="error-msg" style="display:none"></div>
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-secondary" onclick="closeModal('modalXScraperAdd')">Abbrechen</button>
        <button type="submit" class="btn btn-primary">Konto anlegen</button>
      </div>
    </form>
  </div>
</div>
|
||||
|
||||
<!-- Modal: renew the cookies of an existing X research account.
     The account name is shown read-only; the cookie textarea holds session
     secrets and therefore opts out of autofill and spell checking. -->
<div class="modal-overlay" id="modalXScraperCookies">
  <div class="modal">
    <div class="modal-header">
      <h3>Cookies erneuern</h3>
      <button class="modal-close" onclick="closeModal('modalXScraperCookies')">×</button>
    </div>
    <form id="xScraperCookiesForm">
      <div class="modal-body">
        <div class="form-group">
          <label for="xsCookiesUsername">Konto</label>
          <input type="text" id="xsCookiesUsername" readonly autocomplete="off">
        </div>
        <div class="form-group">
          <label for="xsCookiesValue">Neue Cookies</label>
          <textarea id="xsCookiesValue" rows="3" required autocomplete="off" spellcheck="false" placeholder="auth_token=...; ct0=..."></textarea>
          <small class="text-secondary">Frisch aus dem eingeloggten X-Browser exportieren.</small>
        </div>
        <div id="xScraperCookiesError" class="error-msg" style="display:none"></div>
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-secondary" onclick="closeModal('modalXScraperCookies')">Abbrechen</button>
        <button type="submit" class="btn btn-primary">Cookies setzen</button>
      </div>
    </form>
  </div>
</div>
|
||||
|
||||
<script src="/static/js/app.js?v=20260522a"></script>
|
||||
<script src="/static/js/sources.js?v=20260522x2"></script>
|
||||
<script src="/static/js/x-scraper.js?v=20260522a"></script>
|
||||
<script src="/static/js/source-health.js?v=20260509l"></script>
|
||||
<script src="/static/js/audit.js?v=20260509d"></script>
|
||||
<div id="toastContainer" class="toast-container" aria-live="polite" aria-atomic="true"></div>
|
||||
</body>
|
||||
|
||||
@@ -26,6 +26,23 @@ const API = {
|
||||
post(path, body) { return this.request(path, { method: "POST", body: JSON.stringify(body) }); },
|
||||
put(path, body) { return this.request(path, { method: "PUT", body: body ? JSON.stringify(body) : undefined }); },
|
||||
del(path) { return this.request(path, { method: "DELETE" }); },
|
||||
|
||||
async upload(path, formData) {
|
||||
const headers = {};
|
||||
if (this.token) headers["Authorization"] = `Bearer ${this.token}`;
|
||||
const res = await fetch(path, { method: "POST", headers, body: formData });
|
||||
if (res.status === 401) {
|
||||
localStorage.removeItem("token");
|
||||
localStorage.removeItem("username");
|
||||
window.location.href = "/";
|
||||
return;
|
||||
}
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}));
|
||||
throw new Error(data.detail || `Fehler ${res.status}`);
|
||||
}
|
||||
return res.json();
|
||||
},
|
||||
};
|
||||
|
||||
// --- State ---
|
||||
@@ -42,8 +59,10 @@ document.addEventListener("DOMContentLoaded", () => {
|
||||
setupNavTabs();
|
||||
setupOrgDetailTabs();
|
||||
setupForms();
|
||||
setupTranslation();
|
||||
loadDashboard();
|
||||
loadDashboardTokenStats();
|
||||
loadTranslationStatus();
|
||||
loadOrgs();
|
||||
});
|
||||
|
||||
@@ -63,6 +82,7 @@ function setupNavTabs() {
|
||||
document.querySelectorAll(".app-content > .section").forEach(s => s.classList.remove("active"));
|
||||
document.getElementById(`sec-${section}`).classList.add("active");
|
||||
|
||||
if (section === "dashboard") loadTranslationStatus();
|
||||
if (section === "licenses") loadExpiringLicenses();
|
||||
if (section === "audit" && typeof loadAudit === "function") loadAudit();
|
||||
});
|
||||
@@ -213,6 +233,8 @@ async function openOrg(orgId) {
|
||||
|
||||
document.getElementById("editOrgName").value = org.name;
|
||||
document.getElementById("editOrgActive").value = org.is_active ? "true" : "false";
|
||||
const langEl = document.getElementById("editOrgLanguage");
|
||||
if (langEl) langEl.value = org.output_language || "de";
|
||||
|
||||
loadOrgUsers(orgId);
|
||||
loadOrgLicenses(orgId);
|
||||
@@ -424,6 +446,7 @@ function setupForms() {
|
||||
await API.post("/api/orgs", {
|
||||
name: document.getElementById("newOrgName").value,
|
||||
slug: document.getElementById("newOrgSlug").value,
|
||||
output_language: document.getElementById("newOrgLanguage").value || "de",
|
||||
});
|
||||
closeModal("modalNewOrg");
|
||||
document.getElementById("newOrgForm").reset();
|
||||
@@ -518,6 +541,7 @@ function setupForms() {
|
||||
await API.put(`/api/orgs/${currentOrgId}`, {
|
||||
name: document.getElementById("editOrgName").value,
|
||||
is_active: document.getElementById("editOrgActive").value === "true",
|
||||
output_language: document.getElementById("editOrgLanguage").value || "de",
|
||||
});
|
||||
openOrg(currentOrgId);
|
||||
loadOrgs();
|
||||
@@ -631,6 +655,151 @@ function formatDate(iso) {
|
||||
}
|
||||
|
||||
|
||||
// ===== Artikel-Übersetzung =====
|
||||
let translationPollTimer = null;
|
||||
|
||||
/**
 * Attach the click handlers of the translation card.
 * Buttons that are not present in the DOM are skipped.
 */
function setupTranslation() {
  const startButton = document.getElementById("translationRunBtn");
  const stopButton = document.getElementById("translationCancelBtn");

  startButton?.addEventListener("click", startTranslation);
  stopButton?.addEventListener("click", cancelTranslation);
}
|
||||
|
||||
/**
 * Format a duration in seconds as a short German label.
 *
 * Below one minute the result is in seconds, below one hour in (rounded)
 * minutes, otherwise in hours plus remaining minutes. Falsy and negative
 * inputs are treated as 0.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} e.g. "45 Sek.", "12 Min.", "2 Std.", "1 Std. 5 Min."
 */
function formatDuration(seconds) {
  seconds = Math.max(0, Math.round(seconds || 0));
  if (seconds < 60) return seconds + " Sek.";

  const min = Math.round(seconds / 60);
  if (min < 60) return min + " Min.";

  const h = Math.floor(min / 60);
  const m = min % 60;
  // Only append the minutes part when there is one, so whole hours do not
  // end in a trailing space (previously ".trim()" applied to the minutes
  // fragment only and left "2 Std. ").
  return m ? `${h} Std. ${m} Min.` : `${h} Std.`;
}
|
||||
|
||||
/**
 * Render the translation card from a status object.
 *
 * While a run is active the start button is hidden and the cancel button
 * plus progress bar are shown. Otherwise the card shows the result of the
 * last run (if any) and either the number of pending articles with an
 * estimate or an "all translated" message.
 *
 * Does nothing when the info element or the start button is missing. The
 * cancel button and the progress elements are optional and skipped when
 * absent.
 *
 * @param {object} st - Status object; reads running, total, done,
 *   translated, finished_at, error, cancelled, pending and estimate.
 */
function renderTranslation(st) {
  const info = document.getElementById("translationInfo");
  const wrap = document.getElementById("translationProgressWrap");
  const bar = document.getElementById("translationProgressBar");
  const ptext = document.getElementById("translationProgressText");
  const runBtn = document.getElementById("translationRunBtn");
  const cancelBtn = document.getElementById("translationCancelBtn");
  if (!info || !runBtn) return;

  // Optional elements may be missing; never dereference them unchecked.
  const setDisplay = (el, value) => {
    if (el) el.style.display = value;
  };

  if (st.running) {
    runBtn.style.display = "none";
    setDisplay(cancelBtn, "");
    setDisplay(wrap, "");
    const pct = st.total > 0 ? Math.round((st.done / st.total) * 100) : 0;
    if (bar) bar.style.width = pct + "%";
    if (ptext) {
      ptext.textContent = `${st.done} / ${st.total} verarbeitet, ${st.translated} übersetzt (${pct}%)`;
    }
    info.textContent = "Übersetzung läuft…";
    return;
  }

  runBtn.style.display = "";
  setDisplay(cancelBtn, "none");
  setDisplay(wrap, "none");

  // Summary of the previous run, shown only if one finished with work or an error.
  let resultLine = "";
  if (st.finished_at && (st.total > 0 || st.error)) {
    if (st.error) {
      resultLine = `Letzter Lauf mit Fehler beendet: ${st.error}. `;
    } else if (st.cancelled) {
      resultLine = `Letzter Lauf abgebrochen, ${st.translated} von ${st.total} Artikeln übersetzt. `;
    } else {
      resultLine = `Letzter Lauf abgeschlossen, ${st.translated} Artikel übersetzt. `;
    }
  }

  if (st.pending > 0) {
    const est = st.estimate || {};
    // Leave the cost out instead of printing "$undefined" when it is missing.
    const costPart = est.cost_usd != null ? `, ca. $${est.cost_usd}` : "";
    info.textContent = resultLine +
      `${st.pending} Artikel ohne deutsche Übersetzung. ` +
      `Geschätzt: ${formatDuration(est.seconds)}${costPart}.`;
    runBtn.disabled = false;
  } else {
    info.textContent = resultLine + "Alle Artikel sind übersetzt.";
    runBtn.disabled = true;
  }
}
|
||||
|
||||
/**
 * Fetch the translation status once, render it, and start the 3-second
 * polling interval if a run is active and no interval is registered yet.
 * On failure the error text is written into the info element (if present).
 */
async function loadTranslationStatus() {
  try {
    const status = await API.get("/api/translation/status");
    renderTranslation(status);

    const needsPolling = status.running && !translationPollTimer;
    if (needsPolling) {
      translationPollTimer = setInterval(pollTranslation, 3000);
    }
  } catch (failure) {
    const infoEl = document.getElementById("translationInfo");
    if (!infoEl) return;
    infoEl.textContent = "Status nicht abrufbar: " + (failure.message || failure);
  }
}
|
||||
|
||||
/**
 * Interval callback: refresh the translation status and, once the run is
 * no longer active, stop polling and show a toast with the outcome.
 * A failed poll is only logged; the interval keeps running.
 */
async function pollTranslation() {
  try {
    const status = await API.get("/api/translation/status");
    renderTranslation(status);
    if (status.running) return;

    // Run finished: stop the interval before reporting the result.
    clearInterval(translationPollTimer);
    translationPollTimer = null;

    if (status.error) {
      showToast("Übersetzung mit Fehler beendet", "error");
      return;
    }
    if (status.cancelled) {
      showToast(`Übersetzung abgebrochen, ${status.translated} übersetzt`, "info");
      return;
    }
    showToast(`Übersetzung fertig: ${status.translated} Artikel`, "success");
  } catch (failure) {
    console.warn("Translation-Poll fehlgeschlagen:", failure);
  }
}
|
||||
|
||||
/**
 * Start a translation run after user confirmation.
 *
 * Fetches the current status first; aborts with a toast if a run is already
 * active or nothing is pending. The confirmation dialog shows the estimated
 * duration and, when the server supplied one, the estimated cost. After a
 * successful start the status is reloaded and polling is ensured.
 */
async function startTranslation() {
  let st;
  try {
    st = await API.get("/api/translation/status");
  } catch (e) {
    showToast(e.message || "Status nicht abrufbar", "error");
    return;
  }
  if (st.running) { showToast("Es läuft bereits eine Übersetzung", "info"); return; }
  if (!st.pending) { showToast("Es gibt nichts zu übersetzen", "info"); return; }

  const est = st.estimate || {};
  // Omit the cost clause instead of showing "$undefined" when no cost is given.
  const costPart = est.cost_usd != null ? `, geschätzte Kosten: ca. $${est.cost_usd}` : "";
  const ok = await showConfirm(
    "Übersetzung starten",
    `${st.pending} Artikel werden ins Deutsche übersetzt. ` +
    `Geschätzte Dauer: ${formatDuration(est.seconds)}${costPart}. ` +
    `Der Lauf kann jederzeit abgebrochen werden.`
  );
  if (!ok) return;

  try {
    const res = await API.post("/api/translation/run", {});
    if (res && res.status === "started") {
      showToast(`Übersetzung gestartet (${res.pending} Artikel)`, "success");
      await loadTranslationStatus();
      // loadTranslationStatus only starts polling if the status already
      // reports "running"; make sure an interval exists either way.
      if (!translationPollTimer) {
        translationPollTimer = setInterval(pollTranslation, 3000);
      }
    } else {
      showToast("Es gibt nichts zu übersetzen", "info");
      loadTranslationStatus();
    }
  } catch (e) {
    showToast(e.message || "Start fehlgeschlagen", "error");
  }
}
|
||||
|
||||
/**
 * Ask the server to cancel the running translation and report the outcome
 * of the request as a toast.
 */
async function cancelTranslation() {
  const cancelEndpoint = "/api/translation/cancel";
  try {
    await API.post(cancelEndpoint, {});
    showToast("Übersetzung wird abgebrochen…", "info");
  } catch (failure) {
    const text = failure.message || "Abbruch fehlgeschlagen";
    showToast(text, "error");
  }
}
|
||||
|
||||
// ===== Token-Nutzung =====
|
||||
async function loadOrgTokenUsage(orgId) {
|
||||
try {
|
||||
|
||||
@@ -260,6 +260,23 @@ function renderHealthDashboard() {
|
||||
return `<span class="${cssClass}" title="${esc(detail)}">${total} ${label}</span> <span class="text-secondary" style="font-size:11px;">(${esc(detail)})</span>`;
|
||||
}
|
||||
|
||||
// Trend-Delta zum vorletzten Run (healthHistoryCache[1]). Index 0 ist
|
||||
// typischerweise der aktuelle Stand, Index 1 der davor archivierte Run.
|
||||
// Wenn weniger als 2 Runs in der History: kein Delta anzeigen.
|
||||
const prevRun = (healthHistoryCache && healthHistoryCache.length > 1) ? healthHistoryCache[1] : null;
|
||||
/**
 * Build the small "(+n)" / "(-n)" / "(±0)" trend badge comparing a counter
 * with its value from the previous run.
 *
 * @param {number} currentValue - Current counter value.
 * @param {number|null|undefined} prevValue - Value from the previous run.
 * @param {boolean} badIsUp - true: an increase is bad (red) and a decrease
 *   good (green); false: the other way round.
 * @returns {string} HTML snippet with a leading space, or "" when no
 *   meaningful delta can be computed.
 */
function deltaBadge(currentValue, prevValue, badIsUp) {
  if (prevValue == null || currentValue == null) return "";
  const d = currentValue - prevValue;
  // Non-numeric inputs would otherwise be rendered as "(NaN)".
  if (!Number.isFinite(d)) return "";
  if (d === 0) return ` <span class="text-secondary" style="font-size:11px;" title="unverändert seit letztem Run">(±0)</span>`;
  const sign = d > 0 ? "+" : "";
  // badIsUp=true: increase = bad (red), decrease = good (green). Reversed for OK counts.
  const cls = (badIsUp ? (d > 0) : (d < 0)) ? "text-danger" : "text-success";
  return ` <span class="${cls}" style="font-size:11px;" title="seit letztem Run">(${sign}${d})</span>`;
}
|
||||
const dErr = prevRun ? deltaBadge(healthData.errors, prevRun.errors, true) : "";
|
||||
const dWarn = prevRun ? deltaBadge(healthData.warnings, prevRun.warnings, true) : "";
|
||||
const dOk = prevRun ? deltaBadge(okCount, prevRun.ok, false) : "";
|
||||
|
||||
healthHtml = `
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
@@ -267,9 +284,9 @@ function renderHealthDashboard() {
|
||||
<span class="text-secondary" style="font-size:13px;">
|
||||
Letzter Check: ${healthData.last_check ? formatDateTime(healthData.last_check) : "Noch nie"}
|
||||
|
|
||||
${breakdownLine("error", "text-danger") || `<span class="text-danger">0 Fehler</span>`}
|
||||
${breakdownLine("warning", "text-warning") || `<span class="text-warning">0 Warnungen</span>`}
|
||||
<span class="text-success">${okCount} OK</span>
|
||||
${breakdownLine("error", "text-danger") || `<span class="text-danger">0 Fehler</span>`}${dErr}
|
||||
${breakdownLine("warning", "text-warning") || `<span class="text-warning">0 Warnungen</span>`}${dWarn}
|
||||
<span class="text-success">${okCount} OK</span>${dOk}
|
||||
</span>
|
||||
</div>
|
||||
<div class="action-bar" style="border-bottom:1px solid var(--border, rgba(255,255,255,0.08));">
|
||||
@@ -319,7 +336,10 @@ function renderHealthDashboard() {
|
||||
<td class="text-secondary">${c.tenant_id == null ? '<span style="color:#94a3b8;">global</span>' : esc(c.org_name || ("Org " + c.tenant_id))}</td>
|
||||
<td><span class="badge badge-health-${c.status}">${c.status === "error" ? "Fehler" : (c.status === "warning" ? "Warnung" : "OK")}</span></td>
|
||||
<td class="text-secondary" style="max-width:300px;" title="${esc(c.message || "")}">${esc(c.message || "")}</td>
|
||||
<td>${c.status === "error" && c.check_type === "reachability" ? `<button class="btn btn-secondary btn-small" data-source-id="${c.source_id}" data-source-name="${esc(c.name)}" onclick="searchFix(this)" title="Lösung suchen">${LUCIDE_ICONS.search}</button>` : ""}</td>
|
||||
<td>${(
|
||||
(c.status === "error" && c.check_type === "reachability") ||
|
||||
(c.status === "warning" && c.check_type === "feed_validity")
|
||||
) ? `<button class="btn btn-secondary btn-small" data-source-id="${c.source_id}" data-source-name="${esc(c.name)}" onclick="searchFix(this)" title="Lösung suchen">${LUCIDE_ICONS.search}</button>` : ""}</td>
|
||||
</tr>`;
|
||||
}
|
||||
)
|
||||
|
||||
@@ -37,6 +37,8 @@ function setupSourceSubTabs() {
|
||||
if (subtab === "global-sources") loadGlobalSources();
|
||||
else if (subtab === "tenant-sources") loadTenantSources();
|
||||
else if (subtab === "source-health") loadHealthData();
|
||||
else if (subtab === "classification-review") loadClassificationQueue();
|
||||
else if (subtab === "x-scraper") loadXScraperAccounts();
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -280,6 +282,7 @@ function openNewGlobalSource() {
|
||||
editingSourceId = null;
|
||||
document.getElementById("sourceModalTitle").textContent = "Neue Grundquelle";
|
||||
document.getElementById("sourceForm").reset();
|
||||
setAlignmentChips([]);
|
||||
openModal("modalSource");
|
||||
}
|
||||
|
||||
@@ -298,11 +301,19 @@ function editGlobalSource(id) {
|
||||
document.getElementById("sourceLanguage").value = s.language || "";
|
||||
document.getElementById("sourceBias").value = s.bias || "";
|
||||
document.getElementById("sourceFetchStrategy").value = s.fetch_strategy || "default";
|
||||
document.getElementById("sourcePolitical").value = s.political_orientation || "";
|
||||
document.getElementById("sourceMediaType").value = s.media_type || "";
|
||||
document.getElementById("sourceReliability").value = s.reliability || "";
|
||||
document.getElementById("sourceCountryCode").value = s.country_code || "";
|
||||
document.getElementById("sourceStateAffiliated").checked = !!s.state_affiliated;
|
||||
setAlignmentChips(s.alignments || []);
|
||||
openModal("modalSource");
|
||||
}
|
||||
|
||||
function setupSourceForms() {
|
||||
document.getElementById("newGlobalSourceBtn").addEventListener("click", openNewGlobalSource);
|
||||
document.getElementById("newPdfSourceBtn")?.addEventListener("click", openPdfUploadModal);
|
||||
setupPdfUploadForm();
|
||||
document.getElementById("discoverSourceBtn").addEventListener("click", () => {
|
||||
document.getElementById("discoverUrl").value = "";
|
||||
document.getElementById("discoverStatus").style.display = "none";
|
||||
@@ -328,6 +339,19 @@ function setupSourceForms() {
|
||||
fetch_strategy: document.getElementById("sourceFetchStrategy").value || "default",
|
||||
};
|
||||
|
||||
const pol = document.getElementById("sourcePolitical")?.value;
|
||||
if (pol) body.political_orientation = pol;
|
||||
const mt = document.getElementById("sourceMediaType")?.value;
|
||||
if (mt) body.media_type = mt;
|
||||
const rel = document.getElementById("sourceReliability")?.value;
|
||||
if (rel) body.reliability = rel;
|
||||
const cc = (document.getElementById("sourceCountryCode")?.value || "").trim().toUpperCase();
|
||||
if (cc) body.country_code = cc;
|
||||
if (editingSourceId) {
|
||||
body.state_affiliated = !!document.getElementById("sourceStateAffiliated")?.checked;
|
||||
body.alignments = getAlignmentChips();
|
||||
}
|
||||
|
||||
try {
|
||||
if (editingSourceId) {
|
||||
await API.put("/api/sources/global/" + editingSourceId, body);
|
||||
@@ -641,6 +665,213 @@ async function addDiscoveredFeeds() {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Klassifikations-Review ===
|
||||
|
||||
// Display labels for political orientation keys: a short tag and a full name.
const POLITICAL_LABELS = {
  links_extrem: { short: "L+", full: "Links (extrem)" },
  links: { short: "L", full: "Links" },
  mitte_links: { short: "ML", full: "Mitte-Links" },
  liberal: { short: "LIB", full: "Liberal" },
  mitte: { short: "M", full: "Mitte" },
  konservativ: { short: "KON", full: "Konservativ" },
  mitte_rechts: { short: "MR", full: "Mitte-Rechts" },
  rechts: { short: "R", full: "Rechts" },
  rechts_extrem: { short: "R+", full: "Rechts (extrem)" },
  na: { short: "?", full: "Nicht eingeordnet" },
};

// Display labels for reliability keys.
const RELIABILITY_LABELS = {
  sehr_hoch: "Sehr hoch",
  hoch: "Hoch",
  gemischt: "Gemischt",
  niedrig: "Niedrig",
  sehr_niedrig: "Sehr niedrig",
  na: "Nicht eingeordnet",
};

// Display labels for media type keys.
const MEDIA_TYPE_LABELS = {
  tageszeitung: "Tageszeitung",
  wochenzeitung: "Wochenzeitung",
  magazin: "Magazin",
  tv_sender: "TV-Sender",
  radio: "Radio",
  oeffentlich_rechtlich: "Öffentlich-Rechtlich",
  nachrichtenagentur: "Nachrichtenagentur",
  online_only: "Online-only",
  blog: "Blog",
  telegram_kanal: "Telegram-Kanal",
  telegram_bot: "Telegram-Bot",
  podcast: "Podcast",
  social_media: "Social Media",
  imageboard: "Imageboard",
  think_tank: "Think Tank",
  ngo: "NGO",
  behoerde: "Behörde",
  staatsmedium: "Staatsmedium",
  fachmedium: "Fachmedium",
  sonstige: "Sonstige",
};

// Display labels for geopolitical alignment keys (ASCII key -> label with umlauts).
const ALIGNMENT_LABELS = {
  prorussisch: "prorussisch",
  proiranisch: "proiranisch",
  prowestlich: "prowestlich",
  proukrainisch: "proukrainisch",
  prochinesisch: "prochinesisch",
  projapanisch: "projapanisch",
  proisraelisch: "proisraelisch",
  propalaestinensisch: "propalästinensisch",
  protuerkisch: "protürkisch",
  panarabisch: "panarabisch",
  neutral: "neutral",
  sonstige: "sonstige",
};
|
||||
|
||||
/**
 * Mark exactly those alignment chips as active whose data-alignment value
 * is contained in `active` (compared after lower-casing the given entries).
 *
 * @param {string[]|null|undefined} active - Alignment keys to activate.
 */
function setAlignmentChips(active) {
  const chipEls = document.querySelectorAll("#sourceAlignmentChips .alignment-chip");
  const normalize = (entry) => (entry || "").toLowerCase();
  const wanted = new Set((active || []).map(normalize));

  chipEls.forEach((chipEl) => {
    const shouldBeActive = wanted.has(chipEl.dataset.alignment);
    chipEl.classList.toggle("active", shouldBeActive);
  });
}
|
||||
|
||||
/**
 * Collect the data-alignment values of all currently active alignment chips.
 *
 * @returns {string[]} Alignment keys in DOM order.
 */
function getAlignmentChips() {
  const selected = [];
  const activeChips = document.querySelectorAll("#sourceAlignmentChips .alignment-chip.active");
  activeChips.forEach((chipEl) => {
    selected.push(chipEl.dataset.alignment);
  });
  return selected;
}
|
||||
|
||||
/**
 * Delegated click handler for the chip container: toggles the "active"
 * class of the clicked chip. Clicks outside a chip are ignored.
 *
 * @param {Event} e - Click event from the chip container.
 */
function handleAlignmentChipClick(e) {
  const clickedChip = e.target.closest(".alignment-chip");
  if (clickedChip) {
    e.preventDefault();
    clickedChip.classList.toggle("active");
  }
}
|
||||
|
||||
/**
 * Update the pending-review badge from the classification stats endpoint.
 * Any failure is deliberately ignored; the badge then keeps its old value.
 */
async function refreshClassificationStats() {
  try {
    const stats = await API.get("/api/sources/classification/stats");
    const badgeEl = document.getElementById("classificationPendingBadge");
    if (!badgeEl) return;
    badgeEl.textContent = String(stats.pending_review || 0);
  } catch (_) {
    /* still ok */
  }
}
|
||||
|
||||
/**
 * Load the classification review queue and render it into
 * #classificationReviewList.
 *
 * The minimum confidence is read from #reviewMinConfidence; a missing,
 * empty or unparsable value falls back to 0. Also updates the pending
 * counter and triggers a refresh of the stats badge.
 */
async function loadClassificationQueue() {
  const list = document.getElementById("classificationReviewList");
  if (!list) return;

  const parsed = parseFloat(document.getElementById("reviewMinConfidence")?.value || "0");
  // parseFloat yields NaN for non-numeric text; never send "min_confidence=NaN".
  const minConf = Number.isFinite(parsed) ? parsed : 0;

  list.innerHTML = '<div class="text-muted" style="padding:24px;text-align:center;">Lade…</div>';
  try {
    const items = await API.get(`/api/sources/classification/queue?limit=200&min_confidence=${minConf}`);
    const countEl = document.getElementById("reviewPendingCount");
    if (countEl) countEl.textContent = String(items.length);
    refreshClassificationStats();
    if (items.length === 0) {
      list.innerHTML = '<div class="text-muted" style="padding:24px;text-align:center;">Keine ausstehenden Vorschläge.</div>';
      return;
    }
    list.innerHTML = items.map((it) => renderClassificationQueueItem(it)).join("");
  } catch (err) {
    // Thrown values without a message are stringified instead of passing undefined to esc().
    const reason = err?.message || String(err);
    list.innerHTML = `<div class="text-danger" style="padding:24px;text-align:center;">Fehler: ${esc(reason)}</div>`;
  }
}
|
||||
|
||||
/**
 * Build the HTML of one review card: source name and domain, confidence
 * badge, a current -> proposed row per classification field, the optional
 * reasoning text and the approve / reject / reclassify buttons.
 *
 * @param {object} item - Queue entry; reads id, name, domain, is_global,
 *   current and proposed.
 * @returns {string} HTML markup of the card.
 */
function renderClassificationQueueItem(item) {
  const current = item.current || {};
  const proposed = item.proposed || {};

  const confidence = proposed.confidence || 0;
  const confidencePercent = Math.round(confidence * 100);
  let confidenceLevel = "low";
  if (confidence >= 0.85) confidenceLevel = "high";
  else if (confidence >= 0.7) confidenceLevel = "medium";

  // Formatters turn a raw field value into display text; "–" means "not set".
  const DASH = "–";
  const formatPolitical = (value) => {
    if (!value || value === "na") return DASH;
    return POLITICAL_LABELS[value]?.full || value;
  };
  const formatMediaType = (value) => {
    if (!value) return DASH;
    return MEDIA_TYPE_LABELS[value] || value;
  };
  const formatReliability = (value) => {
    if (!value || value === "na") return DASH;
    return RELIABILITY_LABELS[value] || value;
  };
  const formatState = (value) => (value ? "ja" : "nein");
  const formatCountry = (value) => value || DASH;
  const formatAlignments = (value) => {
    if (!Array.isArray(value) || value.length === 0) return DASH;
    return value.map((key) => ALIGNMENT_LABELS[key] || key).join(", ");
  };

  // One diff row; it gets the "changed" class when the formatted values differ.
  const diffRow = (label, currentValue, proposedValue, format) => {
    const currentText = format(currentValue);
    const proposedText = format(proposedValue);
    const changedClass = currentText !== proposedText ? " changed" : "";
    return [
      `<div class="review-diff-row${changedClass}">`,
      `<span class="review-diff-label">${esc(label)}</span>`,
      `<span class="review-diff-current">${esc(currentText)}</span>`,
      `<span class="review-diff-arrow">→</span>`,
      `<span class="review-diff-proposed">${esc(proposedText)}</span>`,
      `</div>`,
    ].join("\n");
  };

  const reasoningText = proposed.reasoning ? esc(proposed.reasoning) : "";
  const globalBadge = item.is_global ? '<span class="review-global-badge">Grundquelle</span>' : "";
  const reasoningBlock = reasoningText
    ? `<div class="review-card-reasoning"><strong>Begründung:</strong> ${reasoningText}</div>`
    : "";

  return [
    `<div class="review-card" data-source-id="${item.id}">`,
    `<div class="review-card-header">`,
    `<div class="review-card-title">`,
    `<span class="review-card-name">${esc(item.name)}</span>`,
    globalBadge,
    `<span class="review-card-domain">${esc(item.domain || "")}</span>`,
    `</div>`,
    `<div class="review-card-confidence conf-${confidenceLevel}" title="LLM-Konfidenz">`,
    `<span class="conf-value">${confidencePercent}%</span>`,
    `<span class="conf-label">Konfidenz</span>`,
    `</div>`,
    `</div>`,
    `<div class="review-card-diff">`,
    diffRow("Politik", current.political_orientation, proposed.political_orientation, formatPolitical),
    diffRow("Medientyp", current.media_type, proposed.media_type, formatMediaType),
    diffRow("Glaubwürdigkeit", current.reliability, proposed.reliability, formatReliability),
    diffRow("Staatsnah", current.state_affiliated, proposed.state_affiliated, formatState),
    diffRow("Land", current.country_code, proposed.country_code, formatCountry),
    diffRow("Geopol. Nähe", current.alignments, proposed.alignments, formatAlignments),
    `</div>`,
    reasoningBlock,
    `<div class="review-card-actions">`,
    `<button class="btn btn-small btn-primary" onclick="approveClassification(${item.id})">Übernehmen</button>`,
    `<button class="btn btn-small btn-secondary" onclick="rejectClassification(${item.id})">Verwerfen</button>`,
    `<button class="btn btn-small btn-secondary" data-reclassify-id="${item.id}" onclick="reclassifySource(${item.id})">Neu klassifizieren</button>`,
    `</div>`,
    `</div>`,
  ].join("\n");
}
|
||||
|
||||
/**
 * Approve the proposed classification of a source, then reload the queue.
 *
 * @param {number|string} id - Source id used in the endpoint path.
 */
async function approveClassification(id) {
  const endpoint = `/api/sources/${id}/classification/approve`;
  try {
    await API.post(endpoint, {});
    showToast("Klassifikation übernommen.", "success");
    loadClassificationQueue();
  } catch (failure) {
    showToast("Approve fehlgeschlagen: " + failure.message, "error");
  }
}
|
||||
|
||||
/**
 * Reject the proposed classification of a source, then reload the queue.
 *
 * @param {number|string} id - Source id used in the endpoint path.
 */
async function rejectClassification(id) {
  const endpoint = `/api/sources/${id}/classification/reject`;
  try {
    await API.post(endpoint, {});
    showToast("Vorschlag verworfen.", "success");
    loadClassificationQueue();
  } catch (failure) {
    showToast("Reject fehlgeschlagen: " + failure.message, "error");
  }
}
|
||||
|
||||
/**
 * Request a fresh classification for a source. The card's reclassify
 * button (if found) is disabled and shows "..." while the request runs and
 * is restored afterwards, whether or not the request succeeded.
 *
 * @param {number|string} id - Source id used in the endpoint path and the
 *   data-reclassify-id selector.
 */
async function reclassifySource(id) {
  const triggerBtn = document.querySelector(`[data-reclassify-id="${id}"]`);
  const setBusy = (busy) => {
    if (!triggerBtn) return;
    triggerBtn.disabled = busy;
    triggerBtn.textContent = busy ? "..." : "Neu klassifizieren";
  };

  setBusy(true);
  try {
    await API.post(`/api/sources/${id}/classification/reclassify`, {});
    showToast("Neu klassifiziert.", "success");
    loadClassificationQueue();
  } catch (failure) {
    showToast("Reclassify fehlgeschlagen: " + failure.message, "error");
  } finally {
    setBusy(false);
  }
}
|
||||
|
||||
/**
 * After a browser confirm dialog, start the background bulk classification
 * (limit 500, only unclassified sources) and show the server-reported
 * limit in a toast.
 */
async function triggerBulkClassify() {
  const confirmed = confirm("Bulk-Klassifikation aller noch nicht klassifizierten Quellen starten? Läuft im Hintergrund (~3-5 Sek pro Quelle, ~0.02 USD pro Quelle).");
  if (!confirmed) return;

  const endpoint = "/api/sources/classification/bulk-classify?limit=500&only_unclassified=true";
  try {
    const reply = await API.post(endpoint, {});
    showToast(`Bulk-Klassifikation gestartet (limit=${reply.limit}). In ~10 min neu laden.`, "info");
  } catch (failure) {
    showToast("Start fehlgeschlagen: " + failure.message, "error");
  }
}
|
||||
|
||||
/**
 * After a browser confirm dialog, approve all proposals with a confidence
 * of at least 0.85, report the approved count and reload the queue.
 */
async function bulkApproveHighConfidence() {
  const confirmed = confirm("Alle Vorschläge mit Konfidenz ≥ 0.85 genehmigen?");
  if (!confirmed) return;

  const endpoint = "/api/sources/classification/bulk-approve?min_confidence=0.85";
  try {
    const reply = await API.post(endpoint, {});
    showToast(`${reply.approved} Vorschläge übernommen.`, "success");
    loadClassificationQueue();
  } catch (failure) {
    showToast("Bulk-Approve fehlgeschlagen: " + failure.message, "error");
  }
}
|
||||
|
||||
/**
 * After a browser confirm dialog, trigger the external reputation sync
 * endpoint and report the outcome of the request as a toast.
 */
async function triggerExternalReputationSync() {
  const confirmed = confirm("IFCN- und EUvsDisinfo-Datenbanken jetzt syncen? Läuft im Hintergrund (~30 Sek).");
  if (!confirmed) return;

  const endpoint = "/api/sources/external-reputation/sync";
  try {
    await API.post(endpoint, {});
    showToast("Externer Sync gestartet. Quellenliste in ~30 Sek neu laden.", "info");
  } catch (failure) {
    showToast("Sync fehlgeschlagen: " + failure.message, "error");
  }
}
|
||||
|
||||
function toggleSourceInfo(id) {
|
||||
const row = document.getElementById("notes-" + id);
|
||||
if (!row) return;
|
||||
@@ -652,3 +883,68 @@ function toggleSourceInfo(id) {
|
||||
if (btn) btn.classList.toggle("active", !isVisible);
|
||||
}
|
||||
}
|
||||
|
||||
// --- PDF-Quellen-Upload ---
|
||||
/**
 * Reset the PDF upload dialog (form fields, error box, progress hint) and
 * open it. Elements that are missing from the DOM are skipped.
 */
function openPdfUploadModal() {
  document.getElementById("pdfUploadForm")?.reset();

  const errorBox = document.getElementById("pdfUploadError");
  if (errorBox) {
    errorBox.style.display = "none";
    errorBox.textContent = "";
  }

  const progressHint = document.getElementById("pdfUploadProgress");
  if (progressHint) {
    progressHint.style.display = "none";
  }

  openModal("modalPdfUpload");
}
|
||||
|
||||
/**
 * Wire the submit handler of the PDF upload form exactly once.
 *
 * The form is marked with data-bound="1" so repeated calls are no-ops.
 * On submit the handler validates the selected file (present, <= 50 MB),
 * builds a multipart payload from the form fields and uploads it. While
 * the upload runs the submit button is disabled and the progress element
 * is shown; both are restored afterwards regardless of the outcome.
 */
function setupPdfUploadForm() {
    const uploadForm = document.getElementById("pdfUploadForm");
    if (!uploadForm) {
        return;
    }
    if (uploadForm.dataset.bound === "1") {
        return;
    }
    uploadForm.dataset.bound = "1";

    const handleSubmit = async (event) => {
        event.preventDefault();

        const errorBox = document.getElementById("pdfUploadError");
        const progressBox = document.getElementById("pdfUploadProgress");
        const submitButton = document.getElementById("pdfUploadSubmitBtn");

        // Show a validation / upload error inside the modal.
        const fail = (message) => {
            errorBox.textContent = message;
            errorBox.style.display = "block";
        };

        errorBox.style.display = "none";

        const pdf = document.getElementById("pdfFile")?.files?.[0];
        if (!pdf) {
            fail("Bitte eine PDF-Datei auswaehlen.");
            return;
        }
        if (pdf.size > 50 * 1024 * 1024) {
            fail("Datei ueberschreitet 50 MB.");
            return;
        }

        const payload = new FormData();
        payload.append("file", pdf);

        // Optional text fields are only sent when non-empty.
        const displayName = document.getElementById("pdfName").value.trim();
        if (displayName) {
            payload.append("name", displayName);
        }
        payload.append("category", document.getElementById("pdfCategory").value || "sonstige");
        const language = document.getElementById("pdfLanguage").value.trim();
        if (language) {
            payload.append("language", language);
        }
        const notes = document.getElementById("pdfNotes").value.trim();
        if (notes) {
            payload.append("notes", notes);
        }

        submitButton.disabled = true;
        progressBox.style.display = "block";
        try {
            await API.upload("/api/sources/global/upload-pdf", payload);
            closeModal("modalPdfUpload");
            if (typeof showToast === "function") {
                showToast("PDF hochgeladen -- Verarbeitung laeuft im Hintergrund", "success");
            }
            loadGlobalSources();
        } catch (uploadError) {
            fail(uploadError.message || "Upload fehlgeschlagen");
        } finally {
            submitButton.disabled = false;
            progressBox.style.display = "none";
        }
    };

    uploadForm.addEventListener("submit", handleSubmit);
}
|
||||
|
||||
169
src/static/js/x-scraper.js
Normale Datei
169
src/static/js/x-scraper.js
Normale Datei
@@ -0,0 +1,169 @@
|
||||
/* X-Recherche-Konten: Verwaltung des twscrape-Account-Pools */
|
||||
"use strict";
|
||||
|
||||
let xScraperCache = [];
|
||||
|
||||
/**
 * Fetch the X research accounts and render them into #xScraperTable.
 *
 * Also makes sure the add/cookies forms are wired. While loading, a
 * placeholder row is shown; on failure the row shows the escaped error
 * message instead.
 */
async function loadXScraperAccounts() {
    setupXScraperForms();

    const tableBody = document.getElementById("xScraperTable");
    tableBody.innerHTML = '<tr><td colspan="6" class="text-muted">Lade...</td></tr>';

    try {
        const accounts = await API.get("/api/x-scraper/accounts");
        xScraperCache = accounts;
        renderXScraperAccounts(accounts || []);
    } catch (error) {
        const message = esc(error.message || "");
        tableBody.innerHTML = '<tr><td colspan="6" class="text-muted">Fehler: ' + message + '</td></tr>';
    }
}
|
||||
|
||||
/**
 * Render the given X research accounts as table rows into #xScraperTable
 * and update the account counter (#xScraperCount, if present).
 *
 * @param {Array<Object>} list Account objects; the fields read here are
 *   username, email, active, locked, total_requests, last_used, error_msg.
 */
function renderXScraperAccounts(list) {
    const tbody = document.getElementById("xScraperTable");
    const cnt = document.getElementById("xScraperCount");
    if (cnt) cnt.textContent = list.length + (list.length === 1 ? " Konto" : " Konten");

    if (!list.length) {
        tbody.innerHTML = '<tr><td colspan="6" class="text-muted">Keine X-Recherche-Konten. Mit „+ Konto hinzufügen" anlegen.</td></tr>';
        return;
    }

    // Build a JS expression that evaluates to `value` and is safe to embed in
    // an inline handler inside a double-quoted HTML attribute. HTML-escaping
    // alone is NOT enough there: the browser decodes entities before the
    // handler source is compiled, so an escaped quote would still terminate
    // the JS string. Percent-encoding (plus %27 for the apostrophe that
    // encodeURIComponent leaves alone) yields only characters that are inert
    // in both the HTML-attribute and the JS-string context.
    const jsStringArg = (value) =>
        "decodeURIComponent('" + encodeURIComponent(String(value)).replace(/'/g, "%27") + "')";

    tbody.innerHTML = list.map((a) => {
        let status;
        if (!a.active) status = '<span class="text-muted">Inaktiv</span>';
        else if (a.locked) status = '<span style="color:var(--warning,#b8860b);">Gesperrt</span>';
        else status = '<span style="color:var(--success,#2e7d32);">Aktiv</span>';

        const lastUsed = a.last_used && typeof formatDateTime === "function"
            ? formatDateTime(a.last_used)
            : (a.last_used || "—");
        const errInfo = a.error_msg
            ? ' <span class="info-icon" title="' + esc(a.error_msg) + '">!</span>'
            : "";

        const u = esc(a.username);            // for display in HTML text
        const uArg = jsStringArg(a.username); // for use inside onclick handlers
        const toggleLabel = a.active ? "Deaktivieren" : "Aktivieren";

        return '<tr>'
            + '<td><strong>' + u + '</strong>' + errInfo + '</td>'
            + '<td>' + esc(a.email || "—") + '</td>'
            + '<td>' + status + '</td>'
            // Escaped as well: the value comes from the API and is written into HTML.
            + '<td>' + esc(String(a.total_requests || 0)) + '</td>'
            + '<td>' + esc(lastUsed) + '</td>'
            + '<td>'
            + '<button class="btn btn-secondary btn-small" onclick="openXScraperCookiesModal(' + uArg + ')">Cookies erneuern</button> '
            + '<button class="btn btn-secondary btn-small" onclick="toggleXScraperActive(' + uArg + ',' + (!a.active) + ')">' + toggleLabel + '</button> '
            + '<button class="btn btn-danger btn-small" onclick="confirmDeleteXScraper(' + uArg + ')">Entfernen</button>'
            + '</td>'
            + '</tr>';
    }).join("");
}
|
||||
|
||||
/**
 * Hide the error box of the "add account" dialog, clear all of its input
 * fields (missing fields are skipped) and open the modal.
 */
function openXScraperAddModal() {
    const errorBox = document.getElementById("xScraperAddError");
    errorBox.style.display = "none";

    const fieldIds = ["xsUsername", "xsPassword", "xsEmail", "xsEmailPassword", "xsCookies"];
    for (const fieldId of fieldIds) {
        const field = document.getElementById(fieldId);
        if (field) {
            field.value = "";
        }
    }

    openModal("modalXScraperAdd");
}
|
||||
|
||||
/**
 * Open the "renew cookies" dialog for one account.
 *
 * @param {string} username Account whose cookies are to be replaced; stored
 *   in the hidden #xsCookiesUsername field for the submit handler.
 */
function openXScraperCookiesModal(username) {
    const errorBox = document.getElementById("xScraperCookiesError");
    const usernameField = document.getElementById("xsCookiesUsername");
    const cookiesField = document.getElementById("xsCookiesValue");

    errorBox.style.display = "none";
    usernameField.value = username;
    cookiesField.value = "";

    openModal("modalXScraperCookies");
}
|
||||
|
||||
/**
 * Set the active flag of one X research account and reload the table.
 *
 * @param {string} username Account to change.
 * @param {boolean} active New value of the active flag.
 */
async function toggleXScraperActive(username, active) {
    const url = "/api/x-scraper/accounts/" + encodeURIComponent(username) + "/active";
    try {
        await API.post(url, { active });
        showToast("Status geändert.", "success");
        loadXScraperAccounts();
    } catch (error) {
        showToast(error.message || "Status konnte nicht geändert werden", "error");
    }
}
|
||||
|
||||
/**
 * Ask for confirmation and, if confirmed, delete the given X research
 * account and reload the table. The outcome is reported via toast.
 *
 * @param {string} username Account to remove.
 */
function confirmDeleteXScraper(username) {
    const question = 'Soll das X-Recherche-Konto "' + username
        + '" entfernt werden? Der Monitor nutzt es dann nicht mehr zum Scrapen.';

    const removeAccount = async () => {
        try {
            await API.del("/api/x-scraper/accounts/" + encodeURIComponent(username));
            showToast("Konto entfernt.", "success");
            loadXScraperAccounts();
        } catch (error) {
            showToast(error.message || "Konto konnte nicht entfernt werden", "error");
        }
    };

    showConfirm("Konto entfernen", question, removeAccount);
}
|
||||
|
||||
/**
 * Ask for confirmation and, if confirmed, reset the temporary locks of all
 * X research accounts and reload the table. The outcome is reported via toast.
 */
function resetXScraperLocks() {
    const resetLocks = async () => {
        try {
            await API.post("/api/x-scraper/reset-locks", {});
            showToast("Sperren zurückgesetzt.", "success");
            loadXScraperAccounts();
        } catch (error) {
            showToast(error.message || "Sperren konnten nicht zurückgesetzt werden", "error");
        }
    };

    showConfirm(
        "Sperren zurücksetzen",
        "Alle temporären Sperren der X-Recherche-Konten zurücksetzen?",
        resetLocks
    );
}
|
||||
|
||||
/**
 * Wire the submit handlers of the "add account" and "renew cookies" forms.
 *
 * Each form is marked with data-wired="1" once its handler is attached, so
 * calling this function repeatedly does not register duplicate listeners.
 * Forms that are not present in the DOM are skipped.
 */
function setupXScraperForms() {
    // Show a message in the given error element.
    const showError = (box, message) => {
        box.textContent = message;
        box.style.display = "block";
    };

    // Submit handler: create a new account from the add-form fields.
    const onAddSubmit = async (event) => {
        event.preventDefault();
        const errorBox = document.getElementById("xScraperAddError");
        errorBox.style.display = "none";

        const fieldValue = (id) => document.getElementById(id).value;
        const payload = {
            // A leading "@" is dropped from the username.
            username: fieldValue("xsUsername").trim().replace(/^@/, ""),
            password: fieldValue("xsPassword"),
            email: fieldValue("xsEmail").trim(),
            email_password: fieldValue("xsEmailPassword"),
            cookies: fieldValue("xsCookies").trim(),
        };
        if (!(payload.username && payload.cookies)) {
            showError(errorBox, "Benutzername und Cookies sind erforderlich.");
            return;
        }

        try {
            await API.post("/api/x-scraper/accounts", payload);
            closeModal("modalXScraperAdd");
            showToast("Konto angelegt.", "success");
            loadXScraperAccounts();
        } catch (error) {
            showError(errorBox, error.message || "Anlegen fehlgeschlagen");
        }
    };

    // Submit handler: replace the cookies of an existing account.
    const onCookiesSubmit = async (event) => {
        event.preventDefault();
        const errorBox = document.getElementById("xScraperCookiesError");
        errorBox.style.display = "none";

        const username = document.getElementById("xsCookiesUsername").value;
        const cookies = document.getElementById("xsCookiesValue").value.trim();
        if (!cookies) {
            showError(errorBox, "Cookies sind erforderlich.");
            return;
        }

        try {
            const url = "/api/x-scraper/accounts/" + encodeURIComponent(username) + "/cookies";
            await API.post(url, { cookies });
            closeModal("modalXScraperCookies");
            showToast("Cookies erneuert.", "success");
            loadXScraperAccounts();
        } catch (error) {
            showError(errorBox, error.message || "Cookies konnten nicht erneuert werden");
        }
    };

    const addForm = document.getElementById("xScraperAddForm");
    if (addForm && !addForm.dataset.wired) {
        addForm.dataset.wired = "1";
        addForm.addEventListener("submit", onAddSubmit);
    }

    const cookiesForm = document.getElementById("xScraperCookiesForm");
    if (cookiesForm && !cookiesForm.dataset.wired) {
        cookiesForm.dataset.wired = "1";
        cookiesForm.addEventListener("submit", onCookiesSubmit);
    }
}
|
||||
422
src/translation_agent.py
Normale Datei
422
src/translation_agent.py
Normale Datei
@@ -0,0 +1,422 @@
|
||||
"""Translator-Agent: übersetzt fremdsprachige Artikel ins Deutsche.
|
||||
|
||||
Verwaltungs-Adaption des gleichnamigen Monitor-Agents. Nutzt CLAUDE_MODEL_FAST
|
||||
(Haiku) in Batches. Im Verwaltungsportal wird der Translator ausschließlich
|
||||
manuell über den Übersetzungs-Button (routers/translation.py) angestoßen,
|
||||
niemals automatisch.
|
||||
|
||||
Quelle: AegisSight-Monitor/src/agents/translator.py - bei größeren Änderungen
|
||||
am Monitor-Original hier nachziehen. Die Imports weichen bewusst ab
|
||||
(shared.agents.claude_client statt agents.claude_client). Der restliche Code
|
||||
unterhalb ist eine 1:1-Kopie und behält daher den Stil des Originals.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
||||
from shared.agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator
|
||||
from config import CLAUDE_MODEL_FAST
|
||||
|
||||
logger = logging.getLogger("verwaltung.translation")
|
||||
|
||||
# Im Verwaltungsportal gibt es kein automatisches Übersetzen: der Translator
|
||||
# läuft nur, wenn translate_articles() explizit mit enabled=True gerufen wird.
|
||||
# Diese Konstante ist daher der konservative Default für enabled=None.
|
||||
TRANSLATOR_ENABLED = False
|
||||
|
||||
# Pro Batch nicht mehr als so viele Artikel an Claude geben.
|
||||
# Bei Haiku ist das Output-Limit ca. 8k Tokens. Pro Artikel kommen leicht
|
||||
# 400-600 Tokens raus (headline_de + content_de bis 1000 Zeichen). Bei 15
|
||||
# wurde die Antwort regelmaessig abgeschnitten (JSON brach mittendrin ab). 5 ist sicher mit Reserve.
|
||||
DEFAULT_BATCH_SIZE = 5
|
||||
|
||||
# content_original wird ohnehin auf 1000 Zeichen gecappt (rss_parser).
|
||||
# Fuer den Translator sicherheitshalber nochmal kuerzen, falls doch mehr vorhanden ist.
|
||||
CONTENT_INPUT_MAX = 1200
|
||||
|
||||
# content_de soll wie content_original auf 1000 Zeichen begrenzt sein.
|
||||
CONTENT_OUTPUT_MAX = 1000
|
||||
|
||||
|
||||
def _extract_complete_objects(text: str) -> list[dict]:
|
||||
"""Extrahiert vollstaendige JSON-Objekte aus moeglicherweise abgeschnittenem Text.
|
||||
|
||||
Klammer-Counter-Ansatz: jedes balancierte {...} wird probiert.
|
||||
"""
|
||||
results = []
|
||||
depth = 0
|
||||
start = -1
|
||||
in_string = False
|
||||
escape = False
|
||||
for i, ch in enumerate(text):
|
||||
if escape:
|
||||
escape = False
|
||||
continue
|
||||
if ch == "\\":
|
||||
escape = True
|
||||
continue
|
||||
if ch == '"' and not escape:
|
||||
in_string = not in_string
|
||||
continue
|
||||
if in_string:
|
||||
continue
|
||||
if ch == "{":
|
||||
if depth == 0:
|
||||
start = i
|
||||
depth += 1
|
||||
elif ch == "}":
|
||||
depth -= 1
|
||||
if depth == 0 and start >= 0:
|
||||
obj_text = text[start:i + 1]
|
||||
try:
|
||||
obj = json.loads(obj_text)
|
||||
if isinstance(obj, dict):
|
||||
results.append(obj)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
start = -1
|
||||
return results
|
||||
|
||||
|
||||
def _build_prompt(articles: list[dict], output_lang: str = "de") -> str:
    """Build the translation prompt for one batch of articles.

    Each article is reduced to id, headline, content (cut to
    CONTENT_INPUT_MAX characters) and source language, and the resulting
    list is embedded as JSON at the end of the instructions.

    Args:
        articles: Dicts with at least an ``id`` key; ``headline``,
            ``content_original`` and ``language`` are optional.
        output_lang: Target language code; "de" and "en" get a readable
            label, any other code is used verbatim.

    Returns:
        The complete prompt text.
    """
    known_labels = {"de": "Deutsch", "en": "Englisch"}
    lang_label = known_labels.get(output_lang, output_lang)

    items = [
        {
            "id": article["id"],
            "headline": article.get("headline", "") or "",
            "content": (article.get("content_original") or "")[:CONTENT_INPUT_MAX],
            "source_lang": article.get("language", "en"),
        }
        for article in articles
    ]

    return f"""Du bist ein praeziser Uebersetzer fuer Nachrichten-Artikel.
Uebersetze die folgenden Artikel nach {lang_label}.

WICHTIG:
- Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) - NIEMALS Umschreibungen wie ae, oe, ue, ss.
Beispiele: "Gespraeche" -> "Gespräche", "Fuehrer" -> "Führer", "grosse" -> "große".
- Behalte Eigennamen (Personen, Orte, Organisationen) im Original.
- Headline kurz und buendig wie im Original.
- Content auf MAX {CONTENT_OUTPUT_MAX} Zeichen kuerzen, kein HTML, kein Markdown.
- Wenn der Artikel schon auf {lang_label} ist (z.B. source_lang="{output_lang}"),
kopiere headline und content unveraendert.

Antworte AUSSCHLIESSLICH mit einem flachen JSON-Array (kein Wrapper-Objekt!).
Format genau so:
[
{{"id": 1, "headline_de": "Titel auf Deutsch", "content_de": "Inhalt auf Deutsch"}},
{{"id": 2, "headline_de": "...", "content_de": "..."}}
]

NICHT erlaubt: {{"translations": [...]}} oder {{"items": [...]}} oder Markdown-Codefences.
Nur das Array, ohne Einleitung, ohne Erklaerung.

ARTIKEL:
{json.dumps(items, ensure_ascii=False, indent=2)}
"""
|
||||
|
||||
|
||||
def _parse_response(text: str) -> list[dict]:
|
||||
"""Robustes JSON-Array-Parsing.
|
||||
|
||||
Handhabt:
|
||||
- reines JSON
|
||||
- JSON in Markdown-Codefence ```json ... ```
|
||||
- abgeschnittene Antworten (extrahiert vollstaendige Top-Level-Objekte)
|
||||
"""
|
||||
text = text.strip()
|
||||
# Markdown-Codefence entfernen
|
||||
if text.startswith("```"):
|
||||
text = re.sub(r"^```(?:json)?\s*", "", text)
|
||||
text = re.sub(r"\s*```\s*$", "", text)
|
||||
text = text.strip()
|
||||
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
# Erst Array versuchen
|
||||
match = re.search(r"\[.*\]", text, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
data = json.loads(match.group(0))
|
||||
except json.JSONDecodeError:
|
||||
# Truncate-Fallback: einzelne Top-Level-Objekte extrahieren
|
||||
data = _extract_complete_objects(text)
|
||||
else:
|
||||
data = _extract_complete_objects(text)
|
||||
|
||||
# Claude wraps das Array gelegentlich in {"translations": [...]} oder {"items": [...]}
|
||||
if isinstance(data, dict):
|
||||
for key in ("translations", "items", "results", "data"):
|
||||
if isinstance(data.get(key), list):
|
||||
data = data[key]
|
||||
break
|
||||
else:
|
||||
# Einzelnes Objekt? Dann als Liste mit einem Element behandeln
|
||||
if "id" in data:
|
||||
data = [data]
|
||||
else:
|
||||
raise ValueError(f"Translator-Antwort: Dict ohne erwarteten Array-Key (keys={list(data.keys())[:5]})")
|
||||
|
||||
if not isinstance(data, list):
|
||||
raise ValueError(f"Translator-Antwort ist kein Array: {type(data).__name__}")
|
||||
|
||||
cleaned = []
|
||||
for item in data:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
aid = item.get("id")
|
||||
if not isinstance(aid, int):
|
||||
try:
|
||||
aid = int(aid)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
cleaned.append({
|
||||
"id": aid,
|
||||
"headline_de": (item.get("headline_de") or "").strip() or None,
|
||||
"content_de": (item.get("content_de") or "").strip() or None,
|
||||
})
|
||||
return cleaned
|
||||
|
||||
|
||||
async def translate_articles_batch(
    articles: list[dict],
    output_lang: str = "de",
) -> tuple[list[dict], ClaudeUsage]:
    """Translate one batch of articles with a single model call.

    Args:
        articles: Dicts with the fields id, headline, content_original and
            language.
        output_lang: Target language code passed on to the prompt builder.

    Returns:
        ``(translations, usage)``. Only translations whose id belongs to the
        requested batch are returned. If the model call fails the result is
        ``([], ClaudeUsage())``; if only the parsing fails it is
        ``([], usage)`` - the caller decides whether to retry or skip.
    """
    if not articles:
        return [], ClaudeUsage()

    prompt = _build_prompt(articles, output_lang)

    try:
        result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
    except Exception as e:
        logger.error(f"Translator Claude-Call fehlgeschlagen: {e}")
        return [], ClaudeUsage()

    try:
        translations = _parse_response(result_text)
    except Exception as e:
        logger.error(f"Translator JSON-Parsing fehlgeschlagen: {e}; raw: {result_text[:300]!r}")
        return [], usage

    # Keep only translations that refer to an article of this batch.
    requested_ids = {article["id"] for article in articles}
    valid = []
    for translation in translations:
        if translation["id"] in requested_ids:
            valid.append(translation)

    unknown_count = len(translations) - len(valid)
    if unknown_count != 0:
        logger.warning(
            "Translator: %d von %d Translations referenzieren unbekannte IDs",
            unknown_count, len(translations),
        )
    return valid, usage
|
||||
|
||||
|
||||
# --- Pre-Topic-Filter: schmale Headline-Übersetzung -----------------------------
|
||||
#
|
||||
# Der Topic-Filter (analyzer.filter_relevant_articles) ist ein Haiku-Call, der pro
|
||||
# Artikel beurteilt, ob er thematisch zur Lage passt. Bei fremdsprachigen Headlines
|
||||
# (CJK/Arabisch/Hebräisch/Kyrillisch) bewertet Haiku konservativ und verwirft sie
|
||||
# häufig, weil er sie nur halb versteht. Damit landeten z.B. die japanischen
|
||||
# Ministeriums-Feeds (MOD, NHK, Asahi) in Lagen mit Japan-Bezug nie in der finalen
|
||||
# Auswahl, obwohl der RSS-Match korrekt griff.
|
||||
#
|
||||
# Diese Funktion übersetzt in einem einzelnen Batch-Call alle nicht-lateinischen
|
||||
# Headlines + erste Content-Sätze ins Englische und hängt das Ergebnis als
|
||||
# article["headline_en_for_topic"] / article["content_en_for_topic"] an. Der
|
||||
# Topic-Filter zeigt das dem LLM zusätzlich zum Original.
|
||||
#
|
||||
# WICHTIG: Diese Mini-Übersetzung ist UNABHÄNGIG vom TRANSLATOR_ENABLED-Flag —
|
||||
# sie wird auch dann gemacht, wenn der nachgelagerte Volltext-Translator
|
||||
# deaktiviert ist (Pflicht für korrektes Topic-Filtering, sehr kleine Kosten).
|
||||
|
||||
_TOPIC_TRANSLATE_CONTENT_MAX = 500
|
||||
|
||||
|
||||
def _needs_pretopic_translate(article: dict) -> bool:
|
||||
"""Erkennt fremdsprachige Headlines, die für den Topic-Filter übersetzt
|
||||
werden sollten.
|
||||
|
||||
Heuristik: Headline enthält Non-ASCII-Zeichen, die NICHT in den typischen
|
||||
deutsch/franz./span./port./skand. Latin-1-Erweiterungen liegen.
|
||||
Das sind v.a. CJK (Kanji/Kana/Hangul), Arabisch, Hebräisch, Kyrillisch,
|
||||
Thai, Devanagari etc.
|
||||
"""
|
||||
headline = (article.get("headline_de") or article.get("headline") or "").strip()
|
||||
if not headline:
|
||||
return False
|
||||
for ch in headline:
|
||||
cp = ord(ch)
|
||||
# Bereiche ausschließen, die in Latin-Schrift normal sind:
|
||||
# ASCII (0-127), Latin-1 Supplement (128-255), Latin Extended-A/B (256-591)
|
||||
if cp <= 591:
|
||||
continue
|
||||
# Alles darüber sind fremde Schriftsysteme → übersetzen
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
async def translate_headlines_for_topic_filter(
    articles: list[dict],
    target_lang: str = "en",
) -> tuple[int, ClaudeUsage]:
    """Translate non-Latin headlines (plus a short content snippet) in one call.

    The result is written directly onto the article dicts:
        article["headline_en_for_topic"]: str (only set if translated)
        article["content_en_for_topic"]: str (only set if translated)

    Articles are addressed by their position in ``articles`` (they may not
    have an "id" yet). Only positions that were actually sent to the model
    are accepted in the answer.

    Args:
        articles: Article dicts; modified in place.
        target_lang: Target language code; "en" and "de" get a readable
            label, any other code is used verbatim.

    Returns:
        ``(number_of_articles_updated, usage)``. If nothing needs
        translating or the model call fails, ``(0, ClaudeUsage())``.
    """
    if not articles:
        return 0, ClaudeUsage()

    items = []
    for idx, a in enumerate(articles):
        if not _needs_pretopic_translate(a):
            continue
        headline = (a.get("headline_de") or a.get("headline") or "").strip()
        content_src = (a.get("content_de") or a.get("content_original") or "")
        items.append({
            "i": idx,
            "h": headline[:200],
            "c": content_src[:_TOPIC_TRANSLATE_CONTENT_MAX],
        })

    if not items:
        return 0, ClaudeUsage()

    sent_indices = {item["i"] for item in items}

    lang_label = {"en": "English", "de": "German"}.get(target_lang, target_lang)
    prompt = f"""Translate these news headlines and short content snippets to {lang_label}.
Keep proper names (people, organizations, places) untouched. Keep it concise; the goal
is to let another model judge topical relevance, not to publish.

Return ONLY a JSON array. Each item: {{"i": <index>, "h": <headline in {lang_label}>, "c": <content snippet in {lang_label}>}}.
Keep the same "i" values. No prose, no markdown fences.

INPUT:
{json.dumps(items, ensure_ascii=False)}
"""

    try:
        result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
    except Exception as e:
        logger.warning(f"Pre-Topic-Translate Claude-Call fehlgeschlagen: {e}")
        return 0, ClaudeUsage()

    # Robust parsing: Markdown code fence, bare array, truncated array.
    text = result_text.strip()
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text)
        text = re.sub(r"\s*```\s*$", "", text)
        text = text.strip()
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        data = None
        m = re.search(r"\[.*\]", text, re.DOTALL)
        if m:
            try:
                data = json.loads(m.group(0))
            except json.JSONDecodeError:
                data = None
        if data is None:
            # A truncated answer usually has no closing "]" at all, so the
            # salvage step must also run when no array span was found.
            data = _extract_complete_objects(text)
            if not data:
                logger.warning(
                    f"Pre-Topic-Translate: kein JSON-Array in Antwort. Sample: {text[:200]!r}"
                )
                return 0, usage

    if not isinstance(data, list):
        logger.warning(
            f"Pre-Topic-Translate: Antwort ist kein Array ({type(data).__name__})"
        )
        return 0, usage

    updated: set[int] = set()
    for entry in data:
        if not isinstance(entry, dict):
            continue
        raw_idx = entry.get("i")
        # bool is a subclass of int; true/false is never a valid index.
        if isinstance(raw_idx, bool):
            continue
        try:
            idx = int(raw_idx)
        except (TypeError, ValueError):
            continue
        # Accept only indices that were part of the request, so a confused
        # answer cannot attach text to an article that was never sent.
        if idx not in sent_indices:
            continue
        raw_h = entry.get("h")
        raw_c = entry.get("c")
        # Non-string values count as missing instead of crashing on .strip().
        h = raw_h.strip() if isinstance(raw_h, str) else ""
        c = raw_c.strip() if isinstance(raw_c, str) else ""
        if h:
            articles[idx]["headline_en_for_topic"] = h
        if c:
            articles[idx]["content_en_for_topic"] = c
        if h or c:
            updated.add(idx)

    return len(updated), usage
|
||||
|
||||
|
||||
async def translate_articles(
    articles: list[dict],
    output_lang: str = "de",
    batch_size: int = DEFAULT_BATCH_SIZE,
    usage_accumulator: UsageAccumulator | None = None,
    enabled: bool | None = None,
) -> list[dict]:
    """Translate any number of articles in batches.

    Each batch is handled by ``translate_articles_batch``; the results are
    returned as ONE flat list. A failing batch contributes nothing and the
    remaining batches still run.

    Args:
        articles: Article dicts as expected by ``translate_articles_batch``.
        output_lang: Target language code.
        batch_size: Maximum number of articles per model call; must be >= 1.
        usage_accumulator: If given, the usage of every batch is added to it.
        enabled: Per-call switch. If None, the module-level
            TRANSLATOR_ENABLED constant decides.

    Returns:
        The collected translations; ``[]`` if there are no articles or the
        translator is disabled for this call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (only checked when
            there is something to translate).
    """
    if not articles:
        return []

    is_enabled = TRANSLATOR_ENABLED if enabled is None else bool(enabled)
    if not is_enabled:
        logger.info(
            "Translator deaktiviert (enabled=%s, global TRANSLATOR_ENABLED=%s), %d Artikel uebersprungen",
            enabled, TRANSLATOR_ENABLED, len(articles),
        )
        return []

    # range() already rejects a step of 0, but a negative step would yield an
    # empty loop and silently translate nothing - fail loudly in both cases.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    total_batches = (len(articles) + batch_size - 1) // batch_size
    all_translations = []
    for i in range(0, len(articles), batch_size):
        batch = articles[i : i + batch_size]
        translations, usage = await translate_articles_batch(batch, output_lang)
        if usage_accumulator is not None:
            usage_accumulator.add(usage)
        all_translations.extend(translations)
        logger.info(
            "Translator-Batch %d/%d: %d/%d uebersetzt (cost=$%.4f)",
            (i // batch_size) + 1,
            total_batches,
            len(translations), len(batch),
            usage.cost_usd,
        )
    return all_translations
|
||||
@@ -10,7 +10,7 @@ def test_main_app_imports():
|
||||
def test_all_routers_importable():
|
||||
"""Bei Syntax-Fehlern in einem Router crasht das Ganze - hier fangen wir das ab."""
|
||||
for mod in ("auth", "organizations", "licenses", "users",
|
||||
"dashboard", "sources", "token_usage", "audit"):
|
||||
"dashboard", "sources", "token_usage", "audit", "translation"):
|
||||
m = importlib.import_module(f"routers.{mod}")
|
||||
assert hasattr(m, "router"), f"routers/{mod} hat keinen router-Objekt"
|
||||
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren