Compare commits

93 commits: pre-incide...develop
Commits in this range (SHA1):

a716726e36, f22c8dbc61, 8af0fa07c8, 1ee6c4ddf1, 72b306d90c, 0e578a38a0,
5a123ef3b8, 897e56997c, ff8a0531a4, 5fc2467559, 48a60d7579, 62ba38ae46,
715af17ac3, f8e2f73bc0, 7f220a9b65, f4c0c930b8, f73c21235e, 9078489d0a,
24d7500152, f0fe35b279, fb6e9fff19, b1a0e97a34, 77797f6027, dc51ecafe8,
31fa17465a, 2a654cc882, 6293cef91e, a6f36be9c6, 98c9da64b0, 307f0a1868,
430541f49b, ee83f38edf, 2b1e8c3632, b1f8113207, 26fac0e824, 62c0be64ee,
8c4ef6b2cf, ad5b723d79, 51615cae62, a2610d0094, a08df3d121, 0a6208c289,
19038472cf, 462127dc52, 34aeb04a88, b14fe31f42, ffb8dddc4f, 0edbf7e3b8,
de01ab71fc, 86a49e082c, 221b21cb4e, 30cb276ec6, cae9c5467a, 58eb1298ca,
370bb94b26, c9bd6310ae, 392028a9aa, 7b5adccf2b, 059a9a2dc7, 3a346ba2ec,
2b51e49d0d, e3fe7fac85, 88b18d0775, 682828ea58, ac5160010d, 059395393c,
14d1062583, 2ee90a4b3b, d1f88c9e9f, ad53786a24, a9806a586b, 2aaa51e2a8,
2df37cb617, 5473ba3ed7, 8042639d20, ec53ab27cd, c73541cdbe, 5d5ec7c924,
e8ac0d0c50, c8a8e10020, a579e2c275, efae707fa9, 05b60ffb35, 60b8646fe4,
285df86c7b, 5add8d9d59, 949df868ff, 9293e66d01, c0f68e40a5, 0d6ad8ea90,
a302790777, 9a43dffa6c, 194790899c
.gitignore (vendored, +1)

@@ -4,3 +4,4 @@ __pycache__/
 logs/
 data/
 .venv/
+data
CLAUDE.md (+125)

@@ -220,3 +220,128 @@ Changelog-Kategorien in TaskMate:
 - 34 = Changelog Verwaltung
 - 35 = Changelog Website
 - 36 = Changelog TaskMate
+
+## Staging-Umgebung
+
+```yaml
+staging:
+  url: https://staging.monitor.aegis-sight.de
+  server: 46.225.141.13 (gleicher Host wie Live)
+  pfad: /home/claude-dev/AegisSight-Monitor-staging
+  branch: develop
+  port: 18891 (Live: 8891)
+  service: aegis-monitor-staging.service (systemd)
+  venv: /home/claude-dev/AegisSight-Monitor-staging/venv (eigenes venv)
+  zugriff: Magic-Link-Login an info@aegis-sight.de (Cookie 30 Tage)
+
+  datenbank:
+    pfad: ~/AegisSight-Monitor-staging/data/osint.db
+    initial: einmalige Kopie der Live-DB
+    drift: gewollt - Aenderungen in Staging beeinflussen Live nicht
+    reseed_von_live: |
+      sudo systemctl stop aegis-monitor-staging
+      cp ~/AegisSight-Monitor/data/osint.db ~/AegisSight-Monitor-staging/data/osint.db
+      sudo systemctl start aegis-monitor-staging
+
+  besonderheiten_env:
+    JWT_SECRET: eigener fuer Staging (nicht Live-JWT)
+    MAGIC_LINK_BASE_URL: https://staging.monitor.aegis-sight.de (sonst leitet App zu Live)
+    TELEGRAM_API_ID: 0  # deaktiviert - verhindert Doppel-Login mit Live
+    TELEGRAM_API_HASH: 0
+    DB-Pfad: relative aus config.py (nutzt automatisch ~/AegisSight-Monitor-staging/data/)
+
+  auth_service:
+    pfad: /opt/aegis-staging-auth
+    service: aegis-monitor-staging-auth.service
+    port: 127.0.0.1:8095
+    cookie_domain: staging.monitor.aegis-sight.de
+    cookie_name: aegis_monitor_staging_auth
+    code_quelle: identisch zum Service auf 46.225.225.49 (eigene Konfig)
+```
+
+### Workflow Staging -> Live
+
+1. **Aenderung in develop machen** (im Staging-Verzeichnis):
+   ```bash
+   cd ~/AegisSight-Monitor-staging
+   git checkout develop
+   # Aenderung
+   git add . && git commit -m ... && git push origin develop
+   ```
+
+2. **Staging aktualisieren** (aktuell manuell):
+   ```bash
+   ssh claude-dev@46.225.141.13 'cd ~/AegisSight-Monitor-staging && git pull && sudo systemctl restart aegis-monitor-staging'
+   ```
+
+3. **In https://staging.monitor.aegis-sight.de testen**
+
+4. **Promote zu Live**: Pull Request develop -> main in Gitea, dann:
+   ```bash
+   ssh claude-dev@46.225.141.13 'cd ~/AegisSight-Monitor && git pull'
+   # Live laeuft als loser uvicorn-Prozess (kein systemd) - manueller Restart
+   # bei Backend-Aenderungen noetig
+   ```
+
+### Offen (noch nicht implementiert)
+
+- Auto-Deploy bei Push auf develop (Webhook-Listener)
+- Promote-UI mit Ein-Klick-Button
+- Live-Monitor auf systemd umstellen (~10s Downtime einmalig)
+
+## Auto-Deploy + Promote-UI
+
+```yaml
+auto_deploy:
+  listener_service:
+    pfad: /opt/aegis-staging-deploy
+    service: aegis-staging-deploy.service
+    port: 127.0.0.1:8096
+  deployments:
+    staging: develop -> ~/AegisSight-Monitor-staging (restartet aegis-monitor-staging)
+    live: main -> ~/AegisSight-Monitor (restartet aegis-monitor)
+  endpoints:
+    "POST /__deploy": staging via Gitea-Webhook (HMAC)
+    "POST /__deploy/live": live via Promote-UI (HMAC)
+  secrets: /opt/aegis-staging-deploy/.env (nicht im Repo)
+
+  gitea_webhook:
+    repo: AegisSight/AegisSight-Monitor
+    url: https://staging.monitor.aegis-sight.de/__deploy
+    branch_filter: develop
+
+  live_systemd:
+    service: aegis-monitor.service
+    hinweis: |
+      Live-Monitor laeuft seit 2026-04-26 als systemd-Service (vorher loser
+      uvicorn-Prozess). Manueller Restart bei Backend-Aenderungen:
+      sudo systemctl restart aegis-monitor
+      Beim Promote via UI passiert das automatisch.
+
+  promote_ui:
+    url: https://deploy.aegis-sight.de
+    laeuft_auf: 46.225.225.49 (zentral fuer alle Services)
+    zugriff: Magic-Link-Login an info@aegis-sight.de
+    funktion: |
+      Live- vs. Staging-Stand pro Service inkl. Liste der ausstehenden Commits.
+      Promote-Knopf -> Gitea-PR develop->main wird auto-gemerged + Live-Listener
+      pullt main + restartet aegis-monitor.
+```
+
+### Vollstaendiger Workflow (Aenderung am Monitor)
+
+1. **Entwickeln in develop**:
+   ```bash
+   cd ~/AegisSight-Monitor-staging
+   git checkout develop
+   # Aenderung
+   git add . && git commit -m "..." && git push origin develop
+   # Auto-Deploy pullt automatisch + restartet aegis-monitor-staging
+   ```
+
+2. **Auf https://staging.monitor.aegis-sight.de pruefen**
+
+3. **Promoten via https://deploy.aegis-sight.de** (Klick auf Monitor-Karte)
+   → Gitea merged develop→main → Listener pullt main → `systemctl restart aegis-monitor`
+
+4. **Live-Check auf https://monitor.aegis-sight.de**
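The `auto_deploy` block above describes an HMAC-secured `/__deploy` endpoint, but the listener code itself is not part of this diff. Below is a minimal sketch of what such a Gitea webhook receiver could look like; the repo directory, secret variable name, and framework choice are assumptions, only the endpoint path, HMAC requirement, branch filter, and restart command are taken from the YAML above. Gitea signs the raw request body with HMAC-SHA256 and sends the hex digest in the `X-Gitea-Signature` header.

```python
# Hypothetical sketch of the /__deploy staging listener; not the repo's actual code.
import hashlib
import hmac
import os
import subprocess

from fastapi import FastAPI, HTTPException, Request

app = FastAPI()
DEPLOY_SECRET = os.environ["DEPLOY_SECRET"]  # assumed env var from /opt/aegis-staging-deploy/.env
REPO_DIR = os.path.expanduser("~/AegisSight-Monitor-staging")

@app.post("/__deploy")
async def deploy(request: Request):
    body = await request.body()
    # Verify Gitea's HMAC-SHA256 signature over the raw body.
    expected = hmac.new(DEPLOY_SECRET.encode(), body, hashlib.sha256).hexdigest()
    got = request.headers.get("X-Gitea-Signature", "")
    if not hmac.compare_digest(expected, got):
        raise HTTPException(status_code=403, detail="bad signature")
    payload = await request.json()
    # branch_filter: develop (see gitea_webhook in the YAML above).
    if payload.get("ref") != "refs/heads/develop":
        return {"skipped": True}
    subprocess.run(["git", "-C", REPO_DIR, "pull", "--ff-only"], check=True)
    subprocess.run(["sudo", "systemctl", "restart", "aegis-monitor-staging"], check=True)
    return {"deployed": True}
```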
RELEASES.json (new file, +65)

@@ -0,0 +1,65 @@
+[
+  {
+    "version": "2026-05-03T15:21Z",
+    "date": "2026-05-03",
+    "title": "Übersichtlichere Navigation in der Seitenleiste",
+    "items": [
+      "Schaltflächen in der Seitenleiste haben jetzt klarere Icons und kürzere Beschriftungen",
+      "Der Feedback-Button zeigt nun ein Brief-Symbol für bessere Erkennbarkeit"
+    ]
+  },
+  {
+    "version": "2026-04-30T23:12Z",
+    "date": "2026-04-30",
+    "title": "Hintergrundbild-Unschärfe zuverlässiger und vollständiger",
+    "items": [
+      "Der Weichzeichner-Effekt wird jetzt stabiler angezeigt und aktualisiert sich korrekt",
+      "Der Header-Bereich wird nun ebenfalls korrekt mit dem Unschärfe-Effekt versehen"
+    ]
+  },
+  {
+    "version": "2026-04-29T22:30Z",
+    "date": "2026-04-29",
+    "title": "Update-Meldungen folgen Hell-/Dunkelmodus, korrekte Umlaute",
+    "items": [
+      "Banner und „Was ist neu?“-Modal nutzen jetzt die Theme-Variablen und passen sich automatisch dem aktiven Hell- oder Dunkelmodus an",
+      "Ältere Release-Einträge mit ae/oe/ue-Schreibweise wurden auf korrekte Umlaute umgestellt"
+    ]
+  },
+  {
+    "version": "2026-04-29T20:10Z",
+    "date": "2026-04-29",
+    "title": "Blur versucht zu fixen",
+    "items": [
+      "war nix..."
+    ]
+  },
+  {
+    "version": "2026-04-26T21:10Z",
+    "date": "2026-04-26",
+    "title": "Update-Modal kommt jetzt auch beim ersten Besuch",
+    "items": [
+      "Beim ersten Login nach einer Aktualisierung erscheint die Was-ist-neu-Übersicht jetzt automatisch",
+      "Für Kunden-Onboarding: erste Highlights werden direkt sichtbar"
+    ]
+  },
+  {
+    "version": "2026-04-26T20:40Z",
+    "date": "2026-04-26",
+    "title": "Updatenachricht bei Deployment",
+    "items": [
+      "Einrichtung Deployment für Updates",
+      "Message im Monitor bei Update"
+    ]
+  },
+  {
+    "version": "5473ba3",
+    "date": "2026-04-26",
+    "title": "Update-System eingeführt",
+    "items": [
+      "Updates berühren ab jetzt nie mehr die Fälle oder Daten",
+      "Beim Promote landet eine 'Was ist neu'-Info hier",
+      "Strukturelle Trennung von Live- und Staging-Datenbank"
+    ]
+  }
+]
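How the monitor consumes RELEASES.json is not part of this diff. A small illustrative reader, keyed on the `version` strings above, that would return the entries a client has not yet seen (the function and endpoint shape are assumptions; the file is assumed to stay ordered newest first, as it is here):

```python
# Illustrative only: return "what's new" entries newer than the client's last seen version.
import json
from pathlib import Path

RELEASES_PATH = Path("RELEASES.json")

def unseen_releases(last_seen_version: str | None) -> list[dict]:
    releases = json.loads(RELEASES_PATH.read_text(encoding="utf-8"))
    if last_seen_version is None:
        # First visit: show everything (cf. the 2026-04-26 "erster Besuch" entry).
        return releases
    unseen = []
    for entry in releases:  # newest first, as in the file above
        if entry["version"] == last_seen_version:
            break
        unseen.append(entry)
    return unseen

print([e["title"] for e in unseen_releases("2026-04-30T23:12Z")])
```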
data (deleted, -1)

@@ -1 +0,0 @@
-/home/claude-dev/osint-data
@@ -11,3 +11,8 @@ python-multipart
 aiosmtplib
 geonamescache>=2.0
 telethon
+# Bericht-Export (PDF via WeasyPrint + DOCX via python-docx)
+Jinja2>=3.1
+weasyprint>=68.0
+python-docx>=1.2
+pikepdf>=9.0
scripts/migrate_sources_classification.py (new file, +64)

@@ -0,0 +1,64 @@
+"""Einmalige LLM-Klassifikation aller noch unklassifizierten Quellen.
+
+Verwendung:
+    python3 scripts/migrate_sources_classification.py --limit 50
+    python3 scripts/migrate_sources_classification.py --limit 500  # Alle
+    python3 scripts/migrate_sources_classification.py --recheck-pending  # bereits Pending neu
+
+Schreibt Vorschlaege in proposed_*-Spalten. Approval erfolgt anschliessend
+ueber das Verwaltungs-UI / API (POST /api/sources/{id}/classification/approve).
+"""
+import argparse
+import asyncio
+import logging
+import sys
+from pathlib import Path
+
+# src/ in PYTHONPATH aufnehmen, wenn Skript direkt aufgerufen wird
+HERE = Path(__file__).resolve().parent
+SRC = HERE.parent / "src"
+if str(SRC) not in sys.path:
+    sys.path.insert(0, str(SRC))
+
+from database import get_db  # noqa: E402
+from services.source_classifier import bulk_classify  # noqa: E402
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
+)
+logger = logging.getLogger("migrate_sources")
+
+
+async def main():
+    parser = argparse.ArgumentParser(description="LLM-Klassifikation aller Quellen.")
+    parser.add_argument("--limit", type=int, default=50, help="Max. Quellen pro Lauf")
+    parser.add_argument(
+        "--recheck-pending",
+        action="store_true",
+        help="Auch Quellen mit classification_source='llm_pending' neu klassifizieren",
+    )
+    args = parser.parse_args()
+
+    db = await get_db()
+    try:
+        result = await bulk_classify(
+            db,
+            limit=args.limit,
+            only_unclassified=not args.recheck_pending,
+        )
+    finally:
+        await db.close()
+
+    print(f"Verarbeitet: {result['processed']}")
+    print(f"Erfolgreich: {result['success']}")
+    print(f"Fehler: {len(result['errors'])}")
+    print(f"Kosten: ${result['total_cost_usd']:.4f}")
+    if result["errors"]:
+        print("\nFehler-Details:")
+        for e in result["errors"][:10]:
+            print(f" source_id={e['source_id']}: {e['error']}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
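The script above assumes a specific return shape from `bulk_classify`. The field set can be inferred from the `print` calls; the stub below makes that contract explicit and can be swapped in for a dry run. The stub itself is hypothetical, not the repo's implementation in services/source_classifier.

```python
# Hypothetical stub matching the result dict the migration script consumes.
async def bulk_classify(db, limit: int = 50, only_unclassified: bool = True) -> dict:
    return {
        "processed": 0,         # sources touched in this run
        "success": 0,           # of those, classified successfully
        "errors": [],           # [{"source_id": 123, "error": "..."}]
        "total_cost_usd": 0.0,  # LLM cost of the run
    }
```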
@@ -16,7 +16,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
 VORFALL: {title}
 KONTEXT: {description}
 
-VORHANDENE MELDUNGEN:
+{fact_context_block}VORHANDENE MELDUNGEN:
 {articles_text}
 
 AUFTRAG:
@@ -47,7 +47,6 @@ Antworte AUSSCHLIESSLICH als JSON-Objekt mit diesen Feldern:
 - "summary": Zusammenfassung auf {output_language} mit Quellenverweisen [1], [2] etc. im Text (Markdown-Überschriften ## erlaubt wenn sinnvoll, aber KEINE "## ZUSAMMENFASSUNG"/"## ÜBERBLICK"-Sektion)
 - "sources": Array von Quellenobjekten, je: {{"nr": 1, "name": "Quellenname", "url": "https://..."}}
 - "key_facts": Array von bestätigten Kernfakten (Strings, in Ausgabesprache)
-- "translations": Array von Objekten mit "article_id", "headline_de", "content_de" (nur für fremdsprachige Artikel)
 
 Antworte NUR mit dem JSON-Objekt. Keine Einleitung, keine Erklärung."""
@@ -60,7 +59,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
 THEMA: {title}
 KONTEXT: {description}
 
-VORLIEGENDE QUELLEN:
+{fact_context_block}VORLIEGENDE QUELLEN:
 {articles_text}
 
 AUFTRAG:
@@ -102,7 +101,6 @@ Antworte AUSSCHLIESSLICH als JSON-Objekt mit diesen Feldern:
 - "summary": Das strukturierte Briefing als Markdown-Text mit Quellenverweisen [1], [2] etc.
 - "sources": Array von Quellenobjekten, je: {{"nr": 1, "name": "Quellenname", "url": "https://..."}}
 - "key_facts": Array von gesicherten Kernfakten (Strings, in Ausgabesprache)
-- "translations": Array von Objekten mit "article_id", "headline_de", "content_de" (nur für fremdsprachige Artikel)
 
 Antworte NUR mit dem JSON-Objekt. Keine Einleitung, keine Erklärung."""
@@ -120,7 +118,7 @@ BISHERIGES LAGEBILD:
 BISHERIGE QUELLEN:
 {previous_sources_text}
 
-NEUE MELDUNGEN SEIT DEM LETZTEN UPDATE:
+{fact_context_block}NEUE MELDUNGEN SEIT DEM LETZTEN UPDATE:
 {new_articles_text}
 
 AUFTRAG:
@@ -149,7 +147,6 @@ Antworte AUSSCHLIESSLICH als JSON-Objekt mit diesen Feldern:
 - "summary": Aktualisierte Zusammenfassung mit Quellenverweisen [1], [2] etc.
 - "sources": Array mit NUR den NEUEN Quellen aus den neuen Meldungen, je: {{"nr": <fortlaufende ganze Zahl, KEINE Buchstaben-Suffixe>, "name": "Quellenname", "url": "https://..."}}. Alte Quellen werden automatisch gemerged.
 - "key_facts": Array aller aktuellen Kernfakten (in Ausgabesprache)
-- "translations": Array von Objekten mit "article_id", "headline_de", "content_de" (nur für neue fremdsprachige Artikel)
 
 Antworte NUR mit dem JSON-Objekt. Keine Einleitung, keine Erklärung."""
@@ -168,7 +165,7 @@ BISHERIGES BRIEFING:
 BISHERIGE QUELLEN:
 {previous_sources_text}
 
-NEUE QUELLEN SEIT DEM LETZTEN UPDATE:
+{fact_context_block}NEUE QUELLEN SEIT DEM LETZTEN UPDATE:
 {new_articles_text}
 
 AUFTRAG:
@@ -201,12 +198,11 @@ Antworte AUSSCHLIESSLICH als JSON-Objekt mit diesen Feldern:
 - "summary": Das aktualisierte Briefing als Markdown-Text mit Quellenverweisen
 - "sources": Array mit NUR den NEUEN Quellen aus den neuen Meldungen, je: {{"nr": <fortlaufende ganze Zahl, KEINE Buchstaben-Suffixe>, "name": "Quellenname", "url": "https://..."}}. Alte Quellen werden automatisch gemerged.
 - "key_facts": Array aller gesicherten Kernfakten (in Ausgabesprache)
-- "translations": Array von Objekten mit "article_id", "headline_de", "content_de" (nur für neue fremdsprachige Artikel)
 
 Antworte NUR mit dem JSON-Objekt. Keine Einleitung, keine Erklärung."""
 
 
-LATEST_DEVELOPMENTS_PROMPT_TEMPLATE = """Du pflegst eine Kachel "Neueste Entwicklungen" für eine Live-Monitoring-Lage.
+LATEST_DEVELOPMENTS_PROMPT_TEMPLATE = """Du erzeugst die Kachel "Neueste Entwicklungen" für eine Live-Monitoring-Lage.
 HEUTIGES DATUM: {today}
 AUSGABESPRACHE: {output_language}
 WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschreibungen (ae, oe, ue, ss).
@@ -214,37 +210,166 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
 LAGE: {title}
 KONTEXT: {description}
 
-BISHERIGE ENTWICKLUNGEN (chronologisch absteigend, neueste oben):
-{previous_developments}
+AKTUELLES LAGEBILD (autoritative inhaltliche Grundlage):
+{summary}
 
-NEUE MELDUNGEN SEIT DEM LETZTEN UPDATE:
-{new_articles_text}
+BELEGENDE MELDUNGEN (chronologisch absteigend, neueste zuerst — nur hieraus dürfen Zeitstempel und Quellen-Klammern stammen):
+{articles_text}
 
 AUFTRAG:
-Extrahiere aus den NEUEN Meldungen konkrete Ereignisse und aktualisiere die Liste. Fasse die bisherigen und neuen Ereignisse zu EINER Liste zusammen (max. 8 Bullets, neueste oben).
+Extrahiere aus dem LAGEBILD die wichtigsten jüngsten Ereignisse und stelle sie als chronologisch absteigende Bullet-Liste dar. Für jedes Bullet wählst du eine oder mehrere belegende Meldungen aus der obigen Liste und übernimmst deren Publikationsdatum als Zeitstempel.
 
-REGELN:
-- Jedes Bullet = EIN konkretes Ereignis (1-2 Sätze, faktenbasiert). Keine Themen-Zusammenfassungen.
-- Jedes Bullet beginnt mit dem Zeitstempel der frühesten belegenden Quelle im Format "[DD.MM. HH:MM]".
-- Jedes Bullet ENDET mit einer Quellen-Klammer — ZWINGEND. Bullets ohne Klammer werden verworfen.
-- NEUE Bullets (aus den NEUEN MELDUNGEN): {{M<ID1>, M<ID2>}} mit den ganzzahligen IDs aus der "ID:"-Zeile der belegenden Meldung(en). Beispiele: {{M42}} oder {{M42, M17}}.
-- UEBERNOMMENE Bullets aus BISHERIGE ENTWICKLUNGEN: behalten ihre bestehende Klammer KOMPLETT UND UNVERAENDERT, inklusive des Pipe-Zeichens und der URL. Beispiel: {{Reuters|https://reuters.com/article, Rybar|https://t.me/rybar/123}}. NICHT in M-IDs umwandeln, NICHT die URL entfernen, NICHT umformatieren.
-- Wenn mehrere Meldungen dasselbe Ereignis belegen: EIN Bullet, Zeitstempel = frühester Zeitpunkt, ALLE IDs in der Klammer.
-- Bestehende Bullets aus BISHERIGE ENTWICKLUNGEN sinngemäß übernehmen, NICHT umformulieren. Nur entfernen, wenn sie durch neue Meldungen nachweislich überholt sind oder die 8-Bullet-Grenze überschritten wird (dann älteste fallen raus). Wenn einem uebernommenen Bullet die Quellen-Klammer fehlt (Altformat): Bullet VERWERFEN und nicht in die neue Liste uebernehmen.
-- Wenn eine Quelle eine erkennbare politische Ausrichtung hat (z.B. pro-russisch, staatsnah, rechtsextrem), im Bullet-Text erwähnen ("laut pro-russischem Telegram-Kanal Rybar...").
-- Neutral und sachlich — keine Wertungen oder Spekulationen.
-- KEINE Gedankenstriche (—, –) — stattdessen Kommas, Doppelpunkte oder neue Sätze.
+REGELN zur Auswahl der Bullets:
+- Ziel: 4 bis 6 Bullets. Wenn das Lagebild weniger tatsächlich AKTUELLE Ereignisse hergibt, dann lieber 3 ehrliche Bullets als 6 mit veralteten. Kein Auffüllen.
+- "AKTUELL" bedeutet: belegende Meldung ist spätestens ~7 Tage alt (relativ zu HEUTIGES DATUM). Ältere Ereignisse — auch wenn sie im Lagebild stehen — gehören NICHT rein. Sie sind Hintergrund, keine Neuesten Entwicklungen.
+- Wenn das Lagebild ein Ereignis erwähnt, aber KEINE aktuelle belegende Meldung dafür existiert: Bullet verwerfen. Lieber weglassen als fabulieren.
+- Bevorzuge Ereignisse mit hohem Neuigkeitswert und konkretem Vorfall/Aussage gegenüber allgemeinen Hintergrundkonstatierungen.
+
+REGELN zur Formulierung:
+- Jedes Bullet = EIN konkretes Ereignis oder eine konkrete Aussage, 1-2 Sätze, präzise und neutral.
+- Beginne JEDES Bullet mit dem Zeitstempel der frühesten belegenden Meldung im Format "[DD.MM. HH:MM]".
+- Ende JEDES Bullet mit einer Quellen-Klammer mit Pipe-getrennten Paaren "Name|URL", kommagetrennt bei mehreren Belegen: {{Reuters|https://reuters.com/..., Rybar|https://t.me/rybar/123}}. Maximal 3 Quellen pro Bullet. Bullets ohne Klammer werden verworfen.
+- Sortiere die Bullets nach Zeitstempel absteigend — neueste zuerst.
+- Wenn eine Quelle eine erkennbare politische Ausrichtung hat (pro-russisch, staatsnah, rechtsextrem etc.), im Bullet-Text erwähnen ("laut pro-russischem Telegram-Kanal Rybar...").
+- KEINE Gedankenstriche (—, –). Stattdessen Kommas, Doppelpunkte, neue Sätze.
 - Bei widersprüchlichen Angaben beide Seiten knapp nennen.
 - KEINE Einleitung, KEINE Überschrift, KEINE Nachbemerkungen.
-- Wenn aus den neuen Meldungen kein neues Ereignis extrahierbar ist: BISHERIGE ENTWICKLUNGEN unverändert zurückgeben.
 
-OUTPUT-FORMAT (ausschliesslich, keine Anführungszeichen, kein Code-Fence, JEDE Zeile beginnt mit "- "):
-- [DD.MM. HH:MM] Ereignistext neu. {{M<ID>}}
-- [DD.MM. HH:MM] Ereignistext neu mit mehreren Belegen. {{M<ID1>, M<ID2>}}
-- [DD.MM. HH:MM] Ereignistext aus BISHERIGE ENTWICKLUNGEN. {{Quellenname1|URL1, Quellenname2|URL2}}
+OUTPUT-FORMAT (ausschliesslich, kein Code-Fence, JEDE Zeile beginnt mit "- "):
+- [DD.MM. HH:MM] Ereignistext. {{Quellenname1|URL1}}
+- [DD.MM. HH:MM] Ereignistext mit mehreren Belegen. {{Quellenname1|URL1, Quellenname2|URL2}}
 ..."""
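The new OUTPUT-FORMAT makes each bullet machine-checkable: a timestamp prefix plus a trailing `{Name|URL}` source bracket. A sketch of the validation a parser such as `_parse_latest_developments` might apply; the regex and function name here are illustrative, not the repo's actual implementation (note that after `.format()` the doubled braces in the template become single braces in the model output):

```python
import re

# One bullet: "- [DD.MM. HH:MM] text. {Name|https://..., Name2|https://...}"
BULLET_RE = re.compile(r"^- \[(\d{2}\.\d{2}\. \d{2}:\d{2})\] (.+?) \{([^}]+)\}$")

def parse_bullets(raw: str) -> list[dict]:
    bullets = []
    for line in raw.splitlines():
        m = BULLET_RE.match(line.strip())
        if not m:
            continue  # bullets without timestamp or source bracket are dropped
        ts, text, sources = m.groups()
        pairs = [s.strip().split("|", 1) for s in sources.split(",")]
        if any(len(p) != 2 for p in pairs):
            continue  # malformed "Name|URL" pair
        bullets.append({"time": ts, "text": text, "sources": pairs})
    return bullets
```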
 
 
+TOPIC_FILTER_PROMPT_TEMPLATE = """Du bist ein OSINT-Relevanzfilter. Ein vorgeschalteter Keyword-Prefilter hat diese Artikel für eine Lage durchgelassen — aber Keyword-Treffer allein reichen nicht. Artikel müssen das SPEZIFISCHE KERNTHEMA der Lage inhaltlich behandeln.
+
+LAGE: {title}
+KONTEXT: {description}
+
+ARTIKEL-KANDIDATEN:
+{articles_text}
+
+AUFGABE:
+Entscheide je Artikel, ob er thematisch zur Lage passt, und gib die laufenden Nummern der relevanten Artikel zurück.
+
+REGELN:
+- Relevant = der Artikel behandelt konkret das im Titel + Kontext beschriebene Kernthema. Zentrale Akteure, Handlungen, Aussagen oder Ereignisse des Themas müssen im Artikel erkennbar sein.
+- NICHT relevant = Artikel, die nur allgemeine Begriffe aus dem Thema streifen (z.B. "Russland", "Iran", "Krieg", "Drohne"), ohne das Spezifikum der Lage zu behandeln. Allgemeine Kontext-Berichte aus der gleichen Region oder zum gleichen Großkonflikt sind NICHT automatisch relevant.
+- Breit gefasste Lagen (z.B. "Iran-Israel-Krieg", "Ukrainekrieg – aktuelle Lage") akzeptieren alle Meldungen, die einen der direkt beteiligten Akteure oder Kriegsschauplätze behandeln.
+- Eng gefasste Lagen (z.B. "Russische Militärblogger", "Ausfall bei Cloudflare", "Cybervorfall Stadtwerke X") akzeptieren NUR Meldungen zum Spezifikum. Peripheres, auch wenn im selben Großkontext, wird abgelehnt.
+- Eine Meldung gilt auch dann als relevant, wenn sie das Thema aus einer gegnerischen/kritischen Perspektive behandelt — es geht um thematische Zugehörigkeit, nicht um Ausrichtung.
+- Im Zweifel: NICHT relevant. Ein zu schmaler Filter ist besser als ein Schwall off-topic-Treffer.
+
+Antworte AUSSCHLIESSLICH als JSON-Objekt — KEINE Erklärung, KEINE Einleitung:
+{{"relevant_ids": [1, 3, 7]}}"""
+
+
+# Status-Gruppen fuer den Fakten-Kontext im Analyse-Prompt.
+# adhoc nutzt confirmed/unconfirmed/contradicted/developing,
+# research nutzt established/unverified/disputed/developing — beide Domaenen
+# werden in dieselben vier Anzeige-Gruppen abgebildet.
+_FACT_STATUS_GROUPS = [
+    ("Bestätigt (mehrere unabhängige Quellen oder durch Faktencheck als gesichert eingestuft):",
+     {"confirmed", "established"}),
+    ("Umstritten (Quellen widersprechen sich oder Faktencheck hat Widersprüche dokumentiert):",
+     {"contradicted", "disputed"}),
+    ("Unbestätigt (nur eine einzelne Quelle, eine unabhängige Bestätigung steht aus):",
+     {"unconfirmed", "unverified"}),
+    ("In Entwicklung (laufender Sachverhalt, Stand offen):",
+     {"developing"}),
+]
+
+_FACT_STATUS_PRIORITY = {
+    "confirmed": 5, "established": 5,
+    "contradicted": 4, "disputed": 4,
+    "unconfirmed": 3, "unverified": 3,
+    "developing": 1,
+}
+
+
+def build_fact_context_block(
+    existing_facts: list[dict] | None,
+    new_or_updated_facts: list[dict] | None,
+    incident_type: str,
+    max_total: int = 20,
+) -> str:
+    """Baut den 'GEPRUEFTE FAKTEN'-Block fuer den Analyse-Prompt.
+
+    Wird vom Orchestrator zwischen Faktencheck und Lagebild aufgerufen, damit
+    das Lagebild auf gepruefter Faktenbasis schreibt und Unklarheiten explizit
+    benennt. Bei leerer Faktenliste wird ein leerer String zurueckgegeben — der
+    Prompt laeuft dann ohne Fakten-Kontext (Fallback bei Faktencheck-Fail oder
+    bei Lagen ohne bisherige Fakten).
+    """
+    existing_facts = existing_facts or []
+    new_or_updated_facts = new_or_updated_facts or []
+    if not existing_facts and not new_or_updated_facts:
+        return ""
+
+    seen_claims: set[str] = set()
+    merged: list[dict] = []
+    # Neue/aktualisierte Fakten zuerst (Status ist aktueller Stand).
+    for f in new_or_updated_facts:
+        c = (f.get("claim") or "").strip().lower()
+        if not c or c in seen_claims:
+            continue
+        seen_claims.add(c)
+        merged.append(f)
+    # Dann alte unveraenderte Fakten.
+    for f in existing_facts:
+        c = (f.get("claim") or "").strip().lower()
+        if not c or c in seen_claims:
+            continue
+        seen_claims.add(c)
+        merged.append(f)
+
+    if not merged:
+        return ""
+
+    merged.sort(key=lambda f: (
+        -_FACT_STATUS_PRIORITY.get((f.get("status") or "").lower(), 0),
+        -(f.get("sources_count") or 0),
+    ))
+    merged = merged[:max_total]
+
+    grouped: dict[str, list[dict]] = {label: [] for label, _ in _FACT_STATUS_GROUPS}
+    for f in merged:
+        s = (f.get("status") or "").lower()
+        for label, codes in _FACT_STATUS_GROUPS:
+            if s in codes:
+                grouped[label].append(f)
+                break
+
+    if not any(grouped.values()):
+        return ""
+
+    lines: list[str] = []
+    lines.append("GEPRÜFTE FAKTEN (Stand nach dem Faktencheck dieses Refresh, max. {n} priorisiert):".format(n=max_total))
+    for label, _codes in _FACT_STATUS_GROUPS:
+        items = grouped[label]
+        if not items:
+            continue
+        lines.append("")
+        lines.append(label)
+        for f in items:
+            claim = (f.get("claim") or "").strip()
+            sc = f.get("sources_count") or 0
+            sc_text = f" ({sc} {'Quellen' if sc != 1 else 'Quelle'})" if sc else ""
+            lines.append(f"- {claim}{sc_text}")
+
+    lines.append("")
+    lines.append("AUSSAGE-DISZIPLIN für das Lagebild:")
+    lines.append("- Bestätigte Fakten als Grundgerüst nehmen, ohne Hedging.")
+    lines.append("- Umstrittene Punkte explizit als umstritten kennzeichnen, beide Seiten knapp benennen.")
+    lines.append("- Unbestätigtes klar einordnen ('Eine einzelne Quelle berichtet ...', 'Eine unabhängige Bestätigung steht aus.').")
+    lines.append("- Bei Aussagen, die durch keinen geprüften Fakt gedeckt sind und auch nicht direkt aus einer der vorliegenden Meldungen hervorgehen: NICHT spekulieren — entweder weglassen oder als unklar kennzeichnen.")
+    lines.append("- Triff KEINE Aussagen, die mit den oben gelisteten geprüften Fakten in Widerspruch stehen.")
+    lines.append("")
+    return "\n".join(lines)
+
+
 class AnalyzerAgent:
     """Analysiert und übersetzt Meldungen über Claude CLI."""
 
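A worked call of the new `build_fact_context_block` helper, showing the block the analysis prompt receives. The sample facts are invented for illustration; the output follows directly from the function above (new/updated facts are merged with existing ones, sorted by status priority, then grouped):

```python
block = build_fact_context_block(
    existing_facts=[
        {"claim": "Angriff auf Umspannwerk X bestätigt", "status": "confirmed", "sources_count": 3},
    ],
    new_or_updated_facts=[
        {"claim": "Täterzahl unklar", "status": "unconfirmed", "sources_count": 1},
    ],
    incident_type="adhoc",
)
print(block)
# GEPRÜFTE FAKTEN (Stand nach dem Faktencheck dieses Refresh, max. 20 priorisiert):
#
# Bestätigt (mehrere unabhängige Quellen oder durch Faktencheck als gesichert eingestuft):
# - Angriff auf Umspannwerk X bestätigt (3 Quellen)
#
# Unbestätigt (nur eine einzelne Quelle, eine unabhängige Bestätigung steht aus):
# - Täterzahl unklar (1 Quelle)
#
# AUSSAGE-DISZIPLIN für das Lagebild:
# ...
```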
@@ -271,7 +396,7 @@ class AnalyzerAgent:
             articles_text += f"Inhalt: {content[:800]}\n"
         return articles_text
 
-    async def analyze(self, title: str, description: str, articles: list[dict], incident_type: str = "adhoc") -> tuple[dict | None, ClaudeUsage | None]:
+    async def analyze(self, title: str, description: str, articles: list[dict], incident_type: str = "adhoc", fact_context_block: str = "") -> tuple[dict | None, ClaudeUsage | None]:
         """Erstanalyse: Analysiert alle Meldungen zu einem Vorfall (erster Refresh)."""
         if not articles:
             return None, None
@@ -287,6 +412,7 @@ class AnalyzerAgent:
             articles_text=articles_text,
             today=today,
             output_language=OUTPUT_LANGUAGE,
+            fact_context_block=fact_context_block,
         )
 
         try:
@@ -308,6 +434,7 @@ class AnalyzerAgent:
         previous_summary: str,
         previous_sources_json: str | None,
         incident_type: str = "adhoc",
+        fact_context_block: str = "",
     ) -> tuple[dict | None, ClaudeUsage | None]:
         """Inkrementelle Analyse: Aktualisiert das Lagebild mit nur den neuen Artikeln.
 
@@ -350,6 +477,7 @@ class AnalyzerAgent:
             new_articles_text=new_articles_text,
             today=today,
             output_language=OUTPUT_LANGUAGE,
+            fact_context_block=fact_context_block,
        )
 
         try:
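The new `fact_context_block` parameter threads the fact-check result into both the first-analysis and incremental prompt paths. A hypothetical orchestrator call site; the fact variables and the surrounding flow are assumptions, only the signature matches the diff:

```python
# Hypothetical call site: run the fact check first, then write the situation
# picture on top of the verified facts.
async def refresh_analysis(analyzer, incident, articles, existing_facts, new_facts):
    fact_block = build_fact_context_block(existing_facts, new_facts, incident["type"])
    return await analyzer.analyze(
        title=incident["title"],
        description=incident["description"],
        articles=articles,
        incident_type=incident["type"],
        fact_context_block=fact_block,  # "" when the fact check failed or yielded nothing
    )
```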
@@ -379,32 +507,127 @@ class AnalyzerAgent:
             logger.error(f"Inkrementelle Analyse-Fehler: {e}")
             return None, None
 
+    async def filter_relevant_articles(
+        self,
+        title: str,
+        description: str,
+        articles: list[dict],
+    ) -> tuple[list[dict], ClaudeUsage | None]:
+        """Semantischer Topic-Filter (Haiku).
+
+        Nimmt die vom Keyword-Prefilter durchgelassenen Artikel und wirft diejenigen raus,
+        die zwar auf Keywords matchen, aber das Kernthema der Lage thematisch nicht treffen.
+        Fällt bei Parsing- oder API-Fehlern auf die unveränderte Liste zurück.
+        """
+        if not articles:
+            return articles, None
+
+        lines = []
+        for i, article in enumerate(articles, 1):
+            headline = article.get("headline_de") or article.get("headline", "")
+            source = article.get("source", "Unbekannt")
+            content = article.get("content_de") or article.get("content_original") or ""
+            lines.append(f"[{i}] Quelle: {source}")
+            lines.append(f" Überschrift: {headline}")
+            if content:
+                lines.append(f" Inhalt: {content[:400]}")
+        articles_text = "\n".join(lines)
+
+        prompt = TOPIC_FILTER_PROMPT_TEMPLATE.format(
+            title=title,
+            description=description or "Keine weiteren Details",
+            articles_text=articles_text,
+        )
+
+        from config import CLAUDE_MODEL_FAST
+        try:
+            result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
+        except Exception as e:
+            logger.warning(f"Topic-Filter-Fehler (behalte alle {len(articles)} Artikel): {e}")
+            return articles, None
+
+        parsed = self._parse_response(result)
+        if not parsed or not isinstance(parsed.get("relevant_ids"), list):
+            logger.warning(
+                f"Topic-Filter: keine relevant_ids geparst, behalte alle {len(articles)} Artikel"
+            )
+            return articles, usage
+
+        relevant_set = {
+            i for i in parsed["relevant_ids"]
+            if isinstance(i, int) and 1 <= i <= len(articles)
+        }
+        filtered = [a for i, a in enumerate(articles, 1) if i in relevant_set]
+
+        rejected = len(articles) - len(filtered)
+        if not filtered and articles:
+            logger.warning(
+                f"Topic-Filter hat ALLE {len(articles)} Artikel verworfen — "
+                "möglicherweise zu aggressiv. Behalte Original."
+            )
+            return articles, usage
+
+        logger.info(
+            f"Topic-Filter: {len(filtered)}/{len(articles)} Artikel thematisch relevant "
+            f"({rejected} verworfen)"
+        )
+        return filtered, usage
+
     async def generate_latest_developments(
         self,
         title: str,
         description: str,
-        new_articles: list[dict],
-        previous_developments: str | None,
+        summary: str,
+        recent_articles: list[dict],
+        previous_developments: str | None = None,
     ) -> tuple[str | None, ClaudeUsage | None]:
-        """Pflegt die Kachel 'Neueste Entwicklungen' für Live-Monitoring-Lagen.
+        """Generiert die Kachel 'Neueste Entwicklungen' aus dem Lagebild.
 
-        Gibt Markdown-Bullets mit Zeitstempel zurück (max 8, neueste oben).
-        Wenn keine neuen Artikel vorliegen, werden die bisherigen Bullets unverändert zurückgegeben.
+        Der LLM extrahiert aus dem Summary die jüngsten Ereignisse und bindet sie an
+        das Publikationsdatum der belegenden Meldungen (recent_articles). Damit bleiben
+        die Einträge zwingend aktuell und thematisch an das Lagebild gekoppelt. Alte
+        Hintergrund-Erwähnungen im Lagebild erzeugen keine Bullets, weil keine aktuelle
+        Meldung sie belegen würde.
+
+        Gibt 4–6 Bullets (absteigend nach Zeitstempel) zurück. Bei Fehler/Parsing-Leer:
+        Fallback auf previous_developments (falls vorhanden), sonst None.
         """
-        prev = (previous_developments or "").strip()
-        if not new_articles:
-            return (prev or None), None
+        prev = (previous_developments or "").strip() or None
+        if not summary or not summary.strip():
+            return prev, None
+        if not recent_articles:
+            return prev, None
 
         from config import OUTPUT_LANGUAGE, CLAUDE_MODEL_FAST
         today = datetime.now(TIMEZONE).strftime("%d.%m.%Y")
-        new_articles_text = self._format_articles_text(new_articles, max_articles=25)
-        prev_block = prev if prev else "(noch keine Einträge)"
+
+        # Kompakter Artikel-Block: nur die für Zeitstempel/Quellen nötigen Felder.
+        # Sortiert nach published_at absteigend — damit der LLM die jüngsten sofort sieht.
+        def _pub_sort_key(a: dict) -> str:
+            return a.get("published_at") or ""
+
+        sorted_articles = sorted(recent_articles, key=_pub_sort_key, reverse=True)
+        lines: list[str] = []
+        for a in sorted_articles[:60]:
+            headline = a.get("headline_de") or a.get("headline", "")
+            source = a.get("source", "Unbekannt")
+            url = a.get("source_url", "")
+            published = a.get("published_at") or "unbekannt"
+            bias = a.get("source_bias") or ""
+            line = f"- [{published}] {source}"
+            if bias:
+                line += f" ({bias})"
+            line += f" | {headline}"
+            if url:
+                line += f" | {url}"
+            lines.append(line)
+        articles_text = "\n".join(lines) if lines else "(keine belegenden Meldungen verfügbar)"
 
         prompt = LATEST_DEVELOPMENTS_PROMPT_TEMPLATE.format(
             title=title,
             description=description or "Keine weiteren Details",
-            previous_developments=prev_block,
-            new_articles_text=new_articles_text,
+            summary=summary.strip(),
+            articles_text=articles_text,
             today=today,
             output_language=OUTPUT_LANGUAGE,
         )
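The reworked `generate_latest_developments` now takes the finished summary plus recent articles instead of maintaining a running bullet list. A hypothetical call; where the arguments come from is assumed, only the parameter names match the new signature:

```python
# Hypothetical orchestrator usage of the new signature.
async def refresh_tile(analyzer, incident, analysis, articles_last_7_days):
    developments, usage = await analyzer.generate_latest_developments(
        title=incident["title"],
        description=incident["description"],
        summary=analysis["summary"],            # authoritative content basis
        recent_articles=articles_last_7_days,   # supply timestamps + source brackets
        previous_developments=incident.get("latest_developments"),  # fallback only
    )
    return developments
```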
@@ -413,16 +636,16 @@ class AnalyzerAgent:
             result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST, raw_text=True)
         except Exception as e:
             logger.error(f"Latest-Developments-Fehler: {e}")
-            return (prev or None), None
+            return prev, None
 
-        bullets = self._parse_latest_developments(result, new_articles)
+        bullets = self._parse_latest_developments(result, recent_articles)
         if not bullets:
             logger.info("Latest-Developments: keine Bullets geparst, behalte bisherigen Stand")
-            return (prev or None), usage
+            return prev, usage
 
-        bullets = bullets[:8]
+        bullets = bullets[:6]
         output = "\n".join(bullets)
-        logger.info(f"Latest-Developments: {len(bullets)} Bullets generiert")
+        logger.info(f"Latest-Developments: {len(bullets)} Bullets aus Lagebild generiert")
         return output, usage
 
     @staticmethod
@@ -678,5 +901,5 @@ class AnalyzerAgent:
         except json.JSONDecodeError:
             pass
 
-        return {"summary": summary, "sources": sources, "key_facts": [], "translations": []}
+        return {"summary": summary, "sources": sources, "key_facts": []}
@@ -13,6 +13,35 @@ _cancel_event_var: contextvars.ContextVar[asyncio.Event | None] = contextvars.Co
 logger = logging.getLogger("osint.claude_client")
 
 
+class ClaudeCliError(RuntimeError):
+    """Strukturierter Fehler aus dem Claude CLI mit Kategorie.
+
+    error_type:
+    - "rate_limit": Anthropic Rate-Limit oder Overload (transient, retry-tauglich)
+    - "auth_error": Account-Problem (Organisation hat keinen Claude-Zugang,
+      Token abgelaufen/ungueltig) - kein Retry sinnvoll, Admin-Aktion noetig
+    - "timeout": Claude CLI Timeout (transient)
+    - "cli_error": Sonstiger CLI-Fehler (unspezifisch, Default)
+    """
+
+    def __init__(self, error_type: str, message: str):
+        self.error_type = error_type
+        self.message = message
+        super().__init__(f"Claude CLI [{error_type}]: {message}")
+
+
+def _classify_cli_error(combined_output: str) -> str:
+    """Ordnet einer Fehler-Ausgabe eine error_type-Kategorie zu."""
+    txt = combined_output.lower()
+    rate_limit_keywords = ["hit your limit", "rate limit", "resets", "rate_limit", "overloaded"]
+    auth_error_keywords = ["does not have access", "login again", "contact your administrator"]
+    if any(kw in txt for kw in rate_limit_keywords):
+        return "rate_limit"
+    if any(kw in txt for kw in auth_error_keywords):
+        return "auth_error"
+    return "cli_error"
+
+
 @dataclass
 class ClaudeUsage:
     """Token-Verbrauch eines einzelnen Claude CLI Aufrufs."""
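With `ClaudeCliError` carrying an `error_type`, callers can branch on category instead of string-matching log output. A minimal illustrative consumer; `schedule_retry` and `alert_admin` are placeholders, not functions from the repo:

```python
async def safe_call(prompt: str):
    try:
        return await call_claude(prompt, tools=None)
    except ClaudeCliError as e:
        if e.error_type in ("rate_limit", "timeout"):
            schedule_retry()  # placeholder: transient, try again later
        else:
            alert_admin(e)    # placeholder: auth_error/cli_error need admin action
        raise
```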
@@ -48,7 +77,7 @@ def _sanitize_mdash(text: str) -> str:
     """Ersetzt Gedankenstriche durch Bindestriche (KI-Indikator reduzieren)."""
     return text.replace("\u2014", " - ").replace("\u2013", " - ")
 
-async def call_claude(prompt: str, tools: str | None = "WebSearch,WebFetch", model: str | None = None, raw_text: bool = False) -> tuple[str, ClaudeUsage]:
+async def call_claude(prompt: str, tools: str | None = "WebSearch,WebFetch", model: str | None = None, raw_text: bool = False, timeout: float | None = None) -> tuple[str, ClaudeUsage]:
     """Ruft Claude CLI auf. Gibt (result_text, usage) zurück.
 
     Prompt wird via stdin uebergeben um OS ARG_MAX Limits zu vermeiden.
@@ -57,8 +86,10 @@ async def call_claude(prompt: str, tools: str | None = "WebSearch,WebFetch", mod
     prompt: Der Prompt fuer Claude
     tools: Kommagetrennte erlaubte Tools (None = keine Tools, --max-turns 1)
     model: Optionales Modell (z.B. CLAUDE_MODEL_FAST fuer Haiku). None = CLAUDE_MODEL_STANDARD (Opus 4.7).
+    timeout: Override in Sekunden. None = Fallback auf globalen CLAUDE_TIMEOUT (1800s).
     """
     effective_model = model or CLAUDE_MODEL_STANDARD
+    effective_timeout = timeout if timeout is not None else CLAUDE_TIMEOUT
     cmd = [CLAUDE_PATH, "-p", "-", "--output-format", "json", "--model", effective_model]
     if tools:
         cmd.extend(["--allowedTools", tools])
@@ -89,7 +120,7 @@ async def call_claude(prompt: str, tools: str | None = "WebSearch,WebFetch", mod
             process.communicate(input=prompt.encode("utf-8"))
         )
         cancel_wait_task = asyncio.create_task(cancel_event.wait())
-        timeout_task = asyncio.create_task(asyncio.sleep(CLAUDE_TIMEOUT))
+        timeout_task = asyncio.create_task(asyncio.sleep(effective_timeout))
 
         done, pending = await asyncio.wait(
             [communicate_task, cancel_wait_task, timeout_task],
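The per-call `timeout` override keeps fast auxiliary calls from inheriting the 1800 s default. Example usage consistent with the new signature; the 300 s value is illustrative:

```python
from config import CLAUDE_MODEL_FAST

async def quick_haiku_call(prompt: str):
    # Small model with a short per-call timeout instead of the global 1800 s.
    return await call_claude(
        prompt,
        tools=None,
        model=CLAUDE_MODEL_FAST,
        raw_text=True,
        timeout=300,  # illustrative value
    )
```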
@@ -108,32 +139,33 @@ async def call_claude(prompt: str, tools: str | None = "WebSearch,WebFetch", mod
         else:
             process.kill()
             await process.wait()
-            raise TimeoutError(f"Claude CLI Timeout nach {CLAUDE_TIMEOUT}s")
+            raise TimeoutError(f"Claude CLI Timeout nach {effective_timeout}s")
     else:
         stdout, stderr = await asyncio.wait_for(
-            process.communicate(input=prompt.encode("utf-8")), timeout=CLAUDE_TIMEOUT
+            process.communicate(input=prompt.encode("utf-8")), timeout=effective_timeout
         )
     except asyncio.TimeoutError:
         process.kill()
-        raise TimeoutError(f"Claude CLI Timeout nach {CLAUDE_TIMEOUT}s")
+        raise TimeoutError(f"Claude CLI Timeout nach {effective_timeout}s")
 
     if process.returncode != 0:
         error_msg = stderr.decode("utf-8", errors="replace").strip()
         stdout_msg = stdout.decode("utf-8", errors="replace").strip()
 
-        # Rate-Limit-Fehler kommen als JSON auf stdout, nicht auf stderr
-        error_type = "cli_error"
-        rate_limit_keywords = ["hit your limit", "rate limit", "resets", "rate_limit", "overloaded"]
-        combined_output = f"{error_msg} {stdout_msg}".lower()
-        if any(kw in combined_output for kw in rate_limit_keywords):
-            error_type = "rate_limit"
+        # Rate-Limit/Auth-Fehler kommen teils als JSON auf stdout, nicht auf stderr
+        combined_output = f"{error_msg} {stdout_msg}"
+        error_type = _classify_cli_error(combined_output)
+
+        if error_type == "rate_limit":
             logger.warning(f"Claude CLI Rate-Limit (Exit {process.returncode}): {stdout_msg or error_msg}")
+        elif error_type == "auth_error":
+            logger.error(f"Claude CLI Auth-Fehler (Exit {process.returncode}): {stdout_msg or error_msg}")
         else:
             logger.error(f"Claude CLI Fehler (Exit {process.returncode}): {error_msg}")
             if stdout_msg:
                 logger.error(f"Claude CLI stdout bei Fehler: {stdout_msg[:500]}")
 
-        raise RuntimeError(f"Claude CLI Fehler [{error_type}]: {stdout_msg or error_msg}")
+        raise ClaudeCliError(error_type, stdout_msg or error_msg)
 
     raw = stdout.decode("utf-8", errors="replace").strip()
     usage = ClaudeUsage()
@@ -141,6 +173,19 @@ async def call_claude(prompt: str, tools: str | None = "WebSearch,WebFetch", mod
 
     try:
         data = json.loads(raw)
+        # CLI kann returncode=0 liefern und trotzdem is_error=true setzen
+        # (z.B. "Your organization does not have access to Claude")
+        if data.get("is_error"):
+            error_text = str(data.get("result", ""))
+            error_type = _classify_cli_error(error_text)
+            if error_type == "rate_limit":
+                logger.warning(f"Claude CLI Rate-Limit (is_error): {error_text}")
+            elif error_type == "auth_error":
+                logger.error(f"Claude CLI Auth-Fehler (is_error): {error_text}")
+            else:
+                logger.error(f"Claude CLI Fehler (is_error): {error_text}")
+            raise ClaudeCliError(error_type, error_text)
+
         result_text = data.get("result", raw)
         u = data.get("usage", {})
         usage = ClaudeUsage(
@@ -21,15 +21,21 @@ from source_rules import (
 
 logger = logging.getLogger("osint.orchestrator")
 
-# Reputations-Score nach Quellenkategorie (für Relevanz-Scoring)
+# Reputations-Score nach Quellenkategorie (fuer Relevanz-Scoring).
+# Keys muessen mit den tatsaechlichen DB-Werten in sources.category uebereinstimmen
+# (siehe DOMAIN_CATEGORY_MAP in source_rules.py).
 CATEGORY_REPUTATION = {
-    "nachrichten_de": 0.9,
-    "nachrichten_int": 0.9,
-    "presseagenturen": 1.0,
-    "behoerden": 1.0,
-    "fachmedien": 0.8,
-    "international": 0.7,
-    "sonstige": 0.4,
+    "nachrichtenagentur": 1.0,      # Reuters, AP, dpa, AFP — Primärquellen
+    "behoerde": 1.0,                # BMI, BSI, Europol — offizielle Quellen
+    "oeffentlich-rechtlich": 0.95,  # tagesschau, ZDF, ARD, BBC, ORF
+    "qualitaetszeitung": 0.85,      # Spiegel, Zeit, FAZ, NZZ, Süddeutsche
+    "think-tank": 0.85,             # SWP, IISS, Brookings, Chatham House
+    "fachmedien": 0.8,              # heise, golem, netzpolitik, Handelsblatt
+    "international": 0.75,          # CNN, Guardian, NYT, Al Jazeera, France24
+    "regional": 0.65,               # regionale Tageszeitungen
+    "telegram": 0.5,                # OSINT-Kanaele — gemischte Qualitaet
+    "sonstige": 0.4,                # unkategorisiert
+    "boulevard": 0.3,               # Bild, Sun etc.
 }
 
 # Research-Modus: Automatisch 3 Durchläufe für optimale Ergebnisse
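How `CATEGORY_REPUTATION` enters the relevance score is not shown in this hunk. A hedged sketch of one plausible weighting; only the lookup table is real, the formula and fallback are assumptions:

```python
def relevance_score(base_score: float, source_category: str) -> float:
    # Assumed: unknown categories fall back to "sonstige" (0.4).
    reputation = CATEGORY_REPUTATION.get(source_category, CATEGORY_REPUTATION["sonstige"])
    return base_score * reputation

relevance_score(0.8, "nachrichtenagentur")  # 0.8  (factor 1.0)
relevance_score(0.8, "boulevard")           # 0.24 (factor 0.3)
```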
@@ -395,6 +401,10 @@ class AgentOrchestrator:
         self._queue: asyncio.Queue = asyncio.Queue()
         self._running = False
         self._current_task: Optional[int] = None
+        # Session-Start des aktuellen Tasks (UTC ISO mit 'Z'). Ueberspannt Multi-Pass
+        # und Retries innerhalb derselben Queue-Abarbeitung — verhindert, dass der
+        # Frontend-Timer beim Seiten-Reload auf den Pass/Retry-Start zurueckspringt.
+        self._current_task_started_at: Optional[str] = None
         self._ws_manager = None
         self._queued_ids: set[int] = set()
         self._cancel_requested: set[int] = set()
@@ -479,6 +489,9 @@ class AgentOrchestrator:
 
         logger.info(f"Lage {incident_id} aus Warteschlange entfernt (removed={removed})")
 
+        # refresh_log-Eintrag schreiben, damit Auto-Refresh nicht im naechsten Tick erneut einreiht
+        await self._log_queued_cancellation(incident_id)
+
         # Send cancelled event
         if self._ws_manager:
             try:
@@ -515,14 +528,20 @@ class AgentOrchestrator:
             user_id = None
         self._queued_ids.discard(incident_id)
         self._current_task = incident_id
+        # Session-Start EINMAL setzen — bleibt ueber Multi-Pass/Retry hinweg stabil
+        self._current_task_started_at = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
         self._cancel_event = asyncio.Event()
         _cancel_event_var.set(self._cancel_event)
         logger.info(f"Starte Refresh für Lage {incident_id} (Trigger: {trigger_type})")
 
         RETRY_DELAYS = [0, 120, 300]  # Sekunden: sofort, 2min, 5min
         TRANSIENT_ERRORS = (asyncio.TimeoutError, TimeoutError, ConnectionError, OSError)
+        from agents.claude_client import ClaudeCliError
         last_error = None
 
+        def _is_transient_cli(err: Exception) -> bool:
+            return isinstance(err, ClaudeCliError) and err.error_type in ("rate_limit", "timeout")
+
         try:
             # Research-Lagen: Automatisch 3 Durchläufe nur beim ersten Refresh
             incident_type, has_summary = await self._get_incident_info(incident_id)
@@ -551,32 +570,44 @@
                    }, _vis, _cb, _tid)
                last_error = None
                break
-            except TRANSIENT_ERRORS as e:
-                last_error = e
-                logger.warning(f"Transienter Fehler bei Lage {incident_id} (Versuch {attempt + 1}/3): {e}")
-                if attempt < 2:
-                    await self._mark_refresh_failed(incident_id, str(e))
-                    delay = RETRY_DELAYS[attempt + 1]
-                    logger.info(f"Retry in {delay}s für Lage {incident_id}")
-                    # Retry-Status per WebSocket senden
-                    if self._ws_manager:
-                        try:
-                            _vis, _cb, _tid = await self._get_incident_visibility(incident_id)
-                        except Exception:
-                            _vis, _cb, _tid = "public", None, None
-                        await self._ws_manager.broadcast_for_incident({
-                            "type": "status_update",
-                            "incident_id": incident_id,
-                            "data": {"status": "retrying", "attempt": attempt + 1, "delay": delay},
-                        }, _vis, _cb, _tid)
-                    await asyncio.sleep(delay)
-                else:
-                    await self._mark_refresh_failed(incident_id, f"Endgültig fehlgeschlagen nach 3 Versuchen: {e}")
            except Exception as e:
+                # Auth/CLI-Fehler: sofort abbrechen, kein Retry sinnvoll
+                if isinstance(e, ClaudeCliError) and e.error_type in ("auth_error", "cli_error"):
+                    last_error = e
+                    logger.error(f"Permanenter Claude-Fehler [{e.error_type}] bei Lage {incident_id}: {e}")
+                    await self._mark_refresh_failed(incident_id, str(e))
+                    break
+
+                # Transiente Fehler: Retry bis 3x
+                if isinstance(e, TRANSIENT_ERRORS) or _is_transient_cli(e):
+                    last_error = e
+                    kind = e.error_type if isinstance(e, ClaudeCliError) else type(e).__name__
+                    logger.warning(f"Transienter Fehler [{kind}] bei Lage {incident_id} (Versuch {attempt + 1}/3): {e}")
+                    if attempt < 2:
+                        await self._mark_refresh_failed(incident_id, str(e))
+                        delay = RETRY_DELAYS[attempt + 1]
+                        logger.info(f"Retry in {delay}s für Lage {incident_id}")
+                        if self._ws_manager:
+                            try:
+                                _vis, _cb, _tid = await self._get_incident_visibility(incident_id)
+                            except Exception:
+                                _vis, _cb, _tid = "public", None, None
+                            await self._ws_manager.broadcast_for_incident({
+                                "type": "status_update",
+                                "incident_id": incident_id,
+                                "data": {"status": "retrying", "attempt": attempt + 1, "delay": delay},
+                            }, _vis, _cb, _tid)
+                        await asyncio.sleep(delay)
+                        continue
+                    else:
+                        await self._mark_refresh_failed(incident_id, f"Endgültig fehlgeschlagen nach 3 Versuchen: {e}")
+                        break
+
+                # Alles andere: permanent
                last_error = e
                logger.error(f"Permanenter Fehler bei Refresh für Lage {incident_id}: {e}")
                await self._mark_refresh_failed(incident_id, str(e))
-                break  # Permanenter Fehler, kein Retry
+                break
 
        if last_error and self._ws_manager:
            try:
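The rewritten handler above folds transient and permanent failures into a single `except Exception` that classifies the error itself. A minimal standalone sketch of that classification pattern; `ClaudeCliError` is re-declared here as a stand-in, and `run_refresh` is a hypothetical callable representing one refresh pass:

```python
import asyncio

RETRY_DELAYS = [0, 120, 300]  # seconds: immediately, after 2 min, after 5 min
TRANSIENT_ERRORS = (asyncio.TimeoutError, TimeoutError, ConnectionError, OSError)


class ClaudeCliError(Exception):
    """Stand-in for agents.claude_client.ClaudeCliError from the diff."""
    def __init__(self, message: str, error_type: str):
        super().__init__(message)
        self.error_type = error_type


def is_transient(err: Exception) -> bool:
    # rate limits and timeouts are worth retrying; auth/CLI failures are not
    if isinstance(err, ClaudeCliError):
        return err.error_type in ("rate_limit", "timeout")
    return isinstance(err, TRANSIENT_ERRORS)


async def run_with_retries(run_refresh) -> None:
    for attempt in range(3):
        try:
            await run_refresh()
            return
        except Exception as e:
            if not is_transient(e) or attempt == 2:
                raise  # permanent error, or transient retry budget exhausted
            await asyncio.sleep(RETRY_DELAYS[attempt + 1])
```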
@@ -590,23 +621,62 @@ class AgentOrchestrator:
                }, _vis, _cb, _tid)
        finally:
            self._current_task = None
+            self._current_task_started_at = None
            self._cancel_event = None
            _cancel_event_var.set(None)
            self._queue.task_done()
 
    async def _mark_refresh_cancelled(self, incident_id: int):
-        """Markiert den laufenden Refresh-Log-Eintrag als cancelled."""
+        """Markiert den laufenden Refresh-Log-Eintrag als cancelled und schliesst
+        alle noch aktiven Pipeline-Schritte. Ohne den zweiten Schritt blieb der
+        zuletzt aktive Step-Eintrag verwaist und das Frontend zeigte dauerhaft
+        'Schritt X laeuft', weil /api/incidents/<id>/pipeline aus
+        refresh_pipeline_steps liest."""
        from database import get_db
+        from services.pipeline_tracker import cancel_active_steps
        db = await get_db()
        try:
+            now_str = datetime.now(TIMEZONE).strftime('%Y-%m-%d %H:%M:%S')
+            cur = await db.execute(
+                "SELECT id FROM refresh_log WHERE incident_id = ? AND status = 'running'",
+                (incident_id,),
+            )
+            row = await cur.fetchone()
+            refresh_log_id = row["id"] if row else None
+
            await db.execute(
                """UPDATE refresh_log SET status = 'cancelled', error_message = 'Vom Nutzer abgebrochen',
                completed_at = ? WHERE incident_id = ? AND status = 'running'""",
-                (datetime.now(TIMEZONE).strftime('%Y-%m-%d %H:%M:%S'), incident_id),
+                (now_str, incident_id),
+            )
+            await db.commit()
+
+            if refresh_log_id is not None:
+                await cancel_active_steps(db, refresh_log_id=refresh_log_id)
+        except Exception as e:
+            logger.warning(f"Konnte Refresh-Log nicht als abgebrochen markieren: {e}")
+        finally:
+            await db.close()
+
+    async def _log_queued_cancellation(self, incident_id: int):
+        """Schreibt einen cancelled-Eintrag fuer einen Queue-Abbruch (Lage war noch nicht laufend).
+        Verhindert, dass der Auto-Refresh-Scheduler im naechsten Tick sofort wieder einreiht."""
+        from database import get_db
+        db = await get_db()
+        try:
+            cur = await db.execute("SELECT tenant_id FROM incidents WHERE id = ?", (incident_id,))
+            row = await cur.fetchone()
+            tid = row["tenant_id"] if row else None
+            now_str = datetime.now(TIMEZONE).strftime("%Y-%m-%d %H:%M:%S")
+            await db.execute(
+                """INSERT INTO refresh_log (incident_id, started_at, completed_at, status,
+                trigger_type, error_message, tenant_id)
+                VALUES (?, ?, ?, 'cancelled', 'manual', 'Aus Warteschlange entfernt', ?)""",
+                (incident_id, now_str, now_str, tid),
            )
            await db.commit()
        except Exception as e:
-            logger.warning(f"Konnte Refresh-Log nicht als abgebrochen markieren: {e}")
+            logger.warning(f"Konnte Queue-Cancel nicht in refresh_log loggen: {e}")
        finally:
            await db.close()
 
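The queued-cancel row only has an effect if the auto-refresh scheduler consults refresh_log before enqueueing again. That scheduler is not part of this diff; a sketch of the kind of check such a cancelled entry would satisfy, with the interval handling assumed:

```python
from datetime import datetime, timedelta

async def should_enqueue(db, incident_id: int, interval_minutes: int = 60) -> bool:
    """Skip incidents with a recent refresh_log entry, including 'cancelled' ones.

    Hypothetical scheduler-side check; column names follow the queries in the diff.
    """
    cur = await db.execute(
        "SELECT started_at FROM refresh_log WHERE incident_id = ? "
        "ORDER BY started_at DESC LIMIT 1",
        (incident_id,),
    )
    row = await cur.fetchone()
    if row is None:
        return True  # never refreshed: enqueue
    last = datetime.strptime(row["started_at"], "%Y-%m-%d %H:%M:%S")
    # a fresh 'cancelled' row counts as activity, so the next tick stays quiet
    return datetime.now() - last >= timedelta(minutes=interval_minutes)
```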
@@ -654,6 +724,7 @@ class AgentOrchestrator:
        from agents.analyzer import AnalyzerAgent
        from agents.factchecker import FactCheckerAgent
        from feeds.rss_parser import RSSParser
+        from services import pipeline_tracker as _pipe
 
        db = await get_db()
        try:
@@ -696,6 +767,47 @@ class AgentOrchestrator:
            log_id = cursor.lastrowid
            usage_acc = UsageAccumulator()
 
+            # --- Pipeline-Tracking (Analysepipeline-Visualisierung) ---
+            _pass_nr = (_pass_info or {}).get("nr", 1)
+            _step_ids: dict[str, Optional[int]] = {}
+
+            async def _pipe_start(step_key: str):
+                try:
+                    sid = await _pipe.start_step(
+                        db, self._ws_manager,
+                        refresh_log_id=log_id, incident_id=incident_id, step_key=step_key,
+                        pass_number=_pass_nr, tenant_id=tenant_id,
+                        visibility=visibility, created_by=created_by,
+                    )
+                    _step_ids[step_key] = sid
+                    return sid
+                except Exception as _e:
+                    logger.debug(f"_pipe_start({step_key}) ignoriert: {_e}")
+                    return None
+
+            async def _pipe_done(step_key: str, count_value=None, count_secondary=None):
+                try:
+                    sid = _step_ids.pop(step_key, None)
+                    await _pipe.complete_step(
+                        db, self._ws_manager, step_id=sid,
+                        refresh_log_id=log_id, incident_id=incident_id, step_key=step_key,
+                        pass_number=_pass_nr, count_value=count_value, count_secondary=count_secondary,
+                        tenant_id=tenant_id, visibility=visibility, created_by=created_by,
+                    )
+                except Exception as _e:
+                    logger.debug(f"_pipe_done({step_key}) ignoriert: {_e}")
+
+            async def _pipe_skip(step_key: str):
+                try:
+                    await _pipe.skip_step(
+                        db, self._ws_manager,
+                        refresh_log_id=log_id, incident_id=incident_id, step_key=step_key,
+                        pass_number=_pass_nr, tenant_id=tenant_id,
+                        visibility=visibility, created_by=created_by,
+                    )
+                except Exception as _e:
+                    logger.debug(f"_pipe_skip({step_key}) ignoriert: {_e}")
+
            research_status = "deep_researching" if incident_type == "research" else "researching"
            research_detail = "Hintergrundrecherche im Web läuft..." if incident_type == "research" else "RSS-Feeds und Web werden durchsucht..."
            # Multi-Pass: Detail-Text mit Durchlauf-Info versehen
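`services.pipeline_tracker` itself is not shown in this diff. Judging from the wrappers above and the `refresh_pipeline_steps` queries further down, `start_step` plausibly inserts a `running` row and pushes a WebSocket event so the frontend updates live. A sketch under those assumptions; the status and timestamp columns are guesses:

```python
from datetime import datetime

async def start_step(db, ws_manager, *, refresh_log_id, incident_id, step_key,
                     pass_number, tenant_id, visibility, created_by):
    """Hypothetical sketch of services.pipeline_tracker.start_step.

    Table name and key columns come from the diff; 'status'/'started_at' are assumed.
    """
    cur = await db.execute(
        "INSERT INTO refresh_pipeline_steps "
        "(refresh_log_id, incident_id, step_key, pass_number, tenant_id, status, started_at) "
        "VALUES (?, ?, ?, ?, ?, 'running', ?)",
        (refresh_log_id, incident_id, step_key, pass_number, tenant_id,
         datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")),
    )
    await db.commit()
    if ws_manager:  # same positional broadcast signature the orchestrator uses
        await ws_manager.broadcast_for_incident(
            {"type": "pipeline_step", "incident_id": incident_id,
             "data": {"step_key": step_key, "status": "running"}},
            visibility, created_by, tenant_id)
    return cur.lastrowid
```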
@@ -718,6 +830,23 @@ class AgentOrchestrator:
            )
            existing_db_articles_full = await cursor.fetchall()
 
+            # Pipeline-Schritt 1: Quellen sichten (vorbereitet)
+            await _pipe_start("sources_review")
+            try:
+                if incident_type == "adhoc":
+                    _src_cursor = await db.execute(
+                        "SELECT COUNT(*) AS cnt FROM sources WHERE tenant_id = ? AND status = 'active'",
+                        (tenant_id,),
+                    )
+                    _src_row = await _src_cursor.fetchone()
+                    _src_total = _src_row["cnt"] if _src_row else 0
+                else:
+                    _src_total = None
+            except Exception:
+                _src_total = None
+            # secondary wird später mit der Anzahl tatsächlich liefernder Quellen ergänzt
+            await _pipe_done("sources_review", count_value=_src_total, count_secondary=None)
+
            # Schritt 1+2: RSS-Feeds und Claude-Recherche parallel ausführen
            async def _rss_pipeline():
                """RSS-Feed-Suche (Feed-Selektion + dynamische Keywords + Parsing)."""
@@ -762,7 +891,7 @@
                return articles, feed_usage
 
            async def _web_search_pipeline():
-                """Claude WebSearch-Recherche."""
+                """Claude WebSearch-Recherche mit Vorselektion eingetragener Web-Quellen."""
                researcher = ResearcherAgent()
                # Bestehende Artikel als Kontext mitgeben (Research + Adhoc)
                existing_for_context = None
@@ -773,13 +902,34 @@
                         "source_url": row["source_url"]}
                        for row in existing_db_articles_full
                    ]
-                results, usage = await researcher.search(
+
+                # Web-Quellen vorselektieren (Haiku) — nur thematisch passende werden Claude im Prompt empfohlen
+                preferred_sources = []
+                try:
+                    from source_rules import get_feeds_with_metadata
+                    web_sources = await get_feeds_with_metadata(tenant_id=tenant_id, source_type="web_source")
+                    if web_sources:
+                        preferred_sources, web_sel_usage = await researcher.select_relevant_web_sources(
+                            title, description, web_sources,
+                        )
+                        if web_sel_usage:
+                            usage_acc.add(web_sel_usage)
+                except Exception as e:
+                    logger.warning(f"Web-Source-Vorselektion fehlgeschlagen (Pipeline laeuft weiter): {e}")
+                    preferred_sources = []
+
+                results, usage, parse_failed = await researcher.search(
                    title, description, incident_type,
                    international=international, user_id=user_id,
                    existing_articles=existing_for_context,
+                    preferred_sources=preferred_sources,
                )
-                logger.info(f"Claude-Recherche: {len(results)} Ergebnisse")
-                return results, usage
+                logger.info(
+                    f"Claude-Recherche: {len(results)} Ergebnisse"
+                    + (f" (mit {len(preferred_sources)} Web-Quellen-Hinweis)" if preferred_sources else "")
+                    + (" (Parser fehlgeschlagen)" if parse_failed else "")
+                )
+                return results, usage, parse_failed
 
            async def _podcast_pipeline():
                """Podcast-Episoden-Suche (nur adhoc-Lagen, nur mit vorhandenen Transkripten)."""
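`select_relevant_web_sources` is also not shown here, but the `WEB_SOURCE_SELECTION_PROMPT` it presumably fills appears in the researcher hunks below: a numbered source list goes in, a JSON array of numbers comes back. A sketch of the method under that assumption, reusing `_extract_json_array`, `call_claude`, and `CLAUDE_MODEL_FAST` from the researcher module's scope:

```python
async def select_relevant_web_sources(self, title, description, web_sources):
    """Ask the fast model which registered web sources topically fit the incident.

    Sketch only: relies on module-level names from researcher.py as shown in this diff.
    """
    source_list = "\n".join(
        f"{i}. {s.get('domain', '')} ({s.get('name', '')})"
        for i, s in enumerate(web_sources, start=1)
    )
    prompt = WEB_SOURCE_SELECTION_PROMPT.format(
        title=title, description=description, source_list=source_list)
    result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
    indices = _extract_json_array(result) or []
    # the prompt asks for 1-based source numbers, e.g. [1, 3]
    selected = [web_sources[i - 1] for i in indices
                if isinstance(i, int) and 1 <= i <= len(web_sources)]
    return selected, usage
```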
@@ -854,6 +1004,9 @@ class AgentOrchestrator:
                logger.info(f"Telegram-Pipeline: {len(articles)} Nachrichten")
                return articles, None
 
+            # Pipeline-Schritt 2: Nachrichten sammeln (Start)
+            await _pipe_start("collect")
+
            # Pipelines parallel starten (RSS + WebSearch + Podcasts + optional Telegram)
            pipelines = [_rss_pipeline(), _web_search_pipeline(), _podcast_pipeline()]
            if include_telegram:
@@ -862,7 +1015,7 @@
            pipeline_results = await asyncio.gather(*pipelines)
 
            (rss_articles, rss_feed_usage) = pipeline_results[0]
-            (search_results, search_usage) = pipeline_results[1]
+            (search_results, search_usage, search_parse_failed) = pipeline_results[1]
            (podcast_articles, _podcast_usage) = pipeline_results[2]
            telegram_articles = pipeline_results[3][0] if include_telegram else []
 
@@ -884,6 +1037,15 @@ class AgentOrchestrator:
 
            # Alle Ergebnisse zusammenführen
            all_results = rss_articles + search_results + telegram_articles
+            # Pipeline-Schritt 2: Nachrichten sammeln (fertig)
+            try:
+                _delivering_sources = len({a.get("source", "") for a in all_results if a.get("source")})
+            except Exception:
+                _delivering_sources = None
+            await _pipe_done("collect", count_value=len(all_results), count_secondary=_delivering_sources)
+
+            # Pipeline-Schritt 3: Doppeltes filtern (Start)
+            await _pipe_start("dedup")
 
            # Duplikate entfernen (normalisierte URL + Headline-Ähnlichkeit)
            seen_urls = set()
@@ -896,6 +1058,7 @@ class AgentOrchestrator:
            dupes_removed = len(all_results) - len(unique_results)
            if dupes_removed > 0:
                logger.info(f"Deduplizierung: {dupes_removed} Duplikate entfernt, {len(unique_results)} verbleibend")
+            await _pipe_done("dedup", count_value=dupes_removed, count_secondary=len(unique_results))
 
            # Relevanz-Scoring und Sortierung
            for article in unique_results:
@@ -933,18 +1096,15 @@ class AgentOrchestrator:
 
            logger.info(f"DB-Dedup: {len(existing_urls)} URLs, {len(existing_headlines)} Headlines im Bestand")
 
-            # Neue Artikel speichern und für Analyse tracken
-            new_count = 0
-            new_articles_for_analysis = []
+            # --- Dedup gegen Bestand: nur neue (noch nicht gespeicherte) Kandidaten behalten ---
+            new_candidates = []
            for article in unique_results:
-                # URL-Duplikat gegen DB
                if article.get("source_url"):
                    norm_url = _normalize_url(article["source_url"])
                    if norm_url in existing_urls:
                        continue
                    existing_urls.add(norm_url)
 
-                # Headline-Duplikat gegen DB
                headline = article.get("headline", "")
                if headline and len(headline) > 20:
                    norm_h = _normalize_headline(headline)
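`_normalize_url` and `_normalize_headline` are used in this dedup pass but defined outside the diff. A plausible minimal sketch of what such normalizers do; the exact rules in the repository may differ:

```python
import re
from urllib.parse import urlsplit, urlunsplit

def _normalize_url(url: str) -> str:
    """Collapse scheme/www/tracking/trailing-slash variants of the same article URL.

    Assumed behavior; the real helper is not part of this diff.
    """
    parts = urlsplit(url.strip().lower())
    host = parts.netloc.removeprefix("www.")
    path = parts.path.rstrip("/")
    # drop query strings entirely: utm_* parameters and session ids dominate there
    return urlunsplit(("https", host, path, "", ""))

def _normalize_headline(headline: str) -> str:
    """Lowercase, strip punctuation, and collapse whitespace for fuzzy matching."""
    h = re.sub(r"[^\w\s]", "", headline.lower())
    return re.sub(r"\s+", " ", h).strip()
```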
@@ -953,6 +1113,28 @@ class AgentOrchestrator:
                    if norm_h:
                        existing_headlines.add(norm_h)
 
+                new_candidates.append(article)
+
+            # Pipeline-Schritt 4: Relevanz bewerten (Start)
+            await _pipe_start("relevance")
+            _candidates_before_topic = len(new_candidates)
+
+            # --- Semantischer Topic-Filter (Haiku) ---
+            # Wirft Artikel raus, die zwar Keyword-Treffer hatten, aber das Kernthema
+            # der Lage nicht inhaltlich behandeln. Bei Fehler Fallback auf alle Kandidaten.
+            if new_candidates:
+                _tf_agent = AnalyzerAgent()
+                new_candidates, _tf_usage = await _tf_agent.filter_relevant_articles(
+                    title, description, new_candidates,
+                )
+                if _tf_usage:
+                    usage_acc.add(_tf_usage)
+            await _pipe_done("relevance", count_value=len(new_candidates), count_secondary=_candidates_before_topic)
+
+            # --- Neue (thematisch gefilterte) Artikel speichern und für Analyse tracken ---
+            new_count = 0
+            new_articles_for_analysis = []
+            for article in new_candidates:
                cursor = await db.execute(
                    """INSERT INTO articles (incident_id, headline, headline_de, source,
                    source_url, content_original, content_de, language, published_at, tenant_id)
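`AnalyzerAgent.filter_relevant_articles` is likewise outside the diff. Its call contract is visible above: candidates in, (filtered candidates, usage) out, falling back to all candidates when nothing parses. A sketch of a matching implementation; the prompt wording is invented for illustration, and the helper imports are assumptions noted inline:

```python
from config import CLAUDE_MODEL_FAST
from agents.claude_client import call_claude  # assumed home of call_claude (ClaudeCliError lives there per the diff)
from agents.researcher import _extract_json_array  # defined in researcher.py in this diff

async def filter_relevant_articles(title, description, candidates):
    """Keep only candidates whose content actually covers the incident's core topic."""
    numbered = "\n".join(
        f"{i}. {a.get('headline', '')}" for i, a in enumerate(candidates, start=1))
    prompt = (
        f"LAGE: {title}\nKONTEXT: {description}\n\nKANDIDATEN:\n{numbered}\n\n"
        "Antworte NUR mit einem JSON-Array der Nummern, die das Kernthema behandeln.")
    result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
    indices = _extract_json_array(result)
    if not isinstance(indices, list):
        return candidates, usage  # parsing failed: fall back to all candidates
    keep = {i for i in indices if isinstance(i, int)}
    return [a for i, a in enumerate(candidates, start=1) if i in keep], usage
```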
@@ -971,7 +1153,6 @@ class AgentOrchestrator:
                    ),
                )
                new_count += 1
-                # Artikel mit DB-ID für die Analyse tracken
                article_with_id = dict(article)
                article_with_id["id"] = cursor.lastrowid
                new_articles_for_analysis.append(article_with_id)
@@ -980,6 +1161,8 @@ class AgentOrchestrator:
 
            # Geoparsing: Orte aus neuen Artikeln extrahieren und speichern
            if new_articles_for_analysis:
+                # Pipeline-Schritt 5: Orte erkennen (Start)
+                await _pipe_start("geoparsing")
                try:
                    from agents.geoparsing import geoparse_articles
                    incident_context = f"{title} - {description}"
@@ -1010,8 +1193,12 @@ class AgentOrchestrator:
                        )
                        await db.commit()
                        logger.info(f"Category-Labels gespeichert fuer Incident {incident_id}: {category_labels}")
+                    await _pipe_done("geoparsing", count_value=geo_count, count_secondary=len(geo_results) if geo_results else 0)
                except Exception as e:
                    logger.warning(f"Geoparsing fehlgeschlagen (Pipeline laeuft weiter): {e}")
+                    await _pipe_done("geoparsing", count_value=0, count_secondary=0)
+            else:
+                await _pipe_skip("geoparsing")
 
            # Quellen-Statistiken aktualisieren
            if new_count > 0:
@@ -1112,18 +1299,22 @@ class AgentOrchestrator:
            except Exception as e:
                logger.warning("Bias-Anreicherung fehlgeschlagen (Pipeline laeuft weiter): %s", e)
 
-            # --- Analyse-Task ---
-            async def _do_analysis():
+            # --- Analyse-Task (wird nach _do_factcheck mit fact_context_block aufgerufen) ---
+            async def _do_analysis(fact_context_block: str = ""):
                analyzer = AnalyzerAgent()
                if previous_summary and new_count > 0:
                    logger.info(f"Inkrementelle Analyse: {new_count} neue Artikel zum bestehenden Lagebild")
                    return await analyzer.analyze_incremental(
                        title, description, new_articles_for_analysis,
                        previous_summary, previous_sources_json, incident_type,
+                        fact_context_block=fact_context_block,
                    )
                else:
                    logger.info("Erstanalyse: Alle Artikel werden analysiert")
-                    return await analyzer.analyze(title, description, all_articles_preloaded, incident_type)
+                    return await analyzer.analyze(
+                        title, description, all_articles_preloaded, incident_type,
+                        fact_context_block=fact_context_block,
+                    )
 
            # --- Faktencheck-Task ---
            async def _do_factcheck():
@@ -1157,15 +1348,62 @@ class AgentOrchestrator:
                articles_for_check = [dict(row) for row in await cursor.fetchall()]
                return await factchecker.check(title, articles_for_check, incident_type)
 
-            # Beide Tasks PARALLEL starten
-            logger.info("Starte Analyse und Faktencheck parallel...")
-            analysis_result, factcheck_result = await asyncio.gather(
-                _do_analysis(),
-                _do_factcheck(),
+            # Pipeline-Schritt 6: Faktencheck zuerst (sequenziell). Liefert den
+            # Faktenkontext fuer das Lagebild, damit dieses auf geprueftem Stand
+            # schreibt und Unklarheiten explizit benennt. Variante 1: bei
+            # Faktencheck-Fehler faellt das Lagebild auf den alten Pfad ohne
+            # Faktenkontext zurueck (Refresh bricht NICHT ab).
+            await _pipe_start("factcheck")
+            factcheck_result: tuple = ([], None)
+            fact_context_block = ""
+            factcheck_failed_reason: str | None = None
+            try:
+                factcheck_result = await _do_factcheck()
+            except Exception as fc_err:
+                factcheck_failed_reason = str(fc_err)
+                logger.warning(
+                    "Faktencheck fehlgeschlagen, Lagebild laeuft ohne Faktenkontext: %s",
+                    fc_err, exc_info=True,
+                )
+
+            fact_checks, fc_usage = factcheck_result if factcheck_result else ([], None)
+
+            # Pipeline-Schritt 6 done direkt nach dem Aufruf — die finale
+            # DB-Persistierung passiert weiter unten, aber fuer die UI ist
+            # der Faktencheck-Aufruf hier abgeschlossen. Der count_value
+            # ist eine Schaetzung (echte Zahl steht spaeter in der DB).
+            _fc_estimated_new = max(0, len(fact_checks or []) - len(existing_facts or []))
+            await _pipe_done(
+                "factcheck",
+                count_value=_fc_estimated_new,
+                count_secondary=len(fact_checks) if fact_checks else 0,
            )
+
+            # Faktenkontext fuer das Lagebild bauen.
+            try:
+                from agents.analyzer import build_fact_context_block as _build_fc_ctx
+                fact_context_block = _build_fc_ctx(
+                    existing_facts or [], fact_checks or [], incident_type,
+                )
+                if fact_context_block:
+                    logger.info(
+                        "Faktenkontext fuer Lagebild: %d Zeichen, basierend auf %d alten + %d neuen Fakten",
+                        len(fact_context_block), len(existing_facts or []), len(fact_checks or []),
+                    )
+            except Exception as ctx_err:
+                logger.warning("build_fact_context_block fehlgeschlagen: %s", ctx_err, exc_info=True)
+                fact_context_block = ""
+
+            # Pipeline-Schritt 7: Lagebild verfassen (jetzt mit Faktenkontext)
+            await _pipe_start("summary")
+            logger.info(
+                "Starte Lagebild (sequenziell nach Faktencheck%s)",
+                " — OHNE Faktenkontext (Fallback)" if factcheck_failed_reason else "",
+            )
+            analysis_result = await _do_analysis(fact_context_block)
+
            analysis, analysis_usage = analysis_result
-            fact_checks, fc_usage = factcheck_result
+            await _pipe_done("summary", count_value=None, count_secondary=None)
 
            # --- Analyse-Ergebnisse verarbeiten ---
            if analysis_usage:
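`build_fact_context_block` is imported from agents.analyzer but its body is not in this diff. A sketch of the shape such a helper could have; the fact-record field names (`status`, `claim`) are assumptions:

```python
def build_fact_context_block(existing_facts, new_facts, incident_type):
    """Condense checked facts into a prompt block the summary writer can rely on.

    Hypothetical sketch; the real field names and wording may differ.
    """
    lines = []
    for fact in list(existing_facts) + list(new_facts):
        status = fact.get("status", "unklar")  # e.g. bestaetigt / widerlegt / unklar
        claim = fact.get("claim") or fact.get("fact", "")
        if claim:
            lines.append(f"- [{status}] {claim}")
    if not lines:
        return ""  # empty block keeps the old prompt path unchanged
    return "GEPRUEFTE FAKTENLAGE:\n" + "\n".join(lines) + "\n"
```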
@@ -1258,26 +1496,83 @@ class AgentOrchestrator:
                  snap_articles, snap_fcs, log_id, now, tenant_id),
            )
 
-            # Übersetzungen aktualisieren (nur für gültige DB-IDs)
-            for translation in analysis.get("translations", []):
-                article_id = translation.get("article_id")
-                if isinstance(article_id, int):
-                    await db.execute(
-                        "UPDATE articles SET headline_de = ?, content_de = ? WHERE id = ? AND incident_id = ?",
-                        (translation.get("headline_de"), translation.get("content_de"), article_id, incident_id),
-                    )
+            # Translations werden vom dedizierten Translator-Agent unten
+            # erzeugt (frueher inline im Analyzer-Output, das war token-
+            # instabil und schaetzte regelmaessig content_de aus).
 
            await db.commit()
 
            # Cancel-Check nach paralleler Verarbeitung
            self._check_cancelled(incident_id)
+
+            # --- Translator (Haiku) fuer fremdsprachige Artikel ohne DE-Texte ---
+            # Idempotent: nur Artikel ohne headline_de/content_de werden geholt.
+            # Lauft nach der Analyse (Lagebild ist schon committed) und vor QC
+            # (damit normalize_umlaut_articles auch die frischen DE-Texte fasst).
+            try:
+                tr_cursor = await db.execute(
+                    """SELECT id, headline, content_original, language
+                    FROM articles
+                    WHERE incident_id = ?
+                    AND language IS NOT NULL AND LOWER(language) != 'de'
+                    AND (headline_de IS NULL OR headline_de = ''
+                    OR content_de IS NULL OR content_de = '')""",
+                    (incident_id,),
+                )
+                pending_translations = [dict(r) for r in await tr_cursor.fetchall()]
+                if pending_translations:
+                    logger.info(
+                        "Translator fuer Incident %d: %d Artikel ohne DE-Uebersetzung",
+                        incident_id, len(pending_translations),
+                    )
+                    from agents.translator import translate_articles
+                    from services.post_refresh_qc import normalize_german_umlauts as _norm_de2
+                    translations = await translate_articles(
+                        pending_translations,
+                        output_lang="de",
+                        usage_accumulator=usage_acc,
+                    )
+                    for t in translations:
+                        hd = t.get("headline_de")
+                        cd = t.get("content_de")
+                        if hd:
+                            hd, _ = _norm_de2(hd)
+                        if cd:
+                            cd, _ = _norm_de2(cd)
+                        if hd or cd:
+                            await db.execute(
+                                "UPDATE articles SET headline_de = COALESCE(?, headline_de), "
+                                "content_de = COALESCE(?, content_de) WHERE id = ? AND incident_id = ?",
+                                (hd, cd, t["id"], incident_id),
+                            )
+                    await db.commit()
+                    logger.info(
+                        "Translator fuer Incident %d: %d/%d Artikel uebersetzt",
+                        incident_id, len(translations), len(pending_translations),
+                    )
+            except Exception as e:
+                logger.error("Translator-Fehler fuer Incident %d: %s", incident_id, e, exc_info=True)
+                # Refresh trotz Translator-Fehler weiterlaufen lassen
 
            # --- Neueste Entwicklungen (nur Live-Monitoring / adhoc) ---
-            if incident_type == "adhoc" and new_articles_for_analysis:
+            # Basis ist jetzt das frisch generierte Lagebild (autoritativ, thematisch sauber).
+            # Zeitstempel und Quellen kommen aus den jüngsten belegenden Artikeln.
+            dev_summary_source = (locals().get("new_summary") or previous_summary or "").strip()
+            if incident_type == "adhoc" and dev_summary_source:
                try:
+                    # Top-60 neueste Artikel mit Publikationsdatum als Beleg-Pool.
+                    dev_cursor = await db.execute(
+                        """SELECT id, headline, headline_de, source, source_url, published_at
+                        FROM articles
+                        WHERE incident_id = ? AND published_at IS NOT NULL
+                        ORDER BY published_at DESC LIMIT 60""",
+                        (incident_id,),
+                    )
+                    dev_articles = [dict(row) for row in await dev_cursor.fetchall()]
+
                    dev_analyzer = AnalyzerAgent()
                    dev_text, dev_usage = await dev_analyzer.generate_latest_developments(
-                        title, description, new_articles_for_analysis, previous_developments,
+                        title, description, dev_summary_source, dev_articles, previous_developments,
                    )
                    if dev_usage:
                        usage_acc.add(dev_usage)
@@ -1406,6 +1701,14 @@ class AgentOrchestrator:
 
            await db.commit()
 
+            # Pipeline-Schritt 7 (Fakten pruefen) wurde bereits frueher als done
+            # markiert (siehe weiter oben — direkt nach dem _do_factcheck-Aufruf,
+            # bevor das Lagebild generiert wurde). Hier nur noch die DB-
+            # Persistierung der Fakten, ohne den Step erneut zu schliessen.
+
+            # Pipeline-Schritt 8: Qualitätscheck (Start, ohne Zahlen)
+            await _pipe_start("qc")
+
            # Post-Refresh Quality Check: Duplikate und Karten-Kategorien pruefen
            try:
                from services.post_refresh_qc import run_post_refresh_qc
@@ -1417,6 +1720,12 @@ class AgentOrchestrator:
                )
            except Exception as qc_err:
                logger.warning(f"Post-Refresh QC fehlgeschlagen: {qc_err}")
+            await _pipe_done("qc", count_value=None, count_secondary=None)
+
+            # Pipeline-Schritt 9: Benachrichtigen (Start)
+            await _pipe_start("notify")
+            _notify_count = 0
+
            # Gebündelte Notification senden (nicht beim ersten Refresh)
            if not is_first_refresh:
                if self._ws_manager:
@@ -1473,6 +1782,32 @@ class AgentOrchestrator:
                    db, incident_id, title, visibility, created_by, tenant_id, db_notifications,
                    incident_type=incident_type,
                )
+                _notify_count = len(db_notifications)
+
+            # Pipeline-Schritt 9: Benachrichtigen (fertig)
+            await _pipe_done("notify", count_value=_notify_count, count_secondary=None)
+
+            # Falls Analyse-Block übersprungen wurde (kein neuer Artikel und Summary existiert),
+            # die noch offenen Pipeline-Schritte als übersprungen markieren.
+            for _skipped_key in ("summary", "factcheck", "qc", "notify"):
+                if _skipped_key in _step_ids or _skipped_key not in {"summary", "factcheck", "qc", "notify"}:
+                    pass
+            # Saubere Variante: alle noch offenen Steps am Ende skippen
+            for _open_key in list(_step_ids.keys()):
+                await _pipe_skip(_open_key)
+            # Auch Steps die nie gestartet wurden (bei übersprungenem Outer-If)
+            _started_keys = set()
+            try:
+                _check_cursor = await db.execute(
+                    "SELECT step_key FROM refresh_pipeline_steps WHERE refresh_log_id = ? AND pass_number = ?",
+                    (log_id, _pass_nr),
+                )
+                _started_keys = {row[0] for row in await _check_cursor.fetchall()}
+            except Exception:
+                pass
+            for _missing_key in ("summary", "factcheck", "qc", "notify"):
+                if _missing_key not in _started_keys:
+                    await _pipe_skip(_missing_key)
 
            # Refresh-Log abschließen (mit Token-Statistiken)
            await db.execute(
@@ -1495,38 +1830,9 @@ class AgentOrchestrator:
 
            # Credits-Tracking: Monatliche Aggregation + Credits abziehen
            if tenant_id and usage_acc.total_cost_usd > 0:
-                year_month = datetime.now(TIMEZONE).strftime('%Y-%m')
-                await db.execute("""
-                    INSERT INTO token_usage_monthly
-                    (organization_id, year_month, source, input_tokens, output_tokens,
-                     cache_creation_tokens, cache_read_tokens, total_cost_usd, api_calls, refresh_count)
-                    VALUES (?, ?, 'monitor', ?, ?, ?, ?, ?, ?, 1)
-                    ON CONFLICT(organization_id, year_month, source) DO UPDATE SET
-                        input_tokens = input_tokens + excluded.input_tokens,
-                        output_tokens = output_tokens + excluded.output_tokens,
-                        cache_creation_tokens = cache_creation_tokens + excluded.cache_creation_tokens,
-                        cache_read_tokens = cache_read_tokens + excluded.cache_read_tokens,
-                        total_cost_usd = total_cost_usd + excluded.total_cost_usd,
-                        api_calls = api_calls + excluded.api_calls,
-                        refresh_count = refresh_count + 1,
-                        updated_at = CURRENT_TIMESTAMP
-                """, (tenant_id, year_month,
-                      usage_acc.input_tokens, usage_acc.output_tokens,
-                      usage_acc.cache_creation_tokens, usage_acc.cache_read_tokens,
-                      round(usage_acc.total_cost_usd, 7), usage_acc.call_count))
-
-                # Credits auf Lizenz abziehen
-                lic_cursor = await db.execute(
-                    "SELECT cost_per_credit FROM licenses WHERE organization_id = ? AND status = 'active' ORDER BY id DESC LIMIT 1",
-                    (tenant_id,))
-                lic = await lic_cursor.fetchone()
-                if lic and lic["cost_per_credit"] and lic["cost_per_credit"] > 0:
-                    credits_consumed = usage_acc.total_cost_usd / lic["cost_per_credit"]
-                    await db.execute(
-                        "UPDATE licenses SET credits_used = COALESCE(credits_used, 0) + ? WHERE organization_id = ? AND status = 'active'",
-                        (round(credits_consumed, 2), tenant_id))
+                from services.license_service import charge_usage_to_tenant
+                await charge_usage_to_tenant(db, tenant_id, usage_acc, source="monitor")
                await db.commit()
-                logger.info(f"Credits: {round(credits_consumed, 1) if lic and lic['cost_per_credit'] else 0} abgezogen für Tenant {tenant_id}")
 
            # Quellen-Discovery im Background starten
            if unique_results:
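The body of `charge_usage_to_tenant` is not in this diff, but the inline code it replaces is deleted in this very hunk, so its behavior can be reconstructed almost verbatim: a monthly upsert into token_usage_monthly plus a credit deduction on the active license. A sketch that preserves that logic; parameterizing the `source` column is an assumption suggested by the new call site, as is the import location of `TIMEZONE`:

```python
from datetime import datetime
from config import TIMEZONE  # assumed location; the orchestrator uses the same constant

async def charge_usage_to_tenant(db, tenant_id, usage_acc, source="monitor"):
    """Monthly token aggregation + credit deduction, lifted out of the orchestrator."""
    year_month = datetime.now(TIMEZONE).strftime('%Y-%m')
    await db.execute("""
        INSERT INTO token_usage_monthly
        (organization_id, year_month, source, input_tokens, output_tokens,
         cache_creation_tokens, cache_read_tokens, total_cost_usd, api_calls, refresh_count)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 1)
        ON CONFLICT(organization_id, year_month, source) DO UPDATE SET
            input_tokens = input_tokens + excluded.input_tokens,
            output_tokens = output_tokens + excluded.output_tokens,
            cache_creation_tokens = cache_creation_tokens + excluded.cache_creation_tokens,
            cache_read_tokens = cache_read_tokens + excluded.cache_read_tokens,
            total_cost_usd = total_cost_usd + excluded.total_cost_usd,
            api_calls = api_calls + excluded.api_calls,
            refresh_count = refresh_count + 1,
            updated_at = CURRENT_TIMESTAMP
    """, (tenant_id, year_month, source,
          usage_acc.input_tokens, usage_acc.output_tokens,
          usage_acc.cache_creation_tokens, usage_acc.cache_read_tokens,
          round(usage_acc.total_cost_usd, 7), usage_acc.call_count))
    # Credits auf Lizenz abziehen (identical to the deleted inline block)
    lic_cursor = await db.execute(
        "SELECT cost_per_credit FROM licenses WHERE organization_id = ? "
        "AND status = 'active' ORDER BY id DESC LIMIT 1", (tenant_id,))
    lic = await lic_cursor.fetchone()
    if lic and lic["cost_per_credit"] and lic["cost_per_credit"] > 0:
        credits_consumed = usage_acc.total_cost_usd / lic["cost_per_credit"]
        await db.execute(
            "UPDATE licenses SET credits_used = COALESCE(credits_used, 0) + ? "
            "WHERE organization_id = ? AND status = 'active'",
            (round(credits_consumed, 2), tenant_id))
```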
@@ -1548,6 +1854,11 @@ class AgentOrchestrator:
 
            logger.info(f"Refresh für Lage {incident_id} abgeschlossen: {new_count} neue Artikel")
 
+            # Multi-Pass-Diagnose: Pass-Ergebnis zurueck an Multi-Pass-Caller geben
+            if _pass_info is not None:
+                _pass_info["new_count"] = new_count
+                _pass_info["parse_failed"] = search_parse_failed
+
            # Executive Summary im Hintergrund vorab generieren (fuer schnelleren Export)
            if new_count > 0:
                async def _pregenerate_exec_summary():
@@ -1602,6 +1913,7 @@
        Durchlauf 3: Konsolidierung (letzte Lücken, Fakten-Upgrade)
        """
        total = RESEARCH_MULTI_PASS_COUNT
+        pass_results = []
 
        for pass_nr in range(1, total + 1):
            # Cancel zwischen Durchläufen prüfen
@@ -1642,12 +1954,27 @@
                if is_last:
                    raise
                # Nicht-letzter Durchlauf: weiter mit nächstem, bisherige Ergebnisse bleiben
+            finally:
+                pass_results.append(pass_info)
 
        logger.info(
            f"Research Multi-Pass abgeschlossen für Lage {incident_id}: "
            f"{total} Durchläufe"
        )
+
+        # Diagnose: Wenn ALLE Passes 0 neue Artikel hatten UND mindestens einer
+        # an einem Parser-Fehler scheiterte, ist die Recherche faktisch fehlgeschlagen —
+        # Claude lieferte zwar Antworten, aber kein verwertbares JSON. Sonst bliebe
+        # die Lage ohne sichtbare Fehlermeldung leer (siehe staging Lage "Friedrich Merz").
+        total_new = sum(p.get("new_count", 0) for p in pass_results)
+        any_parse_failed = any(p.get("parse_failed") for p in pass_results)
+        if total_new == 0 and any_parse_failed:
+            raise RuntimeError(
+                "Recherche fehlgeschlagen: Claude lieferte keine verwertbaren Quellen "
+                "(JSON-Parsing schlug bei mindestens einem Durchlauf fehl). "
+                "Bitte Logs prüfen und Refresh erneut starten."
+            )
 
 
 # Singleton-Instanz
 orchestrator = AgentOrchestrator()
@@ -7,6 +7,60 @@ from config import CLAUDE_MODEL_FAST
 
 logger = logging.getLogger("osint.researcher")
 
+
+class ResearcherParseError(Exception):
+    """Claude hat eine nicht-leere Antwort geliefert, aus der kein JSON extrahiert werden konnte."""
+
+
+def _truncate_for_log(text: str, limit: int = 600) -> str:
+    """Kürzt eine Claude-Antwort für Logs, damit ein Sample sichtbar ist."""
+    if not text:
+        return ""
+    snippet = text.strip().replace("\n", "\\n")
+    if len(snippet) > limit:
+        snippet = snippet[:limit] + "..."
+    return snippet
+
+
+def _extract_json_array(text: str):
+    """Findet das erste vollständige JSON-Array im Text (auch mit Vor-/Nachtext oder Markdown-Fence)."""
+    if not text:
+        return None
+    decoder = json.JSONDecoder()
+    idx = 0
+    while True:
+        bracket = text.find("[", idx)
+        if bracket == -1:
+            return None
+        try:
+            obj, _ = decoder.raw_decode(text, bracket)
+        except json.JSONDecodeError:
+            idx = bracket + 1
+            continue
+        if isinstance(obj, list):
+            return obj
+        idx = bracket + 1
+
+
+def _extract_json_object(text: str):
+    """Findet das erste vollständige JSON-Objekt im Text (auch mit Vor-/Nachtext oder Markdown-Fence)."""
+    if not text:
+        return None
+    decoder = json.JSONDecoder()
+    idx = 0
+    while True:
+        brace = text.find("{", idx)
+        if brace == -1:
+            return None
+        try:
+            obj, _ = decoder.raw_decode(text, brace)
+        except json.JSONDecodeError:
+            idx = brace + 1
+            continue
+        if isinstance(obj, dict):
+            return obj
+        idx = brace + 1
+
 RESEARCH_PROMPT_TEMPLATE = """Du bist ein OSINT-Recherche-Agent für ein Lagemonitoring-System.
 AUSGABESPRACHE: {output_language}
 - KEINE Gedankenstriche (— oder –) verwenden, stattdessen Kommas, Doppelpunkte oder neue Saetze.
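Because `raw_decode` scans forward from each candidate bracket, prose or a markdown fence around the payload does not matter; only the first complete, syntactically valid array (or object) is accepted. For example:

```python
messy = 'Hier die Ergebnisse:\n```json\n[{"headline": "Test", "source": "dpa"}]\n```\nFertig.'
print(_extract_json_array(messy))   # -> [{'headline': 'Test', 'source': 'dpa'}]
print(_extract_json_array("keine Klammern"))  # -> None
print(_extract_json_object('Vorspann {"feeds": [1, 2]} Nachspann'))  # -> {'feeds': [1, 2]}
```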
@@ -15,7 +69,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
 AUFTRAG: Suche nach aktuellen Informationen zu folgendem Vorfall:
 Titel: {title}
 Kontext: {description}
-{existing_context}
+{existing_context}{preferred_sources_block}
 REGELN:
 - Suche nur bei seriösen Nachrichtenquellen (Nachrichtenagenturen, Qualitätszeitungen, öffentlich-rechtliche Medien, Behörden)
 - KEIN Social Media (Twitter/X, Facebook, Instagram, TikTok, Reddit)
@@ -23,7 +77,7 @@ REGELN:
 {language_instruction}
 - Faktenbasiert und neutral - keine Spekulationen
 - KRITISCH für source_url: Kopiere die EXAKTE URL aus den WebSearch-Ergebnissen. Erfinde oder konstruiere NIEMALS URLs aus Mustern oder Erinnerung. Wenn du die exakte URL eines Artikels nicht aus den Suchergebnissen hast, lass diesen Artikel komplett weg.
-- Nutze removepaywalls.com für Paywall-geschützte Artikel (z.B. Spiegel+, Zeit+, SZ+): https://www.removepaywalls.com/search?url=ARTIKEL_URL
+- Nutze removepaywall.com für Paywall-geschützte Artikel (z.B. Spiegel+, Zeit+, SZ+): https://www.removepaywall.com/search?url=ARTIKEL_URL
 - Nutze WebFetch um die 3-5 wichtigsten Artikel vollständig abzurufen und zusammenzufassen
 
 Gib die Ergebnisse AUSSCHLIESSLICH als JSON-Array zurück, ohne Erklärungen davor oder danach.
@@ -46,7 +100,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
 AUFTRAG: Führe eine umfassende, mehrstufige Hintergrundrecherche durch zu:
 Titel: {title}
 Kontext: {description}
-{existing_context}
+{existing_context}{preferred_sources_block}
 RECHERCHE IN 4 PHASEN — Führe ALLE Phasen nacheinander durch:
 
 PHASE 1 — BREITE ERFASSUNG:
@@ -70,7 +124,7 @@ Nutze spezifische Suchbegriffe für institutionelle Quellen. Ziel: 6-10 weitere
 PHASE 4 — VERIFIKATION UND VERTIEFUNG:
 Nutze WebFetch um die 6-10 wichtigsten Artikel vollständig abzurufen und ausführlich zusammenzufassen.
 Priorisiere dabei Primärquellen und investigative Berichte.
-Nutze removepaywalls.com für Paywall-geschützte Artikel (z.B. https://www.removepaywalls.com/search?url=ARTIKEL_URL)
+Nutze removepaywall.com für Paywall-geschützte Artikel (z.B. https://www.removepaywall.com/search?url=ARTIKEL_URL)
 
 {language_instruction}
 
@@ -145,19 +199,45 @@ AKTUELLE HEADLINES (die letzten Meldungen zu diesem Thema):
 
 AUFGABE:
 Generiere 5 Begriffspaare (DE + EN), mit denen neue RSS-Artikel zu diesem Thema gefunden werden.
-Ein Artikel gilt als relevant, wenn mindestens 2 dieser Begriffe im Titel oder der Beschreibung vorkommen.
+Ein Artikel gilt als relevant, wenn mindestens 2 dieser Begriffe im Titel oder der Beschreibung vorkommen
+- bei spezifischen Begriffen (Eigennamen, lange Begriffe ab 7 Zeichen) reicht 1 Treffer.
 
 REGELN:
-- Die ersten 2 Begriffspaare MUESSEN die zentralen Akteure/Laender/Themen sein (z.B. iran, israel, usa) — also die Begriffe, die in fast JEDEM Artikel zum Thema vorkommen
-- Die letzten 3 Begriffspaare sind aktuelle Entwicklungen aus den Headlines (Orte, Akteure, Schluesselwoerter der aktuellen Phase)
-- Begriffe muessen so gewaehlt sein, dass sie in kurzen RSS-Titeln matchen (einzelne Woerter, keine Phrasen)
-- Alle Begriffe in Kleinbuchstaben
-- Exakt 5 Begriffspaare
+- ZWINGEND: Eigennamen oder spezifische Begriffe aus dem THEMA (z.B. Personennamen, Tiernamen,
+  Ortsnamen wie "timmy", "buckelwal", "merz", "dobrindt") MUESSEN als eigene Begriffspaare
+  enthalten sein. Solche Begriffe sind oft das einzige, was in kurzen Headlines vorkommt.
+- Die ersten 2 Begriffspaare sind die zentralen Akteure/Laender/Themen (z.B. iran, israel,
+  buckelwal, timmy) — also die Begriffe, die in fast JEDEM Artikel zum Thema vorkommen.
+- Die uebrigen 3 Begriffspaare sind aktuelle Entwicklungen aus den Headlines (Orte, Akteure,
+  Schluesselwoerter der aktuellen Phase).
+- Wenn DE und EN identisch sind (Eigennamen), trotzdem das Paar einreichen.
+- Begriffe muessen so gewaehlt sein, dass sie in kurzen RSS-Titeln matchen (einzelne Woerter,
+  keine Phrasen, keine Konjunktionen).
+- Alle Begriffe in Kleinbuchstaben.
+- Exakt 5 Begriffspaare.
 
 Antwort NUR als JSON-Array:
 [{{"de": "iran", "en": "iran"}}, {{"de": "israel", "en": "israel"}}, {{"de": "teheran", "en": "tehran"}}, {{"de": "luftangriff", "en": "airstrike"}}, {{"de": "trump", "en": "trump"}}]"""
+
+
+WEB_SOURCE_SELECTION_PROMPT = """Du bist ein OSINT-Analyst. Pruefe diese eingetragenen Web-Quellen und waehle nur die thematisch passenden aus.
+
+LAGE: {title}
+KONTEXT: {description}
+
+WEB-QUELLEN:
+{source_list}
+
+REGELN:
+- Waehle nur Quellen, die thematisch tatsaechlich zur Lage passen
+- Lieber leere Liste zurueckgeben als pauschal alle aufnehmen
+- Behoerden- und institutionelle Quellen sind oft hochwertig, aber nur wenn das Thema passt
+- Petitions-Plattformen z.B. nur bei Lagen zu Buergerinitiativen, Gesetzen, oeffentlichem Druck
+- Bei reinen Kriegs-/Konflikt-/Tagesnachrichten meistens leere Liste
+
+Antworte NUR mit einem JSON-Array der Quellen-Nummern, z.B. [1, 3] oder []."""
 
 
 TELEGRAM_CHANNEL_SELECTION_PROMPT = """Du bist ein OSINT-Analyst. Waehle aus dieser Liste von Telegram-Kanaelen diejenigen aus, die fuer die Lage relevant sein koennten.
 
 LAGE: {title}
@@ -211,30 +291,28 @@ class ResearcherAgent:
        try:
            result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
 
-            # Neues Format: JSON-Objekt mit "feeds" und "keywords"
            keywords = None
            indices = None
 
-            # Versuche JSON-Objekt zu parsen
-            obj_match = re.search(r'\{[^{}]*"feeds"\s*:\s*\[[\d\s,]+\][^{}]*\}', result, re.DOTALL)
-            if obj_match:
-                try:
-                    obj = json.loads(obj_match.group())
-                    indices = obj.get("feeds", [])
-                    raw_keywords = obj.get("keywords", [])
-                    if isinstance(raw_keywords, list) and raw_keywords:
-                        keywords = [str(k).lower().strip() for k in raw_keywords if k]
-                        logger.info(f"Feed-Selektion Keywords: {keywords}")
-                except (json.JSONDecodeError, ValueError):
-                    pass
+            # Neues Format: {"feeds": [...], "keywords": [...]}
+            obj = _extract_json_object(result)
+            if isinstance(obj, dict) and isinstance(obj.get("feeds"), list):
+                indices = obj["feeds"]
+                raw_keywords = obj.get("keywords", [])
+                if isinstance(raw_keywords, list) and raw_keywords:
+                    keywords = [str(k).lower().strip() for k in raw_keywords if k]
+                    logger.info(f"Feed-Selektion Keywords: {keywords}")
 
-            # Fallback: altes Array-Format
+            # Fallback: nacktes Array
            if indices is None:
-                arr_match = re.search(r'\[[\d\s,]+\]', result)
-                if not arr_match:
-                    logger.warning("Feed-Selektion: Kein JSON in Antwort, nutze alle Feeds")
+                arr = _extract_json_array(result)
+                if not isinstance(arr, list):
+                    logger.warning(
+                        "Feed-Selektion: Kein JSON in Antwort, nutze alle Feeds. Sample: %s",
+                        _truncate_for_log(result),
+                    )
                    return feeds_metadata, None, usage
-                indices = json.loads(arr_match.group())
+                indices = arr
 
            selected = []
            for idx in indices:
@@ -275,19 +353,12 @@ class ResearcherAgent:
        try:
            result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
 
-            parsed = None
-            try:
-                parsed = json.loads(result)
-            except json.JSONDecodeError:
-                match = re.search(r'\[.*\]', result, re.DOTALL)
-                if match:
-                    try:
-                        parsed = json.loads(match.group())
-                    except json.JSONDecodeError:
-                        pass
-
-            if not parsed or not isinstance(parsed, list):
-                logger.warning("Keyword-Extraktion: Kein gueltiges JSON erhalten")
+            parsed = _extract_json_array(result)
+            if not isinstance(parsed, list):
+                logger.warning(
+                    "Keyword-Extraktion: Kein gueltiges JSON erhalten. Sample: %s",
+                    _truncate_for_log(result),
+                )
                return None, usage
 
            # Flache Liste: alle DE + EN Begriffe
@@ -302,6 +373,17 @@ class ResearcherAgent:
                if en and en != de:
                    keywords.append(en)
 
+            # Bug-2-Fallback: Lagentitel-Wörter (>=4 Zeichen) zwingend in Keyword-Liste,
+            # falls Haiku sie weggelassen hat. Verhindert "Buckelwal timmy"-Bug, bei dem
+            # der Eigenname "timmy" fehlte und damit Headlines mit nur "Buckelwal" durchfielen.
+            STOPWORDS = {"der", "die", "das", "und", "oder", "von", "vom", "zum", "zur",
+                         "the", "and", "for", "with", "ueber", "über"}
+            for word in (title or "").lower().split():
+                w = word.strip(".,;:!?\"\'()[]{}")
+                if len(w) >= 4 and w not in STOPWORDS and w not in keywords:
+                    keywords.append(w)
+                    logger.info(f"Lagentitel-Keyword '{w}' nachträglich injiziert")
+
            if keywords:
                logger.info(f"Dynamische Keywords ({len(keywords)}): {keywords}")
            return keywords if keywords else None, usage
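For a Lage titled "Buckelwal Timmy in der Ostsee" the fallback guarantees that `buckelwal`, `timmy`, and `ostsee` land in the keyword list even when the model omitted them; `in` fails the length check and `der` is a stopword:

```python
keywords = ["wal", "whale"]  # what the model returned
title = "Buckelwal Timmy in der Ostsee"
STOPWORDS = {"der", "die", "das", "und", "oder", "von", "vom", "zum", "zur",
             "the", "and", "for", "with", "ueber", "über"}
for word in title.lower().split():
    w = word.strip(".,;:!?\"'()[]{}")
    if len(w) >= 4 and w not in STOPWORDS and w not in keywords:
        keywords.append(w)
print(keywords)  # ['wal', 'whale', 'buckelwal', 'timmy', 'ostsee']
```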
@@ -310,9 +392,35 @@ class ResearcherAgent:
            logger.warning(f"Keyword-Extraktion fehlgeschlagen: {e}")
            return None, None
 
-    async def search(self, title: str, description: str = "", incident_type: str = "adhoc", international: bool = True, user_id: int = None, existing_articles: list[dict] = None) -> tuple[list[dict], ClaudeUsage | None]:
-        """Sucht nach Informationen zu einem Vorfall."""
+    async def search(self, title: str, description: str = "", incident_type: str = "adhoc", international: bool = True, user_id: int = None, existing_articles: list[dict] = None, preferred_sources: list[dict] = None) -> tuple[list[dict], ClaudeUsage | None, bool]:
+        """Sucht nach Informationen zu einem Vorfall.
+
+        Returns:
+            (artikel, usage, parse_failed) — parse_failed ist True, wenn Claude geantwortet hat,
+            das JSON aber nicht extrahierbar war. So kann der Orchestrator zwischen
+            "echt keine Treffer" und "kaputte Antwort" unterscheiden.
+        """
        from config import OUTPUT_LANGUAGE
+
+        # Bevorzugte Web-Quellen als Prompt-Block (optional)
+        preferred_sources_block = ""
+        if preferred_sources:
+            ps_lines = []
+            for s in preferred_sources:
+                domain = s.get("domain", "")
+                name = s.get("name", domain) or domain
+                if not domain:
+                    continue
+                ps_lines.append(f"- {domain} ({name})")
+            if ps_lines:
+                preferred_sources_block = (
+                    "\nEINGETRAGENE WEB-QUELLEN (vom Betreiber als seriös markiert):\n"
+                    + "\n".join(ps_lines) + "\n"
+                    "EMPFEHLUNG: Wenn diese Domains thematisch zur Lage passen, suche dort gezielt "
+                    "mit \"site:domain [Suchbegriff]\". Sie sind vertrauenswuerdig eingetragen, ersetzen "
+                    "aber nicht deine sonstige Recherche.\n"
+                )
+
        if incident_type == "research":
            lang_instruction = LANG_DEEP_INTERNATIONAL if international else LANG_DEEP_GERMAN_ONLY
            # Bestehende Artikel als Kontext für den Prompt aufbereiten
@@ -332,6 +440,7 @@ class ResearcherAgent:
             prompt = DEEP_RESEARCH_PROMPT_TEMPLATE.format(
                 title=title, description=description, language_instruction=lang_instruction,
                 output_language=OUTPUT_LANGUAGE, existing_context=existing_context,
+                preferred_sources_block=preferred_sources_block,
             )
         else:
             lang_instruction = LANG_INTERNATIONAL if international else LANG_GERMAN_ONLY
@@ -350,11 +459,18 @@ class ResearcherAgent:
             prompt = RESEARCH_PROMPT_TEMPLATE.format(
                 title=title, description=description, language_instruction=lang_instruction,
                 output_language=OUTPUT_LANGUAGE, existing_context=existing_context,
+                preferred_sources_block=preferred_sources_block,
             )

         try:
             result, usage = await call_claude(prompt)
-            articles = self._parse_response(result)
+            try:
+                articles = self._parse_response(result)
+            except ResearcherParseError as parse_err:
+                # Claude answered, but without usable JSON.
+                # Still return the usage so credits are booked correctly.
+                logger.warning("Claude-Recherche: %s", parse_err)
+                return [], usage, True

             # Load excluded sources dynamically from the DB
             excluded_sources = await self._get_excluded_sources(user_id=user_id)
@@ -376,13 +492,13 @@ class ResearcherAgent:
                 filtered.append(article)

             logger.info(f"Recherche ergab {len(filtered)} Artikel (von {len(articles)} gefundenen, international={international})")
-            return filtered, usage
+            return filtered, usage, False

         except TimeoutError:
             raise  # propagate the timeout upward for the orchestrator's retry
         except Exception as e:
             logger.error(f"Recherche-Fehler: {e}")
-            return [], None
+            return [], None, False

     async def _get_excluded_sources(self, user_id: int = None) -> list[str]:
         """Loads excluded sources (global + per-user)."""
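The return type of search() grew from a 2-tuple to a 3-tuple, so every caller needs an update. The orchestrator itself is not shown in this comparison; a hypothetical sketch of how the new parse_failed flag could be consumed, assuming a single-retry policy:

```python
async def run_research_step(researcher, title: str, description: str, user_id: int):
    """Hypothetical caller sketch; not taken from the diff."""
    articles, usage, parse_failed = await researcher.search(title, description, user_id=user_id)
    if parse_failed:
        # Claude answered but the JSON was unusable: retry once instead of
        # treating the incident as "no new hits".
        articles, retry_usage, parse_failed = await researcher.search(title, description, user_id=user_id)
    return articles, parse_failed
```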
@@ -405,56 +521,118 @@ class ResearcherAgent:
         return list(EXCLUDED_SOURCES)

     def _parse_response(self, response: str) -> list[dict]:
-        """Parses the Claude response as a JSON array."""
-        # Try to parse the JSON directly
+        """Parses the Claude response as a JSON array.
+
+        Raises ResearcherParseError when the response is non-empty but no JSON
+        can be extracted. A genuine empty list (e.g. when Claude really has no
+        hits) is returned as [].
+        """
+        text = (response or "").strip()
+        if not text:
+            return []
+
+        # 1) Parse directly (response is already clean JSON)
         try:
-            data = json.loads(response)
+            data = json.loads(text)
             if isinstance(data, list):
                 return data
-            if isinstance(data, dict) and "articles" in data:
+            if isinstance(data, dict) and isinstance(data.get("articles"), list):
                 return data["articles"]
         except json.JSONDecodeError:
             pass

-        # Extract a JSON code block
-        code_pat = r'`{3}(?:json)?\s*\n?(\[.*?\])\s*`{3}'
-        code_match = re.search(code_pat, response, re.DOTALL)
-        if code_match:
+        # 2) A JSON array anywhere in the text (markdown fence or surrounding prose)
+        arr = _extract_json_array(text)
+        if isinstance(arr, list):
+            return arr
+
+        # 3) A JSON object with an "articles" key
+        obj = _extract_json_object(text)
+        if isinstance(obj, dict) and isinstance(obj.get("articles"), list):
+            return obj["articles"]
+
+        # 4) Recovery: individual headline objects from running text
+        recovered = []
+        for obj_str in re.findall(r'\{[^{}]*"headline"[^{}]*\}', text, re.DOTALL):
             try:
-                data = json.loads(code_match.group(1))
-                if isinstance(data, list):
-                    return data
+                parsed = json.loads(obj_str)
             except json.JSONDecodeError:
-                pass
+                continue
+            if isinstance(parsed, dict) and "headline" in parsed:
+                recovered.append(parsed)
+        if recovered:
+            logger.info("JSON-Recovery: %d Artikel aus Einzelobjekten extrahiert", len(recovered))
+            return recovered

-        # Try to extract JSON from the response (between [ and ])
-        arr_pat = r'\[\s*\{.*\}\s*\]'
-        match = re.search(arr_pat, response, re.DOTALL)
-        if match:
-            try:
-                data = json.loads(match.group())
-                if isinstance(data, list):
-                    return data
-            except json.JSONDecodeError:
-                pass
-
-        # Last attempt: individual JSON objects with a headline
-        objects = re.findall(r'\{[^{}]*"headline"[^{}]*\}', response)
-        if objects:
-            results = []
-            for obj_str in objects:
-                try:
-                    obj = json.loads(obj_str)
-                    if "headline" in obj:
-                        results.append(obj)
-                except json.JSONDecodeError:
-                    continue
-            if results:
-                logger.info(f"JSON-Recovery: {len(results)} Artikel aus Einzelobjekten extrahiert")
-                return results
-
-        logger.warning(f"Konnte Claude-Antwort nicht als JSON parsen (Laenge: {len(response)})")
-        return []
+        # Parse failed: Claude answered, but without usable JSON.
+        # Log a sample so the failure stays debuggable, and signal the caller.
+        logger.warning(
+            "Konnte Claude-Antwort nicht als JSON parsen (Laenge: %d). Sample: %s",
+            len(text),
+            _truncate_for_log(text),
+        )
+        raise ResearcherParseError(f"Claude-Antwort enthielt kein verwertbares JSON (Laenge: {len(text)})")
+
+    async def select_relevant_web_sources(
+        self,
+        title: str,
+        description: str,
+        web_sources: list[dict],
+    ) -> tuple[list[dict], ClaudeUsage | None]:
+        """Lets Claude pick the thematically matching web sources (Haiku).
+
+        Returns:
+            (selected sources, usage). On error: ([], None).
+            An empty selection is explicitly allowed; no source is forced in.
+        """
+        if not web_sources:
+            return [], None
+
+        # With very few sources the selection call barely pays off; pass all through.
+        if len(web_sources) <= 3:
+            logger.info("Web-Source-Selektion: Nur %d Quellen, alle uebernehmen", len(web_sources))
+            return list(web_sources), None
+
+        lines = []
+        for i, src in enumerate(web_sources, 1):
+            cat = src.get("category", "sonstige")
+            notes = (src.get("notes") or "")[:80]
+            domain = src.get("domain", "")
+            line = f"{i}. {src.get('name', domain)} ({domain}) [{cat}]"
+            if notes:
+                line += f" - {notes}"
+            lines.append(line)
+
+        prompt = WEB_SOURCE_SELECTION_PROMPT.format(
+            title=title,
+            description=description or "Keine weitere Beschreibung",
+            source_list="\n".join(lines),
+        )
+
+        try:
+            result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
+            indices = _extract_json_array(result)
+            if not isinstance(indices, list):
+                logger.warning(
+                    "Web-Source-Selektion: Kein JSON in Antwort, ignoriere Quellen. Sample: %s",
+                    _truncate_for_log(result),
+                )
+                return [], usage
+
+            selected = []
+            for idx in indices:
+                if isinstance(idx, int) and 1 <= idx <= len(web_sources):
+                    selected.append(web_sources[idx - 1])
+
+            logger.info(
+                "Web-Source-Selektion: %d von %d ausgewaehlt%s",
+                len(selected), len(web_sources),
+                f" ({', '.join(s.get('domain', '') for s in selected)})" if selected else "",
+            )
+            return selected, usage
+        except Exception as e:
+            logger.warning("Web-Source-Selektion fehlgeschlagen (%s)", e)
+            return [], None

     async def select_relevant_telegram_channels(
         self,
@@ -488,12 +666,14 @@ class ResearcherAgent:
         try:
             result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)

-            arr_match = re.search(r'\[[\d\s,]+\]', result)
-            if not arr_match:
-                logger.warning("Telegram-Selektion: Kein JSON in Antwort, nutze alle Kanaele")
+            indices = _extract_json_array(result)
+            if not isinstance(indices, list):
+                logger.warning(
+                    "Telegram-Selektion: Kein JSON in Antwort, nutze alle Kanaele. Sample: %s",
+                    _truncate_for_log(result),
+                )
                 return channels_metadata, usage

-            indices = json.loads(arr_match.group())
            selected = []
            for idx in indices:
                if isinstance(idx, int) and 1 <= idx <= len(channels_metadata):
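Taken together, the selection helpers and the new preferred_sources parameter form a two-step flow: a cheap Haiku pre-selection, then the main research call. A hypothetical wiring sketch; the real orchestrator code is not part of this comparison:

```python
async def gather_articles(researcher, incident: dict, web_sources: list[dict]):
    """Hypothetical sketch of the pre-selection plus search flow."""
    preferred, _ = await researcher.select_relevant_web_sources(
        incident["title"], incident.get("description", ""), web_sources
    )
    # An empty selection is fine; search() then simply omits the prompt block.
    articles, usage, parse_failed = await researcher.search(
        incident["title"],
        incident.get("description", ""),
        preferred_sources=preferred,
    )
    return articles
```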
src/agents/translator.py (new file, 254 lines)
@@ -0,0 +1,254 @@
+"""Translator agent: translates foreign-language articles into German.
+
+A separate agent (apart from the analyzer) so that token limits do not
+compete between the situation report and the translation. Uses
+CLAUDE_MODEL_FAST (Haiku) in batches.
+
+Called by the orchestrator after analyzer.analyze() and before post_refresh_qc.
+The backfill script uses the same function for retroactive filling.
+"""
+import json
+import logging
+import re
+
+from agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator
+from config import CLAUDE_MODEL_FAST, TRANSLATOR_ENABLED
+
+logger = logging.getLogger("osint.translator")
+
+# Never hand Claude more than this many articles per batch.
+# Haiku's output limit is about 8k tokens, and each article easily produces
+# 400-600 tokens (headline_de + content_de up to 1000 characters). At 15 the
+# output was regularly truncated (broken mid-JSON). 5 is safe with headroom.
+DEFAULT_BATCH_SIZE = 5
+
+# content_original is capped at 1000 characters anyway (rss_parser).
+# Shorten again for the translator in case more is present.
+CONTENT_INPUT_MAX = 1200
+
+# content_de should be limited to 1000 characters, like content_original.
+CONTENT_OUTPUT_MAX = 1000
+
+
+def _extract_complete_objects(text: str) -> list[dict]:
+    """Extracts complete JSON objects from possibly truncated text.
+
+    Brace-counter approach: every balanced {...} is tried.
+    """
+    results = []
+    depth = 0
+    start = -1
+    in_string = False
+    escape = False
+    for i, ch in enumerate(text):
+        if escape:
+            escape = False
+            continue
+        if ch == "\\":
+            escape = True
+            continue
+        if ch == '"':
+            in_string = not in_string
+            continue
+        if in_string:
+            continue
+        if ch == "{":
+            if depth == 0:
+                start = i
+            depth += 1
+        elif ch == "}" and depth > 0:
+            depth -= 1
+            if depth == 0 and start >= 0:
+                obj_text = text[start:i + 1]
+                try:
+                    obj = json.loads(obj_text)
+                    if isinstance(obj, dict):
+                        results.append(obj)
+                except json.JSONDecodeError:
+                    pass
+                start = -1
+    return results
+
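A quick usage example for the brace counter above: a batch response truncated mid-array still yields every object that closed before the cut.

```python
# A response cut off in the middle of the second object:
truncated = '[{"id": 1, "headline_de": "Titel A", "content_de": "..."}, {"id": 2, "headline_de": "Ti'
print(_extract_complete_objects(truncated))
# -> [{'id': 1, 'headline_de': 'Titel A', 'content_de': '...'}]
```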
+
+def _build_prompt(articles: list[dict], output_lang: str = "de") -> str:
+    """Builds the translation prompt for one batch."""
+    lang_label = {"de": "Deutsch", "en": "Englisch"}.get(output_lang, output_lang)
+
+    items = []
+    for a in articles:
+        items.append({
+            "id": a["id"],
+            "headline": a.get("headline", "") or "",
+            "content": (a.get("content_original") or "")[:CONTENT_INPUT_MAX],
+            "source_lang": a.get("language", "en"),
+        })
+
+    return f"""Du bist ein praeziser Uebersetzer fuer Nachrichten-Artikel.
+Uebersetze die folgenden Artikel nach {lang_label}.
+
+WICHTIG:
+- Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) - NIEMALS Umschreibungen wie ae, oe, ue, ss.
+  Beispiele: "Gespraeche" -> "Gespräche", "Fuehrer" -> "Führer", "grosse" -> "große".
+- Behalte Eigennamen (Personen, Orte, Organisationen) im Original.
+- Headline kurz und buendig wie im Original.
+- Content auf MAX {CONTENT_OUTPUT_MAX} Zeichen kuerzen, kein HTML, kein Markdown.
+- Wenn der Artikel schon auf {lang_label} ist (z.B. source_lang="{output_lang}"),
+  kopiere headline und content unveraendert.
+
+Antworte AUSSCHLIESSLICH mit einem flachen JSON-Array (kein Wrapper-Objekt!).
+Format genau so:
+[
+  {{"id": 1, "headline_de": "Titel auf Deutsch", "content_de": "Inhalt auf Deutsch"}},
+  {{"id": 2, "headline_de": "...", "content_de": "..."}}
+]
+
+NICHT erlaubt: {{"translations": [...]}} oder {{"items": [...]}} oder Markdown-Codefences.
+Nur das Array, ohne Einleitung, ohne Erklaerung.
+
+ARTIKEL:
+{json.dumps(items, ensure_ascii=False, indent=2)}
+"""
+
+
+def _parse_response(text: str) -> list[dict]:
+    """Robust JSON array parsing.
+
+    Handles:
+    - plain JSON
+    - JSON inside a markdown code fence (```json ... ```)
+    - truncated responses (extracts complete top-level objects)
+    """
+    text = text.strip()
+    # Strip a markdown code fence
+    if text.startswith("```"):
+        text = re.sub(r"^```(?:json)?\s*", "", text)
+        text = re.sub(r"\s*```\s*$", "", text)
+        text = text.strip()
+
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError:
+        # Try an array first
+        match = re.search(r"\[.*\]", text, re.DOTALL)
+        if match:
+            try:
+                data = json.loads(match.group(0))
+            except json.JSONDecodeError:
+                # Truncation fallback: extract individual top-level objects
+                data = _extract_complete_objects(text)
+        else:
+            data = _extract_complete_objects(text)
+
+    # Claude occasionally wraps the array in {"translations": [...]} or {"items": [...]}
+    if isinstance(data, dict):
+        for key in ("translations", "items", "results", "data"):
+            if isinstance(data.get(key), list):
+                data = data[key]
+                break
+        else:
+            # A single object? Then treat it as a one-element list
+            if "id" in data:
+                data = [data]
+            else:
+                raise ValueError(f"Translator-Antwort: Dict ohne erwarteten Array-Key (keys={list(data.keys())[:5]})")
+
+    if not isinstance(data, list):
+        raise ValueError(f"Translator-Antwort ist kein Array: {type(data).__name__}")
+
+    cleaned = []
+    for item in data:
+        if not isinstance(item, dict):
+            continue
+        aid = item.get("id")
+        if not isinstance(aid, int):
+            try:
+                aid = int(aid)
+            except (TypeError, ValueError):
+                continue
+        cleaned.append({
+            "id": aid,
+            "headline_de": (item.get("headline_de") or "").strip() or None,
+            "content_de": (item.get("content_de") or "").strip() or None,
+        })
+    return cleaned
+
+
+async def translate_articles_batch(
+    articles: list[dict],
+    output_lang: str = "de",
+) -> tuple[list[dict], ClaudeUsage]:
+    """Translates one batch of articles.
+
+    Expects articles as a list of dicts with the fields id, headline,
+    content_original, language.
+
+    Returns: (translated_articles, usage).
+    If the call fails, ([], empty_usage) is returned; the caller decides
+    whether to retry or skip.
+    """
+    if not articles:
+        return [], ClaudeUsage()
+
+    prompt = _build_prompt(articles, output_lang)
+
+    try:
+        result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
+    except Exception as e:
+        logger.error(f"Translator Claude-Call fehlgeschlagen: {e}")
+        return [], ClaudeUsage()
+
+    try:
+        translations = _parse_response(result_text)
+    except Exception as e:
+        logger.error(f"Translator JSON-Parsing fehlgeschlagen: {e}; raw: {result_text[:300]!r}")
+        return [], usage
+
+    # Validation: only return translations whose id was actually part of
+    # the requested batch
+    requested_ids = {a["id"] for a in articles}
+    valid = [t for t in translations if t["id"] in requested_ids]
+    if len(valid) != len(translations):
+        logger.warning(
+            "Translator: %d von %d Translations referenzieren unbekannte IDs",
+            len(translations) - len(valid), len(translations),
+        )
+    return valid, usage
+
+
+async def translate_articles(
+    articles: list[dict],
+    output_lang: str = "de",
+    batch_size: int = DEFAULT_BATCH_SIZE,
+    usage_accumulator: UsageAccumulator | None = None,
+) -> list[dict]:
+    """Translates any number of articles in batches.
+
+    Feeds the batches through translate_articles_batch and returns ONE flat
+    list of translations. If a batch fails, it is skipped (the remaining
+    batches keep running).
+    """
+    if not articles:
+        return []
+
+    if not TRANSLATOR_ENABLED:
+        logger.info(
+            "Translator deaktiviert (TRANSLATOR_ENABLED=false), %d Artikel uebersprungen",
+            len(articles),
+        )
+        return []
+
+    all_translations = []
+    for i in range(0, len(articles), batch_size):
+        batch = articles[i : i + batch_size]
+        translations, usage = await translate_articles_batch(batch, output_lang)
+        if usage_accumulator is not None:
+            usage_accumulator.add(usage)
+        all_translations.extend(translations)
+        logger.info(
+            "Translator-Batch %d/%d: %d/%d uebersetzt (cost=$%.4f)",
+            (i // batch_size) + 1,
+            (len(articles) + batch_size - 1) // batch_size,
+            len(translations), len(batch),
+            usage.cost_usd,
+        )
+    return all_translations
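The DEFAULT_BATCH_SIZE comment above encodes simple arithmetic: at roughly 400 to 600 output tokens per article, 15 articles can reach about 9,000 tokens and blow past Haiku's roughly 8k output limit, while 5 stay near 3,000 with headroom. A small self-check of the batching math used in translate_articles:

```python
# Illustrative data; only the slicing and the ceil-division are being checked.
articles = [{"id": i, "headline": f"h{i}", "content_original": "...", "language": "en"} for i in range(12)]
batch_size = 5
batches = [articles[i:i + batch_size] for i in range(0, len(articles), batch_size)]

# ceil(12 / 5) == 3 batches: 5 + 5 + 2 articles
assert len(batches) == (len(articles) + batch_size - 1) // batch_size == 3
```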
@@ -10,7 +10,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 DATA_DIR = os.path.join(BASE_DIR, "data")
 LOG_DIR = os.path.join(BASE_DIR, "logs")
 STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
-DB_PATH = os.path.join(DATA_DIR, "osint.db")
+DB_PATH = os.environ.get("DB_PATH") or os.path.join(DATA_DIR, "osint.db")

 # JWT
 _JWT_SECRET = os.environ.get("JWT_SECRET", "")
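Because DB_PATH is resolved once at import time, the override must already be in the environment before the config module is imported. A minimal sketch, assuming the module imports cleanly in your environment; the path is hypothetical:

```python
import os

# Must happen before the first `import config`, or the default path wins.
os.environ["DB_PATH"] = "/tmp/osint-test.db"  # hypothetical path, for illustration

import config

assert config.DB_PATH == "/tmp/osint-test.db"
```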
@@ -41,6 +41,10 @@ OUTPUT_LANGUAGE = "Deutsch"
 # Set to False in the customer version, or remove the env variable
 DEV_MODE = os.environ.get("DEV_MODE", "true").lower() == "true"

+# Feature flag: disable the Translator agent (Haiku) entirely.
+# False = no more translations; foreign-language articles stay untranslated.
+TRANSLATOR_ENABLED = os.environ.get("TRANSLATOR_ENABLED", "true").lower() == "true"
+
 # RSS feeds (fallback, primarily loaded from the DB)
 RSS_FEEDS = {
     "deutsch": [
@@ -91,3 +95,9 @@ TELEGRAM_API_ID = int(os.environ.get("TELEGRAM_API_ID", "0"))
 TELEGRAM_API_HASH = os.environ.get("TELEGRAM_API_HASH", "")
 TELEGRAM_SESSION_PATH = os.environ.get("TELEGRAM_SESSION_PATH", "/home/claude-dev/.telegram/telegram_session")
+
+# Health check (used by services/source_health.py)
+HEALTH_CHECK_USER_AGENT = os.environ.get(
+    "HEALTH_CHECK_USER_AGENT",
+    "Mozilla/5.0 (compatible; AegisSight-HealthCheck/1.0)",
+)
+HEALTH_CHECK_TIMEOUT_S = float(os.environ.get("HEALTH_CHECK_TIMEOUT_S", "15.0"))

src/database.py (136 lines changed)
@@ -117,6 +117,22 @@ CREATE TABLE IF NOT EXISTS refresh_log (
     tenant_id INTEGER REFERENCES organizations(id)
 );

+CREATE TABLE IF NOT EXISTS refresh_pipeline_steps (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    refresh_log_id INTEGER REFERENCES refresh_log(id) ON DELETE CASCADE,
+    incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE,
+    step_key TEXT NOT NULL,
+    pass_number INTEGER DEFAULT 1,
+    started_at TIMESTAMP,
+    completed_at TIMESTAMP,
+    status TEXT DEFAULT 'pending',
+    count_value INTEGER,
+    count_secondary INTEGER,
+    tenant_id INTEGER REFERENCES organizations(id)
+);
+CREATE INDEX IF NOT EXISTS idx_pipeline_steps_incident ON refresh_pipeline_steps(incident_id, started_at DESC);
+CREATE INDEX IF NOT EXISTS idx_pipeline_steps_log ON refresh_pipeline_steps(refresh_log_id);
+
 CREATE TABLE IF NOT EXISTS incident_snapshots (
     id INTEGER PRIMARY KEY AUTOINCREMENT,
     incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE,
@@ -142,7 +158,36 @@ CREATE TABLE IF NOT EXISTS sources (
     article_count INTEGER DEFAULT 0,
     last_seen_at TIMESTAMP,
     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    tenant_id INTEGER REFERENCES organizations(id)
+    tenant_id INTEGER REFERENCES organizations(id),
+    language TEXT,
+    bias TEXT,
+    political_orientation TEXT DEFAULT 'na',
+    media_type TEXT DEFAULT 'sonstige',
+    reliability TEXT DEFAULT 'na',
+    state_affiliated INTEGER DEFAULT 0,
+    country_code TEXT,
+    classification_source TEXT DEFAULT 'legacy',
+    classified_at TIMESTAMP,
+    proposed_political_orientation TEXT,
+    proposed_media_type TEXT,
+    proposed_reliability TEXT,
+    proposed_state_affiliated INTEGER,
+    proposed_country_code TEXT,
+    proposed_alignments_json TEXT,
+    proposed_confidence REAL,
+    proposed_reasoning TEXT,
+    proposed_at TIMESTAMP,
+    eu_disinfo_listed INTEGER DEFAULT 0,
+    eu_disinfo_case_count INTEGER DEFAULT 0,
+    eu_disinfo_last_seen TIMESTAMP,
+    ifcn_signatory INTEGER DEFAULT 0,
+    external_data_synced_at TIMESTAMP
+);
+
+CREATE TABLE IF NOT EXISTS source_alignments (
+    source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
+    alignment TEXT NOT NULL,
+    PRIMARY KEY (source_id, alignment)
 );

 CREATE TABLE IF NOT EXISTS notifications (
@@ -418,6 +463,29 @@ async def init_db():
             await db.execute("ALTER TABLE refresh_log ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)")
             await db.commit()

+        # Migration: refresh_pipeline_steps table (analysis-pipeline visualization)
+        cursor = await db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='refresh_pipeline_steps'")
+        if not await cursor.fetchone():
+            await db.executescript("""
+                CREATE TABLE refresh_pipeline_steps (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    refresh_log_id INTEGER REFERENCES refresh_log(id) ON DELETE CASCADE,
+                    incident_id INTEGER REFERENCES incidents(id) ON DELETE CASCADE,
+                    step_key TEXT NOT NULL,
+                    pass_number INTEGER DEFAULT 1,
+                    started_at TIMESTAMP,
+                    completed_at TIMESTAMP,
+                    status TEXT DEFAULT 'pending',
+                    count_value INTEGER,
+                    count_secondary INTEGER,
+                    tenant_id INTEGER REFERENCES organizations(id)
+                );
+                CREATE INDEX IF NOT EXISTS idx_pipeline_steps_incident ON refresh_pipeline_steps(incident_id, started_at DESC);
+                CREATE INDEX IF NOT EXISTS idx_pipeline_steps_log ON refresh_pipeline_steps(refresh_log_id);
+            """)
+            await db.commit()
+            logger.info("Migration: refresh_pipeline_steps-Tabelle erstellt")
+
         # Migration: notifications table (for existing DBs)
         cursor = await db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='notifications'")
         if not await cursor.fetchone():
@@ -572,6 +640,71 @@ async def init_db():
             await db.execute("ALTER TABLE sources ADD COLUMN tenant_id INTEGER REFERENCES organizations(id)")
             await db.commit()

+        # Migration: language + bias (free text, in use for a while; closes a schema gap)
+        if "language" not in src_columns:
+            await db.execute("ALTER TABLE sources ADD COLUMN language TEXT")
+            await db.commit()
+        if "bias" not in src_columns:
+            await db.execute("ALTER TABLE sources ADD COLUMN bias TEXT")
+            await db.commit()
+
+        # Migration: structured classification columns for sources
+        for col, ddl in [
+            ("political_orientation", "ALTER TABLE sources ADD COLUMN political_orientation TEXT DEFAULT 'na'"),
+            ("media_type", "ALTER TABLE sources ADD COLUMN media_type TEXT DEFAULT 'sonstige'"),
+            ("reliability", "ALTER TABLE sources ADD COLUMN reliability TEXT DEFAULT 'na'"),
+            ("state_affiliated", "ALTER TABLE sources ADD COLUMN state_affiliated INTEGER DEFAULT 0"),
+            ("country_code", "ALTER TABLE sources ADD COLUMN country_code TEXT"),
+            ("classification_source", "ALTER TABLE sources ADD COLUMN classification_source TEXT DEFAULT 'legacy'"),
+            ("classified_at", "ALTER TABLE sources ADD COLUMN classified_at TIMESTAMP"),
+            ("proposed_political_orientation", "ALTER TABLE sources ADD COLUMN proposed_political_orientation TEXT"),
+            ("proposed_media_type", "ALTER TABLE sources ADD COLUMN proposed_media_type TEXT"),
+            ("proposed_reliability", "ALTER TABLE sources ADD COLUMN proposed_reliability TEXT"),
+            ("proposed_state_affiliated", "ALTER TABLE sources ADD COLUMN proposed_state_affiliated INTEGER"),
+            ("proposed_country_code", "ALTER TABLE sources ADD COLUMN proposed_country_code TEXT"),
+            ("proposed_alignments_json", "ALTER TABLE sources ADD COLUMN proposed_alignments_json TEXT"),
+            ("proposed_confidence", "ALTER TABLE sources ADD COLUMN proposed_confidence REAL"),
+            ("proposed_reasoning", "ALTER TABLE sources ADD COLUMN proposed_reasoning TEXT"),
+            ("proposed_at", "ALTER TABLE sources ADD COLUMN proposed_at TIMESTAMP"),
+        ]:
+            if col not in src_columns:
+                await db.execute(ddl)
+                await db.commit()
+        if any(c not in src_columns for c in ("political_orientation", "media_type", "reliability")):
+            logger.info("Migration: Klassifikations-Spalten zu sources hinzugefuegt")
+
+        # Migration: external reputation data (EUvsDisinfo + IFCN)
+        for col, ddl in [
+            ("eu_disinfo_listed", "ALTER TABLE sources ADD COLUMN eu_disinfo_listed INTEGER DEFAULT 0"),
+            ("eu_disinfo_case_count", "ALTER TABLE sources ADD COLUMN eu_disinfo_case_count INTEGER DEFAULT 0"),
+            ("eu_disinfo_last_seen", "ALTER TABLE sources ADD COLUMN eu_disinfo_last_seen TIMESTAMP"),
+            ("ifcn_signatory", "ALTER TABLE sources ADD COLUMN ifcn_signatory INTEGER DEFAULT 0"),
+            ("external_data_synced_at", "ALTER TABLE sources ADD COLUMN external_data_synced_at TIMESTAMP"),
+        ]:
+            if col not in src_columns:
+                await db.execute(ddl)
+                await db.commit()
+        if any(c not in src_columns for c in ("eu_disinfo_listed", "ifcn_signatory")):
+            logger.info("Migration: externe Reputations-Spalten zu sources hinzugefuegt")
+
+        # Migration: source_alignments table (multi-tags for geopolitical alignment)
+        cursor = await db.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' AND name='source_alignments'"
+        )
+        if not await cursor.fetchone():
+            await db.executescript(
+                """
+                CREATE TABLE source_alignments (
+                    source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
+                    alignment TEXT NOT NULL,
+                    PRIMARY KEY (source_id, alignment)
+                );
+                CREATE INDEX IF NOT EXISTS idx_source_alignments_alignment ON source_alignments(alignment);
+                """
+            )
+            await db.commit()
+            logger.info("Migration: source_alignments-Tabelle erstellt")
+
         # Migration: tenant_id for notifications
         cursor = await db.execute("PRAGMA table_info(notifications)")
         notif_columns = [row[1] for row in await cursor.fetchall()]
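Both migration blocks above follow the same additive pattern: inspect the live schema, apply only the missing DDL, commit. A condensed sketch of the column half of that idiom, assuming an open aiosqlite connection; this is an illustration, not a copy of init_db():

```python
async def ensure_column(db, table: str, column: str, ddl: str) -> bool:
    """Add a column if it is missing; returns True when the DDL actually ran."""
    cursor = await db.execute(f"PRAGMA table_info({table})")
    existing = [row[1] for row in await cursor.fetchall()]  # row[1] = column name
    if column in existing:
        return False
    await db.execute(ddl)
    await db.commit()
    return True
```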
@@ -583,6 +716,7 @@ async def init_db():
         for idx_sql in [
             "CREATE INDEX IF NOT EXISTS idx_incidents_tenant_status ON incidents(tenant_id, status)",
             "CREATE INDEX IF NOT EXISTS idx_articles_tenant_incident ON articles(tenant_id, incident_id)",
+            "CREATE INDEX IF NOT EXISTS idx_articles_incident_collected ON articles(incident_id, collected_at DESC)",
         ]:
             try:
                 await db.execute(idx_sql)
@@ -6,6 +6,8 @@ import httpx
 from datetime import datetime, timezone
 from config import TIMEZONE, MAX_ARTICLES_PER_DOMAIN_RSS
 from source_rules import _extract_domain
+from feeds.transcript_extractors._common import html_to_text
+from services.post_refresh_qc import normalize_german_umlauts

 logger = logging.getLogger("osint.rss")

@@ -152,11 +154,27 @@ class RSSParser:

         for entry in feed.entries[:50]:
             title = entry.get("title", "")
-            summary = entry.get("summary", "")
+            # For many sources the RSS summary is HTML (Guardian, AP, SZ, ...).
+            # Strip it before further use, otherwise HTML ends up in the DB
+            # and confuses the AI agents and the language heuristic.
+            summary_raw = entry.get("summary", "")
+            summary = html_to_text(summary_raw) if summary_raw else ""
+            # ASCII umlaut normalization (e.g. dpa-AFX writes "Gespraeche").
+            # Dictionary-based, safe against English words like "Boeing".
+            title, _ = normalize_german_umlauts(title)
+            summary, _ = normalize_german_umlauts(summary)
             text = f"{title} {summary}".lower()

-            # Flexible keyword matching: at least half of the search words must occur (rounded up)
-            min_matches = min(2, max(1, (len(search_words) + 1) // 2))
+            # Adaptive match threshold:
+            # - If at least one specific keyword (>= 7 characters) occurs in the text,
+            #   a single hit is enough. Prevents headlines with one strong keyword
+            #   such as "buckelwal" from being dropped when the incident is
+            #   thematically narrow (bug 1, documented by the user).
+            # - Otherwise: the old heuristic (at least half of the words, capped at 2).
+            specific_in_text = any(w in text for w in search_words if len(w) >= 7)
+            if specific_in_text:
+                min_matches = 1
+            else:
+                min_matches = min(2, max(1, (len(search_words) + 1) // 2))
             match_count = sum(1 for word in search_words if word in text)

             if match_count >= min_matches:
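A worked example of the adaptive threshold: with the search words from the documented bug, one specific keyword is now enough, where the old rule demanded two matches.

```python
search_words = ["buckelwal", "timmy", "nordsee"]
text = "buckelwal vor helgoland gesichtet"

specific_in_text = any(w in text for w in search_words if len(w) >= 7)
min_matches = 1 if specific_in_text else min(2, max(1, (len(search_words) + 1) // 2))

assert min_matches == 1  # "buckelwal" (9 chars) counts as specific
assert sum(1 for w in search_words if w in text) >= min_matches
# Old rule: min(2, ceil(3 / 2)) = 2 required matches; this headline contains
# only one of the three words, so it used to be dropped.
```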
@@ -124,7 +124,7 @@ async def check_auto_refresh():

     # Check the most recent completed or running refresh
     cursor = await db.execute(
-        "SELECT started_at, status FROM refresh_log WHERE incident_id = ? AND status IN ('completed', 'running') ORDER BY id DESC LIMIT 1",
+        "SELECT started_at, status FROM refresh_log WHERE incident_id = ? AND status IN ('completed', 'running', 'cancelled', 'error') ORDER BY id DESC LIMIT 1",
         (incident_id,),
     )
     last_refresh = await cursor.fetchone()
@@ -378,6 +378,7 @@ from routers.feedback import router as feedback_router
 from routers.public_api import router as public_api_router
 from routers.chat import router as chat_router
 from routers.tutorial import router as tutorial_router
+from routes.version_router import router as version_router

 app.include_router(auth_router)
 app.include_router(incidents_router)
@@ -387,6 +388,7 @@ app.include_router(feedback_router)
 app.include_router(public_api_router)
 app.include_router(chat_router, prefix="/api/chat")
 app.include_router(tutorial_router)
+app.include_router(version_router)


 @app.websocket("/api/ws")
@@ -40,12 +40,25 @@ async def require_writable_license(
 ) -> dict:
     """Dependency that ensures the license permits write access.

-    Blocks new incidents/refreshes when the license has expired (read-only mode).
+    Blocks new incidents/refreshes on an expired license, a disabled org,
+    or an exhausted token budget (hard stop).
     """
     lic = current_user.get("license", {})
     if lic.get("read_only"):
+        reason = lic.get("read_only_reason") or "expired"
+        if reason == "budget_exceeded":
+            detail = "Token-Budget aufgebraucht. Für Aufstockung oder Upgrade bitte info@aegis-sight.de kontaktieren."
+        elif reason == "expired":
+            detail = "Lizenz abgelaufen. Nur Lesezugriff moeglich."
+        elif reason == "no_license":
+            detail = "Keine aktive Lizenz. Bitte Verwaltung kontaktieren."
+        elif reason == "org_disabled":
+            detail = "Organisation deaktiviert. Bitte Support kontaktieren."
+        else:
+            detail = lic.get("message") or "Nur Lesezugriff moeglich."
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN,
-            detail="Lizenz abgelaufen oder widerrufen. Nur Lesezugriff moeglich.",
+            detail=detail,
+            headers={"X-License-Status": reason},
         )
     return current_user
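Usage sketch for the extended dependency; the route path, schema import, and module layout are illustrative, not taken from the diff:

```python
from fastapi import APIRouter, Depends

from dependencies import require_writable_license  # assumed module path
from schemas import IncidentCreate                 # assumed module path

router = APIRouter()


@router.post("/api/incidents")
async def create_incident(
    payload: IncidentCreate,
    user: dict = Depends(require_writable_license),
):
    # Reaching this point implies a writable license; read-only users received
    # a 403 whose detail text and X-License-Status header name the exact reason.
    ...
```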
@@ -37,6 +37,8 @@ class UserMeResponse(BaseModel):
     license_status: str = "unknown"
     license_type: str = ""
     read_only: bool = False
+    read_only_reason: Optional[str] = None
+    unlimited_budget: bool = False
     credits_total: Optional[int] = None
     credits_remaining: Optional[int] = None
     credits_percent_used: Optional[float] = None
@@ -52,7 +54,7 @@ class IncidentCreate(BaseModel):
     refresh_interval: int = Field(default=15, ge=10, le=10080)
     refresh_start_time: Optional[str] = Field(default=None, pattern=r"^([01]\d|2[0-3]):[0-5]\d$")
     retention_days: int = Field(default=0, ge=0, le=999)
-    international_sources: bool = True
+    international_sources: bool = False
     include_telegram: bool = False
     visibility: str = Field(default="public", pattern="^(public|private)$")

@@ -78,6 +80,11 @@ class DescriptionEnhanceRequest(BaseModel):


 class IncidentResponse(BaseModel):
+    """Full incident details (for GET /incidents/{id}).
+
+    Contains summary + latest_developments, but no longer sources_json;
+    that is loaded separately via GET /incidents/{id}/sources (lazy load).
+    """
     id: int
     title: str
     description: Optional[str]
@@ -90,7 +97,6 @@ class IncidentResponse(BaseModel):
     visibility: str = "public"
     summary: Optional[str]
     latest_developments: Optional[str] = None
-    sources_json: Optional[str] = None
     international_sources: bool = True
     include_telegram: bool = False
     created_by: int
@@ -101,27 +107,83 @@ class IncidentResponse(BaseModel):
     source_count: int = 0


+class IncidentListItem(BaseModel):
+    """Slim sidebar item (for GET /incidents).
+
+    Contains what the sidebar and the edit dialog need; no summary,
+    no sources_json. Instead of the full summary text, a ``has_summary`` bit
+    so the frontend can detect the "first refresh" state.
+    description stays in (short, used directly by the edit modal).
+    """
+    id: int
+    title: str
+    description: Optional[str] = None
+    type: str
+    status: str
+    refresh_mode: str
+    refresh_interval: int
+    refresh_start_time: Optional[str] = None
+    retention_days: int
+    visibility: str = "public"
+    international_sources: bool = True
+    include_telegram: bool = False
+    created_by: int
+    created_by_username: str = ""
+    created_at: str
+    updated_at: str
+    article_count: int = 0
+    source_count: int = 0
+    has_summary: bool = False
+
+
 # Sources (source management)
+SOURCE_TYPE_PATTERN = "^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$"
+SOURCE_CATEGORY_PATTERN = "^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$"
+SOURCE_STATUS_PATTERN = "^(active|inactive)$"
+POLITICAL_ORIENTATION_PATTERN = "^(links_extrem|links|mitte_links|liberal|mitte|konservativ|mitte_rechts|rechts|rechts_extrem|na)$"
+MEDIA_TYPE_PATTERN = "^(tageszeitung|wochenzeitung|magazin|tv_sender|radio|oeffentlich_rechtlich|nachrichtenagentur|online_only|blog|telegram_kanal|telegram_bot|podcast|social_media|imageboard|think_tank|ngo|behoerde|staatsmedium|fachmedium|sonstige)$"
+RELIABILITY_PATTERN = "^(sehr_hoch|hoch|gemischt|niedrig|sehr_niedrig|na)$"
+ALIGNMENT_PATTERN = "^(prorussisch|proiranisch|prowestlich|proukrainisch|prochinesisch|projapanisch|proisraelisch|propalaestinensisch|protuerkisch|panarabisch|neutral|sonstige)$"
+COUNTRY_CODE_PATTERN = "^[A-Z]{2}$"
+CLASSIFICATION_SOURCE_PATTERN = "^(manual|llm_approved|llm_pending|legacy)$"


 class SourceCreate(BaseModel):
     name: str = Field(min_length=1, max_length=200)
     url: Optional[str] = None
     domain: Optional[str] = None
-    source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$")
-    category: str = Field(default="sonstige", pattern="^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$")
-    status: str = Field(default="active", pattern="^(active|inactive)$")
+    source_type: str = Field(default="rss_feed", pattern=SOURCE_TYPE_PATTERN)
+    category: str = Field(default="sonstige", pattern=SOURCE_CATEGORY_PATTERN)
+    status: str = Field(default="active", pattern=SOURCE_STATUS_PATTERN)
     notes: Optional[str] = None
+    language: Optional[str] = None
+    bias: Optional[str] = None
+    political_orientation: Optional[str] = Field(default=None, pattern=POLITICAL_ORIENTATION_PATTERN)
+    media_type: Optional[str] = Field(default=None, pattern=MEDIA_TYPE_PATTERN)
+    reliability: Optional[str] = Field(default=None, pattern=RELIABILITY_PATTERN)
+    state_affiliated: Optional[bool] = None
+    country_code: Optional[str] = Field(default=None, pattern=COUNTRY_CODE_PATTERN)
+    alignments: Optional[list[str]] = None


 class SourceUpdate(BaseModel):
     name: Optional[str] = Field(default=None, max_length=200)
     url: Optional[str] = None
     domain: Optional[str] = None
-    source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$")
-    category: Optional[str] = Field(default=None, pattern="^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$")
-    status: Optional[str] = Field(default=None, pattern="^(active|inactive)$")
+    source_type: Optional[str] = Field(default=None, pattern=SOURCE_TYPE_PATTERN)
+    category: Optional[str] = Field(default=None, pattern=SOURCE_CATEGORY_PATTERN)
+    status: Optional[str] = Field(default=None, pattern=SOURCE_STATUS_PATTERN)
     notes: Optional[str] = None
+    language: Optional[str] = None
+    bias: Optional[str] = None
+    political_orientation: Optional[str] = Field(default=None, pattern=POLITICAL_ORIENTATION_PATTERN)
+    media_type: Optional[str] = Field(default=None, pattern=MEDIA_TYPE_PATTERN)
+    reliability: Optional[str] = Field(default=None, pattern=RELIABILITY_PATTERN)
+    state_affiliated: Optional[bool] = None
+    country_code: Optional[str] = Field(default=None, pattern=COUNTRY_CODE_PATTERN)
+    alignments: Optional[list[str]] = None


 class SourceResponse(BaseModel):
@@ -139,7 +201,20 @@ class SourceResponse(BaseModel):
     created_at: str
     language: Optional[str] = None
     bias: Optional[str] = None
+    political_orientation: Optional[str] = None
+    media_type: Optional[str] = None
+    reliability: Optional[str] = None
+    state_affiliated: bool = False
+    country_code: Optional[str] = None
+    classification_source: Optional[str] = None
+    classified_at: Optional[str] = None
+    alignments: list[str] = []
     is_global: bool = False
+    ifcn_signatory: bool = False
+    eu_disinfo_listed: bool = False
+    eu_disinfo_case_count: int = 0
+    eu_disinfo_last_seen: Optional[str] = None
+    external_data_synced_at: Optional[str] = None


 # Source Discovery
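The shared pattern constants keep SourceCreate and SourceUpdate in sync and reject bad enum values at the API boundary. A small check, assuming pydantic v2 and the models defined above:

```python
from pydantic import ValidationError

try:
    SourceCreate(name="Beispielquelle", media_type="staatsmedium", reliability="extrem_hoch")
except ValidationError as e:
    # "staatsmedium" matches MEDIA_TYPE_PATTERN, but "extrem_hoch" is not in
    # RELIABILITY_PATTERN, so exactly one validation error is raised.
    print(len(e.errors()))  # -> 1
```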
@@ -4,10 +4,12 @@ import io
 import json
 import logging
 import re
+import uuid
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path

+import pikepdf
 from jinja2 import Environment, FileSystemLoader
 from weasyprint import HTML
 from docx import Document
@@ -24,10 +26,15 @@ LOGO_PATH = Path(__file__).parent / "static" / "favicon.svg"


 FC_STATUS_LABELS = {
-    "confirmed": "Bestätigt",
-    "unconfirmed": "Unbestätigt",
-    "disputed": "Umstritten",
-    "false": "Falsch",
+    # 1:1 from the Monitor frontend (components.js); consistent with the UI.
+    "confirmed": "Bestätigt",
+    "unconfirmed": "Unbestätigt",
+    "contradicted": "Widerlegt",
+    "developing": "Unklar",
+    "established": "Gesichert",
+    "disputed": "Umstritten",
+    "unverified": "Ungeprüft",
+    "false": "Falsch",  # legacy fallback
 }

@@ -391,10 +398,267 @@ LAGEBILD:
|
|||||||
return "<ul><li>Zusammenfassung konnte nicht generiert werden.</li></ul>"
|
return "<ul><li>Zusammenfassung konnte nicht generiert werden.</li></ul>"
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_db_timestamp(value) -> datetime | None:
|
||||||
|
"""SQLite-Timestamp robust als datetime parsen (ISO oder 'YYYY-MM-DD HH:MM:SS')."""
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value
|
||||||
|
try:
|
||||||
|
text = str(value).replace("T", " ").replace("Z", "")
|
||||||
|
# Sekundenbruchteile und Timezone-Offset abschneiden (python-docx mag nur naive dt)
|
||||||
|
text = text.split(".")[0].split("+")[0].strip()
|
||||||
|
return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
try:
|
||||||
|
return datetime.strptime(str(value)[:10], "%Y-%m-%d")
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _slug_scope_label(scope: str, sections: set[str] | None) -> str:
|
||||||
|
"""Scope-Label fuer Metadaten und Dateinamen."""
|
||||||
|
if sections:
|
||||||
|
if sections == {"zusammenfassung"}:
|
||||||
|
return "Zusammenfassung"
|
||||||
|
if "timeline" in sections:
|
||||||
|
return "Vollständiger Bericht"
|
||||||
|
return "Lagebericht"
|
||||||
|
return {"summary": "Zusammenfassung", "report": "Lagebericht", "full": "Vollständiger Bericht"}.get(
|
||||||
|
scope, "Lagebericht"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_export_metadata(
+    incident: dict,
+    articles: list,
+    fact_checks: list,
+    sources: list,
+    creator: str,
+    scope: str,
+    sections: set[str] | None,
+    organization_name: str | None,
+    top_locations: list[str] | None,
+    snapshot_count: int = 0,
+) -> dict:
+    """Unified metadata dict for the PDF (HTML meta tags) and DOCX (core_properties) exports."""
+    is_research = incident.get("type") == "research"
+    type_label = "Hintergrundrecherche" if is_research else "Live-Monitoring"
+    category = "OSINT-Hintergrundrecherche" if is_research else "OSINT-Lagebericht"
+    scope_label = _slug_scope_label(scope, sections)
+
+    title_raw = (incident.get("title") or "Unbenannte Lage").strip()
+    title = f"{title_raw} — {type_label}"
+
+    subject = (incident.get("description") or "").strip()
+    if not subject:
+        subject = f"{type_label} zu: {title_raw}"
+
+    # Collect keywords (order matters for display; dedup below preserves it)
+    keywords: list[str] = ["OSINT", type_label]
+    if organization_name:
+        keywords.append(organization_name)
+
+    # category_labels can be a JSON dict (map primary/secondary/...), a JSON list,
+    # or a comma-separated string. Extract only the label values.
+    cat_labels_raw = (incident.get("category_labels") or "").strip()
+    if cat_labels_raw:
+        cat_values: list[str] = []
+        try:
+            parsed = json.loads(cat_labels_raw)
+            if isinstance(parsed, dict):
+                cat_values = [str(v).strip() for v in parsed.values() if isinstance(v, str) and v.strip()]
+            elif isinstance(parsed, list):
+                cat_values = [str(v).strip() for v in parsed if isinstance(v, str) and v.strip()]
+        except (json.JSONDecodeError, TypeError):
+            cat_values = [lbl.strip() for lbl in cat_labels_raw.split(",") if lbl.strip()]
+        # Never let JSON fragments (curly/square brackets) through as a keyword
+        for lbl in cat_values:
+            if lbl and not any(c in lbl for c in "{}[]"):
+                keywords.append(lbl)
+
+    if top_locations:
+        keywords.extend([loc for loc in top_locations if loc])
+
+    # Sanitize: strip line breaks/tabs, filter characters with special meaning in PDF
+    def _sanitize_keyword(kw: str) -> str:
+        if not kw:
+            return ""
+        # Normalize whitespace
+        cleaned = re.sub(r"\s+", " ", kw).strip()
+        # Drop PDF dict/array brackets and backslashes (WeasyPrint escapes () in strings,
+        # but { and [ can truncate the keywords stream)
+        cleaned = re.sub(r"[{}\[\]\\]", "", cleaned)
+        return cleaned.strip(" ,;:")
+
+    # Dedup (case-insensitive) while preserving order, max 15
+    seen = set()
+    unique_keywords: list[str] = []
+    for kw in keywords:
+        clean_kw = _sanitize_keyword(kw)
+        if not clean_kw:
+            continue
+        key = clean_kw.lower()
+        if key not in seen:
+            seen.add(key)
+            unique_keywords.append(clean_kw)
+        if len(unique_keywords) >= 15:
+            break
+
+    now = datetime.now(TIMEZONE)
+    created = _parse_db_timestamp(incident.get("created_at")) or now.replace(tzinfo=None)
+    modified = _parse_db_timestamp(incident.get("updated_at")) or created
+
+    # Structured comments block (shown in DOCX, kept compact)
+    stand = now.strftime("%d.%m.%Y")
+    comments_lines = [
+        f"Incident-ID: {incident.get('id', '?')} | Typ: {incident.get('type', 'adhoc')} | Scope: {scope_label}",
+        f"Stand: {stand}",
+    ]
+    if organization_name:
+        comments_lines.append(f"Organisation: {organization_name}")
+    comments_lines.append(
+        f"Umfang: {len(articles)} Artikel, {len(fact_checks)} Faktenchecks, {len(sources)} Quellen"
+    )
+    if top_locations:
+        comments_lines.append("Orte: " + ", ".join(top_locations[:5]))
+    comments = "\n".join(comments_lines)
+
+    publisher = organization_name or "AegisSight"
+    identifier = f"urn:aegissight:incident:{incident.get('id', '0')}:{now.strftime('%Y%m%dT%H%M%S')}"
+    rights = (
+        "Vertrauliche Lageanalyse — AegisSight Monitor. "
+        "Weitergabe nur an autorisierte Empfänger."
+    )
+
+    return {
+        "title": title,
+        "author": creator or "AegisSight Monitor",
+        "subject": subject,
+        "keywords": unique_keywords,
+        "keywords_comma": ", ".join(unique_keywords),
+        "keywords_semicolon": "; ".join(unique_keywords),
+        "category": category,
+        "comments": comments,
+        "creator_app": "AegisSight Monitor",
+        "language": "de-DE",
+        "created": created,
+        "modified": modified,
+        "created_iso": created.strftime("%Y-%m-%dT%H:%M:%S"),
+        "modified_iso": modified.strftime("%Y-%m-%dT%H:%M:%S"),
+        "type_label": type_label,
+        "scope_label": scope_label,
+        "publisher": publisher,
+        "identifier": identifier,
+        "rights": rights,
+        "doc_type": "Report",
+        "version_id": str(max(1, snapshot_count)),
+    }
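
The keyword handling is the subtle part of this helper: labels may arrive as a JSON dict, a JSON list, or a comma string, and anything that could corrupt the PDF keywords stream gets stripped. A standalone sketch of the sanitize/dedup step on invented inputs:

```python
import re

# Standalone re-run of the keyword sanitize/dedup logic above on made-up
# inputs (hypothetical labels, not data from a real export).
def sanitize_keyword(kw: str) -> str:
    cleaned = re.sub(r"\s+", " ", kw or "").strip()
    cleaned = re.sub(r"[{}\[\]\\]", "", cleaned)  # strip PDF dict/array brackets
    return cleaned.strip(" ,;:")

raw = ["OSINT", "osint", "Cyber  \tSecurity", "Berlin,", "[Liste]"]
seen, unique = set(), []
for kw in raw:
    clean = sanitize_keyword(kw)
    if clean and clean.lower() not in seen:
        seen.add(clean.lower())
        unique.append(clean)
print(unique)  # ['OSINT', 'Cyber Security', 'Berlin', 'Liste']
```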
+
+
+def _format_pdf_date(dt: datetime) -> str:
+    """PDF date format: D:YYYYMMDDHHmmSS+HH'mm' (with timezone) or Z (UTC)."""
+    if dt.tzinfo is None:
+        # Naive dt: interpret as local TIMEZONE
+        dt = dt.replace(tzinfo=TIMEZONE)
+    base = dt.strftime("D:%Y%m%d%H%M%S")
+    offset = dt.utcoffset()
+    if offset is None:
+        return base + "Z"
+    total_minutes = int(offset.total_seconds() // 60)
+    sign = "+" if total_minutes >= 0 else "-"
+    total_minutes = abs(total_minutes)
+    return f"{base}{sign}{total_minutes // 60:02d}'{total_minutes % 60:02d}'"
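
The D:-prefixed form is the PDF spec's date syntax. Assuming TIMEZONE resolves to Europe/Berlin (an assumption; the constant is defined elsewhere in this module), a summer timestamp renders as follows:

```python
from datetime import datetime, timedelta, timezone

# Hypothetical check of _format_pdf_date with an explicit +02:00 offset,
# standing in for Europe/Berlin summer time.
dt = datetime(2025, 6, 12, 14, 30, 0, tzinfo=timezone(timedelta(hours=2)))
# _format_pdf_date(dt) -> "D:20250612143000+02'00'"
```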
+
+
+def _enrich_pdf_metadata(pdf_bytes: bytes, meta: dict) -> bytes:
+    """Post-process the PDF output with XMP metadata and CreationDate/ModDate (via pikepdf)."""
+    try:
+        buf_in = io.BytesIO(pdf_bytes)
+        with pikepdf.Pdf.open(buf_in) as pdf:
+            created: datetime = meta.get("created")
+            modified: datetime = meta.get("modified")
+            if created and created.tzinfo is None:
+                created = created.replace(tzinfo=TIMEZONE)
+            if modified and modified.tzinfo is None:
+                modified = modified.replace(tzinfo=TIMEZONE)
+
+            # Classic info dict: bring CreationDate + ModDate up to date
+            if created:
+                pdf.docinfo["/CreationDate"] = pikepdf.String(_format_pdf_date(created))
+            if modified:
+                pdf.docinfo["/ModDate"] = pikepdf.String(_format_pdf_date(modified))
+
+            # Document/instance IDs for DMS versioning (fresh per export)
+            doc_uuid = f"uuid:{uuid.uuid4()}"
+            instance_uuid = f"uuid:{uuid.uuid4()}"
+
+            # Write the XMP metadata block (Dublin Core + XMP + PDF + xmpRights + xmpMM)
+            with pdf.open_metadata(set_pikepdf_as_editor=False) as xmp:
+                # Dublin Core
+                xmp["dc:title"] = meta.get("title", "")
+                xmp["dc:creator"] = [meta.get("author", "")]
+                xmp["dc:description"] = meta.get("subject", "")
+                if meta.get("keywords"):
+                    xmp["dc:subject"] = list(meta["keywords"])
+                xmp["dc:language"] = [meta.get("language", "de-DE")]
+                xmp["dc:publisher"] = [meta.get("publisher", "AegisSight")]
+                xmp["dc:identifier"] = meta.get("identifier", "")
+                xmp["dc:format"] = "application/pdf"
+                xmp["dc:type"] = [meta.get("doc_type", "Report")]
+                xmp["dc:rights"] = meta.get("rights", "")
+                if created:
+                    xmp["dc:date"] = [created.strftime("%Y-%m-%dT%H:%M:%S%z")]
+
+                # PDF namespace
+                xmp["pdf:Keywords"] = meta.get("keywords_comma", "")
+                xmp["pdf:Producer"] = "WeasyPrint + AegisSight Monitor"
+
+                # XMP namespace
+                xmp["xmp:CreatorTool"] = meta.get("creator_app", "AegisSight Monitor")
+                if created:
+                    xmp["xmp:CreateDate"] = created.strftime("%Y-%m-%dT%H:%M:%S%z")
+                if modified:
+                    xmp["xmp:ModifyDate"] = modified.strftime("%Y-%m-%dT%H:%M:%S%z")
+                    xmp["xmp:MetadataDate"] = modified.strftime("%Y-%m-%dT%H:%M:%S%z")
+
+                # xmpRights: rights and confidentiality notice (XMP expects the string "True")
+                xmp["xmpRights:Marked"] = "True"
+                if meta.get("rights"):
+                    # Plain string: pikepdf wraps it automatically as LangAlt with x-default
+                    xmp["xmpRights:UsageTerms"] = meta["rights"]
+
+                # xmpMM: document and instance IDs for DMS versioning
+                xmp["xmpMM:DocumentID"] = doc_uuid
+                xmp["xmpMM:InstanceID"] = instance_uuid
+                xmp["xmpMM:VersionID"] = meta.get("version_id", "1")
+
+                # xmpMM:History: audit event for this export (one one-line entry per Seq item)
+                history_when = (modified or datetime.now(TIMEZONE)).strftime("%Y-%m-%dT%H:%M:%S%z")
+                history_entry = (
+                    f"action=published; when={history_when}; "
+                    f"softwareAgent={meta.get('creator_app', 'AegisSight Monitor')}; "
+                    f"instanceID={instance_uuid}; "
+                    f"scope={meta.get('scope_label', '')}; "
+                    f"version={meta.get('version_id', '1')}"
+                )
+                xmp["xmpMM:History"] = [history_entry]
+
+            buf_out = io.BytesIO()
+            pdf.save(buf_out)
+            return buf_out.getvalue()
+    except Exception as e:
+        logger.warning(f"PDF-Metadaten-Anreicherung (XMP/Dates) fehlgeschlagen: {e}")
+        return pdf_bytes
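
Whether the enrichment took effect is easy to verify with pikepdf itself; a quick inspection sketch, not part of the application code:

```python
import io
import pikepdf

def dump_pdf_meta(pdf_bytes: bytes) -> None:
    # Read back the info dict and a few of the XMP fields written above.
    with pikepdf.Pdf.open(io.BytesIO(pdf_bytes)) as pdf:
        print(pdf.docinfo.get("/CreationDate"), pdf.docinfo.get("/ModDate"))
        with pdf.open_metadata() as xmp:
            print(xmp.get("dc:title"), xmp.get("xmpMM:VersionID"))
```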


 async def generate_pdf(
     incident: dict, articles: list, fact_checks: list, snapshots: list,
     scope: str, creator: str, executive_summary_html: str,
     sections: set[str] | None = None,
+    organization_name: str | None = None,
+    top_locations: list[str] | None = None,
+    snapshot_count: int = 0,
 ) -> bytes:
     """Generate the PDF report via WeasyPrint."""
     # Derive sections from scope when not explicitly given
@@ -424,6 +688,11 @@ async def generate_pdf(
     if not is_research and zusammenfassung_html:
         zusammenfassung_html = _linkify_citations_html(zusammenfassung_html, all_sources)

+    meta = _build_export_metadata(
+        incident, articles, fact_checks, all_sources, creator, scope, sections,
+        organization_name, top_locations, snapshot_count=snapshot_count,
+    )
+
     env = Environment(loader=FileSystemLoader(str(TEMPLATE_DIR)))
     template = env.get_template("report.html")
@@ -445,10 +714,11 @@ async def generate_pdf(
         ),
         lagebild_timestamp=(incident.get("updated_at") or "")[:16].replace("T", " "),
         sources=_prepare_sources(incident)[:30] if scope == "report" else _prepare_sources(incident),
-        fact_checks=_prepare_fact_checks(fact_checks[:20] if scope == "report" else fact_checks),
+        fact_checks=_prepare_fact_checks(fact_checks),
         source_stats=_prepare_source_stats(articles)[:20] if scope == "report" else _prepare_source_stats(articles),
         timeline=_prepare_timeline(articles) if scope == "full" else [],
         articles=articles if scope == "full" else [],
+        meta=meta,
     )

     # Prepare article pub_date
@@ -461,6 +731,7 @@ async def generate_pdf(
         art["pub_date"] = pub[:10] if pub else ""

     pdf_bytes = HTML(string=html_content).write_pdf()
+    pdf_bytes = _enrich_pdf_metadata(pdf_bytes, meta)
     return pdf_bytes

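End to end, the PDF path is now: Jinja2 render, WeasyPrint write_pdf, pikepdf post-processing. A minimal driver sketch; the diff does not show the module's path, so the import below is a placeholder, and the incident dict is invented:

```python
import asyncio

from report_export import generate_pdf  # placeholder module name

async def main() -> None:
    pdf = await generate_pdf(
        incident={"id": 42, "type": "adhoc", "title": "Testlage"},  # invented incident
        articles=[], fact_checks=[], snapshots=[],
        scope="report", creator="Analyst",
        executive_summary_html="<p>Kurzfassung</p>",
        organization_name="Demo GmbH", top_locations=["Berlin"],
        snapshot_count=3,
    )
    with open("report.pdf", "wb") as fh:
        fh.write(pdf)

asyncio.run(main())
```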
@@ -468,6 +739,9 @@ async def generate_docx(
     incident: dict, articles: list, fact_checks: list, snapshots: list,
     scope: str, creator: str, executive_summary_text: str,
     sections: set[str] | None = None,
+    organization_name: str | None = None,
+    top_locations: list[str] | None = None,
+    snapshot_count: int = 0,
 ) -> bytes:
     """Generate the Word report via python-docx."""
     doc = Document()
@@ -485,7 +759,7 @@ async def generate_docx(
     is_research = incident.get("type") == "research"
     all_sources = _prepare_sources(incident)
     zusammenfassung_text = executive_summary_text
-    bericht_summary = incident.get("summary") or "Keine Zusammenfassung verfuegbar."
+    bericht_summary = incident.get("summary") or "Keine Zusammenfassung verfügbar."
     zusammenfassung_title = "Zusammenfassung"
     zusammenfassung_lines: list[str] = []

@@ -496,6 +770,28 @@ async def generate_docx(
         zusammenfassung_title = "Zusammenfassung"
         bericht_summary = remaining

+    meta = _build_export_metadata(
+        incident, articles, fact_checks, all_sources, creator, scope, sections,
+        organization_name, top_locations, snapshot_count=snapshot_count,
+    )
+
+    # Set file metadata (visible in Explorer/Finder and DMS systems)
+    cp = doc.core_properties
+    cp.title = meta["title"]
+    cp.author = meta["author"]
+    cp.subject = meta["subject"]
+    cp.keywords = meta["keywords_semicolon"]
+    cp.comments = meta["comments"]
+    cp.category = meta["category"]
+    cp.last_modified_by = meta["author"]
+    cp.language = meta["language"]
+    cp.content_status = "Final"
+    try:
+        cp.created = meta["created"]
+        cp.modified = meta["modified"]
+    except (ValueError, TypeError) as e:
+        logger.warning(f"DOCX created/modified konnte nicht gesetzt werden: {e}")
+
     # Styles
     style = doc.styles['Normal']
     style.font.size = Pt(10)
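python-docx writes these core properties into docProps/core.xml, where Explorer, Finder, and DMS indexers pick them up; reading them back is a one-line check:

```python
from docx import Document

# Sketch: reopen a generated report and print the properties set above
# (the file name is illustrative).
cp = Document("report.docx").core_properties
print(cp.title, "|", cp.keywords, "|", cp.content_status, "|", cp.created)
```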
@@ -1,7 +1,19 @@
 <!DOCTYPE html>
-<html lang="de">
+<html lang="{{ meta.language if meta else 'de-DE' }}">
 <head>
 <meta charset="UTF-8">
+{% if meta %}
+<title>{{ meta.title }}</title>
+<meta name="author" content="{{ meta.author }}">
+<meta name="description" content="{{ meta.subject }}">
+<meta name="keywords" content="{{ meta.keywords_comma }}">
+<meta name="subject" content="{{ meta.subject }}">
+<meta name="generator" content="{{ meta.creator_app }}">
+<meta name="dcterms.created" content="{{ meta.created_iso }}">
+<meta name="dcterms.modified" content="{{ meta.modified_iso }}">
+{% else %}
+<title>{{ incident.title }}</title>
+{% endif %}
 <style>
 @page { margin: 20mm 18mm 20mm 18mm; size: A4; @bottom-center { content: "Seite " counter(page) " von " counter(pages); font-size: 8pt; color: #0a1832; } }
 * { box-sizing: border-box; margin: 0; padding: 0; }
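WeasyPrint maps exactly these tags into its document metadata: the title element plus the author, description, keywords, generator, dcterms.created, and dcterms.modified meta names end up in the PDF info dictionary, which is what makes the template block above effective. A quick demonstration:

```python
from weasyprint import HTML

doc = HTML(string=(
    "<title>Demo-Lagebericht</title>"
    "<meta name='author' content='AegisSight Monitor'>"
    "<meta name='keywords' content='OSINT, Live-Monitoring'>"
)).render()
print(doc.metadata.title, doc.metadata.authors, doc.metadata.keywords)
```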
@@ -1,7 +1,13 @@
 """Auth router: magic-link login and user management."""
 import logging
+import os
 from datetime import datetime, timedelta
 from fastapi import APIRouter, Depends, HTTPException, Request, status
+
+
+def _staging_mode() -> bool:
+    """STAGING_MODE env flag (cf. services.license_service)."""
+    return os.environ.get("STAGING_MODE", "").lower() in ("1", "true", "yes")
 from models import (
     MagicLinkRequest,
     MagicLinkResponse,
@@ -187,10 +193,11 @@ async def get_me(
     from services.license_service import check_license
     license_info = await check_license(db, current_user["tenant_id"])

-    # Load credits data
+    # Load credits data (true percentages, not capped)
     credits_total = None
     credits_remaining = None
     credits_percent_used = None
+    unlimited_budget = bool(license_info.get("unlimited_budget", False))
     if current_user.get("tenant_id"):
         lic_cursor = await db.execute(
             "SELECT credits_total, credits_used, cost_per_credit FROM licenses WHERE organization_id = ? AND status = 'active' ORDER BY id DESC LIMIT 1",
@@ -200,7 +207,12 @@ async def get_me(
             credits_total = lic_row["credits_total"]
             credits_used = lic_row["credits_used"] or 0
             credits_remaining = max(0, int(credits_total - credits_used))
-            credits_percent_used = round(min(100, (credits_used / credits_total) * 100), 1) if credits_total > 0 else 0
+            credits_percent_used = round((credits_used / credits_total) * 100, 1) if credits_total > 0 else 0
+
+    # STAGING_MODE: disable the org switcher in the frontend
+    is_global_admin_response = current_user.get("is_global_admin", False)
+    if _staging_mode():
+        is_global_admin_response = False
+
     return UserMeResponse(
         id=current_user["id"],
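Dropping the min(100, ...) cap means over-consumption now shows up as a true percentage, which a frontend can use to flag an over-budget tenant; a two-line check:

```python
credits_total, credits_used = 1000, 1230
print(round((credits_used / credits_total) * 100, 1))  # 123.0, previously capped at 100
```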
@@ -216,7 +228,9 @@ async def get_me(
         license_status=license_info.get("status", "unknown"),
         license_type=license_info.get("license_type", ""),
         read_only=license_info.get("read_only", False),
-        is_global_admin=current_user.get("is_global_admin", False),
+        read_only_reason=license_info.get("read_only_reason"),
+        unlimited_budget=unlimited_budget,
+        is_global_admin=is_global_admin_response,
     )
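For this response to validate, UserMeResponse in models.py must carry the two new fields; their types can only be inferred from the values assigned above, so the following is a sketch, not the actual model:

```python
from pydantic import BaseModel

class UserMeResponse(BaseModel):
    # Only the fields touched by this diff; the real model defines more.
    id: int
    license_status: str
    license_type: str
    read_only: bool = False
    read_only_reason: str | None = None  # new in this diff
    unlimited_budget: bool = False       # new in this diff
    is_global_admin: bool = False
```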
@@ -12,6 +12,11 @@ from pydantic import BaseModel, Field
 from auth import get_current_user
 from config import CLAUDE_PATH, CLAUDE_MODEL_FAST
+from database import db_dependency
+from middleware.license_check import require_writable_license
+from services.license_service import charge_usage_to_tenant
+from agents.claude_client import ClaudeUsage, ClaudeCliError, _classify_cli_error
+import aiosqlite

 logger = logging.getLogger("osint.chat")
@@ -21,8 +26,8 @@ router = APIRouter(tags=["chat"])
 # Claude CLI call (chat-specific, no JSON mode)
 # ---------------------------------------------------------------------------

-async def _call_claude_chat(prompt: str) -> tuple[str, int]:
-    """Calls the Claude CLI for chat. Returns (text, duration_ms).
+async def _call_claude_chat(prompt: str) -> tuple[str, int, ClaudeUsage]:
+    """Calls the Claude CLI for chat. Returns (text, duration_ms, usage).

     Unlike call_claude(): no JSON output mode, no append-system-prompt.
     """
@@ -46,7 +51,7 @@ async def _call_claude_chat(prompt: str) -> tuple[str, int]:
     )
     try:
         stdout, stderr = await asyncio.wait_for(
-            process.communicate(input=prompt.encode("utf-8")), timeout=60
+            process.communicate(input=prompt.encode("utf-8")), timeout=120
         )
     except asyncio.TimeoutError:
         process.kill()
@@ -54,29 +59,44 @@ async def _call_claude_chat(prompt: str) -> tuple[str, int]:

     if process.returncode != 0:
         err_msg = stderr.decode("utf-8", errors="replace").strip()
-        logger.error(f"Chat Claude CLI Fehler (rc={process.returncode}): {err_msg[:500]}")
-        if "rate_limit" in err_msg.lower() or "overloaded" in err_msg.lower():
-            raise RuntimeError("rate_limit")
-        raise RuntimeError(f"Claude CLI Fehler: {err_msg[:200]}")
+        stdout_msg = stdout.decode("utf-8", errors="replace").strip()
+        combined = f"{err_msg} {stdout_msg}"
+        error_type = _classify_cli_error(combined)
+        logger.error(f"Chat Claude CLI Fehler [{error_type}] (rc={process.returncode}): {(stdout_msg or err_msg)[:500]}")
+        raise ClaudeCliError(error_type, stdout_msg or err_msg)

     raw = stdout.decode("utf-8", errors="replace").strip()
     duration_ms = 0
     result_text = raw
+    usage = ClaudeUsage()

     try:
         data = _json.loads(raw)
+        if data.get("is_error"):
+            error_text = str(data.get("result", ""))
+            error_type = _classify_cli_error(error_text)
+            logger.error(f"Chat Claude CLI Fehler [{error_type}] (is_error): {error_text[:500]}")
+            raise ClaudeCliError(error_type, error_text)
+
         result_text = data.get("result", raw)
         duration_ms = data.get("duration_ms", 0)
-        cost = data.get("total_cost_usd", 0.0)
         u = data.get("usage", {})
+        usage = ClaudeUsage(
+            input_tokens=u.get("input_tokens", 0),
+            output_tokens=u.get("output_tokens", 0),
+            cache_creation_tokens=u.get("cache_creation_input_tokens", 0),
+            cache_read_tokens=u.get("cache_read_input_tokens", 0),
+            cost_usd=data.get("total_cost_usd", 0.0),
+            duration_ms=duration_ms,
+        )
         logger.info(
-            f"Chat Claude: {u.get('input_tokens', 0)} in / {u.get('output_tokens', 0)} out / "
-            f"${cost:.4f} / {duration_ms}ms"
+            f"Chat Claude: {usage.input_tokens} in / {usage.output_tokens} out / "
+            f"${usage.cost_usd:.4f} / {duration_ms}ms"
         )
     except _json.JSONDecodeError:
         logger.warning("Chat Claude CLI Antwort kein JSON, nutze raw output")

-    return result_text, duration_ms
+    return result_text, duration_ms, usage

 # ---------------------------------------------------------------------------
 # Models
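The parsing branch targets the CLI's JSON envelope. A shape-compatible sample (field names as used above, all numbers invented) shows what feeds ClaudeUsage:

```python
import json

raw = json.dumps({
    "is_error": False,
    "result": "Antworttext",
    "duration_ms": 1840,
    "total_cost_usd": 0.0123,
    "usage": {
        "input_tokens": 2100,
        "output_tokens": 350,
        "cache_creation_input_tokens": 0,
        "cache_read_input_tokens": 1800,
    },
})
data = json.loads(raw)
u = data["usage"]
print(u["input_tokens"], u["output_tokens"], data["total_cost_usd"])
# -> 2100 350 0.0123, the same values the log line above reports
```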
@@ -298,7 +318,7 @@ Typische Fragen die du beantworten kannst:
 FEATURE-DOKUMENTATION:

 Lage/Recherche erstellen:
-Oben im Dashboard gibt es den Button "Neue Lage". Dort waehlt der Nutzer unter "Art der Lage" zwischen zwei Typen. "Live-Monitoring, Ereignis beobachten" durchsucht laufend hunderte Nachrichtenquellen nach neuen Meldungen zu einem aktuellen Ereignis, hier reicht eine kurze, praegnante Beschreibung. Empfohlen ist die automatische Aktualisierung. "Recherche, Thema analysieren" ist fuer tiefergehende Analysen gedacht, hier sollte eine ausfuehrlichere Beschreibung mit Kontext, Zeitraum und Fokus eingegeben werden, das System nutzt dann KI-gestuetzte Quellenauswahl und eine breitere Suche. Empfohlen ist manuelles Starten und bei Bedarf vertiefen. Bei beiden Typen gibt der Nutzer Titel und Beschreibung ein und klickt "Lage anlegen". Der erste Refresh startet automatisch und sammelt passende Artikel. In der Sidebar werden Live-Monitoring Lagen unter "Live-Monitoring" und Recherchen unter "Recherchen" gruppiert angezeigt.
+Oben im Dashboard gibt es den Button "Neue Lage". Dort waehlt der Nutzer unter "Art der Lage" zwischen zwei Typen. "Live-Monitoring, Ereignis beobachten" eignet sich fuer aktuelle Ereignisse, die der Nutzer laufend verfolgen moechte, hier reicht eine kurze, praegnante Beschreibung. Empfohlen ist die automatische Aktualisierung. "Recherche, Thema analysieren" ist fuer tiefergehende Analysen gedacht, hier sollte eine ausfuehrlichere Beschreibung mit Kontext, Zeitraum und Fokus eingegeben werden. Empfohlen ist manuelles Starten und bei Bedarf vertiefen. Bei beiden Typen gibt der Nutzer Titel und Beschreibung ein und klickt "Lage anlegen". Nach dem Anlegen startet die erste Aktualisierung automatisch. In der Sidebar werden Live-Monitoring Lagen unter "Live-Monitoring" und Recherchen unter "Recherchen" gruppiert angezeigt.

 Wichtiger Unterschied bei Kacheln: Bei Live-Monitoring heisst die Zusammenfassungs-Kachel "Lagebild", bei Recherche-Lagen heisst sie "Recherchebericht". Auch im PDF-Export, in den Layout-Toggles und bei E-Mail-Benachrichtigungen passt sich die Bezeichnung entsprechend an.
@@ -308,17 +328,17 @@ Je praeziser die Beschreibung, desto relevantere Ergebnisse liefert das System.
 Quellen:
 Quellen werden automatisch vom System verwaltet. Es gibt verschiedene Kategorien: oeffentlich-rechtlich, Qualitaetszeitung, Nachrichtenagentur, international, Behoerde, Telegram und sonstige. Unter den Quellen-Einstellungen koennen bestimmte Domains blockiert werden, damit deren Artikel nicht mehr in Lagen erscheinen. Das System schlaegt auch automatisch neue relevante Quellen vor basierend auf den Themen der Lagen. Die Quellenansicht zeigt fuer jede Quelle Name, Kategorie, Typ, Artikelanzahl und wann zuletzt Artikel gefunden wurden.

-Refresh-Modi:
-Jede Lage hat einen Refresh-Modus. "Manuell" bedeutet, der Nutzer klickt selbst auf "Aktualisieren" wenn er neue Artikel suchen moechte. "Automatisch" laesst das System in einem einstellbaren Intervall automatisch nach neuen Artikeln suchen. Das Intervall ist pro Lage einstellbar, z.B. alle 15, 30, 60 oder 180 Minuten. Bei einem Refresh durchsucht das System alle konfigurierten Quellen nach neuen relevanten Artikeln, erstellt oder aktualisiert die Zusammenfassung und fuehrt Faktenchecks durch.
+Aktualisierungs-Modi:
+Jede Lage hat einen Aktualisierungs-Modus. "Manuell" bedeutet, der Nutzer klickt selbst auf "Aktualisieren" wenn er neue Artikel suchen moechte. "Automatisch" laesst die Lage in einem selbst gewaehlten Intervall turnusmaessig nach neuen Artikeln suchen. Das Intervall kann in Minuten, Stunden, Tagen oder Wochen angegeben werden, mindestens 10 Minuten. Im Automatik-Modus laesst sich ausserdem eine Uhrzeit fuer die erste Aktualisierung festlegen, danach laeuft es im gewaehlten Takt weiter. Bei jeder Aktualisierung kommen neue Artikel hinzu, die Zusammenfassung wird aktualisiert und die Faktenchecks werden neu bewertet.

 Faktenchecks:
-Das System prueft automatisch Behauptungen aus den gesammelten Artikeln. Es gibt vier Status: "Bestaetigt" bedeutet mehrere unabhaengige Quellen bestaetigen die Information. "Umstritten" heisst Quellen widersprechen sich und die Faktenlage ist unklar. "Widerlegt" bedeutet die Information wurde durch zuverlaessige Quellen widerlegt. "In Entwicklung" zeigt an dass noch nicht genug Informationen fuer eine Einschaetzung vorliegen. Die Faktenchecks werden bei jedem Refresh automatisch aktualisiert und koennen sich im Laufe der Zeit aendern wenn neue Evidenz hinzukommt.
+In der Faktencheck-Kachel werden zentrale Behauptungen aus den Artikeln mit einem Status markiert. Es gibt fuenf Status: "Bestaetigt" (gruenes Haekchen) heisst, mindestens zwei unabhaengige, serioese Quellen stuetzen die Aussage uebereinstimmend. "Gesichert" (gruenes Haekchen) bedeutet, drei oder mehr unabhaengige Quellen belegen den Sachverhalt, hohe Verlaesslichkeit. "Unbestaetigt" (Fragezeichen) zeigt an, dass die Aussage bisher nur aus einer Quelle stammt und eine unabhaengige Bestaetigung aussteht. "Umstritten" (Warndreieck) bedeutet, Quellen widersprechen sich, es gibt sowohl stuetzende als auch widersprechende Belege. "Widerlegt" (rotes Kreuz) heisst, zuverlaessige Quellen widersprechen der Aussage und sie ist wahrscheinlich falsch. Der Status kann sich bei spaeteren Aktualisierungen aendern, wenn neue Belege hinzukommen.

 Benachrichtigungen und Abos:
-Lagen koennen ueber das Glocken-Symbol abonniert werden. Es gibt verschiedene E-Mail-Benachrichtigungstypen: Zusammenfassung nach einem Refresh, Benachrichtigung bei neuen Artikeln und Benachrichtigung bei Statusaenderungen von Faktenchecks. Im Dashboard erscheinen neue Benachrichtigungen als Badge am Glocken-Symbol. Welche Benachrichtigungstypen gewuenscht sind, laesst sich pro Lage einzeln einstellen.
+Lagen koennen ueber das Glocken-Symbol abonniert werden. Beim Anlegen oder Bearbeiten einer Lage koennen drei E-Mail-Benachrichtigungen einzeln aktiviert werden: "Neues Lagebild" (bzw. Recherchebericht) informiert nach einer Aktualisierung ueber die neue Zusammenfassung, "Neue Artikel" meldet gefundene Artikel und "Statusaenderung Faktencheck" meldet, wenn sich der Status einer geprueften Aussage aendert. Im Dashboard erscheinen neue Benachrichtigungen zusaetzlich als Badge am Glocken-Symbol.

 Export:
-Im Lage-Detail gibt es einen Export-Button. Der Markdown-Export erzeugt einen vollstaendigen Lagebericht als .md-Datei mit Zusammenfassung, Artikeln und Faktenchecks. Der JSON-Export liefert strukturierte Daten zur Weiterverarbeitung in anderen Systemen.
+Im Lage-Detail gibt es einen Export-Button. Der Nutzer waehlt im Export-Dialog zunaechst aus, welche Bereiche enthalten sein sollen: "Zusammenfassung", "Recherchebericht / Lagebild", "Faktencheck" und "Quellen". Als Format stehen "PDF" und "Word (DOCX)" zur Verfuegung. Mit "Exportieren" wird die Datei erzeugt und heruntergeladen.

 Sichtbarkeit:
 Jede Lage kann "oeffentlich" oder "privat" sein. Oeffentliche Lagen sind fuer alle Nutzer der Organisation sichtbar. Private Lagen kann nur der Ersteller sehen und bearbeiten. Die Sichtbarkeit laesst sich ueber das Einstellungs-Menue der jeweiligen Lage aendern.
@@ -326,8 +346,8 @@ Jede Lage kann "oeffentlich" oder "privat" sein. Oeffentliche Lagen sind fuer al
 Retention (Aufbewahrung):
 Standardmaessig werden Lagen unbegrenzt aufbewahrt. Es kann aber eine Aufbewahrungsdauer in Tagen eingestellt werden. Nach Ablauf wird die Lage automatisch archiviert. Archivierte Lagen bleiben lesbar, werden aber nicht mehr automatisch aktualisiert.

-Kartenansicht (Geoparsing):
-Artikel werden automatisch auf geografische Erwahnungen analysiert. Erkannte Orte erscheinen auf einer interaktiven Karte mit farbigen Markern. Die Farben zeigen die Relevanz: Rot fuer Hauptgeschehen, Orange fuer Reaktionen, Blau fuer Beteiligte und Grau fuer erwaehnte Orte. Bei vielen Markern werden diese zu Clustern zusammengefasst. Ein Klick auf einen Marker zeigt die zugehoerigen Artikel. Die Karte hat einen Vollbildmodus und die Kategorien lassen sich ueber Checkboxen in der Legende ein- und ausblenden.
+Kartenansicht:
+In der Karten-Kachel erscheinen alle zur Lage erkannten Orte als farbige Marker. Die Farben zeigen die Relevanz: Rot fuer Hauptgeschehen, Orange fuer Reaktionen, Blau fuer Beteiligte und Grau fuer erwaehnte Orte. Bei vielen Markern werden diese zu Clustern zusammengefasst, ein Klick auf einen Marker oeffnet die zugehoerigen Artikel. Ueber das Vollbild-Symbol laesst sich die Karte grossformatig anzeigen, die Kategorien koennen ueber Checkboxen in der Legende ein- und ausgeblendet werden.

 Quellenausschluss:
 Bestimmte Domains koennen ueber die Quellen-Einstellungen blockiert werden. Blockierte Quellen tauchen dann in keiner Lage mehr auf. So lassen sich unerwuenschte oder unzuverlaessige Quellen dauerhaft ausschliessen.
@@ -395,7 +415,8 @@ def _build_prompt(user_message: str, history: list[dict]) -> str:
 @router.post("", response_model=ChatResponse)
 async def chat(
     req: ChatRequest,
-    current_user: dict = Depends(get_current_user),
+    current_user: dict = Depends(require_writable_license),
+    db: aiosqlite.Connection = Depends(db_dependency),
 ):
     """Process a chat message and generate a reply."""
     user_id = current_user["id"]
@@ -420,15 +441,23 @@ async def chat(

     # Call the Claude CLI
     try:
-        result, duration_ms = await _call_claude_chat(prompt)
+        result, duration_ms, usage = await _call_claude_chat(prompt)
     except TimeoutError:
         raise HTTPException(status_code=504, detail="Der Assistent antwortet gerade nicht. Bitte versuche es erneut.")
-    except RuntimeError as e:
-        error_str = str(e)
-        if "rate_limit" in error_str:
+    except ClaudeCliError as e:
+        if e.error_type == "rate_limit":
             raise HTTPException(status_code=429, detail="Der Assistent ist gerade ausgelastet. Bitte versuche es in einer Minute erneut.")
-        logger.error(f"Chat Claude-Fehler: {e}")
+        if e.error_type == "auth_error":
+            raise HTTPException(status_code=503, detail="KI-Zugang aktuell nicht verfuegbar. Bitte Administrator kontaktieren.")
+        logger.error(f"Chat Claude-Fehler [{e.error_type}]: {e}")
         raise HTTPException(status_code=502, detail="Der Assistent ist voruebergehend nicht erreichbar.")
+    except RuntimeError as e:
+        logger.error(f"Chat Claude-Fehler (unspezifisch): {e}")
+        raise HTTPException(status_code=502, detail="Der Assistent ist voruebergehend nicht erreichbar.")
+
+    # Charge credits
+    await charge_usage_to_tenant(db, current_user.get("tenant_id"), usage, source="chat")
+    await db.commit()

     # Sanitize output
     reply = _sanitize_output(result)
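With require_writable_license and charge_usage_to_tenant wired in, every chat call now gates on a writable license and books its token cost against the tenant. A client-side smoke test might look like this; the URL prefix, request field, and cookie name are assumptions, not part of this diff:

```python
import httpx

resp = httpx.post(
    "http://localhost:8000/chat",          # placeholder base URL/prefix
    json={"message": "Wie lege ich eine neue Lage an?"},  # assumed field name
    cookies={"session": "<auth-cookie>"},  # assumed cookie name
    timeout=130,  # the CLI itself may now run up to 120 s
)
print(resp.status_code, resp.json())
```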
@@ -1,7 +1,7 @@
 """Incidents router: manage incidents (multi-tenant)."""
 from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, status
 from fastapi.responses import StreamingResponse
-from models import IncidentCreate, IncidentUpdate, IncidentResponse, SubscriptionUpdate, SubscriptionResponse, DescriptionEnhanceRequest
+from models import IncidentCreate, IncidentUpdate, IncidentResponse, IncidentListItem, SubscriptionUpdate, SubscriptionResponse, DescriptionEnhanceRequest
 from auth import get_current_user
 from middleware.license_check import require_writable_license
 from database import db_dependency, get_db
@@ -69,17 +69,30 @@ async def _enrich_incident(db: aiosqlite.Connection, row: aiosqlite.Row) -> dict
     return incident


-@router.get("", response_model=list[IncidentResponse])
+@router.get("", response_model=list[IncidentListItem])
 async def list_incidents(
     status_filter: str = None,
     current_user: dict = Depends(get_current_user),
     db: aiosqlite.Connection = Depends(db_dependency),
 ):
-    """List all incidents of the tenant (public + own private ones)."""
+    """List all incidents of the tenant (public + own private ones).
+
+    Returns slim sidebar items, without summary, description, sources_json.
+    Full texts are only loaded when an incident is opened via GET /incidents/{id}.
+    """
     tenant_id = current_user.get("tenant_id")
     user_id = current_user["id"]

-    query = "SELECT * FROM incidents WHERE tenant_id = ? AND (visibility = 'public' OR created_by = ?)"
+    # Select only the columns needed for the sidebar + edit dialog
+    # (for the Iran incident this saves 324 KB of sources_json + 32 KB of summary).
+    # has_summary as a bit; the frontend uses it to detect the "first refresh".
+    query = (
+        "SELECT id, title, description, type, status, refresh_mode, refresh_interval, "
+        "refresh_start_time, retention_days, visibility, "
+        "international_sources, include_telegram, created_by, created_at, updated_at, "
+        "CASE WHEN summary IS NOT NULL AND summary != '' THEN 1 ELSE 0 END AS has_summary "
+        "FROM incidents WHERE tenant_id = ? AND (visibility = 'public' OR created_by = ?)"
+    )
     params = [tenant_id, user_id]

     if status_filter:
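IncidentListItem is defined in models.py, outside this diff; from the SELECT column list its shape can be inferred roughly as follows (field types are educated guesses):

```python
from pydantic import BaseModel

class IncidentListItem(BaseModel):
    # Sketch inferred from the SELECT column list above; not the actual model.
    id: int
    title: str
    description: str | None = None
    type: str
    status: str
    refresh_mode: str
    refresh_interval: int | None = None
    refresh_start_time: str | None = None
    retention_days: int | None = None
    visibility: str
    international_sources: bool = False
    include_telegram: bool = False
    created_by: int
    created_at: str
    updated_at: str | None = None
    has_summary: bool = False  # the CASE WHEN bit coerces to bool
```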
@@ -155,43 +168,60 @@ async def get_refreshing_incidents(
     from agents.orchestrator import orchestrator
     queued_ids = list(orchestrator._queued_ids) if hasattr(orchestrator, '_queued_ids') else []
     current_task = orchestrator._current_task if hasattr(orchestrator, '_current_task') else None
+    # Session start of the currently running task; stable across multi-pass/retry.
+    # Prevents the frontend timer from jumping back to the latest log entry
+    # (pass 2/3 or retry n) on reload.
+    current_started_at = (
+        orchestrator._current_task_started_at
+        if hasattr(orchestrator, '_current_task_started_at') else None
+    )
+
+    details = {}
+    for row in rows:
+        iid = row["incident_id"]
+        started_at = (
+            current_started_at
+            if (iid == current_task and current_started_at)
+            else row["started_at"]
+        )
+        details[str(iid)] = {"started_at": started_at}
+
     return {
         "refreshing": [row["incident_id"] for row in rows],
         "queued": queued_ids,
         "current": current_task,
-        "details": {str(row["incident_id"]): {"started_at": row["started_at"]} for row in rows},
+        "details": details,
     }


 # --- Generate description (prompt enhancement) ---

 ENHANCE_PROMPT_RESEARCH = """Du bist ein Recherche-Planer in einem OSINT-Lagemonitoring-System.
-Deine Aufgabe: Strukturiere ein Recherche-Briefing, das Analysten als Leitfaden fuer ihre Suche verwenden.
+Deine Aufgabe: Strukturiere ein Recherche-Briefing, das Analysten als Leitfaden für ihre Suche verwenden.
 Du behauptest KEINE Fakten und musst das Thema NICHT kennen oder verifizieren.
 Der Nutzer gibt das Thema vor -- du definierst Suchrichtungen, Schwerpunkte und Stichworte.
 Erstelle das Briefing IMMER, auch wenn dir das Thema unbekannt ist.

-WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ae, oe, ue, ss) und KEINE Umschreibungen.
+WICHTIG: Verwende IMMER echte Umlaute (ä, ö, ü, ß) und KEINE Umschreibungen.

 Titel: {title}
 Vorhandener Kontext: {context}
 Typ: Hintergrundrecherche

-Erstelle ein praezises Recherche-Briefing mit:
-1. Fallbezeichnung (vollstaendige Benennung des Themas basierend auf Titel und Kontext)
-2. Recherche-Schwerpunkte (5-8 thematische Punkte, z.B. Sachverhalt, beteiligte Parteien, rechtliche Aspekte, mediale Rezeption, Hintergruende, Chronologie)
-3. Relevante Suchbegriffe (deutsch + englisch, inkl. Abkuerzungen und alternative Schreibweisen)
+Erstelle ein präzises Recherche-Briefing mit:
+1. Fallbezeichnung (vollständige Benennung des Themas basierend auf Titel und Kontext)
+2. Recherche-Schwerpunkte (5-8 thematische Punkte, z.B. Sachverhalt, beteiligte Parteien, rechtliche Aspekte, mediale Rezeption, Hintergründe, Chronologie)
+3. Relevante Suchbegriffe (deutsch + englisch, inkl. Abkürzungen und alternative Schreibweisen)

-Schreibe NUR das Briefing als Fliesstext mit Aufzaehlungen. Keine Erklaerungen, Rueckfragen oder Disclaimer."""
+Schreibe NUR das Briefing als Fließtext mit Aufzählungen. Keine Erklärungen, Rückfragen oder Disclaimer."""

 ENHANCE_PROMPT_ADHOC = """Du bist ein Recherche-Planer in einem OSINT-Lagemonitoring-System.
-Deine Aufgabe: Erstelle eine knappe Vorfallsbeschreibung, die als Suchauftrag fuer Live-Monitoring dient.
+Deine Aufgabe: Erstelle eine knappe Vorfallsbeschreibung, die als Suchauftrag für Live-Monitoring dient.
 Du behauptest KEINE Fakten und musst den Vorfall NICHT kennen oder verifizieren.
 Der Nutzer gibt das Thema vor -- du strukturierst, wonach gesucht werden soll.
 Erstelle die Beschreibung IMMER, auch wenn dir der Vorfall unbekannt ist.

-WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ae, oe, ue, ss) und KEINE Umschreibungen.
+WICHTIG: Verwende IMMER echte Umlaute (ä, ö, ü, ß) und KEINE Umschreibungen.

 Titel: {title}
 Vorhandener Kontext: {context}
@@ -200,10 +230,10 @@ Typ: Live-Monitoring (aktuelle Ereignisse)
 Erstelle eine knappe, informative Beschreibung mit:
 1. Was ist passiert / worum geht es (basierend auf Titel und Kontext)
 2. Wo (geographischer Kontext, falls ableitbar)
-3. Wer ist beteiligt (Akteure, Organisationen, Laender)
-4. Wonach soll gesucht werden (aktuelle Entwicklungen, Reaktionen, Hintergruende)
+3. Wer ist beteiligt (Akteure, Organisationen, Länder)
+4. Wonach soll gesucht werden (aktuelle Entwicklungen, Reaktionen, Hintergründe)

-Schreibe NUR die Beschreibung als Fliesstext (3-5 Zeilen). Keine Erklaerungen, Rueckfragen oder Disclaimer."""
+Schreibe NUR die Beschreibung als Fließtext (3-5 Zeilen). Keine Erklärungen, Rückfragen oder Disclaimer."""

 _enhance_logger = logging.getLogger("osint.enhance")
@@ -211,27 +241,44 @@ _enhance_logger = logging.getLogger("osint.enhance")
 @router.post("/enhance-description")
 async def enhance_description(
     data: DescriptionEnhanceRequest,
-    current_user: dict = Depends(get_current_user),
+    current_user: dict = Depends(require_writable_license),
+    db: aiosqlite.Connection = Depends(db_dependency),
 ):
     """Generate a structured description from the title via AI."""
-    from agents.claude_client import call_claude
+    from agents.claude_client import call_claude, ClaudeCliError
     from config import CLAUDE_MODEL_FAST
+    from services.license_service import charge_usage_to_tenant

     template = ENHANCE_PROMPT_RESEARCH if data.type == "research" else ENHANCE_PROMPT_ADHOC
     context = data.description.strip() if data.description and data.description.strip() else "Kein Kontext angegeben"
     prompt = template.format(title=data.title.strip(), context=context)

     try:
-        result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST, raw_text=True)
-        _enhance_logger.info(
-            f"Beschreibung generiert fuer \"{data.title[:50]}\": "
-            f"{usage.input_tokens}in/{usage.output_tokens}out"
-        )
-        return {"description": result.strip()}
+        result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST, raw_text=True, timeout=60)
+    except ClaudeCliError as e:
+        _enhance_logger.error(f"Beschreibung generieren: ClaudeCliError [{e.error_type}]: {e.message}")
+        if e.error_type == "auth_error":
+            raise HTTPException(status_code=503, detail="KI-Zugang aktuell nicht verfuegbar. Bitte Administrator kontaktieren.")
+        if e.error_type == "rate_limit":
+            raise HTTPException(status_code=429, detail="KI ist gerade ausgelastet. Bitte in einer Minute erneut versuchen.")
+        raise HTTPException(status_code=500, detail="Beschreibung konnte nicht generiert werden")
+    except TimeoutError:
+        _enhance_logger.error("Beschreibung generieren: Timeout")
+        raise HTTPException(status_code=504, detail="Die KI antwortet gerade nicht. Bitte erneut versuchen.")
+    except HTTPException:
+        raise
     except Exception as e:
         _enhance_logger.error(f"Beschreibung generieren fehlgeschlagen: {e}")
         raise HTTPException(status_code=500, detail="Beschreibung konnte nicht generiert werden")

+    _enhance_logger.info(
+        f"Beschreibung generiert fuer \"{data.title[:50]}\": "
+        f"{usage.input_tokens}in/{usage.output_tokens}out"
+    )
+    await charge_usage_to_tenant(db, current_user.get("tenant_id"), usage, source="enhance")
+    await db.commit()
+    return {"description": result.strip()}


 @router.get("/{incident_id}", response_model=IncidentResponse)
 async def get_incident(
@@ -239,12 +286,41 @@ async def get_incident(
     current_user: dict = Depends(get_current_user),
     db: aiosqlite.Connection = Depends(db_dependency),
 ):
-    """Fetch a single incident."""
+    """Fetch a single incident.
+
+    sources_json is NOT included; for citation lookups use
+    GET /incidents/{id}/sources instead (lazy).
+    """
     tenant_id = current_user.get("tenant_id")
     row = await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
     return await _enrich_incident(db, row)


+@router.get("/{incident_id}/sources")
+async def get_incident_sources(
+    incident_id: int,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Sources array of an incident (parsed from sources_json) for citation lookups."""
+    tenant_id = current_user.get("tenant_id")
+    await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
+    cursor = await db.execute(
+        "SELECT sources_json FROM incidents WHERE id = ?",
+        (incident_id,),
+    )
+    row = await cursor.fetchone()
+    sources: list = []
+    if row and row["sources_json"]:
+        try:
+            parsed = json.loads(row["sources_json"])
+            if isinstance(parsed, list):
+                sources = parsed
+        except (json.JSONDecodeError, TypeError):
+            sources = []
+    return {"incident_id": incident_id, "sources": sources}
+
+
 @router.put("/{incident_id}", response_model=IncidentResponse)
 async def update_incident(
     incident_id: int,
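The lazy sources endpoint turns one heavy request into two light ones; sketched with httpx (the base URL is a placeholder and authentication is omitted):

```python
import httpx

BASE = "http://localhost:8000"  # placeholder; auth omitted for brevity

with httpx.Client(base_url=BASE) as client:
    incident = client.get("/incidents/7").json()                     # slim payload
    sources = client.get("/incidents/7/sources").json()["sources"]   # lazy lookup
    print(incident["title"], len(sources))
```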
@@ -317,18 +393,133 @@ async def delete_incident(
 @router.get("/{incident_id}/articles")
 async def get_articles(
     incident_id: int,
+    limit: int = Query(500, ge=1, le=1000),
+    offset: int = Query(0, ge=0),
+    search: str | None = Query(None, min_length=0, max_length=200),
     current_user: dict = Depends(get_current_user),
     db: aiosqlite.Connection = Depends(db_dependency),
 ):
-    """Fetch all articles of an incident."""
+    """Fetch the articles of an incident, paginated.
+
+    Response: ``{"total": int, "articles": [...]}``.
+    The optional ``search`` param filters via LIKE across
+    headline, headline_de, source, content_de, content_original.
+    """
+    tenant_id = current_user.get("tenant_id")
+    await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
+
+    search_clean = (search or "").strip()
+    if search_clean:
+        like = f"%{search_clean}%"
+        params = (incident_id, like, like, like, like, like)
+        where = (
+            "WHERE incident_id = ? AND ("
+            "COALESCE(headline,'') LIKE ? OR "
+            "COALESCE(headline_de,'') LIKE ? OR "
+            "COALESCE(source,'') LIKE ? OR "
+            "COALESCE(content_de,'') LIKE ? OR "
+            "COALESCE(content_original,'') LIKE ?)"
+        )
+    else:
+        params = (incident_id,)
+        where = "WHERE incident_id = ?"
+
+    cursor = await db.execute(f"SELECT COUNT(*) AS cnt FROM articles {where}", params)
+    total = (await cursor.fetchone())["cnt"]
+
+    cursor = await db.execute(
+        f"SELECT * FROM articles {where} ORDER BY collected_at DESC LIMIT ? OFFSET ?",
+        (*params, limit, offset),
+    )
+    rows = await cursor.fetchall()
+    return {"total": total, "articles": [dict(row) for row in rows]}
+
+
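Clients now page through articles instead of pulling everything at once; the total count makes the last page detectable:

```python
import httpx

# Placeholder base URL and auth; the query params match the endpoint above.
params = {"limit": 100, "offset": 0, "search": "Berlin"}
page = httpx.get("http://localhost:8000/incidents/7/articles", params=params).json()
print(page["total"], len(page["articles"]))
# e.g. a total of 342 with limit=100 means fetching offsets 0, 100, 200, 300
```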
+@router.get("/{incident_id}/articles/sources-summary")
+async def get_articles_sources_summary(
+    incident_id: int,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Aggregated source statistics for an incident (for the source overview)."""
     tenant_id = current_user.get("tenant_id")
     await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
     cursor = await db.execute(
-        "SELECT * FROM articles WHERE incident_id = ? ORDER BY collected_at DESC",
+        """SELECT source,
+                  COUNT(*) AS article_count,
+                  GROUP_CONCAT(DISTINCT COALESCE(language,'de')) AS languages
+           FROM articles WHERE incident_id = ?
+           GROUP BY source ORDER BY article_count DESC""",
         (incident_id,),
     )
-    rows = await cursor.fetchall()
-    return [dict(row) for row in rows]
+    sources = []
+    for r in await cursor.fetchall():
+        d = dict(r)
+        langs = (d.pop("languages") or "de").split(",")
+        d["languages"] = sorted({(l or "de").strip() for l in langs if l is not None})
+        sources.append(d)
+    # Overall language distribution
+    cursor = await db.execute(
+        """SELECT COALESCE(language,'de') AS language, COUNT(*) AS cnt
+           FROM articles WHERE incident_id = ?
+           GROUP BY language ORDER BY cnt DESC""",
+        (incident_id,),
+    )
+    lang_counts = [dict(r) for r in await cursor.fetchall()]
+    total_cursor = await db.execute(
+        "SELECT COUNT(*) AS cnt FROM articles WHERE incident_id = ?",
+        (incident_id,),
+    )
+    total = (await total_cursor.fetchone())["cnt"]
+    return {"total": total, "sources": sources, "language_counts": lang_counts}
+
+
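The summary endpoint returns aggregates only; a plausible response for a small incident (all values invented) looks like this:

```python
# Illustrative response shape, not real data:
{
    "total": 57,
    "sources": [
        {"source": "tagesschau.de", "article_count": 21, "languages": ["de"]},
        {"source": "reuters.com", "article_count": 14, "languages": ["de", "en"]},
    ],
    "language_counts": [
        {"language": "de", "cnt": 41},
        {"language": "en", "cnt": 16},
    ],
}
```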
|
@router.get("/{incident_id}/articles/timeline-buckets")
|
||||||
|
async def get_articles_timeline_buckets(
|
||||||
|
incident_id: int,
|
||||||
|
granularity: str = Query("day", pattern="^(hour|day|week|month)$"),
|
||||||
|
current_user: dict = Depends(get_current_user),
|
||||||
|
db: aiosqlite.Connection = Depends(db_dependency),
|
||||||
|
):
|
||||||
|
"""Aggregierte Zeit-Buckets fuer die Timeline-Achse.
|
||||||
|
|
||||||
|
Zaehlt Artikel und Snapshots pro Bucket. Kein Inhalt, nur Counts.
|
||||||
|
"""
|
||||||
|
tenant_id = current_user.get("tenant_id")
|
||||||
|
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
||||||
|
fmt_map = {
|
||||||
|
"hour": "%Y-%m-%d %H:00",
|
||||||
|
"day": "%Y-%m-%d",
|
||||||
|
"week": "%Y-%W",
|
||||||
|
"month": "%Y-%m",
|
||||||
|
}
|
||||||
|
fmt = fmt_map[granularity]
|
||||||
|
cursor = await db.execute(
|
||||||
|
f"""SELECT strftime(?, collected_at) AS bucket, COUNT(*) AS article_count
|
||||||
|
FROM articles WHERE incident_id = ?
|
||||||
|
GROUP BY bucket ORDER BY bucket""",
|
||||||
|
(fmt, incident_id),
|
||||||
|
)
|
||||||
|
article_rows = {r["bucket"]: r["article_count"] for r in await cursor.fetchall()}
|
||||||
|
cursor = await db.execute(
|
||||||
|
f"""SELECT strftime(?, created_at) AS bucket, COUNT(*) AS snapshot_count
|
||||||
|
FROM incident_snapshots WHERE incident_id = ?
|
||||||
|
GROUP BY bucket ORDER BY bucket""",
|
||||||
|
(fmt, incident_id),
|
||||||
|
)
|
||||||
|
snapshot_rows = {r["bucket"]: r["snapshot_count"] for r in await cursor.fetchall()}
|
||||||
|
all_buckets = sorted(set(article_rows.keys()) | set(snapshot_rows.keys()))
|
||||||
|
return {
|
||||||
|
"granularity": granularity,
|
||||||
|
"buckets": [
|
||||||
|
{
|
||||||
|
"bucket": b,
|
||||||
|
"article_count": article_rows.get(b, 0),
|
||||||
|
"snapshot_count": snapshot_rows.get(b, 0),
|
||||||
|
}
|
||||||
|
for b in all_buckets
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
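
The merge at the end zero-fills whichever series has no entry for a bucket, so the frontend always gets a dense axis. A tiny standalone sketch of that behaviour, with made-up counts:

```python
# Minimal sketch of the bucket merge above:
article_rows = {"2024-05-01": 3, "2024-05-02": 7}
snapshot_rows = {"2024-05-02": 1, "2024-05-03": 2}
all_buckets = sorted(set(article_rows) | set(snapshot_rows))
merged = [
    {"bucket": b,
     "article_count": article_rows.get(b, 0),
     "snapshot_count": snapshot_rows.get(b, 0)}
    for b in all_buckets
]
# -> three buckets; 2024-05-01 gets snapshot_count=0, 2024-05-03 gets article_count=0
```
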
@router.get("/{incident_id}/snapshots")
|
@router.get("/{incident_id}/snapshots")
|
||||||
@@ -337,12 +528,17 @@ async def get_snapshots(
|
|||||||
current_user: dict = Depends(get_current_user),
|
current_user: dict = Depends(get_current_user),
|
||||||
db: aiosqlite.Connection = Depends(db_dependency),
|
db: aiosqlite.Connection = Depends(db_dependency),
|
||||||
):
|
):
|
||||||
"""Lageberichte (Snapshots) einer Lage abrufen."""
|
"""Lageberichte (Snapshots) einer Lage abrufen — schlanke Liste.
|
||||||
|
|
||||||
|
Liefert nur Metadaten und einen 300-Zeichen-Preview des Summary.
|
||||||
|
Der Volltext (summary + sources_json) wird per Einzel-Endpunkt
|
||||||
|
``GET /{incident_id}/snapshots/{snapshot_id}`` bei Bedarf geladen.
|
||||||
|
"""
|
||||||
tenant_id = current_user.get("tenant_id")
|
tenant_id = current_user.get("tenant_id")
|
||||||
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"""SELECT id, incident_id, summary, sources_json,
|
"""SELECT id, incident_id, article_count, fact_check_count, created_at,
|
||||||
article_count, fact_check_count, created_at
|
SUBSTR(summary, 1, 300) AS summary_preview
|
||||||
FROM incident_snapshots WHERE incident_id = ?
|
FROM incident_snapshots WHERE incident_id = ?
|
||||||
ORDER BY created_at DESC""",
|
ORDER BY created_at DESC""",
|
||||||
(incident_id,),
|
(incident_id,),
|
||||||
@@ -351,6 +547,55 @@ async def get_snapshots(
|
|||||||
return [dict(row) for row in rows]
|
return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{incident_id}/snapshots/search")
|
||||||
|
async def search_snapshots(
|
||||||
|
incident_id: int,
|
||||||
|
q: str = Query(..., min_length=2, max_length=200),
|
||||||
|
current_user: dict = Depends(get_current_user),
|
||||||
|
db: aiosqlite.Connection = Depends(db_dependency),
|
||||||
|
):
|
||||||
|
"""Volltextsuche über alle Snapshots einer Lage.
|
||||||
|
|
||||||
|
Liefert dieselbe schlanke Shape wie der Listen-Endpunkt,
|
||||||
|
gefiltert per ``summary LIKE '%q%'``.
|
||||||
|
"""
|
||||||
|
tenant_id = current_user.get("tenant_id")
|
||||||
|
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
||||||
|
like = f"%{q}%"
|
||||||
|
cursor = await db.execute(
|
||||||
|
"""SELECT id, incident_id, article_count, fact_check_count, created_at,
|
||||||
|
SUBSTR(summary, 1, 300) AS summary_preview
|
||||||
|
FROM incident_snapshots
|
||||||
|
WHERE incident_id = ? AND summary LIKE ?
|
||||||
|
ORDER BY created_at DESC""",
|
||||||
|
(incident_id, like),
|
||||||
|
)
|
||||||
|
rows = await cursor.fetchall()
|
||||||
|
return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{incident_id}/snapshots/{snapshot_id}")
|
||||||
|
async def get_snapshot(
|
||||||
|
incident_id: int,
|
||||||
|
snapshot_id: int,
|
||||||
|
current_user: dict = Depends(get_current_user),
|
||||||
|
db: aiosqlite.Connection = Depends(db_dependency),
|
||||||
|
):
|
||||||
|
"""Einzelnen Snapshot mit vollem Summary + sources_json abrufen (Lazy-Load)."""
|
||||||
|
tenant_id = current_user.get("tenant_id")
|
||||||
|
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
||||||
|
cursor = await db.execute(
|
||||||
|
"""SELECT id, incident_id, summary, sources_json,
|
||||||
|
article_count, fact_check_count, created_at
|
||||||
|
FROM incident_snapshots WHERE id = ? AND incident_id = ?""",
|
||||||
|
(snapshot_id, incident_id),
|
||||||
|
)
|
||||||
|
row = await cursor.fetchone()
|
||||||
|
if not row:
|
||||||
|
raise HTTPException(status_code=404, detail="Snapshot nicht gefunden")
|
||||||
|
return dict(row)
|
||||||
|
|
||||||
|
|
||||||
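
Taken together, these three endpoints form a lazy-load flow; a rough client sketch, where `client` is assumed to be an authenticated httpx client and the routes are assumed to hang off an `/api/incidents` prefix:

```python
# 1) list slim previews, 2) search, 3) fetch the full text only when opened.
previews = (await client.get(f"/api/incidents/{iid}/snapshots")).json()        # summary_preview only
hits = (await client.get(f"/api/incidents/{iid}/snapshots/search",
                         params={"q": "Stromausfall"})).json()                  # same slim shape
full = (await client.get(f"/api/incidents/{iid}/snapshots/{previews[0]['id']}")).json()
```

Note that declaring `/snapshots/search` before `/snapshots/{snapshot_id}` matters: FastAPI matches routes in declaration order, so the literal "search" segment is not swallowed by the ID path parameter.
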
@router.get("/{incident_id}/factchecks")
|
@router.get("/{incident_id}/factchecks")
|
||||||
async def get_factchecks(
|
async def get_factchecks(
|
||||||
incident_id: int,
|
incident_id: int,
|
||||||
@@ -368,66 +613,198 @@ async def get_factchecks(
|
|||||||
return [dict(row) for row in rows]
|
return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{incident_id}/pipeline")
|
||||||
|
async def get_pipeline(
|
||||||
|
incident_id: int,
|
||||||
|
current_user: dict = Depends(get_current_user),
|
||||||
|
db: aiosqlite.Connection = Depends(db_dependency),
|
||||||
|
):
|
||||||
|
"""Analysepipeline-Status der Lage: Definition aller Schritte + Stand des
|
||||||
|
letzten (oder gerade laufenden) Refreshs.
|
||||||
|
|
||||||
|
Antwort:
|
||||||
|
{
|
||||||
|
"is_research": bool,
|
||||||
|
"is_running": bool,
|
||||||
|
"last_refresh": {started_at, completed_at, duration_sec, status, pass_total} | null,
|
||||||
|
"steps_definition": [{key, label, icon, tooltip}, ...],
|
||||||
|
"steps": [{step_key, status, count_value, count_secondary, pass_number}, ...]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
from services.pipeline_tracker import PIPELINE_STEPS
|
||||||
|
|
||||||
|
tenant_id = current_user.get("tenant_id")
|
||||||
|
incident_row = await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
||||||
|
is_research = (incident_row["type"] or "adhoc") == "research"
|
||||||
|
|
||||||
|
# Jüngsten Refresh-Log wählen: bevorzugt running, sonst der letzte completed
|
||||||
|
cursor = await db.execute(
|
||||||
|
"""SELECT id, started_at, completed_at, status, retry_count
|
||||||
|
FROM refresh_log
|
||||||
|
WHERE incident_id = ? AND status = 'running'
|
||||||
|
ORDER BY started_at DESC LIMIT 1""",
|
||||||
|
(incident_id,),
|
||||||
|
)
|
||||||
|
row = await cursor.fetchone()
|
||||||
|
if not row:
|
||||||
|
cursor = await db.execute(
|
||||||
|
"""SELECT id, started_at, completed_at, status, retry_count
|
||||||
|
FROM refresh_log
|
||||||
|
WHERE incident_id = ?
|
||||||
|
ORDER BY started_at DESC LIMIT 1""",
|
||||||
|
(incident_id,),
|
||||||
|
)
|
||||||
|
row = await cursor.fetchone()
|
||||||
|
|
||||||
|
last_refresh = None
|
||||||
|
steps = []
|
||||||
|
is_running = False
|
||||||
|
if row:
|
||||||
|
is_running = row["status"] == "running"
|
||||||
|
# Pipeline-Steps zu diesem Refresh laden
|
||||||
|
sc = await db.execute(
|
||||||
|
"""SELECT step_key, pass_number, status, count_value, count_secondary,
|
||||||
|
started_at, completed_at
|
||||||
|
FROM refresh_pipeline_steps
|
||||||
|
WHERE refresh_log_id = ?
|
||||||
|
ORDER BY pass_number ASC, id ASC""",
|
||||||
|
(row["id"],),
|
||||||
|
)
|
||||||
|
steps = [dict(r) for r in await sc.fetchall()]
|
||||||
|
|
||||||
|
# Pass-Total: bei Research-Lagen mit Multi-Pass-Daten ermitteln
|
||||||
|
max_pass = 1
|
||||||
|
for s in steps:
|
||||||
|
if s["pass_number"] and s["pass_number"] > max_pass:
|
||||||
|
max_pass = s["pass_number"]
|
||||||
|
|
||||||
|
# Dauer berechnen (nur wenn completed)
|
||||||
|
duration_sec = None
|
||||||
|
try:
|
||||||
|
if row["started_at"] and row["completed_at"]:
|
||||||
|
t0 = datetime.strptime(row["started_at"], "%Y-%m-%d %H:%M:%S")
|
||||||
|
t1 = datetime.strptime(row["completed_at"], "%Y-%m-%d %H:%M:%S")
|
||||||
|
duration_sec = max(0, int((t1 - t0).total_seconds()))
|
||||||
|
except Exception:
|
||||||
|
duration_sec = None
|
||||||
|
|
||||||
|
last_refresh = {
|
||||||
|
"started_at": row["started_at"],
|
||||||
|
"completed_at": row["completed_at"],
|
||||||
|
"status": row["status"],
|
||||||
|
"duration_sec": duration_sec,
|
||||||
|
"pass_total": max_pass,
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"is_research": is_research,
|
||||||
|
"is_running": is_running,
|
||||||
|
"last_refresh": last_refresh,
|
||||||
|
"steps_definition": PIPELINE_STEPS,
|
||||||
|
"steps": steps,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
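
A progress UI would typically poll this endpoint while a refresh runs. A hypothetical polling loop, where the step status value `"completed"` and the 2-second interval are assumptions not visible in this diff:

```python
import asyncio

async def poll_pipeline(client, incident_id: int) -> dict:
    """Poll until the refresh finishes. 'completed' as a step status is an assumption."""
    while True:
        state = (await client.get(f"/api/incidents/{incident_id}/pipeline")).json()
        done = sum(1 for s in state["steps"] if s["status"] == "completed")
        print(f"{done}/{len(state['steps_definition'])} steps done")
        if not state["is_running"]:
            return state
        await asyncio.sleep(2)
```
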
@router.get("/{incident_id}/locations")
|
@router.get("/{incident_id}/locations")
|
||||||
async def get_locations(
|
async def get_locations(
|
||||||
incident_id: int,
|
incident_id: int,
|
||||||
current_user: dict = Depends(get_current_user),
|
current_user: dict = Depends(get_current_user),
|
||||||
db: aiosqlite.Connection = Depends(db_dependency),
|
db: aiosqlite.Connection = Depends(db_dependency),
|
||||||
):
|
):
|
||||||
"""Geografische Orte einer Lage abrufen (aggregiert nach Ort)."""
|
"""Geografische Orte einer Lage abrufen (serverseitig aggregiert nach Ort).
|
||||||
|
|
||||||
|
Drei getrennte Queries (alle klein) statt eines 21k-Zeilen-JOINs:
|
||||||
|
1. Orte-Aggregate per GROUP BY (name, lat, lon) — liefert direkt ~Ergebnismenge.
|
||||||
|
2. Kategorien pro Ort per GROUP BY (name, lat, lon, category) — fuer dominante Kategorie.
|
||||||
|
3. Sample-Artikel pro Ort via ROW_NUMBER() — max. 10 pro Ort.
|
||||||
|
"""
|
||||||
tenant_id = current_user.get("tenant_id")
|
tenant_id = current_user.get("tenant_id")
|
||||||
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
|
||||||
|
|
||||||
|
# 1. Orte-Aggregate
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"""SELECT al.location_name, al.location_name_normalized, al.country_code,
|
"""SELECT
|
||||||
al.latitude, al.longitude, al.confidence, al.category,
|
COALESCE(location_name_normalized, location_name) AS name,
|
||||||
a.id as article_id, a.headline, a.headline_de, a.source, a.source_url
|
ROUND(latitude, 2) AS lat,
|
||||||
FROM article_locations al
|
ROUND(longitude, 2) AS lon,
|
||||||
JOIN articles a ON a.id = al.article_id
|
MIN(country_code) AS country_code,
|
||||||
WHERE al.incident_id = ?
|
MAX(confidence) AS confidence,
|
||||||
ORDER BY al.location_name_normalized, a.collected_at DESC""",
|
COUNT(*) AS article_count
|
||||||
|
FROM article_locations
|
||||||
|
WHERE incident_id = ?
|
||||||
|
GROUP BY name, lat, lon
|
||||||
|
ORDER BY article_count DESC""",
|
||||||
(incident_id,),
|
(incident_id,),
|
||||||
)
|
)
|
||||||
rows = await cursor.fetchall()
|
loc_rows = [dict(r) for r in await cursor.fetchall()]
|
||||||
|
|
||||||
# Aggregierung nach normalisiertem Ortsnamen + Koordinaten
|
# 2. Kategorien pro Ort
|
||||||
loc_map = {}
|
cursor = await db.execute(
|
||||||
for row in rows:
|
"""SELECT
|
||||||
row = dict(row)
|
COALESCE(location_name_normalized, location_name) AS name,
|
||||||
key = (row["location_name_normalized"] or row["location_name"], round(row["latitude"], 2), round(row["longitude"], 2))
|
ROUND(latitude, 2) AS lat,
|
||||||
if key not in loc_map:
|
ROUND(longitude, 2) AS lon,
|
||||||
loc_map[key] = {
|
COALESCE(category, 'mentioned') AS category,
|
||||||
"location_name": row["location_name_normalized"] or row["location_name"],
|
COUNT(*) AS cnt
|
||||||
"lat": row["latitude"],
|
FROM article_locations
|
||||||
"lon": row["longitude"],
|
WHERE incident_id = ?
|
||||||
"country_code": row["country_code"],
|
GROUP BY name, lat, lon, category""",
|
||||||
"confidence": row["confidence"],
|
(incident_id,),
|
||||||
"article_count": 0,
|
)
|
||||||
"articles": [],
|
cat_map: dict[tuple, dict[str, int]] = {}
|
||||||
"categories": {},
|
for r in await cursor.fetchall():
|
||||||
}
|
key = (r["name"], r["lat"], r["lon"])
|
||||||
loc_map[key]["article_count"] += 1
|
cat_map.setdefault(key, {})[r["category"]] = r["cnt"]
|
||||||
cat = row["category"] or "mentioned"
|
|
||||||
loc_map[key]["categories"][cat] = loc_map[key]["categories"].get(cat, 0) + 1
|
|
||||||
# Maximal 10 Artikel pro Ort mitliefern
|
|
||||||
if len(loc_map[key]["articles"]) < 10:
|
|
||||||
loc_map[key]["articles"].append({
|
|
||||||
"id": row["article_id"],
|
|
||||||
"headline": row["headline_de"] or row["headline"],
|
|
||||||
"source": row["source"],
|
|
||||||
"source_url": row["source_url"],
|
|
||||||
})
|
|
||||||
|
|
||||||
# Dominanteste Kategorie pro Ort bestimmen (Prioritaet: primary > secondary > tertiary > mentioned)
|
# 3. Sample-Artikel pro Ort (max. 10, neueste zuerst)
|
||||||
|
cursor = await db.execute(
|
||||||
|
"""SELECT name, lat, lon, article_id, headline, headline_de, source, source_url
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
COALESCE(al.location_name_normalized, al.location_name) AS name,
|
||||||
|
ROUND(al.latitude, 2) AS lat,
|
||||||
|
ROUND(al.longitude, 2) AS lon,
|
||||||
|
a.id AS article_id,
|
||||||
|
a.headline, a.headline_de, a.source, a.source_url,
|
||||||
|
ROW_NUMBER() OVER (
|
||||||
|
PARTITION BY COALESCE(al.location_name_normalized, al.location_name),
|
||||||
|
ROUND(al.latitude, 2), ROUND(al.longitude, 2)
|
||||||
|
ORDER BY a.collected_at DESC
|
||||||
|
) AS rn
|
||||||
|
FROM article_locations al
|
||||||
|
JOIN articles a ON a.id = al.article_id
|
||||||
|
WHERE al.incident_id = ?
|
||||||
|
)
|
||||||
|
WHERE rn <= 10""",
|
||||||
|
(incident_id,),
|
||||||
|
)
|
||||||
|
sample_map: dict[tuple, list[dict]] = {}
|
||||||
|
for r in await cursor.fetchall():
|
||||||
|
key = (r["name"], r["lat"], r["lon"])
|
||||||
|
sample_map.setdefault(key, []).append({
|
||||||
|
"id": r["article_id"],
|
||||||
|
"headline": r["headline_de"] or r["headline"],
|
||||||
|
"source": r["source"],
|
||||||
|
"source_url": r["source_url"],
|
||||||
|
})
|
||||||
|
|
||||||
|
# Zusammensetzen
|
||||||
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
|
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
|
||||||
result = []
|
result = []
|
||||||
for loc in loc_map.values():
|
for loc in loc_rows:
|
||||||
cats = loc.pop("categories")
|
key = (loc["name"], loc["lat"], loc["lon"])
|
||||||
if cats:
|
cats = cat_map.get(key, {})
|
||||||
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c]))
|
best_cat = max(cats, key=lambda c: (priority.get(c, 0), cats[c])) if cats else "mentioned"
|
||||||
else:
|
result.append({
|
||||||
best_cat = "mentioned"
|
"location_name": loc["name"],
|
||||||
loc["category"] = best_cat
|
"lat": loc["lat"],
|
||||||
result.append(loc)
|
"lon": loc["lon"],
|
||||||
|
"country_code": loc["country_code"],
|
||||||
|
"confidence": loc["confidence"],
|
||||||
|
"article_count": loc["article_count"],
|
||||||
|
"articles": sample_map.get(key, []),
|
||||||
|
"category": best_cat,
|
||||||
|
})
|
||||||
|
|
||||||
# Category-Labels aus Incident laden
|
# Category-Labels aus Incident laden
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
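
Two details worth noting here: the sampling query relies on the `ROW_NUMBER()` window function, which needs SQLite 3.25 or newer, and the dominant-category pick sorts by priority first and frequency second. A worked example of the latter (counts invented):

```python
priority = {"primary": 4, "secondary": 3, "tertiary": 2, "mentioned": 1}
cats = {"mentioned": 12, "secondary": 3}
best = max(cats, key=lambda c: (priority.get(c, 0), cats[c]))
assert best == "secondary"  # priority 3 beats priority 1, despite the lower count
```
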
@@ -737,6 +1114,34 @@ async def export_incident(
     user_row = await cursor.fetchone()
     creator = user_row["email"] if user_row else "Unbekannt"
+
+    # Organization (for the file metadata)
+    organization_name = None
+    if incident.get("tenant_id"):
+        cursor = await db.execute(
+            "SELECT name FROM organizations WHERE id = ?", (incident["tenant_id"],)
+        )
+        org_row = await cursor.fetchone()
+        organization_name = org_row["name"] if org_row else None
+
+    # Top locations (for the keyword metadata)
+    cursor = await db.execute(
+        """SELECT location_name, COUNT(*) AS cnt
+           FROM article_locations
+           WHERE incident_id = ?
+           GROUP BY COALESCE(location_name_normalized, location_name)
+           ORDER BY cnt DESC
+           LIMIT 5""",
+        (incident_id,),
+    )
+    top_locations = [r["location_name"] for r in await cursor.fetchall() if r["location_name"]]
+
+    # Snapshot count (used as xmpMM:VersionID in the PDF)
+    cursor = await db.execute(
+        "SELECT COUNT(*) AS cnt FROM incident_snapshots WHERE incident_id = ?",
+        (incident_id,),
+    )
+    snapshot_count = (await cursor.fetchone())["cnt"] or 0
+
     # Articles
     cursor = await db.execute(
         "SELECT * FROM articles WHERE incident_id = ? ORDER BY collected_at DESC",
@@ -760,8 +1165,18 @@ async def export_incident(
     )
     snapshots = [dict(r) for r in await cursor.fetchall()]

-    # Executive summary (AI-generated, cached)
-    exec_summary = incident.get("executive_summary")
+    # Summary for the export:
+    # - For ad-hoc incidents, primarily "Latest developments" (latest_developments) as Markdown bullets,
+    #   because live monitoring lives on recency.
+    # - Fallback (or for research incidents): executive summary (AI-generated, cached).
+    is_adhoc = (incident.get("type") or "adhoc") != "research"
+    latest_dev = (incident.get("latest_developments") or "").strip()
+    exec_summary = None
+    if is_adhoc and latest_dev:
+        from report_generator import _markdown_to_html as _md_to_html
+        exec_summary = _md_to_html(latest_dev)
+    if not exec_summary:
+        exec_summary = incident.get("executive_summary")
     if not exec_summary:
         summary_text = incident.get("summary") or ""
         exec_summary = await generate_executive_summary(summary_text)
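
The selection reads as a three-step fallback chain; restated as a pure function for clarity (a sketch mirroring the diff, not an additional API in the codebase):

```python
def pick_export_summary(incident: dict, md_to_html) -> str | None:
    """Sketch: latest developments for ad-hoc incidents, else the cached executive summary."""
    is_adhoc = (incident.get("type") or "adhoc") != "research"
    latest = (incident.get("latest_developments") or "").strip()
    if is_adhoc and latest:
        return md_to_html(latest)              # freshest content wins for live monitoring
    return incident.get("executive_summary")   # cached AI summary; caller regenerates if None
```
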
@@ -786,7 +1201,13 @@ async def export_incident(
     scope_labels_key = scope_labels.get(scope, "lagebericht")

     if format == "pdf":
-        pdf_bytes = await generate_pdf(incident, articles, fact_checks, snapshots, scope, creator, exec_summary, sections=sections_set)
+        pdf_bytes = await generate_pdf(
+            incident, articles, fact_checks, snapshots, scope, creator, exec_summary,
+            sections=sections_set,
+            organization_name=organization_name,
+            top_locations=top_locations,
+            snapshot_count=snapshot_count,
+        )
         filename = f"{slug}_{scope_labels_key}_{date_str}.pdf"
         return StreamingResponse(
             io.BytesIO(pdf_bytes),
@@ -794,7 +1215,13 @@ async def export_incident(
             headers={"Content-Disposition": f'attachment; filename="{filename}"'},
         )
     else:
-        docx_bytes = await generate_docx(incident, articles, fact_checks, snapshots, scope, creator, exec_summary, sections=sections_set)
+        docx_bytes = await generate_docx(
+            incident, articles, fact_checks, snapshots, scope, creator, exec_summary,
+            sections=sections_set,
+            organization_name=organization_name,
+            top_locations=top_locations,
+            snapshot_count=snapshot_count,
+        )
         filename = f"{slug}_{scope_labels_key}_{date_str}.docx"
         return StreamingResponse(
             io.BytesIO(docx_bytes),

@@ -1,10 +1,13 @@
 """Sources router: source management (multi-tenant)."""
+import json
 import logging
 from collections import defaultdict
-from fastapi import APIRouter, Depends, HTTPException, status
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
 from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest
 from auth import get_current_user
-from database import db_dependency, refresh_source_counts
+from database import db_dependency, get_db, refresh_source_counts
+from services.external_reputation import apply_reputation_overrides, sync_all as sync_external_reputation
+from services.source_classifier import bulk_classify, classify_source
 from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES
 import aiosqlite
@@ -12,7 +15,56 @@ logger = logging.getLogger("osint.sources")

 router = APIRouter(prefix="/api/sources", tags=["sources"])

-SOURCE_UPDATE_COLUMNS = {"name", "url", "domain", "source_type", "category", "status", "notes"}
+SOURCE_UPDATE_COLUMNS = {
+    "name", "url", "domain", "source_type", "category", "status", "notes",
+    "language", "bias",
+    "political_orientation", "media_type", "reliability",
+    "state_affiliated", "country_code",
+}
+SOURCE_CLASSIFICATION_FIELDS = {
+    "political_orientation", "media_type", "reliability",
+    "state_affiliated", "country_code",
+}
+ALLOWED_ALIGNMENTS = {
+    "prorussisch", "proiranisch", "prowestlich", "proukrainisch",
+    "prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch",
+    "protuerkisch", "panarabisch", "neutral", "sonstige",
+}
+
+
+async def _load_alignments_for(db: aiosqlite.Connection, source_ids: list[int]) -> dict[int, list[str]]:
+    """Loads the alignments for several sources in one query; returns {source_id: [alignment, ...]}."""
+    if not source_ids:
+        return {}
+    placeholders = ",".join("?" for _ in source_ids)
+    cursor = await db.execute(
+        f"SELECT source_id, alignment FROM source_alignments WHERE source_id IN ({placeholders}) ORDER BY alignment",
+        source_ids,
+    )
+    out: dict[int, list[str]] = {sid: [] for sid in source_ids}
+    for row in await cursor.fetchall():
+        out.setdefault(row["source_id"], []).append(row["alignment"])
+    return out
+
+
+async def _replace_alignments(db: aiosqlite.Connection, source_id: int, alignments: list[str]):
+    """Replaces a source's alignment list (DELETE + INSERT) — the caller must commit()."""
+    await db.execute("DELETE FROM source_alignments WHERE source_id = ?", (source_id,))
+    seen: set[str] = set()
+    for raw in alignments:
+        a = (raw or "").strip().lower()
+        if not a or a in seen:
+            continue
+        if a not in ALLOWED_ALIGNMENTS:
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"Ungueltiger alignment-Wert: '{a}'",
+            )
+        seen.add(a)
+        await db.execute(
+            "INSERT INTO source_alignments (source_id, alignment) VALUES (?, ?)",
+            (source_id, a),
+        )
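
A minimal usage sketch of the two helpers inside a request handler (ids invented). `_replace_alignments` deliberately leaves the `commit()` to the caller so it can share a transaction with a surrounding UPDATE:

```python
# Input is normalized to lowercase and deduplicated; invalid values raise a 422.
await _replace_alignments(db, source_id=7, alignments=["Prorussisch", "neutral", "neutral"])
await db.commit()
loaded = await _load_alignments_for(db, [7, 8])
# -> {7: ["neutral", "prorussisch"], 8: []}
```
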

 def _check_source_ownership(source: dict, username: str):
@@ -34,6 +86,13 @@ async def list_sources(
     source_type: str = None,
     category: str = None,
     source_status: str = None,
+    political_orientation: str = None,
+    media_type: str = None,
+    reliability: str = None,
+    state_affiliated: bool = None,
+    alignment: str = None,
+    ifcn_signatory: bool = None,
+    eu_disinfo_listed: bool = None,
     current_user: dict = Depends(get_current_user),
     db: aiosqlite.Connection = Depends(db_dependency),
 ):
@@ -41,27 +100,51 @@ async def list_sources(
     tenant_id = current_user.get("tenant_id")

     # Global (tenant_id=NULL) + the caller's own org
-    query = "SELECT * FROM sources WHERE (tenant_id IS NULL OR tenant_id = ?)"
-    params = [tenant_id]
+    query = "SELECT s.* FROM sources s WHERE (s.tenant_id IS NULL OR s.tenant_id = ?)"
+    params: list = [tenant_id]

     if source_type:
-        query += " AND source_type = ?"
+        query += " AND s.source_type = ?"
         params.append(source_type)
     if category:
-        query += " AND category = ?"
+        query += " AND s.category = ?"
         params.append(category)
     if source_status:
-        query += " AND status = ?"
+        query += " AND s.status = ?"
         params.append(source_status)
+    if political_orientation:
+        query += " AND s.political_orientation = ?"
+        params.append(political_orientation)
+    if media_type:
+        query += " AND s.media_type = ?"
+        params.append(media_type)
+    if reliability:
+        query += " AND s.reliability = ?"
+        params.append(reliability)
+    if state_affiliated is not None:
+        query += " AND s.state_affiliated = ?"
+        params.append(1 if state_affiliated else 0)
+    if alignment:
+        query += " AND EXISTS (SELECT 1 FROM source_alignments sa WHERE sa.source_id = s.id AND sa.alignment = ?)"
+        params.append(alignment.lower())
+    if ifcn_signatory is not None:
+        query += " AND s.ifcn_signatory = ?"
+        params.append(1 if ifcn_signatory else 0)
+    if eu_disinfo_listed is not None:
+        query += " AND s.eu_disinfo_listed = ?"
+        params.append(1 if eu_disinfo_listed else 0)

-    query += " ORDER BY source_type, category, name"
+    query += " ORDER BY s.source_type, s.category, s.name"
     cursor = await db.execute(query, params)
     rows = await cursor.fetchall()
-    results = []
-    for row in rows:
-        d = dict(row)
-        d["is_global"] = d.get("tenant_id") is None
-        results.append(d)
+    results = [dict(row) for row in rows]
+    alignments_map = await _load_alignments_for(db, [r["id"] for r in results])
+    for d in results:
+        d["is_global"] = d.get("tenant_id") is None
+        d["state_affiliated"] = bool(d.get("state_affiliated"))
+        d["ifcn_signatory"] = bool(d.get("ifcn_signatory"))
+        d["eu_disinfo_listed"] = bool(d.get("eu_disinfo_listed"))
+        d["alignments"] = alignments_map.get(d["id"], [])
     return results
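
All of the new filters are plain query parameters, so a filtered listing looks like this (the client and host are assumptions; FastAPI coerces "true"/"false" strings into the bool parameters):

```python
resp = await client.get(
    "/api/sources",
    params={"reliability": "niedrig", "state_affiliated": "true", "alignment": "prorussisch"},
)
sources = resp.json()  # each entry now carries is_global, the bool flags and "alignments"
```
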
@@ -454,26 +537,60 @@ async def create_source(
             detail=f"Domain '{domain}' bereits als Quelle vorhanden: {domain_existing['name']}. Für einen neuen RSS-Feed bitte die Feed-URL angeben.",
         )

+    payload = data.model_dump(exclude_unset=True)
+    alignments = payload.pop("alignments", None)
+    classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & payload.keys()) or alignments is not None
+
+    cols = ["name", "url", "domain", "source_type", "category", "status", "notes",
+            "language", "bias",
+            "political_orientation", "media_type", "reliability",
+            "state_affiliated", "country_code",
+            "added_by", "tenant_id"]
+    vals = [
+        data.name,
+        data.url,
+        domain,
+        data.source_type,
+        data.category,
+        data.status,
+        data.notes,
+        payload.get("language"),
+        payload.get("bias"),
+        payload.get("political_orientation"),
+        payload.get("media_type"),
+        payload.get("reliability"),
+        1 if payload.get("state_affiliated") else 0,
+        payload.get("country_code"),
+        current_user["username"],
+        tenant_id,
+    ]
+    if classification_touched:
+        cols += ["classification_source", "classified_at"]
+        vals += ["manual"]
+        ts_marker = True
+    else:
+        ts_marker = False
+
+    placeholders = ", ".join(["?"] * len(vals) + (["CURRENT_TIMESTAMP"] if ts_marker else []))
     cursor = await db.execute(
-        """INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by, tenant_id)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
-        (
-            data.name,
-            data.url,
-            domain,
-            data.source_type,
-            data.category,
-            data.status,
-            data.notes,
-            current_user["username"],
-            tenant_id,
-        ),
+        f"INSERT INTO sources ({', '.join(cols)}) VALUES ({placeholders})",
+        vals,
     )
+    new_id = cursor.lastrowid
+
+    if alignments:
+        await _replace_alignments(db, new_id, alignments)
+
     await db.commit()

-    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (cursor.lastrowid,))
+    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (new_id,))
     row = await cursor.fetchone()
-    return dict(row)
+    result = dict(row)
+    result["is_global"] = result.get("tenant_id") is None
+    result["state_affiliated"] = bool(result.get("state_affiliated"))
+    alignments_map = await _load_alignments_for(db, [new_id])
+    result["alignments"] = alignments_map.get(new_id, [])
+    return result


 @router.put("/{source_id}", response_model=SourceResponse)
@@ -494,27 +611,51 @@ async def update_source(

     _check_source_ownership(dict(row), current_user["username"])

+    payload = data.model_dump(exclude_unset=True)
+    alignments = payload.pop("alignments", None)
+
     updates = {}
-    for field, value in data.model_dump(exclude_none=True).items():
+    for field, value in payload.items():
         if field not in SOURCE_UPDATE_COLUMNS:
             continue
         # Normalize the domain
         if field == "domain" and value:
             value = _DOMAIN_ALIASES.get(value.lower(), value.lower())
+        if field == "state_affiliated":
+            value = 1 if value else 0
         updates[field] = value

-    if not updates:
-        return dict(row)
+    classification_touched = bool(SOURCE_CLASSIFICATION_FIELDS & updates.keys()) or alignments is not None
+    if classification_touched:
+        updates["classification_source"] = "manual"
+        updates["classified_at"] = "CURRENT_TIMESTAMP_MARKER"

-    set_clause = ", ".join(f"{k} = ?" for k in updates)
-    values = list(updates.values()) + [source_id]
-
-    await db.execute(f"UPDATE sources SET {set_clause} WHERE id = ?", values)
-    await db.commit()
+    if updates:
+        set_parts = []
+        values = []
+        for k, v in updates.items():
+            if v == "CURRENT_TIMESTAMP_MARKER":
+                set_parts.append(f"{k} = CURRENT_TIMESTAMP")
+            else:
+                set_parts.append(f"{k} = ?")
+                values.append(v)
+        values.append(source_id)
+        await db.execute(f"UPDATE sources SET {', '.join(set_parts)} WHERE id = ?", values)
+
+    if alignments is not None:
+        await _replace_alignments(db, source_id, alignments)
+
+    if updates or alignments is not None:
+        await db.commit()

     cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
     row = await cursor.fetchone()
-    return dict(row)
+    result = dict(row)
+    result["is_global"] = result.get("tenant_id") is None
+    result["state_affiliated"] = bool(result.get("state_affiliated"))
+    alignments_map = await _load_alignments_for(db, [source_id])
+    result["alignments"] = alignments_map.get(source_id, [])
+    return result


 @router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
@@ -572,3 +713,328 @@ async def trigger_refresh_counts(
     """Recalculate the article counters for all sources."""
     await refresh_source_counts(db)
     return {"status": "ok"}
+
+
+# === Classification review (approve/reject/reclassify LLM proposals) ===
+
+def _require_admin_for_global(row: dict, current_user: dict):
+    """Global sources (tenant_id IS NULL) may only be approved/reclassified by org_admins."""
+    if row.get("tenant_id") is None and current_user.get("role") != "org_admin":
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="Globale Quellen koennen nur von Admins klassifiziert werden",
+        )
+
+
+@router.get("/classification/stats")
+async def classification_stats(
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Counts per classification_source value (global + the caller's own org)."""
+    tenant_id = current_user.get("tenant_id")
+    cursor = await db.execute(
+        """SELECT classification_source, COUNT(*) as cnt
+           FROM sources
+           WHERE (tenant_id IS NULL OR tenant_id = ?) AND status = 'active'
+           GROUP BY classification_source""",
+        (tenant_id,),
+    )
+    by_source = {row["classification_source"] or "legacy": row["cnt"] for row in await cursor.fetchall()}
+    cursor = await db.execute(
+        """SELECT COUNT(*) as cnt FROM sources
+           WHERE (tenant_id IS NULL OR tenant_id = ?) AND status = 'active'
+           AND proposed_political_orientation IS NOT NULL""",
+        (tenant_id,),
+    )
+    pending = (await cursor.fetchone())["cnt"]
+    return {
+        "by_classification_source": by_source,
+        "pending_review": pending,
+        "total": sum(by_source.values()),
+    }
+
+
+@router.get("/classification/queue")
+async def classification_queue(
+    limit: int = 50,
+    min_confidence: float = 0.0,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Returns sources with non-empty proposed_* columns (the review queue)."""
+    tenant_id = current_user.get("tenant_id")
+    cursor = await db.execute(
+        """SELECT s.* FROM sources s
+           WHERE (s.tenant_id IS NULL OR s.tenant_id = ?)
+           AND s.proposed_political_orientation IS NOT NULL
+           AND COALESCE(s.proposed_confidence, 0) >= ?
+           ORDER BY s.proposed_confidence DESC, s.proposed_at DESC
+           LIMIT ?""",
+        (tenant_id, min_confidence, limit),
+    )
+    rows = [dict(r) for r in await cursor.fetchall()]
+    alignments_map = await _load_alignments_for(db, [r["id"] for r in rows])
+    out = []
+    for d in rows:
+        try:
+            proposed_aligns = json.loads(d.get("proposed_alignments_json") or "[]")
+        except (json.JSONDecodeError, TypeError):
+            proposed_aligns = []
+        out.append({
+            "id": d["id"],
+            "name": d["name"],
+            "url": d.get("url"),
+            "domain": d.get("domain"),
+            "source_type": d.get("source_type"),
+            "category": d.get("category"),
+            "is_global": d.get("tenant_id") is None,
+            "current": {
+                "political_orientation": d.get("political_orientation"),
+                "media_type": d.get("media_type"),
+                "reliability": d.get("reliability"),
+                "state_affiliated": bool(d.get("state_affiliated")),
+                "country_code": d.get("country_code"),
+                "alignments": alignments_map.get(d["id"], []),
+                "classification_source": d.get("classification_source"),
+            },
+            "proposed": {
+                "political_orientation": d.get("proposed_political_orientation"),
+                "media_type": d.get("proposed_media_type"),
+                "reliability": d.get("proposed_reliability"),
+                "state_affiliated": bool(d.get("proposed_state_affiliated")),
+                "country_code": d.get("proposed_country_code"),
+                "alignments": proposed_aligns,
+                "confidence": d.get("proposed_confidence"),
+                "reasoning": d.get("proposed_reasoning"),
+                "proposed_at": d.get("proposed_at"),
+            },
+        })
+    return out
+
+
+async def _clear_proposed(db: aiosqlite.Connection, source_id: int):
+    """Clears a source's proposed_* fields (without committing)."""
+    await db.execute(
+        """UPDATE sources SET
+               proposed_political_orientation = NULL,
+               proposed_media_type = NULL,
+               proposed_reliability = NULL,
+               proposed_state_affiliated = NULL,
+               proposed_country_code = NULL,
+               proposed_alignments_json = NULL,
+               proposed_confidence = NULL,
+               proposed_reasoning = NULL,
+               proposed_at = NULL
+           WHERE id = ?""",
+        (source_id,),
+    )
+
+
+@router.post("/{source_id}/classification/approve")
+async def approve_classification(
+    source_id: int,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Copies proposed_* into the real fields and sets classification_source='llm_approved'."""
+    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
+    row = await cursor.fetchone()
+    if not row:
+        raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
+    src = dict(row)
+    _require_admin_for_global(src, current_user)
+
+    if src.get("proposed_political_orientation") is None:
+        raise HTTPException(status_code=400, detail="Keine LLM-Vorschlaege fuer diese Quelle vorhanden")
+
+    try:
+        proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]")
+    except (json.JSONDecodeError, TypeError):
+        proposed_aligns = []
+
+    await db.execute(
+        """UPDATE sources SET
+               political_orientation = ?,
+               media_type = ?,
+               reliability = ?,
+               state_affiliated = ?,
+               country_code = ?,
+               classification_source = 'llm_approved',
+               classified_at = CURRENT_TIMESTAMP
+           WHERE id = ?""",
+        (
+            src["proposed_political_orientation"],
+            src["proposed_media_type"],
+            src["proposed_reliability"],
+            1 if src.get("proposed_state_affiliated") else 0,
+            src.get("proposed_country_code"),
+            source_id,
+        ),
+    )
+    await _replace_alignments(db, source_id, [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS])
+    await _clear_proposed(db, source_id)
+    await db.commit()
+    # Apply the reliability override (IFCN/EUvsDisinfo)
+    try:
+        await apply_reputation_overrides(db, source_id)
+    except Exception as e:
+        logger.warning("Reputation-Override fuer source_id=%s fehlgeschlagen: %s", source_id, e)
+    return {"source_id": source_id, "status": "approved"}
+
+
+@router.post("/{source_id}/classification/reject")
+async def reject_classification(
+    source_id: int,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Discards the LLM proposals without applying them. classification_source stays unchanged."""
+    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
+    row = await cursor.fetchone()
+    if not row:
+        raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
+    src = dict(row)
+    _require_admin_for_global(src, current_user)
+
+    await _clear_proposed(db, source_id)
+    # If classification_source was still 'llm_pending', fall back to 'legacy'
+    if src.get("classification_source") == "llm_pending":
+        await db.execute(
+            "UPDATE sources SET classification_source = 'legacy' WHERE id = ?",
+            (source_id,),
+        )
+    await db.commit()
+    return {"source_id": source_id, "status": "rejected"}
+
+
+@router.post("/{source_id}/classification/reclassify")
+async def reclassify_source(
+    source_id: int,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Triggers an LLM classification of a single source (synchronous, ~3-5 s)."""
+    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
+    row = await cursor.fetchone()
+    if not row:
+        raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
+    src = dict(row)
+    _require_admin_for_global(src, current_user)
+
+    try:
+        result = await classify_source(db, source_id)
+    except Exception as e:
+        logger.error("Reclassify source_id=%s fehlgeschlagen: %s", source_id, e, exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Klassifikation fehlgeschlagen: {e}")
+    return result
+
+
+async def _bulk_classify_background(limit: int, only_unclassified: bool):
+    """Background task: opens its own DB connection."""
+    db = await get_db()
+    try:
+        await bulk_classify(db, limit=limit, only_unclassified=only_unclassified)
+    finally:
+        await db.close()
+
+
+@router.post("/classification/bulk-classify")
+async def trigger_bulk_classify(
+    background_tasks: BackgroundTasks,
+    limit: int = 50,
+    only_unclassified: bool = True,
+    current_user: dict = Depends(get_current_user),
+):
+    """Starts a bulk classification in the background (admins only)."""
+    if current_user.get("role") != "org_admin":
+        raise HTTPException(status_code=403, detail="Nur Admins koennen Bulk-Klassifikation starten")
+    if limit < 1 or limit > 500:
+        raise HTTPException(status_code=400, detail="limit muss zwischen 1 und 500 liegen")
+    background_tasks.add_task(_bulk_classify_background, limit, only_unclassified)
+    return {"status": "started", "limit": limit, "only_unclassified": only_unclassified}
+
+
+@router.post("/external-reputation/sync")
+async def trigger_external_reputation_sync(
+    background_tasks: BackgroundTasks,
+    current_user: dict = Depends(get_current_user),
+):
+    """Starts the sync of IFCN and EUvsDisinfo data (admin, background)."""
+    if current_user.get("role") != "org_admin":
+        raise HTTPException(status_code=403, detail="Nur Admins koennen den externen Sync starten")
+
+    async def _bg():
+        db = await get_db()
+        try:
+            await sync_external_reputation(db)
+        finally:
+            await db.close()
+
+    background_tasks.add_task(_bg)
+    return {"status": "started"}
+
+
+@router.post("/classification/bulk-approve")
+async def bulk_approve_classifications(
+    min_confidence: float = 0.85,
+    current_user: dict = Depends(get_current_user),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Approves all pending proposals above the confidence threshold (admins only).
+
+    Global sources are only processed if the caller is an org_admin;
+    tenant-owned sources are processed either way.
+    """
+    if current_user.get("role") != "org_admin":
+        raise HTTPException(status_code=403, detail="Nur Admins koennen Bulk-Approve nutzen")
+    tenant_id = current_user.get("tenant_id")
+    cursor = await db.execute(
+        """SELECT id, proposed_political_orientation, proposed_media_type,
+                  proposed_reliability, proposed_state_affiliated,
+                  proposed_country_code, proposed_alignments_json, tenant_id
+           FROM sources
+           WHERE proposed_political_orientation IS NOT NULL
+           AND COALESCE(proposed_confidence, 0) >= ?
+           AND (tenant_id IS NULL OR tenant_id = ?)""",
+        (min_confidence, tenant_id),
+    )
+    rows = [dict(r) for r in await cursor.fetchall()]
+    approved_ids: list[int] = []
+    for src in rows:
+        try:
+            proposed_aligns = json.loads(src.get("proposed_alignments_json") or "[]")
+        except (json.JSONDecodeError, TypeError):
+            proposed_aligns = []
+        await db.execute(
+            """UPDATE sources SET
+                   political_orientation = ?,
+                   media_type = ?,
+                   reliability = ?,
+                   state_affiliated = ?,
+                   country_code = ?,
+                   classification_source = 'llm_approved',
+                   classified_at = CURRENT_TIMESTAMP
+               WHERE id = ?""",
+            (
+                src["proposed_political_orientation"],
+                src["proposed_media_type"],
+                src["proposed_reliability"],
+                1 if src.get("proposed_state_affiliated") else 0,
+                src.get("proposed_country_code"),
+                src["id"],
+            ),
+        )
+        await _replace_alignments(
+            db, src["id"], [a for a in proposed_aligns if a in ALLOWED_ALIGNMENTS]
+        )
+        await _clear_proposed(db, src["id"])
+        approved_ids.append(src["id"])
+    await db.commit()
+    # Reliability override for everything just approved
+    try:
+        for sid in approved_ids:
+            await apply_reputation_overrides(db, sid)
+    except Exception as e:
+        logger.warning("Bulk Reputation-Override fehlgeschlagen: %s", e)
+    return {"approved_count": len(approved_ids), "min_confidence": min_confidence}
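
A typical review round-trip against these endpoints might look like this (sketch; assumes an org_admin session and auto-handles proposals by confidence):

```python
queue = (await client.get("/api/sources/classification/queue",
                          params={"min_confidence": 0.7})).json()
for item in queue:
    if item["proposed"]["confidence"] >= 0.9:
        await client.post(f"/api/sources/{item['id']}/classification/approve")
    else:
        await client.post(f"/api/sources/{item['id']}/classification/reject")
```
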
src/routes/__init__.py (new file, 0 lines)

src/routes/version_router.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+"""Version + release-notes endpoints for the frontend update system."""
+import json
+import subprocess
+from datetime import datetime, timezone
+from pathlib import Path
+from fastapi import APIRouter
+
+REPO_ROOT = Path(__file__).resolve().parent.parent.parent
+RELEASES_FILE = REPO_ROOT / 'RELEASES.json'
+
+# Read the version hash once at boot.
+try:
+    COMMIT_HASH = subprocess.check_output(
+        ['git', 'rev-parse', '--short=10', 'HEAD'],
+        cwd=str(REPO_ROOT), text=True, timeout=5
+    ).strip()
+except Exception:
+    COMMIT_HASH = 'unknown'
+
+DEPLOYED_AT = datetime.now(timezone.utc).isoformat()
+
+router = APIRouter(tags=['version'])
+
+
+@router.get('/api/version')
+def version():
+    return {'commit': COMMIT_HASH, 'deployed_at': DEPLOYED_AT}
+
+
+@router.get('/api/release-notes')
+def release_notes(since: str = '', limit: int = 5):
+    """Returns the release notes since the given version.
+
+    'since' is the last version the user has seen. Returns all entries NEWER
+    than that version. Without 'since', the latest 'limit' entries are
+    returned.
+    """
+    if not RELEASES_FILE.exists():
+        return {'entries': [], 'current': COMMIT_HASH}
+    try:
+        with open(RELEASES_FILE, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except Exception as e:
+        return {'entries': [], 'error': f'parse-failed: {e}'}
+
+    if since:
+        result = []
+        for entry in data:
+            if entry.get('version') == since:
+                break
+            result.append(entry)
+        return {'entries': result[:limit], 'current': COMMIT_HASH}
+
+    return {'entries': data[:limit], 'current': COMMIT_HASH}
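
The code only implies that RELEASES.json is a JSON array ordered newest-first whose entries carry a `version` key; a plausible file, with `date` and `notes` fields invented purely for illustration:

```python
# Plausible RELEASES.json content (hashes invented):
[
    {"version": "ab12cd34ef", "date": "2025-06-01", "notes": ["Snapshot lazy-loading"]},
    {"version": "0f9e8d7c6b", "date": "2025-05-28", "notes": ["Source classification review"]},
]
```
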
src/services/external_reputation.py (new file, 282 lines)
@@ -0,0 +1,282 @@
+"""External reputation data for sources.
+
+Synchronizes domain lists from public reputation/fact-checking databases
+and writes the matches into the sources columns:
+
+- IFCN signatories (recognized fact-checkers) -> ifcn_signatory
+- EUvsDisinfo (pro-Kremlin disinformation, Zenodo CSV) -> eu_disinfo_listed,
+  eu_disinfo_case_count, eu_disinfo_last_seen
+
+Afterwards apply_reputation_overrides() applies override rules to the
+reliability column:
+- ifcn_signatory=1 -> reliability='sehr_hoch'
+- eu_disinfo_case_count >= 5 -> reliability='sehr_niedrig'
+- eu_disinfo_case_count >= 1 -> reliability one level down (at most to 'niedrig')
+"""
+import csv
+import io
+import logging
+from collections import defaultdict
+from urllib.parse import urlparse
+
+import aiosqlite
+import httpx
+
+logger = logging.getLogger("osint.external_reputation")
+
+IFCN_LIST_URL = "https://raw.githubusercontent.com/IFCN/verified-signatories/main/list"
+EU_DISINFO_CSV_URL = "https://zenodo.org/records/10514307/files/euvsdisinfo_base.csv?download=1"
+
+HTTP_TIMEOUT = httpx.Timeout(60.0, connect=10.0)
+
+# Generic platform domains that must NOT be flagged as a source
+# (EUvsDisinfo aggregates anonymous Telegram/Twitter posts under platform domains).
+PLATFORM_DOMAINS = {
+    "t.me", "telegram.me", "telegram.org",
+    "twitter.com", "x.com", "mobile.twitter.com",
+    "youtube.com", "youtu.be", "m.youtube.com",
+    "facebook.com", "fb.com", "m.facebook.com",
+    "instagram.com", "tiktok.com", "vk.com", "ok.ru",
+    "rumble.com", "bitchute.com", "odysee.com",
+    "reddit.com", "old.reddit.com",
+    "wordpress.com", "blogspot.com", "medium.com",
+    "substack.com", "wixsite.com",
+}
+
+# The reliability scale as an ordered sequence (bad -> good)
+RELIABILITY_ORDER = ["sehr_niedrig", "niedrig", "gemischt", "hoch", "sehr_hoch"]
+
+
+def _normalize_domain(raw: str | None) -> str | None:
+    """Normalizes a domain: lowercase, without www., without scheme/path."""
+    if not raw:
+        return None
+    raw = raw.strip().lower()
+    if not raw:
+        return None
+    # In case a full URL was passed in
+    if "://" in raw:
+        try:
+            raw = urlparse(raw).netloc or raw
+        except ValueError:
+            pass
+    # Strip path/query
+    raw = raw.split("/")[0].split("?")[0].split("#")[0]
+    if raw.startswith("www."):
+        raw = raw[4:]
+    return raw or None
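
Behaviour of `_normalize_domain` on a few representative inputs:

```python
assert _normalize_domain("https://www.Example.org/news?id=1") == "example.org"
assert _normalize_domain("WWW.T-Online.de/politik") == "t-online.de"
assert _normalize_domain("   ") is None
```
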
+
+
+async def _fetch_text(url: str) -> str:
+    """Fetches text from a URL. Raises an exception on HTTP errors."""
+    async with httpx.AsyncClient(timeout=HTTP_TIMEOUT, follow_redirects=True) as client:
+        resp = await client.get(url)
+        resp.raise_for_status()
+        return resp.text
+
+
+async def sync_ifcn_signatories(db: aiosqlite.Connection) -> dict:
+    """Loads the IFCN domain list and matches it against sources.domain.
+
+    Sets ifcn_signatory=1 where the domain appears in the list, otherwise 0.
+    """
+    text = await _fetch_text(IFCN_LIST_URL)
+    domains: set[str] = set()
+    for line in text.splitlines():
+        d = _normalize_domain(line)
+        if d:
+            domains.add(d)
+    logger.info("IFCN-Liste geladen: %d Domains", len(domains))
+
+    # Load the current sources that have a domain
+    cursor = await db.execute(
+        "SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''"
+    )
+    sources = [dict(r) for r in await cursor.fetchall()]
+
+    matched_ids: list[int] = []
+    unmatched_ids: list[int] = []
+    for s in sources:
+        nd = _normalize_domain(s["domain"])
+        if nd and nd not in PLATFORM_DOMAINS and nd in domains:
+            matched_ids.append(s["id"])
+        else:
+            unmatched_ids.append(s["id"])
+
+    # Bulk update in two statements
+    if matched_ids:
+        placeholders = ",".join("?" for _ in matched_ids)
+        await db.execute(
+            f"UPDATE sources SET ifcn_signatory = 1 WHERE id IN ({placeholders})",
+            matched_ids,
+        )
+    if unmatched_ids:
+        placeholders = ",".join("?" for _ in unmatched_ids)
+        await db.execute(
+            f"UPDATE sources SET ifcn_signatory = 0 WHERE id IN ({placeholders})",
+            unmatched_ids,
+        )
+    await db.commit()
+    logger.info("IFCN-Sync: %d Quellen als Faktenchecker markiert (von %d)",
+                len(matched_ids), len(sources))
+    return {
+        "list_size": len(domains),
+        "sources_checked": len(sources),
+        "matched": len(matched_ids),
+    }
+
+
+async def sync_eu_disinfo(db: aiosqlite.Connection) -> dict:
+    """Loads the EUvsDisinfo CSV from Zenodo, aggregates per domain, writes to sources.
+
+    - eu_disinfo_listed: 1 if the domain was debunked at least once as 'disinformation'
+    - eu_disinfo_case_count: number of disinformation cases
+    - eu_disinfo_last_seen: latest debunk_date
+    """
+    text = await _fetch_text(EU_DISINFO_CSV_URL)
+    reader = csv.DictReader(io.StringIO(text))
+
+    # Aggregate per domain (only class='disinformation')
+    counts: dict[str, int] = defaultdict(int)
+    last_seen: dict[str, str] = {}
+    total_rows = 0
+    for row in reader:
+        total_rows += 1
+        if (row.get("class") or "").strip().lower() != "disinformation":
+            continue
+        d = _normalize_domain(row.get("article_domain"))
+        if not d:
+            continue
+        counts[d] += 1
+        debunk_date = (row.get("debunk_date") or "").strip()
+        if debunk_date:
+            prev = last_seen.get(d)
+            if not prev or debunk_date > prev:
+                last_seen[d] = debunk_date
+    logger.info("EUvsDisinfo-CSV: %d Zeilen, %d Domains mit Desinformation",
+                total_rows, len(counts))
+
+    # Load sources and match
+    cursor = await db.execute(
+        "SELECT id, domain FROM sources WHERE domain IS NOT NULL AND domain != ''"
+    )
+    sources = [dict(r) for r in await cursor.fetchall()]
+
+    matched = 0
+    for s in sources:
+        nd = _normalize_domain(s["domain"])
+        if nd and nd not in PLATFORM_DOMAINS and nd in counts:
+            await db.execute(
+                """UPDATE sources SET
+                       eu_disinfo_listed = 1,
+                       eu_disinfo_case_count = ?,
+                       eu_disinfo_last_seen = ?
+                   WHERE id = ?""",
+                (counts[nd], last_seen.get(nd), s["id"]),
+            )
+            matched += 1
+        else:
+            await db.execute(
+                """UPDATE sources SET
+                       eu_disinfo_listed = 0,
+                       eu_disinfo_case_count = 0,
+                       eu_disinfo_last_seen = NULL
+                   WHERE id = ?""",
+                (s["id"],),
+            )
+    await db.commit()
+    logger.info("EUvsDisinfo-Sync: %d Quellen als Desinformations-Quelle markiert (von %d)",
+                matched, len(sources))
+    return {
+        "rows_in_csv": total_rows,
+        "domains_with_disinfo_in_csv": len(counts),
+        "sources_checked": len(sources),
+        "matched": matched,
+    }
+
+
+def _override_reliability(current: str | None, ifcn: bool, eu_count: int) -> str | None:
+    """Applies override rules to a reliability level.
+
+    Returns: the new level (or None if unchanged).
+    """
+    cur = current or "na"
+
+    # IFCN wins: a certified fact-checker -> sehr_hoch (always)
+    if ifcn:
+        return "sehr_hoch" if cur != "sehr_hoch" else None
+
+    # EUvsDisinfo: downgrade
|
if eu_count >= 5:
|
||||||
|
return "sehr_niedrig" if cur != "sehr_niedrig" else None
|
||||||
|
if eu_count >= 1:
|
||||||
|
# Eine Stufe runter, mindestens bis 'niedrig'
|
||||||
|
if cur == "na":
|
||||||
|
return "niedrig"
|
||||||
|
if cur in RELIABILITY_ORDER:
|
||||||
|
idx = RELIABILITY_ORDER.index(cur)
|
||||||
|
new_idx = max(0, idx - 1)
|
||||||
|
new = RELIABILITY_ORDER[new_idx]
|
||||||
|
# Mindeststufe 'niedrig' bei eu_count >= 1
|
||||||
|
if RELIABILITY_ORDER.index(new) > RELIABILITY_ORDER.index("niedrig"):
|
||||||
|
new = "niedrig"
|
||||||
|
return new if new != cur else None
|
||||||
|
return None
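
# Durchgerechnete Beispiele zu den Override-Regeln (nur Illustration):
#   _override_reliability("gemischt",  ifcn=True,  eu_count=0) -> "sehr_hoch"
#   _override_reliability("hoch",      ifcn=False, eu_count=7) -> "sehr_niedrig"
#   _override_reliability("sehr_hoch", ifcn=False, eu_count=1) -> "niedrig"
#       (eine Stufe runter waere "hoch", die Kappung auf maximal "niedrig" greift)
#   _override_reliability("niedrig",   ifcn=False, eu_count=0) -> None (unveraendert)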


async def apply_reputation_overrides(db: aiosqlite.Connection, source_id: int | None = None) -> dict:
    """Wendet Reliability-Override-Regeln an.

    Wenn source_id angegeben ist, nur fuer diese Quelle. Sonst fuer alle Quellen.
    """
    if source_id is not None:
        cursor = await db.execute(
            "SELECT id, reliability, ifcn_signatory, eu_disinfo_case_count "
            "FROM sources WHERE id = ?",
            (source_id,),
        )
    else:
        cursor = await db.execute(
            "SELECT id, reliability, ifcn_signatory, eu_disinfo_case_count FROM sources"
        )
    sources = [dict(r) for r in await cursor.fetchall()]

    changed = 0
    for s in sources:
        new = _override_reliability(
            s.get("reliability"),
            bool(s.get("ifcn_signatory")),
            int(s.get("eu_disinfo_case_count") or 0),
        )
        if new is not None:
            await db.execute(
                "UPDATE sources SET reliability = ? WHERE id = ?",
                (new, s["id"]),
            )
            changed += 1
    await db.commit()
    logger.info("Reliability-Override: %d Quellen angepasst (von %d geprueften)",
                changed, len(sources))
    return {"checked": len(sources), "changed": changed}


async def sync_all(db: aiosqlite.Connection) -> dict:
    """Vollstaendiger Sync: IFCN + EUvsDisinfo + Reliability-Override.

    Setzt external_data_synced_at fuer alle Quellen.
    """
    ifcn_result = await sync_ifcn_signatories(db)
    eu_result = await sync_eu_disinfo(db)
    override_result = await apply_reputation_overrides(db)

    await db.execute(
        "UPDATE sources SET external_data_synced_at = CURRENT_TIMESTAMP "
        "WHERE domain IS NOT NULL AND domain != ''"
    )
    await db.commit()

    return {
        "ifcn": ifcn_result,
        "eu_disinfo": eu_result,
        "override": override_result,
    }
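
# Aufruf-Skizze (hypothetischer Scheduler-Job; DB-Pfad und row_factory sind Annahmen,
# das Modul selbst gibt den Aufrufkontext nicht vor):
#     async def nightly_reputation_sync():
#         async with aiosqlite.connect("data/osint.db") as db:
#             db.row_factory = aiosqlite.Row   # noetig, damit dict(r) funktioniert
#             result = await sync_all(db)
#             logger.info("Reputations-Sync: %s", result)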

@@ -1,5 +1,6 @@
 """Lizenz-Verwaltung und -Pruefung."""
 import logging
+import os
 from datetime import datetime
 from config import TIMEZONE
 import aiosqlite

@@ -7,11 +8,21 @@ import aiosqlite
 logger = logging.getLogger("osint.license")


+def _staging_mode() -> bool:
+    """Staging-Mode aktiv? Wenn ja, gilt: immer unlimited Budget, kein Hard-Stop.
+
+    Wird ueber ENV-Variable STAGING_MODE=1 (oder true) aktiviert.
+    Nur in Staging-.env gesetzt; Live-.env hat das Flag nicht.
+    """
+    return os.environ.get("STAGING_MODE", "").lower() in ("1", "true", "yes")
+
+
 async def check_license(db: aiosqlite.Connection, organization_id: int) -> dict:
     """Prueft den Lizenzstatus einer Organisation.

     Returns:
-        dict mit: valid, status, license_type, max_users, current_users, read_only, message
+        dict mit: valid, status, license_type, max_users, current_users, read_only,
+        read_only_reason, message, unlimited_budget, credits_total, credits_used
     """
     # Organisation pruefen
     cursor = await db.execute(

@@ -20,10 +31,14 @@ async def check_license(db: aiosqlite.Connection, organization_id: int) -> dict:
     )
     org = await cursor.fetchone()
     if not org:
-        return {"valid": False, "status": "not_found", "read_only": True, "message": "Organisation nicht gefunden"}
+        return {"valid": False, "status": "not_found", "read_only": True,
+                "read_only_reason": "not_found",
+                "message": "Organisation nicht gefunden"}

     if not org["is_active"]:
-        return {"valid": False, "status": "org_disabled", "read_only": True, "message": "Organisation deaktiviert"}
+        return {"valid": False, "status": "org_disabled", "read_only": True,
+                "read_only_reason": "org_disabled",
+                "message": "Organisation deaktiviert"}

     # Aktive Lizenz suchen
     cursor = await db.execute(

@@ -35,7 +50,19 @@ async def check_license(db: aiosqlite.Connection, organization_id: int) -> dict:
     license_row = await cursor.fetchone()

     if not license_row:
-        return {"valid": False, "status": "no_license", "read_only": True, "message": "Keine aktive Lizenz"}
+        return {"valid": False, "status": "no_license", "read_only": True,
+                "read_only_reason": "no_license",
+                "message": "Keine aktive Lizenz"}
+
+    # Felder zur weiteren Verwendung extrahieren
+    lic_dict = dict(license_row)
+    unlimited_budget = bool(lic_dict.get("unlimited_budget"))
+    credits_total = lic_dict.get("credits_total")
+    credits_used = lic_dict.get("credits_used") or 0
+
+    # STAGING_MODE: kein Token-Budget-Hard-Stop, immer unlimited
+    if _staging_mode():
+        unlimited_budget = True

     # Ablauf pruefen
     now = datetime.now(TIMEZONE)

@@ -52,11 +79,21 @@ async def check_license(db: aiosqlite.Connection, organization_id: int) -> dict:
                 "status": "expired",
                 "license_type": license_row["license_type"],
                 "read_only": True,
+                "read_only_reason": "expired",
                 "message": "Lizenz abgelaufen",
+                "unlimited_budget": unlimited_budget,
+                "credits_total": credits_total,
+                "credits_used": credits_used,
             }
         except (ValueError, TypeError):
             pass

+    # Budget-Check (Hard-Stop bei aufgebrauchten Credits, ausser unlimited)
+    budget_exceeded = False
+    if not unlimited_budget and credits_total and credits_total > 0:
+        if credits_used >= credits_total:
+            budget_exceeded = True
+
     # Nutzerzahl pruefen
     cursor = await db.execute(
         "SELECT COUNT(*) as cnt FROM users WHERE organization_id = ? AND is_active = 1",

@@ -64,6 +101,21 @@ async def check_license(db: aiosqlite.Connection, organization_id: int) -> dict:
     )
     current_users = (await cursor.fetchone())["cnt"]

+    if budget_exceeded:
+        return {
+            "valid": True,  # Lizenz ist gueltig, aber Budget aufgebraucht -> read-only
+            "status": "budget_exceeded",
+            "license_type": license_row["license_type"],
+            "max_users": license_row["max_users"],
+            "current_users": current_users,
+            "read_only": True,
+            "read_only_reason": "budget_exceeded",
+            "message": "Token-Budget aufgebraucht",
+            "unlimited_budget": False,
+            "credits_total": credits_total,
+            "credits_used": credits_used,
+        }
+
     return {
         "valid": True,
         "status": license_row["status"],

@@ -71,7 +123,11 @@ async def check_license(db: aiosqlite.Connection, organization_id: int) -> dict:
         "max_users": license_row["max_users"],
         "current_users": current_users,
         "read_only": False,
+        "read_only_reason": None,
         "message": "Lizenz aktiv",
+        "unlimited_budget": unlimited_budget,
+        "credits_total": credits_total,
+        "credits_used": credits_used,
     }
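
# Rechenbeispiel zur neuen Budget-Logik (Werte frei gewaehlt, nur Illustration):
#   credits_total=1000, credits_used=1000, unlimited_budget=False
#       -> budget_exceeded=True -> status='budget_exceeded', read_only=True
#   credits_total=None oder 0 -> Budget-Check greift nicht, kein Hard-Stop
#   STAGING_MODE=1            -> unlimited_budget wird erzwungen, nie Hard-Stop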
|
||||||
|
|
||||||
|
|
||||||
@@ -91,6 +147,92 @@ async def can_add_user(db: aiosqlite.Connection, organization_id: int) -> tuple[
|
|||||||
return True, ""
|
return True, ""
|
||||||
|
|
||||||
|
|
||||||
|
async def charge_usage_to_tenant(
|
||||||
|
db: aiosqlite.Connection,
|
||||||
|
tenant_id: int | None,
|
||||||
|
usage,
|
||||||
|
source: str,
|
||||||
|
) -> None:
|
||||||
|
"""Verbucht Token-Verbrauch auf einen Tenant.
|
||||||
|
|
||||||
|
Aktualisiert `token_usage_monthly` (UPSERT pro organization_id+year_month+source)
|
||||||
|
und zieht Credits von der aktiven Lizenz ab (wenn cost_per_credit gesetzt).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db: offene aiosqlite.Connection
|
||||||
|
tenant_id: Organisations-ID oder None (dann nur geloggt, keine DB-Buchung)
|
||||||
|
usage: ClaudeUsage oder UsageAccumulator mit input_tokens/output_tokens/
|
||||||
|
cache_creation_tokens/cache_read_tokens/total_cost_usd/call_count
|
||||||
|
source: 'monitor' | 'enhance' | 'chat'
|
||||||
|
|
||||||
|
Der Helper ruft KEIN db.commit() auf — die Transaktionsgrenzen bestimmt der Caller.
|
||||||
|
Ohne Verbrauch (total_cost_usd == 0) oder ohne tenant_id wird nichts gebucht.
|
||||||
|
"""
|
||||||
|
total_cost = getattr(usage, "total_cost_usd", None)
|
||||||
|
if total_cost is None:
|
||||||
|
total_cost = getattr(usage, "cost_usd", 0.0)
|
||||||
|
|
||||||
|
if not tenant_id:
|
||||||
|
logger.info(
|
||||||
|
f"charge_usage_to_tenant[{source}]: kein tenant_id, uebersprungen "
|
||||||
|
f"(cost=${total_cost:.4f})"
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
if total_cost <= 0:
|
||||||
|
return
|
||||||
|
|
||||||
|
input_tokens = getattr(usage, "input_tokens", 0)
|
||||||
|
output_tokens = getattr(usage, "output_tokens", 0)
|
||||||
|
cache_creation = getattr(usage, "cache_creation_tokens", 0)
|
||||||
|
cache_read = getattr(usage, "cache_read_tokens", 0)
|
||||||
|
api_calls = getattr(usage, "call_count", 1)
|
||||||
|
refresh_increment = 1 if source == "monitor" else 0
|
||||||
|
|
||||||
|
year_month = datetime.now(TIMEZONE).strftime("%Y-%m")
|
||||||
|
|
||||||
|
await db.execute(
|
||||||
|
"""
|
||||||
|
INSERT INTO token_usage_monthly
|
||||||
|
(organization_id, year_month, source, input_tokens, output_tokens,
|
||||||
|
cache_creation_tokens, cache_read_tokens, total_cost_usd, api_calls, refresh_count)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(organization_id, year_month, source) DO UPDATE SET
|
||||||
|
input_tokens = input_tokens + excluded.input_tokens,
|
||||||
|
output_tokens = output_tokens + excluded.output_tokens,
|
||||||
|
cache_creation_tokens = cache_creation_tokens + excluded.cache_creation_tokens,
|
||||||
|
cache_read_tokens = cache_read_tokens + excluded.cache_read_tokens,
|
||||||
|
total_cost_usd = total_cost_usd + excluded.total_cost_usd,
|
||||||
|
api_calls = api_calls + excluded.api_calls,
|
||||||
|
refresh_count = refresh_count + excluded.refresh_count,
|
||||||
|
updated_at = CURRENT_TIMESTAMP
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
tenant_id, year_month, source,
|
||||||
|
input_tokens, output_tokens, cache_creation, cache_read,
|
||||||
|
round(total_cost, 7), api_calls, refresh_increment,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
lic_cursor = await db.execute(
|
||||||
|
"SELECT cost_per_credit FROM licenses WHERE organization_id = ? AND status = 'active' ORDER BY id DESC LIMIT 1",
|
||||||
|
(tenant_id,),
|
||||||
|
)
|
||||||
|
lic = await lic_cursor.fetchone()
|
||||||
|
credits_consumed = 0.0
|
||||||
|
if lic and lic["cost_per_credit"] and lic["cost_per_credit"] > 0:
|
||||||
|
credits_consumed = total_cost / lic["cost_per_credit"]
|
||||||
|
await db.execute(
|
||||||
|
"UPDATE licenses SET credits_used = COALESCE(credits_used, 0) + ? WHERE organization_id = ? AND status = 'active'",
|
||||||
|
(round(credits_consumed, 2), tenant_id),
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"charge_usage_to_tenant[{source}] Tenant {tenant_id}: "
|
||||||
|
f"${total_cost:.4f} -> {round(credits_consumed, 2)} Credits"
|
||||||
|
)
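
# Aufruf-Skizze zu charge_usage_to_tenant (Caller-Namen hypothetisch):
#     usage = await run_enhance_pass(...)  # Objekt mit total_cost_usd, input_tokens, ...
#     await charge_usage_to_tenant(db, tenant_id=org_id, usage=usage, source="enhance")
#     await db.commit()  # Transaktionsgrenze liegt beim Caller, der Helper committet nicht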


 async def expire_licenses(db: aiosqlite.Connection):
     """Setzt abgelaufene Lizenzen auf 'expired'. Taeglich aufrufen."""
     cursor = await db.execute(
|
|||||||
252
src/services/pipeline_tracker.py
Normale Datei
252
src/services/pipeline_tracker.py
Normale Datei
@@ -0,0 +1,252 @@
"""Analysepipeline-Tracking: persistiert Pipeline-Schritte pro Refresh und sendet
Live-Status an die Frontend-Visualisierung.

Die Pipeline hat 9 Schritte und ist eine bewusst vereinfachte Außensicht der
internen Refresh-Pipeline (siehe orchestrator.py). Sie verschweigt Interna
(Modellnamen, Tools, Phasen, Multi-Pass-Labels) und beschreibt jeden Schritt in
verständlicher Sprache.
"""
from __future__ import annotations

import logging
from datetime import datetime
from typing import Optional

from config import TIMEZONE

logger = logging.getLogger("osint.pipeline")


# Single Source of Truth für die Pipeline-Definition.
# Reihenfolge bestimmt die Anzeige im Frontend.
PIPELINE_STEPS = [
    {
        "key": "sources_review",
        "label": "Quellen sichten",
        "icon": "search",
        "tooltip": "Wir prüfen alle deine Nachrichtenquellen, ob sie aktuell erreichbar sind und was sie zu deiner Lage melden.",
    },
    {
        "key": "collect",
        "label": "Nachrichten sammeln",
        "icon": "rss",
        "tooltip": "Aus den passenden Quellen werden alle relevanten Meldungen eingesammelt - aus deinen RSS-Feeds, dem Web und optional Telegram-Kanälen.",
    },
    {
        "key": "dedup",
        "label": "Doppeltes filtern",
        "icon": "copy-x",
        "tooltip": "Mehrfach gemeldete Nachrichten werden zusammengefasst, damit nichts doppelt im Lagebild auftaucht.",
    },
    {
        "key": "relevance",
        "label": "Relevanz bewerten",
        "icon": "scale",
        "tooltip": "Jede Meldung wird darauf geprüft, ob sie wirklich zu deiner Lage passt. Themenfremdes wird aussortiert.",
    },
    {
        "key": "geoparsing",
        "label": "Orte erkennen",
        "icon": "map-pin",
        "tooltip": "Aus den Meldungen werden Ortsangaben erkannt und auf der Karte verortet.",
    },
    {
        "key": "factcheck",
        "label": "Fakten prüfen",
        "icon": "shield",
        "tooltip": "Behauptungen aus den Meldungen werden gegeneinander abgeglichen: Bestätigt? Umstritten? Noch unklar?",
    },
    {
        "key": "summary",
        "label": "Lagebild verfassen",
        "icon": "file-text",
        "tooltip": "Aus allen geprüften Meldungen wird ein zusammenhängendes Lagebild geschrieben, mit Quellenangaben am Text.",
    },
    {
        "key": "qc",
        "label": "Qualitätscheck",
        "icon": "check-circle",
        "tooltip": "Eine letzte Kontrollprüfung am Ergebnis: Doppelte Fakten zusammenführen, Karten-Verortung prüfen, bevor du benachrichtigt wirst.",
    },
    {
        "key": "notify",
        "label": "Benachrichtigen",
        "icon": "bell",
        "tooltip": "Wenn etwas Wichtiges dabei war, gehen Benachrichtigungen raus, im Glockensymbol oben rechts und optional per E-Mail.",
    },
]

VALID_KEYS = {s["key"] for s in PIPELINE_STEPS}


def _now_db() -> str:
    """Aktuelle Zeit im DB-Format (lokal)."""
    return datetime.now(TIMEZONE).strftime("%Y-%m-%d %H:%M:%S")


async def _broadcast(ws_manager, incident_id: int, payload: dict,
                     visibility: str, created_by: Optional[int], tenant_id: Optional[int]):
    """Sendet ein pipeline_step-Event an verbundene Clients der Lage."""
    if not ws_manager:
        return
    try:
        await ws_manager.broadcast_for_incident(
            {"type": "pipeline_step", "incident_id": incident_id, "data": payload},
            visibility, created_by, tenant_id,
        )
    except Exception as e:
        logger.warning(f"Pipeline-WS-Broadcast fehlgeschlagen: {e}")


async def start_step(db, ws_manager, *, refresh_log_id: int, incident_id: int,
                     step_key: str, pass_number: int = 1, tenant_id: Optional[int] = None,
                     visibility: str = "public", created_by: Optional[int] = None) -> Optional[int]:
    """Markiert einen Pipeline-Schritt als aktiv.

    Returns die DB-ID der Step-Zeile (für späteres Update via complete_step), oder None bei Fehler.
    """
    if step_key not in VALID_KEYS:
        logger.warning(f"Unbekannter Pipeline-Schritt: {step_key}")
        return None

    try:
        cursor = await db.execute(
            """INSERT INTO refresh_pipeline_steps
               (refresh_log_id, incident_id, step_key, pass_number, started_at, status, tenant_id)
               VALUES (?, ?, ?, ?, ?, 'active', ?)""",
            (refresh_log_id, incident_id, step_key, pass_number, _now_db(), tenant_id),
        )
        await db.commit()
        step_id = cursor.lastrowid
    except Exception as e:
        logger.warning(f"Pipeline start_step({step_key}) DB-Fehler: {e}")
        step_id = None

    await _broadcast(ws_manager, incident_id, {
        "step_key": step_key,
        "status": "active",
        "pass_number": pass_number,
    }, visibility, created_by, tenant_id)

    return step_id


async def complete_step(db, ws_manager, *, step_id: Optional[int], refresh_log_id: int,
                        incident_id: int, step_key: str, pass_number: int = 1,
                        count_value: Optional[int] = None, count_secondary: Optional[int] = None,
                        tenant_id: Optional[int] = None, visibility: str = "public",
                        created_by: Optional[int] = None):
    """Markiert einen Pipeline-Schritt als abgeschlossen, mit Zahlen."""
    if step_key not in VALID_KEYS:
        return

    try:
        if step_id:
            await db.execute(
                """UPDATE refresh_pipeline_steps
                   SET status = 'done', completed_at = ?, count_value = ?, count_secondary = ?
                   WHERE id = ?""",
                (_now_db(), count_value, count_secondary, step_id),
            )
        else:
            # Fallback wenn start_step keine ID lieferte
            await db.execute(
                """INSERT INTO refresh_pipeline_steps
                   (refresh_log_id, incident_id, step_key, pass_number, started_at, completed_at,
                    status, count_value, count_secondary, tenant_id)
                   VALUES (?, ?, ?, ?, ?, ?, 'done', ?, ?, ?)""",
                (refresh_log_id, incident_id, step_key, pass_number, _now_db(), _now_db(),
                 count_value, count_secondary, tenant_id),
            )
        await db.commit()
    except Exception as e:
        logger.warning(f"Pipeline complete_step({step_key}) DB-Fehler: {e}")

    await _broadcast(ws_manager, incident_id, {
        "step_key": step_key,
        "status": "done",
        "pass_number": pass_number,
        "count_value": count_value,
        "count_secondary": count_secondary,
    }, visibility, created_by, tenant_id)


async def skip_step(db, ws_manager, *, refresh_log_id: int, incident_id: int,
                    step_key: str, pass_number: int = 1, tenant_id: Optional[int] = None,
                    visibility: str = "public", created_by: Optional[int] = None):
    """Markiert einen Schritt als übersprungen (z.B. Geoparsing ohne neue Artikel)."""
    if step_key not in VALID_KEYS:
        return
    try:
        await db.execute(
            """INSERT INTO refresh_pipeline_steps
               (refresh_log_id, incident_id, step_key, pass_number, started_at, completed_at,
                status, tenant_id)
               VALUES (?, ?, ?, ?, ?, ?, 'skipped', ?)""",
            (refresh_log_id, incident_id, step_key, pass_number, _now_db(), _now_db(), tenant_id),
        )
        await db.commit()
    except Exception as e:
        logger.warning(f"Pipeline skip_step({step_key}) DB-Fehler: {e}")

    await _broadcast(ws_manager, incident_id, {
        "step_key": step_key,
        "status": "skipped",
        "pass_number": pass_number,
    }, visibility, created_by, tenant_id)


async def error_step(db, ws_manager, *, step_id: Optional[int], refresh_log_id: int,
                     incident_id: int, step_key: str, pass_number: int = 1,
                     tenant_id: Optional[int] = None, visibility: str = "public",
                     created_by: Optional[int] = None):
    """Markiert einen Schritt als fehlgeschlagen."""
    if step_key not in VALID_KEYS:
        return
    try:
        if step_id:
            await db.execute(
                """UPDATE refresh_pipeline_steps
                   SET status = 'error', completed_at = ?
                   WHERE id = ?""",
                (_now_db(), step_id),
            )
        else:
            await db.execute(
                """INSERT INTO refresh_pipeline_steps
                   (refresh_log_id, incident_id, step_key, pass_number, started_at, completed_at,
                    status, tenant_id)
                   VALUES (?, ?, ?, ?, ?, ?, 'error', ?)""",
                (refresh_log_id, incident_id, step_key, pass_number, _now_db(), _now_db(), tenant_id),
            )
        await db.commit()
    except Exception as e:
        logger.warning(f"Pipeline error_step({step_key}) DB-Fehler: {e}")

    await _broadcast(ws_manager, incident_id, {
        "step_key": step_key,
        "status": "error",
        "pass_number": pass_number,
    }, visibility, created_by, tenant_id)


async def cancel_active_steps(db, *, refresh_log_id: int) -> int:
    """Schliesst alle noch aktiven Pipeline-Schritte eines Refreshs als 'cancelled' ab.

    Wird vom Orchestrator nach einem User-Cancel aufgerufen. Ohne diesen Schritt
    bleibt der zuletzt aktive Step-Eintrag verwaist und der Pipeline-Endpoint
    liefert dauerhaft 'Schritt X laeuft' an die UI.
    """
    try:
        cur = await db.execute(
            """UPDATE refresh_pipeline_steps
               SET status = 'cancelled', completed_at = ?
               WHERE refresh_log_id = ? AND status = 'active'""",
            (_now_db(), refresh_log_id),
        )
        await db.commit()
        return cur.rowcount or 0
    except Exception as e:
        logger.warning(f"Pipeline cancel_active_steps DB-Fehler: {e}")
        return 0
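
# Aufruf-Skizze aus Orchestrator-Sicht (vereinfachte Annahme, Hilfsnamen hypothetisch):
#     step_id = await start_step(db, ws_manager, refresh_log_id=rid,
#                                incident_id=iid, step_key="collect")
#     try:
#         articles = await collect_articles(...)
#         await complete_step(db, ws_manager, step_id=step_id, refresh_log_id=rid,
#                             incident_id=iid, step_key="collect",
#                             count_value=len(articles))
#     except Exception:
#         await error_step(db, ws_manager, step_id=step_id, refresh_log_id=rid,
#                          incident_id=iid, step_key="collect")
#         raise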

@@ -400,18 +400,20 @@ async def run_post_refresh_qc(db, incident_id: int) -> dict:
             db, incident_id, incident_title, incident_desc
         )
         umlauts_fixed = await normalize_umlaut_fields(db, incident_id)
+        article_umlauts_fixed = await normalize_umlaut_articles(db, incident_id)

-        if facts_removed > 0 or locations_fixed > 0 or umlauts_fixed > 0:
+        total_umlaut_changes = umlauts_fixed + article_umlauts_fixed
+        if facts_removed > 0 or locations_fixed > 0 or total_umlaut_changes > 0:
             await db.commit()
             logger.info(
-                "Post-Refresh QC fuer Incident %d: %d Duplikate entfernt, %d Locations korrigiert, %d Umlaute normalisiert",
-                incident_id, facts_removed, locations_fixed, umlauts_fixed,
+                "Post-Refresh QC fuer Incident %d: %d Duplikate entfernt, %d Locations korrigiert, %d Umlaute normalisiert (davon %d in Articles)",
+                incident_id, facts_removed, locations_fixed, total_umlaut_changes, article_umlauts_fixed,
             )

         return {
             "facts_removed": facts_removed,
             "locations_fixed": locations_fixed,
-            "umlauts_fixed": umlauts_fixed,
+            "umlauts_fixed": total_umlaut_changes,
         }

     except Exception as e:
@@ -568,3 +570,64 @@ async def normalize_umlaut_fields(db, incident_id: int) -> int:
         incident_id, count_summary, count_dev,
     )
     return total
+
+
+async def normalize_umlaut_articles(db, incident_id: int) -> int:
+    """Normalisiert Umlaute in allen Artikel-Texten des Incidents.
+
+    Felder, die behandelt werden:
+    - headline_de und content_de bei allen Artikeln (LLM-Uebersetzung kann
+      ASCII-Umlaute liefern trotz Prompt-Anweisung)
+    - headline und content_original bei language='de' (manche Quellen wie
+      dpa-AFX, Telegram-Kanaele liefern selbst schon ASCII-Umlaute)
+
+    Idempotent: Wenn der Text schon korrekt ist, macht das Dict-Lookup
+    keine Aenderung und wir schreiben nicht zurueck.
+
+    Rueckgabe: Gesamtzahl der Wort-Ersetzungen ueber alle Artikel.
+    """
+    cursor = await db.execute(
+        """SELECT id, language, headline, headline_de, content_original, content_de
+           FROM articles WHERE incident_id = ?""",
+        (incident_id,),
+    )
+    rows = await cursor.fetchall()
+    if not rows:
+        return 0
+
+    total = 0
+    for row in rows:
+        is_de = (row["language"] or "").lower() == "de"
+        updates = {}
+
+        # Felder, die immer behandelt werden (LLM-Uebersetzungen)
+        if row["headline_de"]:
+            new, n = normalize_german_umlauts(row["headline_de"])
+            if n > 0:
+                updates["headline_de"] = new
+                total += n
+        if row["content_de"]:
+            new, n = normalize_german_umlauts(row["content_de"])
+            if n > 0:
+                updates["content_de"] = new
+                total += n
+
+        # Originalfelder nur bei deutschen Quellen
+        if is_de:
+            if row["headline"]:
+                new, n = normalize_german_umlauts(row["headline"])
+                if n > 0:
+                    updates["headline"] = new
+                    total += n
+            if row["content_original"]:
+                new, n = normalize_german_umlauts(row["content_original"])
+                if n > 0:
+                    updates["content_original"] = new
+                    total += n
+
+        if updates:
+            set_clause = ", ".join(f"{k} = ?" for k in updates)
+            values = list(updates.values()) + [row["id"]]
+            await db.execute(f"UPDATE articles SET {set_clause} WHERE id = ?", values)
+
+    return total
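
# Annahme zur hier genutzten Helper-Funktion normalize_german_umlauts (in diesem
# Ausschnitt nicht definiert): sie ersetzt ASCII-Umlautschreibweisen per
# Woerterbuch-Lookup und liefert (neuer_text, anzahl_ersetzungen), etwa:
#     normalize_german_umlauts("Sperrung in Muenchen")  -> ("Sperrung in München", 1)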

295  src/services/source_classifier.py (Normale Datei)
@@ -0,0 +1,295 @@
"""Klassifiziert Quellen via Claude (Haiku) nach 4 Achsen + state_affiliated + country.

Schreibt Vorschlaege in die proposed_*-Spalten von sources und setzt
classification_source='llm_pending'. Approval erfolgt ueber separate Endpoints,
die proposed_* in die echten Spalten kopieren.
"""
import asyncio
import json
import logging
import re

import aiosqlite

from agents.claude_client import call_claude
from config import CLAUDE_MODEL_FAST

logger = logging.getLogger("osint.source_classifier")

POLITICAL_VALUES = {
    "links_extrem", "links", "mitte_links", "liberal", "mitte",
    "konservativ", "mitte_rechts", "rechts", "rechts_extrem", "na",
}
MEDIA_TYPE_VALUES = {
    "tageszeitung", "wochenzeitung", "magazin", "tv_sender", "radio",
    "oeffentlich_rechtlich", "nachrichtenagentur", "online_only", "blog",
    "telegram_kanal", "telegram_bot", "podcast", "social_media", "imageboard",
    "think_tank", "ngo", "behoerde", "staatsmedium", "fachmedium", "sonstige",
}
RELIABILITY_VALUES = {"sehr_hoch", "hoch", "gemischt", "niedrig", "sehr_niedrig", "na"}
ALIGNMENT_VALUES = {
    "prorussisch", "proiranisch", "prowestlich", "proukrainisch",
    "prochinesisch", "projapanisch", "proisraelisch", "propalaestinensisch",
    "protuerkisch", "panarabisch", "neutral", "sonstige",
}


def _build_prompt(src: dict, sample_articles: list[dict]) -> str:
    sample_text = ""
    if sample_articles:
        lines = []
        for i, art in enumerate(sample_articles[:5], 1):
            headline = (art.get("headline") or art.get("headline_de") or "").strip()
            if headline:
                lines.append(f"{i}. {headline[:200]}")
        if lines:
            sample_text = "\nLetzte Artikel/Headlines:\n" + "\n".join(lines)

    return f"""Du bist ein OSINT-Analyst und klassifizierst Nachrichten- und Medienquellen fuer ein Lagebild-Monitoring-System (DACH-Raum).

QUELLE:
Name: {src.get('name')}
URL: {src.get('url') or '-'}
Domain: {src.get('domain') or '-'}
Quellentyp: {src.get('source_type')}
Bisherige Kategorie: {src.get('category')}
Sprache: {src.get('language') or 'unbekannt'}
Bisherige Notiz (Freitext): {src.get('bias') or '-'}{sample_text}

AUFGABE: Klassifiziere die Quelle nach folgenden Achsen.

1. political_orientation:
- links_extrem (z.B. linksunten.indymedia)
- links (klar links, z.B. junge Welt, taz)
- mitte_links (linksliberal/sozialdemokratisch, z.B. SZ, Spiegel)
- liberal (wirtschafts-/grünliberal, z.B. NZZ, Zeit)
- mitte (politisch neutral, Agentur, z.B. dpa, Reuters, tagesschau)
- konservativ (buergerlich-konservativ, z.B. FAZ, Welt)
- mitte_rechts (rechts-buergerlich, z.B. Tichys Einblick, Achgut)
- rechts (klar rechts, z.B. Junge Freiheit, EpochTimes)
- rechts_extrem (z.B. Compact, PI-News)
- na (nicht klassifizierbar: Behoerde, Fachmedium, Think Tank ohne klare politische Linie)

2. media_type (genau einer):
tageszeitung, wochenzeitung, magazin, tv_sender, radio, oeffentlich_rechtlich,
nachrichtenagentur, online_only, blog, telegram_kanal, telegram_bot, podcast,
social_media, imageboard, think_tank, ngo, behoerde, staatsmedium, fachmedium, sonstige

3. reliability:
- sehr_hoch (etablierte Qualitaet, Faktencheck: tagesschau, dpa, FAZ, Reuters)
- hoch (serioes mit gelegentlichen Schwaechen: taz, Welt, BILD bei harten News)
- gemischt (Mix Meinung/Einseitigkeit: Tichys Einblick, Achgut, Boulevard)
- niedrig (haeufig irrefuehrend, schwache Quellenarbeit: Junge Freiheit, EpochTimes)
- sehr_niedrig (bekannt fuer Desinformation/Verschwoerung: Compact, RT, Sputnik, PI-News)
- na (nicht bewertbar)

4. alignments (Mehrfach, leeres Array wenn keine ausgepraegte Naehe):
prorussisch, proiranisch, prowestlich, proukrainisch, prochinesisch, projapanisch,
proisraelisch, propalaestinensisch, protuerkisch, panarabisch, neutral, sonstige

5. state_affiliated (true/false): true wenn vom Staat finanziert/kontrolliert
(RT, Sputnik, CGTN, PressTV, Xinhua, TRT). Public Service Broadcaster
wie ARD/ZDF/BBC sind NICHT state_affiliated.

6. country_code (ISO 3166-1 alpha-2): Heimatland (DE, AT, CH, RU, US, ...). null wenn unklar.

7. confidence (0.0-1.0): 0.85+ fuer bekannte Outlets, 0.5-0.85 fuer mittelbekannt, <0.5 fuer unsicher.

8. reasoning (1-2 Saetze): Kurze Begruendung der Hauptklassifikationen.

WICHTIG:
- Antworte AUSSCHLIESSLICH mit einem JSON-Objekt, kein Text drumherum.
- Nutze ausschliesslich die genannten enum-Werte (snake_case).
- Bei Unklarheit lieber `na` und niedrige confidence.

JSON-Schema:
{{
  "political_orientation": "...",
  "media_type": "...",
  "reliability": "...",
  "alignments": ["..."],
  "state_affiliated": false,
  "country_code": "DE",
  "confidence": 0.9,
  "reasoning": "..."
}}"""


async def _load_sample_articles(db: aiosqlite.Connection, name: str, domain: str | None, limit: int = 5) -> list[dict]:
    """Laedt die letzten Headlines einer Quelle (per name oder Domain-Match)."""
    rows: list = []
    if name:
        cursor = await db.execute(
            "SELECT headline, headline_de FROM articles WHERE source = ? ORDER BY collected_at DESC LIMIT ?",
            (name, limit),
        )
        rows = await cursor.fetchall()
    if not rows and domain:
        cursor = await db.execute(
            "SELECT headline, headline_de FROM articles WHERE source_url LIKE ? ORDER BY collected_at DESC LIMIT ?",
            (f"%{domain}%", limit),
        )
        rows = await cursor.fetchall()
    return [dict(r) for r in rows]


def _validate(parsed: dict) -> dict:
    """Validiert + normalisiert eine LLM-Antwort gegen die Enums."""
    pol = parsed.get("political_orientation", "na")
    if pol not in POLITICAL_VALUES:
        pol = "na"
    mt = parsed.get("media_type", "sonstige")
    if mt not in MEDIA_TYPE_VALUES:
        mt = "sonstige"
    rel = parsed.get("reliability", "na")
    if rel not in RELIABILITY_VALUES:
        rel = "na"
    aligns_raw = parsed.get("alignments") or []
    if not isinstance(aligns_raw, list):
        aligns_raw = []
    aligns = sorted({a for a in aligns_raw if isinstance(a, str) and a in ALIGNMENT_VALUES})
    sa = bool(parsed.get("state_affiliated", False))
    cc = parsed.get("country_code")
    if isinstance(cc, str) and len(cc) == 2 and cc.isalpha():
        cc = cc.upper()
    else:
        cc = None
    try:
        confidence = float(parsed.get("confidence", 0.5))
        confidence = max(0.0, min(1.0, confidence))
    except (TypeError, ValueError):
        confidence = 0.5
    reasoning = str(parsed.get("reasoning", ""))[:1000]
    return {
        "political_orientation": pol,
        "media_type": mt,
        "reliability": rel,
        "alignments": aligns,
        "state_affiliated": sa,
        "country_code": cc,
        "confidence": confidence,
        "reasoning": reasoning,
    }


async def classify_source(
    db: aiosqlite.Connection,
    source_id: int,
    sample_limit: int = 5,
    model: str = CLAUDE_MODEL_FAST,
) -> dict:
    """Klassifiziert eine einzelne Quelle und schreibt die Vorschlaege in proposed_*-Spalten."""
    cursor = await db.execute(
        "SELECT id, name, url, domain, source_type, category, language, bias, "
        "classification_source FROM sources WHERE id = ?",
        (source_id,),
    )
    row = await cursor.fetchone()
    if not row:
        raise ValueError(f"Quelle {source_id} nicht gefunden")
    src = dict(row)

    sample = await _load_sample_articles(db, src["name"], src.get("domain"), sample_limit)
    prompt = _build_prompt(src, sample)
    response, usage = await call_claude(prompt, tools=None, model=model)

    json_match = re.search(r"\{.*\}", response, re.DOTALL)
    if not json_match:
        raise ValueError(f"Keine JSON-Antwort von Claude fuer source_id={source_id}: {response[:200]}")
    parsed = json.loads(json_match.group(0))
    result = _validate(parsed)

    # Nur classification_source auf 'llm_pending' setzen, wenn nicht bereits manuell/approved
    new_src = "CASE WHEN classification_source IN ('manual','llm_approved') THEN classification_source ELSE 'llm_pending' END"
    await db.execute(
        f"""UPDATE sources SET
               proposed_political_orientation = ?,
               proposed_media_type = ?,
               proposed_reliability = ?,
               proposed_state_affiliated = ?,
               proposed_country_code = ?,
               proposed_alignments_json = ?,
               proposed_confidence = ?,
               proposed_reasoning = ?,
               proposed_at = CURRENT_TIMESTAMP,
               classification_source = {new_src}
           WHERE id = ?""",
        (
            result["political_orientation"],
            result["media_type"],
            result["reliability"],
            1 if result["state_affiliated"] else 0,
            result["country_code"],
            json.dumps(result["alignments"], ensure_ascii=False),
            result["confidence"],
            result["reasoning"],
            source_id,
        ),
    )
    await db.commit()

    logger.info(
        "Klassifiziert source_id=%s '%s' -> %s/%s/%s conf=%.2f ($%.4f)",
        source_id, src["name"], result["political_orientation"],
        result["media_type"], result["reliability"], result["confidence"],
        usage.cost_usd,
    )

    result["source_id"] = source_id
    result["usage"] = {
        "cost_usd": usage.cost_usd,
        "input_tokens": usage.input_tokens,
        "output_tokens": usage.output_tokens,
    }
    return result
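
# Aufruf-Skizze (z.B. aus einem Admin-Endpoint; der konkrete Kontext ist eine Annahme):
#     result = await classify_source(db, source_id=42)
#     # -> {"political_orientation": "...", ..., "usage": {"cost_usd": ...}}
#     # Der Vorschlag liegt danach in den proposed_*-Spalten und wartet auf Approval.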


async def bulk_classify(
    db: aiosqlite.Connection,
    limit: int = 50,
    only_unclassified: bool = True,
    model: str = CLAUDE_MODEL_FAST,
) -> dict:
    """Klassifiziert noch unklassifizierte Quellen (sequenziell).

    Args:
        limit: Maximale Anzahl Quellen pro Aufruf
        only_unclassified: Wenn True, nur classification_source='legacy'.
            Wenn False, auch 'llm_pending' neu klassifizieren.
    """
    if only_unclassified:
        where = "classification_source = 'legacy'"
    else:
        where = "classification_source IN ('legacy', 'llm_pending')"
    cursor = await db.execute(
        f"SELECT id FROM sources WHERE {where} AND status = 'active' "
        f"AND source_type != 'excluded' ORDER BY id LIMIT ?",
        (limit,),
    )
    ids = [row["id"] for row in await cursor.fetchall()]

    total_cost = 0.0
    success = 0
    errors: list[dict] = []

    for sid in ids:
        try:
            r = await classify_source(db, sid, model=model)
            total_cost += r["usage"]["cost_usd"]
            success += 1
        except asyncio.CancelledError:
            raise
        except Exception as e:
            logger.error("Klassifikation source_id=%s fehlgeschlagen: %s", sid, e, exc_info=True)
            errors.append({"source_id": sid, "error": str(e)})

    logger.info(
        "Bulk-Klassifikation fertig: %d/%d erfolgreich, $%.4f Kosten, %d Fehler",
        success, len(ids), total_cost, len(errors),
    )
    return {
        "processed": len(ids),
        "success": success,
        "errors": errors,
        "total_cost_usd": total_cost,
    }
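
# Skizze des Approval-Schritts, wie ihn der Modul-Docstring beschreibt (die
# konkreten Endpoints sind hier nicht enthalten; Zielspalten sind eine Annahme):
#     await db.execute(
#         """UPDATE sources SET
#                political_orientation = proposed_political_orientation,
#                media_type = proposed_media_type,
#                reliability = proposed_reliability,
#                classification_source = 'llm_approved'
#            WHERE id = ?""",
#         (source_id,),
#     )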

@@ -1,41 +1,69 @@
 """Quellen-Health-Check Engine - prüft Erreichbarkeit, Feed-Validität, Duplikate."""
 import asyncio
 import logging
 import json
+import uuid
 from urllib.parse import urlparse

 import httpx
 import feedparser
 import aiosqlite

+try:
+    from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S
+except ImportError:
+    HEALTH_CHECK_USER_AGENT = "Mozilla/5.0 (compatible; AegisSight-HealthCheck/1.0)"
+    HEALTH_CHECK_TIMEOUT_S = 15.0
+
+# Phase 18: alternative User-Agents fuer Bot-Block-Bypass
+USER_AGENT_GOOGLEBOT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+USER_AGENT_BROWSER = (
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
+    "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
+)
+REMOVEPAYWALLS_PREFIX = "https://www.removepaywall.com/search?url="
+
+# HTTP-Codes, die einen Retry mit anderem UA rechtfertigen
+RETRY_ON_STATUS = {403, 406, 429}
+
 logger = logging.getLogger("osint.source_health")


 async def run_health_checks(db: aiosqlite.Connection) -> dict:
-    """Führt alle Health-Checks für aktive Grundquellen durch."""
+    """Führt Health-Checks für alle aktiven Quellen durch (global + Tenant)."""
     logger.info("Starte Quellen-Health-Check...")

-    # Alle aktiven Grundquellen laden
+    # Alle aktiven Quellen laden (global UND Tenant-spezifisch)
     cursor = await db.execute(
-        "SELECT id, name, url, domain, source_type, article_count, last_seen_at "
-        "FROM sources WHERE status = 'active' AND tenant_id IS NULL"
+        "SELECT id, name, url, domain, source_type, article_count, last_seen_at, "
+        "COALESCE(fetch_strategy, 'default') AS fetch_strategy "
+        "FROM sources WHERE status = 'active' "
     )
     sources = [dict(row) for row in await cursor.fetchall()]

-    # Aktuelle Health-Check-Ergebnisse löschen (werden neu geschrieben)
+    # Bisherigen Stand in History archivieren, dann frisch starten
+    run_id = uuid.uuid4().hex[:12]
+    await db.execute(
+        "INSERT INTO source_health_history "
+        "(run_id, source_id, check_type, status, message, details, checked_at) "
+        "SELECT ?, source_id, check_type, status, message, details, checked_at "
+        "FROM source_health_checks",
+        (run_id,),
+    )
     await db.execute("DELETE FROM source_health_checks")
     await db.commit()
+    logger.info(f"Health-Check Run {run_id}: vorigen Stand archiviert")

     checks_done = 0
     issues_found = 0

     # 1. Erreichbarkeit + Feed-Validität (nur Quellen mit URL)
     sources_with_url = [s for s in sources if s["url"]]

     async with httpx.AsyncClient(
-        timeout=15.0,
+        timeout=HEALTH_CHECK_TIMEOUT_S,
         follow_redirects=True,
-        headers={"User-Agent": "Mozilla/5.0 (compatible; OSINT-Monitor/1.0)"},
+        headers={"User-Agent": HEALTH_CHECK_USER_AGENT},
     ) as client:
         for i in range(0, len(sources_with_url), 5):
             batch = sources_with_url[i:i + 5]
@@ -46,7 +74,7 @@ async def run_health_checks(db: aiosqlite.Connection) -> dict:
             if isinstance(result, Exception):
                 await _save_check(
                     db, source["id"], "reachability", "error",
                     f"Prüfung fehlgeschlagen: {result}",
                 )
                 issues_found += 1
             else:

@@ -83,7 +111,7 @@ async def run_health_checks(db: aiosqlite.Connection) -> dict:

     await db.commit()
     logger.info(
         f"Health-Check abgeschlossen: {checks_done} Quellen geprüft, "
         f"{issues_found} Probleme gefunden"
     )
     return {"checked": checks_done, "issues": issues_found}
@@ -92,12 +120,63 @@ async def run_health_checks(db: aiosqlite.Connection) -> dict:
 async def _check_source_reachability(
     client: httpx.AsyncClient, source: dict,
 ) -> list[dict]:
-    """Prüft Erreichbarkeit und Feed-Validität einer Quelle."""
+    """Prüft Erreichbarkeit und Feed-Validität einer Quelle.
+
+    Phase 18: pro Quelle eine fetch_strategy ('default' | 'googlebot' | 'paywall' | 'skip').
+    Bei 'default' wird im Fehlerfall (403/406/429) ein Retry mit Googlebot-UA gemacht.
+    Bei 'paywall' wird mit Browser-UA direkt geladen; 4xx zaehlt als Warning.
+    Bei 'skip' wird kein Check ausgeführt.
+    """
     checks = []
     url = source["url"]
+    strategy = source.get("fetch_strategy") or "default"
+
+    # 'skip' -> kein Check (bekannte unerreichbare Quellen, z.B. Login-only)
+    if strategy == "skip":
+        checks.append({
+            "type": "reachability", "status": "ok",
+            "message": "Health-Check uebersprungen (fetch_strategy=skip)",
+        })
+        return checks
+
+    # URL-Schema sicherstellen
+    if url and not url.startswith(("http://", "https://")):
+        url = "https://" + url.lstrip("/")
+
+    # Initialen UA waehlen
+    initial_ua = HEALTH_CHECK_USER_AGENT
+    initial_url = url
+    if strategy == "googlebot":
+        initial_ua = USER_AGENT_GOOGLEBOT
+    elif strategy == "paywall":
+        # Paywall-Quellen: Feed-URL direkt laden, aber mit Browser-UA (versucht Bot-Detection zu umgehen).
+        # removepaywall.com ist fuer Article-URLs, NICHT fuer RSS-Feed-Validity-Checks
+        # (gibt HTML statt XML zurueck). Researcher-Pipeline nutzt removepaywall fuer Inhalte.
+        initial_ua = USER_AGENT_BROWSER

     try:
-        resp = await client.get(url)
+        resp = await client.get(initial_url, headers={"User-Agent": initial_ua})
+
+        # Paywall-Quellen: 4xx ist erwartbar (Bot-Detection), als warning markieren statt error
+        if strategy == "paywall" and resp.status_code in RETRY_ON_STATUS:
+            checks.append({
+                "type": "reachability", "status": "warning",
+                "message": f"Paywall-Quelle, Direkt-Zugang HTTP {resp.status_code} (Researcher-Pipeline nutzt removepaywall.com fuer Inhalte)",
+            })
+            return checks  # Feed-Validity-Check skippen (Paywall liefert kein RSS)
+
+        # Bot-Block-Retry nur bei strategy='default'
+        if (
+            strategy == "default"
+            and resp.status_code in RETRY_ON_STATUS
+        ):
+            blocked_status = resp.status_code
+            retry = await client.get(url, headers={"User-Agent": USER_AGENT_GOOGLEBOT})
+            if retry.status_code < 400:
+                resp = retry  # Retry hat geholfen
+                checks.append({
+                    "type": "reachability", "status": "warning",
+                    "message": f"Erreichbar nur mit Googlebot-UA (Standard-UA bekam HTTP {blocked_status})",
+                })

         if resp.status_code >= 400:
             checks.append({
@@ -125,14 +204,14 @@ async def _check_source_reachability(
                 "message": "Erreichbar",
             })

         # Feed-Validität nur für RSS-Feeds
         if source["source_type"] == "rss_feed":
             text = resp.text[:20000]
             if "<rss" not in text and "<feed" not in text and "<channel" not in text:
                 checks.append({
                     "type": "feed_validity",
                     "status": "error",
                     "message": "Kein gültiger RSS/Atom-Feed",
                 })
             else:
                 feed = await asyncio.to_thread(feedparser.parse, text)

@@ -155,7 +234,7 @@ async def _check_source_reachability(
             checks.append({
                 "type": "feed_validity",
                 "status": "ok",
                 "message": f"Feed gültig ({len(feed.entries)} Einträge)",
             })

     except httpx.TimeoutException:

@@ -181,7 +260,7 @@ async def _check_source_reachability(


 def _check_stale(source: dict) -> dict | None:
     """Prüft ob eine Quelle veraltet ist (keine Artikel seit >30 Tagen)."""
     if source["source_type"] == "excluded":
         return None

@@ -249,7 +328,7 @@ async def _save_check(


 async def get_health_summary(db: aiosqlite.Connection) -> dict:
     """Gibt eine Zusammenfassung der letzten Health-Check-Ergebnisse zurück."""
     cursor = await db.execute("""
         SELECT
             h.id, h.source_id, s.name, s.domain, s.url, s.source_type,
@@ -1,4 +1,4 @@
 """KI-gestützte Quellen-Vorschläge via Haiku."""
 import json
 import logging
 import re

@@ -12,8 +12,8 @@ logger = logging.getLogger("osint.source_suggester")


 async def generate_suggestions(db: aiosqlite.Connection) -> int:
     """Generiert Quellen-Vorschläge basierend auf Health-Checks und Lückenanalyse."""
     logger.info("Starte Quellen-Vorschläge via Haiku...")

     # 1. Aktuelle Quellen laden
     cursor = await db.execute(

@@ -33,13 +33,13 @@ async def generate_suggestions(db: aiosqlite.Connection) -> int:
     """)
     issues = [dict(row) for row in await cursor.fetchall()]

     # 3. Alte pending-Vorschläge entfernen (älter als 30 Tage)
     await db.execute(
         "DELETE FROM source_suggestions "
         "WHERE status = 'pending' AND created_at < datetime('now', '-30 days')"
     )

     # 4. Quellen-Zusammenfassung für Haiku
     categories = {}
     for s in sources:
         cat = s["category"]

@@ -67,7 +67,7 @@ async def generate_suggestions(db: aiosqlite.Connection) -> int:
             f"{issue['check_type']} = {issue['status']} - {issue['message']}\n"
         )

     prompt = f"""Du bist ein OSINT-Analyst und verwaltest die Quellensammlung eines Lagebildmonitors für Sicherheitsbehörden.

 Aktuelle Quellensammlung:{source_summary}{issues_summary}

@@ -78,13 +78,13 @@ Beachte:
 2. Fehlende wichtige OSINT-Quellen: Schlage "add_source" mit konkreter RSS-Feed-URL vor
 3. Fokus auf deutschsprachige + wichtige internationale Nachrichtenquellen
 4. Nur Quellen vorschlagen, die NICHT bereits vorhanden sind
 5. Maximal 5 Vorschläge

 Antworte NUR mit einem JSON-Array. Jedes Element:
 {{
     "type": "add_source|deactivate_source|fix_url|remove_source",
     "title": "Kurzer Titel",
     "description": "Begründung",
     "priority": "low|medium|high",
     "source_id": null,
     "data": {{
"data": {{
|
||||||
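A model response satisfying this schema might look as follows; this is a made-up sketch (names, URL, and IDs invented), not actual Haiku output:

    // Hypothetical response matching the prompt's JSON schema above.
    const exampleSuggestions = [
      {
        type: 'add_source',
        title: 'Regionalen RSS-Feed ergänzen',
        description: 'Regionale Abdeckung fehlt bisher in der Sammlung.',
        priority: 'medium',
        source_id: null,
        data: { name: 'Beispiel-Regionalzeitung', url: 'https://example.org/rss', category: 'regional' },
      },
      {
        type: 'deactivate_source',
        title: 'Dauerhaft unerreichbare Quelle deaktivieren',
        description: 'Health-Check meldet seit Wochen HTTP 404.',
        priority: 'high',
        source_id: 123,
        data: {},
      },
    ];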
@@ -104,7 +104,7 @@ Nur das JSON-Array, kein anderer Text."""

     json_match = re.search(r'\[.*\]', response, re.DOTALL)
     if not json_match:
         logger.warning("Keine Vorschläge von Haiku erhalten (kein JSON)")
         return 0

     suggestions = json.loads(json_match.group(0))
@@ -164,14 +164,14 @@ Nur das JSON-Array, kein anderer Text."""

         await db.commit()
         logger.info(
             f"Quellen-Vorschläge: {count} neue Vorschläge generiert "
             f"(Haiku: {usage.input_tokens} in / {usage.output_tokens} out / "
             f"${usage.cost_usd:.4f})"
         )
         return count

     except Exception as e:
         logger.error(f"Fehler bei Quellen-Vorschlägen: {e}", exc_info=True)
         return 0


@@ -218,7 +218,7 @@ async def apply_suggestion(
             (url,),
         )
         if await cursor.fetchone():
             result["action"] = "übersprungen (URL bereits vorhanden)"
             new_status = "rejected"
         else:
             await db.execute(
@@ -230,7 +230,7 @@ async def apply_suggestion(
             )
             result["action"] = f"Quelle '{name}' angelegt"
         else:
             result["action"] = "übersprungen (keine URL)"
             new_status = "rejected"

     elif stype == "deactivate_source":
@@ -242,7 +242,7 @@ async def apply_suggestion(
         )
         result["action"] = "Quelle deaktiviert"
     else:
         result["action"] = "übersprungen (keine source_id)"

     elif stype == "remove_source":
         source_id = suggestion["source_id"]
@@ -250,9 +250,9 @@ async def apply_suggestion(
         await db.execute(
             "DELETE FROM sources WHERE id = ?", (source_id,),
         )
         result["action"] = "Quelle gelöscht"
     else:
         result["action"] = "übersprungen (keine source_id)"

     elif stype == "fix_url":
         source_id = suggestion["source_id"]
@@ -264,7 +264,7 @@ async def apply_suggestion(
         )
         result["action"] = f"URL aktualisiert auf {new_url}"
     else:
         result["action"] = "übersprungen (keine source_id oder URL)"

     await db.execute(
         "UPDATE source_suggestions SET status = ?, reviewed_at = CURRENT_TIMESTAMP "

@@ -649,14 +649,14 @@ async def get_feeds_with_metadata(tenant_id: int = None, source_type: str = "rss
     try:
         if tenant_id:
             cursor = await db.execute(
-                "SELECT name, url, domain, category, COALESCE(article_count, 0) AS article_count FROM sources "
+                "SELECT name, url, domain, category, notes, COALESCE(article_count, 0) AS article_count FROM sources "
                 "WHERE source_type = ? AND status = 'active' "
                 "AND (tenant_id IS NULL OR tenant_id = ?)",
                 (source_type, tenant_id),
             )
         else:
             cursor = await db.execute(
-                "SELECT name, url, domain, category, COALESCE(article_count, 0) AS article_count FROM sources "
+                "SELECT name, url, domain, category, notes, COALESCE(article_count, 0) AS article_count FROM sources "
                 "WHERE source_type = ? AND status = 'active'",
                 (source_type,),
             )
File diff suppressed because it is too large.
@@ -13,7 +13,7 @@
     <link rel="stylesheet" href="/static/vendor/leaflet.css">
     <link rel="stylesheet" href="/static/vendor/MarkerCluster.css">
     <link rel="stylesheet" href="/static/vendor/MarkerCluster.Default.css">
-    <link rel="stylesheet" href="/static/css/style.css?v=20260316k">
+    <link rel="stylesheet" href="/static/css/style.css?v=20260501h">
     <style>
     /* Export Modal Radio */
     .export-radio { display:flex; align-items:center; gap:10px; padding:8px 12px; cursor:pointer; border-radius:var(--radius-sm); transition:background 0.15s; border:1px solid transparent; margin-bottom:4px; }
@@ -72,6 +72,11 @@
                 <span class="credits-percent" id="credits-percent"></span>
             </div>
         </div>
+        <div class="credits-divider"></div>
+        <button class="header-dropdown-action" type="button" onclick="AIDisclaimer && AIDisclaimer.show()">
+            <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg>
+            <span>Über KI-Inhalte</span>
+        </button>
     </div>
 </div>
 <div class="header-license-warning" id="header-license-warning"></div>
@@ -118,9 +123,17 @@
     <div id="archived-incidents" aria-live="polite" style="display:none;"></div>
 </div>
 <div class="sidebar-sources-link">
-    <button class="btn btn-secondary btn-full btn-small" onclick="App.openSourceManagement()">Quellen verwalten</button>
-    <button class="btn btn-secondary btn-full btn-small sidebar-feedback-btn" onclick="App.openFeedback()">Feedback senden</button>
+    <button class="btn btn-secondary btn-full btn-small" onclick="App.openSourceManagement()" title="Quellen verwalten">
+        <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><ellipse cx="12" cy="5" rx="9" ry="3"/><path d="M3 5v14c0 1.66 4.03 3 9 3s9-1.34 9-3V5"/><path d="M3 12c0 1.66 4.03 3 9 3s9-1.34 9-3"/></svg>
+        <span>Quellen</span>
+    </button>
+    <button class="btn btn-secondary btn-full btn-small sidebar-feedback-btn" onclick="App.openFeedback()" title="Feedback senden">
+        <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><rect width="20" height="16" x="2" y="4" rx="2"/><path d="m22 7-10 5L2 7"/></svg>
+        <span>Feedback</span>
+    </button>
+    <!-- Tutorial-Einstieg temporaer deaktiviert (Ueberarbeitung) - reaktivieren durch Entfernen der Kommentarzeichen:
     <button class="btn btn-secondary btn-full btn-small" onclick="Tutorial.start()" title="Interaktiven Rundgang starten">Rundgang starten</button>
+    -->
     <div class="sidebar-stats-mini">
         <span id="stat-sources-count">0 Quellen</span> · <span id="stat-articles-count">0 Artikel</span>
     </div>
@@ -196,6 +209,7 @@
     <button class="tab-btn" data-tab="timeline">Ereignis-Timeline</button>
     <button class="tab-btn" data-tab="karte">Geografische Verteilung</button>
     <button class="tab-btn" data-tab="faktencheck">Faktencheck</button>
+    <button class="tab-btn" data-tab="pipeline">Analysepipeline</button>
     <button class="tab-btn" data-tab="quellen">Quellenübersicht</button>
 </div>

@@ -279,6 +293,23 @@
             </div>
         </div>

+        <div class="tab-panel" id="panel-pipeline">
+            <div class="card pipeline-card" id="pipeline-card">
+                <div class="card-header">
+                    <div class="card-title">Analysepipeline</div>
+                    <span class="pipeline-header-meta" id="pipeline-header-meta"></span>
+                </div>
+                <div class="pipeline-body">
+                    <div class="pipeline-stage" id="pipeline-stage" aria-label="Analysepipeline-Visualisierung">
+                        <div class="pipeline-empty" id="pipeline-empty">Noch nie aktualisiert. Starte den ersten Refresh.</div>
+                    </div>
+                    <aside class="pipeline-sidenote" id="pipeline-sidenote" hidden>
+                        Recherche-Lagen werden mehrfach evaluiert, um das Bild Schritt für Schritt aufzubauen.
+                    </aside>
+                </div>
+            </div>
+        </div>
+
         <div class="tab-panel" id="panel-quellen">
             <div class="card source-overview-card">
                 <div class="card-header">
@@ -331,9 +362,9 @@
     <label>Quellen</label>
     <div class="toggle-group">
         <label class="toggle-label">
-            <input type="checkbox" id="inc-international" checked>
+            <input type="checkbox" id="inc-international">
             <span class="toggle-switch"></span>
-            <span class="toggle-text">Internationale Quellen einbeziehen <span class="info-icon tooltip-below" data-tooltip="Aktiviert: Sucht auch in englischsprachigen und internationalen Medien. Deaktiviert: Nur deutschsprachige Quellen."><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg></span></span>
+            <span class="toggle-text">Internationale Quellen einbeziehen <span class="info-icon tooltip-below" data-tooltip="Aktiviert: Sucht auch in englischsprachigen und internationalen Medien. Deaktiviert (Standard): Nur deutschsprachige Quellen - empfohlen für DACH-Lagen."><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg></span></span>
         </label>
     </div>
     <div class="toggle-group" style="margin-top: 8px;">
@@ -425,6 +456,15 @@
     <!-- Stats-Leiste -->
     <div class="sources-stats-bar" id="sources-stats-bar"></div>

+    <!-- Tabs: Liste vs. Klassifikations-Review -->
+    <div class="sources-tabs" role="tablist">
+        <button type="button" class="sources-tab active" id="sources-tab-list" role="tab" aria-selected="true" onclick="App.switchSourcesTab('list')">Quellenliste</button>
+        <button type="button" class="sources-tab" id="sources-tab-review" role="tab" aria-selected="false" onclick="App.switchSourcesTab('review')" style="display:none;">Klassifikations-Review <span id="sources-review-count" class="sources-tab-badge">0</span></button>
+    </div>
+
+    <!-- View: Quellenliste -->
+    <div id="sources-list-view">
+
     <!-- Toolbar -->
     <div class="sources-toolbar">
         <div class="sources-filters">
@@ -450,6 +490,76 @@
             <option value="boulevard">Boulevard</option>
             <option value="sonstige">Sonstige</option>
         </select>
+        <label for="sources-filter-political" class="sr-only">Politische Ausrichtung filtern</label>
+        <select id="sources-filter-political" class="timeline-filter-select" onchange="App.filterSources()">
+            <option value="">Alle Ausrichtungen</option>
+            <option value="links_extrem">Links (extrem)</option>
+            <option value="links">Links</option>
+            <option value="mitte_links">Mitte-Links</option>
+            <option value="liberal">Liberal</option>
+            <option value="mitte">Mitte</option>
+            <option value="konservativ">Konservativ</option>
+            <option value="mitte_rechts">Mitte-Rechts</option>
+            <option value="rechts">Rechts</option>
+            <option value="rechts_extrem">Rechts (extrem)</option>
+            <option value="na">Nicht eingeordnet</option>
+        </select>
+        <label for="sources-filter-mediatype" class="sr-only">Medientyp filtern</label>
+        <select id="sources-filter-mediatype" class="timeline-filter-select" onchange="App.filterSources()">
+            <option value="">Alle Medientypen</option>
+            <option value="tageszeitung">Tageszeitung</option>
+            <option value="wochenzeitung">Wochenzeitung</option>
+            <option value="magazin">Magazin</option>
+            <option value="tv_sender">TV-Sender</option>
+            <option value="radio">Radio</option>
+            <option value="oeffentlich_rechtlich">Öffentlich-Rechtlich</option>
+            <option value="nachrichtenagentur">Nachrichtenagentur</option>
+            <option value="online_only">Online-only</option>
+            <option value="blog">Blog</option>
+            <option value="telegram_kanal">Telegram-Kanal</option>
+            <option value="telegram_bot">Telegram-Bot</option>
+            <option value="podcast">Podcast</option>
+            <option value="social_media">Social Media</option>
+            <option value="imageboard">Imageboard</option>
+            <option value="think_tank">Think Tank</option>
+            <option value="ngo">NGO</option>
+            <option value="behoerde">Behörde</option>
+            <option value="staatsmedium">Staatsmedium</option>
+            <option value="fachmedium">Fachmedium</option>
+            <option value="sonstige">Sonstige</option>
+        </select>
+        <label for="sources-filter-reliability" class="sr-only">Glaubwürdigkeit filtern</label>
+        <select id="sources-filter-reliability" class="timeline-filter-select" onchange="App.filterSources()">
+            <option value="">Alle Glaubwürdigkeiten</option>
+            <option value="sehr_hoch">Sehr hoch</option>
+            <option value="hoch">Hoch</option>
+            <option value="gemischt">Gemischt</option>
+            <option value="niedrig">Niedrig</option>
+            <option value="sehr_niedrig">Sehr niedrig</option>
+            <option value="na">Nicht eingeordnet</option>
+        </select>
+        <label for="sources-filter-extern" class="sr-only">Externe Reputation filtern</label>
+        <select id="sources-filter-extern" class="timeline-filter-select" onchange="App.filterSources()">
+            <option value="">Externe Reputation: alle</option>
+            <option value="ifcn">IFCN-Faktenchecker</option>
+            <option value="eu_disinfo">EU-Desinfo gelistet</option>
+        </select>
+        <label for="sources-filter-alignment" class="sr-only">Geopolitische Nähe filtern</label>
+        <select id="sources-filter-alignment" class="timeline-filter-select" onchange="App.filterSources()">
+            <option value="">Alle Nähen</option>
+            <option value="prorussisch">Prorussisch</option>
+            <option value="proiranisch">Proiranisch</option>
+            <option value="prowestlich">Prowestlich</option>
+            <option value="proukrainisch">Proukrainisch</option>
+            <option value="prochinesisch">Prochinesisch</option>
+            <option value="projapanisch">Projapanisch</option>
+            <option value="proisraelisch">Proisraelisch</option>
+            <option value="propalaestinensisch">Propalästinensisch</option>
+            <option value="protuerkisch">Protürkisch</option>
+            <option value="panarabisch">Panarabisch</option>
+            <option value="neutral">Neutral</option>
+            <option value="sonstige">Sonstige</option>
+        </select>
         <label for="sources-search" class="sr-only">Quellen durchsuchen</label>
         <input type="text" id="sources-search" class="timeline-filter-input sources-search-input" placeholder="Suche..." oninput="App.filterSources()">
     </div>
@@ -517,6 +627,89 @@
     <input type="text" id="src-notes" placeholder="Optional">
 </div>
 </div>
+<div class="sources-classification-section">
+    <div class="sources-classification-header">Einordnung</div>
+    <div class="sources-add-form-grid">
+        <div class="form-group">
+            <label for="src-political">Politische Ausrichtung</label>
+            <select id="src-political">
+                <option value="na">Nicht eingeordnet</option>
+                <option value="links_extrem">Links (extrem)</option>
+                <option value="links">Links</option>
+                <option value="mitte_links">Mitte-Links</option>
+                <option value="liberal">Liberal</option>
+                <option value="mitte">Mitte</option>
+                <option value="konservativ">Konservativ</option>
+                <option value="mitte_rechts">Mitte-Rechts</option>
+                <option value="rechts">Rechts</option>
+                <option value="rechts_extrem">Rechts (extrem)</option>
+            </select>
+        </div>
+        <div class="form-group">
+            <label for="src-mediatype">Medientyp</label>
+            <select id="src-mediatype">
+                <option value="sonstige">Sonstige</option>
+                <option value="tageszeitung">Tageszeitung</option>
+                <option value="wochenzeitung">Wochenzeitung</option>
+                <option value="magazin">Magazin</option>
+                <option value="tv_sender">TV-Sender</option>
+                <option value="radio">Radio</option>
+                <option value="oeffentlich_rechtlich">Öffentlich-Rechtlich</option>
+                <option value="nachrichtenagentur">Nachrichtenagentur</option>
+                <option value="online_only">Online-only</option>
+                <option value="blog">Blog</option>
+                <option value="telegram_kanal">Telegram-Kanal</option>
+                <option value="telegram_bot">Telegram-Bot</option>
+                <option value="podcast">Podcast</option>
+                <option value="social_media">Social Media</option>
+                <option value="imageboard">Imageboard</option>
+                <option value="think_tank">Think Tank</option>
+                <option value="ngo">NGO</option>
+                <option value="behoerde">Behörde</option>
+                <option value="staatsmedium">Staatsmedium</option>
+                <option value="fachmedium">Fachmedium</option>
+            </select>
+        </div>
+        <div class="form-group">
+            <label for="src-reliability">Glaubwürdigkeit</label>
+            <select id="src-reliability">
+                <option value="na">Nicht eingeordnet</option>
+                <option value="sehr_hoch">Sehr hoch</option>
+                <option value="hoch">Hoch</option>
+                <option value="gemischt">Gemischt</option>
+                <option value="niedrig">Niedrig</option>
+                <option value="sehr_niedrig">Sehr niedrig</option>
+            </select>
+        </div>
+        <div class="form-group">
+            <label for="src-country">Land (ISO 3166)</label>
+            <input type="text" id="src-country" maxlength="2" placeholder="z.B. DE, RU, US" style="text-transform:uppercase;">
+        </div>
+        <div class="form-group">
+            <label class="checkbox-label" style="display:flex;align-items:center;gap:8px;">
+                <input type="checkbox" id="src-state-affiliated">
+                <span>Staatsnah/-kontrolliert</span>
+            </label>
+        </div>
+    </div>
+    <div class="form-group" style="margin-top:8px;">
+        <label>Geopolitische Nähe (Mehrfachauswahl)</label>
+        <div id="src-alignments-chips" class="alignment-chips" onclick="App.handleAlignmentChipClick(event)">
+            <button type="button" class="alignment-chip" data-alignment="prorussisch">prorussisch</button>
+            <button type="button" class="alignment-chip" data-alignment="proiranisch">proiranisch</button>
+            <button type="button" class="alignment-chip" data-alignment="prowestlich">prowestlich</button>
+            <button type="button" class="alignment-chip" data-alignment="proukrainisch">proukrainisch</button>
+            <button type="button" class="alignment-chip" data-alignment="prochinesisch">prochinesisch</button>
+            <button type="button" class="alignment-chip" data-alignment="projapanisch">projapanisch</button>
+            <button type="button" class="alignment-chip" data-alignment="proisraelisch">proisraelisch</button>
+            <button type="button" class="alignment-chip" data-alignment="propalaestinensisch">propalästinensisch</button>
+            <button type="button" class="alignment-chip" data-alignment="protuerkisch">protürkisch</button>
+            <button type="button" class="alignment-chip" data-alignment="panarabisch">panarabisch</button>
+            <button type="button" class="alignment-chip" data-alignment="neutral">neutral</button>
+            <button type="button" class="alignment-chip" data-alignment="sonstige">sonstige</button>
+        </div>
+    </div>
+</div>
 <div class="sources-discovery-actions">
     <button class="btn btn-primary btn-small" onclick="App.saveSource()">Speichern</button>
     <button class="btn btn-secondary btn-small" onclick="App.toggleSourceForm(false)">Abbrechen</button>
@@ -528,6 +721,36 @@
 <div class="sources-list" id="sources-list">
     <div class="empty-state-text" style="padding:var(--sp-3xl);text-align:center;">Lade Quellen...</div>
 </div>
+
+</div>
+<!-- /sources-list-view -->
+
+<!-- View: Klassifikations-Review (Admin-only) -->
+<div id="sources-review-view" style="display:none;">
+    <div class="review-toolbar">
+        <div class="review-toolbar-info">
+            <span><strong id="review-pending-count">0</strong> Vorschlaege ausstehend</span>
+            <label class="review-conf-filter">
+                Mindest-Konfidenz:
+                <select id="review-min-confidence" onchange="App.loadClassificationQueue()">
+                    <option value="0">alle</option>
+                    <option value="0.5">0.5+</option>
+                    <option value="0.7">0.7+</option>
+                    <option value="0.85">0.85+</option>
+                    <option value="0.9">0.9+</option>
+                </select>
+            </label>
+        </div>
+        <div class="review-toolbar-actions">
+            <button class="btn btn-small btn-secondary" onclick="App.triggerExternalReputationSync()" title="IFCN-Faktenchecker-Liste und EUvsDisinfo-Daten synchronisieren">Externe Daten syncen</button>
+            <button class="btn btn-small btn-secondary" onclick="App.triggerBulkClassify()" title="LLM-Klassifikation fuer noch unklassifizierte Quellen starten">+ Klassifikation starten</button>
+            <button class="btn btn-small btn-primary" onclick="App.bulkApproveHighConfidence()" title="Alle Vorschlaege ueber dem Konfidenz-Schwellwert genehmigen">Alle ≥ 0.85 genehmigen</button>
+        </div>
+    </div>
+    <div class="review-list" id="sources-review-list">
+        <div class="empty-state-text" style="padding:var(--sp-3xl);text-align:center;">Lade Review-Queue...</div>
+    </div>
+</div>
 </div>
 </div>
 </div>
@@ -622,14 +845,15 @@
 <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
 <script src="/static/vendor/leaflet.js"></script>
 <script src="/static/vendor/leaflet.markercluster.js"></script>
-<script src="/static/js/api.js?v=20260316c"></script>
+<script src="/static/js/api.js?v=20260423a"></script>
 <script src="/static/js/ws.js?v=20260316b"></script>
-<script src="/static/js/components.js?v=20260316d"></script>
+<script src="/static/js/components.js?v=20260427a"></script>
 <script src="/static/js/layout.js?v=20260316b"></script>
-<script src="/static/js/app.js?v=20260316b"></script>
+<script src="/static/js/pipeline.js?v=20260501i"></script>
+<script src="/static/js/app.js?v=20260501h"></script>
 <script src="/static/js/cluster-data.js?v=20260322f"></script>
 <script src="/static/js/tutorial.js?v=20260316z"></script>
-<script src="/static/js/chat.js?v=20260316i"></script>
+<script src="/static/js/chat.js?v=20260422a"></script>
 <script>document.addEventListener("DOMContentLoaded",function(){Chat.init();Tutorial.init()});</script>

 <!-- Map Fullscreen Overlay -->
@@ -685,7 +909,8 @@
     </div>
     <div class="progress-popup-body">
         <div class="progress-popup-pass" id="progress-popup-pass" style="display:none;"></div>
-        <div class="progress-checklist" id="progress-checklist">
+        <div class="pipeline-mini" id="progress-pipeline-mini" aria-label="Analyseschritte"></div>
+        <div class="progress-checklist" id="progress-checklist" style="display:none;">
             <div class="progress-check-item" data-step="queued">
                 <span class="progress-check-icon">○</span>
                 <span class="progress-check-label">In Warteschlange</span>
@@ -715,5 +940,7 @@
         </div>
     </div>

+    <script src="/static/js/update-system.js"></script>
+    <script src="/static/js/ai-disclaimer.js"></script>
 </body>
 </html>
src/static/js/ai-disclaimer.js (regular file, 195 added lines)
@@ -0,0 +1,195 @@
/**
 * AI-Hallucination-Disclaimer fuer den AegisSight Monitor.
 *
 * Zeigt:
 * 1) Beim ersten Besuch (oder bei neuem v-Bump) ein Modal mit Hinweisen
 *    zur Fehlbarkeit von KI-Modellen.
 * 2) Im Header-User-Dropdown immer einen Eintrag "Ueber KI-Inhalte",
 *    ueber den der User das Modal jederzeit erneut oeffnen kann.
 *
 * Persistenz:
 * localStorage 'aegis_ai_disclaimer_seen' -> Versionsstring (z.B. "v1").
 * Wenn die Version sich aendert (Wortlaut-Update), erscheint das Modal
 * beim naechsten Login erneut.
 */
(function () {
  'use strict';

  const STORAGE_KEY = 'aegis_ai_disclaimer_seen';
  const CURRENT_VERSION = 'v1';

  // ---- DOM-Helpers (analog zu update-system.js) ----
  function el(tag, attrs, ...children) {
    const e = document.createElement(tag);
    for (const k in (attrs || {})) {
      if (k === 'class') e.className = attrs[k];
      else if (k === 'html') e.innerHTML = attrs[k];
      else if (k.startsWith('on')) e.addEventListener(k.slice(2), attrs[k]);
      else e.setAttribute(k, attrs[k]);
    }
    for (const c of children) {
      if (c == null) continue;
      e.appendChild(typeof c === 'string' ? document.createTextNode(c) : c);
    }
    return e;
  }

  function injectStyles() {
    if (document.getElementById('aegis-aidisc-styles')) return;
    const css = `
      #aegis-aidisc-overlay {
        position: fixed; inset: 0; background: rgba(0,0,0,0.55); z-index: 99998;
        backdrop-filter: blur(3px);
        display: flex; align-items: center; justify-content: center; padding: 24px;
        animation: aegis-aidisc-fade 0.25s ease;
      }
      @keyframes aegis-aidisc-fade { from { opacity: 0; } to { opacity: 1; } }
      #aegis-aidisc-modal {
        background: var(--bg-card);
        color: var(--text-primary);
        border-radius: 14px;
        border: 1px solid var(--border);
        box-shadow: 0 24px 80px rgba(0,0,0,0.4);
        font-family: 'Inter', -apple-system, sans-serif;
        max-width: 580px; width: 100%; max-height: 85vh; overflow: hidden;
        display: flex; flex-direction: column;
      }
      #aegis-aidisc-modal header {
        padding: 22px 28px 18px; border-bottom: 1px solid var(--border);
        display: flex; align-items: center; gap: 12px;
      }
      #aegis-aidisc-modal header svg { color: var(--accent); flex-shrink: 0; }
      #aegis-aidisc-modal h2 { margin: 0; color: var(--accent); font-size: 1.25rem; font-weight: 700; }
      #aegis-aidisc-modal .body { padding: 18px 28px; overflow-y: auto; line-height: 1.55; }
      #aegis-aidisc-modal .body p { margin: 0 0 12px; color: var(--text-primary); font-size: 0.94rem; }
      #aegis-aidisc-modal .body strong { color: var(--accent); }
      #aegis-aidisc-modal .body ul { margin: 8px 0 14px; padding-left: 22px; }
      #aegis-aidisc-modal .body li { margin-bottom: 6px; color: var(--text-secondary); font-size: 0.92rem; }
      #aegis-aidisc-modal .footnote {
        margin-top: 10px; padding-top: 12px; border-top: 1px solid var(--border);
        color: var(--text-tertiary); font-size: 0.82rem;
      }
      #aegis-aidisc-modal footer {
        padding: 14px 28px 20px; border-top: 1px solid var(--border);
        display: flex; justify-content: flex-end; gap: 10px;
      }
      #aegis-aidisc-modal footer button {
        background: var(--accent); color: #fff; border: 0; padding: 10px 22px;
        border-radius: 6px; font: inherit; font-size: 0.92rem; font-weight: 600;
        cursor: pointer;
      }
      #aegis-aidisc-modal footer button:hover { background: var(--accent-hover); }
      #aegis-aidisc-modal footer button.secondary {
        background: transparent; color: var(--text-secondary); border: 1px solid var(--border);
      }
      #aegis-aidisc-modal footer button.secondary:hover {
        background: var(--bg-hover, rgba(255,255,255,0.04)); color: var(--text-primary);
      }`;
    document.head.appendChild(el('style', { id: 'aegis-aidisc-styles', html: css }));
  }

  // ---- Modal-Aufbau ----
  function buildModal(opts) {
    const isFromUser = !!(opts && opts.fromUserAction);

    // Lucide info-Icon (gleiches Pattern wie .info-icon im Repo)
    const headerIcon = el('span', {
      html: '<svg xmlns="http://www.w3.org/2000/svg" width="22" height="22" '
        + 'viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" '
        + 'stroke-linecap="round" stroke-linejoin="round">'
        + '<circle cx="12" cy="12" r="10"/>'
        + '<path d="M12 16v-4"/><path d="M12 8h.01"/></svg>'
    });

    const body = el('div', { class: 'body' });
    body.appendChild(el('p', null,
      'Der AegisSight Monitor nutzt Künstliche Intelligenz '
      + 'zur Analyse, Übersetzung und Zusammenfassung von Nachrichten.'));

    const warn = el('p');
    warn.innerHTML = '<strong>KI-Modelle können Fehler machen</strong> '
      + '(sogenannte „Halluzinationen"): erfundene Details, falsche Verbindungen oder '
      + 'ungenaue Zusammenfassungen sind möglich, auch wenn der Text plausibel klingt.';
    body.appendChild(warn);

    body.appendChild(el('p', null, 'Wir empfehlen daher:'));
    body.appendChild(el('ul', null,
      el('li', null, 'Wichtige Informationen mit den verlinkten Quellen verifizieren'),
      el('li', null, 'Bei kritischen Entscheidungen die Originalartikel prüfen'),
      el('li', null, 'Faktenchecks als Hinweis verstehen, nicht als endgültige Wahrheit')
    ));

    body.appendChild(el('p', { class: 'footnote' },
      'Diesen Hinweis findest du jederzeit wieder im Menü oben rechts unter „Über KI-Inhalte".'));

    const closeAndStore = () => {
      try { localStorage.setItem(STORAGE_KEY, CURRENT_VERSION); } catch (e) {}
      overlay.remove();
      document.removeEventListener('keydown', escHandler);
    };
    const closeOnly = () => {
      overlay.remove();
      document.removeEventListener('keydown', escHandler);
    };

    const footer = el('footer', null);
    if (!isFromUser) {
      footer.appendChild(el('button', { class: 'secondary', onclick: closeOnly }, 'Später nochmal'));
    }
    footer.appendChild(el('button', { onclick: closeAndStore }, 'Verstanden'));

    const overlay = el('div', { id: 'aegis-aidisc-overlay' },
      el('div', { id: 'aegis-aidisc-modal' },
        el('header', null, headerIcon, el('h2', null, 'Hinweis zu KI-generierten Inhalten')),
        body,
        footer
      )
    );

    function escHandler(ev) {
      if (ev.key === 'Escape' && document.getElementById('aegis-aidisc-overlay')) {
        // ESC = wie "Verstanden" beim erstmaligen Anzeigen, sonst nur schliessen
        if (isFromUser) closeOnly(); else closeAndStore();
      }
    }
    overlay.addEventListener('click', (ev) => {
      if (ev.target === overlay) {
        if (isFromUser) closeOnly(); else closeAndStore();
      }
    });
    document.addEventListener('keydown', escHandler);

    return overlay;
  }

  function show(opts) {
    if (document.getElementById('aegis-aidisc-overlay')) return;
    injectStyles();
    document.body.appendChild(buildModal(opts));
  }

  function init() {
    // Nur auf der Dashboard-Seite zeigen, nicht auf der Login-Seite
    if (!document.body || document.body.classList.contains('login-page')) return;

    injectStyles();
    let seenVersion = '';
    try { seenVersion = localStorage.getItem(STORAGE_KEY) || ''; } catch (e) {}
    if (seenVersion !== CURRENT_VERSION) {
      // Etwas verzoegern, damit Hauptdashboard sichtbar ist bevor Modal kommt
      setTimeout(() => show({ fromUserAction: false }), 600);
    }
  }

  // Globaler Zugriff zum manuellen Oeffnen aus dem Header-Dropdown
  window.AIDisclaimer = {
    show: () => show({ fromUserAction: true }),
    VERSION: CURRENT_VERSION,
  };

  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', init);
  } else {
    init();
  }
})();
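The versioned-persistence design means a wording update never needs a migration: bumping CURRENT_VERSION invalidates the stored value and the modal reappears once per user. Directly from the module's own surface:

    // Editing the modal text? Bump the constant, e.g. 'v1' -> 'v2'; the stored
    // 'aegis_ai_disclaimer_seen' value no longer matches and init() re-shows it.
    // Manual reopening from anywhere in the app (used by the header dropdown):
    if (window.AIDisclaimer) AIDisclaimer.show();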
@@ -1,6 +1,16 @@
 /**
  * API-Client für den OSINT Lagemonitor.
  */

+class ApiError extends Error {
+    constructor(status, detail) {
+        super(detail || `Fehler ${status}`);
+        this.name = 'ApiError';
+        this.status = status;
+        this.detail = detail;
+    }
+}
+
 const API = {
     baseUrl: '/api',

@@ -57,7 +67,30 @@ const API = {
             } else if (typeof detail === 'object' && detail !== null) {
                 detail = JSON.stringify(detail);
             }
-            throw new Error(detail || `Fehler ${response.status}`);
+
+            // Lizenz-Status aus Header auslesen (vom Backend gesetzt bei 403)
+            const licStatus = response.headers.get('X-License-Status');
+            if (response.status === 403 && licStatus && typeof App !== 'undefined') {
+                if (!App.user) App.user = {};
+                App.user.read_only = true;
+                App.user.read_only_reason = licStatus;
+                const warningEl = document.getElementById('header-license-warning');
+                if (warningEl) {
+                    let text = 'Nur Lesezugriff';
+                    if (licStatus === 'budget_exceeded') text = 'Token-Budget aufgebraucht – nur Lesezugriff. Bitte Verwaltung kontaktieren.';
+                    else if (licStatus === 'expired') text = 'Lizenz abgelaufen – nur Lesezugriff';
+                    else if (licStatus === 'no_license') text = 'Keine aktive Lizenz – nur Lesezugriff';
+                    else if (licStatus === 'org_disabled') text = 'Organisation deaktiviert – nur Lesezugriff';
+                    warningEl.textContent = text;
+                    warningEl.classList.add('visible');
+                }
+                if (typeof App._updateRefreshButton === 'function') App._updateRefreshButton(false);
+                if (typeof UI !== 'undefined' && UI.showToast) {
+                    UI.showToast(detail || 'Lizenz-Beschränkung – nur Lesezugriff', 'error');
+                }
+            }
+
+            throw new ApiError(response.status, detail);
         }

         if (response.status === 204) return null;
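With ApiError in place, callers can branch on the HTTP status instead of parsing message strings. A hedged sketch of a hypothetical caller (not part of this diff):

    // Hypothetical caller - illustrates the intended ApiError consumption.
    async function saveIncidentTitle(id, title) {
      try {
        await API.updateIncident(id, { title });
      } catch (err) {
        if (err instanceof ApiError && err.status === 403) {
          // Read-only license states are already surfaced by _request via
          // the X-License-Status header; nothing more to do here.
          return;
        }
        throw err; // unexpected errors keep bubbling up
      }
    }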
@@ -91,6 +124,10 @@ const API = {
         return this._request('GET', `/incidents/${id}`);
     },

+    getIncidentSources(id) {
+        return this._request('GET', `/incidents/${id}/sources`);
+    },
+
     updateIncident(id, data) {
         return this._request('PUT', `/incidents/${id}`, data);
     },
@@ -99,18 +136,42 @@ const API = {
         return this._request('DELETE', `/incidents/${id}`);
     },

-    getArticles(incidentId) {
-        return this._request('GET', `/incidents/${incidentId}/articles`);
+    getArticles(incidentId, { limit = 500, offset = 0, search = null } = {}) {
+        const params = new URLSearchParams();
+        params.set('limit', String(limit));
+        params.set('offset', String(offset));
+        if (search) params.set('search', search);
+        return this._request('GET', `/incidents/${incidentId}/articles?${params.toString()}`);
+    },
+
+    getArticlesSourcesSummary(incidentId) {
+        return this._request('GET', `/incidents/${incidentId}/articles/sources-summary`);
+    },
+
+    getArticlesTimelineBuckets(incidentId, granularity = 'day') {
+        return this._request('GET', `/incidents/${incidentId}/articles/timeline-buckets?granularity=${encodeURIComponent(granularity)}`);
     },

     getFactChecks(incidentId) {
         return this._request('GET', `/incidents/${incidentId}/factchecks`);
     },

+    getPipeline(incidentId) {
+        return this._request('GET', `/incidents/${incidentId}/pipeline`);
+    },
+
     getSnapshots(incidentId) {
         return this._request('GET', `/incidents/${incidentId}/snapshots`);
     },

+    getSnapshot(incidentId, snapshotId) {
+        return this._request('GET', `/incidents/${incidentId}/snapshots/${snapshotId}`);
+    },
+
+    searchSnapshots(incidentId, query) {
+        return this._request('GET', `/incidents/${incidentId}/snapshots/search?q=${encodeURIComponent(query)}`);
+    },
+
     getLocations(incidentId) {
         return this._request('GET', `/incidents/${incidentId}/locations`);
     },
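The article list is now paged and filtered server-side rather than fetched wholesale; a hypothetical usage sketch (invented IDs and search term, assuming the backend mirrors these query parameters):

    // Hypothetical usage of the extended article endpoints.
    async function loadArticleViews(incidentId) {
      const page2 = await API.getArticles(incidentId, { limit: 100, offset: 100, search: 'Drohne' });
      // Aggregates come from companion endpoints instead of client-side counting:
      const bySource = await API.getArticlesSourcesSummary(incidentId);
      const buckets = await API.getArticlesTimelineBuckets(incidentId, 'day');
      return { page2, bySource, buckets };
    }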
@@ -137,10 +198,46 @@ const API = {
         if (params.source_type) query.set('source_type', params.source_type);
         if (params.category) query.set('category', params.category);
         if (params.source_status) query.set('source_status', params.source_status);
+        if (params.political_orientation) query.set('political_orientation', params.political_orientation);
+        if (params.media_type) query.set('media_type', params.media_type);
+        if (params.reliability) query.set('reliability', params.reliability);
+        if (params.alignment) query.set('alignment', params.alignment);
+        if (params.state_affiliated !== undefined && params.state_affiliated !== null) {
+            query.set('state_affiliated', String(params.state_affiliated));
+        }
         const qs = query.toString();
         return this._request('GET', `/sources${qs ? '?' + qs : ''}`);
     },

+    // Sources: Klassifikations-Review (LLM)
+    getClassificationStats() {
+        return this._request('GET', '/sources/classification/stats');
+    },
+    getClassificationQueue(limit = 50, minConfidence = 0.0) {
+        const qs = new URLSearchParams({ limit: String(limit), min_confidence: String(minConfidence) }).toString();
+        return this._request('GET', `/sources/classification/queue?${qs}`);
+    },
+    approveClassification(id) {
+        return this._request('POST', `/sources/${id}/classification/approve`);
+    },
+    rejectClassification(id) {
+        return this._request('POST', `/sources/${id}/classification/reject`);
+    },
+    reclassifySource(id) {
+        return this._request('POST', `/sources/${id}/classification/reclassify`);
+    },
+    triggerBulkClassify(limit = 50, onlyUnclassified = true) {
+        const qs = new URLSearchParams({ limit: String(limit), only_unclassified: String(onlyUnclassified) }).toString();
+        return this._request('POST', `/sources/classification/bulk-classify?${qs}`);
+    },
+    bulkApproveClassifications(minConfidence = 0.85) {
+        const qs = new URLSearchParams({ min_confidence: String(minConfidence) }).toString();
+        return this._request('POST', `/sources/classification/bulk-approve?${qs}`);
+    },
+    triggerExternalReputationSync() {
+        return this._request('POST', '/sources/external-reputation/sync');
+    },

     createSource(data) {
         return this._request('POST', '/sources', data);
     },
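Taken together, these endpoints back the review loop behind the new Klassifikations-Review tab; roughly like this (a hypothetical orchestration - the queue's response shape is assumed, not confirmed by this diff):

    // Hypothetical admin flow wiring the classification endpoints together.
    async function runClassificationReview() {
      await API.triggerBulkClassify(50, true);          // classify sources without a classification
      const queue = await API.getClassificationQueue(50, 0.5);
      const items = Array.isArray(queue) ? queue : (queue.items || []); // shape assumed
      for (const item of items) {
        if (item.confidence >= 0.9) await API.approveClassification(item.source_id);
      }
      await API.bulkApproveClassifications(0.85);       // or approve en bloc above the UI default
    }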
File diff suppressed because it is too large.
@@ -67,12 +67,12 @@ const Chat = {
         this.addMessage('assistant', 'Hallo! Ich bin der AegisSight Assistent. Stell mir gerne jede Frage rund um die Bedienung des Monitors, ich helfe dir weiter.');
     }

-    // Tutorial-Hinweis bei jedem Oeffnen aktualisieren (wenn nicht dismissed)
-    if (typeof Tutorial !== 'undefined' && !this._tutorialHintDismissed) {
-        var oldHint = document.getElementById('chat-tutorial-hint');
-        if (oldHint) oldHint.remove();
-        this._showTutorialHint();
-    }
+    // Tutorial-Hinweis temporaer deaktiviert (Ueberarbeitung) - reaktivieren durch Entfernen der Kommentarzeichen:
+    // if (typeof Tutorial !== 'undefined' && !this._tutorialHintDismissed) {
+    //     var oldHint = document.getElementById('chat-tutorial-hint');
+    //     if (oldHint) oldHint.remove();
+    //     this._showTutorialHint();
+    // }

     // Focus auf Input
     setTimeout(() => {
@@ -137,15 +137,15 @@ const Chat = {
     this._showTyping();
     this._isLoading = true;

-    // Tutorial-Keywords abfangen
-    var lowerText = text.toLowerCase();
-    if (lowerText === 'rundgang' || lowerText === 'tutorial' || lowerText === 'tour' || lowerText === 'f\u00fchrung') {
-        this._hideTyping();
-        this._isLoading = false;
-        this.close();
-        if (typeof Tutorial !== 'undefined') Tutorial.start();
-        return;
-    }
+    // Tutorial-Keywords temporaer deaktiviert (Ueberarbeitung) - reaktivieren durch Entfernen der Kommentarzeichen:
+    // var lowerText = text.toLowerCase();
+    // if (lowerText === 'rundgang' || lowerText === 'tutorial' || lowerText === 'tour' || lowerText === 'f\u00fchrung') {
+    //     this._hideTyping();
+    //     this._isLoading = false;
+    //     this.close();
+    //     if (typeof Tutorial !== 'undefined') Tutorial.start();
+    //     return;
+    // }

     try {
         const body = {
@@ -334,9 +334,18 @@ const UI = {
     // Blocking (no close) for first refresh
     if (state.isFirst) {
         overlay.classList.add('blocking');
-        // Apply blur to grid
-        const grid = document.querySelector('.tab-panels');
-        if (grid) grid.classList.add('blurred');
+        // Apply blur to incident-view (Header + Tab-Panels gemeinsam).
+        const blurTarget = document.getElementById('incident-view');
+        if (blurTarget) {
+            blurTarget.classList.add('refresh-blurred');
+            // Sicherheitsnetz: bei viel DOM-Reshuffle im selben Tick
+            // (Display-Wechsel, renderSidebar, leere innerHTML) greift
+            // CSS filter:blur erst beim naechsten Layout-Pass. Im
+            // naechsten Frame nochmal setzen — idempotent.
+            requestAnimationFrame(() => {
+                if (state && state.isFirst) blurTarget.classList.add('refresh-blurred');
+            });
+        }
     } else {
         overlay.classList.remove('blocking');
     }
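The requestAnimationFrame double-apply is a generic guard, not specific to blurring: a class toggled in the same tick as heavy DOM swaps can be lost before the next layout pass. Reduced to its core (illustrative helper, not part of this diff):

    // Illustrative reduction of the re-apply guard used above.
    function applyClassSafely(elId, cls, stillWanted) {
      const node = document.getElementById(elId);
      if (!node) return;
      node.classList.add(cls);
      requestAnimationFrame(() => {
        // classList.add is idempotent; re-apply only if the state still calls for it.
        if (stillWanted()) node.classList.add(cls);
      });
    }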
@@ -345,9 +354,22 @@ const UI = {
     const minBtn = document.getElementById('progress-popup-minimize');
     if (minBtn) minBtn.style.display = state.isFirst ? 'none' : '';

-    // Title
+    // Title - haengt von Status ab (queued = wartet, cancelling = bricht ab, sonst laeuft)
     const titleEl = document.getElementById('progress-popup-title');
-    if (titleEl) titleEl.textContent = state.isFirst ? 'Erste Recherche l\u00e4uft' : 'Aktualisierung l\u00e4uft';
+    if (titleEl) {
+        let title;
+        if (status === 'queued') {
+            const pos = (state && state._queuePos) ? ' (#' + state._queuePos + ')' : '';
+            title = 'In Warteschlange' + pos;
+        } else if (status === 'cancelling') {
+            title = 'Wird abgebrochen\u2026';
+        } else if (state.isFirst) {
+            title = 'Erste Recherche l\u00e4uft';
+        } else {
+            title = 'Aktualisierung l\u00e4uft';
+        }
+        titleEl.textContent = title;
+    }

     // Multi-pass info
     const passEl = document.getElementById('progress-popup-pass');
@@ -465,8 +487,8 @@ const UI = {

     if (incidentId === App.currentIncidentId) {
         // Remove blur
-        const grid = document.querySelector('.tab-panels');
-        if (grid) grid.classList.remove('blurred');
+        const blurTarget = document.getElementById('incident-view');
+        if (blurTarget) blurTarget.classList.remove('refresh-blurred');

         const overlay = document.getElementById('progress-overlay');
         if (overlay) {
@@ -559,8 +581,8 @@ const UI = {
     if (!incidentId) incidentId = App.currentIncidentId;

     // Remove blur
-    const grid = document.querySelector('.tab-panels');
-    if (grid) grid.classList.remove('blurred');
+    const blurTarget = document.getElementById('incident-view');
+    if (blurTarget) blurTarget.classList.remove('refresh-blurred');

     if (incidentId === App.currentIncidentId) {
         const overlay = document.getElementById('progress-overlay');
@@ -709,13 +731,27 @@ const UI = {
         return { zusammenfassung, remaining: remaining.trim() };
     },

+    /**
+     * Parst sources: akzeptiert Array (neu, vom /sources-Endpunkt) ODER
+     * JSON-String (alt, aus sources_json) fuer Rueckwaertskompatibilitaet.
+     */
+    _parseSources(input) {
+        if (!input) return [];
+        if (Array.isArray(input)) return input;
+        try {
+            const parsed = JSON.parse(input);
+            return Array.isArray(parsed) ? parsed : [];
+        } catch (e) {
+            return [];
+        }
+    },
+
     /**
      * Rendert die Zusammenfassung als HTML (Bullet Points).
      */
     renderZusammenfassung(text, sourcesJson) {
         if (!text) return '<span style="color:var(--text-disabled);">Noch keine Zusammenfassung.</span>';
-        let sources = [];
-        try { sources = JSON.parse(sourcesJson || '[]'); } catch(e) {}
+        const sources = this._parseSources(sourcesJson);
         // Nur Bullet-Point-Zeilen behalten, Fliesstext herausfiltern
         const bulletLines = text.split("\n").filter(line => line.trim().startsWith("- "));
         const bulletText = bulletLines.length > 0 ? bulletLines.join("\n") : text;
@@ -751,8 +787,7 @@ const UI = {
|
|||||||
*/
|
*/
|
||||||
renderLatestDevelopments(text, sourcesJson) {
|
renderLatestDevelopments(text, sourcesJson) {
|
||||||
if (!text) return '<span style="color:var(--text-disabled);">Noch keine Entwicklungen erfasst.</span>';
|
if (!text) return '<span style="color:var(--text-disabled);">Noch keine Entwicklungen erfasst.</span>';
|
||||||
let sources = [];
|
const sources = this._parseSources(sourcesJson);
|
||||||
try { sources = JSON.parse(sourcesJson || '[]'); } catch(e) {}
|
|
||||||
|
|
||||||
const bulletLines = text.split("\n").map(l => l.trim()).filter(l => l && (l.startsWith("- ") || l.startsWith("[")));
|
const bulletLines = text.split("\n").map(l => l.trim()).filter(l => l && (l.startsWith("- ") || l.startsWith("[")));
|
||||||
if (bulletLines.length === 0) {
|
if (bulletLines.length === 0) {
|
||||||
@@ -869,8 +904,7 @@ const UI = {
|
|||||||
renderSummary(summary, sourcesJson, incidentType) {
|
renderSummary(summary, sourcesJson, incidentType) {
|
||||||
if (!summary) return '<span style="color:var(--text-tertiary);">Noch keine Zusammenfassung.</span>';
|
if (!summary) return '<span style="color:var(--text-tertiary);">Noch keine Zusammenfassung.</span>';
|
||||||
|
|
||||||
let sources = [];
|
const sources = this._parseSources(sourcesJson);
|
||||||
try { sources = JSON.parse(sourcesJson || '[]'); } catch(e) {}
|
|
||||||
|
|
||||||
// Markdown-Rendering
|
// Markdown-Rendering
|
||||||
let html = this.escape(summary);
|
let html = this.escape(summary);
|
||||||
@@ -930,6 +964,38 @@ const UI = {
|
|||||||
/**
|
/**
|
||||||
* Quellenübersicht für eine Lage rendern.
|
* Quellenübersicht für eine Lage rendern.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Quellenuebersicht aus Aggregat-Endpunkt rendern (alle Artikel der Lage,
|
||||||
|
* unabhaengig von Paginierung im Frontend).
|
||||||
|
* data: {total, sources: [{source, article_count, languages: []}], language_counts: [{language, cnt}]}
|
||||||
|
*/
|
||||||
|
renderSourceOverviewFromSummary(data) {
|
||||||
|
if (!data || !data.sources || data.sources.length === 0) return '';
|
||||||
|
|
||||||
|
const langChips = (data.language_counts || [])
|
||||||
|
.map(l => `<span class="source-lang-chip">${(l.language || 'de').toUpperCase()} <strong>${l.cnt}</strong></span>`)
|
||||||
|
.join('');
|
||||||
|
|
||||||
|
let html = `<div class="source-overview-header">`;
|
||||||
|
html += `<span class="source-overview-stat">${data.total} Artikel aus ${data.sources.length} Quellen</span>`;
|
||||||
|
html += `<div class="source-lang-chips">${langChips}</div>`;
|
||||||
|
html += `</div>`;
|
||||||
|
|
||||||
|
html += '<div class="source-overview-grid">';
|
||||||
|
data.sources.forEach(s => {
|
||||||
|
const langs = (s.languages || ['de']).map(l => (l || 'de').toUpperCase()).join('/');
|
||||||
|
const sourceName = this.escape(s.source || 'Unbekannt');
|
||||||
|
html += `<div class="source-overview-item" data-source="${sourceName}" tabindex="0" role="button" aria-expanded="false" onclick="App.toggleSourceOverviewDetail(this)" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();App.toggleSourceOverviewDetail(this);}">
|
||||||
|
<span class="source-overview-name">${sourceName}</span>
|
||||||
|
<span class="source-overview-lang">${langs}</span>
|
||||||
|
<span class="source-overview-count">${s.article_count}</span>
|
||||||
|
</div>`;
|
||||||
|
});
|
||||||
|
html += '</div>';
|
||||||
|
|
||||||
|
return html;
|
||||||
|
},
|
||||||
|
|
||||||
renderSourceOverview(articles) {
|
renderSourceOverview(articles) {
|
||||||
if (!articles || articles.length === 0) return '';
|
if (!articles || articles.length === 0) return '';
|
||||||
|
|
||||||
@@ -996,6 +1062,163 @@ const UI = {
|
|||||||
'sonstige': 'Sonstige',
|
'sonstige': 'Sonstige',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
_politicalLabels: {
|
||||||
|
links_extrem: { short: 'L+', full: 'Links (extrem)' },
|
||||||
|
links: { short: 'L', full: 'Links' },
|
||||||
|
mitte_links: { short: 'ML', full: 'Mitte-Links' },
|
||||||
|
liberal: { short: 'LIB', full: 'Liberal' },
|
||||||
|
mitte: { short: 'M', full: 'Mitte' },
|
||||||
|
konservativ: { short: 'KON', full: 'Konservativ' },
|
||||||
|
mitte_rechts: { short: 'MR', full: 'Mitte-Rechts' },
|
||||||
|
rechts: { short: 'R', full: 'Rechts' },
|
||||||
|
rechts_extrem: { short: 'R+', full: 'Rechts (extrem)' },
|
||||||
|
na: { short: '?', full: 'Nicht eingeordnet' },
|
||||||
|
},
|
||||||
|
_reliabilityLabels: {
|
||||||
|
sehr_hoch: 'Sehr hoch',
|
||||||
|
hoch: 'Hoch',
|
||||||
|
gemischt: 'Gemischt',
|
||||||
|
niedrig: 'Niedrig',
|
||||||
|
sehr_niedrig: 'Sehr niedrig',
|
||||||
|
na: 'Nicht eingeordnet',
|
||||||
|
},
|
||||||
|
_mediaTypeLabels: {
|
||||||
|
tageszeitung: 'Tageszeitung',
|
||||||
|
wochenzeitung: 'Wochenzeitung',
|
||||||
|
magazin: 'Magazin',
|
||||||
|
tv_sender: 'TV-Sender',
|
||||||
|
radio: 'Radio',
|
||||||
|
oeffentlich_rechtlich: 'Öffentlich-Rechtlich',
|
||||||
|
nachrichtenagentur: 'Nachrichtenagentur',
|
||||||
|
online_only: 'Online-only',
|
||||||
|
blog: 'Blog',
|
||||||
|
telegram_kanal: 'Telegram-Kanal',
|
||||||
|
telegram_bot: 'Telegram-Bot',
|
||||||
|
podcast: 'Podcast',
|
||||||
|
social_media: 'Social Media',
|
||||||
|
imageboard: 'Imageboard',
|
||||||
|
think_tank: 'Think Tank',
|
||||||
|
ngo: 'NGO',
|
||||||
|
behoerde: 'Behörde',
|
||||||
|
staatsmedium: 'Staatsmedium',
|
||||||
|
fachmedium: 'Fachmedium',
|
||||||
|
sonstige: 'Sonstige',
|
||||||
|
},
|
||||||
|
_alignmentLabels: {
|
||||||
|
prorussisch: 'prorussisch',
|
||||||
|
proiranisch: 'proiranisch',
|
||||||
|
prowestlich: 'prowestlich',
|
||||||
|
proukrainisch: 'proukrainisch',
|
||||||
|
prochinesisch: 'prochinesisch',
|
||||||
|
projapanisch: 'projapanisch',
|
||||||
|
proisraelisch: 'proisraelisch',
|
||||||
|
propalaestinensisch: 'propalästinensisch',
|
||||||
|
protuerkisch: 'protürkisch',
|
||||||
|
panarabisch: 'panarabisch',
|
||||||
|
neutral: 'neutral',
|
||||||
|
sonstige: 'sonstige',
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Eintrag in der Klassifikations-Review-Queue.
|
||||||
|
* Zeigt Diff zwischen aktuellem Wert und LLM-Vorschlag.
|
||||||
|
*/
|
||||||
|
renderClassificationQueueItem(item) {
|
||||||
|
const cur = item.current || {};
|
||||||
|
const prop = item.proposed || {};
|
||||||
|
const conf = prop.confidence || 0;
|
||||||
|
const confPct = Math.round(conf * 100);
|
||||||
|
const confClass = conf >= 0.85 ? 'high' : (conf >= 0.7 ? 'medium' : 'low');
|
||||||
|
|
||||||
|
const diffRow = (label, currentVal, proposedVal, formatter) => {
|
||||||
|
const fmt = formatter || (v => v == null || v === '' ? '–' : String(v));
|
||||||
|
const c = fmt(currentVal);
|
||||||
|
const p = fmt(proposedVal);
|
||||||
|
const changed = c !== p;
|
||||||
|
return `<div class="review-diff-row${changed ? ' changed' : ''}">
|
||||||
|
<span class="review-diff-label">${this.escape(label)}</span>
|
||||||
|
<span class="review-diff-current">${this.escape(c)}</span>
|
||||||
|
<span class="review-diff-arrow">→</span>
|
||||||
|
<span class="review-diff-proposed">${this.escape(p)}</span>
|
||||||
|
</div>`;
|
||||||
|
};
|
||||||
|
|
||||||
|
const polFmt = v => (v && v !== 'na') ? (this._politicalLabels[v]?.full || v) : '–';
|
||||||
|
const mtFmt = v => (v && v !== 'sonstige') ? (this._mediaTypeLabels[v] || v) : (v === 'sonstige' ? 'Sonstige' : '–');
|
||||||
|
const relFmt = v => (v && v !== 'na') ? (this._reliabilityLabels[v] || v) : '–';
|
||||||
|
const stateFmt = v => v ? 'ja' : 'nein';
|
||||||
|
const ccFmt = v => v || '–';
|
||||||
|
const alignFmt = v => (Array.isArray(v) && v.length > 0)
|
||||||
|
? v.map(a => this._alignmentLabels[a] || a).join(', ')
|
||||||
|
: '–';
|
||||||
|
|
||||||
|
const globalBadge = item.is_global ? '<span class="review-global-badge">Grundquelle</span>' : '';
|
||||||
|
const reasoning = prop.reasoning ? this.escape(prop.reasoning) : '';
|
||||||
|
|
||||||
|
return `<div class="review-card" data-source-id="${item.id}">
|
||||||
|
<div class="review-card-header">
|
||||||
|
<div class="review-card-title">
|
||||||
|
<span class="review-card-name">${this.escape(item.name)}</span>
|
||||||
|
${globalBadge}
|
||||||
|
<span class="review-card-domain">${this.escape(item.domain || '')}</span>
|
||||||
|
</div>
|
||||||
|
<div class="review-card-confidence conf-${confClass}" title="LLM-Konfidenz">
|
||||||
|
<span class="conf-value">${confPct}%</span>
|
||||||
|
<span class="conf-label">Konfidenz</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="review-card-diff">
|
||||||
|
${diffRow('Politik', cur.political_orientation, prop.political_orientation, polFmt)}
|
||||||
|
${diffRow('Medientyp', cur.media_type, prop.media_type, mtFmt)}
|
||||||
|
${diffRow('Glaubwürdigkeit', cur.reliability, prop.reliability, relFmt)}
|
||||||
|
${diffRow('Staatsnah', cur.state_affiliated, prop.state_affiliated, stateFmt)}
|
||||||
|
${diffRow('Land', cur.country_code, prop.country_code, ccFmt)}
|
||||||
|
${diffRow('Geopol. Nähe', cur.alignments, prop.alignments, alignFmt)}
|
||||||
|
</div>
|
||||||
|
${reasoning ? `<div class="review-card-reasoning"><strong>Begründung:</strong> ${reasoning}</div>` : ''}
|
||||||
|
<div class="review-card-actions">
|
||||||
|
<button class="btn btn-small btn-primary" onclick="App.approveClassification(${item.id})">Übernehmen</button>
|
||||||
|
<button class="btn btn-small btn-secondary" onclick="App.rejectClassification(${item.id})">Verwerfen</button>
|
||||||
|
<button class="btn btn-small btn-secondary" data-reclassify-id="${item.id}" onclick="App.reclassifySource(${item.id})">Neu klassifizieren</button>
|
||||||
|
</div>
|
||||||
|
</div>`;
|
||||||
|
},
|
||||||
|
|
||||||
|
_renderClassificationBadges(feed) {
|
||||||
|
const parts = [];
|
||||||
|
const pol = feed.political_orientation;
|
||||||
|
if (pol && pol !== 'na') {
|
||||||
|
const label = this._politicalLabels[pol] || { short: pol, full: pol };
|
||||||
|
parts.push(`<span class="source-political-badge pol-${this.escape(pol)}" title="${this.escape(label.full)}">${this.escape(label.short)}</span>`);
|
||||||
|
}
|
||||||
|
const rel = feed.reliability;
|
||||||
|
if (rel && rel !== 'na') {
|
||||||
|
const relLabel = this._reliabilityLabels[rel] || rel;
|
||||||
|
const relSource = feed.ifcn_signatory ? '(IFCN-Faktenchecker)'
|
||||||
|
: (feed.eu_disinfo_listed ? `(EU-Desinfo, ${feed.eu_disinfo_case_count || 0} Fälle)`
|
||||||
|
: '(LLM-Schätzung)');
|
||||||
|
const relTitle = `Glaubwürdigkeit: ${relLabel} ${relSource}`;
|
||||||
|
parts.push(`<span class="source-reliability-dot rel-${this.escape(rel)}" title="${this.escape(relTitle)}" aria-label="${this.escape(relTitle)}"></span>`);
|
||||||
|
}
|
||||||
|
if (feed.ifcn_signatory) {
|
||||||
|
parts.push(`<span class="source-ifcn-badge" title="IFCN-zertifizierter Faktenchecker" aria-label="IFCN-Faktenchecker">✓ IFCN</span>`);
|
||||||
|
}
|
||||||
|
if (feed.eu_disinfo_listed) {
|
||||||
|
const cnt = feed.eu_disinfo_case_count || 0;
|
||||||
|
const title = `EUvsDisinfo: ${cnt} dokumentierte Desinformations-Fälle`;
|
||||||
|
parts.push(`<span class="source-eu-disinfo-badge" title="${this.escape(title)}" aria-label="${this.escape(title)}">⚠ EU-Desinfo (${cnt})</span>`);
|
||||||
|
}
|
||||||
|
if (feed.state_affiliated) {
|
||||||
|
parts.push(`<span class="source-state-badge" title="Staatsnah/-kontrolliert" aria-label="Staatsnah">⚑</span>`);
|
||||||
|
}
|
||||||
|
const aligns = Array.isArray(feed.alignments) ? feed.alignments : [];
|
||||||
|
aligns.forEach(a => {
|
||||||
|
const label = this._alignmentLabels[a] || a;
|
||||||
|
parts.push(`<span class="source-alignment-chip-badge align-${this.escape(a)}">${this.escape(label)}</span>`);
|
||||||
|
});
|
||||||
|
return parts.join('');
|
||||||
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Domain-Gruppe rendern (aufklappbar mit Feeds).
|
* Domain-Gruppe rendern (aufklappbar mit Feeds).
|
||||||
*/
|
*/
|
||||||
@@ -1051,20 +1274,52 @@ const UI = {
|
|||||||
? `<span class="source-feed-count">${feedCount} Feed${feedCount !== 1 ? 's' : ''}</span>`
|
? `<span class="source-feed-count">${feedCount} Feed${feedCount !== 1 ? 's' : ''}</span>`
|
||||||
: '';
|
: '';
|
||||||
|
|
||||||
// Info-Button mit Tooltip (Typ, Sprache, Ausrichtung)
|
// Info-Button mit Tooltip (Typ, Sprache, Ausrichtung, Klassifikation)
|
||||||
let infoButtonHtml = '';
|
let infoButtonHtml = '';
|
||||||
const firstFeed = feeds[0] || {};
|
const firstFeed = feeds[0] || {};
|
||||||
const hasInfo = firstFeed.language || firstFeed.bias;
|
const hasInfo = firstFeed.language || firstFeed.bias
|
||||||
|
|| (firstFeed.political_orientation && firstFeed.political_orientation !== 'na')
|
||||||
|
|| (firstFeed.media_type && firstFeed.media_type !== 'sonstige')
|
||||||
|
|| (firstFeed.reliability && firstFeed.reliability !== 'na')
|
||||||
|
|| firstFeed.state_affiliated
|
||||||
|
|| firstFeed.country_code
|
||||||
|
|| (Array.isArray(firstFeed.alignments) && firstFeed.alignments.length > 0);
|
||||||
if (hasInfo) {
|
if (hasInfo) {
|
||||||
const typeMap = { rss_feed: 'RSS-Feed', web_source: 'Web-Quelle', telegram_channel: 'Telegram-Kanal' };
|
const typeMap = { rss_feed: 'RSS-Feed', web_source: 'Web-Quelle', telegram_channel: 'Telegram-Kanal', podcast_feed: 'Podcast' };
|
||||||
const lines = [];
|
const lines = [];
|
||||||
lines.push('Typ: ' + (typeMap[firstFeed.source_type] || firstFeed.source_type || 'Unbekannt'));
|
lines.push('Typ: ' + (typeMap[firstFeed.source_type] || firstFeed.source_type || 'Unbekannt'));
|
||||||
if (firstFeed.language) lines.push('Sprache: ' + firstFeed.language);
|
if (firstFeed.language) lines.push('Sprache: ' + firstFeed.language);
|
||||||
if (firstFeed.bias) lines.push('Ausrichtung: ' + firstFeed.bias);
|
if (firstFeed.country_code) lines.push('Land: ' + firstFeed.country_code);
|
||||||
|
if (firstFeed.media_type && firstFeed.media_type !== 'sonstige') {
|
||||||
|
lines.push('Medientyp: ' + (this._mediaTypeLabels[firstFeed.media_type] || firstFeed.media_type));
|
||||||
|
}
|
||||||
|
if (firstFeed.political_orientation && firstFeed.political_orientation !== 'na') {
|
||||||
|
const pl = this._politicalLabels[firstFeed.political_orientation];
|
||||||
|
lines.push('Politisch: ' + (pl ? pl.full : firstFeed.political_orientation));
|
||||||
|
}
|
||||||
|
if (firstFeed.reliability && firstFeed.reliability !== 'na') {
|
||||||
|
const relLabel = this._reliabilityLabels[firstFeed.reliability] || firstFeed.reliability;
|
||||||
|
const relSrc = firstFeed.ifcn_signatory ? ' (IFCN-Faktenchecker)'
|
||||||
|
: (firstFeed.eu_disinfo_listed ? ` (EU-Desinfo, ${firstFeed.eu_disinfo_case_count || 0} Fälle)`
|
||||||
|
: ' (LLM-Schätzung)');
|
||||||
|
lines.push('Glaubwürdigkeit: ' + relLabel + relSrc);
|
||||||
|
}
|
||||||
|
if (firstFeed.ifcn_signatory) lines.push('IFCN-Faktenchecker: ja');
|
||||||
|
if (firstFeed.eu_disinfo_listed) {
|
||||||
|
lines.push(`EUvsDisinfo: ${firstFeed.eu_disinfo_case_count || 0} Fälle` + (firstFeed.eu_disinfo_last_seen ? ` (zuletzt ${firstFeed.eu_disinfo_last_seen})` : ''));
|
||||||
|
}
|
||||||
|
if (firstFeed.state_affiliated) lines.push('Staatsnah: ja');
|
||||||
|
if (Array.isArray(firstFeed.alignments) && firstFeed.alignments.length > 0) {
|
||||||
|
const labels = firstFeed.alignments.map(a => this._alignmentLabels[a] || a);
|
||||||
|
lines.push('Geopolitische Nähe: ' + labels.join(', '));
|
||||||
|
}
|
||||||
|
if (firstFeed.bias) lines.push('Notiz: ' + firstFeed.bias);
|
||||||
const tooltipText = this.escape(lines.join('\n'));
|
const tooltipText = this.escape(lines.join('\n'));
|
||||||
infoButtonHtml = ` <span class="info-icon tooltip-below" data-tooltip="${tooltipText}"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg></span>`;
|
infoButtonHtml = ` <span class="info-icon tooltip-below" data-tooltip="${tooltipText}"><svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg></span>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const classificationBadges = this._renderClassificationBadges(firstFeed);
|
||||||
|
|
||||||
return `<div class="source-group">
|
return `<div class="source-group">
|
||||||
<div class="source-group-header" ${toggleAttr}>
|
<div class="source-group-header" ${toggleAttr}>
|
||||||
${toggleIcon}
|
${toggleIcon}
|
||||||
@@ -1072,6 +1327,7 @@ const UI = {
|
|||||||
<span class="source-group-name">${this.escape(displayName)}</span>${infoButtonHtml}
|
<span class="source-group-name">${this.escape(displayName)}</span>${infoButtonHtml}
|
||||||
</div>
|
</div>
|
||||||
<span class="source-category-badge cat-${feeds[0]?.category || 'sonstige'}">${catLabel}</span>
|
<span class="source-category-badge cat-${feeds[0]?.category || 'sonstige'}">${catLabel}</span>
|
||||||
|
${classificationBadges ? `<span class="source-classification-badges">${classificationBadges}</span>` : ''}
|
||||||
${feedCountBadge}
|
${feedCountBadge}
|
||||||
<div class="source-group-actions" onclick="event.stopPropagation()">
|
<div class="source-group-actions" onclick="event.stopPropagation()">
|
||||||
${!isGlobal && !hasMultiple && feeds[0]?.id ? `<button class="source-edit-btn" onclick="App.editSource(${feeds[0].id})" title="Bearbeiten" aria-label="Bearbeiten">✎</button>` : ''}
|
${!isGlobal && !hasMultiple && feeds[0]?.id ? `<button class="source-edit-btn" onclick="App.editSource(${feeds[0].id})" title="Bearbeiten" aria-label="Bearbeiten">✎</button>` : ''}
|
||||||
|
|||||||
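Taken together, the `_parseSources` hunks above replace three copies of the same inline `JSON.parse` fallback with one helper that accepts either the new array payload from the `/sources` endpoint or the legacy `sources_json` string. A minimal standalone sketch of its contract; the sample values below are invented for illustration, not taken from the repo:

```js
// Sketch of _parseSources' dual-input contract (sample data is hypothetical).
const UI = {
  _parseSources(input) {
    if (!input) return [];
    if (Array.isArray(input)) return input;        // new: array from the /sources endpoint
    try {
      const parsed = JSON.parse(input);            // old: JSON string from sources_json
      return Array.isArray(parsed) ? parsed : [];
    } catch (e) {
      return [];                                   // malformed JSON degrades to "no sources"
    }
  },
};

console.log(UI._parseSources([{ url: 'https://example.org' }]));   // array passed through
console.log(UI._parseSources('[{"url":"https://example.org"}]'));  // parsed into an array
console.log(UI._parseSources('not json'));                         // -> []
```

Centralising the fallback means a malformed `sources_json` now degrades to an empty source list identically in all three renderers, instead of three subtly different inline behaviours.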
```diff
@@ -3,7 +3,7 @@
  * Nur ein Tab-Panel gleichzeitig sichtbar, pro Lage gemerkt in localStorage.
  */
 const LayoutManager = {
-  TAB_ORDER: ['zusammenfassung', 'lagebild', 'timeline', 'karte', 'faktencheck', 'quellen'],
+  TAB_ORDER: ['zusammenfassung', 'lagebild', 'timeline', 'karte', 'faktencheck', 'pipeline', 'quellen'],
   _currentIncidentId: null,
   _initialized: false,
```
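The `LayoutManager` hunk only inserts `'pipeline'` into `TAB_ORDER`, but the surrounding comment describes the mechanism: one tab panel visible at a time, with the choice remembered per incident in localStorage. A hedged sketch of that persistence pattern; the storage-key scheme `tab_<incidentId>` is an assumption, since the actual key used in the module is not shown in this diff:

```js
// Hypothetical sketch of per-incident tab persistence as described above.
const TAB_ORDER = ['zusammenfassung', 'lagebild', 'timeline', 'karte', 'faktencheck', 'pipeline', 'quellen'];

function rememberTab(incidentId, tab) {
  // Only persist known tabs so a removed tab can never be restored later.
  if (TAB_ORDER.includes(tab)) {
    localStorage.setItem(`tab_${incidentId}`, tab); // assumed key scheme
  }
}

function restoreTab(incidentId) {
  const saved = localStorage.getItem(`tab_${incidentId}`);
  // Fall back to the first tab when nothing was saved or the tab no longer exists.
  return TAB_ORDER.includes(saved) ? saved : TAB_ORDER[0];
}
```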
src/static/js/pipeline.js · 592 lines · new file
@@ -0,0 +1,592 @@
```js
/**
 * Pipeline-Modul: Visualisierung der Analysepipeline pro Lage.
 *
 * - Liest Pipeline-Definition + letzten Refresh-Stand vom Backend
 *   (GET /api/incidents/{id}/pipeline)
 * - Hört auf WebSocket-Events vom Typ "pipeline_step" und animiert Live
 *   den jeweils aktiven Schritt
 * - Bei Lagen-Wechsel wird die Visualisierung an die neue Lage neu gebunden
 *
 * Stilkonzept:
 * - Blöcke = Karten mit Icon + Titel + Zahl
 * - Verbindungspfeile als SVG zwischen den Blöcken
 * - Aktiver Block: pulsierender Glow (CSS-Klasse .is-active)
 * - Fertiger Block: Häkchen + dezente Outline (.is-done)
 * - Übersprungener Block: ausgeblendet (laut Anforderung)
 * - Multi-Pass (Research): am letzten Block leuchtet ein Schleifen-Pfeil auf
 */
const Pipeline = {
  _incidentId: null,
  _definition: null,    // PIPELINE_STEPS vom Backend
  _stateByKey: {},      // step_key -> {status, count_value, count_secondary, pass_number}
  _snapshotState: null, // deep-copy von _stateByKey vor Refresh-Start (fuer Cancel-Restore)
  _isResearch: false,
  _passTotal: 1,
  _lastRefreshHeader: null,
  _hoverTooltipEl: null,
  _isLoading: false,
  _wsBound: false,
  _icons: {
    search: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="11" cy="11" r="7"/><path d="M21 21l-4.3-4.3"/></svg>',
    rss: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M4 11a9 9 0 0 1 9 9"/><path d="M4 4a16 16 0 0 1 16 16"/><circle cx="5" cy="19" r="1.5"/></svg>',
    'copy-x': '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="3" width="13" height="13" rx="2"/><path d="M8 21h11a2 2 0 0 0 2-2V8"/><path d="M11 11l4 4M15 11l-4 4"/></svg>',
    scale: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 3v18"/><path d="M5 8h14"/><path d="M5 8l-3 7h6z"/><path d="M19 8l-3 7h6z"/></svg>',
    'map-pin': '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 22s7-7 7-13a7 7 0 0 0-14 0c0 6 7 13 7 13z"/><circle cx="12" cy="9" r="2.5"/></svg>',
    'file-text': '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 3H6a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V9z"/><path d="M14 3v6h6"/><path d="M8 13h8M8 17h8M8 9h2"/></svg>',
    shield: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 2l8 4v6c0 5-3.5 9-8 10-4.5-1-8-5-8-10V6z"/><path d="M9 12l2 2 4-4"/></svg>',
    'check-circle': '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M8 12l3 3 5-6"/></svg>',
    bell: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M6 8a6 6 0 0 1 12 0c0 7 3 9 3 9H3s3-2 3-9"/><path d="M10 21a2 2 0 0 0 4 0"/></svg>',
  },

  /** Wird einmal beim Seitenstart aufgerufen, hängt sich an WebSocket. */
  init() {
    if (this._wsBound) return;
    if (typeof WS !== 'undefined' && WS.on) {
      WS.on('pipeline_step', (msg) => this._onWsStep(msg));
      // Erfolg: API-State neu laden (finaler Stand sichtbar)
      WS.on('refresh_complete', (msg) => this._onRefreshDoneSuccess(msg));
      // Cancel/Error: vor-Refresh-Snapshot zurueckspielen, damit Pipeline nicht im Mix-Zustand stehen bleibt
      WS.on('refresh_cancelled', (msg) => this._onRefreshDoneCancel(msg));
      WS.on('refresh_error', (msg) => this._onRefreshDoneError(msg));
      this._wsBound = true;
    }
    // Hover-Tooltip-Element vorbereiten
    if (!this._hoverTooltipEl) {
      const t = document.createElement('div');
      t.className = 'pipeline-tooltip';
      t.setAttribute('role', 'tooltip');
      document.body.appendChild(t);
      this._hoverTooltipEl = t;
    }
    // Klick auf Body schliesst Tooltip-Popup
    document.addEventListener('click', (e) => {
      if (!e.target.closest('.pipeline-block') && !e.target.closest('.pipeline-popup')) {
        this._closePopup();
      }
    });
  },

  /** Bindet die Pipeline an eine Lage. Lädt Daten und rendert. */
  async bindToIncident(incidentId) {
    this._incidentId = incidentId;
    this._stateByKey = {};
    this._snapshotState = null; // Snapshot ist immer lagen-spezifisch
    this._isResearch = false;
    this._passTotal = 1;
    this._lastRefreshHeader = null;
    this._renderEmpty('Lade...');
    if (incidentId == null) return;

    this._isLoading = true;
    try {
      const data = await API.getPipeline(incidentId);
      // Lagen-Wechsel waehrend Request: alte Antwort verwerfen
      if (this._incidentId !== incidentId) return;

      this._definition = data.steps_definition || [];
      this._isResearch = !!data.is_research;
      this._lastRefreshHeader = data.last_refresh || null;
      this._passTotal = (data.last_refresh && data.last_refresh.pass_total) || 1;

      // Letzten Stand pro step_key konsolidieren (bei Multi-Pass: letzter Pass-Eintrag gewinnt)
      (data.steps || []).forEach(s => {
        const key = s.step_key;
        const prev = this._stateByKey[key];
        if (!prev || (s.pass_number || 1) >= (prev.pass_number || 1)) {
          this._stateByKey[key] = {
            status: s.status,
            count_value: s.count_value,
            count_secondary: s.count_secondary,
            pass_number: s.pass_number || 1,
          };
        }
      });

      this._render();
      this._renderMini();

      // Edge-Case: Lage ist gerade in Queue (z.B. via Lagen-Wechsel beim
      // Klick in der Sidebar). API liefert den LETZTEN gespeicherten Stand
      // (alles done = gruen), aber tatsaechlich wartet ein neuer Refresh.
      // -> beginQueue() selbst ausloesen, damit Icons grau zeigen.
      try {
        if (typeof App !== 'undefined' && App._refreshingIncidents
            && App._refreshingIncidents.has(incidentId)
            && typeof UI !== 'undefined' && UI._progressState
            && UI._progressState[incidentId]
            && UI._progressState[incidentId].step === 'queued') {
          this.beginQueue(incidentId);
        }
      } catch (e) { /* tolerant */ }
    } catch (e) {
      console.warn('Pipeline laden fehlgeschlagen:', e);
      this._renderEmpty('Pipeline-Daten konnten nicht geladen werden.');
    } finally {
      this._isLoading = false;
    }
  },

  /** WebSocket: einzelner Pipeline-Schritt-Status. */
  _onWsStep(msg) {
    if (!msg || !msg.data) return;
    if (this._incidentId == null || msg.incident_id !== this._incidentId) return;

    const d = msg.data;
    const key = d.step_key;
    if (!key) return;

    // State aktualisieren, letzter Pass gewinnt
    const prev = this._stateByKey[key];
    const passNr = d.pass_number || 1;
    if (!prev || passNr >= (prev.pass_number || 1)) {
      this._stateByKey[key] = {
        status: d.status,
        count_value: d.count_value !== undefined ? d.count_value : (prev ? prev.count_value : null),
        count_secondary: d.count_secondary !== undefined ? d.count_secondary : (prev ? prev.count_secondary : null),
        pass_number: passNr,
      };
    }

    // Multi-Pass-Erkennung: pass_number > _passTotal -> erweitern + Loop-Animation triggern
    if (passNr > this._passTotal) {
      this._passTotal = passNr;
      // Schleifen-Pfeil aufflackern
      const stage = document.getElementById('pipeline-stage');
      if (stage) {
        stage.classList.add('is-looping');
        setTimeout(() => stage.classList.remove('is-looping'), 1500);
      }
    }

    // Wenn der ERSTE Schritt (sources_review) auf "active" geht, beginnt ein neuer
    // Refresh oder ein neuer Multi-Pass-Durchlauf — alle nachfolgenden Schritte auf
    // "pending" (grau) zuruecksetzen, damit der User sieht: das ist neu und
    // noch nicht durchlaufen. Sonst stehen sie als "done" vom letzten Mal da.
    let didReset = false;
    if (d.status === 'active' && this._definition && this._definition.length
        && key === this._definition[0].key) {
      this._definition.forEach(s => {
        if (s.key !== key && this._stateByKey[s.key]) {
          this._stateByKey[s.key].status = 'pending';
          didReset = true;
        }
      });
    }

    if (didReset) {
      // Beim Reset alle Bloecke neu zeichnen, nicht nur den aktuellen
      this._render();
      this._renderMini();
    } else {
      this._patchBlock(key);
      this._patchMiniBlock(key);
    }
  },

  /**
   * Wird vom Frontend gerufen, wenn ein Refresh angestossen wurde (queued).
   * Macht einen Snapshot des aktuellen Pipeline-Stands (zur spaeteren Wiederherstellung
   * bei Cancel/Error) und setzt dann alle Steps auf "pending" - damit der User sieht:
   * "neuer Refresh laeuft an, alte gruene Haekchen sind nicht mehr aktuell".
   */
  beginQueue(incidentId) {
    if (this._incidentId !== incidentId) return; // andere Lage offen
    if (!this._definition) return; // noch keine Pipeline-Definition geladen
    // Aktuellen Stand sichern (deep-copy). Bei Mehrfach-Refresh ohne Cancel
    // dazwischen wird der Snapshot bewusst ueberschrieben - er soll immer
    // der "Stand kurz vor diesem Refresh" sein.
    this._snapshotState = JSON.parse(JSON.stringify(this._stateByKey));
    // Alle Steps auf pending setzen
    this._definition.forEach(s => {
      if (this._stateByKey[s.key]) {
        this._stateByKey[s.key].status = 'pending';
      } else {
        this._stateByKey[s.key] = { status: 'pending', count_value: null, count_secondary: null, pass_number: 1 };
      }
    });
    this._render();
    this._renderMini();
  },

  /** Restauriert den letzten Snapshot. Rueckgabe: true bei Erfolg, false wenn keiner da war. */
  _restoreSnapshot() {
    if (!this._snapshotState) return false;
    this._stateByKey = this._snapshotState;
    this._snapshotState = null;
    this._render();
    this._renderMini();
    return true;
  },

  _onRefreshDoneSuccess(msg) {
    if (this._incidentId == null || (msg && msg.incident_id !== this._incidentId)) return;
    this._snapshotState = null; // verworfen, neuer Stand wird vom API geladen
    // Daten frisch nachladen, damit Header (Dauer) und finale Zahlen passen
    setTimeout(() => {
      if (this._incidentId != null) this.bindToIncident(this._incidentId);
    }, 600);
  },

  _onRefreshDoneCancel(msg) {
    if (this._incidentId == null || (msg && msg.incident_id !== this._incidentId)) return;
    if (!this._restoreSnapshot()) {
      // Kein Snapshot vorhanden (z.B. Page-Reload mitten im Refresh) -> wie bisher API-Reload
      setTimeout(() => {
        if (this._incidentId != null) this.bindToIncident(this._incidentId);
      }, 600);
    }
  },

  _onRefreshDoneError(msg) {
    // Wie Cancel: vorheriger Stand zurueck (nicht im Mix-Zustand stehenbleiben)
    this._onRefreshDoneCancel(msg);
  },

  /** Vollbild-Pipeline (Tab "Analysepipeline") als 3x3-Snake rendern. */
  _render() {
    const stage = document.getElementById('pipeline-stage');
    const meta = document.getElementById('pipeline-header-meta');
    const sidenote = document.getElementById('pipeline-sidenote');
    if (!stage) return;

    if (meta) meta.textContent = this._formatHeader();
    if (sidenote) sidenote.hidden = !this._isResearch;

    // Brandneue Lage ohne Refresh
    if (!this._lastRefreshHeader) {
      this._renderEmpty('Noch nie aktualisiert. Starte den ersten Refresh.');
      return;
    }

    // Sichtbare Blöcke (skipped komplett ausgeblendet, Anforderung 4b)
    const visible = (this._definition || []).filter(s => {
      const st = this._stateByKey[s.key];
      return !st || st.status !== 'skipped';
    });

    // In Dreier-Reihen aufteilen, Snake-Direction abwechselnd
    const ROW_SIZE = 3;
    const rows = [];
    for (let i = 0; i < visible.length; i += ROW_SIZE) {
      rows.push({
        steps: visible.slice(i, i + ROW_SIZE),
        direction: (rows.length % 2 === 0) ? 'ltr' : 'rtl',
      });
    }

    let trackHtml = '';
    rows.forEach((row, rowIdx) => {
      const isLastRow = rowIdx === rows.length - 1;
      let rowHtml = `<div class="pipeline-row" data-direction="${row.direction}">`;
      row.steps.forEach((s, i) => {
        const isLastBlockOverall = isLastRow && i === row.steps.length - 1;
        rowHtml += this._renderBlock(s, isLastBlockOverall);
        // Inner-Pfeil zwischen Blöcken einer Reihe (nicht hinter dem letzten)
        if (i < row.steps.length - 1) {
          rowHtml += `<div class="pipeline-arrow" data-from="${s.key}" data-arrow-type="inner"></div>`;
        }
      });
      rowHtml += '</div>';
      trackHtml += rowHtml;

      // U-Turn-Pfeil zwischen dieser und der nächsten Reihe
      if (!isLastRow) {
        const lastInRow = row.steps[row.steps.length - 1];
        const side = row.direction === 'ltr' ? 'right' : 'left';
        trackHtml += this._renderUturn(side, lastInRow.key);
      }
    });

    stage.innerHTML = `<div class="pipeline-track">${trackHtml}</div>`;
    this._bindBlockEvents(stage);
  },

  _renderBlock(stepDef, isLastOverall) {
    const st = this._stateByKey[stepDef.key];
    const status = (st && st.status) || 'pending';
    const cv = st ? st.count_value : null;
    const cs = st ? st.count_secondary : null;
    const loopMark = isLastOverall && this._isResearch
      ? `<div class="pipeline-loop" title="Mehrfach-Durchlauf"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 12a9 9 0 1 1-3-6.7"/><path d="M21 4v5h-5"/></svg></div>`
      : '';
    const icon = this._icons[stepDef.icon] || this._icons.search;
    return `
      <div class="pipeline-block status-${status}" data-step-key="${stepDef.key}" tabindex="0" aria-label="${this._escape(stepDef.label)}">
        <div class="pipeline-block-icon">${icon}</div>
        <div class="pipeline-block-title">${this._escape(stepDef.label)}</div>
        <div class="pipeline-block-count">${this._formatCount(stepDef.key, cv, cs, status)}</div>
        <div class="pipeline-block-check" aria-hidden="true">
          <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12l5 5 9-11"/></svg>
        </div>
        ${loopMark}
      </div>
    `;
  },

  /** Kompakter Reihenwechsel-Pfeil: kurzer ↓ direkt unter dem letzten Block der oberen Reihe. */
  _renderUturn(side, fromKey) {
    const arrowSvg = `
      <div class="uturn-arrow">
        <svg viewBox="0 0 24 32" preserveAspectRatio="xMidYMid meet">
          <path d="M 12 2 L 12 24" class="pipeline-uturn-path"/>
          <polyline points="6,18 12,24 18,18" class="pipeline-uturn-head"/>
        </svg>
      </div>`;
    const spacers = '<span class="uturn-spacer"></span><span class="uturn-spacer"></span>';
    const inner = side === 'right' ? (spacers + arrowSvg) : (arrowSvg + spacers);
    return `
      <div class="pipeline-uturn" data-side="${side}" data-from="${fromKey}" data-arrow-type="uturn" aria-hidden="true">
        ${inner}
      </div>
    `;
  },

  /** Einzelnen Block neu zeichnen (ohne kompletten Re-Render). */
  _patchBlock(stepKey) {
    const stage = document.getElementById('pipeline-stage');
    if (!stage) return;
    const def = (this._definition || []).find(s => s.key === stepKey);
    if (!def) return;
    const st = this._stateByKey[stepKey];
    const status = (st && st.status) || 'pending';

    // Übersprungene komplett ausblenden -> kompletter Re-Render
    if (status === 'skipped') {
      this._render();
      return;
    }

    const block = stage.querySelector(`.pipeline-block[data-step-key="${stepKey}"]`);
    if (!block) {
      // Block fehlt im DOM (z.B. vorher skipped): kompletter Re-Render
      this._render();
      return;
    }
    block.className = `pipeline-block status-${status}`;
    block.setAttribute('tabindex', '0');
    const cv = st ? st.count_value : null;
    const cs = st ? st.count_secondary : null;
    const cEl = block.querySelector('.pipeline-block-count');
    if (cEl) cEl.innerHTML = this._formatCount(stepKey, cv, cs, status);

    // Aktiven Pfeil/U-Turn zum nächsten Block markieren (alles mit data-from)
    stage.querySelectorAll('.pipeline-arrow, .pipeline-uturn')
      .forEach(a => a.classList.remove('is-flowing'));
    if (status === 'done') {
      const next = stage.querySelector(`[data-from="${stepKey}"]`);
      if (next) next.classList.add('is-flowing');
    }
  },

  _bindBlockEvents(stage) {
    stage.querySelectorAll('.pipeline-block').forEach(block => {
      const key = block.getAttribute('data-step-key');
      const def = (this._definition || []).find(s => s.key === key);
      if (!def) return;

      block.addEventListener('mouseenter', (e) => this._showTooltip(e, def));
      block.addEventListener('mouseleave', () => this._hideTooltip());
      block.addEventListener('focus', (e) => this._showTooltip(e, def));
      block.addEventListener('blur', () => this._hideTooltip());
      block.addEventListener('click', (e) => {
        e.stopPropagation();
        this._openPopup(def);
      });
      block.addEventListener('keydown', (e) => {
        if (e.key === 'Enter' || e.key === ' ') {
          e.preventDefault();
          this._openPopup(def);
        }
      });
    });
  },

  _showTooltip(evt, def) {
    if (!this._hoverTooltipEl) return;
    this._hoverTooltipEl.textContent = def.tooltip || def.label;
    this._hoverTooltipEl.classList.add('visible');
    const rect = evt.currentTarget.getBoundingClientRect();
    const tipW = 280;
    let left = rect.left + rect.width / 2 - tipW / 2;
    if (left < 8) left = 8;
    if (left + tipW > window.innerWidth - 8) left = window.innerWidth - tipW - 8;
    this._hoverTooltipEl.style.left = left + 'px';
    this._hoverTooltipEl.style.top = (rect.top - 8) + 'px';
    this._hoverTooltipEl.style.transform = 'translateY(-100%)';
  },

  _hideTooltip() {
    if (!this._hoverTooltipEl) return;
    this._hoverTooltipEl.classList.remove('visible');
  },

  _openPopup(def) {
    this._closePopup();
    const popup = document.createElement('div');
    popup.className = 'pipeline-popup';
    popup.setAttribute('role', 'dialog');
    popup.innerHTML = `
      <div class="pipeline-popup-inner">
        <div class="pipeline-popup-title">${this._escape(def.label)}</div>
        <div class="pipeline-popup-text">${this._escape(def.tooltip || '')}</div>
        <button class="pipeline-popup-close" aria-label="Schliessen">×</button>
      </div>
    `;
    popup.querySelector('.pipeline-popup-close').addEventListener('click', () => this._closePopup());
    document.body.appendChild(popup);
    // ESC schliesst
    this._escListener = (e) => { if (e.key === 'Escape') this._closePopup(); };
    document.addEventListener('keydown', this._escListener);
  },

  _closePopup() {
    const existing = document.querySelector('.pipeline-popup');
    if (existing) existing.remove();
    if (this._escListener) {
      document.removeEventListener('keydown', this._escListener);
      this._escListener = null;
    }
  },

  /** Mini-Variante (Refresh-Popup): Icons + Status, keine Zahlen, keine Tooltips. */
  _renderMini() {
    const mini = document.getElementById('progress-pipeline-mini');
    if (!mini) return;
    if (!this._definition || !this._definition.length) {
      mini.innerHTML = '';
      return;
    }
    const visible = this._definition.filter(s => {
      const st = this._stateByKey[s.key];
      return !st || st.status !== 'skipped';
    });
    const html = visible.map((s, i) => {
      const st = this._stateByKey[s.key];
      const status = (st && st.status) || 'pending';
      const icon = this._icons[s.icon] || this._icons.search;
      const sep = (i < visible.length - 1) ? '<span class="pipeline-mini-sep" aria-hidden="true"></span>' : '';
      return `<span class="pipeline-mini-block status-${status}" data-step-key="${s.key}" title="${this._escape(s.label)}">${icon}</span>${sep}`;
    }).join('');
    mini.innerHTML = html;
  },

  _patchMiniBlock(stepKey) {
    const mini = document.getElementById('progress-pipeline-mini');
    if (!mini) return;
    const st = this._stateByKey[stepKey];
    const status = (st && st.status) || 'pending';
    if (status === 'skipped') {
      this._renderMini();
      return;
    }
    const el = mini.querySelector(`.pipeline-mini-block[data-step-key="${stepKey}"]`);
    if (!el) {
      this._renderMini();
      return;
    }
    el.className = `pipeline-mini-block status-${status}`;
  },

  _renderEmpty(msg) {
    const stage = document.getElementById('pipeline-stage');
    const meta = document.getElementById('pipeline-header-meta');
    const sidenote = document.getElementById('pipeline-sidenote');
    if (meta) meta.textContent = '';
    if (sidenote) sidenote.hidden = true;
    if (stage) stage.innerHTML = `<div class="pipeline-empty">${msg}</div>`;
    // Mini im Refresh-Popup zuruecksetzen
    const mini = document.getElementById('progress-pipeline-mini');
    if (mini) mini.innerHTML = '';
  },

  _formatHeader() {
    const r = this._lastRefreshHeader;
    if (!r) return '';
    let parts = [];
    if (r.started_at) {
      const rel = this._relativeTime(r.started_at);
      parts.push(rel ? `Letzter Refresh: ${rel}` : `Letzter Refresh: ${r.started_at}`);
    }
    if (r.duration_sec != null) {
      parts.push(`Dauer: ${r.duration_sec} s`);
    }
    if (r.status === 'running') {
      parts = ['Aktualisierung läuft...'];
    } else if (r.status === 'cancelled') {
      parts.push('abgebrochen');
    } else if (r.status === 'error') {
      parts.push('mit Fehler beendet');
    }
    return parts.join(' · ');
  },

  _relativeTime(dbStr) {
    try {
      // dbStr ist lokal "YYYY-MM-DD HH:MM:SS"
      const d = new Date(dbStr.replace(' ', 'T'));
      if (isNaN(d.getTime())) return '';
      const diffMs = Date.now() - d.getTime();
      const min = Math.floor(diffMs / 60000);
      if (min < 1) return 'gerade eben';
      if (min < 60) return `vor ${min} Min`;
      const h = Math.floor(min / 60);
      if (h < 24) return `vor ${h} Std`;
      const days = Math.floor(h / 24);
      return `vor ${days} Tag${days === 1 ? '' : 'en'}`;
    } catch (e) {
      return '';
    }
  },

  _formatCount(stepKey, cv, cs, status) {
    // Qualitaetscheck: KEINE Zahlen, nur Status (Anforderung 3 vom User)
    if (stepKey === 'qc' || stepKey === 'summary') {
      if (status === 'done') return '<span class="count-status">erledigt</span>';
      if (status === 'active') return '<span class="count-status">läuft...</span>';
      if (status === 'error') return '<span class="count-status">Fehler</span>';
      return '<span class="count-status">-</span>';
    }
    if (status === 'pending') return '<span class="count-status">-</span>';
    if (status === 'active') return '<span class="count-status">läuft...</span>';
    if (status === 'error') return '<span class="count-status">Fehler</span>';
    if (cv == null) return '<span class="count-status">-</span>';

    switch (stepKey) {
      case 'sources_review':
        return `${cv} Quellen geprüft`;
      case 'collect':
        return cs != null
          ? `${cv} Meldungen<small> aus ${cs} Quellen</small>`
          : `${cv} Meldungen`;
      case 'dedup':
        return cs != null
          ? `${cv} Duplikate<small> (${cs} verbleiben)</small>`
          : `${cv} Duplikate`;
      case 'relevance':
        return cs != null && cs > 0
          ? `${cv} relevant<small> von ${cs}</small>`
          : `${cv} relevant`;
      case 'geoparsing':
        return cs != null
          ? `${cv} Orte<small> aus ${cs} Meldungen</small>`
          : `${cv} Orte erkannt`;
      case 'factcheck':
        return cs != null
          ? `${cv} neue Fakten<small> (${cs} gesamt)</small>`
          : `${cv} Fakten geprüft`;
      case 'notify':
        return cv === 0 ? 'keine versendet' : `${cv} Hinweis${cv === 1 ? '' : 'e'} versendet`;
      default:
        return `${cv}`;
    }
  },

  _escape(s) {
    if (s == null) return '';
    return String(s).replace(/[&<>"']/g, c => ({
      '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[c]));
  },
};

document.addEventListener('DOMContentLoaded', () => Pipeline.init());
```
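For readers wiring a backend to this module: the handler `_onWsStep` above expects a message carrying `incident_id` and a `data` payload. The shape below is inferred from the field accesses in the code; the concrete values are invented for illustration:

```js
// Sketch of the message shape Pipeline._onWsStep consumes (hypothetical values).
const exampleMsg = {
  incident_id: 42,           // must match Pipeline._incidentId or the message is ignored
  data: {
    step_key: 'collect',     // one of the keys from the backend step definition
    status: 'done',          // statuses seen in the module: pending/active/done/error/skipped
    count_value: 128,        // primary count, e.g. collected items
    count_secondary: 17,     // secondary count, e.g. number of sources
    pass_number: 2,          // values above _passTotal trigger the loop animation
  },
};

// Replaying such a message by hand drives the same code path the WebSocket
// layer uses, which is handy for testing the block animations without a
// running refresh (the DOM guards in _patchBlock tolerate a missing stage).
Pipeline._onWsStep(exampleMsg);
```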
265
src/static/js/update-system.js
Normale Datei
265
src/static/js/update-system.js
Normale Datei
@@ -0,0 +1,265 @@
|
|||||||
|
/**
|
||||||
|
* Update-System fuer den AegisSight Monitor.
|
||||||
|
*
|
||||||
|
* Zeigt zwei Dinge:
|
||||||
|
* 1) Beim ersten Page-Load nach einem Update -> Modal "Was ist neu?"
|
||||||
|
* mit den Eintraegen aus RELEASES.json, die der User noch nicht gesehen hat.
|
||||||
|
*
|
||||||
|
* 2) Wenn der User die Seite offen hat und im Hintergrund ein neues Update
|
||||||
|
* live geht -> kleiner Banner unten rechts:
|
||||||
|
* "Eine neue Version ist verfuegbar. [Jetzt aktualisieren]"
|
||||||
|
*
|
||||||
|
* Datenquellen (Backend):
|
||||||
|
* GET /api/version -> { commit, deployed_at }
|
||||||
|
* GET /api/release-notes -> { entries: [...], current }
|
||||||
|
*
|
||||||
|
* Persistenz im Browser:
|
||||||
|
* localStorage 'aegis_last_seen_release' -> "version"-Feld des zuletzt
|
||||||
|
* gesehenen Eintrags
|
||||||
|
*/
|
||||||
|
(function () {
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const POLL_INTERVAL_MS = 60_000; // alle 60 Sekunden
|
||||||
|
const STORAGE_KEY = 'aegis_last_seen_release';
|
||||||
|
|
||||||
|
let initialBootCommit = null; // Commit-Hash beim Page-Load
|
||||||
|
let pollTimer = null;
|
||||||
|
let updateBannerShown = false;
|
||||||
|
|
||||||
|
// ---- Mini-DOM-Helpers ----
|
||||||
|
function el(tag, attrs, ...children) {
|
||||||
|
const e = document.createElement(tag);
|
||||||
|
for (const k in (attrs || {})) {
|
||||||
|
if (k === 'class') e.className = attrs[k];
|
||||||
|
else if (k === 'html') e.innerHTML = attrs[k];
|
||||||
|
else if (k.startsWith('on')) e.addEventListener(k.slice(2), attrs[k]);
|
||||||
|
else e.setAttribute(k, attrs[k]);
|
||||||
|
}
|
||||||
|
for (const c of children) {
|
||||||
|
if (c == null) continue;
|
||||||
|
e.appendChild(typeof c === 'string' ? document.createTextNode(c) : c);
|
||||||
|
}
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Styles inline injecten (kein zusaetzlicher CSS-File noetig) ----
|
||||||
|
// Nutzt die globalen Theme-Variablen aus style.css, damit Banner und
|
||||||
|
// Modal automatisch dem Hell-/Dunkelmodus folgen.
|
||||||
|
function injectStyles() {
|
||||||
|
if (document.getElementById('aegis-update-styles')) return;
|
||||||
|
const css = `
|
||||||
|
#aegis-update-banner {
|
||||||
|
position: fixed; bottom: 24px; right: 24px; z-index: 99999;
|
||||||
|
background: var(--bg-card);
|
||||||
|
color: var(--text-primary);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-left: 4px solid var(--accent);
|
||||||
|
padding: 14px 18px; border-radius: 10px;
|
||||||
|
box-shadow: 0 8px 32px rgba(0,0,0,0.25);
|
||||||
|
font-family: 'Inter', -apple-system, sans-serif; font-size: 0.92rem;
|
||||||
|
display: flex; align-items: center; gap: 12px; max-width: 380px;
|
||||||
|
animation: aegis-slide-in 0.4s cubic-bezier(0.4,0,0.2,1);
|
||||||
|
}
|
||||||
|
@keyframes aegis-slide-in {
|
||||||
|
from { transform: translateX(420px); opacity: 0; }
|
||||||
|
to { transform: translateX(0); opacity: 1; }
|
||||||
|
}
|
||||||
|
#aegis-update-banner b { font-weight: 700; color: var(--accent); }
|
||||||
|
#aegis-update-banner button {
|
||||||
|
background: var(--accent); color: #fff; border: 0; padding: 7px 14px;
|
||||||
|
border-radius: 6px; font: inherit; font-size: 0.86rem; font-weight: 600;
|
||||||
|
cursor: pointer; flex-shrink: 0;
|
||||||
|
}
|
||||||
|
#aegis-update-banner button:hover { background: var(--accent-hover); }
|
||||||
|
#aegis-update-banner .close {
|
||||||
|
background: transparent; color: var(--text-secondary); padding: 0 4px;
|
||||||
|
font-size: 1.2rem; line-height: 1;
|
||||||
|
}
|
||||||
|
#aegis-update-banner .close:hover { color: var(--text-primary); background: transparent; }
|
||||||
|
|
||||||
|
#aegis-update-modal-overlay {
|
||||||
|
position: fixed; inset: 0; background: rgba(0,0,0,0.55); z-index: 99998;
|
||||||
|
backdrop-filter: blur(3px);
|
||||||
|
display: flex; align-items: center; justify-content: center; padding: 24px;
|
||||||
|
animation: aegis-fade-in 0.25s ease;
|
||||||
|
}
|
||||||
|
@keyframes aegis-fade-in { from { opacity: 0; } to { opacity: 1; } }
|
||||||
|
#aegis-update-modal {
|
||||||
|
background: var(--bg-card);
|
||||||
|
color: var(--text-primary);
|
||||||
|
border-radius: 14px;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
box-shadow: 0 24px 80px rgba(0,0,0,0.4);
|
||||||
|
font-family: 'Inter', -apple-system, sans-serif;
|
||||||
|
max-width: 540px; width: 100%; max-height: 80vh; overflow: hidden;
|
||||||
|
display: flex; flex-direction: column;
|
||||||
|
}
|
||||||
|
#aegis-update-modal header {
|
||||||
|
padding: 22px 28px 18px; border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
#aegis-update-modal h2 { margin: 0 0 4px; color: var(--accent); font-size: 1.25rem; font-weight: 700; }
|
||||||
|
#aegis-update-modal header p { margin: 0; color: var(--text-secondary); font-size: 0.88rem; }
|
||||||
|
#aegis-update-modal .body { padding: 8px 28px; overflow-y: auto; }
|
||||||
|
.aegis-release { padding: 16px 0; border-bottom: 1px solid var(--border); }
|
||||||
|
.aegis-release:last-child { border: 0; }
|
||||||
|
.aegis-release-head { display: flex; align-items: baseline; gap: 12px; margin-bottom: 8px; }
|
||||||
|
.aegis-release-title { font-size: 1rem; font-weight: 600; color: var(--text-primary); }
|
||||||
|
.aegis-release-date { font-size: 0.78rem; color: var(--text-tertiary); }
|
||||||
|
.aegis-release-items { margin: 0; padding-left: 20px; color: var(--text-secondary); font-size: 0.92rem; line-height: 1.6; }
|
||||||
|
.aegis-release-items li { margin-bottom: 4px; }
|
||||||
|
#aegis-update-modal footer {
|
||||||
|
padding: 16px 28px 20px; border-top: 1px solid var(--border);
|
||||||
|
display: flex; justify-content: flex-end;
|
||||||
|
}
|
||||||
|
#aegis-update-modal footer button {
|
||||||
|
background: var(--accent); color: #fff; border: 0; padding: 10px 22px;
|
||||||
|
border-radius: 6px; font: inherit; font-size: 0.92rem; font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
#aegis-update-modal footer button:hover { background: var(--accent-hover); }
|
||||||
|
|
||||||
|
@media (max-width: 600px) {
|
||||||
|
#aegis-update-banner { left: 12px; right: 12px; bottom: 12px; max-width: none; }
|
||||||
|
}`;
|
||||||
|
document.head.appendChild(el('style', { id: 'aegis-update-styles', html: css }));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Backend-Kommunikation ----
|
||||||
|
async function fetchVersion() {
|
||||||
|
try {
|
||||||
|
const r = await fetch('/api/version', { cache: 'no-store' });
|
||||||
|
if (!r.ok) return null;
|
||||||
|
return await r.json();
|
||||||
|
} catch (e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchReleaseNotes(since) {
|
||||||
|
try {
|
||||||
|
const url = '/api/release-notes' + (since ? '?since=' + encodeURIComponent(since) : '');
|
||||||
|
const r = await fetch(url, { cache: 'no-store' });
|
||||||
|
if (!r.ok) return null;
|
||||||
|
return await r.json();
|
||||||
|
} catch (e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}

  // ---- Banner ----
  function showUpdateBanner() {
    if (updateBannerShown) return;
    if (document.getElementById('aegis-update-banner')) return;
    updateBannerShown = true;

    const banner = el('div', { id: 'aegis-update-banner' },
      el('div', null,
        el('b', null, 'Update verfügbar'),
        document.createElement('br'),
        el('span', { style: 'font-size:0.85rem;opacity:0.85' },
          'Eine neue Version ist live. Bitte Seite neu laden, um sie zu nutzen.')
      ),
      el('button', { onclick: () => location.reload() }, 'Aktualisieren'),
      el('button', {
        class: 'close', title: 'Schließen',
        onclick: () => banner.remove()
      }, '×')
    );
    document.body.appendChild(banner);
  }

  // ---- Modal ----
  function showWhatsNewModal(entries, currentVersion) { // currentVersion is currently unused
    if (document.getElementById('aegis-update-modal-overlay')) return;
    if (!entries || !entries.length) return;

    const releases = entries.map(e => {
      const items = (e.items || []).map(i => el('li', null, i));
      return el('div', { class: 'aegis-release' },
        el('div', { class: 'aegis-release-head' },
          el('span', { class: 'aegis-release-title' }, e.title || 'Update'),
          el('span', { class: 'aegis-release-date' }, e.date || '')
        ),
        items.length ? el('ul', { class: 'aegis-release-items' }, ...items) : null
      );
    });

    const overlay = el('div', { id: 'aegis-update-modal-overlay' },
      el('div', { id: 'aegis-update-modal' },
        el('header', null,
          el('h2', null, 'Was ist neu?'),
          el('p', null, 'Diese Änderungen sind seit deinem letzten Besuch dazugekommen.')
        ),
        el('div', { class: 'body' }, ...releases),
        el('footer', null,
          el('button', {
            onclick: () => {
              // Mark the highest (= newest) version as seen
              const newest = entries[0]?.version;
              if (newest) localStorage.setItem(STORAGE_KEY, newest);
              overlay.remove();
            }
          }, 'Verstanden')
        )
      )
    );

    // ESC or a click on the backdrop -> same as "Verstanden"
    overlay.addEventListener('click', (ev) => {
      if (ev.target === overlay) {
        const newest = entries[0]?.version;
        if (newest) localStorage.setItem(STORAGE_KEY, newest);
        overlay.remove();
      }
    });
    document.addEventListener('keydown', function escHandler(ev) {
      if (ev.key === 'Escape' && document.getElementById('aegis-update-modal-overlay')) {
        const newest = entries[0]?.version;
        if (newest) localStorage.setItem(STORAGE_KEY, newest);
        overlay.remove();
        document.removeEventListener('keydown', escHandler);
      }
    });

    document.body.appendChild(overlay);
  }

  // ---- Polling ----
  async function pollVersion() {
    const v = await fetchVersion();
    if (v && v.commit && initialBootCommit && v.commit !== initialBootCommit) {
      showUpdateBanner();
      // Stop polling as soon as the banner is shown
      if (pollTimer) { clearInterval(pollTimer); pollTimer = null; }
    }
  }

  // ---- Initial boot ----
  async function init() {
    injectStyles();

    const v = await fetchVersion();
    if (v && v.commit) initialBootCommit = v.commit;

    // "What's new" modal: only when entries NEWER than 'lastSeen' exist
    const lastSeen = localStorage.getItem(STORAGE_KEY);
    const notes = await fetchReleaseNotes(lastSeen);
    if (notes && notes.entries && notes.entries.length > 0) {
      // Show the modal with a slight delay so the dashboard renders first.
      // The modal also appears on the very first visit, so that customers
      // can see during onboarding what the update system does and which
      // highlights are currently live.
      setTimeout(() => showWhatsNewModal(notes.entries, v?.commit), 800);
    }

    // Start polling
    pollTimer = setInterval(pollVersion, POLL_INTERVAL_MS);
  }

  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', init);
  } else {
    init();
  }
})();
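
For context, here is a minimal sketch of the backend side these two routes imply. The paths and the fields the widget reads (`commit`, `entries`, `version`, `title`, `date`, `items`) come from the client code above; everything else, including the framework (FastAPI), the in-memory release list, and the exact `since` filtering, is an assumption rather than the project's actual implementation.

```python
# Hypothetical server-side sketch of the contract the update widget polls.
# Route paths and response fields mirror the client code above; the
# framework choice and data source are assumptions.
from fastapi import FastAPI

app = FastAPI()

# Newest first: the client treats entries[0].version as the latest release.
RELEASE_NOTES = [
    {"version": "1.1.0", "title": "Dashboard polish", "date": "2025-01-15",
     "items": ["Faster charts", "New export button"]},
    {"version": "1.0.0", "title": "Initial release", "date": "2025-01-02",
     "items": ["First public version"]},
]

BUILD_COMMIT = "abc1234"  # placeholder; e.g. injected at deploy time


@app.get("/api/version")
def version():
    # The widget only compares this hash against the one it saw at boot;
    # any change triggers the reload banner.
    return {"commit": BUILD_COMMIT}


@app.get("/api/release-notes")
def release_notes(since: str | None = None):
    # 'since' is the last version the user confirmed with "Verstanden".
    # Return only newer entries; if 'since' is unknown, return everything.
    # The client shows the modal whenever the list is non-empty.
    if since is None:
        return {"entries": RELEASE_NOTES}
    newer = []
    for entry in RELEASE_NOTES:
        if entry["version"] == since:
            break
        newer.append(entry)
    return {"entries": newer}
```

Note that this filtering relies on the release list being ordered newest-first; a real implementation would more likely parse and compare version numbers instead.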