Commits vergleichen
16 Commits
7c558b7cb4
...
main
| Autor | SHA1 | Datum | |
|---|---|---|---|
| 000b4ef526 | |||
| 52f5debe44 | |||
| 5435d0be8b | |||
|
|
8c75a70655 | ||
| 6bfff67c2f | |||
| 746b1bcd81 | |||
| 7ec153ca49 | |||
|
|
a27fe44b0b | ||
| 6c623a8ae5 | |||
| 240222cb2a | |||
|
|
bd476edb13 | ||
| ed38d68db7 | |||
| c7d6d2eedf | |||
| 031bd9e114 | |||
| c316c67294 | |||
| 430641b128 |
@@ -1,4 +1,40 @@
|
||||
[
|
||||
{
|
||||
"version": "2026-05-22T12:41Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "X-Recherche-Konten im Verwaltungsportal verwalten",
|
||||
"items": [
|
||||
"Recherche-Konten für X (ehemals Twitter) können jetzt direkt im Verwaltungsportal hinzugefügt, bearbeitet und entfernt werden."
|
||||
]
|
||||
},
|
||||
{
|
||||
"version": "2026-05-22T11:13Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "Interne Verbesserungen",
|
||||
"items": []
|
||||
},
|
||||
{
|
||||
"version": "2026-05-22T11:13Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "Interne Verbesserungen",
|
||||
"items": []
|
||||
},
|
||||
{
|
||||
"version": "2026-05-22T11:09Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "X-Konten direkt im Verwaltungsportal verwalten",
|
||||
"items": [
|
||||
"X-Konten können jetzt zentral über das Verwaltungsportal angelegt und verwaltet werden."
|
||||
]
|
||||
},
|
||||
{
|
||||
"version": "2026-05-22T09:37Z",
|
||||
"date": "2026-05-22",
|
||||
"title": "Neue Übersetzungsfunktion im Dashboard",
|
||||
"items": [
|
||||
"Texte können jetzt im Dashboard per Klick manuell übersetzt werden."
|
||||
]
|
||||
},
|
||||
{
|
||||
"version": "2026-05-17T19:19Z",
|
||||
"date": "2026-05-17",
|
||||
|
||||
@@ -9,3 +9,5 @@ httpx>=0.28
|
||||
feedparser>=6.0
|
||||
# PDF-Upload-Validierung
|
||||
pypdf>=5.0
|
||||
# X-Scraper-Konten-Verwaltung (twscrape-Account-Pool)
|
||||
twscrape @ git+https://github.com/vladkens/twscrape.git@206f0942fe41149da28530399f7c772ec00be17a
|
||||
|
||||
@@ -8,6 +8,10 @@ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
|
||||
# Gemeinsame Datenbank (gleiche wie OSINT-Monitor)
|
||||
DB_PATH = os.environ.get("DB_PATH", "/mnt/gitea/osint-data/osint.db")
|
||||
|
||||
# twscrape-Account-Store: die X-Login-Konten, mit denen der Monitor bei X
|
||||
# recherchiert. Geteilt mit dem Monitor (gleicher Pfad-Default).
|
||||
X_ACCOUNTS_DB_PATH = os.environ.get("X_ACCOUNTS_DB_PATH", "/home/claude-dev/.x-scraper/accounts.db")
|
||||
|
||||
# JWT (eigener Secret fuer Verwaltungsportal)
|
||||
JWT_SECRET = os.environ.get("PORTAL_JWT_SECRET")
|
||||
if not JWT_SECRET:
|
||||
|
||||
@@ -11,7 +11,7 @@ from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
from config import STATIC_DIR, PORT
|
||||
from routers import auth, organizations, licenses, users, dashboard, sources, token_usage, audit
|
||||
from routers import auth, organizations, licenses, users, dashboard, sources, token_usage, audit, translation, x_scraper
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@@ -42,6 +42,8 @@ app.include_router(dashboard.router)
|
||||
app.include_router(sources.router)
|
||||
app.include_router(token_usage.router)
|
||||
app.include_router(audit.router)
|
||||
app.include_router(translation.router)
|
||||
app.include_router(x_scraper.router)
|
||||
|
||||
# --- Statische Dateien ---
|
||||
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
||||
|
||||
222
src/routers/translation.py
Normale Datei
222
src/routers/translation.py
Normale Datei
@@ -0,0 +1,222 @@
|
||||
"""Manuelle Artikel-Übersetzung.
|
||||
|
||||
Stößt die Haiku-Übersetzung fremdsprachiger Artikel an, die noch keine
|
||||
deutsche Fassung haben. Im Monitor läuft der Translator seit 2026-05-22 NICHT
|
||||
mehr automatisch (TRANSLATOR_ENABLED=false), weil ein sehr großer Lauf den
|
||||
Refresh-Worker blockierte. Dieser Endpoint ist der bewusste manuelle Ersatz:
|
||||
er läuft als entkoppelter Hintergrund-Task, blockiert keinen Request und ist
|
||||
jederzeit abbrechbar.
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
|
||||
from auth import get_current_admin
|
||||
from audit import log_action, get_client_ip
|
||||
from database import get_db
|
||||
from translation_agent import translate_articles_batch
|
||||
|
||||
logger = logging.getLogger("verwaltung.translation")
|
||||
router = APIRouter(prefix="/api/translation", tags=["Translation"])
|
||||
|
||||
# Batch-Größe wie im Translator-Agent (durch das Haiku-Output-Limit bestimmt).
|
||||
_BATCH_SIZE = 5
|
||||
|
||||
# Grobe Schätzwerte aus Produktiv-Logs (Haiku, 5 Artikel/Batch):
|
||||
# rund 17 s und rund $0.03 pro Batch.
|
||||
_SECONDS_PER_ARTICLE = 3.5
|
||||
_COST_PER_ARTICLE = 0.006
|
||||
|
||||
# Artikel ohne deutsche Fassung: fremdsprachig (language gesetzt und != de)
|
||||
# und headline_de ODER content_de fehlt.
|
||||
_PENDING_WHERE = (
|
||||
"language IS NOT NULL AND LOWER(language) != 'de' "
|
||||
"AND (headline_de IS NULL OR headline_de = '' "
|
||||
"OR content_de IS NULL OR content_de = '')"
|
||||
)
|
||||
|
||||
# Modul-globaler Job-Status. Es gibt bewusst nur EINEN Übersetzungs-Job
|
||||
# gleichzeitig, das hält Claude-Last und DB-Schreiblast kalkulierbar.
|
||||
_job: dict = {
|
||||
"running": False,
|
||||
"started_at": None,
|
||||
"finished_at": None,
|
||||
"total": 0,
|
||||
"done": 0,
|
||||
"translated": 0,
|
||||
"failed_batches": 0,
|
||||
"cancelled": False,
|
||||
"error": None,
|
||||
"started_by": None,
|
||||
}
|
||||
_job_lock = asyncio.Lock()
|
||||
_cancel_event = asyncio.Event()
|
||||
# Referenz auf den laufenden Task halten, damit der Garbage Collector ihn
|
||||
# nicht vorzeitig einsammelt.
|
||||
_job_task: asyncio.Task | None = None
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
|
||||
async def _count_pending(db) -> int:
    """Count the articles that match the pending-translation filter.

    Args:
        db: Open database connection exposing an awaitable ``execute``.

    Returns:
        The value of ``COUNT(*)``, or 0 when no row comes back.
    """
    query = f"SELECT COUNT(*) FROM articles WHERE {_PENDING_WHERE}"
    result = await db.execute(query)
    first = await result.fetchone()
    if not first:
        return 0
    return first[0]
|
||||
|
||||
|
||||
async def _run_translation_job(started_by: str):
    """Background task: translate all pending articles batch by batch.

    Writes every batch back to the database and updates the module-level
    ``_job`` progress fields afterwards, so that the frontend can follow the
    progress live. Stops between two batches as soon as ``_cancel_event`` is
    set.

    Args:
        started_by: Identifier of the admin who started the job; only used
            for logging in this function.
    """
    db = None
    try:
        # Opened inside the try block: if the connection cannot be
        # established, the ``finally`` clause still runs and clears the
        # running flag. Otherwise the job would look "running" forever and
        # every later start request would be rejected.
        db = await get_db()
        # Generous lock timeout, because the monitor may write to the same
        # shared DB in parallel (WAL allows only one writer).
        await db.execute("PRAGMA busy_timeout=30000")
        cursor = await db.execute(
            f"SELECT id, headline, content_original, language "
            f"FROM articles WHERE {_PENDING_WHERE} ORDER BY id DESC"
        )
        articles = [dict(r) for r in await cursor.fetchall()]
        _job["total"] = len(articles)
        logger.info(
            "Übersetzungs-Job gestartet von %s: %d Artikel",
            started_by, len(articles),
        )

        for i in range(0, len(articles), _BATCH_SIZE):
            # Cancellation is only honoured between batches.
            if _cancel_event.is_set():
                _job["cancelled"] = True
                logger.info(
                    "Übersetzungs-Job abgebrochen bei %d/%d",
                    _job["done"], _job["total"],
                )
                break
            batch = articles[i : i + _BATCH_SIZE]
            try:
                translations, _usage = await translate_articles_batch(batch, "de")
            except Exception as e:  # pragma: no cover - defensive
                # A failed batch is counted and skipped; the job continues
                # with the next batch.
                _job["failed_batches"] += 1
                logger.error("Übersetzungs-Batch fehlgeschlagen: %s", e)
                _job["done"] = min(i + _BATCH_SIZE, len(articles))
                continue
            for t in translations:
                hd = t.get("headline_de")
                cd = t.get("content_de")
                if hd or cd:
                    # COALESCE keeps an existing German value when the
                    # translator returned nothing for that field.
                    await db.execute(
                        "UPDATE articles SET "
                        "headline_de = COALESCE(?, headline_de), "
                        "content_de = COALESCE(?, content_de) WHERE id = ?",
                        (hd, cd, t["id"]),
                    )
                    _job["translated"] += 1
            await db.commit()
            _job["done"] = min(i + _BATCH_SIZE, len(articles))

        logger.info(
            "Übersetzungs-Job beendet: %d/%d übersetzt, %d Batch-Fehler, abgebrochen=%s",
            _job["translated"], _job["total"], _job["failed_batches"],
            _job["cancelled"],
        )
    except Exception as e:
        _job["error"] = str(e)
        logger.error(
            "Übersetzungs-Job mit Fehler beendet: %s", e, exc_info=True
        )
    finally:
        _job["running"] = False
        _job["finished_at"] = _now_iso()
        if db is not None:
            await db.close()
|
||||
|
||||
|
||||
@router.get("/status")
async def translation_status(admin=Depends(get_current_admin)):
    """Return the current job state plus the number of untranslated articles.

    The response is a copy of the module-level job state extended by
    ``pending`` and an ``estimate`` of duration and cost derived from it.
    """
    conn = await get_db()
    try:
        pending = await _count_pending(conn)
    finally:
        await conn.close()
    estimate = {
        "seconds": round(pending * _SECONDS_PER_ARTICLE),
        "cost_usd": round(pending * _COST_PER_ARTICLE, 2),
    }
    return {**_job, "pending": pending, "estimate": estimate}
|
||||
|
||||
|
||||
@router.post("/run")
async def translation_run(request: Request, admin=Depends(get_current_admin)):
    """Start translating all pending articles as a background task.

    Raises:
        HTTPException: 409 when a translation job is already running.

    Returns:
        ``{"status": "nothing_to_do", "pending": 0}`` when there is nothing
        to translate, otherwise ``{"status": "started", "pending": <count>}``.
    """
    global _job_task
    async with _job_lock:
        if _job["running"]:
            raise HTTPException(
                status_code=409, detail="Es läuft bereits eine Übersetzung."
            )

        conn = await get_db()
        try:
            pending = await _count_pending(conn)
            if pending == 0:
                return {"status": "nothing_to_do", "pending": 0}
            await log_action(
                conn, admin, get_client_ip(request), "translation.run",
                resource_type="articles", after={"pending": pending},
            )
        finally:
            await conn.close()

        # First non-empty identifier wins: email, then username, then id.
        started_by = admin.get("email")
        if not started_by:
            started_by = admin.get("username") or str(admin.get("id"))

        # Reset the job state and launch the task detached from the request.
        _cancel_event.clear()
        fresh_state = {
            "running": True,
            "started_at": _now_iso(),
            "finished_at": None,
            "total": pending,
            "done": 0,
            "translated": 0,
            "failed_batches": 0,
            "cancelled": False,
            "error": None,
            "started_by": started_by,
        }
        _job.update(fresh_state)
        _job_task = asyncio.create_task(_run_translation_job(started_by))

        logger.info(
            "Übersetzung manuell gestartet von %s (%d Artikel)", started_by, pending
        )
        return {"status": "started", "pending": pending}
|
||||
|
||||
|
||||
@router.post("/cancel")
async def translation_cancel(request: Request, admin=Depends(get_current_admin)):
    """Cancel a running translation job after its current batch.

    Raises:
        HTTPException: 409 when no translation job is running.
    """
    job_is_running = _job["running"]
    if not job_is_running:
        raise HTTPException(
            status_code=409, detail="Es läuft keine Übersetzung."
        )
    # Signal first, then write the audit entry.
    _cancel_event.set()
    conn = await get_db()
    try:
        await log_action(
            conn, admin, get_client_ip(request), "translation.cancel",
            resource_type="articles",
        )
    finally:
        await conn.close()
    return {"status": "cancelling"}
|
||||
224
src/routers/x_scraper.py
Normale Datei
224
src/routers/x_scraper.py
Normale Datei
@@ -0,0 +1,224 @@
|
||||
"""X-Scraper-Konten: Verwaltung des twscrape-Account-Pools.
|
||||
|
||||
Das sind die X-Login-Konten, mit denen der Monitor bei X recherchiert
|
||||
(scrapen). Sie liegen im twscrape-Account-Store (config.X_ACCOUNTS_DB_PATH),
|
||||
nicht in der Verwaltungs-Datenbank. twscrape wird lazy importiert, damit das
|
||||
Portal auch ohne installiertes twscrape startet.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
import aiosqlite
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from auth import get_current_admin
|
||||
from audit import log_action, get_client_ip
|
||||
from config import X_ACCOUNTS_DB_PATH
|
||||
from database import db_dependency
|
||||
|
||||
logger = logging.getLogger("verwaltung.x_scraper")
|
||||
|
||||
router = APIRouter(prefix="/api/x-scraper", tags=["x-scraper"])
|
||||
|
||||
|
||||
def _get_pool():
    """Open the twscrape account pool stored at ``X_ACCOUNTS_DB_PATH``.

    The parent directory of the store is created on a best-effort basis; a
    failure there is logged and does not abort the call. twscrape is imported
    lazily inside this function.

    Returns:
        The ``pool`` attribute of ``twscrape.API(X_ACCOUNTS_DB_PATH)``.

    Raises:
        HTTPException: 503 when twscrape cannot be imported.
    """
    store_dir = os.path.dirname(X_ACCOUNTS_DB_PATH)
    # A bare file name has no directory part; os.makedirs("") would fail.
    if store_dir:
        try:
            os.makedirs(store_dir, exist_ok=True)
        except (OSError, ValueError) as exc:
            # Still best effort, but no longer silent: the warning explains a
            # later failure when the store is opened.
            logger.warning(
                "X-Scraper-Verzeichnis %s konnte nicht angelegt werden: %s",
                store_dir, exc,
            )
    try:
        from twscrape import API
    except ImportError:
        raise HTTPException(status_code=503, detail="twscrape ist nicht installiert")
    return API(X_ACCOUNTS_DB_PATH).pool
|
||||
|
||||
|
||||
def _summary(acc) -> dict:
    """Reduce an account object to a displayable dict without secrets.

    Password, email password and cookie values are never copied into the
    result; cookies are only reported as a boolean.

    Args:
        acc: Account object exposing ``username``, ``email``, ``active``,
            ``locks``, ``cookies``, ``stats``, ``last_used`` and
            ``error_msg``.

    Returns:
        A dict with the keys ``username``, ``email``, ``active``, ``locked``,
        ``locked_until``, ``has_cookies``, ``total_requests``, ``last_used``
        and ``error_msg``.
    """
    now = datetime.now(timezone.utc)
    locked_until = None
    for ts in (acc.locks or {}).values():
        if not ts:
            continue
        if ts.tzinfo is None:
            # A naive timestamp cannot be compared with the aware ``now``
            # (TypeError). NOTE(review): assumes naive lock times are UTC.
            ts = ts.replace(tzinfo=timezone.utc)
        # Keep the latest lock that is still in the future.
        if ts > now and (locked_until is None or ts > locked_until):
            locked_until = ts
    return {
        "username": acc.username,
        # "_" is the placeholder written when no email was supplied.
        "email": acc.email if acc.email and acc.email != "_" else None,
        "active": bool(acc.active),
        "locked": locked_until is not None,
        "locked_until": locked_until.isoformat() if locked_until else None,
        "has_cookies": bool(acc.cookies),
        "total_requests": sum((acc.stats or {}).values()),
        "last_used": acc.last_used.isoformat() if acc.last_used else None,
        "error_msg": acc.error_msg or None,
    }
|
||||
|
||||
|
||||
class XScraperCreate(BaseModel):
    """Request body for creating a new X scraper account.

    Only ``username`` and ``cookies`` are required; the remaining fields
    default to an empty string.
    """

    username: str = Field(max_length=100, min_length=1)
    password: str = Field(max_length=200, default="")
    email: str = Field(max_length=200, default="")
    email_password: str = Field(max_length=200, default="")
    cookies: str = Field(max_length=4000, min_length=1)
|
||||
|
||||
|
||||
class XScraperCookies(BaseModel):
    """Request body carrying a fresh cookie string for an existing account."""

    cookies: str = Field(max_length=4000, min_length=1)
|
||||
|
||||
|
||||
class XScraperActive(BaseModel):
    """Request body for switching an account on or off."""

    # Target state: True activates the account, False deactivates it.
    active: bool
|
||||
|
||||
|
||||
@router.get("/accounts")
async def list_accounts(admin: dict = Depends(get_current_admin)):
    """List all X scraper accounts (without passwords or cookies).

    Raises:
        HTTPException: 500 when the accounts cannot be read from the pool.
    """
    pool = _get_pool()
    try:
        stored = await pool.get_all()
    except Exception as e:
        logger.error("X-Scraper get_all fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Konten konnten nicht geladen werden")
    summaries = []
    for account in stored:
        summaries.append(_summary(account))
    return summaries
|
||||
|
||||
|
||||
@router.post("/accounts", status_code=201)
async def add_account(
    data: XScraperCreate,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Create a new X scraper account.

    Raises:
        HTTPException: 422 when the username is empty after normalisation,
            409 when the account already exists, 500 when the pool rejects
            the account or it cannot be read back afterwards.
    """
    pool = _get_pool()
    # Normalise the handle: surrounding whitespace and leading "@" are dropped.
    username = data.username.strip().lstrip("@")
    if not username:
        raise HTTPException(status_code=422, detail="Benutzername ist erforderlich")

    existing = await pool.get_account(username)
    if existing is not None:
        raise HTTPException(status_code=409, detail=f"Konto '{username}' existiert bereits")

    # Optional credentials are stored as the placeholder "_" when left empty.
    new_account = {
        "username": username,
        "password": data.password or "_",
        "email": data.email or "_",
        "email_password": data.email_password or "_",
        "cookies": data.cookies.strip(),
    }
    try:
        await pool.add_account(**new_account)
    except Exception as e:
        logger.error("X-Scraper add_account fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Konto konnte nicht angelegt werden")

    # Read the account back to confirm it was actually stored.
    created = await pool.get_account(username)
    if created is None:
        raise HTTPException(status_code=500, detail="Konto wurde nicht gespeichert, bitte Cookies pruefen")

    await log_action(
        db, admin, get_client_ip(request), action="create",
        resource_type="x_scraper_account", after={"username": username, "email": data.email},
    )
    return _summary(created)
|
||||
|
||||
|
||||
@router.post("/accounts/{username}/cookies")
async def refresh_cookies(
    username: str,
    data: XScraperCookies,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Renew the cookies of an existing account (refresh its login).

    The account is deleted and re-created with the new cookies. If the
    re-creation fails, the previously stored record is written back so that a
    bad cookie string does not silently remove the account.

    Raises:
        HTTPException: 404 when the account does not exist, 500 when the
            cookies cannot be renewed or the account is missing afterwards.
    """
    pool = _get_pool()
    original = await pool.get_account(username)
    if original is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")

    async def _restore_original() -> None:
        """Write the pre-refresh record back; failures are only logged."""
        try:
            await pool.save(original)
        except Exception as restore_err:
            logger.error(
                "X-Scraper Konto %s konnte nicht wiederhergestellt werden: %s",
                username, restore_err,
            )

    # This code re-creates the account instead of updating it in place. Only
    # password, email, email password and cookies are passed on; anything
    # else on the old record is not carried over.
    try:
        await pool.delete_accounts([username])
        await pool.add_account(
            username=username,
            password=original.password,
            email=original.email,
            email_password=original.email_password,
            cookies=data.cookies.strip(),
        )
    except Exception as e:
        logger.error("X-Scraper Cookie-Refresh fehlgeschlagen: %s", e)
        # The delete may already have gone through: put the old record back.
        await _restore_original()
        raise HTTPException(status_code=500, detail="Cookies konnten nicht erneuert werden")

    refreshed = await pool.get_account(username)
    if refreshed is None:
        # The re-add did not store anything; do not leave the account deleted.
        await _restore_original()
        raise HTTPException(status_code=500, detail="Konto nach Cookie-Refresh nicht gefunden")

    await log_action(
        db, admin, get_client_ip(request), action="update",
        resource_type="x_scraper_account", after={"username": username, "change": "cookies"},
    )
    return _summary(refreshed)
|
||||
|
||||
|
||||
@router.post("/accounts/{username}/active")
async def set_active(
    username: str,
    data: XScraperActive,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Switch an account to active or inactive.

    Raises:
        HTTPException: 404 when the account does not exist, 500 when the
            pool fails to change the state.
    """
    pool = _get_pool()
    known = await pool.get_account(username)
    if known is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")

    try:
        await pool.set_active(username, data.active)
    except Exception as e:
        logger.error("X-Scraper set_active fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Status konnte nicht geaendert werden")

    audit_payload = {"username": username, "active": data.active}
    await log_action(
        db, admin, get_client_ip(request), action="update",
        resource_type="x_scraper_account", after=audit_payload,
    )
    # Re-read the account so the response reflects the stored state.
    updated = await pool.get_account(username)
    return _summary(updated)
|
||||
|
||||
|
||||
@router.delete("/accounts/{username}", status_code=204)
async def delete_account(
    username: str,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Remove an X scraper account.

    Raises:
        HTTPException: 404 when the account does not exist, 500 when the
            pool fails to delete it.
    """
    pool = _get_pool()
    target = await pool.get_account(username)
    if target is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")

    try:
        await pool.delete_accounts([username])
    except Exception as e:
        logger.error("X-Scraper delete fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Konto konnte nicht entfernt werden")

    removed = {"username": username}
    await log_action(
        db, admin, get_client_ip(request), action="delete",
        resource_type="x_scraper_account", before=removed,
    )
|
||||
|
||||
|
||||
@router.post("/reset-locks")
async def reset_locks(
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Reset all temporary locks of the accounts.

    Raises:
        HTTPException: 500 when the pool fails to reset the locks.
    """
    pool = _get_pool()
    try:
        await pool.reset_locks()
    except Exception as e:
        logger.error("X-Scraper reset_locks fehlgeschlagen: %s", e)
        raise HTTPException(status_code=500, detail="Sperren konnten nicht zurueckgesetzt werden")

    change = {"change": "reset_locks"}
    await log_action(
        db, admin, get_client_ip(request), action="update",
        resource_type="x_scraper_account", after=change,
    )
    return {"status": "ok"}
|
||||
@@ -27,6 +27,7 @@ SOURCE_CATEGORIES: list[CategoryEntry] = [
|
||||
{"key": "international", "label": "International"},
|
||||
{"key": "regional", "label": "Regional"},
|
||||
{"key": "boulevard", "label": "Boulevard"},
|
||||
{"key": "stimmungsbild", "label": "Forum / Stimmungsbild"},
|
||||
{"key": "sonstige", "label": "Sonstige"},
|
||||
{"key": "cybercrime", "label": "Cybercrime / Hacktivismus"},
|
||||
{"key": "cybercrime-leaks", "label": "Cybercrime / Leaks"},
|
||||
|
||||
@@ -59,6 +59,32 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Artikel-Übersetzung -->
|
||||
<div class="card" id="translationCard" style="margin-top:16px;">
|
||||
<div class="card-header">
|
||||
<h2>Artikel-Übersetzung</h2>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p class="text-muted" style="margin-top:0;">
|
||||
Die automatische Übersetzung im Monitor ist deaktiviert. Hier lassen sich
|
||||
fremdsprachige Artikel ohne deutsche Fassung manuell übersetzen.
|
||||
</p>
|
||||
<p id="translationInfo" style="margin:12px 0;">Status wird geladen…</p>
|
||||
|
||||
<div id="translationProgressWrap" style="display:none; margin:12px 0;">
|
||||
<div style="background:rgba(128,128,128,0.25); border-radius:6px; height:14px; overflow:hidden;">
|
||||
<div id="translationProgressBar" style="background:#1565c0; height:100%; width:0%; transition:width .3s;"></div>
|
||||
</div>
|
||||
<p class="text-muted" id="translationProgressText" style="margin:6px 0 0;"></p>
|
||||
</div>
|
||||
|
||||
<div style="margin-top:12px; display:flex; gap:8px;">
|
||||
<button class="btn btn-primary" id="translationRunBtn">Übersetzung starten</button>
|
||||
<button class="btn btn-danger" id="translationCancelBtn" style="display:none;">Abbrechen</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Organizations Section -->
|
||||
@@ -303,6 +329,7 @@
|
||||
<button class="nav-tab" data-subtab="tenant-sources">Kundenquellen</button>
|
||||
<button class="nav-tab" data-subtab="source-health">Quellen-Health</button>
|
||||
<button class="nav-tab" data-subtab="classification-review">Klassifikation <span class="sources-tab-badge" id="classificationPendingBadge">0</span></button>
|
||||
<button class="nav-tab" data-subtab="x-scraper">X-Recherche-Konten</button>
|
||||
</div>
|
||||
|
||||
<!-- Grundquellen -->
|
||||
@@ -445,6 +472,37 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- X-Recherche-Konten (Sub-Tab) -->
|
||||
<div class="section" id="sub-x-scraper">
|
||||
<div class="action-bar">
|
||||
<div style="display:flex;align-items:center;gap:12px;flex-wrap:wrap;">
|
||||
<span class="text-secondary" id="xScraperCount"></span>
|
||||
</div>
|
||||
<div style="display:flex;gap:8px;">
|
||||
<button class="btn btn-secondary" onclick="resetXScraperLocks()">Sperren zurücksetzen</button>
|
||||
<button class="btn btn-primary" onclick="openXScraperAddModal()">+ Konto hinzufügen</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card">
|
||||
<p class="text-secondary" style="padding:0 4px 12px;">X-Login-Konten, mit denen der Monitor bei X recherchiert. Mehr Konten bedeuten paralleleres, schnelleres Scrapen. Cookies laufen periodisch ab und müssen dann erneuert werden.</p>
|
||||
<div class="table-wrap">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Benutzername</th>
|
||||
<th>E-Mail</th>
|
||||
<th>Status</th>
|
||||
<th>Anfragen</th>
|
||||
<th>Letzte Nutzung</th>
|
||||
<th>Aktionen</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="xScraperTable"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div> <!-- /sec-sources -->
|
||||
|
||||
<!-- Audit-Log Section -->
|
||||
@@ -912,8 +970,77 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/js/app.js?v=20260513a"></script>
|
||||
<script src="/static/js/sources.js?v=20260509d"></script>
|
||||
<!-- Modal: X-Recherche-Konto hinzufügen -->
|
||||
<div class="modal-overlay" id="modalXScraperAdd">
|
||||
<div class="modal">
|
||||
<div class="modal-header">
|
||||
<h3>X-Recherche-Konto hinzufügen</h3>
|
||||
<button class="modal-close" onclick="closeModal('modalXScraperAdd')">×</button>
|
||||
</div>
|
||||
<form id="xScraperAddForm">
|
||||
<div class="modal-body">
|
||||
<div class="form-group">
|
||||
<label for="xsUsername">X-Benutzername</label>
|
||||
<input type="text" id="xsUsername" required placeholder="Login-Handle des Kontos, ohne @">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="xsPassword">X-Passwort</label>
|
||||
<input type="password" id="xsPassword" placeholder="optional">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="xsEmail">E-Mail</label>
|
||||
<input type="text" id="xsEmail" placeholder="optional, z.B. konto@protonmail.com">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="xsEmailPassword">E-Mail-Passwort</label>
|
||||
<input type="password" id="xsEmailPassword" placeholder="optional">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="xsCookies">Cookies</label>
|
||||
<textarea id="xsCookies" rows="3" required placeholder="auth_token=...; ct0=..."></textarea>
|
||||
<small class="text-secondary">Aus dem eingeloggten X-Browser exportiert, mindestens auth_token und ct0.</small>
|
||||
</div>
|
||||
<div id="xScraperAddError" class="error-msg" style="display:none"></div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-secondary" onclick="closeModal('modalXScraperAdd')">Abbrechen</button>
|
||||
<button type="submit" class="btn btn-primary">Konto anlegen</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Modal: X-Recherche-Konto Cookies erneuern -->
|
||||
<div class="modal-overlay" id="modalXScraperCookies">
|
||||
<div class="modal">
|
||||
<div class="modal-header">
|
||||
<h3>Cookies erneuern</h3>
|
||||
<button class="modal-close" onclick="closeModal('modalXScraperCookies')">×</button>
|
||||
</div>
|
||||
<form id="xScraperCookiesForm">
|
||||
<div class="modal-body">
|
||||
<div class="form-group">
|
||||
<label for="xsCookiesUsername">Konto</label>
|
||||
<input type="text" id="xsCookiesUsername" readonly>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="xsCookiesValue">Neue Cookies</label>
|
||||
<textarea id="xsCookiesValue" rows="3" required placeholder="auth_token=...; ct0=..."></textarea>
|
||||
<small class="text-secondary">Frisch aus dem eingeloggten X-Browser exportieren.</small>
|
||||
</div>
|
||||
<div id="xScraperCookiesError" class="error-msg" style="display:none"></div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-secondary" onclick="closeModal('modalXScraperCookies')">Abbrechen</button>
|
||||
<button type="submit" class="btn btn-primary">Cookies setzen</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/js/app.js?v=20260522a"></script>
|
||||
<script src="/static/js/sources.js?v=20260522x2"></script>
|
||||
<script src="/static/js/x-scraper.js?v=20260522a"></script>
|
||||
<script src="/static/js/source-health.js?v=20260509l"></script>
|
||||
<script src="/static/js/audit.js?v=20260509d"></script>
|
||||
<div id="toastContainer" class="toast-container" aria-live="polite" aria-atomic="true"></div>
|
||||
|
||||
@@ -59,8 +59,10 @@ document.addEventListener("DOMContentLoaded", () => {
|
||||
setupNavTabs();
|
||||
setupOrgDetailTabs();
|
||||
setupForms();
|
||||
setupTranslation();
|
||||
loadDashboard();
|
||||
loadDashboardTokenStats();
|
||||
loadTranslationStatus();
|
||||
loadOrgs();
|
||||
});
|
||||
|
||||
@@ -80,6 +82,7 @@ function setupNavTabs() {
|
||||
document.querySelectorAll(".app-content > .section").forEach(s => s.classList.remove("active"));
|
||||
document.getElementById(`sec-${section}`).classList.add("active");
|
||||
|
||||
if (section === "dashboard") loadTranslationStatus();
|
||||
if (section === "licenses") loadExpiringLicenses();
|
||||
if (section === "audit" && typeof loadAudit === "function") loadAudit();
|
||||
});
|
||||
@@ -652,6 +655,151 @@ function formatDate(iso) {
|
||||
}
|
||||
|
||||
|
||||
// ===== Artikel-Übersetzung =====
|
||||
let translationPollTimer = null;
|
||||
|
||||
// Wires the click handlers of the translation card; missing buttons are skipped.
function setupTranslation() {
    const handlers = [
        ["translationRunBtn", startTranslation],
        ["translationCancelBtn", cancelTranslation],
    ];
    for (const [id, handler] of handlers) {
        const button = document.getElementById(id);
        if (button) button.addEventListener("click", handler);
    }
}
|
||||
|
||||
/**
 * Formats a duration in seconds as a short German label.
 *
 * Below one minute the result is in seconds, below one hour in rounded
 * minutes, otherwise in hours plus the remaining minutes. Missing, NaN or
 * negative input is treated as 0.
 *
 * @param {number} seconds Duration in seconds.
 * @returns {string} For example "45 Sek.", "10 Min.", "2 Std." or "1 Std. 30 Min.".
 */
function formatDuration(seconds) {
    seconds = Math.max(0, Math.round(seconds || 0));
    if (seconds < 60) return seconds + " Sek.";
    const min = Math.round(seconds / 60);
    if (min < 60) return min + " Min.";
    const h = Math.floor(min / 60), m = min % 60;
    // Build the label without a trailing space when there are no minutes.
    // The previous .trim() only applied to the minutes part.
    return m ? `${h} Std. ${m} Min.` : `${h} Std.`;
}
|
||||
|
||||
// Renders the translation card from a status object returned by the status endpoint.
function renderTranslation(st) {
    const byId = (id) => document.getElementById(id);
    const info = byId("translationInfo");
    const wrap = byId("translationProgressWrap");
    const bar = byId("translationProgressBar");
    const ptext = byId("translationProgressText");
    const runBtn = byId("translationRunBtn");
    const cancelBtn = byId("translationCancelBtn");
    if (!info || !runBtn) return;

    // Running job: show progress bar and cancel button, hide the start button.
    if (st.running) {
        runBtn.style.display = "none";
        cancelBtn.style.display = "";
        wrap.style.display = "";
        const pct = st.total > 0 ? Math.round((st.done / st.total) * 100) : 0;
        bar.style.width = pct + "%";
        ptext.textContent = `${st.done} / ${st.total} verarbeitet, ${st.translated} übersetzt (${pct}%)`;
        info.textContent = "Übersetzung läuft…";
        return;
    }

    // Idle: start button visible, progress UI hidden.
    runBtn.style.display = "";
    cancelBtn.style.display = "none";
    wrap.style.display = "none";

    // Summary of the previous run, if there was one worth reporting.
    let resultLine = "";
    const hasPreviousRun = st.finished_at && (st.total > 0 || st.error);
    if (hasPreviousRun) {
        if (st.error) {
            resultLine = `Letzter Lauf mit Fehler beendet: ${st.error}. `;
        } else {
            resultLine = st.cancelled
                ? `Letzter Lauf abgebrochen, ${st.translated} von ${st.total} Artikeln übersetzt. `
                : `Letzter Lauf abgeschlossen, ${st.translated} Artikel übersetzt. `;
        }
    }

    if (!(st.pending > 0)) {
        info.textContent = resultLine + "Alle Artikel sind übersetzt.";
        runBtn.disabled = true;
        return;
    }

    const est = st.estimate || {};
    info.textContent = resultLine +
        `${st.pending} Artikel ohne deutsche Übersetzung. ` +
        `Geschätzt: ${formatDuration(est.seconds)}, ca. $${est.cost_usd}.`;
    runBtn.disabled = false;
}
|
||||
|
||||
// Fetches the translation status once, renders it and starts polling while a job runs.
async function loadTranslationStatus() {
    try {
        const status = await API.get("/api/translation/status");
        renderTranslation(status);
        const needsPolling = status.running && !translationPollTimer;
        if (needsPolling) {
            translationPollTimer = setInterval(pollTranslation, 3000);
        }
    } catch (e) {
        const info = document.getElementById("translationInfo");
        if (!info) return;
        info.textContent = "Status nicht abrufbar: " + (e.message || e);
    }
}
|
||||
|
||||
// Poll tick: re-renders the status and, once the job has stopped, ends polling and shows a toast.
async function pollTranslation() {
    try {
        const status = await API.get("/api/translation/status");
        renderTranslation(status);
        if (status.running) return;

        clearInterval(translationPollTimer);
        translationPollTimer = null;

        if (status.error) {
            showToast("Übersetzung mit Fehler beendet", "error");
            return;
        }
        if (status.cancelled) {
            showToast(`Übersetzung abgebrochen, ${status.translated} übersetzt`, "info");
            return;
        }
        showToast(`Übersetzung fertig: ${status.translated} Artikel`, "success");
    } catch (e) {
        // A failed poll is only logged; the interval keeps running.
        console.warn("Translation-Poll fehlgeschlagen:", e);
    }
}
|
||||
|
||||
/**
 * Ask for confirmation and start a translation run.
 *
 * Flow: fetch the current status, bail out with a toast if a run is already
 * active or nothing is pending, show a confirmation dialog with the estimate,
 * then POST the run request and make sure status polling is active.
 *
 * The result of showConfirm is awaited, so it is expected to resolve to a
 * truthy value when the user confirms.
 */
async function startTranslation() {
    let st;
    try {
        st = await API.get("/api/translation/status");
    } catch (e) {
        showToast(e.message || "Status nicht abrufbar", "error");
        return;
    }
    if (st.running) { showToast("Es läuft bereits eine Übersetzung", "info"); return; }
    if (!st.pending) { showToast("Es gibt nichts zu übersetzen", "info"); return; }

    // The estimate may be absent; show "unbekannt" instead of rendering
    // "$undefined" or handing undefined to formatDuration.
    const est = st.estimate || {};
    const durationText = est.seconds != null ? formatDuration(est.seconds) : "unbekannt";
    const costText = est.cost_usd != null ? `ca. $${est.cost_usd}` : "unbekannt";

    const ok = await showConfirm(
        "Übersetzung starten",
        `${st.pending} Artikel werden ins Deutsche übersetzt. ` +
        `Geschätzte Dauer: ${durationText}, geschätzte Kosten: ${costText}. ` +
        `Der Lauf kann jederzeit abgebrochen werden.`
    );
    if (!ok) return;

    try {
        const res = await API.post("/api/translation/run", {});
        if (res && res.status === "started") {
            showToast(`Übersetzung gestartet (${res.pending} Artikel)`, "success");
            await loadTranslationStatus();
            // loadTranslationStatus only starts the timer when the status it
            // fetched already reports "running"; start it here regardless.
            if (!translationPollTimer) {
                translationPollTimer = setInterval(pollTranslation, 3000);
            }
        } else {
            showToast("Es gibt nichts zu übersetzen", "info");
            loadTranslationStatus();
        }
    } catch (e) {
        showToast(e.message || "Start fehlgeschlagen", "error");
    }
}
|
||||
|
||||
/**
 * Request cancellation of the running translation and report the outcome
 * of the request via toast.
 */
async function cancelTranslation() {
    try {
        await API.post("/api/translation/cancel", {});
        showToast("Übersetzung wird abgebrochen…", "info");
    } catch (err) {
        const reason = err.message || "Abbruch fehlgeschlagen";
        showToast(reason, "error");
    }
}
|
||||
|
||||
// ===== Token-Nutzung =====
|
||||
async function loadOrgTokenUsage(orgId) {
|
||||
try {
|
||||
|
||||
@@ -38,6 +38,7 @@ function setupSourceSubTabs() {
|
||||
else if (subtab === "tenant-sources") loadTenantSources();
|
||||
else if (subtab === "source-health") loadHealthData();
|
||||
else if (subtab === "classification-review") loadClassificationQueue();
|
||||
else if (subtab === "x-scraper") loadXScraperAccounts();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
169
src/static/js/x-scraper.js
Normale Datei
169
src/static/js/x-scraper.js
Normale Datei
@@ -0,0 +1,169 @@
|
||||
/* X-Recherche-Konten: Verwaltung des twscrape-Account-Pools */
|
||||
"use strict";
|
||||
|
||||
let xScraperCache = [];
|
||||
|
||||
/**
 * Load the X research accounts from the API and render them into
 * #xScraperTable. Also wires the add/cookies forms (idempotent).
 *
 * While loading, a placeholder row is shown; on failure the error message is
 * rendered (HTML-escaped) in its place and the previous cache is kept.
 */
async function loadXScraperAccounts() {
    setupXScraperForms();
    const tbody = document.getElementById("xScraperTable");
    // Nothing to render into (e.g. the tab markup is not on this page).
    if (!tbody) return;
    tbody.innerHTML = '<tr><td colspan="6" class="text-muted">Lade...</td></tr>';
    try {
        const accounts = await API.get("/api/x-scraper/accounts");
        // Keep the cache and the rendered list in sync: an empty/null
        // response is stored as an empty list rather than as null.
        xScraperCache = accounts || [];
        renderXScraperAccounts(xScraperCache);
    } catch (err) {
        tbody.innerHTML = '<tr><td colspan="6" class="text-muted">Fehler: ' + esc(err.message || "") + '</td></tr>';
    }
}
|
||||
|
||||
/**
 * Render the account list into #xScraperTable and update #xScraperCount.
 *
 * Security: usernames are never interpolated into inline event-handler code.
 * HTML-escaping a value inside onclick="fn('...')" does not protect it,
 * because the HTML parser decodes entities before the handler source is
 * evaluated. The action buttons therefore only carry an action name and the
 * numeric row index; a single delegated click handler looks the account up
 * in `list` and calls the action with the raw username.
 *
 * @param {Array<Object>} list accounts; the fields read here are username,
 *   email, active, locked, total_requests, last_used and error_msg.
 */
function renderXScraperAccounts(list) {
    const tbody = document.getElementById("xScraperTable");
    const cnt = document.getElementById("xScraperCount");
    if (cnt) cnt.textContent = list.length + (list.length === 1 ? " Konto" : " Konten");
    if (!tbody) return;

    if (!list.length) {
        tbody.onclick = null;
        tbody.innerHTML = '<tr><td colspan="6" class="text-muted">Keine X-Recherche-Konten. Mit „+ Konto hinzufügen" anlegen.</td></tr>';
        return;
    }

    // Button markup contains only static labels/classes and a numeric index.
    const actionButton = (cssClass, action, idx, label) =>
        '<button class="btn ' + cssClass + ' btn-small" data-xs-action="' + action
        + '" data-xs-idx="' + idx + '">' + label + '</button>';

    tbody.innerHTML = list.map((a, idx) => {
        let status;
        if (!a.active) status = '<span class="text-muted">Inaktiv</span>';
        else if (a.locked) status = '<span style="color:var(--warning,#b8860b);">Gesperrt</span>';
        else status = '<span style="color:var(--success,#2e7d32);">Aktiv</span>';

        // formatDateTime is optional; fall back to the raw value or a dash.
        const lastUsed = a.last_used && typeof formatDateTime === "function"
            ? formatDateTime(a.last_used)
            : (a.last_used || "—");
        const errInfo = a.error_msg
            ? ' <span class="info-icon" title="' + esc(a.error_msg) + '">!</span>'
            : "";
        const toggleLabel = a.active ? "Deaktivieren" : "Aktivieren";

        return '<tr>'
            + '<td><strong>' + esc(a.username) + '</strong>' + errInfo + '</td>'
            + '<td>' + esc(a.email || "—") + '</td>'
            + '<td>' + status + '</td>'
            + '<td>' + esc(String(a.total_requests || 0)) + '</td>'
            + '<td>' + esc(lastUsed) + '</td>'
            + '<td>'
            + actionButton("btn-secondary", "cookies", idx, "Cookies erneuern") + ' '
            + actionButton("btn-secondary", "toggle", idx, toggleLabel) + ' '
            + actionButton("btn-danger", "delete", idx, "Entfernen")
            + '</td>'
            + '</tr>';
    }).join("");

    // Assigning (not adding) the handler keeps re-renders idempotent and
    // binds the handler to the list that was just rendered.
    tbody.onclick = (ev) => {
        const origin = ev.target;
        const btn = origin && origin.closest ? origin.closest("button[data-xs-action]") : null;
        if (!btn || !tbody.contains(btn)) return;
        const account = list[Number(btn.dataset.xsIdx)];
        if (!account) return;
        switch (btn.dataset.xsAction) {
            case "cookies":
                openXScraperCookiesModal(account.username);
                break;
            case "toggle":
                toggleXScraperActive(account.username, !account.active);
                break;
            case "delete":
                confirmDeleteXScraper(account.username);
                break;
            default:
                break;
        }
    };
}
|
||||
|
||||
/**
 * Open the "add account" modal with a hidden error box and emptied inputs.
 * Input elements that do not exist are skipped.
 */
function openXScraperAddModal() {
    const errorBox = document.getElementById("xScraperAddError");
    errorBox.style.display = "none";

    const fieldIds = ["xsUsername", "xsPassword", "xsEmail", "xsEmailPassword", "xsCookies"];
    for (const fieldId of fieldIds) {
        const field = document.getElementById(fieldId);
        if (!field) continue;
        field.value = "";
    }

    openModal("modalXScraperAdd");
}
|
||||
|
||||
/**
 * Open the "renew cookies" modal for one account: hide the error box,
 * store the username in its field and clear the cookie input.
 *
 * @param {string} username account whose cookies are to be replaced
 */
function openXScraperCookiesModal(username) {
    const errorBox = document.getElementById("xScraperCookiesError");
    errorBox.style.display = "none";

    const userField = document.getElementById("xsCookiesUsername");
    userField.value = username;

    const cookieField = document.getElementById("xsCookiesValue");
    cookieField.value = "";

    openModal("modalXScraperCookies");
}
|
||||
|
||||
/**
 * Set the active flag of an account via the API, then reload the table.
 * Success and failure are both reported via toast.
 *
 * @param {string} username account to change
 * @param {boolean} active new value of the active flag
 */
async function toggleXScraperActive(username, active) {
    try {
        const endpoint = `/api/x-scraper/accounts/${encodeURIComponent(username)}/active`;
        await API.post(endpoint, { active });
        showToast("Status geändert.", "success");
        loadXScraperAccounts();
    } catch (err) {
        const reason = err.message || "Status konnte nicht geändert werden";
        showToast(reason, "error");
    }
}
|
||||
|
||||
/**
 * Ask for confirmation and, once confirmed, delete the account via the API
 * and reload the table.
 *
 * NOTE(review): showConfirm receives the action as a callback (third
 * argument) here, while startTranslation awaits its return value instead.
 * Confirm that showConfirm supports both calling conventions.
 *
 * @param {string} username account to remove
 */
function confirmDeleteXScraper(username) {
    const question = 'Soll das X-Recherche-Konto "' + username + '" entfernt werden? Der Monitor nutzt es dann nicht mehr zum Scrapen.';

    const removeAccount = async () => {
        try {
            await API.del("/api/x-scraper/accounts/" + encodeURIComponent(username));
            showToast("Konto entfernt.", "success");
            loadXScraperAccounts();
        } catch (err) {
            const reason = err.message || "Konto konnte nicht entfernt werden";
            showToast(reason, "error");
        }
    };

    showConfirm("Konto entfernen", question, removeAccount);
}
|
||||
|
||||
/**
 * Ask for confirmation and, once confirmed, reset the temporary locks of all
 * X research accounts via the API, then reload the table.
 */
function resetXScraperLocks() {
    const performReset = async () => {
        try {
            await API.post("/api/x-scraper/reset-locks", {});
            showToast("Sperren zurückgesetzt.", "success");
            loadXScraperAccounts();
        } catch (err) {
            const reason = err.message || "Sperren konnten nicht zurückgesetzt werden";
            showToast(reason, "error");
        }
    };

    showConfirm(
        "Sperren zurücksetzen",
        "Alle temporären Sperren der X-Recherche-Konten zurücksetzen?",
        performReset
    );
}
|
||||
|
||||
/**
 * Attach the submit handlers of the "add account" and "renew cookies" forms.
 *
 * Safe to call repeatedly: a form is marked with data-wired="1" the first
 * time and skipped afterwards. Forms that are not in the DOM are ignored.
 */
function setupXScraperForms() {
    // Show a message in an inline error box.
    const revealError = (box, message) => {
        box.textContent = message;
        box.style.display = "block";
    };

    // Current value of an input, looked up by element id.
    const fieldValue = (id) => document.getElementById(id).value;

    // Register a submit handler exactly once per form element.
    const wireOnce = (formId, onSubmit) => {
        const form = document.getElementById(formId);
        if (!form || form.dataset.wired) return;
        form.dataset.wired = "1";
        form.addEventListener("submit", onSubmit);
    };

    wireOnce("xScraperAddForm", async (ev) => {
        ev.preventDefault();
        const errorBox = document.getElementById("xScraperAddError");
        errorBox.style.display = "none";

        const payload = {
            // A leading "@" is dropped from the username.
            username: fieldValue("xsUsername").trim().replace(/^@/, ""),
            password: fieldValue("xsPassword"),
            email: fieldValue("xsEmail").trim(),
            email_password: fieldValue("xsEmailPassword"),
            cookies: fieldValue("xsCookies").trim(),
        };
        if (!(payload.username && payload.cookies)) {
            revealError(errorBox, "Benutzername und Cookies sind erforderlich.");
            return;
        }

        try {
            await API.post("/api/x-scraper/accounts", payload);
            closeModal("modalXScraperAdd");
            showToast("Konto angelegt.", "success");
            loadXScraperAccounts();
        } catch (err) {
            revealError(errorBox, err.message || "Anlegen fehlgeschlagen");
        }
    });

    wireOnce("xScraperCookiesForm", async (ev) => {
        ev.preventDefault();
        const errorBox = document.getElementById("xScraperCookiesError");
        errorBox.style.display = "none";

        const account = fieldValue("xsCookiesUsername");
        const cookieText = fieldValue("xsCookiesValue").trim();
        if (!cookieText) {
            revealError(errorBox, "Cookies sind erforderlich.");
            return;
        }

        try {
            await API.post(
                "/api/x-scraper/accounts/" + encodeURIComponent(account) + "/cookies",
                { cookies: cookieText }
            );
            closeModal("modalXScraperCookies");
            showToast("Cookies erneuert.", "success");
            loadXScraperAccounts();
        } catch (err) {
            revealError(errorBox, err.message || "Cookies konnten nicht erneuert werden");
        }
    });
}
|
||||
422
src/translation_agent.py
Normale Datei
422
src/translation_agent.py
Normale Datei
@@ -0,0 +1,422 @@
|
||||
"""Translator-Agent: übersetzt fremdsprachige Artikel ins Deutsche.
|
||||
|
||||
Verwaltungs-Adaption des gleichnamigen Monitor-Agents. Nutzt CLAUDE_MODEL_FAST
|
||||
(Haiku) in Batches. Im Verwaltungsportal wird der Translator ausschließlich
|
||||
manuell über den Übersetzungs-Button (routers/translation.py) angestoßen,
|
||||
niemals automatisch.
|
||||
|
||||
Quelle: AegisSight-Monitor/src/agents/translator.py - bei größeren Änderungen
|
||||
am Monitor-Original hier nachziehen. Die Imports weichen bewusst ab
|
||||
(shared.agents.claude_client statt agents.claude_client). Der restliche Code
|
||||
unterhalb ist eine 1:1-Kopie und behält daher den Stil des Originals.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
||||
from shared.agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator
|
||||
from config import CLAUDE_MODEL_FAST
|
||||
|
||||
logger = logging.getLogger("verwaltung.translation")
|
||||
|
||||
# Im Verwaltungsportal gibt es kein automatisches Übersetzen: der Translator
|
||||
# läuft nur, wenn translate_articles() explizit mit enabled=True gerufen wird.
|
||||
# Diese Konstante ist daher der konservative Default für enabled=None.
|
||||
TRANSLATOR_ENABLED = False
|
||||
|
||||
# Pro Batch nicht mehr als so viele Artikel an Claude geben.
|
||||
# Bei Haiku ist das Output-Limit ca. 8k Tokens. Pro Artikel kommen leicht
|
||||
# 400-600 Tokens raus (headline_de + content_de bis 1000 Zeichen). Bei 15
|
||||
# wurde die Antwort regelmaessig abgeschnitten (JSON brach mitten im Objekt ab). 5 ist sicher mit Reserve.
|
||||
DEFAULT_BATCH_SIZE = 5
|
||||
|
||||
# content_original wird ohnehin auf 1000 Zeichen gecappt (rss_parser).
|
||||
# Fuer den Translator nochmal kuerzen, falls doch mehr vorhanden ist.
|
||||
CONTENT_INPUT_MAX = 1200
|
||||
|
||||
# content_de soll wie content_original auf 1000 Zeichen begrenzt sein.
|
||||
CONTENT_OUTPUT_MAX = 1000
|
||||
|
||||
|
||||
def _extract_complete_objects(text: str) -> list[dict]:
|
||||
"""Extrahiert vollstaendige JSON-Objekte aus moeglicherweise abgeschnittenem Text.
|
||||
|
||||
Klammer-Counter-Ansatz: jedes balancierte {...} wird probiert.
|
||||
"""
|
||||
results = []
|
||||
depth = 0
|
||||
start = -1
|
||||
in_string = False
|
||||
escape = False
|
||||
for i, ch in enumerate(text):
|
||||
if escape:
|
||||
escape = False
|
||||
continue
|
||||
if ch == "\\":
|
||||
escape = True
|
||||
continue
|
||||
if ch == '"' and not escape:
|
||||
in_string = not in_string
|
||||
continue
|
||||
if in_string:
|
||||
continue
|
||||
if ch == "{":
|
||||
if depth == 0:
|
||||
start = i
|
||||
depth += 1
|
||||
elif ch == "}":
|
||||
depth -= 1
|
||||
if depth == 0 and start >= 0:
|
||||
obj_text = text[start:i + 1]
|
||||
try:
|
||||
obj = json.loads(obj_text)
|
||||
if isinstance(obj, dict):
|
||||
results.append(obj)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
start = -1
|
||||
return results
|
||||
|
||||
|
||||
def _build_prompt(articles: list[dict], output_lang: str = "de") -> str:
    """Build the translation prompt for one batch of articles.

    Each article contributes its id, headline, the first CONTENT_INPUT_MAX
    characters of content_original and its language (default "en") to a JSON
    payload that is appended to the instruction text.
    """
    known_labels = {"de": "Deutsch", "en": "Englisch"}
    lang_label = known_labels.get(output_lang, output_lang)

    items = [
        {
            "id": article["id"],
            "headline": article.get("headline", "") or "",
            "content": (article.get("content_original") or "")[:CONTENT_INPUT_MAX],
            "source_lang": article.get("language", "en"),
        }
        for article in articles
    ]
    payload = json.dumps(items, ensure_ascii=False, indent=2)

    return f"""Du bist ein praeziser Uebersetzer fuer Nachrichten-Artikel.
Uebersetze die folgenden Artikel nach {lang_label}.

WICHTIG:
- Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) - NIEMALS Umschreibungen wie ae, oe, ue, ss.
Beispiele: "Gespraeche" -> "Gespräche", "Fuehrer" -> "Führer", "grosse" -> "große".
- Behalte Eigennamen (Personen, Orte, Organisationen) im Original.
- Headline kurz und buendig wie im Original.
- Content auf MAX {CONTENT_OUTPUT_MAX} Zeichen kuerzen, kein HTML, kein Markdown.
- Wenn der Artikel schon auf {lang_label} ist (z.B. source_lang="{output_lang}"),
kopiere headline und content unveraendert.

Antworte AUSSCHLIESSLICH mit einem flachen JSON-Array (kein Wrapper-Objekt!).
Format genau so:
[
{{"id": 1, "headline_de": "Titel auf Deutsch", "content_de": "Inhalt auf Deutsch"}},
{{"id": 2, "headline_de": "...", "content_de": "..."}}
]

NICHT erlaubt: {{"translations": [...]}} oder {{"items": [...]}} oder Markdown-Codefences.
Nur das Array, ohne Einleitung, ohne Erklaerung.

ARTIKEL:
{payload}
"""
|
||||
|
||||
|
||||
def _parse_response(text: str) -> list[dict]:
|
||||
"""Robustes JSON-Array-Parsing.
|
||||
|
||||
Handhabt:
|
||||
- reines JSON
|
||||
- JSON in Markdown-Codefence ```json ... ```
|
||||
- abgeschnittene Antworten (extrahiert vollstaendige Top-Level-Objekte)
|
||||
"""
|
||||
text = text.strip()
|
||||
# Markdown-Codefence entfernen
|
||||
if text.startswith("```"):
|
||||
text = re.sub(r"^```(?:json)?\s*", "", text)
|
||||
text = re.sub(r"\s*```\s*$", "", text)
|
||||
text = text.strip()
|
||||
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
# Erst Array versuchen
|
||||
match = re.search(r"\[.*\]", text, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
data = json.loads(match.group(0))
|
||||
except json.JSONDecodeError:
|
||||
# Truncate-Fallback: einzelne Top-Level-Objekte extrahieren
|
||||
data = _extract_complete_objects(text)
|
||||
else:
|
||||
data = _extract_complete_objects(text)
|
||||
|
||||
# Claude wraps das Array gelegentlich in {"translations": [...]} oder {"items": [...]}
|
||||
if isinstance(data, dict):
|
||||
for key in ("translations", "items", "results", "data"):
|
||||
if isinstance(data.get(key), list):
|
||||
data = data[key]
|
||||
break
|
||||
else:
|
||||
# Einzelnes Objekt? Dann als Liste mit einem Element behandeln
|
||||
if "id" in data:
|
||||
data = [data]
|
||||
else:
|
||||
raise ValueError(f"Translator-Antwort: Dict ohne erwarteten Array-Key (keys={list(data.keys())[:5]})")
|
||||
|
||||
if not isinstance(data, list):
|
||||
raise ValueError(f"Translator-Antwort ist kein Array: {type(data).__name__}")
|
||||
|
||||
cleaned = []
|
||||
for item in data:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
aid = item.get("id")
|
||||
if not isinstance(aid, int):
|
||||
try:
|
||||
aid = int(aid)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
cleaned.append({
|
||||
"id": aid,
|
||||
"headline_de": (item.get("headline_de") or "").strip() or None,
|
||||
"content_de": (item.get("content_de") or "").strip() or None,
|
||||
})
|
||||
return cleaned
|
||||
|
||||
|
||||
async def translate_articles_batch(
    articles: list[dict],
    output_lang: str = "de",
) -> tuple[list[dict], ClaudeUsage]:
    """Translate one batch of articles with a single model call.

    ``articles`` is a list of dicts with the fields id, headline,
    content_original and language.

    Returns ``(translations, usage)``. If the model call fails, the result is
    ``([], ClaudeUsage())``; if only the parsing fails, it is ``([], usage)``.
    Nothing is raised for either failure, so the caller decides whether to
    retry or skip. Translations whose id was not part of the batch are dropped.
    """
    if not articles:
        return [], ClaudeUsage()

    prompt = _build_prompt(articles, output_lang)

    try:
        raw_answer, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
    except Exception as exc:
        logger.error(f"Translator Claude-Call fehlgeschlagen: {exc}")
        return [], ClaudeUsage()

    try:
        parsed = _parse_response(raw_answer)
    except Exception as exc:
        logger.error(f"Translator JSON-Parsing fehlgeschlagen: {exc}; raw: {raw_answer[:300]!r}")
        return [], usage

    # Keep only translations that refer to an article of this batch.
    known_ids = {article["id"] for article in articles}
    accepted = []
    rejected = 0
    for entry in parsed:
        if entry["id"] in known_ids:
            accepted.append(entry)
        else:
            rejected += 1

    if rejected:
        logger.warning(
            "Translator: %d von %d Translations referenzieren unbekannte IDs",
            rejected, len(parsed),
        )
    return accepted, usage
|
||||
|
||||
|
||||
# --- Pre-Topic-Filter: schmale Headline-Übersetzung -----------------------------
|
||||
#
|
||||
# Der Topic-Filter (analyzer.filter_relevant_articles) ist ein Haiku-Call, der pro
|
||||
# Artikel beurteilt, ob er thematisch zur Lage passt. Bei fremdsprachigen Headlines
|
||||
# (CJK/Arabisch/Hebräisch/Kyrillisch) bewertet Haiku konservativ und verwirft sie
|
||||
# häufig, weil er sie nur halb versteht. Damit landeten z.B. die japanischen
|
||||
# Ministeriums-Feeds (MOD, NHK, Asahi) in Lagen mit Japan-Bezug nie in der finalen
|
||||
# Auswahl, obwohl der RSS-Match korrekt griff.
|
||||
#
|
||||
# Diese Funktion übersetzt in einem einzelnen Batch-Call alle nicht-lateinischen
|
||||
# Headlines + erste Content-Sätze ins Englische und hängt das Ergebnis als
|
||||
# article["headline_en_for_topic"] / article["content_en_for_topic"] an. Der
|
||||
# Topic-Filter zeigt das dem LLM zusätzlich zum Original.
|
||||
#
|
||||
# WICHTIG: Diese Mini-Übersetzung ist UNABHÄNGIG vom TRANSLATOR_ENABLED-Flag —
|
||||
# sie wird auch dann gemacht, wenn der nachgelagerte Volltext-Translator
|
||||
# deaktiviert ist (Pflicht für korrektes Topic-Filtering, sehr kleine Kosten).
|
||||
|
||||
_TOPIC_TRANSLATE_CONTENT_MAX = 500
|
||||
|
||||
|
||||
def _needs_pretopic_translate(article: dict) -> bool:
|
||||
"""Erkennt fremdsprachige Headlines, die für den Topic-Filter übersetzt
|
||||
werden sollten.
|
||||
|
||||
Heuristik: Headline enthält Non-ASCII-Zeichen, die NICHT in den typischen
|
||||
deutsch/franz./span./port./skand. Latin-1-Erweiterungen liegen.
|
||||
Das sind v.a. CJK (Kanji/Kana/Hangul), Arabisch, Hebräisch, Kyrillisch,
|
||||
Thai, Devanagari etc.
|
||||
"""
|
||||
headline = (article.get("headline_de") or article.get("headline") or "").strip()
|
||||
if not headline:
|
||||
return False
|
||||
for ch in headline:
|
||||
cp = ord(ch)
|
||||
# Bereiche ausschließen, die in Latin-Schrift normal sind:
|
||||
# ASCII (0-127), Latin-1 Supplement (128-255), Latin Extended-A/B (256-591)
|
||||
if cp <= 591:
|
||||
continue
|
||||
# Alles darüber sind fremde Schriftsysteme → übersetzen
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
async def translate_headlines_for_topic_filter(
    articles: list[dict],
    target_lang: str = "en",
) -> tuple[int, ClaudeUsage]:
    """Translate foreign-script headlines so a later topic filter can judge them.

    All articles for which ``_needs_pretopic_translate`` is true are sent in
    one model call (headline capped at 200 characters, content at
    ``_TOPIC_TRANSLATE_CONTENT_MAX``). Results are written directly onto the
    article dicts:

        article["headline_en_for_topic"]: str
        article["content_en_for_topic"]: str

    A key is only set when the model returned a non-empty string for it.
    Articles are addressed by their position in ``articles``, so they do not
    need an "id" field.

    Args:
        articles: Article dicts; modified in place.
        target_lang: Language code of the translation target.

    Returns:
        ``(number_of_articles_updated, usage)``. A failed model call yields
        ``(0, ClaudeUsage())``; an unparseable answer yields ``(0, usage)``.
    """
    if not articles:
        return 0, ClaudeUsage()

    items = []
    for idx, article in enumerate(articles):
        if not _needs_pretopic_translate(article):
            continue
        headline = (article.get("headline_de") or article.get("headline") or "").strip()
        content_src = article.get("content_de") or article.get("content_original") or ""
        items.append({
            "i": idx,
            "h": headline[:200],
            "c": content_src[:_TOPIC_TRANSLATE_CONTENT_MAX],
        })

    if not items:
        return 0, ClaudeUsage()

    # Only these positions may be written to when the answer is applied.
    sent_indices = {item["i"] for item in items}

    lang_label = {"en": "English", "de": "German"}.get(target_lang, target_lang)
    prompt = f"""Translate these news headlines and short content snippets to {lang_label}.
Keep proper names (people, organizations, places) untouched. Keep it concise; the goal
is to let another model judge topical relevance, not to publish.

Return ONLY a JSON array. Each item: {{"i": <index>, "h": <headline in {lang_label}>, "c": <content snippet in {lang_label}>}}.
Keep the same "i" values. No prose, no markdown fences.

INPUT:
{json.dumps(items, ensure_ascii=False)}
"""

    try:
        result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
    except Exception as e:
        logger.warning(f"Pre-Topic-Translate Claude-Call fehlgeschlagen: {e}")
        return 0, ClaudeUsage()

    # Tolerant parsing: code fence, array inside prose, truncated array.
    text = result_text.strip()
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text)
        text = re.sub(r"\s*```\s*$", "", text)
        text = text.strip()
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        m = re.search(r"\[.*\]", text, re.DOTALL)
        if not m:
            logger.warning(
                f"Pre-Topic-Translate: kein JSON-Array in Antwort. Sample: {text[:200]!r}"
            )
            return 0, usage
        try:
            data = json.loads(m.group(0))
        except json.JSONDecodeError:
            data = _extract_complete_objects(text)

    if not isinstance(data, list):
        logger.warning(
            f"Pre-Topic-Translate: Antwort ist kein Array ({type(data).__name__})"
        )
        return 0, usage

    def _clean_text(value: object) -> str | None:
        # A non-string value in the answer must not crash the whole call.
        if not isinstance(value, str):
            return None
        return value.strip() or None

    updated: set[int] = set()
    for entry in data:
        if not isinstance(entry, dict):
            continue
        raw_idx = entry.get("i")
        if isinstance(raw_idx, bool):
            continue  # bool is an int subclass, but never a valid position
        try:
            idx = int(raw_idx)
        except (TypeError, ValueError, OverflowError):
            continue
        if idx not in sent_indices:
            # Index invented by the model or pointing at an article that was
            # never sent: do not attach a translation to the wrong article.
            continue
        h = _clean_text(entry.get("h"))
        c = _clean_text(entry.get("c"))
        if h:
            articles[idx]["headline_en_for_topic"] = h
        if c:
            articles[idx]["content_en_for_topic"] = c
        if h or c:
            updated.add(idx)  # a repeated index counts once

    return len(updated), usage
|
||||
|
||||
|
||||
async def translate_articles(
    articles: list[dict],
    output_lang: str = "de",
    batch_size: int = DEFAULT_BATCH_SIZE,
    usage_accumulator: UsageAccumulator | None = None,
    enabled: bool | None = None,
) -> list[dict]:
    """Translate any number of articles in batches.

    Each batch goes through ``translate_articles_batch``; the results are
    returned as ONE flat list. A batch that fails contributes nothing and the
    remaining batches still run.

    Args:
        articles: Article dicts as expected by ``translate_articles_batch``.
        output_lang: Language code of the translation target.
        batch_size: Maximum number of articles per model call; must be >= 1.
        usage_accumulator: If given, the usage of every batch is added to it.
        enabled: Per-call switch. ``None`` falls back to the module-level
            ``TRANSLATOR_ENABLED`` constant.

    Returns:
        The translations of all batches; an empty list when ``articles`` is
        empty or the translator is disabled.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if not articles:
        return []

    is_enabled = TRANSLATOR_ENABLED if enabled is None else bool(enabled)
    if not is_enabled:
        logger.info(
            "Translator deaktiviert (enabled=%s, global TRANSLATOR_ENABLED=%s), %d Artikel uebersprungen",
            enabled, TRANSLATOR_ENABLED, len(articles),
        )
        return []

    # range() rejects a step of 0 with an unspecific message, and a negative
    # step would silently translate nothing; fail clearly in both cases.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    total_batches = (len(articles) + batch_size - 1) // batch_size
    all_translations = []
    for batch_no, offset in enumerate(range(0, len(articles), batch_size), start=1):
        batch = articles[offset:offset + batch_size]
        translations, usage = await translate_articles_batch(batch, output_lang)
        if usage_accumulator is not None:
            usage_accumulator.add(usage)
        all_translations.extend(translations)
        logger.info(
            "Translator-Batch %d/%d: %d/%d uebersetzt (cost=$%.4f)",
            batch_no,
            total_batches,
            len(translations), len(batch),
            usage.cost_usd,
        )
    return all_translations
|
||||
@@ -10,7 +10,7 @@ def test_main_app_imports():
|
||||
def test_all_routers_importable():
|
||||
"""Bei Syntax-Fehlern in einem Router crasht das Ganze - hier fangen wir das ab."""
|
||||
for mod in ("auth", "organizations", "licenses", "users",
|
||||
"dashboard", "sources", "token_usage", "audit"):
|
||||
"dashboard", "sources", "token_usage", "audit", "translation"):
|
||||
m = importlib.import_module(f"routers.{mod}")
|
||||
assert hasattr(m, "router"), f"routers/{mod} hat keinen router-Objekt"
|
||||
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren