From 8c75a706557519e078588bdf16b7715061c7710b Mon Sep 17 00:00:00 2001 From: claude-dev Date: Fri, 22 May 2026 11:22:35 +0000 Subject: [PATCH] feat(x-scraper): X-Recherche-Konten im Verwaltungsportal verwalten Neuer Sub-Tab "X-Recherche-Konten" unter Quellen: die X-Login-Konten, mit denen der Monitor bei X scrapt (twscrape-Account-Pool), anzeigen, hinzufuegen, Cookies erneuern, aktiv/inaktiv schalten, entfernen, plus Sperren-Reset. - neuer Router x_scraper.py, verwaltet den twscrape-Pool ueber dessen API - X_ACCOUNTS_DB_PATH in config.py - twscrape als Abhaengigkeit (git-main-Pin) - Sub-Tab, Tabelle und zwei Modals in dashboard.html, Logik in x-scraper.js Co-Authored-By: Claude Opus 4.7 (1M context) --- requirements.txt | 2 + src/config.py | 4 + src/main.py | 3 +- src/routers/x_scraper.py | 224 +++++++++++++++++++++++++++++++++++++ src/static/dashboard.html | 103 ++++++++++++++++- src/static/js/sources.js | 1 + src/static/js/x-scraper.js | 169 ++++++++++++++++++++++++++++ 7 files changed, 504 insertions(+), 2 deletions(-) create mode 100644 src/routers/x_scraper.py create mode 100644 src/static/js/x-scraper.js diff --git a/requirements.txt b/requirements.txt index 10fa183..aac9653 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ httpx>=0.28 feedparser>=6.0 # PDF-Upload-Validierung pypdf>=5.0 +# X-Scraper-Konten-Verwaltung (twscrape-Account-Pool) +twscrape @ git+https://github.com/vladkens/twscrape.git@206f0942fe41149da28530399f7c772ec00be17a diff --git a/src/config.py b/src/config.py index 4027ebd..0c10f73 100644 --- a/src/config.py +++ b/src/config.py @@ -8,6 +8,10 @@ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static") # Gemeinsame Datenbank (gleiche wie OSINT-Monitor) DB_PATH = os.environ.get("DB_PATH", "/mnt/gitea/osint-data/osint.db") +# twscrape-Account-Store: die X-Login-Konten, mit denen der Monitor bei X +# recherchiert. Geteilt mit dem Monitor (gleicher Pfad-Default). 
+X_ACCOUNTS_DB_PATH = os.environ.get("X_ACCOUNTS_DB_PATH", "/home/claude-dev/.x-scraper/accounts.db") + # JWT (eigener Secret fuer Verwaltungsportal) JWT_SECRET = os.environ.get("PORTAL_JWT_SECRET") if not JWT_SECRET: diff --git a/src/main.py b/src/main.py index db6134c..84674c3 100644 --- a/src/main.py +++ b/src/main.py @@ -11,7 +11,7 @@ from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse from config import STATIC_DIR, PORT -from routers import auth, organizations, licenses, users, dashboard, sources, token_usage, audit, translation +from routers import auth, organizations, licenses, users, dashboard, sources, token_usage, audit, translation, x_scraper logging.basicConfig( level=logging.INFO, @@ -43,6 +43,7 @@ app.include_router(sources.router) app.include_router(token_usage.router) app.include_router(audit.router) app.include_router(translation.router) +app.include_router(x_scraper.router) # --- Statische Dateien --- app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static") diff --git a/src/routers/x_scraper.py b/src/routers/x_scraper.py new file mode 100644 index 0000000..a38887a --- /dev/null +++ b/src/routers/x_scraper.py @@ -0,0 +1,224 @@ +"""X-Scraper-Konten: Verwaltung des twscrape-Account-Pools. + +Das sind die X-Login-Konten, mit denen der Monitor bei X recherchiert +(scrapen). Sie liegen im twscrape-Account-Store (config.X_ACCOUNTS_DB_PATH), +nicht in der Verwaltungs-Datenbank. twscrape wird lazy importiert, damit das +Portal auch ohne installiertes twscrape startet. 
+"""
+import logging
+import os
+from datetime import datetime, timezone
+from typing import Optional
+
+import aiosqlite
+from fastapi import APIRouter, Depends, HTTPException, Request
+from pydantic import BaseModel, Field
+
+from auth import get_current_admin
+from audit import log_action, get_client_ip
+from config import X_ACCOUNTS_DB_PATH
+from database import db_dependency
+
+logger = logging.getLogger("verwaltung.x_scraper")
+
+router = APIRouter(prefix="/api/x-scraper", tags=["x-scraper"])
+
+
+def _get_pool():
+    """Open the twscrape accounts pool stored at ``X_ACCOUNTS_DB_PATH``.
+
+    Returns:
+        The ``pool`` attribute of a ``twscrape.API`` bound to the account store.
+
+    Raises:
+        HTTPException: 503 when twscrape cannot be imported.
+    """
+    # Best effort: make sure the store directory exists. A failure is logged
+    # (not silently dropped); opening the store afterwards surfaces the real error.
+    store_dir = os.path.dirname(X_ACCOUNTS_DB_PATH)
+    if store_dir:
+        try:
+            os.makedirs(store_dir, exist_ok=True)
+        except OSError as e:
+            logger.warning("X-Scraper-Verzeichnis %s konnte nicht angelegt werden: %s", store_dir, e)
+    # Imported lazily so the portal still starts without twscrape installed.
+    try:
+        from twscrape import API
+    except ImportError:
+        raise HTTPException(status_code=503, detail="twscrape ist nicht installiert") from None
+    return API(X_ACCOUNTS_DB_PATH).pool
+
+
+def _summary(acc) -> dict:
+    """Reduce an account object to a displayable dict without secrets.
+
+    Passwords and cookie values are never copied; ``has_cookies`` only tells
+    whether any cookies are stored.
+    """
+    now = datetime.now(timezone.utc)
+    # Only lock timestamps that still lie in the future count as an active lock.
+    pending = [ts for ts in (acc.locks or {}).values() if ts and ts > now]
+    locked_until = max(pending, default=None)
+    return {
+        "username": acc.username,
+        # "_" is the placeholder add_account() stores for an empty e-mail.
+        "email": acc.email if acc.email and acc.email != "_" else None,
+        "active": bool(acc.active),
+        "locked": locked_until is not None,
+        "locked_until": locked_until.isoformat() if locked_until else None,
+        "has_cookies": bool(acc.cookies),
+        "total_requests": sum((acc.stats or {}).values()),
+        "last_used": acc.last_used.isoformat() if acc.last_used else None,
+        "error_msg": acc.error_msg or None,
+    }
+
+
+class XScraperCreate(BaseModel):
+    """Request body for creating an account; only username and cookies are required."""
+
+    username: str = Field(min_length=1, max_length=100)
+    password: str = Field(default="", max_length=200)
+    email: str = Field(default="", max_length=200)
+    email_password: str = Field(default="", max_length=200)
+    cookies: str = Field(min_length=1, max_length=4000)
+
+
+class XScraperCookies(BaseModel):
+    """Request body for replacing the cookies of an existing account."""
+
+    cookies: str = Field(min_length=1, max_length=4000)
+
+
+class XScraperActive(BaseModel):
+    """Request body for switching an account active or inactive."""
+
+    active: bool
+
+
+@router.get("/accounts")
+async def list_accounts(admin: dict = Depends(get_current_admin)):
+    """List all X scraper accounts (without passwords or cookies).
+
+    Raises:
+        HTTPException: 503 if twscrape is missing, 500 if the pool cannot be read.
+    """
+    pool = _get_pool()
+    try:
+        accounts = await pool.get_all()
+    except Exception as e:
+        # logger.exception keeps the traceback that a plain error("%s", e) drops.
+        logger.exception("X-Scraper get_all fehlgeschlagen")
+        raise HTTPException(status_code=500, detail="Konten konnten nicht geladen werden") from e
+    return [_summary(a) for a in accounts]
+
+
+@router.post("/accounts", status_code=201)
+async def add_account(
+    data: XScraperCreate,
+    request: Request,
+    admin: dict = Depends(get_current_admin),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Create a new X scraper account and write an audit entry.
+
+    Raises:
+        HTTPException: 422 for an empty username, 409 if the account already
+            exists, 500 if the pool rejects or does not persist the account.
+    """
+    pool = _get_pool()
+    username = data.username.strip().lstrip("@")
+    if not username:
+        raise HTTPException(status_code=422, detail="Benutzername ist erforderlich")
+    if await pool.get_account(username) is not None:
+        raise HTTPException(status_code=409, detail=f"Konto '{username}' existiert bereits")
+    try:
+        await pool.add_account(
+            username=username,
+            # "_" placeholders stand in for credentials that were not supplied.
+            password=data.password or "_",
+            email=data.email or "_",
+            email_password=data.email_password or "_",
+            cookies=data.cookies.strip(),
+        )
+    except Exception as e:
+        # Log only the exception type: the message may echo the submitted
+        # cookie string, which is a login secret and must not reach the logs.
+        logger.error("X-Scraper add_account fehlgeschlagen: %s", type(e).__name__)
+        raise HTTPException(status_code=500, detail="Konto konnte nicht angelegt werden") from None
+    acc = await pool.get_account(username)
+    if acc is None:
+        raise HTTPException(status_code=500, detail="Konto wurde nicht gespeichert, bitte Cookies pruefen")
+    await log_action(
+        db, admin, get_client_ip(request), action="create",
+        resource_type="x_scraper_account", after={"username": username, "email": data.email},
+    )
+    return _summary(acc)
+
+
+@router.post("/accounts/{username}/cookies")
+async def refresh_cookies(
+    username: str,
+    data: XScraperCookies,
+    request: Request,
+    admin: dict = Depends(get_current_admin),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Replace the cookies of an existing account (refresh its login).
+
+    The stored account is updated in place. The new cookies are parsed before
+    anything is written, so a bad cookie string can no longer destroy the
+    account (the previous delete-then-re-add lost it when the re-add failed)
+    and request statistics and last-used time are kept.
+
+    Raises:
+        HTTPException: 404 if the account is unknown, 422 if the cookies
+            cannot be parsed, 500 if the update cannot be stored.
+    """
+    pool = _get_pool()
+    acc = await pool.get_account(username)
+    if acc is None:
+        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
+    try:
+        from twscrape.utils import parse_cookies
+    except ImportError as e:
+        logger.exception("X-Scraper Cookie-Refresh fehlgeschlagen")
+        raise HTTPException(status_code=500, detail="Cookies konnten nicht erneuert werden") from e
+    try:
+        cookies = parse_cookies(data.cookies.strip())
+    except ValueError:
+        # Do not log or chain the exception: its text may contain the cookie string.
+        logger.warning("X-Scraper Cookie-Refresh: Cookies fuer %s nicht lesbar", username)
+        raise HTTPException(status_code=422, detail="Cookies konnten nicht gelesen werden") from None
+    acc.cookies = cookies
+    # Same activation rule a freshly added account gets: usable only with a ct0 cookie.
+    acc.active = "ct0" in cookies
+    # State tied to the old session is cleared, as the former re-create did.
+    acc.locks = {}
+    acc.headers = {}
+    acc.error_msg = None
+    try:
+        await pool.save(acc)
+    except Exception as e:
+        logger.error("X-Scraper Cookie-Refresh fehlgeschlagen: %s", type(e).__name__)
+        raise HTTPException(status_code=500, detail="Cookies konnten nicht erneuert werden") from e
+    acc = await pool.get_account(username)
+    if acc is None:
+        raise HTTPException(status_code=500, detail="Konto nach Cookie-Refresh nicht gefunden")
+    await log_action(
+        db, admin, get_client_ip(request), action="update",
+        resource_type="x_scraper_account", after={"username": username, "change": "cookies"},
+    )
+    return _summary(acc)
+
+
+@router.post("/accounts/{username}/active")
+async def set_active(
+    username: str,
+    data: XScraperActive,
+    request: Request,
+    admin: dict = Depends(get_current_admin),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Switch an account active or inactive and write an audit entry.
+
+    Raises:
+        HTTPException: 404 if the account is unknown (or vanished while the
+            request ran), 500 if the pool cannot store the change.
+    """
+    pool = _get_pool()
+    if await pool.get_account(username) is None:
+        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
+    try:
+        await pool.set_active(username, data.active)
+    except Exception as e:
+        logger.exception("X-Scraper set_active fehlgeschlagen")
+        raise HTTPException(status_code=500, detail="Status konnte nicht geaendert werden") from e
+    await log_action(
+        db, admin, get_client_ip(request), action="update",
+        resource_type="x_scraper_account", after={"username": username, "active": data.active},
+    )
+    acc = await pool.get_account(username)
+    if acc is None:
+        # Removed concurrently: answer 404 instead of crashing inside _summary(None).
+        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
+    return _summary(acc)
+
+
+@router.delete("/accounts/{username}", status_code=204)
+async def delete_account(
+    username: str,
+    request: Request,
+    admin: dict = Depends(get_current_admin),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Remove an X scraper account from the pool and write an audit entry.
+
+    Responds with 204 and no body on success.
+
+    Raises:
+        HTTPException: 404 if the account is unknown, 500 if the pool cannot
+            delete it.
+    """
+    pool = _get_pool()
+    if await pool.get_account(username) is None:
+        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
+    try:
+        await pool.delete_accounts([username])
+    except Exception as e:
+        # logger.exception records the traceback; "from e" keeps the cause attached.
+        logger.exception("X-Scraper delete fehlgeschlagen")
+        raise HTTPException(status_code=500, detail="Konto konnte nicht entfernt werden") from e
+    await log_action(
+        db, admin, get_client_ip(request), action="delete",
+        resource_type="x_scraper_account", before={"username": username},
+    )
+
+
+@router.post("/reset-locks")
+async def reset_locks(
+    request: Request,
+    admin: dict = Depends(get_current_admin),
+    db: aiosqlite.Connection = Depends(db_dependency),
+):
+    """Reset the temporary locks of all accounts and write an audit entry.
+
+    Returns:
+        ``{"status": "ok"}`` on success.
+
+    Raises:
+        HTTPException: 500 if the pool cannot reset the locks.
+    """
+    pool = _get_pool()
+    try:
+        await pool.reset_locks()
+    except Exception as e:
+        logger.exception("X-Scraper reset_locks fehlgeschlagen")
+        raise HTTPException(status_code=500, detail="Sperren konnten nicht zurueckgesetzt werden") from e
+    await log_action(
+        db, admin, get_client_ip(request), action="update",
+        resource_type="x_scraper_account", after={"change": "reset_locks"},
+    )
+    return {"status": "ok"}
diff --git a/src/static/dashboard.html b/src/static/dashboard.html
index b4813a3..b1fe1e8 100644
--- a/src/static/dashboard.html
+++ b/src/static/dashboard.html
@@ -329,6 +329,7 @@
+
@@ -471,6 +472,37 @@
+
+
+
+
+ +
+
+ + +
+
+
+

X-Login-Konten, mit denen der Monitor bei X recherchiert. Mehr Konten bedeuten paralleleres, schnelleres Scrapen. Cookies laufen periodisch ab und müssen dann erneuert werden.

+
+ + + + + + + + + + + + +
BenutzernameE-MailStatusAnfragenLetzte NutzungAktionen
+
+
+
+ @@ -938,8 +970,77 @@ + + + + + + - + +
diff --git a/src/static/js/sources.js b/src/static/js/sources.js
index 7c51850..a25cab0 100644
--- a/src/static/js/sources.js
+++ b/src/static/js/sources.js
@@ -38,6 +38,7 @@ function setupSourceSubTabs() {
             else if (subtab === "tenant-sources") loadTenantSources();
             else if (subtab === "source-health") loadHealthData();
             else if (subtab === "classification-review") loadClassificationQueue();
+            else if (subtab === "x-scraper") loadXScraperAccounts();
         });
     });
 }
diff --git a/src/static/js/x-scraper.js b/src/static/js/x-scraper.js
new file mode 100644
index 0000000..deb2bf7
--- /dev/null
+++ b/src/static/js/x-scraper.js
@@ -0,0 +1,169 @@
+/* X research accounts: management of the twscrape account pool */
+"use strict";
+
+// Last account list returned by GET /api/x-scraper/accounts.
+let xScraperCache = [];
+
+/**
+ * Wire the modal forms (once), fetch the account list and render it into
+ * the #xScraperTable body; a failed request renders the error text instead.
+ * NOTE(review): the markup inside the string literals of this file looks
+ * stripped in this copy of the patch (empty '' fragments) -- confirm against
+ * the real x-scraper.js before relying on them.
+ */
+async function loadXScraperAccounts() {
+    setupXScraperForms();
+    const tbody = document.getElementById("xScraperTable");
+    tbody.innerHTML = 'Lade...';
+    try {
+        xScraperCache = await API.get("/api/x-scraper/accounts");
+        renderXScraperAccounts(xScraperCache || []);
+    } catch (err) {
+        tbody.innerHTML = 'Fehler: ' + esc(err.message || "") + '';
+    }
+}
+
+/**
+ * Render the given account summaries into #xScraperTable and update the
+ * #xScraperCount label (singular/plural form).
+ * Server-provided text (username, e-mail, last-used) is passed through esc().
+ */
+function renderXScraperAccounts(list) {
+    const tbody = document.getElementById("xScraperTable");
+    const cnt = document.getElementById("xScraperCount");
+    if (cnt) cnt.textContent = list.length + (list.length === 1 ? " Konto" : " Konten");
+    if (!list.length) {
+        tbody.innerHTML = 'Keine X-Recherche-Konten. Mit „+ Konto hinzufügen" anlegen.';
+        return;
+    }
+    tbody.innerHTML = list.map((a) => {
+        // Inactive takes precedence over locked when choosing the status label.
+        let status;
+        if (!a.active) status = 'Inaktiv';
+        else if (a.locked) status = 'Gesperrt';
+        else status = 'Aktiv';
+        // formatDateTime is used only if it is defined; otherwise the raw value is shown.
+        const lastUsed = a.last_used && typeof formatDateTime === "function"
+            ? formatDateTime(a.last_used)
+            : (a.last_used || "—");
+        const errInfo = a.error_msg
+            ? ' !'
+            : "";
+        const u = esc(a.username);
+        // NOTE(review): toggleLabel is not referenced in the visible code; presumably
+        // it was used inside the stripped button markup below -- confirm.
+        const toggleLabel = a.active ? "Deaktivieren" : "Aktivieren";
+        return '' +
+            '' + u + '' + errInfo + '' +
+            '' + esc(a.email || "—") + '' +
+            '' + status + '' +
+            '' + (a.total_requests || 0) + '' +
+            '' + esc(lastUsed) + '' +
+            '' +
+                ' ' +
+                ' ' +
+                '' +
+            '' +
+            '';
+    }).join("");
+}
+
+/** Clear the error box and all input fields, then open the "add account" modal. */
+function openXScraperAddModal() {
+    document.getElementById("xScraperAddError").style.display = "none";
+    ["xsUsername", "xsPassword", "xsEmail", "xsEmailPassword", "xsCookies"].forEach((id) => {
+        const el = document.getElementById(id);
+        if (el) el.value = "";
+    });
+    openModal("modalXScraperAdd");
+}
+
+/** Open the "refresh cookies" modal for the given account with an empty cookie field. */
+function openXScraperCookiesModal(username) {
+    document.getElementById("xScraperCookiesError").style.display = "none";
+    document.getElementById("xsCookiesUsername").value = username;
+    document.getElementById("xsCookiesValue").value = "";
+    openModal("modalXScraperCookies");
+}
+
+/** POST the new active flag for an account, show a toast and reload the list. */
+async function toggleXScraperActive(username, active) {
+    try {
+        await API.post("/api/x-scraper/accounts/" + encodeURIComponent(username) + "/active", { active: active });
+        showToast("Status geändert.", "success");
+        loadXScraperAccounts();
+    } catch (err) {
+        showToast(err.message || "Status konnte nicht geändert werden", "error");
+    }
+}
+
+/** Ask for confirmation, then DELETE the account and reload the list. */
+function confirmDeleteXScraper(username) {
+    // NOTE(review): username is concatenated without esc() here, unlike in
+    // renderXScraperAccounts -- confirm showConfirm() renders the message as text.
+    showConfirm(
+        "Konto entfernen",
+        'Soll das X-Recherche-Konto "' + username + '" entfernt werden? Der Monitor nutzt es dann nicht mehr zum Scrapen.',
+        async () => {
+            try {
+                await API.del("/api/x-scraper/accounts/" + encodeURIComponent(username));
+                showToast("Konto entfernt.", "success");
+                loadXScraperAccounts();
+            } catch (err) {
+                showToast(err.message || "Konto konnte nicht entfernt werden", "error");
+            }
+        }
+    );
+}
+
+/** Ask for confirmation, then POST /api/x-scraper/reset-locks and reload the list. */
+function resetXScraperLocks() {
+    showConfirm(
+        "Sperren zurücksetzen",
+        "Alle temporären Sperren der X-Recherche-Konten zurücksetzen?",
+        async () => {
+            try {
+                await API.post("/api/x-scraper/reset-locks", {});
+                showToast("Sperren zurückgesetzt.", "success");
+                loadXScraperAccounts();
+            } catch (err) {
+                showToast(err.message || "Sperren konnten nicht zurückgesetzt werden", "error");
+            }
+        }
+    );
+}
+
+/**
+ * Attach the submit handlers of the "add account" and "refresh cookies" forms.
+ * The data-wired marker makes repeated calls attach each handler only once
+ * (this function runs on every loadXScraperAccounts() call).
+ */
+function setupXScraperForms() {
+    const addForm = document.getElementById("xScraperAddForm");
+    if (addForm && !addForm.dataset.wired) {
+        addForm.dataset.wired = "1";
+        addForm.addEventListener("submit", async (e) => {
+            e.preventDefault();
+            const errEl = document.getElementById("xScraperAddError");
+            errEl.style.display = "none";
+            const body = {
+                // A single leading "@" is removed from the entered handle.
+                username: document.getElementById("xsUsername").value.trim().replace(/^@/, ""),
+                password: document.getElementById("xsPassword").value,
+                email: document.getElementById("xsEmail").value.trim(),
+                email_password: document.getElementById("xsEmailPassword").value,
+                cookies: document.getElementById("xsCookies").value.trim(),
+            };
+            // Client-side check before the request is sent.
+            if (!body.username || !body.cookies) {
+                errEl.textContent = "Benutzername und Cookies sind erforderlich.";
+                errEl.style.display = "block";
+                return;
+            }
+            try {
+                await API.post("/api/x-scraper/accounts", body);
+                closeModal("modalXScraperAdd");
+                showToast("Konto angelegt.", "success");
+                loadXScraperAccounts();
+            } catch (err) {
+                // Errors are shown inside the modal, which stays open.
+                errEl.textContent = err.message || "Anlegen fehlgeschlagen";
+                errEl.style.display = "block";
+            }
+        });
+    }
+
+    const ckForm = document.getElementById("xScraperCookiesForm");
+    if (ckForm && !ckForm.dataset.wired) {
+        ckForm.dataset.wired = "1";
+        ckForm.addEventListener("submit", async (e) => {
+            e.preventDefault();
+            const errEl = document.getElementById("xScraperCookiesError");
+            errEl.style.display = "none";
+            const username = document.getElementById("xsCookiesUsername").value;
+            const cookies = document.getElementById("xsCookiesValue").value.trim();
+            if (!cookies) {
+                errEl.textContent = "Cookies sind erforderlich.";
+                errEl.style.display = "block";
+                return;
+            }
+            try {
+                await API.post("/api/x-scraper/accounts/" + encodeURIComponent(username) + "/cookies", { cookies: cookies });
+                closeModal("modalXScraperCookies");
+                showToast("Cookies erneuert.", "success");
+                loadXScraperAccounts();
+            } catch (err) {
+                errEl.textContent = err.message || "Cookies konnten nicht erneuert werden";
+                errEl.style.display = "block";
+            }
+        });
+    }
+}