diff --git a/requirements.txt b/requirements.txt index 10fa183..aac9653 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ httpx>=0.28 feedparser>=6.0 # PDF-Upload-Validierung pypdf>=5.0 +# X-Scraper-Konten-Verwaltung (twscrape-Account-Pool) +twscrape @ git+https://github.com/vladkens/twscrape.git@206f0942fe41149da28530399f7c772ec00be17a diff --git a/src/config.py b/src/config.py index 4027ebd..0c10f73 100644 --- a/src/config.py +++ b/src/config.py @@ -8,6 +8,10 @@ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static") # Gemeinsame Datenbank (gleiche wie OSINT-Monitor) DB_PATH = os.environ.get("DB_PATH", "/mnt/gitea/osint-data/osint.db") +# twscrape-Account-Store: die X-Login-Konten, mit denen der Monitor bei X +# recherchiert. Geteilt mit dem Monitor (gleicher Pfad-Default). +X_ACCOUNTS_DB_PATH = os.environ.get("X_ACCOUNTS_DB_PATH", "/home/claude-dev/.x-scraper/accounts.db") + # JWT (eigener Secret fuer Verwaltungsportal) JWT_SECRET = os.environ.get("PORTAL_JWT_SECRET") if not JWT_SECRET: diff --git a/src/main.py b/src/main.py index db6134c..84674c3 100644 --- a/src/main.py +++ b/src/main.py @@ -11,7 +11,7 @@ from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse from config import STATIC_DIR, PORT -from routers import auth, organizations, licenses, users, dashboard, sources, token_usage, audit, translation +from routers import auth, organizations, licenses, users, dashboard, sources, token_usage, audit, translation, x_scraper logging.basicConfig( level=logging.INFO, @@ -43,6 +43,7 @@ app.include_router(sources.router) app.include_router(token_usage.router) app.include_router(audit.router) app.include_router(translation.router) +app.include_router(x_scraper.router) # --- Statische Dateien --- app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static") diff --git a/src/routers/x_scraper.py b/src/routers/x_scraper.py new file mode 100644 index 0000000..a38887a --- /dev/null +++ 
# Patch metadata preserved from the collapsed diff:
# b/src/routers/x_scraper.py @@ -0,0 +1,224 @@
"""X scraper accounts: management of the twscrape account pool.

These are the X login accounts the monitor uses to research (scrape) on X.
They live in the twscrape account store (config.X_ACCOUNTS_DB_PATH), not in
the administration database. twscrape is imported lazily so that the portal
also starts when twscrape is not installed.
"""
import logging
import os
from datetime import datetime, timezone
from typing import Optional

import aiosqlite
from fastapi import APIRouter, Depends, HTTPException, Request
from pydantic import BaseModel, Field

from auth import get_current_admin
from audit import log_action, get_client_ip
from config import X_ACCOUNTS_DB_PATH
from database import db_dependency

logger = logging.getLogger("verwaltung.x_scraper")

router = APIRouter(prefix="/api/x-scraper", tags=["x-scraper"])


def _get_pool():
    """Open the twscrape accounts pool.

    Returns:
        The ``pool`` attribute of a twscrape ``API`` bound to
        ``X_ACCOUNTS_DB_PATH``.

    Raises:
        HTTPException: 503 when twscrape cannot be imported.
    """
    # Creating the store directory is best-effort: a failure is logged and
    # opening the pool is still attempted.
    store_dir = os.path.dirname(X_ACCOUNTS_DB_PATH)
    if store_dir:  # a bare file name has no directory part to create
        try:
            os.makedirs(store_dir, exist_ok=True)
        except OSError as e:
            logger.warning(
                "X-Scraper Account-Store-Verzeichnis konnte nicht angelegt werden: %s", e
            )
    try:
        from twscrape import API
    except ImportError as e:
        raise HTTPException(status_code=503, detail="twscrape ist nicht installiert") from e
    return API(X_ACCOUNTS_DB_PATH).pool


def _as_utc(ts: datetime) -> datetime:
    """Return *ts* as a timezone-aware datetime.

    Aware values are returned unchanged.
    NOTE(review): naive values are assumed to be UTC -- confirm against the
    format twscrape uses when it stores lock timestamps.
    """
    if ts.tzinfo is None:
        return ts.replace(tzinfo=timezone.utc)
    return ts


def _summary(acc) -> dict:
    """Reduce an account object to a displayable dict -- without secrets.

    Password, e-mail password and cookie values are never included; only
    whether cookies are present is reported.
    """
    now = datetime.now(timezone.utc)
    # Latest lock expiry that still lies in the future, if any.
    locked_until: Optional[datetime] = None
    for ts in (acc.locks or {}).values():
        if not ts:
            continue
        # Normalise first so a naive timestamp cannot raise TypeError when
        # compared with the aware "now".
        ts = _as_utc(ts)
        if ts > now and (locked_until is None or ts > locked_until):
            locked_until = ts
    return {
        "username": acc.username,
        # "_" is the placeholder written by add_account for "no e-mail".
        "email": acc.email if acc.email and acc.email != "_" else None,
        "active": bool(acc.active),
        "locked": locked_until is not None,
        "locked_until": locked_until.isoformat() if locked_until else None,
        "has_cookies": bool(acc.cookies),
        "total_requests": sum((acc.stats or {}).values()),
        "last_used": acc.last_used.isoformat() if acc.last_used else None,
        "error_msg": acc.error_msg or None,
    }


async def _restore_account(pool, account) -> None:
    """Best-effort rollback: write *account* back into the pool.

    Used when a delete-and-recreate sequence fails half-way. Failures are
    logged and not re-raised, because the caller is already reporting the
    original error.
    NOTE(review): relies on the pool's ``save`` method inserting or updating
    the row -- confirm for the pinned twscrape commit.
    """
    try:
        await pool.save(account)
    except Exception as e:
        logger.exception("X-Scraper Konto-Wiederherstellung fehlgeschlagen: %s", e)


class XScraperCreate(BaseModel):
    """Request body for creating a new X scraper account."""

    username: str = Field(min_length=1, max_length=100)
    password: str = Field(default="", max_length=200)
    email: str = Field(default="", max_length=200)
    email_password: str = Field(default="", max_length=200)
    cookies: str = Field(min_length=1, max_length=4000)


class XScraperCookies(BaseModel):
    """Request body carrying fresh cookies for an existing account."""

    cookies: str = Field(min_length=1, max_length=4000)


class XScraperActive(BaseModel):
    """Request body for switching an account on or off."""

    active: bool


@router.get("/accounts")
async def list_accounts(admin: dict = Depends(get_current_admin)):
    """List all X scraper accounts (without passwords/cookies).

    Raises:
        HTTPException: 503 if twscrape is missing, 500 if the pool cannot
            be read.
    """
    pool = _get_pool()
    try:
        accounts = await pool.get_all()
    except Exception as e:
        logger.exception("X-Scraper get_all fehlgeschlagen: %s", e)
        raise HTTPException(
            status_code=500, detail="Konten konnten nicht geladen werden"
        ) from e
    return [_summary(a) for a in accounts]


@router.post("/accounts", status_code=201)
async def add_account(
    data: XScraperCreate,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Create a new X scraper account and write an audit entry.

    Returns:
        The secret-free summary of the stored account.

    Raises:
        HTTPException: 422 for an empty user name, 409 if the account
            already exists, 500 if it could not be stored.
    """
    pool = _get_pool()
    # Accept "@name" as well as "name".
    username = data.username.strip().lstrip("@")
    if not username:
        raise HTTPException(status_code=422, detail="Benutzername ist erforderlich")
    if await pool.get_account(username) is not None:
        raise HTTPException(status_code=409, detail=f"Konto '{username}' existiert bereits")
    try:
        await pool.add_account(
            username=username,
            # "_" is stored as placeholder for fields that were left empty.
            password=data.password or "_",
            email=data.email or "_",
            email_password=data.email_password or "_",
            cookies=data.cookies.strip(),
        )
    except Exception as e:
        logger.exception("X-Scraper add_account fehlgeschlagen: %s", e)
        raise HTTPException(
            status_code=500, detail="Konto konnte nicht angelegt werden"
        ) from e
    # Read back to verify that the pool really stored the account.
    acc = await pool.get_account(username)
    if acc is None:
        raise HTTPException(
            status_code=500,
            detail="Konto wurde nicht gespeichert, bitte Cookies pruefen",
        )
    await log_action(
        db, admin, get_client_ip(request), action="create",
        resource_type="x_scraper_account",
        after={"username": username, "email": data.email},
    )
    return _summary(acc)


@router.post("/accounts/{username}/cookies")
async def refresh_cookies(
    username: str,
    data: XScraperCookies,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Renew the cookies of an existing account (refresh the login).

    The account is deleted and re-created with the new cookies. If the
    re-creation fails, the previous account is written back so that a bad
    cookie string cannot remove the account from the pool.

    Raises:
        HTTPException: 404 if the account does not exist, 500 if the
            cookies could not be renewed.
    """
    pool = _get_pool()
    previous = await pool.get_account(username)
    if previous is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
    try:
        await pool.delete_accounts([username])
        await pool.add_account(
            username=username,
            password=previous.password,
            email=previous.email,
            email_password=previous.email_password,
            cookies=data.cookies.strip(),
        )
    except Exception as e:
        logger.exception("X-Scraper Cookie-Refresh fehlgeschlagen: %s", e)
        # The delete may already have happened -- put the old account back.
        await _restore_account(pool, previous)
        raise HTTPException(
            status_code=500, detail="Cookies konnten nicht erneuert werden"
        ) from e
    acc = await pool.get_account(username)
    if acc is None:
        # Nothing was stored under this name; restore the old account
        # instead of leaving the pool without it.
        await _restore_account(pool, previous)
        raise HTTPException(
            status_code=500, detail="Konto nach Cookie-Refresh nicht gefunden"
        )
    await log_action(
        db, admin, get_client_ip(request), action="update",
        resource_type="x_scraper_account",
        after={"username": username, "change": "cookies"},
    )
    return _summary(acc)


@router.post("/accounts/{username}/active")
async def set_active(
    username: str,
    data: XScraperActive,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Switch an account active or inactive.

    Raises:
        HTTPException: 404 if the account does not exist (before or after
            the change), 500 if the status could not be changed.
    """
    pool = _get_pool()
    if await pool.get_account(username) is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
    try:
        await pool.set_active(username, data.active)
    except Exception as e:
        logger.exception("X-Scraper set_active fehlgeschlagen: %s", e)
        raise HTTPException(
            status_code=500, detail="Status konnte nicht geaendert werden"
        ) from e
    await log_action(
        db, admin, get_client_ip(request), action="update",
        resource_type="x_scraper_account",
        after={"username": username, "active": data.active},
    )
    acc = await pool.get_account(username)
    if acc is None:
        # The account vanished between the update and the read-back.
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
    return _summary(acc)


@router.delete("/accounts/{username}", status_code=204)
async def delete_account(
    username: str,
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Remove an X scraper account and write an audit entry.

    Raises:
        HTTPException: 404 if the account does not exist, 500 if it could
            not be removed.
    """
    pool = _get_pool()
    if await pool.get_account(username) is None:
        raise HTTPException(status_code=404, detail="Konto nicht gefunden")
    try:
        await pool.delete_accounts([username])
    except Exception as e:
        logger.exception("X-Scraper delete fehlgeschlagen: %s", e)
        raise HTTPException(
            status_code=500, detail="Konto konnte nicht entfernt werden"
        ) from e
    await log_action(
        db, admin, get_client_ip(request), action="delete",
        resource_type="x_scraper_account", before={"username": username},
    )


@router.post("/reset-locks")
async def reset_locks(
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Reset all temporary locks of the accounts.

    Raises:
        HTTPException: 500 if the locks could not be reset.
    """
    pool = _get_pool()
    try:
        await pool.reset_locks()
    except Exception as e:
        logger.exception("X-Scraper reset_locks fehlgeschlagen: %s", e)
        raise HTTPException(
            status_code=500, detail="Sperren konnten nicht zurueckgesetzt werden"
        ) from e
    await log_action(
        db, admin, get_client_ip(request), action="update",
        resource_type="x_scraper_account", after={"change": "reset_locks"},
    )
    return {"status": "ok"}

# Patch metadata preserved from the collapsed diff (next file in the patch):
# diff --git a/src/static/dashboard.html b/src/static/dashboard.html index b4813a3..b1fe1e8 100644 --- a/src/static/dashboard.html +++ b/src/static/dashboard.html @@ -329,6 +329,7 @@ + @@ -471,6 +472,37 @@ + +
X-Login-Konten, mit denen der Monitor bei X recherchiert. Mehr Konten bedeuten paralleleres, schnelleres Scrapen. Cookies laufen periodisch ab und müssen dann erneuert werden.
+| Benutzername | +Status | +Anfragen | +Letzte Nutzung | +Aktionen | +
|---|