From 168fbc3987cdaa959e39a1561d50bb7211ac6e7e Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sat, 16 May 2026 23:57:32 +0000 Subject: [PATCH] feat(sources): PDF-Upload auch in der Endkunden-App (Kundenquelle) - POST /api/sources/upload-pdf: tenant-scoped Upload, gleiche Speicher- Konvention wie der Verwaltungs-Endpoint (/pdfs/{sha}.pdf). Duplikat-Check beruecksichtigt globale Quellen. - dashboard.html: +PDF-Button in der Quellenverwaltungs-Toolbar + eigenes Modal modal-pdf-upload (closeModal-Quotes via '). - app.js: App.openPdfUpload + _bindPdfUploadFormOnce (Submit nur einmal binden). - api.js: API.upload(path, formData) Helper analog Verwaltung. --- src/routers/sources.py | 115 +++++++++++++++++++++++++++++++++++++- src/static/dashboard.html | 52 +++++++++++++++++ src/static/js/api.js | 25 +++++++++ src/static/js/app.js | 64 +++++++++++++++++++++ 4 files changed, 255 insertions(+), 1 deletion(-) diff --git a/src/routers/sources.py b/src/routers/sources.py index f1e35bd..b61d0a7 100644 --- a/src/routers/sources.py +++ b/src/routers/sources.py @@ -1,13 +1,19 @@ """Sources-Router: Quellenverwaltung (Multi-Tenant). Klassifikation: Read-Only — Pflege in der Verwaltung.""" import json import logging +import uuid +import re +import os +import hashlib from collections import defaultdict -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest from auth import get_current_user from database import db_dependency, refresh_source_counts from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES import aiosqlite +from config import DB_PATH +from typing import Optional logger = logging.getLogger("osint.sources") @@ -640,3 +646,110 @@ async def trigger_refresh_counts( await refresh_source_counts(db) return {"status": "ok"} + +# --- PDF-Upload (Kundenquelle vom Typ pdf_document) --- +# Analog zum Verwaltungs-Upload, aber tenant-spezifisch. +# Datei landet unter /pdfs/{sha256}.pdf. +# Der Worker (services.pdf_ingest) verarbeitet sie asynchron im Minutentakt. + +MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024 # 50 MB +PDF_DIR = os.path.join(os.path.dirname(os.path.abspath(DB_PATH)), "pdfs") + + +def _pdf_dir() -> str: + os.makedirs(PDF_DIR, exist_ok=True) + return PDF_DIR + + +@router.post("/upload-pdf", status_code=status.HTTP_201_CREATED) +async def upload_pdf_source( + current_user: dict = Depends(get_current_user), + db: aiosqlite.Connection = Depends(db_dependency), + file: UploadFile = File(...), + name: Optional[str] = Form(None), + category: str = Form("sonstige"), + language: Optional[str] = Form(None), + notes: Optional[str] = Form(None), +): + """PDF hochladen + als Kundenquelle (source_type=pdf_document) registrieren. + + Idempotent ueber SHA256 innerhalb des Tenants: doppelter Upload erzeugt 409. + """ + head = await file.read(8) + if not head.startswith(b"%PDF-"): + raise HTTPException(status_code=415, detail="Datei ist kein gueltiges PDF") + + tenant_id = current_user.get("tenant_id") + sha = hashlib.sha256() + sha.update(head) + total = len(head) + tmp_path = os.path.join(_pdf_dir(), f".upload-{uuid.uuid4().hex}.tmp") + try: + with open(tmp_path, "wb") as out: + out.write(head) + while True: + chunk = await file.read(1024 * 1024) + if not chunk: + break + total += len(chunk) + if total > MAX_PDF_SIZE_BYTES: + raise HTTPException(status_code=413, detail=f"PDF ueberschreitet {MAX_PDF_SIZE_BYTES // 1024 // 1024} MB") + sha.update(chunk) + out.write(chunk) + sha_hex = sha.hexdigest() + final_path = os.path.join(_pdf_dir(), f"{sha_hex}.pdf") + rel_path = os.path.join("pdfs", f"{sha_hex}.pdf") + + # Duplikat-Pruefung innerhalb des Tenants (oder global, falls eine + # gleiche PDF bereits als Grundquelle existiert -> dann sichtbar fuer alle). + cursor = await db.execute( + "SELECT id, name, tenant_id FROM sources WHERE pdf_sha256 = ? " + "AND (tenant_id IS NULL OR tenant_id = ?)", + (sha_hex, tenant_id), + ) + existing = await cursor.fetchone() + if existing: + os.unlink(tmp_path) + scope = "global" if existing["tenant_id"] is None else "Ihrer Organisation" + raise HTTPException( + status_code=409, + detail=f"PDF bereits in {scope} vorhanden als Quelle '{existing['name']}' (id={existing['id']})", + ) + + if not os.path.exists(final_path): + os.replace(tmp_path, final_path) + else: + os.unlink(tmp_path) + except HTTPException: + if os.path.exists(tmp_path): + try: os.unlink(tmp_path) + except OSError: pass + raise + except Exception as e: + if os.path.exists(tmp_path): + try: os.unlink(tmp_path) + except OSError: pass + logger.exception("PDF-Upload (tenant) fehlgeschlagen") + raise HTTPException(status_code=500, detail=f"PDF-Upload fehlgeschlagen: {e}") + + display_name = (name or "").strip() or re.sub(r"\.pdf$", "", file.filename or "PDF", flags=re.I) + display_name = display_name[:200] + + cursor = await db.execute( + """INSERT INTO sources + (name, url, domain, source_type, category, status, notes, language, + pdf_path, pdf_sha256, added_by, tenant_id) + VALUES (?, NULL, NULL, 'pdf_document', ?, 'active', ?, ?, ?, ?, ?, ?)""", + (display_name, category, notes, language, rel_path, sha_hex, + current_user["username"], tenant_id), + ) + src_id = cursor.lastrowid + await db.commit() + + cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (src_id,)) + row = await cursor.fetchone() + result = dict(row) + result["is_global"] = result.get("tenant_id") is None + result["state_affiliated"] = bool(result.get("state_affiliated")) + result["alignments"] = [] + return result diff --git a/src/static/dashboard.html b/src/static/dashboard.html index 427aa6c..b5ee2fc 100644 --- a/src/static/dashboard.html +++ b/src/static/dashboard.html @@ -555,6 +555,7 @@
+
@@ -633,6 +634,57 @@ + + +