feat(sources): PDF-Upload auch in der Endkunden-App (Kundenquelle)
- POST /api/sources/upload-pdf: tenant-scoped Upload, gleiche Speicher-
Konvention wie der Verwaltungs-Endpoint (<dirname(DB)>/pdfs/{sha}.pdf).
Duplikat-Check beruecksichtigt globale Quellen.
- dashboard.html: +PDF-Button in der Quellenverwaltungs-Toolbar +
eigenes Modal modal-pdf-upload (closeModal-Quotes via ').
- app.js: App.openPdfUpload + _bindPdfUploadFormOnce (Submit nur einmal
binden).
- api.js: API.upload(path, formData) Helper analog Verwaltung.
Dieser Commit ist enthalten in:
@@ -1,13 +1,19 @@
|
||||
"""Sources-Router: Quellenverwaltung (Multi-Tenant). Klassifikation: Read-Only — Pflege in der Verwaltung."""
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
import re
|
||||
import os
|
||||
import hashlib
|
||||
from collections import defaultdict
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
|
||||
from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest
|
||||
from auth import get_current_user
|
||||
from database import db_dependency, refresh_source_counts
|
||||
from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES
|
||||
import aiosqlite
|
||||
from config import DB_PATH
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger("osint.sources")
|
||||
|
||||
@@ -640,3 +646,110 @@ async def trigger_refresh_counts(
|
||||
await refresh_source_counts(db)
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# --- PDF upload (customer source of type pdf_document) ---
# Counterpart of the admin ("Verwaltung") upload, but scoped to one tenant.
# The file is stored as <dirname(DB_PATH)>/pdfs/{sha256}.pdf.
# The worker (services.pdf_ingest) processes it asynchronously, once a minute.

# Upper bound for a single uploaded PDF (50 MB).
MAX_PDF_SIZE_BYTES = 50 * 1024 ** 2

# Storage directory: a "pdfs" folder next to the SQLite database file.
PDF_DIR = os.path.join(
    os.path.dirname(os.path.abspath(DB_PATH)),
    "pdfs",
)
|
||||
|
||||
|
||||
def _pdf_dir() -> str:
    """Return the PDF storage directory, creating it first if it is missing."""
    target = PDF_DIR
    # exist_ok=True makes repeated calls harmless once the directory exists.
    os.makedirs(target, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def _discard_upload_tmp(path: str) -> None:
    """Remove a temporary upload file on a best-effort basis.

    A missing file is the normal case (it was already moved into place);
    any other OS error is logged but never raised, so cleanup cannot mask
    the exception that is currently propagating.
    """
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass
    except OSError:
        logger.warning("Temporaere Upload-Datei konnte nicht entfernt werden: %s", path)


@router.post("/upload-pdf", status_code=status.HTTP_201_CREATED)
async def upload_pdf_source(
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
    file: UploadFile = File(...),
    name: Optional[str] = Form(None),
    category: str = Form("sonstige"),
    language: Optional[str] = Form(None),
    notes: Optional[str] = Form(None),
):
    """Upload a PDF and register it as a customer source (source_type=pdf_document).

    The upload is streamed to a temporary file while its SHA-256 is computed,
    then moved to ``<PDF dir>/{sha256}.pdf``. Uploading a PDF whose hash is
    already registered for the caller's tenant, or as a global source, is
    rejected with 409.

    Args:
        current_user: Authenticated user; ``tenant_id`` and ``username`` are read.
        db: Open database connection.
        file: The uploaded file; must start with the ``%PDF-`` magic bytes.
        name: Optional display name; defaults to the file name without ``.pdf``.
        category: Source category stored as-is.
        language: Optional language stored as-is.
        notes: Optional free-text notes stored as-is.

    Returns:
        The inserted ``sources`` row as a dict, extended with ``is_global``,
        a boolean ``state_affiliated`` and an empty ``alignments`` list.

    Raises:
        HTTPException: 415 if the file is not a PDF, 413 if it exceeds
            ``MAX_PDF_SIZE_BYTES``, 409 on a duplicate, 500 on any other
            failure while storing the file.
    """
    # Cheap magic-byte check before anything is written to disk.
    head = await file.read(8)
    if not head.startswith(b"%PDF-"):
        raise HTTPException(status_code=415, detail="Datei ist kein gueltiges PDF")

    # NOTE(review): if tenant_id is None the row below is inserted with
    # tenant_id NULL, i.e. as a global source - confirm that callers without
    # a tenant are meant to be able to do that.
    tenant_id = current_user.get("tenant_id")

    sha = hashlib.sha256()
    sha.update(head)
    total = len(head)
    tmp_path = os.path.join(_pdf_dir(), f".upload-{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp_path, "wb") as out:
            out.write(head)
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                total += len(chunk)
                # Enforce the limit while streaming so an oversized upload is
                # never fully written to the PDF directory.
                if total > MAX_PDF_SIZE_BYTES:
                    raise HTTPException(status_code=413, detail=f"PDF ueberschreitet {MAX_PDF_SIZE_BYTES // 1024 // 1024} MB")
                sha.update(chunk)
                out.write(chunk)

        sha_hex = sha.hexdigest()
        final_path = os.path.join(_pdf_dir(), f"{sha_hex}.pdf")
        rel_path = os.path.join("pdfs", f"{sha_hex}.pdf")

        # Duplicate check within the tenant, or globally if the same PDF is
        # already registered as a global source (and thus visible to everyone).
        cursor = await db.execute(
            "SELECT id, name, tenant_id FROM sources WHERE pdf_sha256 = ? "
            "AND (tenant_id IS NULL OR tenant_id = ?)",
            (sha_hex, tenant_id),
        )
        existing = await cursor.fetchone()
        if existing:
            scope = "global" if existing["tenant_id"] is None else "Ihrer Organisation"
            raise HTTPException(
                status_code=409,
                detail=f"PDF bereits in {scope} vorhanden als Quelle '{existing['name']}' (id={existing['id']})",
            )

        # The same content may already be on disk (e.g. stored for another
        # tenant); in that case the temp copy is simply dropped in `finally`.
        if not os.path.exists(final_path):
            os.replace(tmp_path, final_path)
    except HTTPException:
        # Deliberate client errors pass through unchanged.
        raise
    except Exception as e:
        logger.exception("PDF-Upload (tenant) fehlgeschlagen")
        # NOTE(review): the detail echoes the raw exception text to the
        # client; consider a generic message if that may expose internals.
        raise HTTPException(status_code=500, detail=f"PDF-Upload fehlgeschlagen: {e}") from e
    finally:
        # Runs on every exit path, including request cancellation
        # (asyncio.CancelledError is not an Exception subclass), so no
        # ".upload-*.tmp" file is left behind. No-op after a successful move.
        _discard_upload_tmp(tmp_path)

    # Prefer the explicit name, then the file name without ".pdf"; fall back
    # to "PDF" so the source never ends up with an empty display name.
    filename_stem = re.sub(r"\.pdf$", "", file.filename or "PDF", flags=re.I)
    display_name = (name or "").strip() or filename_stem.strip() or "PDF"
    display_name = display_name[:200]

    cursor = await db.execute(
        """INSERT INTO sources
           (name, url, domain, source_type, category, status, notes, language,
            pdf_path, pdf_sha256, added_by, tenant_id)
           VALUES (?, NULL, NULL, 'pdf_document', ?, 'active', ?, ?, ?, ?, ?, ?)""",
        (display_name, category, notes, language, rel_path, sha_hex,
         current_user["username"], tenant_id),
    )
    src_id = cursor.lastrowid
    await db.commit()

    # Re-read the row so the response carries column defaults as well.
    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (src_id,))
    row = await cursor.fetchone()
    result = dict(row)
    result["is_global"] = result.get("tenant_id") is None
    result["state_affiliated"] = bool(result.get("state_affiliated"))
    result["alignments"] = []
    return result
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren