feat(sources): PDF-Upload auch in der Endkunden-App (Kundenquelle)

- POST /api/sources/upload-pdf: tenant-scoped Upload, gleiche Speicher-
  Konvention wie der Verwaltungs-Endpoint (<dirname(DB)>/pdfs/{sha}.pdf).
  Duplikat-Check beruecksichtigt globale Quellen.
- dashboard.html: +PDF-Button in der Quellenverwaltungs-Toolbar +
  eigenes Modal modal-pdf-upload (closeModal-Quotes via &#39;).
- app.js: App.openPdfUpload + _bindPdfUploadFormOnce (Submit nur einmal
  binden).
- api.js: API.upload(path, formData) Helper analog Verwaltung.
Dieser Commit ist enthalten in:
Claude Code
2026-05-16 23:57:32 +00:00
Ursprung e68386f6bb
Commit 168fbc3987
4 geänderte Dateien mit 255 neuen und 1 gelöschten Zeilen

Datei anzeigen

@@ -1,13 +1,19 @@
"""Sources-Router: Quellenverwaltung (Multi-Tenant). Klassifikation: Read-Only — Pflege in der Verwaltung.""" """Sources-Router: Quellenverwaltung (Multi-Tenant). Klassifikation: Read-Only — Pflege in der Verwaltung."""
import json import json
import logging import logging
import uuid
import re
import os
import hashlib
from collections import defaultdict from collections import defaultdict
from fastapi import APIRouter, Depends, HTTPException, status from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest
from auth import get_current_user from auth import get_current_user
from database import db_dependency, refresh_source_counts from database import db_dependency, refresh_source_counts
from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES
import aiosqlite import aiosqlite
from config import DB_PATH
from typing import Optional
logger = logging.getLogger("osint.sources") logger = logging.getLogger("osint.sources")
@@ -640,3 +646,110 @@ async def trigger_refresh_counts(
await refresh_source_counts(db) await refresh_source_counts(db)
return {"status": "ok"} return {"status": "ok"}
# --- PDF-Upload (Kundenquelle vom Typ pdf_document) ---
# Analog zum Verwaltungs-Upload, aber tenant-spezifisch.
# Datei landet unter <dirname(DB_PATH)>/pdfs/{sha256}.pdf.
# Der Worker (services.pdf_ingest) verarbeitet sie asynchron im Minutentakt.
MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024 # 50 MB
PDF_DIR = os.path.join(os.path.dirname(os.path.abspath(DB_PATH)), "pdfs")
def _pdf_dir() -> str:
    """Return the PDF storage directory, creating it first if it is missing."""
    target = PDF_DIR
    # exist_ok=True makes repeated calls harmless once the directory exists.
    os.makedirs(target, exist_ok=True)
    return target
@router.post("/upload-pdf", status_code=status.HTTP_201_CREATED)
async def upload_pdf_source(
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
    file: UploadFile = File(...),
    name: Optional[str] = Form(None),
    category: str = Form("sonstige"),
    language: Optional[str] = Form(None),
    notes: Optional[str] = Form(None),
):
    """Upload a PDF and register it as a tenant source (source_type=pdf_document).

    The upload is streamed in 1 MiB chunks into a temporary file inside the
    PDF directory while its SHA-256 is computed. Once complete, the file is
    moved to ``<pdf dir>/<sha256>.pdf`` and a row is inserted into ``sources``.

    Args:
        current_user: Authenticated user; ``tenant_id`` and ``username`` are read.
        db: Open database connection.
        file: The uploaded file; must start with the ``%PDF-`` magic bytes.
        name: Optional display name; falls back to the file name without ``.pdf``.
        category: Source category stored as-is.
        language: Optional language label stored as-is.
        notes: Optional free-text notes stored as-is.

    Returns:
        The inserted ``sources`` row as a dict, extended with ``is_global``,
        ``state_affiliated`` (coerced to bool) and an empty ``alignments`` list.

    Raises:
        HTTPException: 415 if the file does not start with ``%PDF-``;
            413 if it exceeds ``MAX_PDF_SIZE_BYTES``;
            409 if a source with the same SHA-256 already exists for this
            tenant or as a global source (``tenant_id IS NULL``);
            500 on any other failure while storing the file.
    """

    def _discard(path: str) -> None:
        """Best-effort removal of the temporary upload file."""
        try:
            os.unlink(path)
        except FileNotFoundError:
            # Already moved or removed - nothing to clean up.
            pass
        except OSError:
            # A leftover temp file must never mask the real outcome of the request.
            logger.warning(
                "Temporaere Upload-Datei konnte nicht entfernt werden: %s",
                path,
                exc_info=True,
            )

    # Cheap magic-byte check before anything is written to disk.
    head = await file.read(8)
    if not head.startswith(b"%PDF-"):
        raise HTTPException(status_code=415, detail="Datei ist kein gueltiges PDF")

    tenant_id = current_user.get("tenant_id")
    sha = hashlib.sha256()
    sha.update(head)
    total = len(head)
    pdf_dir = _pdf_dir()
    # Random temp name: the final name is only known once the hash is complete.
    tmp_path = os.path.join(pdf_dir, f".upload-{uuid.uuid4().hex}.tmp")

    try:
        with open(tmp_path, "wb") as out:
            out.write(head)
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                total += len(chunk)
                # Enforce the limit while streaming so an oversized upload is
                # rejected without being written out completely.
                if total > MAX_PDF_SIZE_BYTES:
                    raise HTTPException(
                        status_code=413,
                        detail=f"PDF ueberschreitet {MAX_PDF_SIZE_BYTES // 1024 // 1024} MB",
                    )
                sha.update(chunk)
                out.write(chunk)

        sha_hex = sha.hexdigest()
        final_path = os.path.join(pdf_dir, f"{sha_hex}.pdf")
        rel_path = os.path.join("pdfs", f"{sha_hex}.pdf")

        # Duplicate check within the tenant, or globally if the same PDF
        # already exists as a global source (tenant_id IS NULL).
        cursor = await db.execute(
            "SELECT id, name, tenant_id FROM sources WHERE pdf_sha256 = ? "
            "AND (tenant_id IS NULL OR tenant_id = ?)",
            (sha_hex, tenant_id),
        )
        existing = await cursor.fetchone()
        if existing:
            scope = "global" if existing["tenant_id"] is None else "Ihrer Organisation"
            # The temp file is removed by the HTTPException handler below.
            raise HTTPException(
                status_code=409,
                detail=f"PDF bereits in {scope} vorhanden als Quelle '{existing['name']}' (id={existing['id']})",
            )

        if not os.path.exists(final_path):
            os.replace(tmp_path, final_path)
        else:
            # Same content is already stored under this hash (the duplicate
            # query above found no row visible to this tenant); reuse it.
            _discard(tmp_path)
    except HTTPException:
        _discard(tmp_path)
        raise
    except Exception as exc:
        _discard(tmp_path)
        logger.exception("PDF-Upload (tenant) fehlgeschlagen")
        # Do not echo internal exception text (paths, SQL errors) to the client;
        # the full traceback is in the log.
        raise HTTPException(status_code=500, detail="PDF-Upload fehlgeschlagen") from exc

    display_name = (name or "").strip()
    if not display_name:
        display_name = re.sub(r"\.pdf$", "", file.filename or "PDF", flags=re.I).strip()
    # A file name such as ".pdf" would otherwise yield an empty label.
    display_name = display_name[:200] or "PDF"

    cursor = await db.execute(
        """INSERT INTO sources
        (name, url, domain, source_type, category, status, notes, language,
        pdf_path, pdf_sha256, added_by, tenant_id)
        VALUES (?, NULL, NULL, 'pdf_document', ?, 'active', ?, ?, ?, ?, ?, ?)""",
        (display_name, category, notes, language, rel_path, sha_hex,
         current_user["username"], tenant_id),
    )
    src_id = cursor.lastrowid
    await db.commit()

    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (src_id,))
    row = await cursor.fetchone()
    result = dict(row)
    result["is_global"] = result.get("tenant_id") is None
    result["state_affiliated"] = bool(result.get("state_affiliated"))
    result["alignments"] = []
    return result

Datei anzeigen

@@ -555,6 +555,7 @@
<input type="text" id="sources-search" class="timeline-filter-input sources-search-input" placeholder="Suche..." oninput="App.filterSources()" data-i18n-attr="placeholder:sources_modal.search_placeholder"> <input type="text" id="sources-search" class="timeline-filter-input sources-search-input" placeholder="Suche..." oninput="App.filterSources()" data-i18n-attr="placeholder:sources_modal.search_placeholder">
</div> </div>
<div class="sources-toolbar-actions"> <div class="sources-toolbar-actions">
<button class="btn btn-secondary btn-small" onclick="App.openPdfUpload()" style="margin-right:8px;">+ PDF hochladen</button>
<button class="btn btn-primary btn-small" onclick="App.toggleSourceForm()" data-i18n="sources_modal.add_source">+ Quelle</button> <button class="btn btn-primary btn-small" onclick="App.toggleSourceForm()" data-i18n="sources_modal.add_source">+ Quelle</button>
</div> </div>
</div> </div>
@@ -633,6 +634,57 @@
</div> </div>
</div> </div>
<!-- Modal: PDF als Quelle hochladen -->
<div class="modal-overlay" id="modal-pdf-upload" role="dialog" aria-modal="true" aria-labelledby="modal-pdf-upload-title">
<div class="modal">
<div class="modal-header">
<div class="modal-title" id="modal-pdf-upload-title">PDF als Quelle hochladen</div>
<button class="modal-close" onclick="closeModal(&#39;modal-pdf-upload&#39;)" aria-label="Schliessen">&times;</button>
</div>
<form id="pdf-upload-form" enctype="multipart/form-data">
<div class="modal-body">
<p class="text-secondary" style="margin-top:0;">
Die PDF wird gespeichert und im Hintergrund verarbeitet: Text wird extrahiert (OCR-Fallback fuer gescannte Dokumente) und nach Deutsch und Englisch uebersetzt. Sie erscheint danach in Ihrer Quellenliste.
</p>
<div class="form-group">
<label for="pdf-upload-file">PDF-Datei (max. 50 MB)</label>
<input type="file" id="pdf-upload-file" accept="application/pdf,.pdf" required>
</div>
<div class="form-group">
<label for="pdf-upload-name">Anzeige-Name (optional)</label>
<input type="text" id="pdf-upload-name" maxlength="200" placeholder="leer = Dateiname">
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;">
<div class="form-group">
<label for="pdf-upload-category">Kategorie</label>
<select id="pdf-upload-category">
<option value="sonstige" selected>Sonstige</option>
<option value="behoerde">Behoerde</option>
<option value="think-tank">Think-Tank</option>
<option value="fachmedien">Fachmedien</option>
<option value="international">International</option>
</select>
</div>
<div class="form-group">
<label for="pdf-upload-language">Sprache (optional)</label>
<input type="text" id="pdf-upload-language" placeholder="z.B. Deutsch, Englisch">
</div>
</div>
<div class="form-group">
<label for="pdf-upload-notes">Notizen</label>
<input type="text" id="pdf-upload-notes" placeholder="Optional">
</div>
<div id="pdf-upload-error" class="error-msg" style="display:none"></div>
<div id="pdf-upload-progress" class="text-secondary" style="display:none;margin-top:8px;">Laedt hoch &hellip;</div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" onclick="closeModal(&#39;modal-pdf-upload&#39;)">Abbrechen</button>
<button type="submit" class="btn btn-primary" id="pdf-upload-submit">Hochladen</button>
</div>
</form>
</div>
</div>
<!-- Modal: Content-Viewer (wiederverwendbar für Lagebild, Faktencheck, Quellenübersicht, Timeline) --> <!-- Modal: Content-Viewer (wiederverwendbar für Lagebild, Faktencheck, Quellenübersicht, Timeline) -->
<div class="modal-overlay" id="modal-content-viewer" role="dialog" aria-modal="true" aria-labelledby="content-viewer-title"> <div class="modal-overlay" id="modal-content-viewer" role="dialog" aria-modal="true" aria-labelledby="content-viewer-title">
<div class="modal modal-content-viewer"> <div class="modal modal-content-viewer">

Datei anzeigen

@@ -22,6 +22,31 @@ const API = {
}; };
}, },
async upload(path, formData) {
const token = localStorage.getItem("osint_token");
const headers = {};
if (token) headers["Authorization"] = `Bearer ${token}`;
const response = await fetch(`${this.baseUrl}${path}`, {
method: "POST",
headers,
body: formData,
});
if (response.status === 401) {
localStorage.removeItem("osint_token");
localStorage.removeItem("osint_username");
window.location.href = "/";
return;
}
if (!response.ok) {
const data = await response.json().catch(() => ({}));
let d = data.detail;
if (Array.isArray(d)) d = d.map(e => e.msg || JSON.stringify(e)).join("; ");
else if (typeof d === "object" && d !== null) d = JSON.stringify(d);
throw new Error(d || `Fehler ${response.status}`);
}
return response.json();
},
async _request(method, path, body = null, externalSignal = null) { async _request(method, path, body = null, externalSignal = null) {
const controller = new AbortController(); const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 30000); const timeout = setTimeout(() => controller.abort(), 30000);

Datei anzeigen

@@ -3106,6 +3106,70 @@ async handleRefresh() {
_discoveredData: null, _discoveredData: null,
openPdfUpload() {
const form = document.getElementById("pdf-upload-form");
if (form) form.reset();
const err = document.getElementById("pdf-upload-error");
if (err) { err.style.display = "none"; err.textContent = ""; }
const prog = document.getElementById("pdf-upload-progress");
if (prog) prog.style.display = "none";
openModal("modal-pdf-upload");
this._bindPdfUploadFormOnce();
},
_bindPdfUploadFormOnce() {
const form = document.getElementById("pdf-upload-form");
if (!form || form.dataset.bound === "1") return;
form.dataset.bound = "1";
form.addEventListener("submit", async (e) => {
e.preventDefault();
const errEl = document.getElementById("pdf-upload-error");
const progEl = document.getElementById("pdf-upload-progress");
const submitBtn = document.getElementById("pdf-upload-submit");
errEl.style.display = "none";
const fileInput = document.getElementById("pdf-upload-file");
const f = fileInput && fileInput.files && fileInput.files[0];
if (!f) {
errEl.textContent = "Bitte eine PDF-Datei auswaehlen.";
errEl.style.display = "block";
return;
}
if (f.size > 50 * 1024 * 1024) {
errEl.textContent = "Datei ueberschreitet 50 MB.";
errEl.style.display = "block";
return;
}
const fd = new FormData();
fd.append("file", f);
const nm = (document.getElementById("pdf-upload-name").value || "").trim();
if (nm) fd.append("name", nm);
fd.append("category", document.getElementById("pdf-upload-category").value || "sonstige");
const lng = (document.getElementById("pdf-upload-language").value || "").trim();
if (lng) fd.append("language", lng);
const nt = (document.getElementById("pdf-upload-notes").value || "").trim();
if (nt) fd.append("notes", nt);
submitBtn.disabled = true;
progEl.style.display = "block";
try {
await API.upload("/sources/upload-pdf", fd);
closeModal("modal-pdf-upload");
if (typeof UI !== "undefined" && UI.showToast) {
UI.showToast("PDF hochgeladen -- Verarbeitung laeuft im Hintergrund", "success");
}
await App.loadSources();
} catch (err) {
errEl.textContent = err && err.message ? err.message : "Upload fehlgeschlagen";
errEl.style.display = "block";
} finally {
submitBtn.disabled = false;
progEl.style.display = "none";
}
});
},
toggleSourceForm(show) { toggleSourceForm(show) {
const form = document.getElementById('sources-add-form'); const form = document.getElementById('sources-add-form');
if (!form) return; if (!form) return;