Promote develop → main (2026-05-17 00:40 UTC) #6

Zusammengeführt
IntelSight_Admin hat 5 Commits von develop nach main 2026-05-17 02:40:41 +02:00 zusammengeführt
5 geänderte Dateien mit 261 neuen und 4 gelöschten Zeilen
Nur Änderungen aus Commit 27afce7c9e werden angezeigt - Alle Commits anzeigen

Datei anzeigen

@@ -7,3 +7,5 @@ python-multipart
aiosmtplib
httpx>=0.28
feedparser>=6.0
# PDF-Upload-Validierung
pypdf>=5.0

Datei anzeigen

@@ -1,9 +1,12 @@
"""Grundquellen-Verwaltung und Kundenquellen-Übersicht."""
import json
import logging
import hashlib
import os
import re
import uuid
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, status
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Request, UploadFile, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Optional
@@ -13,7 +16,7 @@ from auth import get_current_admin
from database import db_dependency, get_db
from audit import log_action, get_client_ip
from source_meta import get_meta
from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S
from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S, DB_PATH
from shared.source_rules import (
discover_source,
discover_all_feeds,
@@ -115,7 +118,7 @@ class GlobalSourceCreate(BaseModel):
name: str = Field(min_length=1, max_length=200)
url: Optional[str] = None
domain: Optional[str] = None
source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$")
source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed|pdf_document)$")
category: str = Field(default="sonstige")
status: str = Field(default="active", pattern="^(active|inactive)$")
notes: Optional[str] = None
@@ -128,7 +131,7 @@ class GlobalSourceUpdate(BaseModel):
name: Optional[str] = Field(default=None, max_length=200)
url: Optional[str] = None
domain: Optional[str] = None
source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$")
source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed|pdf_document)$")
category: Optional[str] = None
status: Optional[str] = Field(default=None, pattern="^(active|inactive)$")
notes: Optional[str] = None
@@ -1502,3 +1505,116 @@ async def bulk_approve_classifications(
after={"bulk_approved_ids": approved_ids, "min_confidence": min_confidence},
)
return {"approved": len(approved_ids), "ids": approved_ids}
# --- PDF upload (sources of type pdf_document) ---
# Storage location relative to the database: <dirname(DB_PATH)>/pdfs/{sha256}.pdf
# The monitor polls pdf_document sources whose processed_at IS NULL and
# extracts the text plus translations (DE/EN). The upload endpoint only
# stores the file and creates the source row (no LLM call happens here).
MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024  # 50 MB
PDF_DIR = os.path.join(
    os.path.dirname(os.path.abspath(DB_PATH)),
    "pdfs",
)


def _pdf_dir() -> str:
    """Return the PDF storage directory, creating it if it does not exist yet."""
    storage_dir = PDF_DIR
    os.makedirs(storage_dir, exist_ok=True)
    return storage_dir
def _discard_upload_tmp(path: str) -> None:
    """Best-effort removal of a temporary upload file; never raises."""
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Already renamed to its final name (or never created) - nothing to do.
        pass
    except OSError:
        logger.warning("Temporaere Upload-Datei konnte nicht entfernt werden: %s", path)


@router.post("/global/upload-pdf", status_code=201)
async def upload_pdf_source(
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
    file: UploadFile = File(...),
    name: Optional[str] = Form(None),
    category: str = Form("sonstige"),
    language: Optional[str] = Form(None),
    notes: Optional[str] = Form(None),
):
    """Upload a PDF and register it as a global source (source_type=pdf_document).

    The upload is streamed to a temporary file inside the PDF directory while
    its SHA-256 and size are computed. The file is then stored under
    ``<sha256>.pdf`` and a row is inserted into ``sources`` with
    ``tenant_id`` NULL.

    Returns:
        The newly inserted ``sources`` row as a dict.

    Raises:
        HTTPException: 415 if the upload does not start with ``%PDF-``;
            413 if it exceeds ``MAX_PDF_SIZE_BYTES``; 409 if a global source
            with the same SHA-256 already exists (the file is not stored
            again); 500 on any other failure while storing the file.
    """
    # Magic-bytes check: a PDF starts with "%PDF-".
    head = await file.read(8)
    if not head.startswith(b"%PDF-"):
        raise HTTPException(status_code=415, detail="Datei ist kein gueltiges PDF (Magic-Bytes fehlen)")

    # Stream the upload into a temp file, hashing and size-checking on the way.
    # NOTE(review): open()/write() are synchronous calls inside this coroutine.
    sha = hashlib.sha256()
    sha.update(head)
    total = len(head)
    tmp_path = os.path.join(_pdf_dir(), f".upload-{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp_path, "wb") as out:
            out.write(head)
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                total += len(chunk)
                if total > MAX_PDF_SIZE_BYTES:
                    raise HTTPException(status_code=413, detail=f"PDF ueberschreitet Maximum von {MAX_PDF_SIZE_BYTES // 1024 // 1024} MB")
                sha.update(chunk)
                out.write(chunk)

        sha_hex = sha.hexdigest()
        final_path = os.path.join(_pdf_dir(), f"{sha_hex}.pdf")
        rel_path = os.path.join("pdfs", f"{sha_hex}.pdf")

        # Duplicate check via sha256 among global sources (tenant_id IS NULL).
        cursor = await db.execute(
            "SELECT id, name FROM sources WHERE pdf_sha256 = ? AND tenant_id IS NULL",
            (sha_hex,),
        )
        existing = await cursor.fetchone()
        if existing:
            # The temp file is discarded by the finally clause below.
            raise HTTPException(
                status_code=409,
                detail=f"PDF bereits hochgeladen als Quelle '{existing['name']}' (id={existing['id']})",
            )

        # Move into place atomically. If a file with this hash already exists
        # on disk without a source row, it is reused and the temp copy dropped.
        if not os.path.exists(final_path):
            os.replace(tmp_path, final_path)
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("PDF-Upload fehlgeschlagen")
        raise HTTPException(status_code=500, detail=f"PDF-Upload fehlgeschlagen: {e}") from e
    finally:
        # Single cleanup point: a no-op after a successful os.replace().
        _discard_upload_tmp(tmp_path)

    # Derive the display name from the file name when none was supplied.
    display_name = (name or "").strip()
    if not display_name:
        display_name = re.sub(r"\.pdf$", "", file.filename or "", flags=re.I).strip()
    # A file literally named ".pdf" would otherwise yield an empty name.
    display_name = display_name[:200].strip() or "PDF"

    cursor = await db.execute(
        """INSERT INTO sources
        (name, url, domain, source_type, category, status, notes, language,
        pdf_path, pdf_sha256, added_by, tenant_id)
        VALUES (?, NULL, NULL, 'pdf_document', ?, 'active', ?, ?, ?, ?, ?, NULL)""",
        (display_name, category, notes, language, rel_path, sha_hex, admin.get("email") or "system"),
    )
    src_id = cursor.lastrowid
    await db.commit()

    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (src_id,))
    new_src = dict(await cursor.fetchone())
    await log_action(
        db, admin, get_client_ip(request),
        action="upload_pdf", resource_type="source", resource_id=src_id,
        after={"name": display_name, "pdf_sha256": sha_hex, "size_bytes": total},
    )
    return new_src

Datei anzeigen

@@ -328,6 +328,7 @@
<span class="text-secondary" id="globalSourceCount"></span>
</div>
<button class="btn btn-secondary" id="discoverSourceBtn">Erkennen</button>
<button class="btn btn-secondary" id="newPdfSourceBtn" style="margin-right:8px;">+ PDF hochladen</button>
<button class="btn btn-primary" id="newGlobalSourceBtn">+ Neue Grundquelle</button>
</div>
<div class="card">
@@ -641,6 +642,7 @@
<option value="telegram_channel">Telegram-Kanal</option>
<option value="podcast_feed">Podcast-Feed</option>
<option value="excluded">Ausgeschlossen</option>
<option value="pdf_document" disabled>PDF-Dokument (nur Upload)</option>
</select>
</div>
<div class="form-group">
@@ -799,6 +801,59 @@
</div>
</div>
<!-- Modal: upload a PDF as a source.
     The close handlers pass the modal id as a string, matching the
     openModal("modalPdfUpload") / closeModal("modalPdfUpload") calls in the
     accompanying script. -->
<div class="modal-overlay" id="modalPdfUpload">
  <div class="modal">
    <div class="modal-header">
      <h3>PDF als Quelle hochladen</h3>
      <button class="modal-close" onclick="closeModal('modalPdfUpload')">&times;</button>
    </div>
    <form id="pdfUploadForm" enctype="multipart/form-data">
      <div class="modal-body">
        <p class="text-secondary" style="margin-top:0;">
          Die PDF wird gespeichert und vom Monitor automatisch verarbeitet:
          Text extrahieren (OCR-Fallback fuer gescannte Dokumente),
          Übersetzung nach Deutsch und Englisch.
        </p>
        <div class="form-group">
          <label for="pdfFile">PDF-Datei (max. 50 MB)</label>
          <input type="file" id="pdfFile" accept="application/pdf,.pdf" required>
        </div>
        <div class="form-group">
          <label for="pdfName">Anzeige-Name (optional)</label>
          <input type="text" id="pdfName" maxlength="200" placeholder="leer = Dateiname">
        </div>
        <div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;">
          <div class="form-group">
            <label for="pdfCategory">Kategorie</label>
            <select id="pdfCategory">
              <option value="sonstige" selected>Sonstige</option>
              <option value="behoerde">Behörde</option>
              <option value="think-tank">Think-Tank</option>
              <option value="fachmedien">Fachmedien</option>
              <option value="international">International</option>
            </select>
          </div>
          <div class="form-group">
            <label for="pdfLanguage">Sprache (optional)</label>
            <input type="text" id="pdfLanguage" list="languageSuggestions" placeholder="z.B. Deutsch, Englisch">
          </div>
        </div>
        <div class="form-group">
          <label for="pdfNotes">Notizen</label>
          <input type="text" id="pdfNotes" placeholder="Optional">
        </div>
        <div id="pdfUploadError" class="error-msg" style="display:none"></div>
        <div id="pdfUploadProgress" class="text-secondary" style="display:none;margin-top:8px;">Lädt hoch …</div>
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-secondary" onclick="closeModal('modalPdfUpload')">Abbrechen</button>
        <button type="submit" class="btn btn-primary" id="pdfUploadSubmitBtn">Hochladen</button>
      </div>
    </form>
  </div>
</div>
<!-- Modal: Discover Sources -->
<div class="modal-overlay" id="modalDiscover">
<div class="modal" style="max-width:600px;">

Datei anzeigen

@@ -26,6 +26,23 @@ const API = {
// POST `body`, JSON-encoded, to `path` via this.request.
post(path, body) { return this.request(path, { method: "POST", body: JSON.stringify(body) }); },
// PUT to `path` via this.request; a truthy `body` is JSON-encoded, otherwise the body is undefined.
put(path, body) { return this.request(path, { method: "PUT", body: body ? JSON.stringify(body) : undefined }); },
// DELETE `path` via this.request.
del(path) { return this.request(path, { method: "DELETE" }); },
async upload(path, formData) {
const headers = {};
if (this.token) headers["Authorization"] = `Bearer ${this.token}`;
const res = await fetch(path, { method: "POST", headers, body: formData });
if (res.status === 401) {
localStorage.removeItem("token");
localStorage.removeItem("username");
window.location.href = "/";
return;
}
if (!res.ok) {
const data = await res.json().catch(() => ({}));
throw new Error(data.detail || `Fehler ${res.status}`);
}
return res.json();
},
};
// --- State ---

Datei anzeigen

@@ -311,6 +311,8 @@ function editGlobalSource(id) {
function setupSourceForms() {
document.getElementById("newGlobalSourceBtn").addEventListener("click", openNewGlobalSource);
document.getElementById("newPdfSourceBtn")?.addEventListener("click", openPdfUploadModal);
setupPdfUploadForm();
document.getElementById("discoverSourceBtn").addEventListener("click", () => {
document.getElementById("discoverUrl").value = "";
document.getElementById("discoverStatus").style.display = "none";
@@ -880,3 +882,68 @@ function toggleSourceInfo(id) {
if (btn) btn.classList.toggle("active", !isVisible);
}
}
// --- PDF-Quellen-Upload ---
/**
 * Reset the PDF upload form, hide its error and progress indicators,
 * then open the upload modal.
 */
function openPdfUploadModal() {
  document.getElementById("pdfUploadForm")?.reset();

  const errorBox = document.getElementById("pdfUploadError");
  if (errorBox) {
    errorBox.style.display = "none";
    errorBox.textContent = "";
  }

  const progressBox = document.getElementById("pdfUploadProgress");
  if (progressBox) {
    progressBox.style.display = "none";
  }

  openModal("modalPdfUpload");
}
/**
 * Attach the submit handler to the PDF upload form.
 *
 * The handler is bound at most once per form element (tracked through
 * `data-bound`). On submit it validates the selected file, sends it together
 * with the optional metadata fields via API.upload, and on success closes
 * the modal, shows a toast and reloads the global source list.
 */
function setupPdfUploadForm() {
  const form = document.getElementById("pdfUploadForm");
  if (!form || form.dataset.bound === "1") return;
  form.dataset.bound = "1";

  form.addEventListener("submit", async (e) => {
    e.preventDefault();
    const errEl = document.getElementById("pdfUploadError");
    const progEl = document.getElementById("pdfUploadProgress");
    const submitBtn = document.getElementById("pdfUploadSubmitBtn");
    const showError = (msg) => {
      errEl.textContent = msg;
      errEl.style.display = "block";
    };
    errEl.style.display = "none";

    const fileInput = document.getElementById("pdfFile");
    const f = fileInput?.files?.[0];
    if (!f) {
      showError("Bitte eine PDF-Datei auswaehlen.");
      return;
    }
    // Client-side pre-check of the 50 MB limit stated in the form label.
    if (f.size > 50 * 1024 * 1024) {
      showError("Datei ueberschreitet 50 MB.");
      return;
    }

    // Optional fields are only sent when non-empty.
    const fd = new FormData();
    fd.append("file", f);
    const nm = document.getElementById("pdfName").value.trim();
    if (nm) fd.append("name", nm);
    fd.append("category", document.getElementById("pdfCategory").value || "sonstige");
    const lng = document.getElementById("pdfLanguage").value.trim();
    if (lng) fd.append("language", lng);
    const nt = document.getElementById("pdfNotes").value.trim();
    if (nt) fd.append("notes", nt);

    submitBtn.disabled = true;
    progEl.style.display = "block";
    try {
      const created = await API.upload("/api/sources/global/upload-pdf", fd);
      // API.upload resolves to undefined after a 401 redirect; do not report
      // success or reload the list in that case.
      if (created === undefined) return;
      closeModal("modalPdfUpload");
      if (typeof showToast === "function") {
        showToast("PDF hochgeladen -- Verarbeitung laeuft im Hintergrund", "success");
      }
      loadGlobalSources();
    } catch (err) {
      showError(err.message || "Upload fehlgeschlagen");
    } finally {
      submitBtn.disabled = false;
      progEl.style.display = "none";
    }
  });
}