Promote develop → main (2026-05-17 00:40 UTC) #6
@@ -7,3 +7,5 @@ python-multipart
|
|||||||
aiosmtplib
|
aiosmtplib
|
||||||
httpx>=0.28
|
httpx>=0.28
|
||||||
feedparser>=6.0
|
feedparser>=6.0
|
||||||
|
# PDF-Upload-Validierung
|
||||||
|
pypdf>=5.0
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
"""Grundquellen-Verwaltung und Kundenquellen-Übersicht."""
|
"""Grundquellen-Verwaltung und Kundenquellen-Übersicht."""
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import re
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, status
|
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Request, UploadFile, status
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@@ -13,7 +16,7 @@ from auth import get_current_admin
|
|||||||
from database import db_dependency, get_db
|
from database import db_dependency, get_db
|
||||||
from audit import log_action, get_client_ip
|
from audit import log_action, get_client_ip
|
||||||
from source_meta import get_meta
|
from source_meta import get_meta
|
||||||
from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S
|
from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S, DB_PATH
|
||||||
from shared.source_rules import (
|
from shared.source_rules import (
|
||||||
discover_source,
|
discover_source,
|
||||||
discover_all_feeds,
|
discover_all_feeds,
|
||||||
@@ -115,7 +118,7 @@ class GlobalSourceCreate(BaseModel):
|
|||||||
name: str = Field(min_length=1, max_length=200)
|
name: str = Field(min_length=1, max_length=200)
|
||||||
url: Optional[str] = None
|
url: Optional[str] = None
|
||||||
domain: Optional[str] = None
|
domain: Optional[str] = None
|
||||||
source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$")
|
source_type: str = Field(default="rss_feed", pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed|pdf_document)$")
|
||||||
category: str = Field(default="sonstige")
|
category: str = Field(default="sonstige")
|
||||||
status: str = Field(default="active", pattern="^(active|inactive)$")
|
status: str = Field(default="active", pattern="^(active|inactive)$")
|
||||||
notes: Optional[str] = None
|
notes: Optional[str] = None
|
||||||
@@ -128,7 +131,7 @@ class GlobalSourceUpdate(BaseModel):
|
|||||||
name: Optional[str] = Field(default=None, max_length=200)
|
name: Optional[str] = Field(default=None, max_length=200)
|
||||||
url: Optional[str] = None
|
url: Optional[str] = None
|
||||||
domain: Optional[str] = None
|
domain: Optional[str] = None
|
||||||
source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$")
|
source_type: Optional[str] = Field(default=None, pattern="^(rss_feed|web_source|excluded|telegram_channel|podcast_feed|pdf_document)$")
|
||||||
category: Optional[str] = None
|
category: Optional[str] = None
|
||||||
status: Optional[str] = Field(default=None, pattern="^(active|inactive)$")
|
status: Optional[str] = Field(default=None, pattern="^(active|inactive)$")
|
||||||
notes: Optional[str] = None
|
notes: Optional[str] = None
|
||||||
@@ -1502,3 +1505,116 @@ async def bulk_approve_classifications(
|
|||||||
after={"bulk_approved_ids": approved_ids, "min_confidence": min_confidence},
|
after={"bulk_approved_ids": approved_ids, "min_confidence": min_confidence},
|
||||||
)
|
)
|
||||||
return {"approved": len(approved_ids), "ids": approved_ids}
|
return {"approved": len(approved_ids), "ids": approved_ids}
|
||||||
|
|
||||||
|
|
||||||
|
# --- PDF-Upload (Quelle vom Typ pdf_document) ---
|
||||||
|
# Speicherort relativ zur DB: <dirname(DB_PATH)>/pdfs/{sha256}.pdf
|
||||||
|
# Der Monitor pollt pdf_document-Quellen mit processed_at IS NULL und
|
||||||
|
# extrahiert Text + Uebersetzungen (DE/EN). Dieser Endpoint legt nur die
|
||||||
|
# Datei + den Source-Eintrag an (kein LLM-Call hier).
|
||||||
|
|
||||||
|
# Upper bound for an uploaded PDF, in bytes (50 MB).
MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024  # 50 MB
# PDFs are stored in a "pdfs" directory next to the database file.
PDF_DIR = os.path.join(os.path.dirname(os.path.abspath(DB_PATH)), "pdfs")


def _pdf_dir() -> str:
    """Return the PDF storage directory, creating it if it does not exist yet."""
    storage_dir = PDF_DIR
    # exist_ok=True makes repeated calls harmless.
    os.makedirs(storage_dir, exist_ok=True)
    return storage_dir
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/global/upload-pdf", status_code=201)
async def upload_pdf_source(
    request: Request,
    admin: dict = Depends(get_current_admin),
    db: aiosqlite.Connection = Depends(db_dependency),
    file: UploadFile = File(...),
    name: Optional[str] = Form(None),
    category: str = Form("sonstige"),
    language: Optional[str] = Form(None),
    notes: Optional[str] = Form(None),
):
    """Store an uploaded PDF and register it as a global source (pdf_document).

    The upload is streamed into a temporary file while its SHA-256 digest and
    size are computed, then moved to ``<pdf dir>/<sha256>.pdf``. This endpoint
    only creates the file and the ``sources`` row.

    Args:
        request: Incoming request; used to resolve the client IP for the audit log.
        admin: Authenticated admin; its ``email`` is stored as ``added_by``
            (``"system"`` when missing).
        db: Database connection used for the duplicate check and the insert.
        file: Uploaded file; its content must start with the ``%PDF-`` magic bytes.
        name: Optional display name. Falls back to the filename without its
            ``.pdf`` suffix, and to ``"PDF"`` when that is empty as well.
        category: Category stored on the source row.
        language: Optional language stored on the source row.
        notes: Optional notes stored on the source row.

    Returns:
        The newly inserted ``sources`` row as a dict.

    Raises:
        HTTPException: 415 when the magic bytes are missing, 413 when the file
            exceeds ``MAX_PDF_SIZE_BYTES``, 409 when a global source with the
            same SHA-256 already exists (the file is not stored again), and
            500 when storing the file fails.
    """
    # Magic-bytes check: a PDF starts with "%PDF-".
    head = await file.read(8)
    if not head.startswith(b"%PDF-"):
        raise HTTPException(status_code=415, detail="Datei ist kein gueltiges PDF (Magic-Bytes fehlen)")

    # Stream into a temp file while hashing and enforcing the size limit.
    sha = hashlib.sha256()
    sha.update(head)
    total = len(head)
    pdf_dir = _pdf_dir()
    tmp_path = os.path.join(pdf_dir, f".upload-{uuid.uuid4().hex}.tmp")
    try:
        # NOTE(review): these are synchronous file writes inside an async
        # handler; they block the event loop for the duration of each write.
        with open(tmp_path, "wb") as out:
            out.write(head)
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                total += len(chunk)
                # Check before writing so at most one chunk over the limit is read.
                if total > MAX_PDF_SIZE_BYTES:
                    raise HTTPException(status_code=413, detail=f"PDF ueberschreitet Maximum von {MAX_PDF_SIZE_BYTES // 1024 // 1024} MB")
                sha.update(chunk)
                out.write(chunk)
        sha_hex = sha.hexdigest()
        final_path = os.path.join(pdf_dir, f"{sha_hex}.pdf")
        rel_path = os.path.join("pdfs", f"{sha_hex}.pdf")

        # Duplicate check via SHA-256 among global sources (tenant_id IS NULL).
        # NOTE(review): check-then-insert is not atomic; two concurrent uploads
        # of the same PDF could both pass unless the schema enforces uniqueness
        # on pdf_sha256 -- confirm.
        cursor = await db.execute(
            "SELECT id, name FROM sources WHERE pdf_sha256 = ? AND tenant_id IS NULL",
            (sha_hex,),
        )
        existing = await cursor.fetchone()
        if existing:
            # The temp file is discarded by the HTTPException handler below, so
            # a failing unlink can no longer turn this 409 into a 500.
            raise HTTPException(
                status_code=409,
                detail=f"PDF bereits hochgeladen als Quelle '{existing['name']}' (id={existing['id']})",
            )

        if not os.path.exists(final_path):
            # Atomic rename of the finished temp file to its content-addressed name.
            os.replace(tmp_path, final_path)
        else:
            # A file with this hash exists on disk but has no source row: reuse it.
            _discard_upload_tmp(tmp_path)
    except HTTPException:
        _discard_upload_tmp(tmp_path)
        raise
    except Exception as e:
        _discard_upload_tmp(tmp_path)
        logger.exception("PDF-Upload fehlgeschlagen")
        raise HTTPException(status_code=500, detail=f"PDF-Upload fehlgeschlagen: {e}") from e

    display_name = _pdf_display_name(name, file.filename)

    cursor = await db.execute(
        """INSERT INTO sources
        (name, url, domain, source_type, category, status, notes, language,
        pdf_path, pdf_sha256, added_by, tenant_id)
        VALUES (?, NULL, NULL, 'pdf_document', ?, 'active', ?, ?, ?, ?, ?, NULL)""",
        (display_name, category, notes, language, rel_path, sha_hex, admin.get("email") or "system"),
    )
    src_id = cursor.lastrowid
    await db.commit()

    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (src_id,))
    new_src = dict(await cursor.fetchone())
    await log_action(
        db, admin, get_client_ip(request),
        action="upload_pdf", resource_type="source", resource_id=src_id,
        after={"name": display_name, "pdf_sha256": sha_hex, "size_bytes": total},
    )
    return new_src


def _discard_upload_tmp(path: str) -> None:
    """Best-effort removal of a temporary upload file; a missing file is fine."""
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Already renamed or removed -- nothing to clean up.
        pass
    except OSError:
        # Cleanup must never mask the real outcome of the upload; just record it.
        logger.warning("Temporaere Upload-Datei konnte nicht entfernt werden: %s", path, exc_info=True)


def _pdf_display_name(name: Optional[str], filename: Optional[str]) -> str:
    """Pick the source name: explicit name, else filename without .pdf, else "PDF"."""
    explicit = (name or "").strip()
    if explicit:
        return explicit[:200]
    # A filename such as ".pdf" leaves an empty stem; never store an empty name.
    stem = re.sub(r"\.pdf$", "", filename or "", flags=re.I).strip()
    return (stem or "PDF")[:200]
|
||||||
|
|||||||
@@ -328,6 +328,7 @@
|
|||||||
<span class="text-secondary" id="globalSourceCount"></span>
|
<span class="text-secondary" id="globalSourceCount"></span>
|
||||||
</div>
|
</div>
|
||||||
<button class="btn btn-secondary" id="discoverSourceBtn">Erkennen</button>
|
<button class="btn btn-secondary" id="discoverSourceBtn">Erkennen</button>
|
||||||
|
<button class="btn btn-secondary" id="newPdfSourceBtn" style="margin-right:8px;">+ PDF hochladen</button>
|
||||||
<button class="btn btn-primary" id="newGlobalSourceBtn">+ Neue Grundquelle</button>
|
<button class="btn btn-primary" id="newGlobalSourceBtn">+ Neue Grundquelle</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="card">
|
<div class="card">
|
||||||
@@ -641,6 +642,7 @@
|
|||||||
<option value="telegram_channel">Telegram-Kanal</option>
|
<option value="telegram_channel">Telegram-Kanal</option>
|
||||||
<option value="podcast_feed">Podcast-Feed</option>
|
<option value="podcast_feed">Podcast-Feed</option>
|
||||||
<option value="excluded">Ausgeschlossen</option>
|
<option value="excluded">Ausgeschlossen</option>
|
||||||
|
<option value="pdf_document" disabled>PDF-Dokument (nur Upload)</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
@@ -799,6 +801,59 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Modal: PDF hochladen -->
|
||||||
|
<div class="modal-overlay" id="modalPdfUpload">
|
||||||
|
<div class="modal">
|
||||||
|
<div class="modal-header">
|
||||||
|
<h3>PDF als Quelle hochladen</h3>
|
||||||
|
<button class="modal-close" onclick="closeModal(modalPdfUpload)">×</button>
|
||||||
|
</div>
|
||||||
|
<form id="pdfUploadForm" enctype="multipart/form-data">
|
||||||
|
<div class="modal-body">
|
||||||
|
<p class="text-secondary" style="margin-top:0;">
|
||||||
|
Die PDF wird gespeichert und vom Monitor automatisch verarbeitet:
|
||||||
|
Text extrahieren (OCR-Fallback fuer gescannte Dokumente),
|
||||||
|
Übersetzung nach Deutsch und Englisch.
|
||||||
|
</p>
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="pdfFile">PDF-Datei (max. 50 MB)</label>
|
||||||
|
<input type="file" id="pdfFile" accept="application/pdf,.pdf" required>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="pdfName">Anzeige-Name (optional)</label>
|
||||||
|
<input type="text" id="pdfName" maxlength="200" placeholder="leer = Dateiname">
|
||||||
|
</div>
|
||||||
|
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;">
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="pdfCategory">Kategorie</label>
|
||||||
|
<select id="pdfCategory">
|
||||||
|
<option value="sonstige" selected>Sonstige</option>
|
||||||
|
<option value="behoerde">Behörde</option>
|
||||||
|
<option value="think-tank">Think-Tank</option>
|
||||||
|
<option value="fachmedien">Fachmedien</option>
|
||||||
|
<option value="international">International</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="pdfLanguage">Sprache (optional)</label>
|
||||||
|
<input type="text" id="pdfLanguage" list="languageSuggestions" placeholder="z.B. Deutsch, Englisch">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="pdfNotes">Notizen</label>
|
||||||
|
<input type="text" id="pdfNotes" placeholder="Optional">
|
||||||
|
</div>
|
||||||
|
<div id="pdfUploadError" class="error-msg" style="display:none"></div>
|
||||||
|
<div id="pdfUploadProgress" class="text-secondary" style="display:none;margin-top:8px;">Lädt hoch …</div>
|
||||||
|
</div>
|
||||||
|
<div class="modal-footer">
|
||||||
|
<button type="button" class="btn btn-secondary" onclick="closeModal(modalPdfUpload)">Abbrechen</button>
|
||||||
|
<button type="submit" class="btn btn-primary" id="pdfUploadSubmitBtn">Hochladen</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Modal: Discover Sources -->
|
<!-- Modal: Discover Sources -->
|
||||||
<div class="modal-overlay" id="modalDiscover">
|
<div class="modal-overlay" id="modalDiscover">
|
||||||
<div class="modal" style="max-width:600px;">
|
<div class="modal" style="max-width:600px;">
|
||||||
|
|||||||
@@ -26,6 +26,23 @@ const API = {
|
|||||||
post(path, body) { return this.request(path, { method: "POST", body: JSON.stringify(body) }); },
|
post(path, body) { return this.request(path, { method: "POST", body: JSON.stringify(body) }); },
|
||||||
put(path, body) { return this.request(path, { method: "PUT", body: body ? JSON.stringify(body) : undefined }); },
|
put(path, body) { return this.request(path, { method: "PUT", body: body ? JSON.stringify(body) : undefined }); },
|
||||||
del(path) { return this.request(path, { method: "DELETE" }); },
|
del(path) { return this.request(path, { method: "DELETE" }); },
|
||||||
|
|
||||||
|
async upload(path, formData) {
|
||||||
|
const headers = {};
|
||||||
|
if (this.token) headers["Authorization"] = `Bearer ${this.token}`;
|
||||||
|
const res = await fetch(path, { method: "POST", headers, body: formData });
|
||||||
|
if (res.status === 401) {
|
||||||
|
localStorage.removeItem("token");
|
||||||
|
localStorage.removeItem("username");
|
||||||
|
window.location.href = "/";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!res.ok) {
|
||||||
|
const data = await res.json().catch(() => ({}));
|
||||||
|
throw new Error(data.detail || `Fehler ${res.status}`);
|
||||||
|
}
|
||||||
|
return res.json();
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
// --- State ---
|
// --- State ---
|
||||||
|
|||||||
@@ -311,6 +311,8 @@ function editGlobalSource(id) {
|
|||||||
|
|
||||||
function setupSourceForms() {
|
function setupSourceForms() {
|
||||||
document.getElementById("newGlobalSourceBtn").addEventListener("click", openNewGlobalSource);
|
document.getElementById("newGlobalSourceBtn").addEventListener("click", openNewGlobalSource);
|
||||||
|
document.getElementById("newPdfSourceBtn")?.addEventListener("click", openPdfUploadModal);
|
||||||
|
setupPdfUploadForm();
|
||||||
document.getElementById("discoverSourceBtn").addEventListener("click", () => {
|
document.getElementById("discoverSourceBtn").addEventListener("click", () => {
|
||||||
document.getElementById("discoverUrl").value = "";
|
document.getElementById("discoverUrl").value = "";
|
||||||
document.getElementById("discoverStatus").style.display = "none";
|
document.getElementById("discoverStatus").style.display = "none";
|
||||||
@@ -880,3 +882,68 @@ function toggleSourceInfo(id) {
|
|||||||
if (btn) btn.classList.toggle("active", !isVisible);
|
if (btn) btn.classList.toggle("active", !isVisible);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- PDF-Quellen-Upload ---
|
||||||
|
// Reset the PDF upload form to a clean state and show its modal.
function openPdfUploadModal() {
    document.getElementById("pdfUploadForm")?.reset();

    // Clear any error left over from a previous attempt.
    const errorBox = document.getElementById("pdfUploadError");
    if (errorBox) {
        errorBox.style.display = "none";
        errorBox.textContent = "";
    }

    const progressBox = document.getElementById("pdfUploadProgress");
    if (progressBox) {
        progressBox.style.display = "none";
    }

    openModal("modalPdfUpload");
}
|
||||||
|
|
||||||
|
// Wire the submit handler of the PDF upload form (at most once per form).
// On submit: validate the selected file client-side, send it together with
// the optional metadata via API.upload, then close the modal and reload the
// global source list. Errors are shown inline in #pdfUploadError.
function setupPdfUploadForm() {
    const form = document.getElementById("pdfUploadForm");
    // dataset.bound marks the form as wired so repeated calls do not stack listeners.
    if (!form || form.dataset.bound === "1") return;
    form.dataset.bound = "1";

    // Client-side size limit, matching the "50 MB" named in the error message below.
    const maxBytes = 50 * 1024 * 1024;

    form.addEventListener("submit", async (e) => {
        e.preventDefault();
        const errEl = document.getElementById("pdfUploadError");
        const progEl = document.getElementById("pdfUploadProgress");
        const submitBtn = document.getElementById("pdfUploadSubmitBtn");
        const showError = (message) => {
            errEl.textContent = message;
            errEl.style.display = "block";
        };
        const trimmedValue = (id) => document.getElementById(id).value.trim();
        errEl.style.display = "none";

        const fileInput = document.getElementById("pdfFile");
        const f = fileInput?.files?.[0];
        if (!f) {
            showError("Bitte eine PDF-Datei auswaehlen.");
            return;
        }
        if (f.size > maxBytes) {
            showError("Datei ueberschreitet 50 MB.");
            return;
        }

        // Optional fields are only sent when non-empty.
        const fd = new FormData();
        fd.append("file", f);
        const nm = trimmedValue("pdfName");
        if (nm) fd.append("name", nm);
        fd.append("category", document.getElementById("pdfCategory").value || "sonstige");
        const lng = trimmedValue("pdfLanguage");
        if (lng) fd.append("language", lng);
        const nt = trimmedValue("pdfNotes");
        if (nt) fd.append("notes", nt);

        submitBtn.disabled = true;
        progEl.style.display = "block";
        try {
            const created = await API.upload("/api/sources/global/upload-pdf", fd);
            // API.upload resolves to undefined after a 401 (it redirects to the
            // login page); do not report success for an upload that never happened.
            if (!created) return;
            closeModal("modalPdfUpload");
            if (typeof showToast === "function") {
                showToast("PDF hochgeladen -- Verarbeitung laeuft im Hintergrund", "success");
            }
            loadGlobalSources();
        } catch (err) {
            showError(err.message || "Upload fehlgeschlagen");
        } finally {
            // Always re-enable the form, including on the early return above.
            submitBtn.disabled = false;
            progEl.style.display = "none";
        }
    });
}
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren