Commits vergleichen

...

3 Commits

Autor SHA1 Nachricht Datum
2cfc14b264 Promote develop → main (2026-05-17 00:40 UTC) 2026-05-17 02:40:37 +02:00
Claude Code
168fbc3987 feat(sources): PDF-Upload auch in der Endkunden-App (Kundenquelle)
- POST /api/sources/upload-pdf: tenant-scoped Upload, gleiche Speicher-
  Konvention wie der Verwaltungs-Endpoint (<dirname(DB)>/pdfs/{sha}.pdf).
  Duplikat-Check beruecksichtigt globale Quellen.
- dashboard.html: +PDF-Button in der Quellenverwaltungs-Toolbar +
  eigenes Modal modal-pdf-upload (closeModal-Quotes via &#39;).
- app.js: App.openPdfUpload + _bindPdfUploadFormOnce (Submit nur einmal
  binden).
- api.js: API.upload(path, formData) Helper analog Verwaltung.
2026-05-16 23:57:32 +00:00
Claude Code
e68386f6bb feat(sources): PDF-Dokumente als neuer Quellentyp pdf_document
- SOURCE_TYPE_PATTERN um pdf_document erweitert
- src/services/pdf_ingest.py: pdfplumber + Tesseract-OCR-Fallback,
  Uebersetzung nach DE+EN, ein Pool-Artikel pro PDF
- Scheduler-Job pdf_ingest laeuft im Minuten-Takt und verarbeitet
  pdf_document-Quellen mit processed_at IS NULL
- scripts/migrate_pdf_source.py: idempotente DB-Migration
  (sources.pdf_path/pdf_sha256/processed_at, articles.headline_en/content_en)
- requirements.txt: pdfplumber, pytesseract, pdf2image, Pillow
2026-05-16 23:21:50 +00:00
9 geänderte Dateien mit 534 neuen und 2 gelöschten Zeilen

Datei anzeigen

@@ -16,3 +16,8 @@ Jinja2>=3.1
weasyprint>=68.0
python-docx>=1.2
pikepdf>=9.0
# PDF-Quellen (Ingestion)
pdfplumber>=0.11
pytesseract>=0.3
pdf2image>=1.17
Pillow>=10.0

Datei anzeigen

@@ -0,0 +1,34 @@
"""Idempotente Migration: Quellen-Typ pdf_document + EN-Spalten in articles.
Beim Live-Promote anwenden:
python3 scripts/migrate_pdf_source.py /home/claude-dev/osint-data/osint.db
"""
import sqlite3
import sys
def add_col(db, table, col_def):
    """Add a column to ``table`` unless a column of that name already exists.

    Args:
        db: Open ``sqlite3`` connection (anything exposing ``execute``).
        table: Name of the table to alter. It is interpolated into the SQL
            text, so it must come from trusted code, never from user input.
        col_def: Column definition as accepted by ``ALTER TABLE ... ADD
            COLUMN``; its first whitespace-separated token is the column name.

    Returns:
        True if the column was added, False if it was already present.
    """
    name = col_def.split()[0]
    # SQLite matches column names case-insensitively, so compare lower-cased
    # names; otherwise an existing "Pdf_Path" would trigger a duplicate-column
    # error on ALTER TABLE instead of being reported as "exists".
    existing = {row[1].lower() for row in db.execute(f"PRAGMA table_info({table})")}
    if name.lower() in existing:
        return False
    db.execute(f"ALTER TABLE {table} ADD COLUMN {col_def}")
    return True
def main(path):
    """Apply the PDF-source schema migration to the SQLite database at ``path``.

    Adds the columns ``pdf_path``, ``pdf_sha256`` and ``processed_at`` to
    ``sources`` and ``headline_en`` / ``content_en`` to ``articles`` (each only
    if missing), creates the index ``idx_sources_pdf_sha256`` if it does not
    exist, and prints one status line per column followed by ``DONE``.

    Args:
        path: Filesystem path of the SQLite database file.
    """
    db = sqlite3.connect(path)
    try:
        # ``with db`` only scopes the transaction (commit on success, rollback
        # on error); it does not close the connection, hence the finally.
        with db:
            for col in ("pdf_path TEXT", "pdf_sha256 TEXT", "processed_at TIMESTAMP"):
                print(f"sources.{col.split()[0]}:", "added" if add_col(db, "sources", col) else "exists")
            for col in ("headline_en TEXT", "content_en TEXT"):
                print(f"articles.{col.split()[0]}:", "added" if add_col(db, "articles", col) else "exists")
            db.execute("CREATE INDEX IF NOT EXISTS idx_sources_pdf_sha256 ON sources(pdf_sha256)")
            db.commit()
    finally:
        db.close()
    print("DONE")
if __name__ == "__main__":
    # Script entry point: exactly one argument, the database path, is expected.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: migrate_pdf_source.py /path/to/osint.db")
        sys.exit(1)

Datei anzeigen

@@ -298,6 +298,8 @@ async def lifespan(app: FastAPI):
orchestrator.set_ws_manager(ws_manager)
await orchestrator.start()
from services import pdf_ingest as _pdf_ingest
scheduler.add_job(_pdf_ingest.run_once, "interval", minutes=1, id="pdf_ingest", max_instances=1, coalesce=True)
scheduler.add_job(check_auto_refresh, "interval", minutes=1, id="auto_refresh")
scheduler.add_job(cleanup_expired, "interval", hours=1, id="cleanup")
scheduler.add_job(daily_source_health_check, "cron", hour=4, minute=0, id="source_health")

Datei anzeigen

@@ -140,7 +140,7 @@ class IncidentListItem(BaseModel):
# Sources (source management)
# Anchored regular expressions listing the accepted values for a source's
# type, category and status (presumably used for field validation by the
# models below -- confirm against their definitions).
# NOTE(review): SOURCE_TYPE_PATTERN is assigned twice in a row; the second
# assignment, which additionally allows "pdf_document", is the one in effect.
SOURCE_TYPE_PATTERN = "^(rss_feed|web_source|excluded|telegram_channel|podcast_feed)$"
SOURCE_TYPE_PATTERN = "^(rss_feed|web_source|excluded|telegram_channel|podcast_feed|pdf_document)$"
SOURCE_CATEGORY_PATTERN = "^(nachrichtenagentur|oeffentlich-rechtlich|qualitaetszeitung|behoerde|fachmedien|think-tank|international|regional|boulevard|sonstige)$"
SOURCE_STATUS_PATTERN = "^(active|inactive)$"
class SourceCreate(BaseModel):

Datei anzeigen

@@ -1,13 +1,19 @@
"""Sources-Router: Quellenverwaltung (Multi-Tenant). Klassifikation: Read-Only — Pflege in der Verwaltung."""
import json
import logging
import uuid
import re
import os
import hashlib
from collections import defaultdict
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest
from auth import get_current_user
from database import db_dependency, refresh_source_counts
from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name, _DOMAIN_ALIASES
import aiosqlite
from config import DB_PATH
from typing import Optional
logger = logging.getLogger("osint.sources")
@@ -640,3 +646,110 @@ async def trigger_refresh_counts(
await refresh_source_counts(db)
return {"status": "ok"}
# --- PDF upload (customer source of type pdf_document) ---
# Analogous to the admin ("Verwaltung") upload, but tenant-specific.
# The file is stored at <dirname(DB_PATH)>/pdfs/{sha256}.pdf.
# The worker (services.pdf_ingest) processes it asynchronously once per minute.
MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024 # 50 MB
PDF_DIR = os.path.join(os.path.dirname(os.path.abspath(DB_PATH)), "pdfs")
def _pdf_dir() -> str:
    """Return the PDF storage directory, creating it first if it is missing."""
    storage_dir = PDF_DIR
    os.makedirs(storage_dir, exist_ok=True)
    return storage_dir
def _discard_upload_tmp(path: str) -> None:
    """Best-effort removal of a temporary upload file; a missing file is fine."""
    try:
        os.unlink(path)
    except OSError:
        pass


def _pdf_display_name(name: Optional[str], filename: Optional[str]) -> str:
    """Pick the source's display name: explicit name, else file stem, else 'PDF'.

    The result is capped at 200 characters and is never empty, even for a
    whitespace-only ``name`` or a file literally called ``.pdf``.
    """
    explicit = (name or "").strip()
    if explicit:
        return explicit[:200]
    stem = re.sub(r"\.pdf$", "", filename or "", flags=re.I).strip()
    return (stem or "PDF")[:200]


@router.post("/upload-pdf", status_code=status.HTTP_201_CREATED)
async def upload_pdf_source(
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
    file: UploadFile = File(...),
    name: Optional[str] = Form(None),
    category: str = Form("sonstige"),
    language: Optional[str] = Form(None),
    notes: Optional[str] = Form(None),
):
    """Upload a PDF and register it as a source with source_type=pdf_document.

    The upload is streamed to a temporary file inside the PDF directory while
    its SHA-256 is computed, then moved to ``<sha256>.pdf``. The new source row
    is inserted with the caller's ``tenant_id`` and without ``processed_at``.

    Returns:
        The inserted ``sources`` row as a dict, extended with ``is_global``,
        ``state_affiliated`` (as bool) and an empty ``alignments`` list.

    Raises:
        HTTPException 415: the upload does not start with the ``%PDF-`` magic.
        HTTPException 413: the upload exceeds ``MAX_PDF_SIZE_BYTES``.
        HTTPException 409: a source with the same SHA-256 already exists for
            this tenant or as a global source (``tenant_id IS NULL``).
        HTTPException 500: any other failure while storing the file.
    """
    head = await file.read(8)
    if not head.startswith(b"%PDF-"):
        raise HTTPException(status_code=415, detail="Datei ist kein gueltiges PDF")

    tenant_id = current_user.get("tenant_id")
    sha = hashlib.sha256()
    sha.update(head)
    total = len(head)
    tmp_path = os.path.join(_pdf_dir(), f".upload-{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp_path, "wb") as out:
            out.write(head)
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                total += len(chunk)
                # Enforce the limit while streaming so an oversized upload is
                # rejected before it is fully written to disk.
                if total > MAX_PDF_SIZE_BYTES:
                    raise HTTPException(status_code=413, detail=f"PDF ueberschreitet {MAX_PDF_SIZE_BYTES // 1024 // 1024} MB")
                sha.update(chunk)
                out.write(chunk)
        sha_hex = sha.hexdigest()
        final_path = os.path.join(_pdf_dir(), f"{sha_hex}.pdf")
        rel_path = os.path.join("pdfs", f"{sha_hex}.pdf")

        # Duplicate check within the tenant, or globally when the same PDF
        # already exists as a global source (which is visible to everyone).
        cursor = await db.execute(
            "SELECT id, name, tenant_id FROM sources WHERE pdf_sha256 = ? "
            "AND (tenant_id IS NULL OR tenant_id = ?)",
            (sha_hex, tenant_id),
        )
        existing = await cursor.fetchone()
        if existing:
            scope = "global" if existing["tenant_id"] is None else "Ihrer Organisation"
            # The temp file is removed by the HTTPException handler below.
            raise HTTPException(
                status_code=409,
                detail=f"PDF bereits in {scope} vorhanden als Quelle '{existing['name']}' (id={existing['id']})",
            )

        if not os.path.exists(final_path):
            os.replace(tmp_path, final_path)
        else:
            # Same content is already stored (e.g. by another tenant): reuse it.
            _discard_upload_tmp(tmp_path)
    except HTTPException:
        _discard_upload_tmp(tmp_path)
        raise
    except Exception as e:
        _discard_upload_tmp(tmp_path)
        logger.exception("PDF-Upload (tenant) fehlgeschlagen")
        raise HTTPException(status_code=500, detail=f"PDF-Upload fehlgeschlagen: {e}")

    display_name = _pdf_display_name(name, file.filename)

    cursor = await db.execute(
        """INSERT INTO sources
        (name, url, domain, source_type, category, status, notes, language,
        pdf_path, pdf_sha256, added_by, tenant_id)
        VALUES (?, NULL, NULL, 'pdf_document', ?, 'active', ?, ?, ?, ?, ?, ?)""",
        (display_name, category, notes, language, rel_path, sha_hex,
         current_user["username"], tenant_id),
    )
    src_id = cursor.lastrowid
    await db.commit()

    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (src_id,))
    row = await cursor.fetchone()
    result = dict(row)
    result["is_global"] = result.get("tenant_id") is None
    result["state_affiliated"] = bool(result.get("state_affiliated"))
    result["alignments"] = []
    return result

237
src/services/pdf_ingest.py Normale Datei
Datei anzeigen

@@ -0,0 +1,237 @@
"""PDF-Ingest: liest hochgeladene PDFs ein und legt sie als Pool-Artikel ab.
Quellen vom Typ `pdf_document` werden in der Verwaltung angelegt
(`processed_at IS NULL`). Dieser Service pollt sie, extrahiert den Text,
uebersetzt nach DE+EN und schreibt EINEN Artikel (incident_id=NULL) in
`articles`. Idempotent ueber `processed_at`.
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import re
from typing import Optional
import aiosqlite
from config import DB_PATH, CLAUDE_MODEL_FAST
from agents.claude_client import call_claude
logger = logging.getLogger("osint.pdf_ingest")
MAX_CHARS_PER_PDF = 200_000 # hard upper bound, protects against huge dumps
TRANSLATE_INPUT_MAX = 12_000 # how much text is handed to the LLM for translation (cost control)
def _extract_text_pdfplumber(path: str) -> str:
    """Return the text pdfplumber extracts from the PDF at ``path``.

    Pages that yield no text are skipped; the remaining page texts are joined
    with a blank line and the result is stripped.
    """
    import pdfplumber

    with pdfplumber.open(path) as document:
        page_texts = [page.extract_text() or "" for page in document.pages]
    return "\n\n".join(chunk for chunk in page_texts if chunk).strip()
def _extract_text_ocr(path: str) -> str:
    """OCR fallback: render the PDF with pdf2image and read it with pytesseract.

    Every page is rendered at 200 dpi and recognised with the combined
    ``deu+eng`` language setting so that mixed-language PDFs work. Pages with
    no recognised text are skipped; the rest are joined with a blank line.
    """
    from pdf2image import convert_from_path
    import pytesseract

    recognised = (
        (pytesseract.image_to_string(page_image, lang="deu+eng") or "").strip()
        for page_image in convert_from_path(path, dpi=200)
    )
    return "\n\n".join(page_text for page_text in recognised if page_text).strip()
def _extract_text(path: str) -> tuple[str, str]:
    """Extract the text of the PDF at ``path`` and report how it was obtained.

    pdfplumber is tried first. If it yields fewer than 50 characters (or
    fails), OCR is attempted. A short pdfplumber result is kept when OCR fails
    or recognises less text, so a genuinely short document is not lost.

    Returns:
        ``(text, method)`` where ``method`` is ``'pdfplumber'`` or ``'ocr'``
        and ``text`` is capped at ``MAX_CHARS_PER_PDF`` characters.

    Raises:
        Exception: whatever the OCR step raised, but only when pdfplumber
            produced no text at all.
    """
    try:
        text = _extract_text_pdfplumber(path)
    except Exception as e:
        logger.warning("pdfplumber-Extraktion fehlgeschlagen fuer %s: %s", path, e)
        text = ""
    if len(text) >= 50:
        return text[:MAX_CHARS_PER_PDF], "pdfplumber"

    logger.info("PDF hat keinen Text-Layer (oder <50 Zeichen), versuche OCR: %s", path)
    try:
        ocr_text = _extract_text_ocr(path)
    except Exception as e:
        if not text:
            raise
        # OCR tooling failed, but there is a (short) real text layer: use it.
        logger.warning("OCR fehlgeschlagen fuer %s, verwende kurzen Text-Layer: %s", path, e)
        return text[:MAX_CHARS_PER_PDF], "pdfplumber"
    if len(ocr_text) < len(text):
        return text[:MAX_CHARS_PER_PDF], "pdfplumber"
    return ocr_text[:MAX_CHARS_PER_PDF], "ocr"
def _derive_headline(text: str, fallback: str) -> str:
    """Return the first plausible headline line of ``text``.

    A line qualifies when, after stripping, it is 5 to 200 characters long and
    contains at least one letter; this skips separator rows and bare page
    numbers. If no line qualifies, the stripped ``fallback`` is returned, or
    ``"Untitled PDF"`` when the fallback is empty or None.
    """
    for raw in text.splitlines():
        line = raw.strip()
        if 5 <= len(line) <= 200 and any(ch.isalpha() for ch in line):
            return line
    return (fallback or "").strip() or "Untitled PDF"
async def _translate(text: str, headline: str, target_lang: str) -> tuple[str, str]:
    """Translate headline and content to ``target_lang`` ('de' or 'en').

    A dedicated mini function because each PDF is a single item for which
    headline and content are needed separately. At most
    ``TRANSLATE_INPUT_MAX`` characters of ``text`` are sent to the model.

    Returns:
        ``(headline_translated, content_translated)``. ``('', '')`` when both
        inputs are empty, the model call fails, or the reply is not a JSON
        object; a field that is missing or not a string becomes ``''``.
    """
    if not text and not headline:
        return "", ""
    lang_label = {"de": "Deutsch", "en": "Englisch"}.get(target_lang, target_lang)
    content_in = (text or "")[:TRANSLATE_INPUT_MAX]
    prompt = f"""Du bist ein praeziser Uebersetzer fuer Sachtexte.
Uebersetze Headline und Inhalt nach {lang_label}.
WICHTIG:
- Verwende IMMER echte UTF-8-Umlaute (ae->ä, oe->ö, ue->ü, ss->ß) bei Deutsch.
- Behalte Eigennamen im Original.
- Wenn der Text schon auf {lang_label} ist, gib ihn (nahezu) unveraendert zurueck.
- Behalte die wichtigsten Inhalte; kuerze stark auf MAX 3000 Zeichen Content.
Antworte AUSSCHLIESSLICH mit einem JSON-Objekt im Format:
{{"headline": "...", "content": "..."}}
Keine Markdown-Codefence, keine Einleitung.
HEADLINE: {headline}
INHALT:
{content_in}
"""
    try:
        result_text, _usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
    except Exception as e:
        logger.warning("PDF-Translator (%s) Claude-Call fehlgeschlagen: %s", target_lang, e)
        return "", ""

    raw = (result_text or "").strip()
    # Tolerate a Markdown code fence despite the prompt forbidding it.
    if raw.startswith("```"):
        raw = re.sub(r"^```(?:json)?\s*", "", raw)
        raw = re.sub(r"\s*```\s*$", "", raw).strip()

    data = None
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span if the reply has extra text.
        m = re.search(r"\{.*\}", raw, re.DOTALL)
        if m:
            try:
                data = json.loads(m.group(0))
            except json.JSONDecodeError:
                data = None
    if not isinstance(data, dict):
        logger.warning("PDF-Translator (%s) JSON nicht parsbar: %r", target_lang, raw[:200])
        return "", ""

    # Only accept string fields: a list/number here must not raise, because an
    # exception would leave the source unprocessed and retried every tick.
    headline_t = data.get("headline")
    content_t = data.get("content")
    return (
        headline_t.strip() if isinstance(headline_t, str) else "",
        content_t.strip() if isinstance(content_t, str) else "",
    )
async def _mark_processed_with_note(db: aiosqlite.Connection, sid: int, note: str) -> None:
    """Set ``processed_at`` on source ``sid``, append ``note`` to its notes, commit."""
    await db.execute(
        "UPDATE sources SET processed_at = CURRENT_TIMESTAMP, "
        "notes = COALESCE(notes,'') || ? WHERE id = ?",
        (note, sid),
    )
    await db.commit()


async def _process_one(db: aiosqlite.Connection, src: dict) -> None:
    """Ingest one pdf_document source: extract, translate, store one article.

    ``src`` must provide the keys ``id``, ``name`` and ``pdf_path`` and may
    provide ``pdf_sha256``, ``language`` and ``tenant_id``. On success one row
    is inserted into ``articles`` (``incident_id`` NULL) and the source gets
    ``processed_at`` set and its ``article_count`` incremented. When the PDF
    path is missing, the file is absent, extraction fails or no text is found,
    the source is marked processed with an explanatory note instead, so it is
    not picked up again on every tick.
    """
    sid = src["id"]
    name = src["name"] or "PDF"
    rel_path = src["pdf_path"]
    if not rel_path:
        logger.warning("PDF-Source #%d ohne pdf_path, ueberspringe", sid)
        # Mark it processed; otherwise the row would be re-selected on every
        # run and permanently occupy one of the per-tick slots.
        await _mark_processed_with_note(db, sid, " [pdf_path fehlt]")
        return

    if os.path.isabs(rel_path):
        abs_path = rel_path
    else:
        # Resolve against the absolute DB directory, as the upload endpoint does.
        abs_path = os.path.join(os.path.dirname(os.path.abspath(DB_PATH)), rel_path)

    if not os.path.exists(abs_path):
        logger.error("PDF-Datei fehlt fuer Source #%d: %s", sid, abs_path)
        # Mark as processed but leave a note, so there is no endless retry.
        await _mark_processed_with_note(db, sid, " [PDF-Datei nicht gefunden]")
        return

    logger.info("PDF-Ingest start: source #%d (%s)", sid, abs_path)
    try:
        # Extraction is blocking (pdfplumber / OCR), so run it in a thread.
        text, method = await asyncio.to_thread(_extract_text, abs_path)
    except Exception as e:
        logger.exception("PDF-Extraktion fehlgeschlagen fuer #%d: %s", sid, e)
        await _mark_processed_with_note(db, sid, " [PDF-Extraktion fehlgeschlagen]")
        return

    if not text:
        logger.warning("PDF #%d ergab keinen Text (auch OCR leer)", sid)
        await _mark_processed_with_note(db, sid, " [PDF leer/nicht lesbar]")
        return

    # Fall back to the source's display name; the stored file name is only the
    # SHA-256 of the content and would make a meaningless headline.
    headline = _derive_headline(text, name)

    # Uploaded PDFs are mostly German or English; the LLM can tell from the prompt.
    src_lang = (src.get("language") or "").lower() or "auto"

    # Request the DE and EN translations concurrently.
    (de_h, de_c), (en_h, en_c) = await asyncio.gather(
        _translate(text, headline, "de"),
        _translate(text, headline, "en"),
    )

    # Cap the original text so the articles table stays manageable.
    content_original = text[:5000]

    await db.execute(
        """INSERT INTO articles (incident_id, headline, headline_de, headline_en,
        source, source_url, content_original, content_de, content_en, language,
        published_at, tenant_id, verification_status)
        VALUES (NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, NULL, ?, 'unverified')""",
        (
            headline,
            de_h or None,
            en_h or None,
            name,
            f"pdf://{src.get('pdf_sha256') or sid}",
            content_original,
            de_c or None,
            en_c or None,
            src_lang if src_lang != "auto" else None,
            src.get("tenant_id"),
        ),
    )
    # COALESCE keeps the counter correct when article_count is still NULL.
    await db.execute(
        "UPDATE sources SET processed_at = CURRENT_TIMESTAMP, "
        "article_count = COALESCE(article_count, 0) + 1, "
        "last_seen_at = CURRENT_TIMESTAMP WHERE id = ?",
        (sid,),
    )
    await db.commit()
    logger.info("PDF-Ingest fertig: source #%d (%s, %d Zeichen)", sid, method, len(text))
async def run_once() -> int:
    """Process pending pdf_document sources and return how many were picked up.

    Selects up to 5 sources of type ``pdf_document`` whose ``processed_at`` is
    NULL, oldest ``created_at`` first, and hands each to ``_process_one``. The
    cap keeps an uploaded batch from monopolising a single run. A failure in
    one source is logged and does not stop the remaining ones.

    Returns:
        Number of sources selected in this run (including failed ones).
    """
    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute(
            "SELECT id, name, pdf_path, pdf_sha256, language, tenant_id "
            "FROM sources WHERE source_type = 'pdf_document' AND processed_at IS NULL "
            "ORDER BY created_at ASC LIMIT 5"
        )
        rows = [dict(r) for r in await cursor.fetchall()]
        for src in rows:
            try:
                await _process_one(db, src)
            except Exception:
                logger.exception("PDF-Ingest unerwarteter Fehler bei source #%d", src["id"])
                # Drop any uncommitted writes of the failed source; otherwise
                # the next source's commit would persist them (e.g. an article
                # row without processed_at, duplicated on the next retry).
                try:
                    await db.rollback()
                except Exception:
                    logger.exception("PDF-Ingest Rollback fehlgeschlagen bei source #%d", src["id"])
        return len(rows)

Datei anzeigen

@@ -555,6 +555,7 @@
<input type="text" id="sources-search" class="timeline-filter-input sources-search-input" placeholder="Suche..." oninput="App.filterSources()" data-i18n-attr="placeholder:sources_modal.search_placeholder">
</div>
<div class="sources-toolbar-actions">
<button class="btn btn-secondary btn-small" onclick="App.openPdfUpload()" style="margin-right:8px;">+ PDF hochladen</button>
<button class="btn btn-primary btn-small" onclick="App.toggleSourceForm()" data-i18n="sources_modal.add_source">+ Quelle</button>
</div>
</div>
@@ -633,6 +634,57 @@
</div>
</div>
<!-- Modal: PDF als Quelle hochladen -->
<div class="modal-overlay" id="modal-pdf-upload" role="dialog" aria-modal="true" aria-labelledby="modal-pdf-upload-title">
<div class="modal">
<div class="modal-header">
<div class="modal-title" id="modal-pdf-upload-title">PDF als Quelle hochladen</div>
<button class="modal-close" onclick="closeModal(&#39;modal-pdf-upload&#39;)" aria-label="Schliessen">&times;</button>
</div>
<form id="pdf-upload-form" enctype="multipart/form-data">
<div class="modal-body">
<p class="text-secondary" style="margin-top:0;">
Die PDF wird gespeichert und im Hintergrund verarbeitet: Text wird extrahiert (OCR-Fallback fuer gescannte Dokumente) und nach Deutsch und Englisch uebersetzt. Sie erscheint danach in Ihrer Quellenliste.
</p>
<div class="form-group">
<label for="pdf-upload-file">PDF-Datei (max. 50 MB)</label>
<input type="file" id="pdf-upload-file" accept="application/pdf,.pdf" required>
</div>
<div class="form-group">
<label for="pdf-upload-name">Anzeige-Name (optional)</label>
<input type="text" id="pdf-upload-name" maxlength="200" placeholder="leer = Dateiname">
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;">
<div class="form-group">
<label for="pdf-upload-category">Kategorie</label>
<select id="pdf-upload-category">
<option value="sonstige" selected>Sonstige</option>
<option value="behoerde">Behoerde</option>
<option value="think-tank">Think-Tank</option>
<option value="fachmedien">Fachmedien</option>
<option value="international">International</option>
</select>
</div>
<div class="form-group">
<label for="pdf-upload-language">Sprache (optional)</label>
<input type="text" id="pdf-upload-language" placeholder="z.B. Deutsch, Englisch">
</div>
</div>
<div class="form-group">
<label for="pdf-upload-notes">Notizen</label>
<input type="text" id="pdf-upload-notes" placeholder="Optional">
</div>
<div id="pdf-upload-error" class="error-msg" style="display:none"></div>
<div id="pdf-upload-progress" class="text-secondary" style="display:none;margin-top:8px;">Laedt hoch &hellip;</div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" onclick="closeModal(&#39;modal-pdf-upload&#39;)">Abbrechen</button>
<button type="submit" class="btn btn-primary" id="pdf-upload-submit">Hochladen</button>
</div>
</form>
</div>
</div>
<!-- Modal: Content-Viewer (wiederverwendbar für Lagebild, Faktencheck, Quellenübersicht, Timeline) -->
<div class="modal-overlay" id="modal-content-viewer" role="dialog" aria-modal="true" aria-labelledby="content-viewer-title">
<div class="modal modal-content-viewer">

Datei anzeigen

@@ -22,6 +22,31 @@ const API = {
};
},
async upload(path, formData) {
const token = localStorage.getItem("osint_token");
const headers = {};
if (token) headers["Authorization"] = `Bearer ${token}`;
const response = await fetch(`${this.baseUrl}${path}`, {
method: "POST",
headers,
body: formData,
});
if (response.status === 401) {
localStorage.removeItem("osint_token");
localStorage.removeItem("osint_username");
window.location.href = "/";
return;
}
if (!response.ok) {
const data = await response.json().catch(() => ({}));
let d = data.detail;
if (Array.isArray(d)) d = d.map(e => e.msg || JSON.stringify(e)).join("; ");
else if (typeof d === "object" && d !== null) d = JSON.stringify(d);
throw new Error(d || `Fehler ${response.status}`);
}
return response.json();
},
async _request(method, path, body = null, externalSignal = null) {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 30000);

Datei anzeigen

@@ -3106,6 +3106,70 @@ async handleRefresh() {
_discoveredData: null,
openPdfUpload() {
const form = document.getElementById("pdf-upload-form");
if (form) form.reset();
const err = document.getElementById("pdf-upload-error");
if (err) { err.style.display = "none"; err.textContent = ""; }
const prog = document.getElementById("pdf-upload-progress");
if (prog) prog.style.display = "none";
openModal("modal-pdf-upload");
this._bindPdfUploadFormOnce();
},
_bindPdfUploadFormOnce() {
const form = document.getElementById("pdf-upload-form");
if (!form || form.dataset.bound === "1") return;
form.dataset.bound = "1";
form.addEventListener("submit", async (e) => {
e.preventDefault();
const errEl = document.getElementById("pdf-upload-error");
const progEl = document.getElementById("pdf-upload-progress");
const submitBtn = document.getElementById("pdf-upload-submit");
errEl.style.display = "none";
const fileInput = document.getElementById("pdf-upload-file");
const f = fileInput && fileInput.files && fileInput.files[0];
if (!f) {
errEl.textContent = "Bitte eine PDF-Datei auswaehlen.";
errEl.style.display = "block";
return;
}
if (f.size > 50 * 1024 * 1024) {
errEl.textContent = "Datei ueberschreitet 50 MB.";
errEl.style.display = "block";
return;
}
const fd = new FormData();
fd.append("file", f);
const nm = (document.getElementById("pdf-upload-name").value || "").trim();
if (nm) fd.append("name", nm);
fd.append("category", document.getElementById("pdf-upload-category").value || "sonstige");
const lng = (document.getElementById("pdf-upload-language").value || "").trim();
if (lng) fd.append("language", lng);
const nt = (document.getElementById("pdf-upload-notes").value || "").trim();
if (nt) fd.append("notes", nt);
submitBtn.disabled = true;
progEl.style.display = "block";
try {
await API.upload("/sources/upload-pdf", fd);
closeModal("modal-pdf-upload");
if (typeof UI !== "undefined" && UI.showToast) {
UI.showToast("PDF hochgeladen -- Verarbeitung laeuft im Hintergrund", "success");
}
await App.loadSources();
} catch (err) {
errEl.textContent = err && err.message ? err.message : "Upload fehlgeschlagen";
errEl.style.display = "block";
} finally {
submitBtn.disabled = false;
progEl.style.display = "none";
}
});
},
toggleSourceForm(show) {
const form = document.getElementById('sources-add-form');
if (!form) return;