Export: PDF/DOCX-Dateimetadaten (Title, Author, Subject, Keywords, Category, Comments)
- Neue Helper-Funktion _build_export_metadata baut einheitliches Metadaten-Dict
- PDF via HTML-Meta-Tags (title, author, description, keywords, generator, lang)
- DOCX via doc.core_properties (title, author, subject, keywords, comments, category, last_modified_by, language, content_status, created, modified)
- Keywords aus OSINT + Typ + Organisation + category_labels + Top-5-Orten
- Comments-Feld mit strukturiertem Block (Incident-ID, Typ, Scope, Umfang, Orte)
- Router laedt Organisation + Top-Orte aus article_locations und reicht sie durch
Dieser Commit ist enthalten in:
@@ -391,10 +391,132 @@ LAGEBILD:
|
|||||||
return "<ul><li>Zusammenfassung konnte nicht generiert werden.</li></ul>"
|
return "<ul><li>Zusammenfassung konnte nicht generiert werden.</li></ul>"
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_db_timestamp(value) -> datetime | None:
|
||||||
|
"""SQLite-Timestamp robust als datetime parsen (ISO oder 'YYYY-MM-DD HH:MM:SS')."""
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value
|
||||||
|
try:
|
||||||
|
text = str(value).replace("T", " ").replace("Z", "")
|
||||||
|
# Sekundenbruchteile und Timezone-Offset abschneiden (python-docx mag nur naive dt)
|
||||||
|
text = text.split(".")[0].split("+")[0].strip()
|
||||||
|
return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
try:
|
||||||
|
return datetime.strptime(str(value)[:10], "%Y-%m-%d")
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _slug_scope_label(scope: str, sections: set[str] | None) -> str:
|
||||||
|
"""Scope-Label fuer Metadaten und Dateinamen."""
|
||||||
|
if sections:
|
||||||
|
if sections == {"zusammenfassung"}:
|
||||||
|
return "Zusammenfassung"
|
||||||
|
if "timeline" in sections:
|
||||||
|
return "Vollstaendiger Bericht"
|
||||||
|
return "Lagebericht"
|
||||||
|
return {"summary": "Zusammenfassung", "report": "Lagebericht", "full": "Vollstaendiger Bericht"}.get(
|
||||||
|
scope, "Lagebericht"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_export_metadata(
    incident: dict,
    articles: list,
    fact_checks: list,
    sources: list,
    creator: str,
    scope: str,
    sections: set[str] | None,
    organization_name: str | None,
    top_locations: list[str] | None,
) -> dict:
    """Build the shared metadata dict for PDF (HTML meta tags) and DOCX (core properties).

    Args:
        incident: Incident row; the keys ``type``, ``title``, ``description``,
            ``category_labels``, ``created_at``, ``updated_at`` and ``id`` are
            read, all optional.
        articles: Articles of the incident (only counted).
        fact_checks: Fact checks of the incident (only counted).
        sources: Sources of the incident (only counted).
        creator: Author name; falls back to "AegisSight Monitor" when empty.
        scope: Export scope, passed to ``_slug_scope_label``.
        sections: Explicit section selection, passed to ``_slug_scope_label``.
        organization_name: Optional organisation, added to keywords/comments.
        top_locations: Optional location names; empty/None entries are ignored.

    Returns:
        Dict with title, author, subject, keywords (list plus comma- and
        semicolon-joined strings), category, comments, creator_app, language,
        created/modified (datetime and ISO string), type_label, scope_label.
    """
    is_research = incident.get("type") == "research"
    type_label = "Hintergrundrecherche" if is_research else "Live-Monitoring"
    category = "OSINT-Hintergrundrecherche" if is_research else "OSINT-Lagebericht"
    scope_label = _slug_scope_label(scope, sections)

    title_raw = (incident.get("title") or "Unbenannte Lage").strip()
    title = f"{title_raw} — {type_label}"

    subject = (incident.get("description") or "").strip()
    if not subject:
        subject = f"{type_label} zu: {title_raw}"

    # Filter empty/None locations once so the keyword list and the comments
    # block agree (joining an unfiltered list would raise on a None entry).
    locations = [loc for loc in (top_locations or []) if loc]

    # Collect keywords; order matters because it is the display order.
    keywords: list[str] = ["OSINT", type_label]
    if organization_name:
        keywords.append(organization_name)

    # category_labels is a comma-separated string.
    cat_labels = incident.get("category_labels") or ""
    for lbl in cat_labels.split(","):
        lbl = lbl.strip()
        if lbl:
            keywords.append(lbl)

    keywords.extend(locations)

    # Case-insensitive de-duplication that keeps first occurrences, max 15.
    seen: set[str] = set()
    unique_keywords: list[str] = []
    for kw in keywords:
        key = kw.lower()
        if key in seen:
            continue
        seen.add(key)
        unique_keywords.append(kw)
        if len(unique_keywords) >= 15:
            break

    now = datetime.now(TIMEZONE)
    # Fallback timestamps are made naive, matching what the parser returns
    # for string input.
    created = _parse_db_timestamp(incident.get("created_at")) or now.replace(tzinfo=None)
    modified = _parse_db_timestamp(incident.get("updated_at")) or created

    # Structured, compact comments block (shown in the DOCX properties).
    stand = now.strftime("%d.%m.%Y")
    comments_lines = [
        f"Incident-ID: {incident.get('id', '?')} | Typ: {incident.get('type', 'adhoc')} | Scope: {scope_label}",
        f"Stand: {stand}",
    ]
    if organization_name:
        comments_lines.append(f"Organisation: {organization_name}")
    comments_lines.append(
        f"Umfang: {len(articles)} Artikel, {len(fact_checks)} Faktenchecks, {len(sources)} Quellen"
    )
    if locations:
        comments_lines.append("Orte: " + ", ".join(locations[:5]))
    comments = "\n".join(comments_lines)

    return {
        "title": title,
        "author": creator or "AegisSight Monitor",
        "subject": subject,
        "keywords": unique_keywords,
        "keywords_comma": ", ".join(unique_keywords),
        "keywords_semicolon": "; ".join(unique_keywords),
        "category": category,
        "comments": comments,
        "creator_app": "AegisSight Monitor",
        "language": "de-DE",
        "created": created,
        "modified": modified,
        "created_iso": created.strftime("%Y-%m-%dT%H:%M:%S"),
        "modified_iso": modified.strftime("%Y-%m-%dT%H:%M:%S"),
        "type_label": type_label,
        "scope_label": scope_label,
    }
|
||||||
|
|
||||||
|
|
||||||
async def generate_pdf(
|
async def generate_pdf(
|
||||||
incident: dict, articles: list, fact_checks: list, snapshots: list,
|
incident: dict, articles: list, fact_checks: list, snapshots: list,
|
||||||
scope: str, creator: str, executive_summary_html: str,
|
scope: str, creator: str, executive_summary_html: str,
|
||||||
sections: set[str] | None = None,
|
sections: set[str] | None = None,
|
||||||
|
organization_name: str | None = None,
|
||||||
|
top_locations: list[str] | None = None,
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
"""PDF-Report via WeasyPrint generieren."""
|
"""PDF-Report via WeasyPrint generieren."""
|
||||||
# Sections aus scope ableiten wenn nicht explizit angegeben
|
# Sections aus scope ableiten wenn nicht explizit angegeben
|
||||||
@@ -424,6 +546,11 @@ async def generate_pdf(
|
|||||||
if not is_research and zusammenfassung_html:
|
if not is_research and zusammenfassung_html:
|
||||||
zusammenfassung_html = _linkify_citations_html(zusammenfassung_html, all_sources)
|
zusammenfassung_html = _linkify_citations_html(zusammenfassung_html, all_sources)
|
||||||
|
|
||||||
|
meta = _build_export_metadata(
|
||||||
|
incident, articles, fact_checks, all_sources, creator, scope, sections,
|
||||||
|
organization_name, top_locations,
|
||||||
|
)
|
||||||
|
|
||||||
env = Environment(loader=FileSystemLoader(str(TEMPLATE_DIR)))
|
env = Environment(loader=FileSystemLoader(str(TEMPLATE_DIR)))
|
||||||
template = env.get_template("report.html")
|
template = env.get_template("report.html")
|
||||||
|
|
||||||
@@ -449,6 +576,7 @@ async def generate_pdf(
|
|||||||
source_stats=_prepare_source_stats(articles)[:20] if scope == "report" else _prepare_source_stats(articles),
|
source_stats=_prepare_source_stats(articles)[:20] if scope == "report" else _prepare_source_stats(articles),
|
||||||
timeline=_prepare_timeline(articles) if scope == "full" else [],
|
timeline=_prepare_timeline(articles) if scope == "full" else [],
|
||||||
articles=articles if scope == "full" else [],
|
articles=articles if scope == "full" else [],
|
||||||
|
meta=meta,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Artikel pub_date aufbereiten
|
# Artikel pub_date aufbereiten
|
||||||
@@ -468,6 +596,8 @@ async def generate_docx(
|
|||||||
incident: dict, articles: list, fact_checks: list, snapshots: list,
|
incident: dict, articles: list, fact_checks: list, snapshots: list,
|
||||||
scope: str, creator: str, executive_summary_text: str,
|
scope: str, creator: str, executive_summary_text: str,
|
||||||
sections: set[str] | None = None,
|
sections: set[str] | None = None,
|
||||||
|
organization_name: str | None = None,
|
||||||
|
top_locations: list[str] | None = None,
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
"""Word-Report via python-docx generieren."""
|
"""Word-Report via python-docx generieren."""
|
||||||
doc = Document()
|
doc = Document()
|
||||||
@@ -496,6 +626,28 @@ async def generate_docx(
|
|||||||
zusammenfassung_title = "Zusammenfassung"
|
zusammenfassung_title = "Zusammenfassung"
|
||||||
bericht_summary = remaining
|
bericht_summary = remaining
|
||||||
|
|
||||||
|
meta = _build_export_metadata(
|
||||||
|
incident, articles, fact_checks, all_sources, creator, scope, sections,
|
||||||
|
organization_name, top_locations,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Dateimetadaten setzen (sichtbar in Explorer/Finder, DMS-Systemen)
|
||||||
|
cp = doc.core_properties
|
||||||
|
cp.title = meta["title"]
|
||||||
|
cp.author = meta["author"]
|
||||||
|
cp.subject = meta["subject"]
|
||||||
|
cp.keywords = meta["keywords_semicolon"]
|
||||||
|
cp.comments = meta["comments"]
|
||||||
|
cp.category = meta["category"]
|
||||||
|
cp.last_modified_by = meta["author"]
|
||||||
|
cp.language = meta["language"]
|
||||||
|
cp.content_status = "Final"
|
||||||
|
try:
|
||||||
|
cp.created = meta["created"]
|
||||||
|
cp.modified = meta["modified"]
|
||||||
|
except (ValueError, TypeError) as e:
|
||||||
|
logger.warning(f"DOCX created/modified konnte nicht gesetzt werden: {e}")
|
||||||
|
|
||||||
# Styles
|
# Styles
|
||||||
style = doc.styles['Normal']
|
style = doc.styles['Normal']
|
||||||
style.font.size = Pt(10)
|
style.font.size = Pt(10)
|
||||||
|
|||||||
@@ -1,7 +1,19 @@
|
|||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="de">
|
<html lang="{{ meta.language if meta else 'de-DE' }}">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
|
{% if meta %}
|
||||||
|
<title>{{ meta.title }}</title>
|
||||||
|
<meta name="author" content="{{ meta.author }}">
|
||||||
|
<meta name="description" content="{{ meta.subject }}">
|
||||||
|
<meta name="keywords" content="{{ meta.keywords_comma }}">
|
||||||
|
<meta name="subject" content="{{ meta.subject }}">
|
||||||
|
<meta name="generator" content="{{ meta.creator_app }}">
|
||||||
|
<meta name="dcterms.created" content="{{ meta.created_iso }}">
|
||||||
|
<meta name="dcterms.modified" content="{{ meta.modified_iso }}">
|
||||||
|
{% else %}
|
||||||
|
<title>{{ incident.title }}</title>
|
||||||
|
{% endif %}
|
||||||
<style>
|
<style>
|
||||||
@page { margin: 20mm 18mm 20mm 18mm; size: A4; @bottom-center { content: "Seite " counter(page) " von " counter(pages); font-size: 8pt; color: #0a1832; } }
|
@page { margin: 20mm 18mm 20mm 18mm; size: A4; @bottom-center { content: "Seite " counter(page) " von " counter(pages); font-size: 8pt; color: #0a1832; } }
|
||||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
|||||||
@@ -988,6 +988,27 @@ async def export_incident(
|
|||||||
user_row = await cursor.fetchone()
|
user_row = await cursor.fetchone()
|
||||||
creator = user_row["email"] if user_row else "Unbekannt"
|
creator = user_row["email"] if user_row else "Unbekannt"
|
||||||
|
|
||||||
|
# Organisation (fuer Dateimetadaten)
|
||||||
|
organization_name = None
|
||||||
|
if incident.get("tenant_id"):
|
||||||
|
cursor = await db.execute(
|
||||||
|
"SELECT name FROM organizations WHERE id = ?", (incident["tenant_id"],)
|
||||||
|
)
|
||||||
|
org_row = await cursor.fetchone()
|
||||||
|
organization_name = org_row["name"] if org_row else None
|
||||||
|
|
||||||
|
# Top-Orte (fuer Keyword-Metadaten)
|
||||||
|
cursor = await db.execute(
|
||||||
|
"""SELECT location_name, COUNT(*) AS cnt
|
||||||
|
FROM article_locations
|
||||||
|
WHERE incident_id = ?
|
||||||
|
GROUP BY COALESCE(location_name_normalized, location_name)
|
||||||
|
ORDER BY cnt DESC
|
||||||
|
LIMIT 5""",
|
||||||
|
(incident_id,),
|
||||||
|
)
|
||||||
|
top_locations = [r["location_name"] for r in await cursor.fetchall() if r["location_name"]]
|
||||||
|
|
||||||
# Artikel
|
# Artikel
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"SELECT * FROM articles WHERE incident_id = ? ORDER BY collected_at DESC",
|
"SELECT * FROM articles WHERE incident_id = ? ORDER BY collected_at DESC",
|
||||||
@@ -1037,7 +1058,12 @@ async def export_incident(
|
|||||||
scope_labels_key = scope_labels.get(scope, "lagebericht")
|
scope_labels_key = scope_labels.get(scope, "lagebericht")
|
||||||
|
|
||||||
if format == "pdf":
|
if format == "pdf":
|
||||||
pdf_bytes = await generate_pdf(incident, articles, fact_checks, snapshots, scope, creator, exec_summary, sections=sections_set)
|
pdf_bytes = await generate_pdf(
|
||||||
|
incident, articles, fact_checks, snapshots, scope, creator, exec_summary,
|
||||||
|
sections=sections_set,
|
||||||
|
organization_name=organization_name,
|
||||||
|
top_locations=top_locations,
|
||||||
|
)
|
||||||
filename = f"{slug}_{scope_labels_key}_{date_str}.pdf"
|
filename = f"{slug}_{scope_labels_key}_{date_str}.pdf"
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
io.BytesIO(pdf_bytes),
|
io.BytesIO(pdf_bytes),
|
||||||
@@ -1045,7 +1071,12 @@ async def export_incident(
|
|||||||
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
|
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
docx_bytes = await generate_docx(incident, articles, fact_checks, snapshots, scope, creator, exec_summary, sections=sections_set)
|
docx_bytes = await generate_docx(
|
||||||
|
incident, articles, fact_checks, snapshots, scope, creator, exec_summary,
|
||||||
|
sections=sections_set,
|
||||||
|
organization_name=organization_name,
|
||||||
|
top_locations=top_locations,
|
||||||
|
)
|
||||||
filename = f"{slug}_{scope_labels_key}_{date_str}.docx"
|
filename = f"{slug}_{scope_labels_key}_{date_str}.docx"
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
io.BytesIO(docx_bytes),
|
io.BytesIO(docx_bytes),
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren