Export: XMP-Metadatenblock und CreationDate/ModDate via pikepdf nachziehen
WeasyPrint 68.1 schreibt weder XMP noch Creation-/ModDate ins PDF. Das Post-Processing via pikepdf ergaenzt beides: (1) Info-Dict: /CreationDate + /ModDate im PDF-Standardformat (D:YYYYMMDDHHmmSS+HH'mm') aus Incident.created_at / updated_at; (2) XMP-Block mit den Namespaces Dublin Core (dc:title, dc:creator, dc:description, dc:subject, dc:language), PDF (pdf:Keywords) und XMP (CreatorTool, CreateDate, ModifyDate, MetadataDate). Damit werden die Exporte sowohl von klassischen Tools (Explorer, Finder) als auch von DMS-Systemen (SharePoint, Bridge, Acrobat) vollstaendig indexiert. Fallback: Bei einem Fehler im Post-Processing wird das Original-PDF zurueckgegeben; der Export schlaegt nie fehl.
Dieser Commit ist enthalten in:
@@ -8,6 +8,7 @@ from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pikepdf
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
from weasyprint import HTML
|
||||
from docx import Document
|
||||
@@ -536,6 +537,63 @@ def _build_export_metadata(
|
||||
}
|
||||
|
||||
|
||||
def _format_pdf_date(dt: datetime) -> str:
|
||||
"""PDF-Datumsformat: D:YYYYMMDDHHmmSS+HH'mm' (mit Zeitzone) oder Z (UTC)."""
|
||||
if dt.tzinfo is None:
|
||||
# Naive dt — als lokale TIMEZONE interpretieren
|
||||
dt = dt.replace(tzinfo=TIMEZONE)
|
||||
base = dt.strftime("D:%Y%m%d%H%M%S")
|
||||
offset = dt.utcoffset()
|
||||
if offset is None:
|
||||
return base + "Z"
|
||||
total_minutes = int(offset.total_seconds() // 60)
|
||||
sign = "+" if total_minutes >= 0 else "-"
|
||||
total_minutes = abs(total_minutes)
|
||||
return f"{base}{sign}{total_minutes // 60:02d}'{total_minutes % 60:02d}'"
|
||||
|
||||
|
||||
def _enrich_pdf_metadata(pdf_bytes: bytes, meta: dict) -> bytes:
    """Add an XMP metadata block and CreationDate/ModDate to a PDF via pikepdf.

    Args:
        pdf_bytes: The rendered PDF document to post-process.
        meta: Metadata values. Keys read here: "created" and "modified"
            (expected to be datetime objects or missing), "title", "author",
            "subject", "keywords", "keywords_comma", "language" and
            "creator_app".

    Returns:
        The enriched PDF as bytes. If anything in the post-processing fails,
        a warning is logged and ``pdf_bytes`` is returned unchanged, so the
        export itself never fails because of metadata.
    """
    try:
        buf_in = io.BytesIO(pdf_bytes)
        with pikepdf.Pdf.open(buf_in) as pdf:
            created = meta.get("created")
            modified = meta.get("modified")
            # Naive timestamps are interpreted as being in the local TIMEZONE.
            if created and created.tzinfo is None:
                created = created.replace(tzinfo=TIMEZONE)
            if modified and modified.tzinfo is None:
                modified = modified.replace(tzinfo=TIMEZONE)

            # Classic Info dictionary: CreationDate + ModDate.
            if created:
                pdf.docinfo["/CreationDate"] = pikepdf.String(_format_pdf_date(created))
            if modified:
                pdf.docinfo["/ModDate"] = pikepdf.String(_format_pdf_date(modified))

            # XMP metadata block (Dublin Core + XMP + PDF namespaces).
            with pdf.open_metadata(set_pikepdf_as_editor=False) as xmp:
                xmp["dc:title"] = meta.get("title", "")
                xmp["dc:creator"] = [meta.get("author", "")]
                xmp["dc:description"] = meta.get("subject", "")
                if meta.get("keywords"):
                    xmp["dc:subject"] = list(meta["keywords"])
                xmp["dc:language"] = [meta.get("language", "de-DE")]
                xmp["pdf:Keywords"] = meta.get("keywords_comma", "")
                xmp["xmp:CreatorTool"] = meta.get("creator_app", "AegisSight Monitor")
                # XMP dates are ISO 8601 with a colon in the UTC offset
                # (+02:00). strftime("%z") would emit +0200, which is not a
                # valid XMP date, so isoformat() is used instead.
                if created:
                    xmp["xmp:CreateDate"] = created.isoformat(timespec="seconds")
                if modified:
                    modified_iso = modified.isoformat(timespec="seconds")
                    xmp["xmp:ModifyDate"] = modified_iso
                    xmp["xmp:MetadataDate"] = modified_iso

            buf_out = io.BytesIO()
            pdf.save(buf_out)
            return buf_out.getvalue()
    except Exception as e:
        # Deliberate best-effort boundary: metadata enrichment must never
        # break the export, so fall back to the untouched PDF.
        logger.warning("PDF-Metadaten-Anreicherung (XMP/Dates) fehlgeschlagen: %s", e)
        return pdf_bytes
|
||||
|
||||
|
||||
async def generate_pdf(
|
||||
incident: dict, articles: list, fact_checks: list, snapshots: list,
|
||||
scope: str, creator: str, executive_summary_html: str,
|
||||
@@ -614,6 +672,7 @@ async def generate_pdf(
|
||||
art["pub_date"] = pub[:10] if pub else ""
|
||||
|
||||
pdf_bytes = HTML(string=html_content).write_pdf()
|
||||
pdf_bytes = _enrich_pdf_metadata(pdf_bytes, meta)
|
||||
return pdf_bytes
|
||||
|
||||
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren