Export: XMP-Metadatenblock und CreationDate/ModDate via pikepdf nachziehen
WeasyPrint 68.1 schreibt weder XMP noch CreationDate/ModDate ins PDF. Das Post-Processing via pikepdf ergaenzt beides:
- Info-Dict: /CreationDate + /ModDate im PDF-Standardformat (D:YYYYMMDDHHmmSS+HH'mm') aus Incident.created_at / updated_at
- XMP-Block mit den Namespaces Dublin Core (dc:title, dc:creator, dc:description, dc:subject, dc:language), PDF (pdf:Keywords) und XMP (CreatorTool, CreateDate, ModifyDate, MetadataDate)
Damit werden die Exporte sowohl von klassischen Tools (Explorer, Finder) als auch von DMS-Systemen (SharePoint, Bridge, Acrobat) vollstaendig indexiert. Fallback: Bei einem Fehler im Post-Processing wird das Original-PDF zurueckgegeben; der Export schlaegt nie fehl.
Dieser Commit ist enthalten in:
@@ -11,3 +11,4 @@ python-multipart
|
|||||||
aiosmtplib
|
aiosmtplib
|
||||||
geonamescache>=2.0
|
geonamescache>=2.0
|
||||||
telethon
|
telethon
|
||||||
|
pikepdf>=9.0
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from collections import defaultdict
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pikepdf
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
from weasyprint import HTML
|
from weasyprint import HTML
|
||||||
from docx import Document
|
from docx import Document
|
||||||
@@ -536,6 +537,63 @@ def _build_export_metadata(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _format_pdf_date(dt: datetime) -> str:
|
||||||
|
"""PDF-Datumsformat: D:YYYYMMDDHHmmSS+HH'mm' (mit Zeitzone) oder Z (UTC)."""
|
||||||
|
if dt.tzinfo is None:
|
||||||
|
# Naive dt — als lokale TIMEZONE interpretieren
|
||||||
|
dt = dt.replace(tzinfo=TIMEZONE)
|
||||||
|
base = dt.strftime("D:%Y%m%d%H%M%S")
|
||||||
|
offset = dt.utcoffset()
|
||||||
|
if offset is None:
|
||||||
|
return base + "Z"
|
||||||
|
total_minutes = int(offset.total_seconds() // 60)
|
||||||
|
sign = "+" if total_minutes >= 0 else "-"
|
||||||
|
total_minutes = abs(total_minutes)
|
||||||
|
return f"{base}{sign}{total_minutes // 60:02d}'{total_minutes % 60:02d}'"
|
||||||
|
|
||||||
|
|
||||||
|
def _enrich_pdf_metadata(pdf_bytes: bytes, meta: dict) -> bytes:
    """Add XMP metadata and CreationDate/ModDate to a PDF via pikepdf.

    This is a best-effort post-processing step: if anything goes wrong, a
    warning is logged and the unmodified input bytes are returned, so the
    export itself does not fail because of metadata.

    Args:
        pdf_bytes: The rendered PDF document.
        meta: Export metadata. Keys read here: ``created``, ``modified``
            (datetimes, optional), ``title``, ``author``, ``subject``,
            ``keywords`` (iterable of strings), ``keywords_comma``,
            ``language`` and ``creator_app``.

    Returns:
        The PDF with enriched metadata, or ``pdf_bytes`` unchanged on error.
    """
    try:
        with pikepdf.Pdf.open(io.BytesIO(pdf_bytes)) as pdf:
            created = meta.get("created")
            modified = meta.get("modified")

            # Naive timestamps are interpreted as local time in TIMEZONE so
            # that both the Info dict and XMP carry an explicit UTC offset.
            if created and created.tzinfo is None:
                created = created.replace(tzinfo=TIMEZONE)
            if modified and modified.tzinfo is None:
                modified = modified.replace(tzinfo=TIMEZONE)

            # Classic Info dictionary: CreationDate + ModDate.
            if created:
                pdf.docinfo["/CreationDate"] = pikepdf.String(_format_pdf_date(created))
            if modified:
                pdf.docinfo["/ModDate"] = pikepdf.String(_format_pdf_date(modified))

            # XMP packet (Dublin Core + XMP + PDF namespaces).
            # `or` fallbacks also cover keys that are present but None, which
            # would otherwise raise and discard the whole enrichment.
            with pdf.open_metadata(set_pikepdf_as_editor=False) as xmp:
                xmp["dc:title"] = meta.get("title") or ""
                xmp["dc:creator"] = [meta.get("author") or ""]
                xmp["dc:description"] = meta.get("subject") or ""
                if meta.get("keywords"):
                    xmp["dc:subject"] = list(meta["keywords"])
                xmp["dc:language"] = [meta.get("language") or "de-DE"]
                xmp["pdf:Keywords"] = meta.get("keywords_comma") or ""
                xmp["xmp:CreatorTool"] = meta.get("creator_app") or "AegisSight Monitor"

                # XMP dates are ISO 8601 with a colon in the offset
                # (+02:00); strftime("%z") would emit +0200.
                if created:
                    xmp["xmp:CreateDate"] = created.isoformat(timespec="seconds")
                if modified:
                    modified_iso = modified.isoformat(timespec="seconds")
                    xmp["xmp:ModifyDate"] = modified_iso
                    xmp["xmp:MetadataDate"] = modified_iso

            buf_out = io.BytesIO()
            pdf.save(buf_out)
            return buf_out.getvalue()
    except Exception as e:
        # Deliberate catch-all: metadata enrichment must never break the export.
        logger.warning(f"PDF-Metadaten-Anreicherung (XMP/Dates) fehlgeschlagen: {e}")
        return pdf_bytes
|
||||||
|
|
||||||
|
|
||||||
async def generate_pdf(
|
async def generate_pdf(
|
||||||
incident: dict, articles: list, fact_checks: list, snapshots: list,
|
incident: dict, articles: list, fact_checks: list, snapshots: list,
|
||||||
scope: str, creator: str, executive_summary_html: str,
|
scope: str, creator: str, executive_summary_html: str,
|
||||||
@@ -614,6 +672,7 @@ async def generate_pdf(
|
|||||||
art["pub_date"] = pub[:10] if pub else ""
|
art["pub_date"] = pub[:10] if pub else ""
|
||||||
|
|
||||||
pdf_bytes = HTML(string=html_content).write_pdf()
|
pdf_bytes = HTML(string=html_content).write_pdf()
|
||||||
|
pdf_bytes = _enrich_pdf_metadata(pdf_bytes, meta)
|
||||||
return pdf_bytes
|
return pdf_bytes
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren