Export-Metadaten: Dublin Core, xmpRights und xmpMM nachruesten

Zusaetzliche XMP-Felder im PDF:
- dc:publisher (Organisation, Fallback AegisSight)
- dc:identifier (urn:aegissight:incident:<id>:<timestamp>)
- dc:date (Dokumentendatum, ergaenzend zu xmp:CreateDate)
- dc:format (application/pdf)
- dc:type (Report)
- dc:rights (Vertraulichkeitshinweis)
- pdf:Producer im XMP gespiegelt
- xmpRights:Marked (True) und xmpRights:UsageTerms (= dc:rights)
- xmpMM:DocumentID und xmpMM:InstanceID (jeweils eine neue UUID pro Export)

Damit koennen Dokumentenmanagementsysteme (DMS) die Berichte eindeutig
identifizieren, versionieren und Vertraulichkeitshinweise anzeigen.
Dieser Commit ist enthalten in:
claude-dev
2026-04-20 19:23:54 +00:00
Ursprung 949df868ff
Commit 5add8d9d59

Datei anzeigen

@@ -4,6 +4,7 @@ import io
import json
import logging
import re
import uuid
from collections import defaultdict
from datetime import datetime
from pathlib import Path
@@ -517,6 +518,13 @@ def _build_export_metadata(
comments_lines.append("Orte: " + ", ".join(top_locations[:5]))
comments = "\n".join(comments_lines)
publisher = organization_name or "AegisSight"
identifier = f"urn:aegissight:incident:{incident.get('id', '0')}:{now.strftime('%Y%m%dT%H%M%S')}"
rights = (
"Vertrauliche Lageanalyse — AegisSight Monitor. "
"Weitergabe nur an autorisierte Empfaenger."
)
return {
"title": title,
"author": creator or "AegisSight Monitor",
@@ -534,6 +542,10 @@ def _build_export_metadata(
"modified_iso": modified.strftime("%Y-%m-%dT%H:%M:%S"),
"type_label": type_label,
"scope_label": scope_label,
"publisher": publisher,
"identifier": identifier,
"rights": rights,
"doc_type": "Report",
}
@@ -570,15 +582,32 @@ def _enrich_pdf_metadata(pdf_bytes: bytes, meta: dict) -> bytes:
if modified:
pdf.docinfo["/ModDate"] = pikepdf.String(_format_pdf_date(modified))
# XMP-Metadatenblock schreiben (Dublin Core + XMP + PDF Namespaces)
# Document-/Instance-ID fuer DMS-Versionierung (frisch pro Export)
doc_uuid = f"uuid:{uuid.uuid4()}"
instance_uuid = f"uuid:{uuid.uuid4()}"
# XMP-Metadatenblock schreiben (Dublin Core + XMP + PDF + xmpRights + xmpMM)
with pdf.open_metadata(set_pikepdf_as_editor=False) as xmp:
# Dublin Core
xmp["dc:title"] = meta.get("title", "")
xmp["dc:creator"] = [meta.get("author", "")]
xmp["dc:description"] = meta.get("subject", "")
if meta.get("keywords"):
xmp["dc:subject"] = list(meta["keywords"])
xmp["dc:language"] = [meta.get("language", "de-DE")]
xmp["dc:publisher"] = [meta.get("publisher", "AegisSight")]
xmp["dc:identifier"] = meta.get("identifier", "")
xmp["dc:format"] = "application/pdf"
xmp["dc:type"] = [meta.get("doc_type", "Report")]
xmp["dc:rights"] = meta.get("rights", "")
if created:
xmp["dc:date"] = [created.strftime("%Y-%m-%dT%H:%M:%S%z")]
# PDF Namespace
xmp["pdf:Keywords"] = meta.get("keywords_comma", "")
xmp["pdf:Producer"] = "WeasyPrint + AegisSight Monitor"
# XMP Namespace
xmp["xmp:CreatorTool"] = meta.get("creator_app", "AegisSight Monitor")
if created:
xmp["xmp:CreateDate"] = created.strftime("%Y-%m-%dT%H:%M:%S%z")
@@ -586,6 +615,16 @@ def _enrich_pdf_metadata(pdf_bytes: bytes, meta: dict) -> bytes:
xmp["xmp:ModifyDate"] = modified.strftime("%Y-%m-%dT%H:%M:%S%z")
xmp["xmp:MetadataDate"] = modified.strftime("%Y-%m-%dT%H:%M:%S%z")
# xmpRights: Rechte- und Vertraulichkeitshinweis (XMP erwartet String "True")
xmp["xmpRights:Marked"] = "True"
if meta.get("rights"):
# String: pikepdf wrapped das automatisch als LangAlt mit x-default
xmp["xmpRights:UsageTerms"] = meta["rights"]
# xmpMM: Document- und Instance-ID fuer DMS-Versionierung
xmp["xmpMM:DocumentID"] = doc_uuid
xmp["xmpMM:InstanceID"] = instance_uuid
buf_out = io.BytesIO()
pdf.save(buf_out)
return buf_out.getvalue()