Export-Metadaten: Dublin Core, xmpRights und xmpMM nachruesten

Zusaetzliche XMP-Felder im PDF:
- dc:publisher (Organisation, Fallback AegisSight)
- dc:identifier (urn:aegissight:incident:<id>:<timestamp>)
- dc:date (Dokumentendatum, ergaenzend zu xmp:CreateDate)
- dc:format (application/pdf)
- dc:type (Report)
- dc:rights (Vertraulichkeitshinweis)
- pdf:Producer im XMP gespiegelt
- xmpRights:Marked (True) und xmpRights:UsageTerms (= dc:rights)
- xmpMM:DocumentID + xmpMM:InstanceID (UUIDs, frisch pro Export)

Damit koennen Dokumentenmanagementsysteme (DMS) die Berichte versionieren,
eindeutig identifizieren und Vertraulichkeitshinweise anzeigen.
Dieser Commit ist enthalten in:
claude-dev
2026-04-20 19:23:54 +00:00
Ursprung 949df868ff
Commit 5add8d9d59

Datei anzeigen

@@ -4,6 +4,7 @@ import io
import json import json
import logging import logging
import re import re
import uuid
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
@@ -517,6 +518,13 @@ def _build_export_metadata(
comments_lines.append("Orte: " + ", ".join(top_locations[:5])) comments_lines.append("Orte: " + ", ".join(top_locations[:5]))
comments = "\n".join(comments_lines) comments = "\n".join(comments_lines)
publisher = organization_name or "AegisSight"
identifier = f"urn:aegissight:incident:{incident.get('id', '0')}:{now.strftime('%Y%m%dT%H%M%S')}"
rights = (
"Vertrauliche Lageanalyse — AegisSight Monitor. "
"Weitergabe nur an autorisierte Empfaenger."
)
return { return {
"title": title, "title": title,
"author": creator or "AegisSight Monitor", "author": creator or "AegisSight Monitor",
@@ -534,6 +542,10 @@ def _build_export_metadata(
"modified_iso": modified.strftime("%Y-%m-%dT%H:%M:%S"), "modified_iso": modified.strftime("%Y-%m-%dT%H:%M:%S"),
"type_label": type_label, "type_label": type_label,
"scope_label": scope_label, "scope_label": scope_label,
"publisher": publisher,
"identifier": identifier,
"rights": rights,
"doc_type": "Report",
} }
@@ -570,15 +582,32 @@ def _enrich_pdf_metadata(pdf_bytes: bytes, meta: dict) -> bytes:
if modified: if modified:
pdf.docinfo["/ModDate"] = pikepdf.String(_format_pdf_date(modified)) pdf.docinfo["/ModDate"] = pikepdf.String(_format_pdf_date(modified))
# XMP-Metadatenblock schreiben (Dublin Core + XMP + PDF Namespaces) # Document-/Instance-ID fuer DMS-Versionierung (frisch pro Export)
doc_uuid = f"uuid:{uuid.uuid4()}"
instance_uuid = f"uuid:{uuid.uuid4()}"
# XMP-Metadatenblock schreiben (Dublin Core + XMP + PDF + xmpRights + xmpMM)
with pdf.open_metadata(set_pikepdf_as_editor=False) as xmp: with pdf.open_metadata(set_pikepdf_as_editor=False) as xmp:
# Dublin Core
xmp["dc:title"] = meta.get("title", "") xmp["dc:title"] = meta.get("title", "")
xmp["dc:creator"] = [meta.get("author", "")] xmp["dc:creator"] = [meta.get("author", "")]
xmp["dc:description"] = meta.get("subject", "") xmp["dc:description"] = meta.get("subject", "")
if meta.get("keywords"): if meta.get("keywords"):
xmp["dc:subject"] = list(meta["keywords"]) xmp["dc:subject"] = list(meta["keywords"])
xmp["dc:language"] = [meta.get("language", "de-DE")] xmp["dc:language"] = [meta.get("language", "de-DE")]
xmp["dc:publisher"] = [meta.get("publisher", "AegisSight")]
xmp["dc:identifier"] = meta.get("identifier", "")
xmp["dc:format"] = "application/pdf"
xmp["dc:type"] = [meta.get("doc_type", "Report")]
xmp["dc:rights"] = meta.get("rights", "")
if created:
xmp["dc:date"] = [created.strftime("%Y-%m-%dT%H:%M:%S%z")]
# PDF Namespace
xmp["pdf:Keywords"] = meta.get("keywords_comma", "") xmp["pdf:Keywords"] = meta.get("keywords_comma", "")
xmp["pdf:Producer"] = "WeasyPrint + AegisSight Monitor"
# XMP Namespace
xmp["xmp:CreatorTool"] = meta.get("creator_app", "AegisSight Monitor") xmp["xmp:CreatorTool"] = meta.get("creator_app", "AegisSight Monitor")
if created: if created:
xmp["xmp:CreateDate"] = created.strftime("%Y-%m-%dT%H:%M:%S%z") xmp["xmp:CreateDate"] = created.strftime("%Y-%m-%dT%H:%M:%S%z")
@@ -586,6 +615,16 @@ def _enrich_pdf_metadata(pdf_bytes: bytes, meta: dict) -> bytes:
xmp["xmp:ModifyDate"] = modified.strftime("%Y-%m-%dT%H:%M:%S%z") xmp["xmp:ModifyDate"] = modified.strftime("%Y-%m-%dT%H:%M:%S%z")
xmp["xmp:MetadataDate"] = modified.strftime("%Y-%m-%dT%H:%M:%S%z") xmp["xmp:MetadataDate"] = modified.strftime("%Y-%m-%dT%H:%M:%S%z")
# xmpRights: Rechte- und Vertraulichkeitshinweis (XMP erwartet String "True")
xmp["xmpRights:Marked"] = "True"
if meta.get("rights"):
# String: pikepdf wrapped das automatisch als LangAlt mit x-default
xmp["xmpRights:UsageTerms"] = meta["rights"]
# xmpMM: Document- und Instance-ID fuer DMS-Versionierung
xmp["xmpMM:DocumentID"] = doc_uuid
xmp["xmpMM:InstanceID"] = instance_uuid
buf_out = io.BytesIO() buf_out = io.BytesIO()
pdf.save(buf_out) pdf.save(buf_out)
return buf_out.getvalue() return buf_out.getvalue()