diff --git a/src/report_generator.py b/src/report_generator.py index 2830bb8..a57c3b3 100644 --- a/src/report_generator.py +++ b/src/report_generator.py @@ -391,10 +391,132 @@ LAGEBILD: return "" +def _parse_db_timestamp(value) -> datetime | None: + """SQLite-Timestamp robust als datetime parsen (ISO oder 'YYYY-MM-DD HH:MM:SS').""" + if not value: + return None + if isinstance(value, datetime): + return value + try: + text = str(value).replace("T", " ").replace("Z", "") + # Sekundenbruchteile und Timezone-Offset abschneiden (python-docx mag nur naive dt) + text = text.split(".")[0].split("+")[0].strip() + return datetime.strptime(text, "%Y-%m-%d %H:%M:%S") + except (ValueError, TypeError): + try: + return datetime.strptime(str(value)[:10], "%Y-%m-%d") + except (ValueError, TypeError): + return None + + +def _slug_scope_label(scope: str, sections: set[str] | None) -> str: + """Scope-Label fuer Metadaten und Dateinamen.""" + if sections: + if sections == {"zusammenfassung"}: + return "Zusammenfassung" + if "timeline" in sections: + return "Vollstaendiger Bericht" + return "Lagebericht" + return {"summary": "Zusammenfassung", "report": "Lagebericht", "full": "Vollstaendiger Bericht"}.get( + scope, "Lagebericht" + ) + + +def _build_export_metadata( + incident: dict, + articles: list, + fact_checks: list, + sources: list, + creator: str, + scope: str, + sections: set[str] | None, + organization_name: str | None, + top_locations: list[str] | None, +) -> dict: + """Einheitlicher Metadaten-Dict fuer PDF (HTML-Meta-Tags) und DOCX (core_properties).""" + is_research = incident.get("type") == "research" + type_label = "Hintergrundrecherche" if is_research else "Live-Monitoring" + category = "OSINT-Hintergrundrecherche" if is_research else "OSINT-Lagebericht" + scope_label = _slug_scope_label(scope, sections) + + title_raw = (incident.get("title") or "Unbenannte Lage").strip() + title = f"{title_raw} — {type_label}" + + subject = (incident.get("description") or "").strip() + if not subject: + subject = f"{type_label} zu: {title_raw}" + + # Keywords sammeln (Reihenfolge relevant für Anzeige, Dedup mit dict.fromkeys) + keywords: list[str] = ["OSINT", type_label] + if organization_name: + keywords.append(organization_name) + + # category_labels ist ein Komma-getrennter String + cat_labels = incident.get("category_labels") or "" + for lbl in cat_labels.split(","): + lbl = lbl.strip() + if lbl: + keywords.append(lbl) + + if top_locations: + keywords.extend([loc for loc in top_locations if loc]) + + # Dedup (case-insensitive) mit Reihenfolge erhalten, max 15 + seen = set() + unique_keywords: list[str] = [] + for kw in keywords: + key = kw.lower() + if key not in seen: + seen.add(key) + unique_keywords.append(kw) + if len(unique_keywords) >= 15: + break + + now = datetime.now(TIMEZONE) + created = _parse_db_timestamp(incident.get("created_at")) or now.replace(tzinfo=None) + modified = _parse_db_timestamp(incident.get("updated_at")) or created + + # Strukturierter Comments-Block (wird in DOCX angezeigt, kompakt) + stand = now.strftime("%d.%m.%Y") + comments_lines = [ + f"Incident-ID: {incident.get('id', '?')} | Typ: {incident.get('type', 'adhoc')} | Scope: {scope_label}", + f"Stand: {stand}", + ] + if organization_name: + comments_lines.append(f"Organisation: {organization_name}") + comments_lines.append( + f"Umfang: {len(articles)} Artikel, {len(fact_checks)} Faktenchecks, {len(sources)} Quellen" + ) + if top_locations: + comments_lines.append("Orte: " + ", ".join(top_locations[:5])) + comments = "\n".join(comments_lines) + + return { + "title": title, + "author": creator or "AegisSight Monitor", + "subject": subject, + "keywords": unique_keywords, + "keywords_comma": ", ".join(unique_keywords), + "keywords_semicolon": "; ".join(unique_keywords), + "category": category, + "comments": comments, + "creator_app": "AegisSight Monitor", + "language": "de-DE", + "created": created, + "modified": modified, + "created_iso": created.strftime("%Y-%m-%dT%H:%M:%S"), + "modified_iso": modified.strftime("%Y-%m-%dT%H:%M:%S"), + "type_label": type_label, + "scope_label": scope_label, + } + + async def generate_pdf( incident: dict, articles: list, fact_checks: list, snapshots: list, scope: str, creator: str, executive_summary_html: str, sections: set[str] | None = None, + organization_name: str | None = None, + top_locations: list[str] | None = None, ) -> bytes: """PDF-Report via WeasyPrint generieren.""" # Sections aus scope ableiten wenn nicht explizit angegeben @@ -424,6 +546,11 @@ async def generate_pdf( if not is_research and zusammenfassung_html: zusammenfassung_html = _linkify_citations_html(zusammenfassung_html, all_sources) + meta = _build_export_metadata( + incident, articles, fact_checks, all_sources, creator, scope, sections, + organization_name, top_locations, + ) + env = Environment(loader=FileSystemLoader(str(TEMPLATE_DIR))) template = env.get_template("report.html") @@ -449,6 +576,7 @@ async def generate_pdf( source_stats=_prepare_source_stats(articles)[:20] if scope == "report" else _prepare_source_stats(articles), timeline=_prepare_timeline(articles) if scope == "full" else [], articles=articles if scope == "full" else [], + meta=meta, ) # Artikel pub_date aufbereiten @@ -468,6 +596,8 @@ async def generate_docx( incident: dict, articles: list, fact_checks: list, snapshots: list, scope: str, creator: str, executive_summary_text: str, sections: set[str] | None = None, + organization_name: str | None = None, + top_locations: list[str] | None = None, ) -> bytes: """Word-Report via python-docx generieren.""" doc = Document() @@ -496,6 +626,28 @@ async def generate_docx( zusammenfassung_title = "Zusammenfassung" bericht_summary = remaining + meta = _build_export_metadata( + incident, articles, fact_checks, all_sources, creator, scope, sections, + organization_name, top_locations, + ) + + # Dateimetadaten setzen (sichtbar in Explorer/Finder, DMS-Systemen) + cp = doc.core_properties + cp.title = meta["title"] + cp.author = meta["author"] + cp.subject = meta["subject"] + cp.keywords = meta["keywords_semicolon"] + cp.comments = meta["comments"] + cp.category = meta["category"] + cp.last_modified_by = meta["author"] + cp.language = meta["language"] + cp.content_status = "Final" + try: + cp.created = meta["created"] + cp.modified = meta["modified"] + except (ValueError, TypeError) as e: + logger.warning(f"DOCX created/modified konnte nicht gesetzt werden: {e}") + # Styles style = doc.styles['Normal'] style.font.size = Pt(10) diff --git a/src/report_templates/report.html b/src/report_templates/report.html index 793b011..aad78ab 100644 --- a/src/report_templates/report.html +++ b/src/report_templates/report.html @@ -1,7 +1,19 @@ - + +{% if meta %} +{{ meta.title }} + + + + + + + +{% else %} +{{ incident.title }} +{% endif %}