2026-06-22 08:51:31 +02:00
--- a/src/report_generator.py
+++ b/src/report_generator.py
@@ -7,6 +7,7 @@ import re
 import uuid
 from collections import defaultdict
 from datetime import datetime
+from html import escape as _html_escape
 from pathlib import Path

 import pikepdf
@@ -153,6 +154,66 @@ def _markdown_to_html(text: str) -> str:
    return '\n'.join(result)


+def _parse_developments_for_export(text: str) -> list[tuple[str, str]]:
+    """Parse the 'latest developments' text (latest_developments) for export.
+
+    Expected input per entry: '- [DD.MM. HH:MM] Text {Source|URL, ...}'.
+    Returns one (date_label, body) pair per matching line, in stored order.
+    A trailing source brace group and [N] citations are stripped; the export
+    shows NO links on purpose. The stored format has no year; when it is missing
+    the current year is used (live-monitoring reports are assumed same-day).
+    """
+    if not text:
+        return []
+    year2 = datetime.now(TIMEZONE).strftime("%y")
+    bullet_re = re.compile(
+        r"^\s*(?:[-*•]\s*)?\[\s*(\d{1,2})\.(\d{1,2})\.?(?:(\d{2,4}))?\s+(\d{1,2}:\d{2})\s*\]\s*(.+?)\s*$"
+    )
+    trailing_braces = re.compile(r"\s*\{[^{}]*\}\s*\.?\s*$")
+    citation_re = re.compile(r"\s*\[\d{1,5}[a-z]?\]")
+    result: list[tuple[str, str]] = []
+    for raw in text.splitlines():
+        line = raw.strip()
+        if not line:
+            continue
+        m = bullet_re.match(line)
+        if not m:
+            continue
+        day, month, year, time = m.group(1), m.group(2), m.group(3), m.group(4)
+        body = m.group(5).strip()
+        # Strip the trailing source braces and inline [N] citations (no links)
+        body = trailing_braces.sub("", body).strip()
+        body = citation_re.sub("", body).strip()
+        if not body:
+            continue
+        yy = year[-2:] if year else year2
+        label = f"{int(day):02d}.{int(month):02d}.{yy}, {time} Uhr"
+        result.append((label, body))
+    return result
+
+
+def _format_latest_developments_html(text: str) -> str:
+    """Render the 'latest developments' as an HTML block for the PDF export.
+
+    Per entry: a date/time line, followed by the entry text in its own block.
+    No source links. Falls back to _markdown_to_html when no entry can be parsed.
+    """
+    pairs = _parse_developments_for_export(text)
+    if not pairs:
+        return _markdown_to_html(text)
+    blocks = []
+    for label, body in pairs:
+        body_html = _html_escape(body)
+        body_html = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', body_html)
+        blocks.append(
+            '<div class="dev-entry">'
+            f'<div class="dev-entry-date">{_html_escape(label)}</div>'
+            f'<div class="dev-entry-body">{body_html}</div>'
+            '</div>'
+        )
+    return "\n".join(blocks)
+
+
 def _truncate_lagebild(summary_text: str, max_chars: int = 4000) -> str:
    """Lagebild für den Lagebericht auf die Zusammenfassung kürzen.

@@ -479,6 +540,10 @@ def _build_export_metadata(
    subject = (incident.get("description") or "").strip()
    if not subject:
        subject = f"{type_label} zu: {title_raw}"
+    # DOCX-Core-Property "subject" erzwingt ein 255-Zeichen-Limit; laengere
+    # Beschreibungen wuerden den Word-Export sonst mit ValueError abbrechen.
+    if len(subject) > 255:
+        subject = subject[:255]

    # Keywords sammeln (Reihenfolge relevant für Anzeige, Dedup mit dict.fromkeys)
    keywords: list[str] = ["OSINT", type_label]
@@ -711,22 +776,33 @@ async def generate_pdf(
        else:  # full
            sections = {"zusammenfassung", "bericht", "faktencheck", "quellen", "timeline"}

-    # Fuer Research-Lagen: Zusammenfassung aus dem Bericht extrahieren
+    # Zusammenfassungs-Quelle bestimmen:
+    # - Research: ZUSAMMENFASSUNG/UEBERBLICK aus dem Bericht extrahieren.
+    # - Live-Monitoring (adhoc): "Neueste Entwicklungen" aus latest_developments,
+    #   ohne Quellen-Links, Datum/Uhrzeit als eigene Zeile.
+    # - sonst: KI-Executive-Summary (executive_summary_html).
    is_research = incident.get("type") == "research"
    all_sources = _prepare_sources(incident)
+    latest_dev = (incident.get("latest_developments") or "").strip()
    zusammenfassung_html = executive_summary_html
    bericht_summary = incident.get("summary", "")
    zusammenfassung_title = "Zusammenfassung"
+    summary_has_links = True

    if is_research and bericht_summary:
        extracted_html, remaining = _extract_zusammenfassung(bericht_summary, all_sources)
        if extracted_html:
            zusammenfassung_html = extracted_html
-            zusammenfassung_title = "Zusammenfassung"
            bericht_summary = remaining
+    elif not is_research and latest_dev:
+        dev_html = _format_latest_developments_html(latest_dev)
+        if dev_html:
+            zusammenfassung_html = dev_html
+            zusammenfassung_title = "Neueste Entwicklungen"
+            summary_has_links = False  # Quellen bewusst entfernt

-    # Auch das (nicht-research) Executive Summary linkifizieren — ggf. enthaelt es Zitate
-    if not is_research and zusammenfassung_html:
+    # Linkify only the non-research AI summary; developments stay link-free
+    if not is_research and summary_has_links and zusammenfassung_html:
        zusammenfassung_html = _linkify_citations_html(zusammenfassung_html, all_sources)

    meta = _build_export_metadata(
@@ -799,20 +875,27 @@ async def generate_docx(
        else:  # full
            sections = {"zusammenfassung", "bericht", "faktencheck", "quellen", "timeline"}

-    # Fuer Research-Lagen: Zusammenfassung aus dem Bericht extrahieren
+    # Zusammenfassungs-Quelle bestimmen (analog generate_pdf):
+    # Research -> Bericht-Extrakt, Live-Monitoring -> "Neueste Entwicklungen", sonst KI.
    is_research = incident.get("type") == "research"
    all_sources = _prepare_sources(incident)
+    latest_dev = (incident.get("latest_developments") or "").strip()
    zusammenfassung_text = executive_summary_text
    bericht_summary = incident.get("summary") or "Keine Zusammenfassung verfügbar."
    zusammenfassung_title = "Zusammenfassung"
    zusammenfassung_lines: list[str] = []
+    zusammenfassung_developments: list[tuple[str, str]] = []

    if is_research and bericht_summary:
        extracted_lines, remaining = _extract_zusammenfassung_lines(bericht_summary)
        if extracted_lines:
            zusammenfassung_lines = extracted_lines
-            zusammenfassung_title = "Zusammenfassung"
            bericht_summary = remaining
+    elif not is_research and latest_dev:
+        dev_pairs = _parse_developments_for_export(latest_dev)
+        if dev_pairs:
+            zusammenfassung_developments = dev_pairs
+            zusammenfassung_title = "Neueste Entwicklungen"

    meta = _build_export_metadata(
        incident, articles, fact_checks, all_sources, creator, scope, sections,
@@ -890,11 +973,23 @@ async def generate_docx(

    doc.add_page_break()

-    # --- Zusammenfassung / Executive Summary ---
+    # --- Zusammenfassung / Neueste Entwicklungen ---
    if "zusammenfassung" in sections:
        doc.add_heading(zusammenfassung_title, level=1)

-        if zusammenfassung_lines:
+        if zusammenfassung_developments:
+            # Live-Monitoring: pro Eintrag Datum/Uhrzeit-Zeile + Absatz mit Text, ohne Links
+            for label, body in zusammenfassung_developments:
+                date_para = doc.add_paragraph()
+                date_para.paragraph_format.space_after = Pt(1)
+                run = date_para.add_run(label)
+                run.bold = True
+                run.font.size = Pt(9)
+                run.font.color.rgb = RGBColor(0x0a, 0x18, 0x32)
+                body_para = doc.add_paragraph()
+                body_para.paragraph_format.space_after = Pt(8)
+                body_para.add_run(re.sub(r'\*\*(.+?)\*\*', r'\1', body))
+        elif zusammenfassung_lines:
            for line in zusammenfassung_lines:
                _add_docx_paragraph_with_citations(doc, line, all_sources, style='List Bullet')
        else:
--- a/src/report_templates/report.html
+++ b/src/report_templates/report.html
@@ -47,6 +47,12 @@ body { font-family: -apple-system, 'Segoe UI', Roboto, Helvetica, Arial, sans-se
 .exec-summary ul { margin: 8px 0 0 18px; }
 .exec-summary li { margin-bottom: 6px; line-height: 1.6; }

+/* Neueste Entwicklungen (Live-Monitoring) */
+.dev-entry { margin-bottom: 12px; }
+.dev-entry:last-child { margin-bottom: 0; }
+.dev-entry-date { font-size: 9pt; font-weight: 600; color: #0a1832; margin-bottom: 2px; }
+.dev-entry-body { font-size: 10.5pt; line-height: 1.5; }
+
 /* Lagebild */
 .lagebild-content { line-height: 1.7; }
 .lagebild-content p { margin-bottom: 8px; }
@@ -99,7 +105,7 @@ tr:nth-child(even) { background: #f8f9fa; }
 <div class="toc">
    <h2>Inhaltsverzeichnis</h2>
    <ul class="toc-list">
-        {% if 'zusammenfassung' in sections %}<li><a href="#sec-zusammenfassung">Zusammenfassung</a></li>{% endif %}
+        {% if 'zusammenfassung' in sections %}<li><a href="#sec-zusammenfassung">{{ zusammenfassung_title }}</a></li>{% endif %}
        {% if 'bericht' in sections %}<li><a href="#sec-bericht">{% if incident.type == "research" %}Recherchebericht{% else %}Lagebild{% endif %}</a></li>{% endif %}
        {% if 'faktencheck' in sections and fact_checks %}<li><a href="#sec-faktencheck">Faktencheck</a></li>{% endif %}
        {% if 'quellen' in sections and sources %}<li><a href="#sec-quellen">Quellenverzeichnis</a></li>{% endif %}
--- a/src/routers/incidents.py
+++ b/src/routers/incidents.py
@@ -1232,18 +1232,14 @@ async def export_incident(
        snapshots = [dict(r) for r in await cursor.fetchall()]

    # Zusammenfassung fuer den Export:
-    # - Bei Adhoc-Lagen primaer "Neueste Entwicklungen" (latest_developments) als Markdown-Bullets,
-    #   weil Live-Monitoring von Aktualitaet lebt.
-    # - Fallback (oder bei Research): Executive Summary (KI-generiert, gecacht).
+    # - Live-Monitoring (adhoc) zeigt primaer "Neueste Entwicklungen" (latest_developments).
+    #   Das Rendering (Datum/Uhrzeit als eigene Zeile, ohne Links) uebernimmt der
+    #   Report-Generator direkt aus incident["latest_developments"].
+    # - Executive Summary (KI, gecacht) dient nur als Fallback (oder bei Research-Lagen).
    is_adhoc = (incident.get("type") or "adhoc") != "research"
    latest_dev = (incident.get("latest_developments") or "").strip()
-    exec_summary = None
-    if is_adhoc and latest_dev:
-        from report_generator import _markdown_to_html as _md_to_html
-        exec_summary = _md_to_html(latest_dev)
-    if not exec_summary:
    exec_summary = incident.get("executive_summary")
-    if not exec_summary:
+    if not exec_summary and not (is_adhoc and latest_dev):
        summary_text = incident.get("summary") or ""
        exec_summary = await generate_executive_summary(summary_text)
        await db.execute(
@@ -1251,6 +1247,7 @@ async def export_incident(
            (exec_summary, incident_id),
        )
        await db.commit()
+    exec_summary = exec_summary or ""

    date_str = datetime.now(TIMEZONE).strftime("%Y%m%d")
    slug = _slugify(incident["title"])