report-export: verlinkte Zitate in Zusammenfassung und Bericht
Dieser Commit ist enthalten in:
@@ -171,53 +171,151 @@ def _strip_citation_numbers(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def _find_source_for_citation(num: str, sources: list) -> dict | None:
|
||||
"""Sucht eine Quelle anhand der Zitat-Nummer (inkl. Suffix-Fallback wie 1383a -> 1383)."""
|
||||
if not sources:
|
||||
return None
|
||||
for s in sources:
|
||||
try:
|
||||
if str(s.get("nr")) == num:
|
||||
return s
|
||||
except Exception:
|
||||
continue
|
||||
# Suffix-Fallback: 1383a -> 1383
|
||||
if re.search(r"[a-z]$", num):
|
||||
base = re.sub(r"[a-z]$", "", num)
|
||||
for s in sources:
|
||||
if str(s.get("nr")) == base:
|
||||
return s
|
||||
return None
|
||||
|
||||
|
||||
def _linkify_citations_html(text: str, sources: list) -> str:
|
||||
"""Ersetzt [1234]-Zitate durch HTML-Links zur jeweiligen Quelle.
|
||||
|
||||
Nummern ohne zugeordnete Quelle bleiben als sichtbare Zahl erhalten.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
if not sources:
|
||||
return text
|
||||
|
||||
def repl(match: re.Match) -> str:
|
||||
num = match.group(1)
|
||||
src = _find_source_for_citation(num, sources)
|
||||
if src and src.get("url"):
|
||||
url = src["url"].replace('"', """)
|
||||
name = (src.get("name") or "").replace('"', """)
|
||||
return f'<a href="{url}" class="citation" title="{name}">[{num}]</a>'
|
||||
return match.group(0)
|
||||
|
||||
return re.sub(r"\[(\d{1,5}[a-z]?)\]", repl, text)
|
||||
|
||||
|
||||
def _add_docx_hyperlink(paragraph, url: str, text: str):
    """Append a clickable external hyperlink to a python-docx paragraph.

    Registers ``url`` as an external hyperlink relationship on the
    paragraph's part, builds a ``w:hyperlink`` element containing one run
    with the given text, and appends it to the end of the paragraph XML.
    The run is given colour ``0066CC``, a single underline and ``w:sz`` 20.

    Args:
        paragraph: Paragraph object exposing ``.part`` and ``._p``.
        url: Link target.
        text: Visible link text; whitespace is preserved.

    Returns:
        The created ``w:hyperlink`` element.
    """
    from docx.oxml.shared import OxmlElement, qn

    def _val_element(tag: str, value: str):
        # Small factory for elements that only carry a w:val attribute.
        node = OxmlElement(tag)
        node.set(qn("w:val"), value)
        return node

    rel_id = paragraph.part.relate_to(
        url,
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
        is_external=True,
    )
    link = OxmlElement("w:hyperlink")
    link.set(qn("r:id"), rel_id)

    # Run properties, appended in the order colour, underline, size.
    run_props = OxmlElement("w:rPr")
    for tag, value in (("w:color", "0066CC"), ("w:u", "single"), ("w:sz", "20")):
        run_props.append(_val_element(tag, value))

    text_node = OxmlElement("w:t")
    text_node.text = text
    text_node.set(qn("xml:space"), "preserve")

    run = OxmlElement("w:r")
    run.append(run_props)
    run.append(text_node)

    link.append(run)
    paragraph._p.append(link)
    return link
|
||||
|
||||
|
||||
def _add_docx_paragraph_with_citations(doc_or_para, text: str, sources: list, style: str | None = None):
    """Write ``text`` into a paragraph, rendering ``[1234]`` citations as hyperlinks.

    Plain text between citations is added as ordinary runs. A citation whose
    source is found and has a ``"url"`` is added through
    ``_add_docx_hyperlink``; any other citation is added as a plain run
    with its original bracketed text.

    Args:
        doc_or_para: Either an object with ``add_paragraph`` (a new paragraph
            is created on it) or an existing paragraph that is appended to.
        text: Text that may contain bracketed citation numbers.
        sources: Source entries used to resolve citation numbers.
        style: Optional paragraph style; only applied when a new paragraph
            is created.

    Returns:
        The paragraph that was written to.
    """
    if not hasattr(doc_or_para, "add_paragraph"):
        # Already a paragraph: append to it directly.
        target = doc_or_para
    elif style:
        target = doc_or_para.add_paragraph(style=style)
    else:
        target = doc_or_para.add_paragraph()

    cursor = 0
    for hit in re.finditer(r"\[(\d{1,5}[a-z]?)\]", text):
        begin, end = hit.span()
        if begin > cursor:
            # Plain text between the previous citation and this one.
            target.add_run(text[cursor:begin])
        number = hit.group(1)
        source = _find_source_for_citation(number, sources)
        if source and source.get("url"):
            _add_docx_hyperlink(target, source["url"], f"[{number}]")
        else:
            target.add_run(hit.group(0))
        cursor = end

    # Trailing text after the last citation (or the whole text if none matched).
    if cursor < len(text):
        target.add_run(text[cursor:])
    return target
|
||||
|
||||
|
||||
|
||||
def _extract_zusammenfassung(summary_text: str) -> tuple[str, str]:
|
||||
"""Extrahiert die ZUSAMMENFASSUNG-Sektion aus einem Research-Briefing.
|
||||
|
||||
|
||||
def _extract_zusammenfassung_lines(summary_text: str) -> tuple[list[str], str]:
|
||||
"""Extrahiert die ZUSAMMENFASSUNG-Sektion als Liste von Rohzeilen (ohne Zitatbearbeitung).
|
||||
|
||||
Returns:
|
||||
(zusammenfassung_html, remaining_summary)
|
||||
zusammenfassung_html: HTML-formatierte Bullet Points
|
||||
remaining_summary: Der Rest des Berichts ohne die Zusammenfassung
|
||||
(lines, remaining_summary)
|
||||
"""
|
||||
if not summary_text:
|
||||
return "", summary_text
|
||||
return [], summary_text
|
||||
|
||||
# Suche nach ## ZUSAMMENFASSUNG ... bis zur naechsten ## Ueberschrift
|
||||
pattern = r"(## (?:ZUSAMMENFASSUNG|ÜBERBLICK)\s*\n)(.*?)(?=\n## |\Z)"
|
||||
match = re.search(pattern, summary_text, re.DOTALL)
|
||||
if not match:
|
||||
return "", summary_text
|
||||
return [], summary_text
|
||||
|
||||
zusammenfassung_raw = match.group(2).strip()
|
||||
# Rest des Berichts ohne die Zusammenfassung-Sektion
|
||||
remaining = summary_text[:match.start()] + summary_text[match.end():]
|
||||
remaining = remaining.strip()
|
||||
|
||||
# Bullet Points als HTML formatieren
|
||||
lines = []
|
||||
lines: list[str] = []
|
||||
for line in zusammenfassung_raw.split("\n"):
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("- "):
|
||||
clean = _strip_citation_numbers(stripped[2:].strip())
|
||||
if clean:
|
||||
lines.append(clean)
|
||||
elif stripped.startswith("* "):
|
||||
clean = _strip_citation_numbers(stripped[2:].strip())
|
||||
if clean:
|
||||
lines.append(clean)
|
||||
if stripped.startswith("- ") or stripped.startswith("* "):
|
||||
content = stripped[2:].strip()
|
||||
if content:
|
||||
lines.append(content)
|
||||
elif stripped and not stripped.startswith("#"):
|
||||
clean = _strip_citation_numbers(stripped)
|
||||
if clean:
|
||||
lines.append(clean)
|
||||
lines.append(stripped)
|
||||
return lines, remaining
|
||||
|
||||
if lines:
|
||||
html = "<ul>\n" + "\n".join(f"<li>{line}</li>" for line in lines) + "\n</ul>"
|
||||
else:
|
||||
html = f"<p>{_strip_citation_numbers(zusammenfassung_raw)}</p>"
|
||||
|
||||
def _extract_zusammenfassung(summary_text: str, sources: list | None = None) -> tuple[str, str]:
    """Extract the ZUSAMMENFASSUNG section as an HTML list with linked citations.

    The raw section lines come from ``_extract_zusammenfassung_lines``; each
    one is passed through ``_linkify_citations_html`` and wrapped in
    ``<li>``.

    Args:
        summary_text: Full briefing text.
        sources: Source entries used to resolve citations; ``None`` is
            treated as an empty list.

    Returns:
        ``(html, remaining_summary)``. When no section lines are found the
        result is ``("", summary_text)`` with the input text unchanged.
    """
    bullet_lines, rest = _extract_zusammenfassung_lines(summary_text)
    if not bullet_lines:
        return "", summary_text

    known_sources = sources or []
    items = "\n".join(
        f"<li>{_linkify_citations_html(entry, known_sources)}</li>"
        for entry in bullet_lines
    )
    return "<ul>\n" + items + "\n</ul>", rest
|
||||
|
||||
|
||||
@@ -310,17 +408,22 @@ async def generate_pdf(
|
||||
|
||||
# Fuer Research-Lagen: Zusammenfassung aus dem Bericht extrahieren
|
||||
is_research = incident.get("type") == "research"
|
||||
all_sources = _prepare_sources(incident)
|
||||
zusammenfassung_html = executive_summary_html
|
||||
bericht_summary = incident.get("summary", "")
|
||||
zusammenfassung_title = "Zusammenfassung"
|
||||
|
||||
if is_research and bericht_summary:
|
||||
extracted_html, remaining = _extract_zusammenfassung(bericht_summary)
|
||||
extracted_html, remaining = _extract_zusammenfassung(bericht_summary, all_sources)
|
||||
if extracted_html:
|
||||
zusammenfassung_html = extracted_html
|
||||
zusammenfassung_title = "Zusammenfassung"
|
||||
bericht_summary = remaining
|
||||
|
||||
# Auch das (nicht-research) Executive Summary linkifizieren — ggf. enthaelt es Zitate
|
||||
if not is_research and zusammenfassung_html:
|
||||
zusammenfassung_html = _linkify_citations_html(zusammenfassung_html, all_sources)
|
||||
|
||||
env = Environment(loader=FileSystemLoader(str(TEMPLATE_DIR)))
|
||||
template = env.get_template("report.html")
|
||||
|
||||
@@ -337,8 +440,8 @@ async def generate_pdf(
|
||||
zusammenfassung_title=zusammenfassung_title,
|
||||
sections=sections,
|
||||
scope=scope,
|
||||
lagebild_html=_markdown_to_html(
|
||||
_strip_citation_numbers(bericht_summary)
|
||||
lagebild_html=_linkify_citations_html(
|
||||
_markdown_to_html(bericht_summary), all_sources
|
||||
),
|
||||
lagebild_timestamp=(incident.get("updated_at") or "")[:16].replace("T", " "),
|
||||
sources=_prepare_sources(incident)[:30] if scope == "report" else _prepare_sources(incident),
|
||||
@@ -380,14 +483,16 @@ async def generate_docx(
|
||||
|
||||
# Fuer Research-Lagen: Zusammenfassung aus dem Bericht extrahieren
|
||||
is_research = incident.get("type") == "research"
|
||||
all_sources = _prepare_sources(incident)
|
||||
zusammenfassung_text = executive_summary_text
|
||||
bericht_summary = incident.get("summary") or "Keine Zusammenfassung verfuegbar."
|
||||
zusammenfassung_title = "Zusammenfassung"
|
||||
zusammenfassung_lines: list[str] = []
|
||||
|
||||
if is_research and bericht_summary:
|
||||
extracted_html, remaining = _extract_zusammenfassung(bericht_summary)
|
||||
if extracted_html:
|
||||
zusammenfassung_text = extracted_html
|
||||
extracted_lines, remaining = _extract_zusammenfassung_lines(bericht_summary)
|
||||
if extracted_lines:
|
||||
zusammenfassung_lines = extracted_lines
|
||||
zusammenfassung_title = "Zusammenfassung"
|
||||
bericht_summary = remaining
|
||||
|
||||
@@ -446,31 +551,35 @@ async def generate_docx(
|
||||
if "zusammenfassung" in sections:
|
||||
doc.add_heading(zusammenfassung_title, level=1)
|
||||
|
||||
# HTML-Tags entfernen und als Bullet Points
|
||||
clean_text = re.sub(r'<[^>]+>', '', zusammenfassung_text)
|
||||
lines = [line.strip().lstrip("- ").lstrip("* ") for line in clean_text.strip().split("\n") if line.strip()]
|
||||
for line in lines:
|
||||
if line:
|
||||
doc.add_paragraph(line, style='List Bullet')
|
||||
if zusammenfassung_lines:
|
||||
for line in zusammenfassung_lines:
|
||||
_add_docx_paragraph_with_citations(doc, line, all_sources, style='List Bullet')
|
||||
else:
|
||||
# Fallback: HTML-Tags aus executive_summary_text strippen, dann Bullets bilden
|
||||
clean_text = re.sub(r'<[^>]+>', '', zusammenfassung_text or '')
|
||||
lines = [line.strip().lstrip("- ").lstrip("* ") for line in clean_text.strip().split("\n") if line.strip()]
|
||||
for line in lines:
|
||||
if line:
|
||||
_add_docx_paragraph_with_citations(doc, line, all_sources, style='List Bullet')
|
||||
|
||||
if "bericht" in sections:
|
||||
# --- Lagebild / Recherchebericht ---
|
||||
doc.add_heading("Recherchebericht" if is_research else "Lagebild", level=1)
|
||||
summary = _strip_citation_numbers(bericht_summary)
|
||||
# Markdown-Formatierung entfernen
|
||||
clean_summary = re.sub(r'\*\*(.+?)\*\*', r'\1', summary)
|
||||
clean_summary = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean_summary)
|
||||
# Markdown-Formatierung entfernen, Zitate aber als [NNN] beibehalten und als Hyperlinks rendern
|
||||
clean_summary = re.sub(r'\*\*(.+?)\*\*', r'\1', bericht_summary)
|
||||
clean_summary = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', clean_summary)
|
||||
clean_summary = re.sub(r'^#{1,3}\s+', '', clean_summary, flags=re.MULTILINE)
|
||||
for para_text in clean_summary.split("\n\n"):
|
||||
para_text = para_text.strip()
|
||||
if para_text:
|
||||
if para_text.startswith("- "):
|
||||
for bullet in para_text.split("\n"):
|
||||
bullet = bullet.lstrip("- ").strip()
|
||||
if bullet:
|
||||
doc.add_paragraph(bullet, style='List Bullet')
|
||||
else:
|
||||
doc.add_paragraph(para_text)
|
||||
if not para_text:
|
||||
continue
|
||||
if para_text.startswith("- "):
|
||||
for bullet in para_text.split("\n"):
|
||||
bullet = bullet.lstrip("- ").strip()
|
||||
if bullet:
|
||||
_add_docx_paragraph_with_citations(doc, bullet, all_sources, style='List Bullet')
|
||||
else:
|
||||
_add_docx_paragraph_with_citations(doc, para_text, all_sources)
|
||||
|
||||
if "faktencheck" in sections:
|
||||
# --- Faktencheck ---
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren