report-export: verlinkte Zitate in Zusammenfassung und Bericht
Dieser Commit ist enthalten in:
@@ -171,53 +171,151 @@ def _strip_citation_numbers(text: str) -> str:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def _find_source_for_citation(num: str, sources: list) -> dict | None:
|
||||||
|
"""Sucht eine Quelle anhand der Zitat-Nummer (inkl. Suffix-Fallback wie 1383a -> 1383)."""
|
||||||
|
if not sources:
|
||||||
|
return None
|
||||||
|
for s in sources:
|
||||||
|
try:
|
||||||
|
if str(s.get("nr")) == num:
|
||||||
|
return s
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
# Suffix-Fallback: 1383a -> 1383
|
||||||
|
if re.search(r"[a-z]$", num):
|
||||||
|
base = re.sub(r"[a-z]$", "", num)
|
||||||
|
for s in sources:
|
||||||
|
if str(s.get("nr")) == base:
|
||||||
|
return s
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _linkify_citations_html(text: str, sources: list) -> str:
|
||||||
|
"""Ersetzt [1234]-Zitate durch HTML-Links zur jeweiligen Quelle.
|
||||||
|
|
||||||
|
Nummern ohne zugeordnete Quelle bleiben als sichtbare Zahl erhalten.
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return text
|
||||||
|
if not sources:
|
||||||
|
return text
|
||||||
|
|
||||||
|
def repl(match: re.Match) -> str:
|
||||||
|
num = match.group(1)
|
||||||
|
src = _find_source_for_citation(num, sources)
|
||||||
|
if src and src.get("url"):
|
||||||
|
url = src["url"].replace('"', """)
|
||||||
|
name = (src.get("name") or "").replace('"', """)
|
||||||
|
return f'<a href="{url}" class="citation" title="{name}">[{num}]</a>'
|
||||||
|
return match.group(0)
|
||||||
|
|
||||||
|
return re.sub(r"\[(\d{1,5}[a-z]?)\]", repl, text)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_docx_hyperlink(paragraph, url: str, text: str):
    """Append a clickable external hyperlink to a python-docx paragraph.

    Registers ``url`` as an external hyperlink relationship on the paragraph's
    part, builds a ``w:hyperlink`` element holding a single run with ``text``,
    and appends it to the end of the paragraph XML.

    Args:
        paragraph: python-docx paragraph object to extend.
        url: Link target.
        text: Visible link text.

    Returns:
        The created ``w:hyperlink`` element.
    """
    from docx.oxml.shared import OxmlElement, qn

    def make(tag: str, attrs: dict | None = None):
        # Create an element and set its (namespace-prefixed) attributes.
        node = OxmlElement(tag)
        for key, value in (attrs or {}).items():
            node.set(qn(key), value)
        return node

    rel_id = paragraph.part.relate_to(
        url,
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
        is_external=True,
    )
    link = make("w:hyperlink", {"r:id": rel_id})

    # Run formatting: blue colour, single underline, size value "20"
    # (w:sz is given in half-points in WordprocessingML).
    run_props = make("w:rPr")
    for tag, value in (("w:color", "0066CC"), ("w:u", "single"), ("w:sz", "20")):
        run_props.append(make(tag, {"w:val": value}))

    text_node = make("w:t")
    text_node.text = text
    # Keep leading/trailing whitespace of the link text.
    text_node.set(qn("xml:space"), "preserve")

    run = make("w:r")
    run.append(run_props)
    run.append(text_node)

    link.append(run)
    paragraph._p.append(link)
    return link
|
||||||
|
|
||||||
|
|
||||||
|
def _add_docx_paragraph_with_citations(doc_or_para, text: str, sources: list, style: str | None = None):
|
||||||
|
"""Fügt ein Paragraph hinzu, bei dem [1234]-Zitate als Hyperlink-Runs eingefügt werden.
|
||||||
|
|
||||||
|
doc_or_para darf ein Document sein (neues Paragraph wird angelegt) oder bereits ein Paragraph.
|
||||||
|
"""
|
||||||
|
if hasattr(doc_or_para, "add_paragraph"):
|
||||||
|
para = doc_or_para.add_paragraph(style=style) if style else doc_or_para.add_paragraph()
|
||||||
|
else:
|
||||||
|
para = doc_or_para
|
||||||
|
|
||||||
|
pattern = re.compile(r"\[(\d{1,5}[a-z]?)\]")
|
||||||
|
pos = 0
|
||||||
|
for m in pattern.finditer(text):
|
||||||
|
if m.start() > pos:
|
||||||
|
para.add_run(text[pos:m.start()])
|
||||||
|
num = m.group(1)
|
||||||
|
src = _find_source_for_citation(num, sources)
|
||||||
|
if src and src.get("url"):
|
||||||
|
_add_docx_hyperlink(para, src["url"], f"[{num}]")
|
||||||
|
else:
|
||||||
|
para.add_run(m.group(0))
|
||||||
|
pos = m.end()
|
||||||
|
if pos < len(text):
|
||||||
|
para.add_run(text[pos:])
|
||||||
|
return para
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_zusammenfassung(summary_text: str) -> tuple[str, str]:
|
|
||||||
"""Extrahiert die ZUSAMMENFASSUNG-Sektion aus einem Research-Briefing.
|
|
||||||
|
def _extract_zusammenfassung_lines(summary_text: str) -> tuple[list[str], str]:
|
||||||
|
"""Extrahiert die ZUSAMMENFASSUNG-Sektion als Liste von Rohzeilen (ohne Zitatbearbeitung).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(zusammenfassung_html, remaining_summary)
|
(lines, remaining_summary)
|
||||||
zusammenfassung_html: HTML-formatierte Bullet Points
|
|
||||||
remaining_summary: Der Rest des Berichts ohne die Zusammenfassung
|
|
||||||
"""
|
"""
|
||||||
if not summary_text:
|
if not summary_text:
|
||||||
return "", summary_text
|
return [], summary_text
|
||||||
|
|
||||||
# Suche nach ## ZUSAMMENFASSUNG ... bis zur naechsten ## Ueberschrift
|
|
||||||
pattern = r"(## (?:ZUSAMMENFASSUNG|ÜBERBLICK)\s*\n)(.*?)(?=\n## |\Z)"
|
pattern = r"(## (?:ZUSAMMENFASSUNG|ÜBERBLICK)\s*\n)(.*?)(?=\n## |\Z)"
|
||||||
match = re.search(pattern, summary_text, re.DOTALL)
|
match = re.search(pattern, summary_text, re.DOTALL)
|
||||||
if not match:
|
if not match:
|
||||||
return "", summary_text
|
return [], summary_text
|
||||||
|
|
||||||
zusammenfassung_raw = match.group(2).strip()
|
zusammenfassung_raw = match.group(2).strip()
|
||||||
# Rest des Berichts ohne die Zusammenfassung-Sektion
|
|
||||||
remaining = summary_text[:match.start()] + summary_text[match.end():]
|
remaining = summary_text[:match.start()] + summary_text[match.end():]
|
||||||
remaining = remaining.strip()
|
remaining = remaining.strip()
|
||||||
|
|
||||||
# Bullet Points als HTML formatieren
|
lines: list[str] = []
|
||||||
lines = []
|
|
||||||
for line in zusammenfassung_raw.split("\n"):
|
for line in zusammenfassung_raw.split("\n"):
|
||||||
stripped = line.strip()
|
stripped = line.strip()
|
||||||
if stripped.startswith("- "):
|
if stripped.startswith("- ") or stripped.startswith("* "):
|
||||||
clean = _strip_citation_numbers(stripped[2:].strip())
|
content = stripped[2:].strip()
|
||||||
if clean:
|
if content:
|
||||||
lines.append(clean)
|
lines.append(content)
|
||||||
elif stripped.startswith("* "):
|
|
||||||
clean = _strip_citation_numbers(stripped[2:].strip())
|
|
||||||
if clean:
|
|
||||||
lines.append(clean)
|
|
||||||
elif stripped and not stripped.startswith("#"):
|
elif stripped and not stripped.startswith("#"):
|
||||||
clean = _strip_citation_numbers(stripped)
|
lines.append(stripped)
|
||||||
if clean:
|
return lines, remaining
|
||||||
lines.append(clean)
|
|
||||||
|
|
||||||
if lines:
|
|
||||||
html = "<ul>\n" + "\n".join(f"<li>{line}</li>" for line in lines) + "\n</ul>"
|
|
||||||
else:
|
|
||||||
html = f"<p>{_strip_citation_numbers(zusammenfassung_raw)}</p>"
|
|
||||||
|
|
||||||
|
def _extract_zusammenfassung(summary_text: str, sources: list | None = None) -> tuple[str, str]:
|
||||||
|
"""Extrahiert die ZUSAMMENFASSUNG-Sektion und liefert sie als HTML mit verlinkten Zitaten."""
|
||||||
|
lines, remaining = _extract_zusammenfassung_lines(summary_text)
|
||||||
|
if not lines:
|
||||||
|
return "", summary_text
|
||||||
|
|
||||||
|
src_list = sources or []
|
||||||
|
html_lines = [f"<li>{_linkify_citations_html(line, src_list)}</li>" for line in lines]
|
||||||
|
html = "<ul>\n" + "\n".join(html_lines) + "\n</ul>"
|
||||||
return html, remaining
|
return html, remaining
|
||||||
|
|
||||||
|
|
||||||
@@ -310,17 +408,22 @@ async def generate_pdf(
|
|||||||
|
|
||||||
# Fuer Research-Lagen: Zusammenfassung aus dem Bericht extrahieren
|
# Fuer Research-Lagen: Zusammenfassung aus dem Bericht extrahieren
|
||||||
is_research = incident.get("type") == "research"
|
is_research = incident.get("type") == "research"
|
||||||
|
all_sources = _prepare_sources(incident)
|
||||||
zusammenfassung_html = executive_summary_html
|
zusammenfassung_html = executive_summary_html
|
||||||
bericht_summary = incident.get("summary", "")
|
bericht_summary = incident.get("summary", "")
|
||||||
zusammenfassung_title = "Zusammenfassung"
|
zusammenfassung_title = "Zusammenfassung"
|
||||||
|
|
||||||
if is_research and bericht_summary:
|
if is_research and bericht_summary:
|
||||||
extracted_html, remaining = _extract_zusammenfassung(bericht_summary)
|
extracted_html, remaining = _extract_zusammenfassung(bericht_summary, all_sources)
|
||||||
if extracted_html:
|
if extracted_html:
|
||||||
zusammenfassung_html = extracted_html
|
zusammenfassung_html = extracted_html
|
||||||
zusammenfassung_title = "Zusammenfassung"
|
zusammenfassung_title = "Zusammenfassung"
|
||||||
bericht_summary = remaining
|
bericht_summary = remaining
|
||||||
|
|
||||||
|
# Auch das (nicht-research) Executive Summary linkifizieren — ggf. enthaelt es Zitate
|
||||||
|
if not is_research and zusammenfassung_html:
|
||||||
|
zusammenfassung_html = _linkify_citations_html(zusammenfassung_html, all_sources)
|
||||||
|
|
||||||
env = Environment(loader=FileSystemLoader(str(TEMPLATE_DIR)))
|
env = Environment(loader=FileSystemLoader(str(TEMPLATE_DIR)))
|
||||||
template = env.get_template("report.html")
|
template = env.get_template("report.html")
|
||||||
|
|
||||||
@@ -337,8 +440,8 @@ async def generate_pdf(
|
|||||||
zusammenfassung_title=zusammenfassung_title,
|
zusammenfassung_title=zusammenfassung_title,
|
||||||
sections=sections,
|
sections=sections,
|
||||||
scope=scope,
|
scope=scope,
|
||||||
lagebild_html=_markdown_to_html(
|
lagebild_html=_linkify_citations_html(
|
||||||
_strip_citation_numbers(bericht_summary)
|
_markdown_to_html(bericht_summary), all_sources
|
||||||
),
|
),
|
||||||
lagebild_timestamp=(incident.get("updated_at") or "")[:16].replace("T", " "),
|
lagebild_timestamp=(incident.get("updated_at") or "")[:16].replace("T", " "),
|
||||||
sources=_prepare_sources(incident)[:30] if scope == "report" else _prepare_sources(incident),
|
sources=_prepare_sources(incident)[:30] if scope == "report" else _prepare_sources(incident),
|
||||||
@@ -380,14 +483,16 @@ async def generate_docx(
|
|||||||
|
|
||||||
# Fuer Research-Lagen: Zusammenfassung aus dem Bericht extrahieren
|
# Fuer Research-Lagen: Zusammenfassung aus dem Bericht extrahieren
|
||||||
is_research = incident.get("type") == "research"
|
is_research = incident.get("type") == "research"
|
||||||
|
all_sources = _prepare_sources(incident)
|
||||||
zusammenfassung_text = executive_summary_text
|
zusammenfassung_text = executive_summary_text
|
||||||
bericht_summary = incident.get("summary") or "Keine Zusammenfassung verfuegbar."
|
bericht_summary = incident.get("summary") or "Keine Zusammenfassung verfuegbar."
|
||||||
zusammenfassung_title = "Zusammenfassung"
|
zusammenfassung_title = "Zusammenfassung"
|
||||||
|
zusammenfassung_lines: list[str] = []
|
||||||
|
|
||||||
if is_research and bericht_summary:
|
if is_research and bericht_summary:
|
||||||
extracted_html, remaining = _extract_zusammenfassung(bericht_summary)
|
extracted_lines, remaining = _extract_zusammenfassung_lines(bericht_summary)
|
||||||
if extracted_html:
|
if extracted_lines:
|
||||||
zusammenfassung_text = extracted_html
|
zusammenfassung_lines = extracted_lines
|
||||||
zusammenfassung_title = "Zusammenfassung"
|
zusammenfassung_title = "Zusammenfassung"
|
||||||
bericht_summary = remaining
|
bericht_summary = remaining
|
||||||
|
|
||||||
@@ -446,31 +551,35 @@ async def generate_docx(
|
|||||||
if "zusammenfassung" in sections:
|
if "zusammenfassung" in sections:
|
||||||
doc.add_heading(zusammenfassung_title, level=1)
|
doc.add_heading(zusammenfassung_title, level=1)
|
||||||
|
|
||||||
# HTML-Tags entfernen und als Bullet Points
|
if zusammenfassung_lines:
|
||||||
clean_text = re.sub(r'<[^>]+>', '', zusammenfassung_text)
|
for line in zusammenfassung_lines:
|
||||||
|
_add_docx_paragraph_with_citations(doc, line, all_sources, style='List Bullet')
|
||||||
|
else:
|
||||||
|
# Fallback: HTML-Tags aus executive_summary_text strippen, dann Bullets bilden
|
||||||
|
clean_text = re.sub(r'<[^>]+>', '', zusammenfassung_text or '')
|
||||||
lines = [line.strip().lstrip("- ").lstrip("* ") for line in clean_text.strip().split("\n") if line.strip()]
|
lines = [line.strip().lstrip("- ").lstrip("* ") for line in clean_text.strip().split("\n") if line.strip()]
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if line:
|
if line:
|
||||||
doc.add_paragraph(line, style='List Bullet')
|
_add_docx_paragraph_with_citations(doc, line, all_sources, style='List Bullet')
|
||||||
|
|
||||||
if "bericht" in sections:
|
if "bericht" in sections:
|
||||||
# --- Lagebild / Recherchebericht ---
|
# --- Lagebild / Recherchebericht ---
|
||||||
doc.add_heading("Recherchebericht" if is_research else "Lagebild", level=1)
|
doc.add_heading("Recherchebericht" if is_research else "Lagebild", level=1)
|
||||||
summary = _strip_citation_numbers(bericht_summary)
|
# Markdown-Formatierung entfernen, Zitate aber als [NNN] beibehalten und als Hyperlinks rendern
|
||||||
# Markdown-Formatierung entfernen
|
clean_summary = re.sub(r'\*\*(.+?)\*\*', r'\1', bericht_summary)
|
||||||
clean_summary = re.sub(r'\*\*(.+?)\*\*', r'\1', summary)
|
clean_summary = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', clean_summary)
|
||||||
clean_summary = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean_summary)
|
|
||||||
clean_summary = re.sub(r'^#{1,3}\s+', '', clean_summary, flags=re.MULTILINE)
|
clean_summary = re.sub(r'^#{1,3}\s+', '', clean_summary, flags=re.MULTILINE)
|
||||||
for para_text in clean_summary.split("\n\n"):
|
for para_text in clean_summary.split("\n\n"):
|
||||||
para_text = para_text.strip()
|
para_text = para_text.strip()
|
||||||
if para_text:
|
if not para_text:
|
||||||
|
continue
|
||||||
if para_text.startswith("- "):
|
if para_text.startswith("- "):
|
||||||
for bullet in para_text.split("\n"):
|
for bullet in para_text.split("\n"):
|
||||||
bullet = bullet.lstrip("- ").strip()
|
bullet = bullet.lstrip("- ").strip()
|
||||||
if bullet:
|
if bullet:
|
||||||
doc.add_paragraph(bullet, style='List Bullet')
|
_add_docx_paragraph_with_citations(doc, bullet, all_sources, style='List Bullet')
|
||||||
else:
|
else:
|
||||||
doc.add_paragraph(para_text)
|
_add_docx_paragraph_with_citations(doc, para_text, all_sources)
|
||||||
|
|
||||||
if "faktencheck" in sections:
|
if "faktencheck" in sections:
|
||||||
# --- Faktencheck ---
|
# --- Faktencheck ---
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren