Report: Lagebericht kompakter — Limits für Faktencheck/Quellen/Quellenverzeichnis
- Lagebild auf ~4000 Zeichen gekürzt (scope=report), Hinweis auf Vollständigen Bericht
- Faktencheck: Top 20 im Lagebericht (alle im Vollständigen Bericht)
- Quellenstatistik: Top 20 im Lagebericht
- Quellenverzeichnis: Top 30 im Lagebericht, URLs kleiner (7pt) mit word-break
- Quellenreferenzen [1234] aus dem Text entfernt
- Sektionsreihenfolge: Exec Summary -> Faktencheck -> Quellenstatistik -> Lagebild
- Lagebericht jetzt ~8-10 Seiten statt 196
Dieser Commit ist enthalten in:
@@ -128,6 +128,54 @@ def _markdown_to_html(text: str) -> str:
|
||||
return '\n'.join(result)
|
||||
|
||||
|
||||
def _truncate_lagebild(summary_text: str, max_chars: int = 4000) -> str:
|
||||
"""Lagebild für den Lagebericht auf die Zusammenfassung kürzen.
|
||||
|
||||
Nimmt nur den ersten Abschnitt (bis zur zweiten H2/H3-Überschrift)
|
||||
oder kürzt auf max_chars Zeichen mit sauberem Abbruch am Absatzende.
|
||||
"""
|
||||
if not summary_text or len(summary_text) <= max_chars:
|
||||
return summary_text
|
||||
|
||||
lines = summary_text.split("\n")
|
||||
result_lines = []
|
||||
heading_count = 0
|
||||
char_count = 0
|
||||
|
||||
for line in lines:
|
||||
stripped = line.strip()
|
||||
# Zähle Überschriften (## oder ###)
|
||||
if stripped.startswith("## ") or stripped.startswith("### "):
|
||||
heading_count += 1
|
||||
# Nach der 3. Überschrift abbrechen (= 2 Abschnitte)
|
||||
if heading_count > 3:
|
||||
break
|
||||
|
||||
result_lines.append(line)
|
||||
char_count += len(line) + 1
|
||||
|
||||
# Hard-Limit bei max_chars, aber am Absatzende abbrechen
|
||||
if char_count > max_chars and stripped == "":
|
||||
break
|
||||
|
||||
text = "\n".join(result_lines).rstrip()
|
||||
if len(text) < len(summary_text) - 100:
|
||||
text += "\n\n*[Vollständiges Lagebild im Vollständigen Bericht]*"
|
||||
return text
|
||||
|
||||
|
||||
def _strip_citation_numbers(text: str) -> str:
|
||||
"""Entfernt [1234]-Quellenreferenzen aus dem Text."""
|
||||
# Einzelne Referenzen: [1302]
|
||||
text = re.sub(r"\s*\[\d{1,5}\]", "", text)
|
||||
# Mehrfach-Referenzen: [725][765][768]
|
||||
text = re.sub(r"(\[\d{1,5}\]){2,}", "", text)
|
||||
# Aufräumen: Doppelte Leerzeichen
|
||||
text = re.sub(r" +", " ", text)
|
||||
return text
|
||||
|
||||
|
||||
|
||||
async def generate_executive_summary(summary_text: str) -> str:
|
||||
"""KI-verdichtetes Executive Summary aus dem Lagebild."""
|
||||
if not summary_text or len(summary_text.strip()) < 50:
|
||||
@@ -221,11 +269,16 @@ async def generate_pdf(
|
||||
logo_base64=_get_logo_base64(),
|
||||
executive_summary=executive_summary_html,
|
||||
scope=scope,
|
||||
lagebild_html=_markdown_to_html(incident.get("summary", "")),
|
||||
lagebild_html=_markdown_to_html(
|
||||
_strip_citation_numbers(
|
||||
_truncate_lagebild(incident.get("summary", ""), 4000) if scope == "report"
|
||||
else incident.get("summary", "")
|
||||
)
|
||||
),
|
||||
lagebild_timestamp=(incident.get("updated_at") or "")[:16].replace("T", " "),
|
||||
sources=_prepare_sources(incident),
|
||||
fact_checks=_prepare_fact_checks(fact_checks),
|
||||
source_stats=_prepare_source_stats(articles),
|
||||
sources=_prepare_sources(incident)[:30] if scope == "report" else _prepare_sources(incident),
|
||||
fact_checks=_prepare_fact_checks(fact_checks[:20] if scope == "report" else fact_checks),
|
||||
source_stats=_prepare_source_stats(articles)[:20] if scope == "report" else _prepare_source_stats(articles),
|
||||
timeline=_prepare_timeline(articles) if scope == "full" else [],
|
||||
articles=articles if scope == "full" else [],
|
||||
)
|
||||
@@ -325,7 +378,10 @@ async def generate_docx(
|
||||
if scope in ("report", "full"):
|
||||
# --- Lagebild ---
|
||||
doc.add_heading("Lagebild", level=1)
|
||||
summary = incident.get("summary") or "Kein Lagebild verfügbar."
|
||||
raw_summary = incident.get("summary") or "Kein Lagebild verfügbar."
|
||||
summary = _strip_citation_numbers(
|
||||
_truncate_lagebild(raw_summary, 4000) if scope == "report" else raw_summary
|
||||
)
|
||||
# Markdown-Formatierung entfernen
|
||||
clean_summary = re.sub(r'\*\*(.+?)\*\*', r'\1', summary)
|
||||
clean_summary = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean_summary)
|
||||
@@ -342,7 +398,8 @@ async def generate_docx(
|
||||
doc.add_paragraph(para_text)
|
||||
|
||||
# --- Faktencheck ---
|
||||
if fact_checks:
|
||||
report_fcs = fact_checks[:20] if scope == 'report' else fact_checks
|
||||
if report_fcs:
|
||||
doc.add_heading("Faktencheck", level=1)
|
||||
table = doc.add_table(rows=1, cols=3)
|
||||
table.style = 'Table Grid'
|
||||
@@ -355,7 +412,7 @@ async def generate_docx(
|
||||
for p in cell.paragraphs:
|
||||
p.runs[0].font.bold = True
|
||||
p.runs[0].font.size = Pt(9)
|
||||
for fc in fact_checks:
|
||||
for fc in report_fcs:
|
||||
row = table.add_row().cells
|
||||
row[0].text = fc.get("claim", "")
|
||||
row[1].text = FC_STATUS_LABELS.get(fc.get("status", ""), fc.get("status", ""))
|
||||
@@ -363,6 +420,8 @@ async def generate_docx(
|
||||
|
||||
# --- Quellenstatistik ---
|
||||
source_stats = _prepare_source_stats(articles)
|
||||
if scope == 'report':
|
||||
source_stats = source_stats[:20]
|
||||
if source_stats:
|
||||
doc.add_heading("Quellenstatistik", level=1)
|
||||
table = doc.add_table(rows=1, cols=3)
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren