diff --git a/src/report_generator.py b/src/report_generator.py index e2d70cf..f472f5a 100644 --- a/src/report_generator.py +++ b/src/report_generator.py @@ -128,6 +128,54 @@ def _markdown_to_html(text: str) -> str: return '\n'.join(result) +def _truncate_lagebild(summary_text: str, max_chars: int = 4000) -> str: + """Lagebild für den Lagebericht auf die Zusammenfassung kürzen. + + Nimmt nur den ersten Abschnitt (bis zur zweiten H2/H3-Überschrift) + oder kürzt auf max_chars Zeichen mit sauberem Abbruch am Absatzende. + """ + if not summary_text or len(summary_text) <= max_chars: + return summary_text + + lines = summary_text.split("\n") + result_lines = [] + heading_count = 0 + char_count = 0 + + for line in lines: + stripped = line.strip() + # Zähle Überschriften (## oder ###) + if stripped.startswith("## ") or stripped.startswith("### "): + heading_count += 1 + # Nach der 3. Überschrift abbrechen (= 2 Abschnitte) + if heading_count > 3: + break + + result_lines.append(line) + char_count += len(line) + 1 + + # Hard-Limit bei max_chars, aber am Absatzende abbrechen + if char_count > max_chars and stripped == "": + break + + text = "\n".join(result_lines).rstrip() + if len(text) < len(summary_text) - 100: + text += "\n\n*[Vollständiges Lagebild im Vollständigen Bericht]*" + return text + + +def _strip_citation_numbers(text: str) -> str: + """Entfernt [1234]-Quellenreferenzen aus dem Text.""" + # Einzelne Referenzen: [1302] + text = re.sub(r"\s*\[\d{1,5}\]", "", text) + # Mehrfach-Referenzen: [725][765][768] + text = re.sub(r"(\[\d{1,5}\]){2,}", "", text) + # Aufräumen: Doppelte Leerzeichen + text = re.sub(r" +", " ", text) + return text + + + async def generate_executive_summary(summary_text: str) -> str: """KI-verdichtetes Executive Summary aus dem Lagebild.""" if not summary_text or len(summary_text.strip()) < 50: @@ -221,11 +269,16 @@ async def generate_pdf( logo_base64=_get_logo_base64(), executive_summary=executive_summary_html, scope=scope, - lagebild_html=_markdown_to_html(incident.get("summary", "")), + lagebild_html=_markdown_to_html( + _strip_citation_numbers( + _truncate_lagebild(incident.get("summary", ""), 4000) if scope == "report" + else incident.get("summary", "") + ) + ), lagebild_timestamp=(incident.get("updated_at") or "")[:16].replace("T", " "), - sources=_prepare_sources(incident), - fact_checks=_prepare_fact_checks(fact_checks), - source_stats=_prepare_source_stats(articles), + sources=_prepare_sources(incident)[:30] if scope == "report" else _prepare_sources(incident), + fact_checks=_prepare_fact_checks(fact_checks[:20] if scope == "report" else fact_checks), + source_stats=_prepare_source_stats(articles)[:20] if scope == "report" else _prepare_source_stats(articles), timeline=_prepare_timeline(articles) if scope == "full" else [], articles=articles if scope == "full" else [], ) @@ -325,7 +378,10 @@ async def generate_docx( if scope in ("report", "full"): # --- Lagebild --- doc.add_heading("Lagebild", level=1) - summary = incident.get("summary") or "Kein Lagebild verfügbar." + raw_summary = incident.get("summary") or "Kein Lagebild verfügbar." + summary = _strip_citation_numbers( + _truncate_lagebild(raw_summary, 4000) if scope == "report" else raw_summary + ) # Markdown-Formatierung entfernen clean_summary = re.sub(r'\*\*(.+?)\*\*', r'\1', summary) clean_summary = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean_summary) @@ -342,7 +398,8 @@ async def generate_docx( doc.add_paragraph(para_text) # --- Faktencheck --- - if fact_checks: + report_fcs = fact_checks[:20] if scope == 'report' else fact_checks + if report_fcs: doc.add_heading("Faktencheck", level=1) table = doc.add_table(rows=1, cols=3) table.style = 'Table Grid' @@ -355,7 +412,7 @@ async def generate_docx( for p in cell.paragraphs: p.runs[0].font.bold = True p.runs[0].font.size = Pt(9) - for fc in fact_checks: + for fc in report_fcs: row = table.add_row().cells row[0].text = fc.get("claim", "") row[1].text = FC_STATUS_LABELS.get(fc.get("status", ""), fc.get("status", "")) @@ -363,6 +420,8 @@ async def generate_docx( # --- Quellenstatistik --- source_stats = _prepare_source_stats(articles) + if scope == 'report': + source_stats = source_stats[:20] if source_stats: doc.add_heading("Quellenstatistik", level=1) table = doc.add_table(rows=1, cols=3) diff --git a/src/report_templates/report.html b/src/report_templates/report.html index 3a2b4a3..50da798 100644 --- a/src/report_templates/report.html +++ b/src/report_templates/report.html @@ -47,6 +47,7 @@ body { font-family: -apple-system, 'Segoe UI', Roboto, Helvetica, Arial, sans-se /* Tabellen */ table { width: 100%; border-collapse: collapse; font-size: 9.5pt; margin-bottom: 14px; } +.quellen-table { table-layout: fixed; font-size: 8pt; } th { background: #0a1832; color: #fff; text-align: left; padding: 6px 10px; font-weight: 600; font-size: 8.5pt; text-transform: uppercase; letter-spacing: 0.5px; } td { padding: 5px 10px; border-bottom: 1px solid #e0e0e0; } tr:nth-child(even) { background: #f8f9fa; } @@ -64,7 +65,7 @@ tr:nth-child(even) { background: #f8f9fa; } .tl-source { font-size: 8pt; color: #aaa; } /* Quellenverzeichnis */ -.source-ref { font-size: 9pt; color: #666; } +.source-ref { font-size: 7pt; color: #666; word-break: break-all; max-width: 350px; overflow: hidden; text-overflow: ellipsis; } /* Footer */ .report-footer { margin-top: 30px; padding-top: 10px; border-top: 1px solid #ddd; font-size: 8pt; color: #999; text-align: center; } @@ -98,28 +99,6 @@ tr:nth-child(even) { background: #f8f9fa; } {% if scope in ('report', 'full') %} - -
Aktualisiert: {{ lagebild_timestamp }}
{% endif %} -| # | Quelle | URL |
|---|---|---|
| {{ loop.index }} | {{ src.name or src.title or '' }} | {{ src.url or '' }} |
Aktualisiert: {{ lagebild_timestamp }}
{% endif %} +| # | Quelle | URL |
|---|---|---|
| {{ loop.index }} | {{ src.name or src.title or '' }} | {{ src.url or '' }} |