diff --git a/src/report_generator.py b/src/report_generator.py
index a57c3b3..a446c43 100644
--- a/src/report_generator.py
+++ b/src/report_generator.py
@@ -451,22 +451,48 @@ def _build_export_metadata(
     if organization_name:
         keywords.append(organization_name)
 
-    # category_labels ist ein Komma-getrennter String
-    cat_labels = incident.get("category_labels") or ""
-    for lbl in cat_labels.split(","):
-        lbl = lbl.strip()
-        if lbl:
-            keywords.append(lbl)
+    # category_labels: kann JSON-Dict (Karte primary/secondary/...), JSON-Liste
+    # oder ein Komma-getrennter String sein. Nur die Label-Werte extrahieren.
+    cat_labels_raw = (incident.get("category_labels") or "").strip()
+    if cat_labels_raw:
+        cat_values: list[str] = []
+        try:
+            parsed = json.loads(cat_labels_raw)
+            if isinstance(parsed, dict):
+                cat_values = [str(v).strip() for v in parsed.values() if isinstance(v, str) and v.strip()]
+            elif isinstance(parsed, list):
+                cat_values = [str(v).strip() for v in parsed if isinstance(v, str) and v.strip()]
+        except (json.JSONDecodeError, TypeError):
+            cat_values = [lbl.strip() for lbl in cat_labels_raw.split(",") if lbl.strip()]
+        # Keine JSON-Fragmente (geschweifte/eckige Klammern) als Keyword zulassen
+        for lbl in cat_values:
+            if lbl and not any(c in lbl for c in "{}[]"):
+                keywords.append(lbl)
 
     if top_locations:
         keywords.extend([loc for loc in top_locations if loc])
 
+    # Sanitize: Zeilenumbrueche/Tabs weg, Sonderzeichen mit PDF-Sonderbedeutung filtern
+    def _sanitize_keyword(kw: str) -> str:
+        if not kw:
+            return ""
+        # Whitespace normalisieren
+        cleaned = re.sub(r"\s+", " ", kw).strip()
+        # PDF-Dict/Array-Klammern und Backslash raus (WeasyPrint escaped () bei Strings,
+        # { und [ koennen aber den Keywords-Stream abschneiden)
+        cleaned = re.sub(r"[{}\[\]\\]", "", cleaned)
+        return cleaned.strip(" ,;:")
+
+    # Dedup (case-insensitive) mit Reihenfolge erhalten, max 15
     seen = set()
     unique_keywords: list[str] = []
     for kw in keywords:
-        key = kw.lower()
+        clean_kw = _sanitize_keyword(kw)
+        if not clean_kw:
+            continue
+        key = clean_kw.lower()
         if key not in seen:
             seen.add(key)
-            unique_keywords.append(kw)
+            unique_keywords.append(clean_kw)
         if len(unique_keywords) >= 15:
             break