Fix broken source links caused by LLM-generated letter suffixes (e.g. 1383a)
The LLM occasionally generates source references with letter suffixes (e.g. [1383a], [1396b]) despite being instructed not to. This caused broken links because the sources array only contained integer nr values.

Backend: Add _sanitize_sources() to strip letter suffixes after parsing and deduplicate, preferring entries with valid URLs.

Frontend: Add a fallback in the citation renderer — when a suffix reference like [1383a] has no matching source with a URL, fall back to the base number [1383].

Also cleaned up 99 broken suffix entries and 44 suffix references in the Irankonflikt incident (ID 6) database records.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -242,6 +242,7 @@ class AnalyzerAgent:
|
||||
result, usage = await call_claude(prompt)
|
||||
analysis = self._parse_response(result)
|
||||
if analysis:
|
||||
analysis = self._sanitize_sources(analysis)
|
||||
logger.info(f"Erstanalyse abgeschlossen: {len(analysis.get('sources', []))} Quellen referenziert")
|
||||
return analysis, usage
|
||||
except Exception as e:
|
||||
@@ -303,6 +304,8 @@ class AnalyzerAgent:
|
||||
try:
|
||||
result, usage = await call_claude(prompt)
|
||||
analysis = self._parse_response(result)
|
||||
if analysis:
|
||||
analysis = self._sanitize_sources(analysis)
|
||||
if analysis and self._all_previous_sources:
|
||||
# Merge: alte Quellen beibehalten, neue hinzufuegen
|
||||
returned_sources = analysis.get("sources", [])
|
||||
@@ -325,6 +328,51 @@ class AnalyzerAgent:
|
||||
logger.error(f"Inkrementelle Analyse-Fehler: {e}")
|
||||
return None, None
|
||||
|
||||
def _sanitize_sources(self, analysis: dict) -> dict:
|
||||
"""Entfernt Buchstaben-Suffixe aus Quellennummern (z.B. '1383a' -> 1383).
|
||||
|
||||
Das LLM erzeugt trotz Anweisung gelegentlich Suffix-Nummern.
|
||||
Diese werden hier auf die Basisnummer normalisiert.
|
||||
Duplikate werden entfernt, wobei Eintraege mit URL bevorzugt werden.
|
||||
"""
|
||||
sources = analysis.get("sources", [])
|
||||
if not sources:
|
||||
return analysis
|
||||
|
||||
cleaned = {}
|
||||
suffix_count = 0
|
||||
for s in sources:
|
||||
nr = s.get("nr", "")
|
||||
nr_str = str(nr)
|
||||
# Prüfe auf Buchstaben-Suffix (z.B. "1383a", "1383b")
|
||||
m = re.match(r"^(\d+)[a-z]$", nr_str)
|
||||
if m:
|
||||
base_nr = int(m.group(1))
|
||||
suffix_count += 1
|
||||
# Nur übernehmen wenn Basisnummer noch nicht existiert oder
|
||||
# dieser Eintrag eine URL hat und der bisherige nicht
|
||||
if base_nr not in cleaned:
|
||||
s_copy = dict(s)
|
||||
s_copy["nr"] = base_nr
|
||||
cleaned[base_nr] = s_copy
|
||||
elif s.get("url") and not cleaned[base_nr].get("url"):
|
||||
s_copy = dict(s)
|
||||
s_copy["nr"] = base_nr
|
||||
cleaned[base_nr] = s_copy
|
||||
else:
|
||||
nr_int = int(nr) if isinstance(nr, (int, float)) or (isinstance(nr, str) and nr.isdigit()) else nr
|
||||
if nr_int not in cleaned:
|
||||
cleaned[nr_int] = s
|
||||
elif s.get("url") and not cleaned[nr_int].get("url"):
|
||||
cleaned[nr_int] = s
|
||||
|
||||
if suffix_count > 0:
|
||||
logger.info(f"Quellen-Sanitierung: {suffix_count} Buchstaben-Suffixe entfernt")
|
||||
analysis["sources"] = sorted(cleaned.values(),
|
||||
key=lambda s: s.get("nr", 0) if isinstance(s.get("nr"), int) else 9999)
|
||||
|
||||
return analysis
|
||||
|
||||
def _parse_response(self, response: str) -> dict | None:
|
||||
"""Parst die Claude-Antwort als JSON-Objekt mit robustem Fallback."""
|
||||
# Markdown-Code-Fences entfernen
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren