diff --git a/scripts/backfill_latest_developments.py b/scripts/backfill_latest_developments.py
new file mode 100644
index 0000000..e2e5d9d
--- /dev/null
+++ b/scripts/backfill_latest_developments.py
@@ -0,0 +1,123 @@
+"""One-off backfill: rebuild ``latest_developments`` for a single incident.
+
+Loads the newest articles of an incident (30 by default) and regenerates
+``latest_developments`` as a complete rebuild (``previous_developments=None``).
+
+Usage: python3 scripts/backfill_latest_developments.py INCIDENT_ID [LIMIT]
+"""
+import asyncio
+import sqlite3
+import sys
+from contextlib import closing
+
+# Make the packages under src/ importable; paths are relative to the current
+# working directory, so run the script from the repository root.
+sys.path.insert(0, "src")
+
+from agents.analyzer import AnalyzerAgent
+
+
+async def backfill(incident_id: int, limit: int = 30):
+    """Regenerate and persist ``latest_developments`` for one incident.
+
+    Args:
+        incident_id: ``incidents.id`` of the incident to rebuild.
+        limit: Maximum number of articles (newest first) given to the analyzer.
+    """
+    # closing() releases the connection on every exit path, including the early
+    # returns and exceptions raised while the analyzer is running.
+    with closing(sqlite3.connect("data/osint.db")) as c:
+        c.row_factory = sqlite3.Row
+
+        inc = c.execute(
+            "SELECT * FROM incidents WHERE id=?", (incident_id,)
+        ).fetchone()
+        if not inc:
+            print(f"Incident #{incident_id} nicht gefunden.")
+            return
+        title = inc["title"]
+        description = inc["description"] or ""
+
+        rows = c.execute(
+            """SELECT id, source, source_url, language, published_at,
+                      headline, headline_de, content_original, content_de
+               FROM articles WHERE incident_id=?
+               ORDER BY datetime(published_at) DESC LIMIT ?""",
+            (incident_id, limit),
+        ).fetchall()
+        if not rows:
+            # Nothing to summarise: skip the analyzer call and leave the stored
+            # latest_developments untouched.
+            print(f"Keine Artikel fuer Incident #{incident_id} gefunden.")
+            return
+
+        # Bias enrichment analogous to the orchestrator. It is optional: the
+        # source_bias table may not exist, in which case no bias is attached.
+        try:
+            bias_rows = c.execute(
+                "SELECT name, domain, bias FROM source_bias"
+            ).fetchall()
+        except sqlite3.OperationalError:
+            bias_rows = []
+        bias_by_name: dict[str, str] = {
+            r["name"].lower(): r["bias"] for r in bias_rows if r["name"]
+        }
+        bias_by_domain: dict[str, str] = {
+            r["domain"].lower(): r["bias"] for r in bias_rows if r["domain"]
+        }
+
+        articles = []
+        for r in rows:
+            a = dict(r)
+            src = (a.get("source") or "").lower()
+            url = (a.get("source_url") or "").lower()
+            # Prefer an exact source-name match; otherwise fall back to the
+            # first known domain that occurs anywhere in the article URL.
+            bias = bias_by_name.get(src) or next(
+                (b for dom, b in bias_by_domain.items() if dom and dom in url),
+                None,
+            )
+            if bias:
+                a["source_bias"] = bias
+            articles.append(a)
+
+        print(f"Backfill fuer #{incident_id} {title!r}")
+        print(f"Artikel als Input: {len(articles)} (neueste first)")
+        for a in articles[:5]:
+            print(f" ID {a['id']} | {a.get('published_at', '?')} | {a.get('source', '?')}")
+
+        analyzer = AnalyzerAgent()
+        # The second return value (usage) is not needed for the backfill.
+        dev_text, _usage = await analyzer.generate_latest_developments(
+            title=title,
+            description=description,
+            new_articles=articles,
+            previous_developments=None,
+        )
+
+        print()
+        print("=== Neue latest_developments ===")
+        print(dev_text or "(leer)")
+
+        if dev_text:
+            c.execute(
+                "UPDATE incidents SET latest_developments=? WHERE id=?",
+                (dev_text, incident_id),
+            )
+            c.commit()
+            print(f"\nDB aktualisiert: Incident #{incident_id}")
+
+
+if __name__ == "__main__":
+    USAGE = "Usage: backfill_latest_developments.py INCIDENT_ID [LIMIT]"
+    if len(sys.argv) < 2:
+        print(USAGE)
+        sys.exit(1)
+    try:
+        iid = int(sys.argv[1])
+        lim = int(sys.argv[2]) if len(sys.argv) > 2 else 30
+    except ValueError:
+        # Non-numeric arguments: show the usage line instead of a traceback.
+        print(USAGE)
+        sys.exit(1)
+    asyncio.run(backfill(iid, lim))
diff --git a/src/agents/analyzer.py b/src/agents/analyzer.py
index 06bb46c..fcbad50 100644
--- a/src/agents/analyzer.py
+++ b/src/agents/analyzer.py
@@ -238,7 +238,7 @@ REGELN:
 - KEINE Einleitung, KEINE Überschrift, KEINE Nachbemerkungen.
 - Wenn aus den neuen Meldungen kein neues Ereignis extrahierbar ist: BISHERIGE ENTWICKLUNGEN unverändert zurückgeben.
 
-OUTPUT-FORMAT (ausschliesslich, keine Anführungszeichen, kein Code-Fence): +OUTPUT-FORMAT (ausschliesslich, keine Anführungszeichen, kein Code-Fence, JEDE Zeile beginnt mit "- "): - [DD.MM. HH:MM] Ereignistext neu. {{M}} - [DD.MM. HH:MM] Ereignistext neu mit mehreren Belegen. {{M, M}} - [DD.MM. HH:MM] Ereignistext aus BISHERIGE ENTWICKLUNGEN. {{Quellenname1, Quellenname2}} @@ -447,7 +447,10 @@ class AnalyzerAgent: articles_by_id[str(aid)] = name bullets: list[str] = [] - bullet_re = re.compile(r"^\s*[-*•]\s*\[(\d{1,2}\.\d{1,2}\.(?:\d{2,4})?\s+\d{1,2}:\d{2})\]\s*(.+?)\s*$") + # Dash-Praefix + zweiter Datums-Punkt + optionales Jahr: Claude Haiku laesst diese gelegentlich weg. + bullet_re = re.compile( + r"^\s*(?:[-*•]\s*)?\[\s*(\d{1,2})\.(\d{1,2})\.?(?:\d{2,4})?\s+(\d{1,2}:\d{2})\s*\]\s*(.+?)\s*$" + ) trailing_braces = re.compile(r"\{([^{}]+)\}\s*\.?\s*$") id_item = re.compile(r"^[M#]\s*(\d+)$", re.IGNORECASE) junk_item = re.compile(r"^(unbekannt|unknown|n/?a|keine|keine quelle|tba)$", re.IGNORECASE) @@ -459,8 +462,9 @@ class AnalyzerAgent: m = bullet_re.match(line) if not m: continue - ts = m.group(1) - body = m.group(2).rstrip() + day, month, time = m.group(1), m.group(2), m.group(3) + ts = f"{int(day):02d}.{int(month):02d}. 
{time}" + body = m.group(4).rstrip() brace_match = trailing_braces.search(body) if not brace_match: diff --git a/src/static/js/components.js b/src/static/js/components.js index 54d1c43..6627c0c 100644 --- a/src/static/js/components.js +++ b/src/static/js/components.js @@ -754,12 +754,12 @@ const UI = { let sources = []; try { sources = JSON.parse(sourcesJson || '[]'); } catch(e) {} - const bulletLines = text.split("\n").map(l => l.trim()).filter(l => l.startsWith("- ")); + const bulletLines = text.split("\n").map(l => l.trim()).filter(l => l && (l.startsWith("- ") || l.startsWith("["))); if (bulletLines.length === 0) { return this.renderZusammenfassung(text, sourcesJson); } - const bulletRe = /^-\s*\[(\d{1,2}\.\d{1,2}\.)\s+(\d{1,2}:\d{2})\]\s*(.+?)\s*$/; + const bulletRe = /^(?:-\s*)?\[\s*(\d{1,2})\.(\d{1,2})\.?(?:\d{2,4})?\s+(\d{1,2}:\d{2})\s*\]\s*(.+?)\s*$/; const citationRe = /\[(\d+[a-z]?)\]/g; const trailingNamesRe = /\s*\{([^{}]+)\}\s*\.?\s*$/; @@ -800,9 +800,11 @@ const UI = { const body = this.escape(line.replace(/^-\s*/, '')); return `
${body}
`; } - const date = m[1]; - const time = m[2]; - let rawBody = m[3]; + const day = m[1].padStart(2, '0'); + const month = m[2].padStart(2, '0'); + const date = `${day}.${month}.`; + const time = m[3]; + let rawBody = m[4]; let pillsHtml = '';