Live-Monitoring: Parser toleranter (Dash optional, Datum ohne zweiten Punkt) + Backfill-Skript
Claude Haiku 4.5 laesst gelegentlich den fuehrenden Dash oder den zweiten
Datums-Punkt im Bullet-Format weg (z.B. "[18.04 21:49]" statt
"- [18.04. 21:49]"). Der strikte Parser-Regex verwarf dadurch alle Bullets.
- Regex akzeptiert nun sowohl den Dash als auch den zweiten Datums-Punkt als optional
- Parser normalisiert Datum + Zeit auf kanonisches Format "DD.MM. HH:MM" mit Zero-Padding
- Frontend-Regex analog toleranter (auch fuer Altdaten-Mix)
- OUTPUT-FORMAT-Hinweis im Prompt verschaerft ("JEDE Zeile beginnt mit - ")
Backfill-Skript (scripts/backfill_latest_developments.py): Laedt die N
neuesten Artikel einer Lage aus der DB und ruft generate_latest_developments
mit previous_developments=None auf — nuetzlich nach DB-Cleanups, wenn die
inkrementelle Logik zu wenige Bullets liefert.
Einmaliger Run fuer Lage #66 (Militaerblogger): 8 Bullets vom 18.04. mit
aufgeloesten Quellen (Spiegel, Guardian, Bloomberg, n-tv, Telegram-Kanaele).
Dieser Commit ist enthalten in:
87
scripts/backfill_latest_developments.py
Normale Datei
87
scripts/backfill_latest_developments.py
Normale Datei
@@ -0,0 +1,87 @@
|
|||||||
|
"""Einmaliger Backfill: Laedt die neuesten Artikel einer Lage (Standard: 30) und generiert
|
||||||
|
latest_developments als kompletten Rebuild (previous_developments=None).
|
||||||
|
|
||||||
|
Verwendung: python3 scripts/backfill_latest_developments.py <incident_id> [limit]
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, "src")
|
||||||
|
|
||||||
|
from agents.analyzer import AnalyzerAgent
|
||||||
|
|
||||||
|
|
||||||
|
def _load_bias_maps(conn: sqlite3.Connection) -> tuple[dict[str, str], dict[str, str]]:
    """Build lower-cased name->bias and domain->bias lookups from ``source_bias``.

    Expects ``conn.row_factory`` to be ``sqlite3.Row`` so columns can be read
    by name. Returns two empty dicts when the query raises
    ``sqlite3.OperationalError`` (the table is optional and may not exist).
    """
    try:
        bias_rows = conn.execute("SELECT name, domain, bias FROM source_bias").fetchall()
    except sqlite3.OperationalError:
        return {}, {}
    by_name = {r["name"].lower(): r["bias"] for r in bias_rows if r["name"]}
    by_domain = {r["domain"].lower(): r["bias"] for r in bias_rows if r["domain"]}
    return by_name, by_domain


def _with_source_bias(row, by_name: dict[str, str], by_domain: dict[str, str]) -> dict:
    """Convert an article row to a dict and add ``source_bias`` when one is known.

    The source name is matched exactly (case-insensitive) first; otherwise the
    first domain that occurs as a substring of the article URL wins.
    """
    article = dict(row)
    src = (article.get("source") or "").lower()
    url = (article.get("source_url") or "").lower()
    bias = by_name.get(src) or next(
        (b for dom, b in by_domain.items() if dom and dom in url),
        None,
    )
    if bias:
        article["source_bias"] = bias
    return article


async def backfill(incident_id: int, limit: int = 30):
    """Rebuild ``latest_developments`` for one incident from its newest articles.

    Reads the incident and up to ``limit`` of its articles (newest first by
    ``published_at``) from ``data/osint.db``, enriches them with source bias
    where available, and calls ``AnalyzerAgent.generate_latest_developments``
    with ``previous_developments=None``. A non-empty result is written back to
    ``incidents.latest_developments`` and committed.

    Args:
        incident_id: Primary key of the incident to rebuild.
        limit: Maximum number of articles passed to the analyzer.

    Returns:
        None. Progress and the generated text are printed to stdout; if the
        incident does not exist a message is printed and nothing is written.
    """
    conn = sqlite3.connect("data/osint.db")
    # The connection is closed explicitly on every exit path (not found,
    # analyzer failure, success) instead of being left to garbage collection.
    try:
        conn.row_factory = sqlite3.Row

        inc = conn.execute("SELECT * FROM incidents WHERE id=?", (incident_id,)).fetchone()
        if not inc:
            print(f"Incident #{incident_id} nicht gefunden.")
            return
        title = inc["title"]
        description = inc["description"] or ""

        rows = conn.execute(
            """SELECT id, source, source_url, language, published_at,
                      headline, headline_de, content_original, content_de
               FROM articles WHERE incident_id=?
               ORDER BY datetime(published_at) DESC LIMIT ?""",
            (incident_id, limit),
        ).fetchall()

        # Bias enrichment is best-effort: the source_bias table may be absent.
        bias_by_name, bias_by_domain = _load_bias_maps(conn)
        articles = [_with_source_bias(r, bias_by_name, bias_by_domain) for r in rows]

        print(f"Backfill fuer #{incident_id} {title!r}")
        print(f"Artikel als Input: {len(articles)} (neueste first)")
        # Show only the first five articles as a quick sanity check of the input.
        for a in articles[:5]:
            print(f" ID {a['id']} | {a.get('published_at', '?')} | {a.get('source', '?')}")

        analyzer = AnalyzerAgent()
        # previous_developments=None requests a full rebuild rather than an
        # incremental update; the second return value (usage) is not used here.
        dev_text, usage = await analyzer.generate_latest_developments(
            title=title,
            description=description,
            new_articles=articles,
            previous_developments=None,
        )

        print()
        print("=== Neue latest_developments ===")
        print(dev_text or "(leer)")

        # An empty result never overwrites the stored value.
        if dev_text:
            conn.execute(
                "UPDATE incidents SET latest_developments=? WHERE id=?",
                (dev_text, incident_id),
            )
            conn.commit()
            print(f"\nDB aktualisiert: Incident #{incident_id}")
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: <incident_id> is required, [limit] defaults to 30.
    if len(sys.argv) < 2:
        print("Usage: backfill_latest_developments.py <incident_id> [limit]")
        sys.exit(1)
    try:
        iid = int(sys.argv[1])
        lim = int(sys.argv[2]) if len(sys.argv) > 2 else 30
    except ValueError:
        # Non-numeric arguments get the usage line instead of a raw traceback.
        print("Usage: backfill_latest_developments.py <incident_id> [limit]")
        sys.exit(1)
    asyncio.run(backfill(iid, lim))
|
||||||
@@ -238,7 +238,7 @@ REGELN:
|
|||||||
- KEINE Einleitung, KEINE Überschrift, KEINE Nachbemerkungen.
|
- KEINE Einleitung, KEINE Überschrift, KEINE Nachbemerkungen.
|
||||||
- Wenn aus den neuen Meldungen kein neues Ereignis extrahierbar ist: BISHERIGE ENTWICKLUNGEN unverändert zurückgeben.
|
- Wenn aus den neuen Meldungen kein neues Ereignis extrahierbar ist: BISHERIGE ENTWICKLUNGEN unverändert zurückgeben.
|
||||||
|
|
||||||
OUTPUT-FORMAT (ausschliesslich, keine Anführungszeichen, kein Code-Fence):
|
OUTPUT-FORMAT (ausschliesslich, keine Anführungszeichen, kein Code-Fence, JEDE Zeile beginnt mit "- "):
|
||||||
- [DD.MM. HH:MM] Ereignistext neu. {{M<ID>}}
|
- [DD.MM. HH:MM] Ereignistext neu. {{M<ID>}}
|
||||||
- [DD.MM. HH:MM] Ereignistext neu mit mehreren Belegen. {{M<ID1>, M<ID2>}}
|
- [DD.MM. HH:MM] Ereignistext neu mit mehreren Belegen. {{M<ID1>, M<ID2>}}
|
||||||
- [DD.MM. HH:MM] Ereignistext aus BISHERIGE ENTWICKLUNGEN. {{Quellenname1, Quellenname2}}
|
- [DD.MM. HH:MM] Ereignistext aus BISHERIGE ENTWICKLUNGEN. {{Quellenname1, Quellenname2}}
|
||||||
@@ -447,7 +447,10 @@ class AnalyzerAgent:
|
|||||||
articles_by_id[str(aid)] = name
|
articles_by_id[str(aid)] = name
|
||||||
|
|
||||||
bullets: list[str] = []
|
bullets: list[str] = []
|
||||||
bullet_re = re.compile(r"^\s*[-*•]\s*\[(\d{1,2}\.\d{1,2}\.(?:\d{2,4})?\s+\d{1,2}:\d{2})\]\s*(.+?)\s*$")
|
# Dash-Praefix + zweiter Datums-Punkt + optionales Jahr: Claude Haiku laesst diese gelegentlich weg.
|
||||||
|
bullet_re = re.compile(
|
||||||
|
r"^\s*(?:[-*•]\s*)?\[\s*(\d{1,2})\.(\d{1,2})\.?(?:\d{2,4})?\s+(\d{1,2}:\d{2})\s*\]\s*(.+?)\s*$"
|
||||||
|
)
|
||||||
trailing_braces = re.compile(r"\{([^{}]+)\}\s*\.?\s*$")
|
trailing_braces = re.compile(r"\{([^{}]+)\}\s*\.?\s*$")
|
||||||
id_item = re.compile(r"^[M#]\s*(\d+)$", re.IGNORECASE)
|
id_item = re.compile(r"^[M#]\s*(\d+)$", re.IGNORECASE)
|
||||||
junk_item = re.compile(r"^(unbekannt|unknown|n/?a|keine|keine quelle|tba)$", re.IGNORECASE)
|
junk_item = re.compile(r"^(unbekannt|unknown|n/?a|keine|keine quelle|tba)$", re.IGNORECASE)
|
||||||
@@ -459,8 +462,9 @@ class AnalyzerAgent:
|
|||||||
m = bullet_re.match(line)
|
m = bullet_re.match(line)
|
||||||
if not m:
|
if not m:
|
||||||
continue
|
continue
|
||||||
ts = m.group(1)
|
day, month, time = m.group(1), m.group(2), m.group(3)
|
||||||
body = m.group(2).rstrip()
|
ts = f"{int(day):02d}.{int(month):02d}. {time}"
|
||||||
|
body = m.group(4).rstrip()
|
||||||
|
|
||||||
brace_match = trailing_braces.search(body)
|
brace_match = trailing_braces.search(body)
|
||||||
if not brace_match:
|
if not brace_match:
|
||||||
|
|||||||
@@ -754,12 +754,12 @@ const UI = {
|
|||||||
let sources = [];
|
let sources = [];
|
||||||
try { sources = JSON.parse(sourcesJson || '[]'); } catch(e) {}
|
try { sources = JSON.parse(sourcesJson || '[]'); } catch(e) {}
|
||||||
|
|
||||||
const bulletLines = text.split("\n").map(l => l.trim()).filter(l => l.startsWith("- "));
|
const bulletLines = text.split("\n").map(l => l.trim()).filter(l => l && (l.startsWith("- ") || l.startsWith("[")));
|
||||||
if (bulletLines.length === 0) {
|
if (bulletLines.length === 0) {
|
||||||
return this.renderZusammenfassung(text, sourcesJson);
|
return this.renderZusammenfassung(text, sourcesJson);
|
||||||
}
|
}
|
||||||
|
|
||||||
const bulletRe = /^-\s*\[(\d{1,2}\.\d{1,2}\.)\s+(\d{1,2}:\d{2})\]\s*(.+?)\s*$/;
|
const bulletRe = /^(?:-\s*)?\[\s*(\d{1,2})\.(\d{1,2})\.?(?:\d{2,4})?\s+(\d{1,2}:\d{2})\s*\]\s*(.+?)\s*$/;
|
||||||
const citationRe = /\[(\d+[a-z]?)\]/g;
|
const citationRe = /\[(\d+[a-z]?)\]/g;
|
||||||
const trailingNamesRe = /\s*\{([^{}]+)\}\s*\.?\s*$/;
|
const trailingNamesRe = /\s*\{([^{}]+)\}\s*\.?\s*$/;
|
||||||
|
|
||||||
@@ -800,9 +800,11 @@ const UI = {
|
|||||||
const body = this.escape(line.replace(/^-\s*/, ''));
|
const body = this.escape(line.replace(/^-\s*/, ''));
|
||||||
return `<div class="dev-bullet"><div class="dev-body">${body}</div></div>`;
|
return `<div class="dev-bullet"><div class="dev-body">${body}</div></div>`;
|
||||||
}
|
}
|
||||||
const date = m[1];
|
const day = m[1].padStart(2, '0');
|
||||||
const time = m[2];
|
const month = m[2].padStart(2, '0');
|
||||||
let rawBody = m[3];
|
const date = `${day}.${month}.`;
|
||||||
|
const time = m[3];
|
||||||
|
let rawBody = m[4];
|
||||||
|
|
||||||
let pillsHtml = '';
|
let pillsHtml = '';
|
||||||
|
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren