#!/usr/bin/env python3
"""One-off / sync import of the EUvsDisinfo false-claim corpus into the Monitor.

Copies the claims (text, verdict, rebuttal summary, source reference and
embedding BLOB) from the Vigil database into the Monitor table
``fimi_claims``.

The embeddings are copied 1:1 as BLOBs (384-dim float32, L2-normalised) and
are matched in the Monitor with the same model
(paraphrase-multilingual-MiniLM-L12-v2).

Idempotent: rows are UPSERTed on the stable Vigil ``claims.id``, so existing
hits in ``article_fimi_matches`` stay valid.

Usage (staging)::

    python scripts/import_fimi_claims.py \\
        --vigil-db /home/claude-dev/vigil-data/vigil.db \\
        --osint-db /home/claude-dev/AegisSight-Monitor-staging/data/osint.db
"""
from __future__ import annotations

import argparse
import contextlib
import os
import sqlite3
import sys

EUVSDISINFO_REPORT_BASE = "https://euvsdisinfo.eu/report/"

# User-facing ``--help`` description. Kept in German (like every other message
# this CLI prints) and word-for-word identical to the text the script has
# always shown, so the help output does not change.
_CLI_DESCRIPTION = """Einmal-/Sync-Import des EUvsDisinfo-Falschbehauptungsbestands in den Monitor.

Kopiert die Claims (Text, Verdict, Widerlegung, Quell-Referenz, Embedding-BLOB)
aus der Vigil-Datenbank in die Monitor-Tabelle fimi_claims. Die Embeddings werden
als BLOB 1:1 uebernommen (384-dim float32, L2-normalisiert) und im Monitor mit
demselben Modell (paraphrase-multilingual-MiniLM-L12-v2) gematcht.

Idempotent: UPSERT auf der stabilen Vigil-claim.id. Bestehende Treffer in
article_fimi_matches bleiben dadurch gueltig.

Aufruf (Staging):
    python scripts/import_fimi_claims.py \
        --vigil-db /home/claude-dev/vigil-data/vigil.db \
        --osint-db /home/claude-dev/AegisSight-Monitor-staging/data/osint.db
"""

# SQLite treats a negative LIMIT as "no upper bound", so the limit can always
# be passed as a bound parameter instead of being spliced into the SQL text.
_SELECT_CLAIMS_SQL = (
    "SELECT id, text, text_normalized, language, verdict, verdict_summary, "
    "source_id, embedding, first_seen_at FROM claims WHERE embedding IS NOT NULL "
    "LIMIT ?"
)

_CREATE_TABLE_SQL = """CREATE TABLE IF NOT EXISTS fimi_claims (
    id INTEGER PRIMARY KEY,
    text TEXT NOT NULL,
    text_normalized TEXT,
    language TEXT,
    verdict TEXT NOT NULL DEFAULT 'false',
    verdict_summary TEXT,
    source_ref TEXT,
    case_url TEXT,
    embedding BLOB,
    first_seen_at TIMESTAMP,
    imported_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)"""

_CREATE_INDEX_SQL = (
    "CREATE INDEX IF NOT EXISTS idx_fimi_claims_source_ref ON fimi_claims(source_ref)"
)

_UPSERT_SQL = """INSERT INTO fimi_claims
    (id, text, text_normalized, language, verdict, verdict_summary,
     source_ref, case_url, embedding, first_seen_at, imported_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
ON CONFLICT(id) DO UPDATE SET
    text=excluded.text,
    text_normalized=excluded.text_normalized,
    language=excluded.language,
    verdict=excluded.verdict,
    verdict_summary=excluded.verdict_summary,
    source_ref=excluded.source_ref,
    case_url=excluded.case_url,
    embedding=excluded.embedding,
    first_seen_at=excluded.first_seen_at,
    imported_at=CURRENT_TIMESTAMP"""


def case_url_from_source_ref(source_ref: str | None) -> str | None:
    """Derive the EUvsDisinfo case URL from an ``'euvsdisinfo:<slug>'`` reference.

    Args:
        source_ref: Source reference string, or ``None``.

    Returns:
        ``EUVSDISINFO_REPORT_BASE + slug + "/"`` when *source_ref* starts with
        ``"euvsdisinfo:"`` and a non-empty slug remains after trimming
        surrounding whitespace and slashes; otherwise ``None``.
    """
    prefix = "euvsdisinfo:"
    if not source_ref or not source_ref.startswith(prefix):
        return None
    # Trim whitespace and slashes together so a remainder such as "/ /" cannot
    # survive as a blank slug and yield a malformed URL.
    slug = source_ref[len(prefix):].strip().strip("/ \t\r\n\v\f")
    if not slug:
        return None
    return f"{EUVSDISINFO_REPORT_BASE}{slug}/"


def _read_claims(vigil_db: str, limit: int) -> list[sqlite3.Row]:
    """Return all Vigil claims that have an embedding (at most *limit* if > 0)."""
    # closing() guarantees the connection is released even if the query fails.
    with contextlib.closing(sqlite3.connect(vigil_db)) as src:
        src.row_factory = sqlite3.Row
        bound_limit = limit if limit > 0 else -1  # -1: no upper bound in SQLite
        return src.execute(_SELECT_CLAIMS_SQL, (bound_limit,)).fetchall()


def _upsert_params(rows: list[sqlite3.Row]) -> tuple[list[tuple], int]:
    """Build the UPSERT parameter tuples and count the rows that got a case URL."""
    params = []
    with_url = 0
    for row in rows:
        case_url = case_url_from_source_ref(row["source_id"])
        if case_url:
            with_url += 1
        params.append(
            (
                row["id"],
                row["text"],
                row["text_normalized"],
                row["language"],
                row["verdict"] or "false",  # target column is NOT NULL
                row["verdict_summary"],
                row["source_id"],
                case_url,
                row["embedding"],
                row["first_seen_at"],
            )
        )
    return params, with_url


def _write_claims(osint_db: str, params: list[tuple]) -> int:
    """UPSERT *params* into ``fimi_claims`` and return the table's row count."""
    with contextlib.closing(sqlite3.connect(osint_db)) as dst:
        dst.execute("PRAGMA busy_timeout=10000")
        # Make sure the target table exists (in case this runs before init_db).
        dst.execute(_CREATE_TABLE_SQL)
        dst.execute(_CREATE_INDEX_SQL)
        # One transaction for the whole batch: committed on success, rolled
        # back if any row fails.
        with dst:
            dst.executemany(_UPSERT_SQL, params)
        return dst.execute("SELECT COUNT(*) FROM fimi_claims").fetchone()[0]


def main() -> int:
    """Run the import as configured on the command line.

    Reads ``--vigil-db``, ``--osint-db`` and the optional ``--limit`` from
    ``sys.argv``, copies the claims and prints a short summary to stdout.

    Returns:
        ``0`` on success, ``1`` when the Vigil database file does not exist.
    """
    ap = argparse.ArgumentParser(description=_CLI_DESCRIPTION)
    ap.add_argument("--vigil-db", required=True, help="Pfad zur Vigil-SQLite-DB (Quelle)")
    ap.add_argument("--osint-db", required=True, help="Pfad zur Monitor-SQLite-DB (Ziel)")
    ap.add_argument("--limit", type=int, default=0,
                    help="Optional: nur N Claims importieren (Test)")
    args = ap.parse_args()

    # sqlite3.connect() would silently create an empty database for a missing
    # path and then fail with "no such table"; reject the path up front.
    if not os.path.isfile(args.vigil_db):
        print(f"Fehler: Vigil-DB nicht gefunden: {args.vigil_db}",
              file=sys.stderr, flush=True)
        return 1

    rows = _read_claims(args.vigil_db, args.limit)
    print(f"Vigil: {len(rows)} Claims mit Embedding gelesen", flush=True)

    params, with_url = _upsert_params(rows)
    total = _write_claims(args.osint_db, params)
    inserted = len(params)
    print(f"Monitor: {inserted} Claims upserted ({with_url} mit Case-URL), "
          f"fimi_claims enthaelt jetzt {total} Eintraege", flush=True)
    return 0


if __name__ == "__main__":
    sys.exit(main())