#!/usr/bin/env python3
"""Bulk-Seed fuer Militaer- und Polizei-Technik-Quellen + internationale Waffen-Spezialisten.

Liest scripts/seed_military_sources.json und legt jede Quelle idempotent in der
Ziel-DB an (Default: Verwaltungs-Staging-DB). Bestehende Quellen werden anhand
der URL erkannt und uebersprungen.

Beispiel:
    .venv/bin/python scripts/seed_military_sources.py
    .venv/bin/python scripts/seed_military_sources.py --db /home/claude-dev/osint-data/osint.db
"""

from __future__ import annotations

import argparse
import json
import sqlite3
import sys
from pathlib import Path

# NOTE: the module docstring above is handed to argparse as the --help
# description (description=__doc__), so it is user-facing runtime text and is
# kept in German like every other message this script prints.

# Database used when --db is not given.
DEFAULT_DB = "/home/claude-dev/AegisSight-Monitor-staging/data/osint.db"

# Seed file used when --seed is not given: same path as this script, with a
# ".json" suffix instead of ".py".
SEED_FILE = Path(__file__).with_suffix(".json")

# Ten bound parameters; added_by is fixed to 'seed_military' and tenant_id to
# NULL for every seeded row.
INSERT_SQL = """
    INSERT INTO sources (
        name, url, domain, source_type, category, status,
        notes, language, country_code, fetch_strategy,
        added_by, tenant_id
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'seed_military', NULL)
"""

# Duplicate detection: same URL among rows whose tenant_id is NULL.
EXISTS_SQL = "SELECT id FROM sources WHERE url = ? AND tenant_id IS NULL"


def _load_sources(seed_path: Path) -> list | None:
    """Read the seed JSON and return its non-empty ``sources`` list.

    Prints a ``FEHLER`` message to stderr and returns ``None`` when the file
    is missing, is not valid JSON, or does not contain a non-empty list under
    the top-level ``"sources"`` key.
    """
    if not seed_path.is_file():
        print(f"FEHLER: Seed-Datei nicht gefunden: {seed_path}", file=sys.stderr)
        return None

    try:
        with seed_path.open("r", encoding="utf-8") as fh:
            seed = json.load(fh)
    except json.JSONDecodeError as exc:
        print(f"FEHLER: Seed-Datei ist kein gueltiges JSON: {exc}", file=sys.stderr)
        return None

    # A top-level value that is not an object has no "sources" key at all.
    sources = seed.get("sources", []) if isinstance(seed, dict) else []
    if not isinstance(sources, list) or not sources:
        print("FEHLER: Seed-Datei enthaelt keine sources", file=sys.stderr)
        return None
    return sources


def _seed_sources(
    con: sqlite3.Connection,
    sources: list,
    *,
    dry_run: bool,
) -> tuple[list[tuple[int, str]], list[tuple[int, str]]]:
    """Insert every not-yet-known source and report what happened.

    Does not commit; the caller decides whether to commit. ``con`` must use
    ``sqlite3.Row`` as row factory because rows are accessed by column name.

    Returns ``(created, skipped)``. ``created`` holds ``(row id, name)``
    pairs, ``skipped`` holds ``(row id, message)`` pairs. The id is ``-1``
    whenever no database row is associated with the entry (dry-run, missing
    URL, duplicate inside the seed file).
    """
    cur = con.cursor()
    created: list[tuple[int, str]] = []
    skipped: list[tuple[int, str]] = []
    # URLs a dry-run would have inserted. A real run finds those rows through
    # EXISTS_SQL instead, because the connection sees its own uncommitted
    # inserts.
    planned_urls: set[str] = set()

    for entry in sources:
        url = entry.get("url")
        name = entry.get("name", "?")
        if not url:
            skipped.append((-1, f"{name}: ohne url"))
            continue

        row = cur.execute(EXISTS_SQL, (url,)).fetchone()
        if row is not None:
            skipped.append((row["id"], f"{name}: existiert bereits (id={row['id']})"))
            continue

        if url in planned_urls:
            # Only reachable in dry-run mode: keeps the preview counts equal
            # to what the real run would report for in-file duplicates.
            skipped.append((-1, f"{name}: doppelt in Seed-Datei"))
            continue

        params = (
            name,
            url,
            entry.get("domain"),
            entry.get("source_type", "rss_feed"),
            entry.get("category", "fachmedien"),
            entry.get("status", "active"),
            entry.get("notes"),
            entry.get("language"),
            entry.get("country_code"),
            entry.get("fetch_strategy", "default"),
        )

        if dry_run:
            planned_urls.add(url)
            created.append((-1, name))
            continue

        cur.execute(INSERT_SQL, params)
        created.append((cur.lastrowid, name))

    return created, skipped


def _print_report(
    created: list[tuple[int, str]],
    skipped: list[tuple[int, str]],
) -> None:
    """Print the summary counts followed by the created and skipped lists."""
    print(f"Angelegt: {len(created)}")
    print(f"Uebersprungen:{len(skipped)}")
    print()
    if created:
        print("--- Neue IDs ---")
        for src_id, name in created:
            print(f" {src_id:>5} {name}")
    if skipped:
        print()
        print("--- Uebersprungen ---")
        for src_id, msg in skipped:
            print(f" {src_id:>5} {msg}")


def main(argv: list[str] | None = None) -> int:
    """Seed the sources table from the JSON seed file.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``2`` when the seed file or the target database is
        missing or the seed file is unusable.

    Raises:
        sqlite3.Error: If a query or insert fails. Nothing is committed in
            that case and the connection is still closed.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--db", default=DEFAULT_DB, help="Pfad zur Ziel-SQLite-DB")
    ap.add_argument("--seed", default=str(SEED_FILE), help="Pfad zur Seed-JSON")
    ap.add_argument("--dry-run", action="store_true", help="Nur loggen, nichts schreiben")
    args = ap.parse_args(argv)

    seed_path = Path(args.seed)
    sources = _load_sources(seed_path)
    if sources is None:
        return 2

    # sqlite3.connect() would silently create an empty database file for a
    # mistyped path (even in dry-run mode) and then fail on the missing table.
    if not Path(args.db).is_file():
        print(f"FEHLER: Ziel-DB nicht gefunden: {args.db}", file=sys.stderr)
        return 2

    print(f"DB: {args.db}")
    print(f"Seed: {seed_path} ({len(sources)} Eintraege)")
    print(f"Dry-Run: {args.dry_run}")
    print()

    con = sqlite3.connect(args.db)
    try:
        con.row_factory = sqlite3.Row
        created, skipped = _seed_sources(con, sources, dry_run=args.dry_run)
        if not args.dry_run:
            con.commit()
    finally:
        # Closing without a commit discards pending inserts, so a failure
        # midway never leaves a partially seeded database behind.
        con.close()

    _print_report(created, skipped)
    return 0


if __name__ == "__main__":
    sys.exit(main())