Neues idempotentes Skript scripts/seed_military_sources.{json,py} legt 85
internationale Defense-Quellen an (RSS/Web/Telegram), kategorisiert mit
Topic-Tags im notes-Feld: [militaertechnik], [waffen-international],
[polizei-technik]. Sprachen EN/DE/FR/RU/FA/PL, country_code manuell gesetzt.
Erstlauf auf Staging-DB: 83 neu (IDs 384-466), 2 Duplikate (rybar,
osintdefender bereits vorhanden). Der URL-Check verhindert Duplikate; dasselbe
Skript laeuft daher ohne Aenderung gegen die Live-DB:
venv/bin/python scripts/seed_military_sources.py \
--db /home/claude-dev/osint-data/osint.db
Sektionen: 31 internationale Equipment-Fachredaktionen (Janes, TWZ, Defense
News, Naval News, Army Recognition, Aviation Week ...), 8 deutsche
(ESuT, Soldat & Technik, hartpunkt, Augen geradeaus ...), 5 franzoesische
(Opex360, Mer et Marine ...), 5 russische (Topwar, TASS, RIA, bmpd, Zvezda),
4 ukrainisch/polnische (Defense Express, Militarnyi, Defence24), 2
israelische, 3 iranische, 3 chinesisch/asiatische, 8 OSINT-Tracker (ORYX,
WarSpotting, CIT, 5 Telegram), 5 Polizei-Technik (Behoerden-Spiegel, pvt,
Police Magazine ...) und 11 Waffen-Spezialisten (Small Arms Survey, SIPRI,
Conflict Armament Research, ARES, Calibre Obscura, ICRC ...).
Plan: ~/.claude/plans/gleaming-inventing-fern.md
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
117 Zeilen
3.5 KiB
Python
Ausführbare Datei
117 Zeilen
3.5 KiB
Python
Ausführbare Datei
#!/usr/bin/env python3
|
|
"""Bulk-Seed fuer Militaer- und Polizei-Technik-Quellen + internationale Waffen-Spezialisten.
|
|
|
|
Liest scripts/seed_military_sources.json und legt jede Quelle idempotent in der
|
|
Ziel-DB an (Default: Verwaltungs-Staging-DB). Bestehende Quellen werden anhand
|
|
der URL erkannt und uebersprungen.
|
|
|
|
Beispiel:
|
|
.venv/bin/python scripts/seed_military_sources.py
|
|
.venv/bin/python scripts/seed_military_sources.py --db /home/claude-dev/osint-data/osint.db
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sqlite3
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Database used when --db is not given (the staging database, per the module
# docstring). Pass --db to seed a different database file.
DEFAULT_DB = "/home/claude-dev/AegisSight-Monitor-staging/data/osint.db"

# The seed data sits next to this script: same path, ".json" suffix.
SEED_FILE = Path(__file__).with_suffix(".json")

# Parameterized insert of one source row. The ten placeholders are, in order:
# name, url, domain, source_type, category, status, notes, language,
# country_code, fetch_strategy. added_by is fixed to 'seed_military' and
# tenant_id to NULL so rows written by this script can be identified later.
INSERT_SQL = """
INSERT INTO sources (
    name, url, domain, source_type, category, status, notes,
    language, country_code, fetch_strategy, added_by, tenant_id
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'seed_military', NULL)
"""

# Duplicate check: looks up an existing row by URL among rows whose tenant_id
# is NULL (the same tenant_id value INSERT_SQL writes).
EXISTS_SQL = "SELECT id FROM sources WHERE url = ? AND tenant_id IS NULL"
|
|
|
|
|
|
def _read_seed_entries(seed_path: Path) -> list:
    """Return the list stored under the "sources" key of the seed JSON file.

    Raises:
        ValueError: if the file is not valid JSON (json.JSONDecodeError is a
            ValueError subclass), if the top-level value is not an object, or
            if "sources" is not a list.
    """
    with seed_path.open("r", encoding="utf-8") as fh:
        seed = json.load(fh)
    if not isinstance(seed, dict):
        raise ValueError("Top-Level-Element ist kein JSON-Objekt")
    sources = seed.get("sources", [])
    if not isinstance(sources, list):
        raise ValueError('"sources" ist keine Liste')
    return sources


def _seed_sources(con: sqlite3.Connection, sources: list, dry_run: bool) -> tuple:
    """Insert every seed entry whose URL is not yet present.

    Args:
        con: open connection whose row_factory yields name-addressable rows.
        sources: entries from the seed file (expected to be dicts).
        dry_run: when true, nothing is written; entries that would be
            inserted are reported with the placeholder id -1.

    Returns:
        A ``(created, skipped)`` pair of ``(id, text)`` tuple lists. Nothing
        is committed here; the caller decides whether to commit.
    """
    created: list[tuple[int, str]] = []
    skipped: list[tuple[int, str]] = []
    # In a real run a URL repeated inside the seed file is caught by
    # EXISTS_SQL, because the earlier insert is visible on this connection.
    # A dry run inserts nothing, so repeated URLs are tracked here to keep the
    # dry-run counts in line with what a real run would do.
    planned_urls: set = set()
    cur = con.cursor()

    for entry in sources:
        if not isinstance(entry, dict):
            skipped.append((-1, f"{entry!r}: kein JSON-Objekt"))
            continue

        url = entry.get("url")
        name = entry.get("name", "?")
        if not url:
            skipped.append((-1, f"{name}: ohne url"))
            continue

        row = cur.execute(EXISTS_SQL, (url,)).fetchone()
        if row is not None:
            skipped.append((row["id"], f"{name}: existiert bereits (id={row['id']})"))
            continue

        if dry_run:
            if url in planned_urls:
                skipped.append((-1, f"{name}: doppelte url in Seed-Datei"))
            else:
                planned_urls.add(url)
                created.append((-1, name))
            continue

        # Parameter order must match the placeholder order of INSERT_SQL.
        params = (
            name,
            url,
            entry.get("domain"),
            entry.get("source_type", "rss_feed"),
            entry.get("category", "fachmedien"),
            entry.get("status", "active"),
            entry.get("notes"),
            entry.get("language"),
            entry.get("country_code"),
            entry.get("fetch_strategy", "default"),
        )
        cur.execute(INSERT_SQL, params)
        created.append((cur.lastrowid, name))

    return created, skipped


def main() -> int:
    """Seed the target database from the seed JSON file and print a report.

    Command-line options: ``--db`` (target SQLite file), ``--seed`` (seed JSON
    path) and ``--dry-run`` (report only, write nothing).

    Returns:
        0 on success; 2 if the seed file is missing, invalid or empty, or if
        the database file does not exist.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--db", default=DEFAULT_DB, help="Pfad zur Ziel-SQLite-DB")
    ap.add_argument("--seed", default=str(SEED_FILE), help="Pfad zur Seed-JSON")
    ap.add_argument("--dry-run", action="store_true", help="Nur loggen, nichts schreiben")
    args = ap.parse_args()

    seed_path = Path(args.seed)
    if not seed_path.is_file():
        print(f"FEHLER: Seed-Datei nicht gefunden: {seed_path}", file=sys.stderr)
        return 2

    try:
        sources = _read_seed_entries(seed_path)
    except ValueError as exc:
        print(f"FEHLER: Seed-Datei ungueltig: {exc}", file=sys.stderr)
        return 2
    if not sources:
        print("FEHLER: Seed-Datei enthaelt keine sources", file=sys.stderr)
        return 2

    # sqlite3.connect() creates a new, empty database file when the path does
    # not exist; a mistyped --db would leave a stray file behind and then fail
    # on the first query. Refuse up front instead.
    if not Path(args.db).is_file():
        print(f"FEHLER: Datenbank nicht gefunden: {args.db}", file=sys.stderr)
        return 2

    print(f"DB: {args.db}")
    print(f"Seed: {seed_path} ({len(sources)} Eintraege)")
    print(f"Dry-Run: {args.dry_run}")
    print()

    con = sqlite3.connect(args.db)
    try:
        con.row_factory = sqlite3.Row
        created, skipped = _seed_sources(con, sources, args.dry_run)
        if not args.dry_run:
            con.commit()
    finally:
        # Always release the connection; if an insert raised, closing without
        # a commit discards the partial batch.
        con.close()

    print(f"Angelegt: {len(created)}")
    print(f"Uebersprungen:{len(skipped)}")
    print()
    if created:
        print("--- Neue IDs ---")
        for src_id, name in created:
            print(f" {src_id:>5} {name}")
    if skipped:
        print()
        print("--- Uebersprungen ---")
        for src_id, msg in skipped:
            print(f" {src_id:>5} {msg}")
    return 0
|
|
|
|
|
|
# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|