feat(scripts): Bulk-Seed fuer 83 Militaer-, Polizei-Technik und Waffen-Quellen
Neues idempotentes Skript scripts/seed_military_sources.{json,py} legt 85
internationale Defense-Quellen an (RSS/Web/Telegram), kategorisiert mit
Topic-Tags im notes-Feld: [militaertechnik], [waffen-international],
[polizei-technik]. Sprachen EN/DE/FR/RU/FA/PL, country_code manuell gesetzt.
Erstlauf auf Staging-DB: 83 neu (IDs 384-466), 2 Duplikate (rybar,
osintdefender bereits vorhanden). URL-Check verhindert Duplikate, das
gleiche Skript laeuft ohne Aenderung gegen Live-DB:
venv/bin/python scripts/seed_military_sources.py \
--db /home/claude-dev/osint-data/osint.db
Sektionen: 31 internationale Equipment-Fachredaktionen (Janes, TWZ, Defense
News, Naval News, Army Recognition, Aviation Week ...), 8 deutsche
(ESuT, Soldat & Technik, hartpunkt, Augen geradeaus ...), 5 franzoesische
(Opex360, Mer et Marine ...), 5 russische (Topwar, TASS, RIA, bmpd, Zvezda),
4 ukrainisch/polnische (Defense Express, Militarnyi, Defence24), 2
israelische, 3 iranische, 3 chinesisch/asiatische, 8 OSINT-Tracker (ORYX,
WarSpotting, CIT, 5 Telegram), 5 Polizei-Technik (Behoerden-Spiegel, pvt,
Police Magazine ...) und 11 Waffen-Spezialisten (Small Arms Survey, SIPRI,
Conflict Armament Research, ARES, Calibre Obscura, ICRC ...).
Plan: ~/.claude/plans/gleaming-inventing-fern.md
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
116
scripts/seed_military_sources.py
Ausführbare Datei
116
scripts/seed_military_sources.py
Ausführbare Datei
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Bulk-Seed fuer Militaer- und Polizei-Technik-Quellen + internationale Waffen-Spezialisten.
|
||||
|
||||
Liest scripts/seed_military_sources.json und legt jede Quelle idempotent in der
|
||||
Ziel-DB an (Default: Verwaltungs-Staging-DB). Bestehende Quellen werden anhand
|
||||
der URL erkannt und uebersprungen.
|
||||
|
||||
Beispiel:
|
||||
.venv/bin/python scripts/seed_military_sources.py
|
||||
.venv/bin/python scripts/seed_military_sources.py --db /home/claude-dev/osint-data/osint.db
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Seed data sits next to this script: same file name, ".json" suffix.
SEED_FILE = Path(__file__).with_suffix(".json")

# Database used when --db is not given (the staging DB, per the module docstring).
DEFAULT_DB = "/home/claude-dev/AegisSight-Monitor-staging/data/osint.db"

# Lookup used for the idempotency check: a source counts as already present
# when a row with the same URL and no tenant (tenant_id IS NULL) exists.
EXISTS_SQL = "SELECT id FROM sources WHERE url = ? AND tenant_id IS NULL"

# Parameterised insert: ten bound values in column order; added_by is fixed to
# 'seed_military' and tenant_id to NULL for every seeded row.
INSERT_SQL = """
INSERT INTO sources (
name, url, domain, source_type, category, status, notes,
language, country_code, fetch_strategy, added_by, tenant_id
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'seed_military', NULL)
"""
|
||||
|
||||
|
||||
def main() -> int:
    """Insert the sources listed in a seed JSON file into a SQLite database.

    Command-line options (read from ``sys.argv``):
        --db       path of the target SQLite database (default: ``DEFAULT_DB``)
        --seed     path of the seed JSON file (default: ``SEED_FILE``)
        --dry-run  only report what would be inserted; nothing is written

    The seed file must hold a JSON object with a non-empty ``"sources"`` list
    of objects. An entry whose URL already exists in ``sources`` with
    ``tenant_id IS NULL`` is skipped, so the script can be re-run safely.
    Missing optional fields fall back to ``source_type="rss_feed"``,
    ``category="fachmedien"``, ``status="active"`` and
    ``fetch_strategy="default"``.

    Returns:
        0 on success; 2 if the seed file or the target database does not
        exist, or if the seed file contains no sources.

    Raises:
        sqlite3.Error: if a query fails (e.g. the ``sources`` table is
            missing). Nothing is committed in that case and the connection
            is still closed.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--db", default=DEFAULT_DB, help="Pfad zur Ziel-SQLite-DB")
    ap.add_argument("--seed", default=str(SEED_FILE), help="Pfad zur Seed-JSON")
    ap.add_argument("--dry-run", action="store_true", help="Nur loggen, nichts schreiben")
    args = ap.parse_args()

    seed_path = Path(args.seed)
    if not seed_path.is_file():
        print(f"FEHLER: Seed-Datei nicht gefunden: {seed_path}", file=sys.stderr)
        return 2

    with seed_path.open("r", encoding="utf-8") as fh:
        seed = json.load(fh)
    # A top-level JSON array/scalar has no .get(); treat it like "no sources".
    sources = seed.get("sources", []) if isinstance(seed, dict) else []
    if not isinstance(sources, list) or not sources:
        print("FEHLER: Seed-Datei enthaelt keine sources", file=sys.stderr)
        return 2

    # sqlite3.connect() silently creates an empty database file for an unknown
    # path; a mistyped --db must not leave such a file behind (least of all in
    # a dry run), so refuse early, consistent with the seed-file check above.
    if not Path(args.db).is_file():
        print(f"FEHLER: Ziel-DB nicht gefunden: {args.db}", file=sys.stderr)
        return 2

    print(f"DB: {args.db}")
    print(f"Seed: {seed_path} ({len(sources)} Eintraege)")
    print(f"Dry-Run: {args.dry_run}")
    print()

    created: list[tuple[int, str]] = []
    skipped: list[tuple[int, str]] = []
    # URLs already planned in a dry run. A real run sees its own uncommitted
    # inserts through EXISTS_SQL; a dry run inserts nothing and would report a
    # URL that is repeated inside the seed file as "new" twice.
    planned_urls: set[str] = set()

    con = sqlite3.connect(args.db)
    try:
        con.row_factory = sqlite3.Row
        cur = con.cursor()

        for entry in sources:
            if not isinstance(entry, dict):
                skipped.append((-1, f"{entry!r}: kein JSON-Objekt"))
                continue

            url = entry.get("url")
            name = entry.get("name", "?")
            if not url:
                skipped.append((-1, f"{name}: ohne url"))
                continue

            row = cur.execute(EXISTS_SQL, (url,)).fetchone()
            if row is not None:
                skipped.append((row["id"], f"{name}: existiert bereits (id={row['id']})"))
                continue

            if args.dry_run:
                if url in planned_urls:
                    skipped.append((-1, f"{name}: Duplikat in Seed-Datei"))
                    continue
                planned_urls.add(url)
                created.append((-1, name))
                continue

            # Order must match the placeholder order in INSERT_SQL.
            params = (
                name,
                url,
                entry.get("domain"),
                entry.get("source_type", "rss_feed"),
                entry.get("category", "fachmedien"),
                entry.get("status", "active"),
                entry.get("notes"),
                entry.get("language"),
                entry.get("country_code"),
                entry.get("fetch_strategy", "default"),
            )
            cur.execute(INSERT_SQL, params)
            created.append((cur.lastrowid, name))

        # All-or-nothing: commit only after every entry was processed.
        if not args.dry_run:
            con.commit()
    finally:
        # Closing without a commit discards pending inserts, so a failure
        # part-way through leaves the database unchanged.
        con.close()

    print(f"Angelegt: {len(created)}")
    print(f"Uebersprungen:{len(skipped)}")
    print()
    if created:
        print("--- Neue IDs ---")
        for src_id, name in created:
            print(f" {src_id:>5} {name}")
    if skipped:
        print()
        print("--- Uebersprungen ---")
        for src_id, msg in skipped:
            print(f" {src_id:>5} {msg}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Hand main()'s return code to the interpreter as the process exit status.
    raise SystemExit(main())
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren