diff --git a/migrations/migrate_html_strip_2026-05-03.py b/migrations/migrate_html_strip_2026-05-03.py
new file mode 100644
index 0000000..28d0dc6
--- /dev/null
+++ b/migrations/migrate_html_strip_2026-05-03.py
@@ -0,0 +1,133 @@
+"""Backfill-Migration 2026-05-03: HTML-Tags aus articles.content_original / content_de strippen.
+
+Ursache: rss_parser.py hat bis 2026-05-03 die `summary` aus dem RSS-Feed
+ungefiltert in content_original/content_de gespeichert. Bei vielen Quellen
+(Guardian, AP, SZ, Golem, ...) ist das HTML.
+
+Diese Migration sucht alle articles, deren content_original oder content_de
+HTML-Tags enthalten, und ersetzt den Inhalt durch die plain-Text-Variante
+(via html_to_text aus feeds/transcript_extractors/_common.py).
+
+Idempotent: wiederholter Lauf findet nichts mehr.
+"""
+import argparse
+import shutil
+import sqlite3
+import sys
+from datetime import datetime
+
+# Import html_to_text from the Monitor repo checkout (its src dir goes first on sys.path)
+sys.path.insert(0, "/home/claude-dev/AegisSight-Monitor/src")
+try:
+ from feeds.transcript_extractors._common import html_to_text
+except ImportError:
+ # Fallback: put the staging repo's src dir first on sys.path and retry the same import
+ sys.path.insert(0, "/home/claude-dev/AegisSight-Monitor-staging/src")
+ from feeds.transcript_extractors._common import html_to_text
+
+
+def _clean(s):
+    """Convert ``s`` with html_to_text and cap the result at 1000 characters.
+
+    The cap is meant to match the parser's behaviour. Returns None when ``s``
+    is falsy (None or empty) or when the converted text is empty or blank.
+    """
+    text = html_to_text(s) if s else None
+    # A falsy or whitespace-only conversion result counts as "no content".
+    if text and text.strip():
+        return text[:1000]
+    return None
+
+
+def main():
+    """Run the HTML-strip backfill against the SQLite DB passed via --db.
+
+    Unless --no-backup or --dry-run is given, the DB file is first copied to
+    "<db>.html-strip-bak-<timestamp>" (an existing backup is never overwritten).
+    Every article whose content_original or content_de matches the tag pattern
+    gets BOTH columns replaced by their _clean() result, committed in one go.
+
+    Returns:
+        0 if nothing matched, on a dry run, or if no row matches afterwards;
+        1 if rows still match the tag pattern after the commit.
+    """
+    import os  # local import: only needed for the backup-exists check
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--db", required=True, help="Pfad zur SQLite-DB")
+    ap.add_argument("--no-backup", action="store_true", help="Backup ueberspringen")
+    ap.add_argument("--dry-run", action="store_true", help="Nur zeigen, nichts schreiben")
+    args = ap.parse_args()
+
+    # One shared predicate so selection and verification cannot drift apart.
+    has_html = "content_original LIKE '%<%>%' OR content_de LIKE '%<%>%'"
+
+    if not args.no_backup and not args.dry_run:
+        ts = datetime.now().strftime("%Y-%m-%d-%H%M")
+        backup_path = f"{args.db}.html-strip-bak-{ts}"
+        if os.path.exists(backup_path):
+            # copy2 would silently overwrite; keep the older (pre-migration) copy.
+            print(f"Backup existiert bereits, wird nicht ueberschrieben: {backup_path}")
+        else:
+            shutil.copy2(args.db, backup_path)
+            print(f"Backup angelegt: {backup_path}")
+
+    db = sqlite3.connect(args.db)
+    try:
+        db.row_factory = sqlite3.Row
+        rows = db.execute(
+            f"SELECT id, content_original, content_de FROM articles WHERE {has_html}"
+        ).fetchall()
+        print(f"Gefundene Artikel mit HTML: {len(rows)}")
+        if not rows:
+            print("Nichts zu tun.")
+            return 0
+
+        updated = len(rows)  # every selected row gets an UPDATE
+        set_null_orig = set_null_de = 0
+        samples = []  # (id, before, after) for the first 3 changed content_original
+        for r in rows:
+            rid, old_orig, old_de = r["id"], r["content_original"], r["content_de"]
+            new_orig, new_de = _clean(old_orig), _clean(old_de)
+            if new_orig is None and old_orig is not None:
+                set_null_orig += 1
+            if new_de is None and old_de is not None:
+                set_null_de += 1
+            if len(samples) < 3 and old_orig and old_orig != new_orig:
+                samples.append((rid, old_orig[:120], (new_orig or "")[:120]))
+            if not args.dry_run:
+                db.execute(
+                    "UPDATE articles SET content_original=?, content_de=? WHERE id=?",
+                    (new_orig, new_de, rid),
+                )
+
+        if not args.dry_run:
+            db.commit()
+            print(f"Updates committed: {updated}")
+        else:
+            print(f"DRY-RUN: {updated} Updates wuerden ausgefuehrt")
+        print(f" davon content_original auf NULL: {set_null_orig}")
+        print(f" davon content_de auf NULL: {set_null_de}")
+        print()
+        print("=== Sample (vorher -> nachher) ===")
+        for rid, before, after in samples:
+            print(f" [{rid}] BEFORE: {before!r}")
+            print(f" [{rid}] AFTER: {after!r}")
+            print()
+
+        # Verification: re-run the selection predicate on the committed data.
+        if not args.dry_run:
+            count_sql = f"SELECT COUNT(*) FROM articles WHERE {has_html}"
+            remaining = db.execute(count_sql).fetchone()[0]
+            print(f"Nach Migration verbleibend mit HTML: {remaining}")
+            if remaining != 0:
+                print("WARNUNG: Es sind noch HTML-Reste vorhanden!")
+                return 1
+    finally:  # close on every exit path (early returns, errors)
+        db.close()
+
+    print("Fertig.")
+    return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())  # main()'s return value (0 or 1) becomes the process exit status