Promote develop → main (2026-05-03 00:02 UTC) #14
@@ -6,6 +6,7 @@ import httpx
|
||||
from datetime import datetime, timezone
|
||||
from config import TIMEZONE, MAX_ARTICLES_PER_DOMAIN_RSS
|
||||
from source_rules import _extract_domain
|
||||
from feeds.transcript_extractors._common import html_to_text
|
||||
|
||||
logger = logging.getLogger("osint.rss")
|
||||
|
||||
@@ -152,7 +153,11 @@ class RSSParser:
|
||||
|
||||
for entry in feed.entries[:50]:
|
||||
title = entry.get("title", "")
|
||||
summary = entry.get("summary", "")
|
||||
# RSS-summary ist bei vielen Quellen HTML (Guardian, AP, SZ, ...).
|
||||
# Vor weiterer Verwendung strippen, sonst landet HTML in DB
|
||||
# und KI-Agenten und Sprach-Heuristik werden gestoert.
|
||||
summary_raw = entry.get("summary", "")
|
||||
summary = html_to_text(summary_raw) if summary_raw else ""
|
||||
text = f"{title} {summary}".lower()
|
||||
|
||||
# Adaptive Match-Schwelle:
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren