Promote develop → main (2026-05-03 00:02 UTC) #14
@@ -6,6 +6,7 @@ import httpx
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from config import TIMEZONE, MAX_ARTICLES_PER_DOMAIN_RSS
|
from config import TIMEZONE, MAX_ARTICLES_PER_DOMAIN_RSS
|
||||||
from source_rules import _extract_domain
|
from source_rules import _extract_domain
|
||||||
|
from feeds.transcript_extractors._common import html_to_text
|
||||||
|
|
||||||
logger = logging.getLogger("osint.rss")
|
logger = logging.getLogger("osint.rss")
|
||||||
|
|
||||||
@@ -152,7 +153,11 @@ class RSSParser:
|
|||||||
|
|
||||||
for entry in feed.entries[:50]:
|
for entry in feed.entries[:50]:
|
||||||
title = entry.get("title", "")
|
title = entry.get("title", "")
|
||||||
summary = entry.get("summary", "")
|
# RSS-summary ist bei vielen Quellen HTML (Guardian, AP, SZ, ...).
|
||||||
|
# Vor weiterer Verwendung strippen, sonst landet HTML in der DB
|
||||||
|
# und stoert sowohl die KI-Agenten als auch die Sprach-Heuristik.
|
||||||
|
summary_raw = entry.get("summary", "")
|
||||||
|
summary = html_to_text(summary_raw) if summary_raw else ""
|
||||||
text = f"{title} {summary}".lower()
|
text = f"{title} {summary}".lower()
|
||||||
|
|
||||||
# Adaptive Match-Schwelle:
|
# Adaptive Match-Schwelle:
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren