fix: echten Publisher aus Google-News source-Tag #34
@@ -176,6 +176,11 @@ class RSSParser:
|
|||||||
name = feed_config["name"]
|
name = feed_config["name"]
|
||||||
url = feed_config["url"]
|
url = feed_config["url"]
|
||||||
articles = []
|
articles = []
|
||||||
|
# Google-News-Feeds (Site-Search ODER Volltext-Suche) buendeln Artikel
|
||||||
|
# vieler echter Publisher. Pro Item steht der echte Publisher im
|
||||||
|
# <source>-Tag — den nutzen wir als source-Name, sonst zaehlt der
|
||||||
|
# Faktencheck 25 Artikel als "eine Quelle".
|
||||||
|
_is_google_news = "news.google.com" in (url or "")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
|
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
|
||||||
@@ -222,10 +227,26 @@ class RSSParser:
|
|||||||
# Relevanz-Score: Anteil der gematchten Suchworte (0.0-1.0)
|
# Relevanz-Score: Anteil der gematchten Suchworte (0.0-1.0)
|
||||||
relevance_score = match_count / len(search_words) if search_words else 0.0
|
relevance_score = match_count / len(search_words) if search_words else 0.0
|
||||||
|
|
||||||
|
# Bei Google-News-Feeds: echten Publisher aus <source>-Tag holen
|
||||||
|
article_source = name
|
||||||
|
if _is_google_news:
|
||||||
|
src_obj = entry.get("source")
|
||||||
|
src_title = ""
|
||||||
|
if isinstance(src_obj, dict):
|
||||||
|
src_title = (src_obj.get("title") or "").strip()
|
||||||
|
elif src_obj:
|
||||||
|
src_title = str(getattr(src_obj, "title", "") or "").strip()
|
||||||
|
if src_title:
|
||||||
|
article_source = src_title
|
||||||
|
else:
|
||||||
|
# Google-News-Titel enden oft mit " - Publishername"
|
||||||
|
if " - " in title:
|
||||||
|
article_source = title.rsplit(" - ", 1)[-1].strip() or name
|
||||||
|
|
||||||
articles.append({
|
articles.append({
|
||||||
"headline": title,
|
"headline": title,
|
||||||
"headline_de": title if self._is_german(title) else None,
|
"headline_de": title if self._is_german(title) else None,
|
||||||
"source": name,
|
"source": article_source,
|
||||||
"source_url": entry.get("link", ""),
|
"source_url": entry.get("link", ""),
|
||||||
# Die Quell-Domain aus der DB (z.B. "mod.go.jp"), nicht aus
|
# Die Quell-Domain aus der DB (z.B. "mod.go.jp"), nicht aus
|
||||||
# der URL — relevant für Google-News-RSS-Quellen, deren URLs
|
# der URL — relevant für Google-News-RSS-Quellen, deren URLs
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren