Commits vergleichen
2 Commits
74f50c3b6e
...
dd6a7d66a4
| Autor | SHA1 | Datum | |
|---|---|---|---|
| | dd6a7d66a4 | | |
| | 4b193d5784 | | |
@@ -222,6 +222,11 @@ class RSSParser:
|
|||||||
"headline_de": title if self._is_german(title) else None,
|
"headline_de": title if self._is_german(title) else None,
|
||||||
"source": name,
|
"source": name,
|
||||||
"source_url": entry.get("link", ""),
|
"source_url": entry.get("link", ""),
|
||||||
|
# Die Quell-Domain aus der DB (z.B. "mod.go.jp"), nicht aus
|
||||||
|
# der URL — relevant für Google-News-RSS-Quellen, deren URLs
|
||||||
|
# alle "news.google.com" sind, obwohl sie für 14 verschiedene
|
||||||
|
# Behörden/Zeitungen stehen. Wird vom Domain-Cap genutzt.
|
||||||
|
"source_domain": feed_config.get("domain") or "",
|
||||||
"content_original": summary[:1000] if summary else None,
|
"content_original": summary[:1000] if summary else None,
|
||||||
"content_de": summary[:1000] if summary and self._is_german(summary) else None,
|
"content_de": summary[:1000] if summary and self._is_german(summary) else None,
|
||||||
"language": "de" if self._is_german(title) else "en",
|
"language": "de" if self._is_german(title) else "en",
|
||||||
@@ -243,10 +248,16 @@ class RSSParser:
|
|||||||
if not articles:
|
if not articles:
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
# Nach Domain gruppieren
|
# Nach Domain gruppieren. Bevorzugt source_domain (aus dem Feed-Eintrag,
|
||||||
|
# z.B. "mod.go.jp" bei einer Google-News-Site-Search-RSS-Quelle), fällt
|
||||||
|
# erst dann auf die URL-Domain zurück. Sonst landen alle Google-News-
|
||||||
|
# Feeds (14 ja-Quellen) im selben "news.google.com"-Topf und werden
|
||||||
|
# vom Cap auf 10 begrenzt.
|
||||||
by_domain: dict[str, list[dict]] = {}
|
by_domain: dict[str, list[dict]] = {}
|
||||||
for article in articles:
|
for article in articles:
|
||||||
domain = _extract_domain(article.get("source_url", ""))
|
domain = (article.get("source_domain") or "").strip().lower()
|
||||||
|
if not domain:
|
||||||
|
domain = _extract_domain(article.get("source_url", ""))
|
||||||
if not domain:
|
if not domain:
|
||||||
domain = "__unknown__"
|
domain = "__unknown__"
|
||||||
by_domain.setdefault(domain, []).append(article)
|
by_domain.setdefault(domain, []).append(article)
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren