Domain-Balance vor Haiku entfernt — Feeds sollen alle verfügbar bleiben
Die Feed-Vorfilterung (max. 3 pro Domain) vor der Haiku-Selektion war falsch: Alle thematischen Feeds (z.B. Guardian World, Politics, Middle East) sollen Haiku zur Auswahl stehen. Die Quellenvielfalt wird stattdessen durch den Prompt (QUELLENVIELFALT-Regel) und den Artikel-Cap nach dem RSS-Fetch (max. 10 Artikel/Domain) sichergestellt.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -4,9 +4,8 @@ import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from config import TIMEZONE, MAX_FEEDS_PER_DOMAIN
|
||||
from config import TIMEZONE
|
||||
from typing import Optional
|
||||
from collections import defaultdict
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
from agents.claude_client import UsageAccumulator
|
||||
@@ -163,14 +162,6 @@ async def _background_discover_sources(articles: list[dict]):
|
||||
# 3. Gegen DB prüfen — welche Domains existieren schon?
|
||||
new_count = 0
|
||||
for domain, url, category in domains_to_check:
|
||||
cursor = await db.execute(
|
||||
"SELECT id FROM sources WHERE LOWER(domain) = ? AND source_type = 'rss_feed' AND status = 'active'",
|
||||
(domain.lower(),),
|
||||
)
|
||||
existing_feeds = await cursor.fetchall()
|
||||
if len(existing_feeds) >= MAX_FEEDS_PER_DOMAIN:
|
||||
continue # Domain hat bereits genug aktive Feeds
|
||||
|
||||
cursor = await db.execute(
|
||||
"SELECT id FROM sources WHERE LOWER(domain) = ?",
|
||||
(domain.lower(),),
|
||||
@@ -587,28 +578,6 @@ class AgentOrchestrator:
|
||||
from source_rules import get_feeds_with_metadata
|
||||
all_feeds = await get_feeds_with_metadata(tenant_id=tenant_id)
|
||||
|
||||
# Domain-Balance: Max. MAX_FEEDS_PER_DOMAIN Feeds pro Domain
|
||||
feeds_by_domain: dict[str, list[dict]] = defaultdict(list)
|
||||
for feed in all_feeds:
|
||||
feeds_by_domain[feed.get("domain", "")].append(feed)
|
||||
|
||||
balanced_feeds = []
|
||||
for domain, domain_feeds in feeds_by_domain.items():
|
||||
if len(domain_feeds) > MAX_FEEDS_PER_DOMAIN:
|
||||
# Nach article_count sortieren, meistgenutzte behalten
|
||||
domain_feeds.sort(key=lambda f: f.get("article_count", 0), reverse=True)
|
||||
kept = domain_feeds[:MAX_FEEDS_PER_DOMAIN]
|
||||
logger.info(
|
||||
f"Domain-Balance: {domain} von {len(domain_feeds)} auf {MAX_FEEDS_PER_DOMAIN} Feeds begrenzt"
|
||||
)
|
||||
balanced_feeds.extend(kept)
|
||||
else:
|
||||
balanced_feeds.extend(domain_feeds)
|
||||
|
||||
if len(balanced_feeds) < len(all_feeds):
|
||||
logger.info(f"Domain-Balance gesamt: {len(all_feeds)} → {len(balanced_feeds)} Feeds")
|
||||
all_feeds = balanced_feeds
|
||||
|
||||
feed_usage = None
|
||||
if len(all_feeds) > 20:
|
||||
selected_feeds, feed_usage = await rss_researcher.select_relevant_feeds(
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren