From 7672bbcad69e6ed2043574a9317105ddb9cde4c5 Mon Sep 17 00:00:00 2001 From: claude-dev Date: Wed, 4 Mar 2026 23:28:37 +0100 Subject: [PATCH] =?UTF-8?q?Domain-Balance=20vor=20Haiku=20entfernt=20?= =?UTF-8?q?=E2=80=94=20Feeds=20sollen=20alle=20verf=C3=BCgbar=20bleiben?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Die Feed-Vorfilterung (max 3 pro Domain) vor der Haiku-Selektion war falsch: Alle thematischen Feeds (z.B. Guardian World, Politics, Middle East) sollen Haiku zur Auswahl stehen. Die Quellenvielfalt wird stattdessen durch den Prompt (QUELLENVIELFALT-Regel) und den Artikel-Cap nach dem RSS-Fetch (max 10 Artikel/Domain) sichergestellt. Co-Authored-By: Claude Opus 4.6 --- src/agents/orchestrator.py | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/src/agents/orchestrator.py b/src/agents/orchestrator.py index 3cc96bf..501b1d5 100644 --- a/src/agents/orchestrator.py +++ b/src/agents/orchestrator.py @@ -4,9 +4,8 @@ import json import logging import re from datetime import datetime, timezone -from config import TIMEZONE, MAX_FEEDS_PER_DOMAIN +from config import TIMEZONE from typing import Optional -from collections import defaultdict from urllib.parse import urlparse, urlunparse from agents.claude_client import UsageAccumulator @@ -163,14 +162,6 @@ async def _background_discover_sources(articles: list[dict]): # 3. Gegen DB prüfen — welche Domains existieren schon? new_count = 0 for domain, url, category in domains_to_check: - cursor = await db.execute( - "SELECT id FROM sources WHERE LOWER(domain) = ? AND source_type = 'rss_feed' AND status = 'active'", - (domain.lower(),), - ) - existing_feeds = await cursor.fetchall() - if len(existing_feeds) >= MAX_FEEDS_PER_DOMAIN: - continue # Domain hat bereits genug aktive Feeds - cursor = await db.execute( "SELECT id FROM sources WHERE LOWER(domain) = ?", (domain.lower(),), @@ -587,28 +578,6 @@ class AgentOrchestrator: from source_rules import get_feeds_with_metadata all_feeds = await get_feeds_with_metadata(tenant_id=tenant_id) - # Domain-Balance: Max. MAX_FEEDS_PER_DOMAIN Feeds pro Domain - feeds_by_domain: dict[str, list[dict]] = defaultdict(list) - for feed in all_feeds: - feeds_by_domain[feed.get("domain", "")].append(feed) - - balanced_feeds = [] - for domain, domain_feeds in feeds_by_domain.items(): - if len(domain_feeds) > MAX_FEEDS_PER_DOMAIN: - # Nach article_count sortieren, meistgenutzte behalten - domain_feeds.sort(key=lambda f: f.get("article_count", 0), reverse=True) - kept = domain_feeds[:MAX_FEEDS_PER_DOMAIN] - logger.info( - f"Domain-Balance: {domain} von {len(domain_feeds)} auf {MAX_FEEDS_PER_DOMAIN} Feeds begrenzt" - ) - balanced_feeds.extend(kept) - else: - balanced_feeds.extend(domain_feeds) - - if len(balanced_feeds) < len(all_feeds): - logger.info(f"Domain-Balance gesamt: {len(all_feeds)} → {len(balanced_feeds)} Feeds") - all_feeds = balanced_feeds - feed_usage = None if len(all_feeds) > 20: selected_feeds, feed_usage = await rss_researcher.select_relevant_feeds(