Dynamische Keyword-Extraktion fuer RSS-Filterung + min_matches-Fix
- researcher.py: Neuer dedizierter Haiku-Call extract_dynamic_keywords() analysiert die letzten 30 Headlines und generiert 5 DE+EN Begriffspaare
- orchestrator.py: Dynamische Keywords vor Feed-Selektion aus DB-Headlines
- rss_parser.py: min_matches auf max 2 gedeckelt (vorher n/2, bei 10 Keywords = 5)
- analyzer.py: Fettdruck-Anweisungen entfernt

Vorher: 0 RSS-Treffer (min_matches=5 unerreichbar)
Nachher: 22 RSS-Treffer (Tagesschau 11, Al Jazeera 5, BBC 4, NYT 2)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -568,7 +568,7 @@ class AgentOrchestrator:
|
||||
|
||||
# Schritt 1+2: RSS-Feeds und Claude-Recherche parallel ausführen
|
||||
async def _rss_pipeline():
|
||||
"""RSS-Feed-Suche (Feed-Selektion + Parsing)."""
|
||||
"""RSS-Feed-Suche (Feed-Selektion + dynamische Keywords + Parsing)."""
|
||||
if incident_type != "adhoc":
|
||||
logger.info("Recherche-Modus: RSS-Feeds übersprungen")
|
||||
return [], None
|
||||
@@ -579,13 +579,29 @@ class AgentOrchestrator:
|
||||
from source_rules import get_feeds_with_metadata
|
||||
all_feeds = await get_feeds_with_metadata(tenant_id=tenant_id)
|
||||
|
||||
# Dynamische Keywords aus den letzten Headlines extrahieren
|
||||
cursor_hl = await db.execute(
|
||||
"""SELECT COALESCE(headline_de, headline) as hl
|
||||
FROM articles WHERE incident_id = ?
|
||||
AND COALESCE(headline_de, headline) IS NOT NULL
|
||||
ORDER BY collected_at DESC LIMIT 30""",
|
||||
(incident_id,),
|
||||
)
|
||||
recent_headlines = [row["hl"] for row in await cursor_hl.fetchall() if row["hl"]]
|
||||
dynamic_keywords, kw_usage = await rss_researcher.extract_dynamic_keywords(title, recent_headlines)
|
||||
if kw_usage:
|
||||
usage_acc.add(kw_usage)
|
||||
|
||||
feed_usage = None
|
||||
keywords = None
|
||||
keywords = dynamic_keywords # Dynamische Keywords bevorzugen
|
||||
if len(all_feeds) > 20:
|
||||
selected_feeds, keywords, feed_usage = await rss_researcher.select_relevant_feeds(
|
||||
selected_feeds, feed_sel_keywords, feed_usage = await rss_researcher.select_relevant_feeds(
|
||||
title, description, international, all_feeds
|
||||
)
|
||||
logger.info(f"Feed-Selektion: {len(selected_feeds)} von {len(all_feeds)} Feeds ausgewählt")
|
||||
# Feed-Selektion-Keywords nur als Fallback wenn dynamische fehlen
|
||||
if not keywords:
|
||||
keywords = feed_sel_keywords
|
||||
articles = await rss_parser.search_feeds_selective(title, selected_feeds, keywords=keywords)
|
||||
else:
|
||||
articles = await rss_parser.search_feeds(title, international=international, tenant_id=tenant_id, keywords=keywords)
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren