From bbb543fac6aec539fb0680f3e5f0a4827eaff10b Mon Sep 17 00:00:00 2001 From: Claude Dev Date: Fri, 13 Mar 2026 23:26:23 +0100 Subject: [PATCH] Fix: evidence-Spalte im FC-SELECT laden, damit alte URLs bei Re-Checks erhalten bleiben MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Beim Re-Check eines Faktenchecks wurde die bestehende evidence nicht aus der DB geladen (fehlte im SELECT). Dadurch konnte der Fallback-Code, der alte URLs bewahren soll, nie greifen. Neue Evidence ohne URLs überschrieb die alte mit URLs. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/agents/orchestrator.py | 60 ++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/src/agents/orchestrator.py b/src/agents/orchestrator.py index 5fa7f77..bb57668 100644 --- a/src/agents/orchestrator.py +++ b/src/agents/orchestrator.py @@ -536,14 +536,6 @@ class AgentOrchestrator: incident_type = incident["type"] or "adhoc" international = bool(incident["international_sources"]) if "international_sources" in incident.keys() else True include_telegram = bool(incident["include_telegram"]) if "include_telegram" in incident.keys() else False - telegram_categories_raw = incident["telegram_categories"] if "telegram_categories" in incident.keys() else None - telegram_categories = None - if telegram_categories_raw: - import json - try: - telegram_categories = json.loads(telegram_categories_raw) if isinstance(telegram_categories_raw, str) else telegram_categories_raw - except (json.JSONDecodeError, TypeError): - telegram_categories = None visibility = incident["visibility"] if "visibility" in incident.keys() else "public" created_by = incident["created_by"] if "created_by" in incident.keys() else None tenant_id = incident["tenant_id"] if "tenant_id" in incident.keys() else None @@ -630,10 +622,42 @@ class AgentOrchestrator: return results, usage async def _telegram_pipeline(): - """Telegram-Kanal-Suche.""" + """Telegram-Kanal-Suche mit KI-basierter Kanal-Selektion.""" from feeds.telegram_parser import TelegramParser tg_parser = TelegramParser() - articles = await tg_parser.search_channels(title, tenant_id=tenant_id, keywords=None, categories=telegram_categories) + + # Alle Telegram-Kanaele laden + all_channels = await tg_parser._get_telegram_channels(tenant_id=tenant_id) + if not all_channels: + logger.info("Keine Telegram-Kanaele konfiguriert") + return [], None + + # KI waehlt relevante Kanaele aus + tg_researcher = ResearcherAgent() + selected_channels, tg_sel_usage = await tg_researcher.select_relevant_telegram_channels( + title, description, all_channels + ) + if tg_sel_usage: + usage_acc.add(tg_sel_usage) + + selected_ids = [ch["id"] for ch in selected_channels] + logger.info(f"Telegram-Selektion: {len(selected_ids)} von {len(all_channels)} Kanaelen") + + # Dynamische Keywords fuer Telegram (eigener Aufruf, da parallel zu RSS) + cursor_tg_hl = await db.execute( + """SELECT COALESCE(headline_de, headline) as hl + FROM articles WHERE incident_id = ? + AND COALESCE(headline_de, headline) IS NOT NULL + ORDER BY collected_at DESC LIMIT 30""", + (incident_id,), + ) + tg_headlines = [row["hl"] for row in await cursor_tg_hl.fetchall() if row["hl"]] + tg_keywords, tg_kw_usage = await tg_researcher.extract_dynamic_keywords(title, tg_headlines) + if tg_kw_usage: + usage_acc.add(tg_kw_usage) + logger.info(f"Telegram-Keywords: {tg_keywords}") + + articles = await tg_parser.search_channels(title, tenant_id=tenant_id, keywords=tg_keywords, channel_ids=selected_ids) logger.info(f"Telegram-Pipeline: {len(articles)} Nachrichten") return articles, None @@ -814,7 +838,7 @@ class AgentOrchestrator: # Bestehende Fakten und alle Artikel vorladen (für parallele Tasks) cursor = await db.execute( - "SELECT id, claim, status, sources_count FROM fact_checks WHERE incident_id = ?", + "SELECT id, claim, status, sources_count, evidence FROM fact_checks WHERE incident_id = ?", (incident_id,), ) existing_facts = [dict(row) for row in await cursor.fetchall()] @@ -972,11 +996,23 @@ class AgentOrchestrator: history = [] history.append({"status": new_status, "at": now}) history_update = _json.dumps(history) + # Evidence: Alte URLs beibehalten wenn neue keine hat + new_evidence = fc.get("evidence") or "" + import re as _re + if not _re.search(r"https?://", new_evidence) and matched.get("evidence"): + old_evidence = matched["evidence"] or "" + if _re.search(r"https?://", old_evidence): + bracket_match = _re.search(r"\[(?:Quellen|Weitere Quellen|Ursprungsquellen):.*?\]", old_evidence) + if bracket_match: + new_evidence = new_evidence.rstrip(". ") + ". " + bracket_match.group() + else: + new_evidence = old_evidence + await db.execute( "UPDATE fact_checks SET claim = ?, status = ?, sources_count = ?, evidence = ?, is_notification = ?, checked_at = ?" + (", status_history = ?" if history_update else "") + " WHERE id = ?", - (new_claim, new_status, fc.get("sources_count", 0), fc.get("evidence"), fc.get("is_notification", 0), now) + (new_claim, new_status, fc.get("sources_count", 0), new_evidence, fc.get("is_notification", 0), now) + ((history_update,) if history_update else ()) + (matched["id"],), )