Promote develop → main (2026-05-01 15:16 UTC)
This commit was merged in pull request #9.
Dieser Commit ist enthalten in:
@@ -844,7 +844,7 @@ class AgentOrchestrator:
|
|||||||
return articles, feed_usage
|
return articles, feed_usage
|
||||||
|
|
||||||
async def _web_search_pipeline():
|
async def _web_search_pipeline():
|
||||||
"""Claude WebSearch-Recherche."""
|
"""Claude WebSearch-Recherche mit Vorselektion eingetragener Web-Quellen."""
|
||||||
researcher = ResearcherAgent()
|
researcher = ResearcherAgent()
|
||||||
# Bestehende Artikel als Kontext mitgeben (Research + Adhoc)
|
# Bestehende Artikel als Kontext mitgeben (Research + Adhoc)
|
||||||
existing_for_context = None
|
existing_for_context = None
|
||||||
@@ -855,13 +855,31 @@ class AgentOrchestrator:
|
|||||||
"source_url": row["source_url"]}
|
"source_url": row["source_url"]}
|
||||||
for row in existing_db_articles_full
|
for row in existing_db_articles_full
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Web-Quellen vorselektieren (Haiku) — nur thematisch passende werden Claude im Prompt empfohlen
|
||||||
|
preferred_sources = []
|
||||||
|
try:
|
||||||
|
from source_rules import get_feeds_with_metadata
|
||||||
|
web_sources = await get_feeds_with_metadata(tenant_id=tenant_id, source_type="web_source")
|
||||||
|
if web_sources:
|
||||||
|
preferred_sources, web_sel_usage = await researcher.select_relevant_web_sources(
|
||||||
|
title, description, web_sources,
|
||||||
|
)
|
||||||
|
if web_sel_usage:
|
||||||
|
usage_acc.add(web_sel_usage)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Web-Source-Vorselektion fehlgeschlagen (Pipeline laeuft weiter): {e}")
|
||||||
|
preferred_sources = []
|
||||||
|
|
||||||
results, usage, parse_failed = await researcher.search(
|
results, usage, parse_failed = await researcher.search(
|
||||||
title, description, incident_type,
|
title, description, incident_type,
|
||||||
international=international, user_id=user_id,
|
international=international, user_id=user_id,
|
||||||
existing_articles=existing_for_context,
|
existing_articles=existing_for_context,
|
||||||
|
preferred_sources=preferred_sources,
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Claude-Recherche: {len(results)} Ergebnisse"
|
f"Claude-Recherche: {len(results)} Ergebnisse"
|
||||||
|
+ (f" (mit {len(preferred_sources)} Web-Quellen-Hinweis)" if preferred_sources else "")
|
||||||
+ (" (Parser fehlgeschlagen)" if parse_failed else "")
|
+ (" (Parser fehlgeschlagen)" if parse_failed else "")
|
||||||
)
|
)
|
||||||
return results, usage, parse_failed
|
return results, usage, parse_failed
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
|
|||||||
AUFTRAG: Suche nach aktuellen Informationen zu folgendem Vorfall:
|
AUFTRAG: Suche nach aktuellen Informationen zu folgendem Vorfall:
|
||||||
Titel: {title}
|
Titel: {title}
|
||||||
Kontext: {description}
|
Kontext: {description}
|
||||||
{existing_context}
|
{existing_context}{preferred_sources_block}
|
||||||
REGELN:
|
REGELN:
|
||||||
- Suche nur bei seriösen Nachrichtenquellen (Nachrichtenagenturen, Qualitätszeitungen, öffentlich-rechtliche Medien, Behörden)
|
- Suche nur bei seriösen Nachrichtenquellen (Nachrichtenagenturen, Qualitätszeitungen, öffentlich-rechtliche Medien, Behörden)
|
||||||
- KEIN Social Media (Twitter/X, Facebook, Instagram, TikTok, Reddit)
|
- KEIN Social Media (Twitter/X, Facebook, Instagram, TikTok, Reddit)
|
||||||
@@ -100,7 +100,7 @@ WICHTIG: Verwende IMMER echte UTF-8-Umlaute (ä, ö, ü, ß) — NIEMALS Umschre
|
|||||||
AUFTRAG: Führe eine umfassende, mehrstufige Hintergrundrecherche durch zu:
|
AUFTRAG: Führe eine umfassende, mehrstufige Hintergrundrecherche durch zu:
|
||||||
Titel: {title}
|
Titel: {title}
|
||||||
Kontext: {description}
|
Kontext: {description}
|
||||||
{existing_context}
|
{existing_context}{preferred_sources_block}
|
||||||
RECHERCHE IN 4 PHASEN — Führe ALLE Phasen nacheinander durch:
|
RECHERCHE IN 4 PHASEN — Führe ALLE Phasen nacheinander durch:
|
||||||
|
|
||||||
PHASE 1 — BREITE ERFASSUNG:
|
PHASE 1 — BREITE ERFASSUNG:
|
||||||
@@ -212,6 +212,24 @@ Antwort NUR als JSON-Array:
|
|||||||
[{{"de": "iran", "en": "iran"}}, {{"de": "israel", "en": "israel"}}, {{"de": "teheran", "en": "tehran"}}, {{"de": "luftangriff", "en": "airstrike"}}, {{"de": "trump", "en": "trump"}}]"""
|
[{{"de": "iran", "en": "iran"}}, {{"de": "israel", "en": "israel"}}, {{"de": "teheran", "en": "tehran"}}, {{"de": "luftangriff", "en": "airstrike"}}, {{"de": "trump", "en": "trump"}}]"""
|
||||||
|
|
||||||
|
|
||||||
|
WEB_SOURCE_SELECTION_PROMPT = """Du bist ein OSINT-Analyst. Pruefe diese eingetragenen Web-Quellen und waehle nur die thematisch passenden aus.
|
||||||
|
|
||||||
|
LAGE: {title}
|
||||||
|
KONTEXT: {description}
|
||||||
|
|
||||||
|
WEB-QUELLEN:
|
||||||
|
{source_list}
|
||||||
|
|
||||||
|
REGELN:
|
||||||
|
- Waehle nur Quellen, die thematisch tatsaechlich zur Lage passen
|
||||||
|
- Lieber leere Liste zurueckgeben als pauschal alle aufnehmen
|
||||||
|
- Behoerden- und institutionelle Quellen sind oft hochwertig, aber nur wenn das Thema passt
|
||||||
|
- Petitions-Plattformen z.B. nur bei Lagen zu Buergerinitiativen, Gesetzen, oeffentlichem Druck
|
||||||
|
- Bei reinen Kriegs-/Konflikt-/Tagesnachrichten meistens leere Liste
|
||||||
|
|
||||||
|
Antworte NUR mit einem JSON-Array der Quellen-Nummern, z.B. [1, 3] oder []."""
|
||||||
|
|
||||||
|
|
||||||
TELEGRAM_CHANNEL_SELECTION_PROMPT = """Du bist ein OSINT-Analyst. Waehle aus dieser Liste von Telegram-Kanaelen diejenigen aus, die fuer die Lage relevant sein koennten.
|
TELEGRAM_CHANNEL_SELECTION_PROMPT = """Du bist ein OSINT-Analyst. Waehle aus dieser Liste von Telegram-Kanaelen diejenigen aus, die fuer die Lage relevant sein koennten.
|
||||||
|
|
||||||
LAGE: {title}
|
LAGE: {title}
|
||||||
@@ -355,7 +373,7 @@ class ResearcherAgent:
|
|||||||
logger.warning(f"Keyword-Extraktion fehlgeschlagen: {e}")
|
logger.warning(f"Keyword-Extraktion fehlgeschlagen: {e}")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
async def search(self, title: str, description: str = "", incident_type: str = "adhoc", international: bool = True, user_id: int = None, existing_articles: list[dict] = None) -> tuple[list[dict], ClaudeUsage | None, bool]:
|
async def search(self, title: str, description: str = "", incident_type: str = "adhoc", international: bool = True, user_id: int = None, existing_articles: list[dict] = None, preferred_sources: list[dict] = None) -> tuple[list[dict], ClaudeUsage | None, bool]:
|
||||||
"""Sucht nach Informationen zu einem Vorfall.
|
"""Sucht nach Informationen zu einem Vorfall.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@@ -364,6 +382,26 @@ class ResearcherAgent:
|
|||||||
"echt keine Treffer" und "kaputte Antwort" unterscheiden.
|
"echt keine Treffer" und "kaputte Antwort" unterscheiden.
|
||||||
"""
|
"""
|
||||||
from config import OUTPUT_LANGUAGE
|
from config import OUTPUT_LANGUAGE
|
||||||
|
|
||||||
|
# Bevorzugte Web-Quellen als Prompt-Block (optional)
|
||||||
|
preferred_sources_block = ""
|
||||||
|
if preferred_sources:
|
||||||
|
ps_lines = []
|
||||||
|
for s in preferred_sources:
|
||||||
|
domain = s.get("domain", "")
|
||||||
|
name = s.get("name", domain) or domain
|
||||||
|
if not domain:
|
||||||
|
continue
|
||||||
|
ps_lines.append(f"- {domain} ({name})")
|
||||||
|
if ps_lines:
|
||||||
|
preferred_sources_block = (
|
||||||
|
"\nEINGETRAGENE WEB-QUELLEN (vom Betreiber als seriös markiert):\n"
|
||||||
|
+ "\n".join(ps_lines) + "\n"
|
||||||
|
"EMPFEHLUNG: Wenn diese Domains thematisch zur Lage passen, suche dort gezielt "
|
||||||
|
"mit \"site:domain [Suchbegriff]\". Sie sind vertrauenswuerdig eingetragen, ersetzen "
|
||||||
|
"aber nicht deine sonstige Recherche.\n"
|
||||||
|
)
|
||||||
|
|
||||||
if incident_type == "research":
|
if incident_type == "research":
|
||||||
lang_instruction = LANG_DEEP_INTERNATIONAL if international else LANG_DEEP_GERMAN_ONLY
|
lang_instruction = LANG_DEEP_INTERNATIONAL if international else LANG_DEEP_GERMAN_ONLY
|
||||||
# Bestehende Artikel als Kontext für den Prompt aufbereiten
|
# Bestehende Artikel als Kontext für den Prompt aufbereiten
|
||||||
@@ -383,6 +421,7 @@ class ResearcherAgent:
|
|||||||
prompt = DEEP_RESEARCH_PROMPT_TEMPLATE.format(
|
prompt = DEEP_RESEARCH_PROMPT_TEMPLATE.format(
|
||||||
title=title, description=description, language_instruction=lang_instruction,
|
title=title, description=description, language_instruction=lang_instruction,
|
||||||
output_language=OUTPUT_LANGUAGE, existing_context=existing_context,
|
output_language=OUTPUT_LANGUAGE, existing_context=existing_context,
|
||||||
|
preferred_sources_block=preferred_sources_block,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
lang_instruction = LANG_INTERNATIONAL if international else LANG_GERMAN_ONLY
|
lang_instruction = LANG_INTERNATIONAL if international else LANG_GERMAN_ONLY
|
||||||
@@ -401,6 +440,7 @@ class ResearcherAgent:
|
|||||||
prompt = RESEARCH_PROMPT_TEMPLATE.format(
|
prompt = RESEARCH_PROMPT_TEMPLATE.format(
|
||||||
title=title, description=description, language_instruction=lang_instruction,
|
title=title, description=description, language_instruction=lang_instruction,
|
||||||
output_language=OUTPUT_LANGUAGE, existing_context=existing_context,
|
output_language=OUTPUT_LANGUAGE, existing_context=existing_context,
|
||||||
|
preferred_sources_block=preferred_sources_block,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -514,6 +554,67 @@ class ResearcherAgent:
|
|||||||
)
|
)
|
||||||
raise ResearcherParseError(f"Claude-Antwort enthielt kein verwertbares JSON (Laenge: {len(text)})")
|
raise ResearcherParseError(f"Claude-Antwort enthielt kein verwertbares JSON (Laenge: {len(text)})")
|
||||||
|
|
||||||
|
async def select_relevant_web_sources(
    self,
    title: str,
    description: str,
    web_sources: list[dict],
) -> tuple[list[dict], ClaudeUsage | None]:
    """Ask Claude (fast model) which registered web sources fit the incident.

    Each source is rendered as a numbered line (name, domain, category and up
    to 80 characters of notes) and inserted into WEB_SOURCE_SELECTION_PROMPT.
    The model is expected to answer with a JSON array of 1-based source
    numbers.

    Args:
        title: Incident title inserted into the prompt.
        description: Incident description; a placeholder text is used when
            it is empty.
        web_sources: Candidate source dicts. The keys read here are
            "name", "domain", "category" and "notes".

    Returns:
        (selected sources, usage). The selection keeps the order returned by
        the model, without duplicates. An empty selection is explicitly
        allowed when the model is consulted. On failure the selection is
        empty; usage is None unless the model call itself had already
        succeeded, in which case its usage is still reported.
    """
    if not web_sources:
        return [], None

    # With very few sources the selection call is hardly worth it: all of
    # them are passed through and no model call is made.
    if len(web_sources) <= 3:
        logger.info("Web-Source-Selektion: Nur %d Quellen, alle uebernehmen", len(web_sources))
        return list(web_sources), None

    lines = []
    for i, src in enumerate(web_sources, 1):
        # `or` instead of a .get() default: a key that is present with the
        # value None would otherwise be rendered as the text "None".
        domain = src.get("domain") or ""
        name = src.get("name") or domain
        cat = src.get("category") or "sonstige"
        notes = (src.get("notes") or "")[:80]
        line = f"{i}. {name} ({domain}) [{cat}]"
        if notes:
            line += f" - {notes}"
        lines.append(line)

    prompt = WEB_SOURCE_SELECTION_PROMPT.format(
        title=title,
        description=description or "Keine weitere Beschreibung",
        source_list="\n".join(lines),
    )

    # Bound before the try so that the except branch can still report the
    # usage of a model call that succeeded before a later step failed.
    usage = None
    try:
        result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
        indices = _extract_json_array(result)
        if not isinstance(indices, list):
            logger.warning(
                "Web-Source-Selektion: Kein JSON in Antwort, ignoriere Quellen. Sample: %s",
                _truncate_for_log(result),
            )
            return [], usage

        selected = []
        seen = set()
        for idx in indices:
            # bool is a subclass of int; a JSON `true` must not select source 1.
            if isinstance(idx, bool) or not isinstance(idx, int):
                continue
            if not 1 <= idx <= len(web_sources):
                continue
            # Skip repeated numbers so a source is never listed twice.
            if idx in seen:
                continue
            seen.add(idx)
            selected.append(web_sources[idx - 1])

        # A None domain must not break the join: the resulting TypeError
        # would land in the except below and discard a valid selection.
        domains = ", ".join(str(s.get("domain") or "") for s in selected)
        logger.info(
            "Web-Source-Selektion: %d von %d ausgewaehlt%s",
            len(selected), len(web_sources),
            f" ({domains})" if selected else "",
        )
        return selected, usage
    except Exception as e:
        # Best effort: a failed pre-selection must never abort the caller.
        logger.warning("Web-Source-Selektion fehlgeschlagen (%s)", e)
        return [], usage
|
||||||
|
|
||||||
async def select_relevant_telegram_channels(
|
async def select_relevant_telegram_channels(
|
||||||
self,
|
self,
|
||||||
title: str,
|
title: str,
|
||||||
|
|||||||
@@ -155,8 +155,16 @@ class RSSParser:
|
|||||||
summary = entry.get("summary", "")
|
summary = entry.get("summary", "")
|
||||||
text = f"{title} {summary}".lower()
|
text = f"{title} {summary}".lower()
|
||||||
|
|
||||||
# Flexibles Keyword-Matching: mindestens die Hälfte der Suchworte muss vorkommen (aufgerundet)
|
# Adaptive Match-Schwelle:
|
||||||
min_matches = min(2, max(1, (len(search_words) + 1) // 2))
|
# - Bei mindestens einem spezifischen Keyword (>=7 Zeichen) im Text reicht 1 Treffer.
|
||||||
|
# Verhindert, dass Headlines mit nur einem starken Keyword wie "buckelwal"
|
||||||
|
# rausfallen, wenn die Lage thematisch eng ist (Bug 1, vom User dokumentiert).
|
||||||
|
# - Sonst: alte Heuristik (mindestens halb der Wörter, max. 2).
|
||||||
|
specific_in_text = any(w in text for w in search_words if len(w) >= 7)
|
||||||
|
if specific_in_text:
|
||||||
|
min_matches = 1
|
||||||
|
else:
|
||||||
|
min_matches = min(2, max(1, (len(search_words) + 1) // 2))
|
||||||
match_count = sum(1 for word in search_words if word in text)
|
match_count = sum(1 for word in search_words if word in text)
|
||||||
|
|
||||||
if match_count >= min_matches:
|
if match_count >= min_matches:
|
||||||
|
|||||||
@@ -649,14 +649,14 @@ async def get_feeds_with_metadata(tenant_id: int = None, source_type: str = "rss
|
|||||||
try:
|
try:
|
||||||
if tenant_id:
|
if tenant_id:
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"SELECT name, url, domain, category, COALESCE(article_count, 0) AS article_count FROM sources "
|
"SELECT name, url, domain, category, notes, COALESCE(article_count, 0) AS article_count FROM sources "
|
||||||
"WHERE source_type = ? AND status = 'active' "
|
"WHERE source_type = ? AND status = 'active' "
|
||||||
"AND (tenant_id IS NULL OR tenant_id = ?)",
|
"AND (tenant_id IS NULL OR tenant_id = ?)",
|
||||||
(source_type, tenant_id),
|
(source_type, tenant_id),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"SELECT name, url, domain, category, COALESCE(article_count, 0) AS article_count FROM sources "
|
"SELECT name, url, domain, category, notes, COALESCE(article_count, 0) AS article_count FROM sources "
|
||||||
"WHERE source_type = ? AND status = 'active'",
|
"WHERE source_type = ? AND status = 'active'",
|
||||||
(source_type,),
|
(source_type,),
|
||||||
)
|
)
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren