feat: Intelligente Telegram-Kanal-Selektion und verbesserte Quellenzuordnung

- Researcher: Claude-basierte Vorauswahl relevanter Telegram-Kanäle per Haiku
- FactChecker: Verbesserte Quellen-Zuordnung mit Relevanz-Scoring (Top 5)
- FactChecker: URLs werden nicht mehr doppelt zugeordnet, sources_count wird aktualisiert
- TelegramParser: Kanal-Filterung per channel_ids statt categories
- TelegramParser: Lockereres Keyword-Matching (1 Match reicht, da vorselektiert)
- Models: telegram_categories-Feld entfernt (durch KI-Selektion ersetzt)
- Main: Chat-Router eingebunden unter /api/chat

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 18:01:32 +01:00
Commit 0019d74aea
--- a/src/feeds/telegram_parser.py
+++ b/src/feeds/telegram_parser.py
@@ -61,7 +61,7 @@ class TelegramParser:
                return None

    async def search_channels(self, search_term: str, tenant_id: int = None,
-                              keywords: list[str] = None, categories: list[str] = None) -> list[dict]:
+                              keywords: list[str] = None, channel_ids: list[int] = None) -> list[dict]:
        """Liest Nachrichten aus konfigurierten Telegram-Kanaelen.

        Gibt Artikel-Dicts zurueck (kompatibel mit RSS-Parser-Format).
@@ -72,7 +72,7 @@ class TelegramParser:
            return []

        # Telegram-Kanaele aus DB laden
-        channels = await self._get_telegram_channels(tenant_id, categories=categories)
+        channels = await self._get_telegram_channels(tenant_id, channel_ids=channel_ids)
        if not channels:
            logger.info("Keine Telegram-Kanaele konfiguriert")
            return []
@@ -106,25 +106,24 @@ class TelegramParser:
        logger.info("Telegram: %d relevante Nachrichten aus %d Kanaelen", len(all_articles), len(channels))
        return all_articles

-    async def _get_telegram_channels(self, tenant_id: int = None, categories: list[str] = None) -> list[dict]:
+    async def _get_telegram_channels(self, tenant_id: int = None, channel_ids: list[int] = None) -> list[dict]:
        """Laedt Telegram-Kanaele aus der sources-Tabelle."""
        try:
            from database import get_db
            db = await get_db()
            try:
-                if categories and len(categories) > 0:
-                    placeholders = ",".join("?" for _ in categories)
+                if channel_ids and len(channel_ids) > 0:
+                    placeholders = ",".join("?" for _ in channel_ids)
                    cursor = await db.execute(
-                        f"""SELECT id, name, url FROM sources
+                        f"""SELECT id, name, url, category, notes FROM sources
                           WHERE source_type = 'telegram_channel'
                           AND status = 'active'
-                           AND (tenant_id IS NULL OR tenant_id = ?)
-                           AND category IN ({placeholders})""",
-                        (tenant_id, *categories),
+                           AND id IN ({placeholders})""",
+                        tuple(channel_ids),
                    )
                else:
                    cursor = await db.execute(
-                        """SELECT id, name, url FROM sources
+                        """SELECT id, name, url, category, notes FROM sources
                           WHERE source_type = 'telegram_channel'
                           AND status = 'active'
                           AND (tenant_id IS NULL OR tenant_id = ?)""",
@@ -171,11 +170,11 @@ class TelegramParser:
                text = msg.text
                text_lower = text.lower()

-                # Keyword-Matching (gleiche Logik wie RSS-Parser)
-                min_matches = min(2, max(1, (len(search_words) + 1) // 2))
+                # Keyword-Matching (lockerer als RSS: 1 Match reicht,
+                # da Kanaele bereits thematisch vorselektiert sind)
                match_count = sum(1 for word in search_words if word in text_lower)

-                if match_count < min_matches:
+                if match_count < 1:
                    continue

                # Erste Zeile als Headline, Rest als Content