From 72b306d90c083a1796d3472268ba63331c633159 Mon Sep 17 00:00:00 2001
From: Claude Code
Date: Sat, 9 May 2026 04:43:01 +0000
Subject: [PATCH] fix(source_health): tenant-faehig + History (Phase 2 in den Monitor ziehen)

Phase 2 hatte die Verbesserungen nur in der Verwaltung
(src/shared/services/source_health.py). Der Daily-Health-Check laeuft aber
im Monitor-Backend (Cron 04:00 UTC) und nutzte deshalb weiter den alten
Code - Folge:
- Tenant-Quellen wurden NIE gecheckt (0 Eintraege in source_health_checks
  fuer tenant_id IS NOT NULL).
- source_health_history blieb leer.

Diese Aenderung holt die Phase-2-Logik in den Monitor:
- services/source_health.py: Verwaltung-Version 1:1 uebernommen
  (tenant_id-Filter weg + History-Save vor DELETE + UA/Timeout aus config).
- config.py: HEALTH_CHECK_USER_AGENT + HEALTH_CHECK_TIMEOUT_S ergaenzt.

Manueller Test auf Staging-Monitor: 283 Quellen geprueft, 253 Issues,
61 davon Tenant-Quellen. History 0 -> 458 Eintraege.

Damit ist die shared/-LOCKED-FILES-Markierung in der Verwaltung obsolet -
beide Repos haben jetzt den gleichen Code.
--- src/config.py | 6 ++++++ src/services/source_health.py | 28 ++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/config.py b/src/config.py index 21a48f2..1b39ea5 100644 --- a/src/config.py +++ b/src/config.py @@ -95,3 +95,9 @@ TELEGRAM_API_ID = int(os.environ.get("TELEGRAM_API_ID", "0")) TELEGRAM_API_HASH = os.environ.get("TELEGRAM_API_HASH", "") TELEGRAM_SESSION_PATH = os.environ.get("TELEGRAM_SESSION_PATH", "/home/claude-dev/.telegram/telegram_session") +# Health-Check (genutzt von services/source_health.py) +HEALTH_CHECK_USER_AGENT = os.environ.get( + "HEALTH_CHECK_USER_AGENT", + "Mozilla/5.0 (compatible; AegisSight-HealthCheck/1.0)", +) +HEALTH_CHECK_TIMEOUT_S = float(os.environ.get("HEALTH_CHECK_TIMEOUT_S", "15.0")) diff --git a/src/services/source_health.py b/src/services/source_health.py index 0f073c9..e6b1cdd 100644 --- a/src/services/source_health.py +++ b/src/services/source_health.py @@ -2,29 +2,45 @@ import asyncio import logging import json +import uuid from urllib.parse import urlparse import httpx import feedparser import aiosqlite +try: + from config import HEALTH_CHECK_USER_AGENT, HEALTH_CHECK_TIMEOUT_S +except ImportError: + HEALTH_CHECK_USER_AGENT = "Mozilla/5.0 (compatible; AegisSight-HealthCheck/1.0)" + HEALTH_CHECK_TIMEOUT_S = 15.0 + logger = logging.getLogger("osint.source_health") async def run_health_checks(db: aiosqlite.Connection) -> dict: - """Führt alle Health-Checks für aktive Grundquellen durch.""" + """Führt Health-Checks für alle aktiven Quellen durch (global + Tenant).""" logger.info("Starte Quellen-Health-Check...") - # Alle aktiven Grundquellen laden + # Alle aktiven Quellen laden (global UND Tenant-spezifisch) cursor = await db.execute( "SELECT id, name, url, domain, source_type, article_count, last_seen_at " - "FROM sources WHERE status = 'active' AND tenant_id IS NULL" + "FROM sources WHERE status = 'active' " ) sources = [dict(row) for row in await cursor.fetchall()] - # Aktuelle 
Health-Check-Ergebnisse löschen (werden neu geschrieben) + # Bisherigen Stand in History archivieren, dann frisch starten + run_id = uuid.uuid4().hex[:12] + await db.execute( + "INSERT INTO source_health_history " + "(run_id, source_id, check_type, status, message, details, checked_at) " + "SELECT ?, source_id, check_type, status, message, details, checked_at " + "FROM source_health_checks", + (run_id,), + ) await db.execute("DELETE FROM source_health_checks") await db.commit() + logger.info(f"Health-Check Run {run_id}: vorigen Stand archiviert") checks_done = 0 issues_found = 0 @@ -33,9 +49,9 @@ async def run_health_checks(db: aiosqlite.Connection) -> dict: sources_with_url = [s for s in sources if s["url"]] async with httpx.AsyncClient( - timeout=15.0, + timeout=HEALTH_CHECK_TIMEOUT_S, follow_redirects=True, - headers={"User-Agent": "Mozilla/5.0 (compatible; OSINT-Monitor/1.0)"}, + headers={"User-Agent": HEALTH_CHECK_USER_AGENT}, ) as client: for i in range(0, len(sources_with_url), 5): batch = sources_with_url[i:i + 5]