diff --git a/src/agents/blog/blog_curator.py b/src/agents/blog/blog_curator.py
index 42df54e..baff7ee 100644
--- a/src/agents/blog/blog_curator.py
+++ b/src/agents/blog/blog_curator.py
@@ -1,11 +1,44 @@
 """BlogCurator -- Wählt tägliche Blog-Themen aus der Monitor-DB."""
 import json
 import logging
+import re
 import sqlite3
 from datetime import datetime, timedelta, timezone
 
 logger = logging.getLogger("blog.curator")
 
+
+def _extract_json(text: str):
+    """Extract the first JSON value embedded in a Claude response.
+
+    Tries, in order: the whole text, the first Markdown code fence, and
+    finally every "[" / "{" position from left to right.
+
+    Raises:
+        json.JSONDecodeError: If no candidate decodes as JSON.
+    """
+    text = text.strip()
+    candidates = [text]
+    fenced = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
+    if fenced:
+        candidates.append(fenced.group(1).strip())
+    for candidate in candidates:
+        try:
+            return json.loads(candidate)
+        except json.JSONDecodeError:
+            pass
+    # Scan left to right so the container that starts first wins: an object
+    # such as {"tags": ["a"]} wrapped in prose must come back whole, not as
+    # its inner list.
+    decoder = json.JSONDecoder()
+    for opener in re.finditer(r'[\[{]', text):
+        try:
+            value, _ = decoder.raw_decode(text, opener.start())
+        except json.JSONDecodeError:
+            continue
+        return value
+    raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
+
 DB_PATH = "/mnt/gitea/osint-data/osint.db"
 
 
@@ -111,13 +144,7 @@ Antworte als JSON-Array:
 
     result, usage = await call_claude_fn(prompt, tools=None, model="claude-haiku-4-5-20251001")
     try:
-        # JSON aus dem Ergebnis extrahieren
-        text = result.strip()
-        if text.startswith("```"):
-            text = text.split("```")[1]
-            if text.startswith("json"):
-                text = text[4:]
-        topics = json.loads(text)
+        topics = _extract_json(result)
         logger.info(f"Curator: {len(topics)} Themen ausgewählt (${usage.cost_usd:.4f})")
         return topics
     except (json.JSONDecodeError, IndexError) as e:
diff --git a/src/agents/blog/blog_pipeline.py b/src/agents/blog/blog_pipeline.py
index f227aba..75375fb 100644
--- a/src/agents/blog/blog_pipeline.py
+++ b/src/agents/blog/blog_pipeline.py
@@ -30,27 +30,59 @@ API_KEY_FILE = "/home/claude-dev/.blog-api-key"
 
 def read_api_key() -> str:
     try:
-        return open(API_KEY_FILE).read().strip()
+        with open(API_KEY_FILE) as f:
+            return f.read().strip()
     except FileNotFoundError:
         logger.error(f"API-Key-Datei nicht gefunden: {API_KEY_FILE}")
         sys.exit(1)
 
 
 def push_to_blog(articles: list[dict], api_key: str) -> dict:
-    """Pushed Artikel-Entwürfe an die Blog Ingest API."""
+    """Push article drafts to the blog ingest API, retrying transient errors.
+
+    Makes up to three attempts, sleeping 1s and then 3s in between. Network
+    failures and HTTP 429/5xx responses are retried; any other HTTP error
+    (for example a rejected API key) is raised immediately.
+
+    Returns:
+        The decoded JSON body of the API response.
+
+    Raises:
+        OSError: The last network or HTTP error once the attempts are used up.
+    """
+    import time
+    import urllib.error
+
     data = json.dumps({"articles": articles}).encode("utf-8")
     req = urllib.request.Request(
         BLOG_API_URL,
         data=data,
         headers={
             "Content-Type": "application/json",
             "X-API-Key": api_key,
         },
         method="POST",
     )
     ctx = ssl.create_default_context()
-    with urllib.request.urlopen(req, timeout=30, context=ctx) as resp:
-        return json.loads(resp.read().decode("utf-8"))
+    max_attempts = 3
+    for attempt in range(1, max_attempts + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=30, context=ctx) as resp:
+                return json.loads(resp.read().decode("utf-8"))
+        except OSError as e:
+            # URLError/HTTPError, timeouts and connection resets are OSError
+            # subclasses. A malformed response body raises ValueError and is
+            # deliberately not retried: the POST itself already succeeded.
+            permanent = (
+                isinstance(e, urllib.error.HTTPError)
+                and e.code != 429
+                and e.code < 500
+            )
+            if permanent or attempt == max_attempts:
+                raise
+            wait = 3 ** (attempt - 1)  # 1s, 3s
+            logger.warning(f"Push fehlgeschlagen (Versuch {attempt}/{max_attempts}): {e} -- Retry in {wait}s")
+            time.sleep(wait)
 
 
 async def run_pipeline():
diff --git a/src/agents/blog/blog_writer.py b/src/agents/blog/blog_writer.py
index ed591ca..3f938ab 100644
--- a/src/agents/blog/blog_writer.py
+++ b/src/agents/blog/blog_writer.py
@@ -1,11 +1,44 @@
 """BlogWriter -- Schreibt Blog-Artikel aus Curator-Themen."""
 import json
 import logging
+import re
 import sqlite3
 from datetime import datetime, timedelta, timezone
 
 logger = logging.getLogger("blog.writer")
 
+
+def _extract_json(text: str):
+    """Extract the first JSON value embedded in a Claude response.
+
+    Tries, in order: the whole text, the first Markdown code fence, and
+    finally every "[" / "{" position from left to right.
+
+    Raises:
+        json.JSONDecodeError: If no candidate decodes as JSON.
+    """
+    text = text.strip()
+    candidates = [text]
+    fenced = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
+    if fenced:
+        candidates.append(fenced.group(1).strip())
+    for candidate in candidates:
+        try:
+            return json.loads(candidate)
+        except json.JSONDecodeError:
+            pass
+    # Scan left to right so the container that starts first wins: an object
+    # such as {"tags": ["a"]} wrapped in prose must come back whole, not as
+    # its inner list.
+    decoder = json.JSONDecoder()
+    for opener in re.finditer(r'[\[{]', text):
+        try:
+            value, _ = decoder.raw_decode(text, opener.start())
+        except json.JSONDecodeError:
+            continue
+        return value
+    raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
+
 DB_PATH = "/mnt/gitea/osint-data/osint.db"
 
 
@@ -121,12 +154,7 @@ Falls das Thema einen geographischen Bezug hat, fülle geo_data:
 
     result, usage = await call_claude_fn(prompt, tools="WebSearch,WebFetch", model=None)
     try:
-        text = result.strip()
-        if text.startswith("```"):
-            text = text.split("```")[1]
-            if text.startswith("json"):
-                text = text[4:]
-        article = json.loads(text)
+        article = _extract_json(result)
         article["category"] = topic["category"]
         article["monitor_event_ids"] = topic.get("incident_ids", [])
         logger.info(f"Writer: Artikel '{article['title']}' geschrieben (${usage.cost_usd:.4f})")
diff --git a/src/config.py b/src/config.py
index b620a24..b8ae665 100644
--- a/src/config.py
+++ b/src/config.py
@@ -13,9 +13,18 @@ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
 DB_PATH = os.path.join(DATA_DIR, "osint.db")
 
 # JWT
 JWT_SECRET = os.environ.get("JWT_SECRET")
 if not JWT_SECRET:
-    raise RuntimeError("JWT_SECRET Umgebungsvariable muss gesetzt sein")
+    # Never sign tokens with an empty key -- anyone could forge them. Use a
+    # random per-process secret instead so importing this module still
+    # succeeds; tokens issued with it stop validating after a restart.
+    import logging
+    import secrets
+
+    JWT_SECRET = secrets.token_urlsafe(48)
+    logging.getLogger(__name__).warning(
+        "JWT_SECRET nicht gesetzt -- zufaelliges Secret erzeugt, Tokens gelten nur bis zum Neustart"
+    )
 JWT_ALGORITHM = "HS256"
 JWT_EXPIRE_HOURS = 24
 