fix: Blog-Pipeline lauffähig + robust
- Shell-Script: source .env statt dotenv (K1+K2)
- config.py: JWT_SECRET Default statt Crash beim Import (M17)
- JSON-Parsing: Robuste Extraktion aus Claude-Antworten (M16)
- Push-Retry mit exponentiellem Backoff (N8)
- open() mit with-Statement (N9)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -1,11 +1,38 @@
|
||||
"""BlogCurator -- Wählt tägliche Blog-Themen aus der Monitor-DB."""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
logger = logging.getLogger("blog.curator")
|
||||
|
||||
|
||||
def _extract_json(text: str):
|
||||
"""Extrahiert JSON aus Claude-Antworten (robust)."""
|
||||
text = text.strip()
|
||||
# 1. Direktes Parsen versuchen
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# 2. JSON aus Markdown-Codeblock extrahieren
|
||||
code_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
|
||||
if code_block:
|
||||
try:
|
||||
return json.loads(code_block.group(1).strip())
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# 3. Erstes JSON-Array oder -Objekt im Text finden
|
||||
for pattern in [r'(\[[\s\S]*\])', r'(\{[\s\S]*\})']:
|
||||
match = re.search(pattern, text)
|
||||
if match:
|
||||
try:
|
||||
return json.loads(match.group(1))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
||||
|
||||
DB_PATH = "/mnt/gitea/osint-data/osint.db"
|
||||
|
||||
|
||||
@@ -111,13 +138,7 @@ Antworte als JSON-Array:
|
||||
result, usage = await call_claude_fn(prompt, tools=None, model="claude-haiku-4-5-20251001")
|
||||
|
||||
try:
|
||||
# JSON aus dem Ergebnis extrahieren
|
||||
text = result.strip()
|
||||
if text.startswith("```"):
|
||||
text = text.split("```")[1]
|
||||
if text.startswith("json"):
|
||||
text = text[4:]
|
||||
topics = json.loads(text)
|
||||
topics = _extract_json(result)
|
||||
logger.info(f"Curator: {len(topics)} Themen ausgewählt (${usage.cost_usd:.4f})")
|
||||
return topics
|
||||
except (json.JSONDecodeError, IndexError) as e:
|
||||
|
||||
@@ -30,27 +30,36 @@ API_KEY_FILE = "/home/claude-dev/.blog-api-key"
|
||||
|
||||
def read_api_key() -> str:
    """Read the blog API key from ``API_KEY_FILE``.

    Returns:
        The file contents with surrounding whitespace stripped.

    If the file does not exist, an error is logged and the process exits
    with status 1.
    """
    try:
        # The context manager closes the handle even if read() raises.
        with open(API_KEY_FILE, encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        logger.error(f"API-Key-Datei nicht gefunden: {API_KEY_FILE}")
        sys.exit(1)
|
||||
|
||||
|
||||
def push_to_blog(articles: list[dict], api_key: str) -> dict:
    """Push article drafts to the blog ingest API, retrying transient failures.

    The payload ``{"articles": articles}`` is POSTed as JSON to
    ``BLOG_API_URL`` with the key in the ``X-API-Key`` header. Up to three
    attempts are made, waiting 1s and then 3s between them.

    Only transport-level problems (network errors, timeouts, HTTP 5xx,
    408 and 429) are retried. Other 4xx responses and an unparseable
    response body are raised immediately, because repeating the same
    request cannot fix them.

    Args:
        articles: Article drafts to submit.
        api_key: Value sent in the ``X-API-Key`` header.

    Returns:
        The decoded JSON response body.

    Raises:
        urllib.error.HTTPError: For a non-retryable 4xx response, or for
            a retryable status once all attempts are used up.
        OSError: For network errors or timeouts once all attempts are
            used up (``urllib.error.URLError`` is a subclass).
        http.client.HTTPException: For protocol-level failures once all
            attempts are used up.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    import http.client
    import time
    import urllib.error

    data = json.dumps({"articles": articles}).encode("utf-8")
    ctx = ssl.create_default_context()
    max_attempts = 3

    for attempt in range(max_attempts):
        req = urllib.request.Request(
            BLOG_API_URL,
            data=data,
            headers={"Content-Type": "application/json", "X-API-Key": api_key},
            method="POST",
        )
        try:
            with urllib.request.urlopen(req, timeout=30, context=ctx) as resp:
                return json.loads(resp.read().decode("utf-8"))
        except (OSError, http.client.HTTPException) as e:
            # A client error such as a bad key or bad payload will fail the
            # same way on every attempt; only 408/429 are worth retrying.
            permanent = (
                isinstance(e, urllib.error.HTTPError)
                and 400 <= e.code < 500
                and e.code not in (408, 429)
            )
            if permanent or attempt == max_attempts - 1:
                raise
            wait = 3 ** attempt  # 1s, 3s
            logger.warning(f"Push fehlgeschlagen (Versuch {attempt + 1}/3): {e} -- Retry in {wait}s")
            time.sleep(wait)

    # The loop always returns or raises on its final attempt.
    raise AssertionError("unreachable")
|
||||
|
||||
|
||||
async def run_pipeline():
|
||||
|
||||
@@ -1,11 +1,38 @@
|
||||
"""BlogWriter -- Schreibt Blog-Artikel aus Curator-Themen."""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
logger = logging.getLogger("blog.writer")
|
||||
|
||||
|
||||
def _extract_json(text: str):
|
||||
"""Extrahiert JSON aus Claude-Antworten (robust)."""
|
||||
text = text.strip()
|
||||
# 1. Direktes Parsen versuchen
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# 2. JSON aus Markdown-Codeblock extrahieren
|
||||
code_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
|
||||
if code_block:
|
||||
try:
|
||||
return json.loads(code_block.group(1).strip())
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# 3. Erstes JSON-Array oder -Objekt im Text finden
|
||||
for pattern in [r'(\[[\s\S]*\])', r'(\{[\s\S]*\})']:
|
||||
match = re.search(pattern, text)
|
||||
if match:
|
||||
try:
|
||||
return json.loads(match.group(1))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
||||
|
||||
DB_PATH = "/mnt/gitea/osint-data/osint.db"
|
||||
|
||||
|
||||
@@ -121,12 +148,7 @@ Falls das Thema einen geographischen Bezug hat, fülle geo_data:
|
||||
result, usage = await call_claude_fn(prompt, tools="WebSearch,WebFetch", model=None)
|
||||
|
||||
try:
|
||||
text = result.strip()
|
||||
if text.startswith("```"):
|
||||
text = text.split("```")[1]
|
||||
if text.startswith("json"):
|
||||
text = text[4:]
|
||||
article = json.loads(text)
|
||||
article = _extract_json(result)
|
||||
article["category"] = topic["category"]
|
||||
article["monitor_event_ids"] = topic.get("incident_ids", [])
|
||||
logger.info(f"Writer: Artikel '{article['title']}' geschrieben (${usage.cost_usd:.4f})")
|
||||
|
||||
@@ -13,9 +13,7 @@ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
|
||||
DB_PATH = os.path.join(DATA_DIR, "osint.db")
|
||||
|
||||
# JWT
# The secret defaults to an empty string so that importing this module does
# not crash when JWT_SECRET is unset. An empty secret must never be used to
# sign or verify tokens; call require_jwt_secret() at those points instead.
JWT_SECRET = os.environ.get("JWT_SECRET", "")
JWT_ALGORITHM = "HS256"
JWT_EXPIRE_HOURS = 24


def require_jwt_secret() -> str:
    """Return ``JWT_SECRET``, failing loudly if it is not configured.

    Returns:
        The non-empty JWT signing secret.

    Raises:
        RuntimeError: If the JWT_SECRET environment variable was unset or
            empty when this module was imported.
    """
    if not JWT_SECRET:
        raise RuntimeError("JWT_SECRET Umgebungsvariable muss gesetzt sein")
    return JWT_SECRET
|
||||
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren