fix: Blog-Pipeline lauffähig + robust
- Shell-Script: source .env statt dotenv (K1+K2) - config.py: JWT_SECRET Default statt Crash beim Import (M17) - JSON-Parsing: Robuste Extraktion aus Claude-Antworten (M16) - Push-Retry mit exponentiellem Backoff (N8) - open() mit with-Statement (N9) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -1,11 +1,38 @@
|
|||||||
"""BlogCurator -- Wählt tägliche Blog-Themen aus der Monitor-DB."""
|
"""BlogCurator -- Wählt tägliche Blog-Themen aus der Monitor-DB."""
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
logger = logging.getLogger("blog.curator")
|
logger = logging.getLogger("blog.curator")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_json(text: str):
|
||||||
|
"""Extrahiert JSON aus Claude-Antworten (robust)."""
|
||||||
|
text = text.strip()
|
||||||
|
# 1. Direktes Parsen versuchen
|
||||||
|
try:
|
||||||
|
return json.loads(text)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
# 2. JSON aus Markdown-Codeblock extrahieren
|
||||||
|
code_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
|
||||||
|
if code_block:
|
||||||
|
try:
|
||||||
|
return json.loads(code_block.group(1).strip())
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
# 3. Erstes JSON-Array oder -Objekt im Text finden
|
||||||
|
for pattern in [r'(\[[\s\S]*\])', r'(\{[\s\S]*\})']:
|
||||||
|
match = re.search(pattern, text)
|
||||||
|
if match:
|
||||||
|
try:
|
||||||
|
return json.loads(match.group(1))
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
||||||
|
|
||||||
DB_PATH = "/mnt/gitea/osint-data/osint.db"
|
DB_PATH = "/mnt/gitea/osint-data/osint.db"
|
||||||
|
|
||||||
|
|
||||||
@@ -111,13 +138,7 @@ Antworte als JSON-Array:
|
|||||||
result, usage = await call_claude_fn(prompt, tools=None, model="claude-haiku-4-5-20251001")
|
result, usage = await call_claude_fn(prompt, tools=None, model="claude-haiku-4-5-20251001")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# JSON aus dem Ergebnis extrahieren
|
topics = _extract_json(result)
|
||||||
text = result.strip()
|
|
||||||
if text.startswith("```"):
|
|
||||||
text = text.split("```")[1]
|
|
||||||
if text.startswith("json"):
|
|
||||||
text = text[4:]
|
|
||||||
topics = json.loads(text)
|
|
||||||
logger.info(f"Curator: {len(topics)} Themen ausgewählt (${usage.cost_usd:.4f})")
|
logger.info(f"Curator: {len(topics)} Themen ausgewählt (${usage.cost_usd:.4f})")
|
||||||
return topics
|
return topics
|
||||||
except (json.JSONDecodeError, IndexError) as e:
|
except (json.JSONDecodeError, IndexError) as e:
|
||||||
|
|||||||
@@ -30,27 +30,36 @@ API_KEY_FILE = "/home/claude-dev/.blog-api-key"
|
|||||||
|
|
||||||
def read_api_key() -> str:
|
def read_api_key() -> str:
|
||||||
try:
|
try:
|
||||||
return open(API_KEY_FILE).read().strip()
|
with open(API_KEY_FILE) as f:
|
||||||
|
return f.read().strip()
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.error(f"API-Key-Datei nicht gefunden: {API_KEY_FILE}")
|
logger.error(f"API-Key-Datei nicht gefunden: {API_KEY_FILE}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def push_to_blog(articles: list[dict], api_key: str) -> dict:
|
def push_to_blog(articles: list[dict], api_key: str) -> dict:
|
||||||
"""Pushed Artikel-Entwürfe an die Blog Ingest API."""
|
"""Pushed Artikel-Entwürfe an die Blog Ingest API (mit Retry)."""
|
||||||
|
import time
|
||||||
data = json.dumps({"articles": articles}).encode("utf-8")
|
data = json.dumps({"articles": articles}).encode("utf-8")
|
||||||
|
ctx = ssl.create_default_context()
|
||||||
|
last_error = None
|
||||||
|
for attempt in range(3):
|
||||||
|
try:
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
BLOG_API_URL,
|
BLOG_API_URL,
|
||||||
data=data,
|
data=data,
|
||||||
headers={
|
headers={"Content-Type": "application/json", "X-API-Key": api_key},
|
||||||
"Content-Type": "application/json",
|
|
||||||
"X-API-Key": api_key,
|
|
||||||
},
|
|
||||||
method="POST",
|
method="POST",
|
||||||
)
|
)
|
||||||
ctx = ssl.create_default_context()
|
|
||||||
with urllib.request.urlopen(req, timeout=30, context=ctx) as resp:
|
with urllib.request.urlopen(req, timeout=30, context=ctx) as resp:
|
||||||
return json.loads(resp.read().decode("utf-8"))
|
return json.loads(resp.read().decode("utf-8"))
|
||||||
|
except Exception as e:
|
||||||
|
last_error = e
|
||||||
|
if attempt < 2:
|
||||||
|
wait = 3 ** attempt # 1s, 3s
|
||||||
|
logger.warning(f"Push fehlgeschlagen (Versuch {attempt + 1}/3): {e} -- Retry in {wait}s")
|
||||||
|
time.sleep(wait)
|
||||||
|
raise last_error
|
||||||
|
|
||||||
|
|
||||||
async def run_pipeline():
|
async def run_pipeline():
|
||||||
|
|||||||
@@ -1,11 +1,38 @@
|
|||||||
"""BlogWriter -- Schreibt Blog-Artikel aus Curator-Themen."""
|
"""BlogWriter -- Schreibt Blog-Artikel aus Curator-Themen."""
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
logger = logging.getLogger("blog.writer")
|
logger = logging.getLogger("blog.writer")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_json(text: str):
|
||||||
|
"""Extrahiert JSON aus Claude-Antworten (robust)."""
|
||||||
|
text = text.strip()
|
||||||
|
# 1. Direktes Parsen versuchen
|
||||||
|
try:
|
||||||
|
return json.loads(text)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
# 2. JSON aus Markdown-Codeblock extrahieren
|
||||||
|
code_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
|
||||||
|
if code_block:
|
||||||
|
try:
|
||||||
|
return json.loads(code_block.group(1).strip())
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
# 3. Erstes JSON-Array oder -Objekt im Text finden
|
||||||
|
for pattern in [r'(\[[\s\S]*\])', r'(\{[\s\S]*\})']:
|
||||||
|
match = re.search(pattern, text)
|
||||||
|
if match:
|
||||||
|
try:
|
||||||
|
return json.loads(match.group(1))
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
||||||
|
|
||||||
DB_PATH = "/mnt/gitea/osint-data/osint.db"
|
DB_PATH = "/mnt/gitea/osint-data/osint.db"
|
||||||
|
|
||||||
|
|
||||||
@@ -121,12 +148,7 @@ Falls das Thema einen geographischen Bezug hat, fülle geo_data:
|
|||||||
result, usage = await call_claude_fn(prompt, tools="WebSearch,WebFetch", model=None)
|
result, usage = await call_claude_fn(prompt, tools="WebSearch,WebFetch", model=None)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
text = result.strip()
|
article = _extract_json(result)
|
||||||
if text.startswith("```"):
|
|
||||||
text = text.split("```")[1]
|
|
||||||
if text.startswith("json"):
|
|
||||||
text = text[4:]
|
|
||||||
article = json.loads(text)
|
|
||||||
article["category"] = topic["category"]
|
article["category"] = topic["category"]
|
||||||
article["monitor_event_ids"] = topic.get("incident_ids", [])
|
article["monitor_event_ids"] = topic.get("incident_ids", [])
|
||||||
logger.info(f"Writer: Artikel '{article['title']}' geschrieben (${usage.cost_usd:.4f})")
|
logger.info(f"Writer: Artikel '{article['title']}' geschrieben (${usage.cost_usd:.4f})")
|
||||||
|
|||||||
@@ -13,9 +13,7 @@ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
|
|||||||
DB_PATH = os.path.join(DATA_DIR, "osint.db")
|
DB_PATH = os.path.join(DATA_DIR, "osint.db")
|
||||||
|
|
||||||
# JWT
|
# JWT
|
||||||
JWT_SECRET = os.environ.get("JWT_SECRET")
|
JWT_SECRET = os.environ.get("JWT_SECRET", "")
|
||||||
if not JWT_SECRET:
|
|
||||||
raise RuntimeError("JWT_SECRET Umgebungsvariable muss gesetzt sein")
|
|
||||||
JWT_ALGORITHM = "HS256"
|
JWT_ALGORITHM = "HS256"
|
||||||
JWT_EXPIRE_HOURS = 24
|
JWT_EXPIRE_HOURS = 24
|
||||||
|
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren