fix: Pipeline JSON-Parsing robust (first-open-to-last-close + strict=False)
- _extract_json: Neuer Ansatz nimmt den Bereich vom ersten { bis zum letzten }
statt des fragilen Codeblock-Regex (löst das Problem mit Backticks im Markdown)
- json.loads(strict=False) überall: Erlaubt rohe Newlines in Strings
(Claude liefert content_markdown mit echten Newlines statt \n)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -1,34 +1,27 @@
|
||||
"""BlogCurator -- Wählt tägliche Blog-Themen aus der Monitor-DB."""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
logger = logging.getLogger("blog.curator")
|
||||
|
||||
|
||||
def _extract_json(text: str):
    """Extract a JSON value from a Claude response (robust).

    The response may be bare JSON, or JSON surrounded by prose or wrapped
    in a Markdown code fence. Parsing uses ``strict=False`` so that raw
    control characters (e.g. literal newlines) inside JSON strings are
    accepted.

    Strategy:
      1. Try to parse the whole (stripped) text.
      2. Otherwise take the span from the first opening delimiter to the
         last matching closing delimiter, for both ``{...}`` and ``[...]``,
         trying whichever opening delimiter occurs first in the text.

    Args:
        text: Raw response text.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If no candidate span parses as JSON.
    """
    text = text.strip()

    # 1. Try parsing the text directly.
    try:
        return json.loads(text, strict=False)
    except json.JSONDecodeError:
        pass

    # 2. Collect the first-open-to-last-close span for objects and arrays.
    spans = []
    for open_c, close_c in (("{", "}"), ("[", "]")):
        start = text.find(open_c)
        end = text.rfind(close_c)
        if start != -1 and end > start:
            spans.append((start, end))

    # Try the outermost structure first: the delimiter that opens earliest.
    # Always trying "{" first would return the inner object of an array
    # such as 'Topics: [{"a": 1}]' instead of the list itself.
    for start, end in sorted(spans):
        try:
            return json.loads(text[start:end + 1], strict=False)
        except json.JSONDecodeError:
            continue

    raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
||||
@@ -141,7 +134,7 @@ Antworte als JSON-Array:
|
||||
topics = _extract_json(result)
|
||||
# Doppelt-encodiertes JSON abfangen
|
||||
if isinstance(topics, str):
|
||||
topics = json.loads(topics)
|
||||
topics = json.loads(topics, strict=False)
|
||||
if not isinstance(topics, list):
|
||||
logger.error(f"Curator: Unerwarteter Typ {type(topics).__name__}, erwartet list")
|
||||
return []
|
||||
|
||||
@@ -1,34 +1,27 @@
|
||||
"""BlogWriter -- Schreibt Blog-Artikel aus Curator-Themen."""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
logger = logging.getLogger("blog.writer")
|
||||
|
||||
|
||||
def _extract_json(text: str):
    """Extract a JSON value from a Claude response (robust).

    Accepts bare JSON as well as JSON embedded in prose or inside a
    Markdown code fence. All parsing is done with ``strict=False`` so raw
    control characters (such as literal newlines) inside JSON strings do
    not cause a failure.

    Strategy:
      1. Parse the whole (stripped) text.
      2. Otherwise slice from the first opening delimiter to the last
         matching closing delimiter, for ``{...}`` and ``[...]``, and try
         the slice whose opening delimiter appears earliest first.

    Args:
        text: Raw response text.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If no candidate slice parses as JSON.
    """
    text = text.strip()

    # 1. Try parsing the text directly.
    try:
        return json.loads(text, strict=False)
    except json.JSONDecodeError:
        pass

    # 2. Build first-open-to-last-close candidates for objects and arrays.
    candidates = []
    for open_c, close_c in (("{", "}"), ("[", "]")):
        start = text.find(open_c)
        end = text.rfind(close_c)
        if start != -1 and end > start:
            candidates.append((start, end))

    # The earliest opening delimiter marks the outermost structure; trying
    # it first avoids returning a nested object when the payload is an
    # array wrapped in prose (and vice versa).
    for start, end in sorted(candidates):
        try:
            return json.loads(text[start:end + 1], strict=False)
        except json.JSONDecodeError:
            continue

    raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
||||
@@ -151,7 +144,7 @@ Falls das Thema einen geographischen Bezug hat, fülle geo_data:
|
||||
article = _extract_json(result)
|
||||
# Doppelt-encodiertes JSON abfangen
|
||||
if isinstance(article, str):
|
||||
article = json.loads(article)
|
||||
article = json.loads(article, strict=False)
|
||||
if not isinstance(article, dict):
|
||||
logger.error(f"Writer: Unerwarteter Typ {type(article).__name__}")
|
||||
return None
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren