fix: Pipeline JSON-Parsing robust (first-open-to-last-close + strict=False)
- _extract_json: Neuer Ansatz nimmt den Bereich vom ersten { bis zum letzten } (bzw. vom ersten [ bis zum letzten ])
statt des fragilen Codeblock-Regex (löst das Problem mit Backticks im Markdown)
- json.loads(strict=False) überall: Erlaubt rohe Steuerzeichen wie Newlines in Strings
(Claude liefert content_markdown mit echten Newlines statt \n)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -1,34 +1,27 @@
|
|||||||
"""BlogCurator -- Wählt tägliche Blog-Themen aus der Monitor-DB."""
|
"""BlogCurator -- Wählt tägliche Blog-Themen aus der Monitor-DB."""
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
logger = logging.getLogger("blog.curator")
|
logger = logging.getLogger("blog.curator")
|
||||||
|
|
||||||
|
|
||||||
def _extract_json(text: str):
|
def _extract_json(text):
|
||||||
"""Extrahiert JSON aus Claude-Antworten (robust)."""
|
"""Extrahiert JSON aus Claude-Antworten (robust)."""
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
# 1. Direktes Parsen versuchen
|
# 1. Direktes Parsen versuchen
|
||||||
try:
|
try:
|
||||||
return json.loads(text)
|
return json.loads(text, strict=False)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
# 2. JSON aus Markdown-Codeblock extrahieren
|
# 2. Erstes JSON-Objekt oder Array finden
|
||||||
code_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
|
for open_c, close_c in [("{", "}"), ("[", "]")]:
|
||||||
if code_block:
|
start = text.find(open_c)
|
||||||
|
end = text.rfind(close_c)
|
||||||
|
if start != -1 and end > start:
|
||||||
try:
|
try:
|
||||||
return json.loads(code_block.group(1).strip())
|
return json.loads(text[start:end+1], strict=False)
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
# 3. Erstes JSON-Array oder -Objekt im Text finden
|
|
||||||
for pattern in [r'(\[[\s\S]*\])', r'(\{[\s\S]*\})']:
|
|
||||||
match = re.search(pattern, text)
|
|
||||||
if match:
|
|
||||||
try:
|
|
||||||
return json.loads(match.group(1))
|
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
||||||
@@ -141,7 +134,7 @@ Antworte als JSON-Array:
|
|||||||
topics = _extract_json(result)
|
topics = _extract_json(result)
|
||||||
# Doppelt-encodiertes JSON abfangen
|
# Doppelt-encodiertes JSON abfangen
|
||||||
if isinstance(topics, str):
|
if isinstance(topics, str):
|
||||||
topics = json.loads(topics)
|
topics = json.loads(topics, strict=False)
|
||||||
if not isinstance(topics, list):
|
if not isinstance(topics, list):
|
||||||
logger.error(f"Curator: Unerwarteter Typ {type(topics).__name__}, erwartet list")
|
logger.error(f"Curator: Unerwarteter Typ {type(topics).__name__}, erwartet list")
|
||||||
return []
|
return []
|
||||||
|
|||||||
@@ -1,34 +1,27 @@
|
|||||||
"""BlogWriter -- Schreibt Blog-Artikel aus Curator-Themen."""
|
"""BlogWriter -- Schreibt Blog-Artikel aus Curator-Themen."""
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
logger = logging.getLogger("blog.writer")
|
logger = logging.getLogger("blog.writer")
|
||||||
|
|
||||||
|
|
||||||
def _extract_json(text: str):
|
def _extract_json(text):
|
||||||
"""Extrahiert JSON aus Claude-Antworten (robust)."""
|
"""Extrahiert JSON aus Claude-Antworten (robust)."""
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
# 1. Direktes Parsen versuchen
|
# 1. Direktes Parsen versuchen
|
||||||
try:
|
try:
|
||||||
return json.loads(text)
|
return json.loads(text, strict=False)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
# 2. JSON aus Markdown-Codeblock extrahieren
|
# 2. Erstes JSON-Objekt oder Array finden
|
||||||
code_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
|
for open_c, close_c in [("{", "}"), ("[", "]")]:
|
||||||
if code_block:
|
start = text.find(open_c)
|
||||||
|
end = text.rfind(close_c)
|
||||||
|
if start != -1 and end > start:
|
||||||
try:
|
try:
|
||||||
return json.loads(code_block.group(1).strip())
|
return json.loads(text[start:end+1], strict=False)
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
# 3. Erstes JSON-Array oder -Objekt im Text finden
|
|
||||||
for pattern in [r'(\[[\s\S]*\])', r'(\{[\s\S]*\})']:
|
|
||||||
match = re.search(pattern, text)
|
|
||||||
if match:
|
|
||||||
try:
|
|
||||||
return json.loads(match.group(1))
|
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
raise json.JSONDecodeError("Kein gueltiges JSON gefunden", text, 0)
|
||||||
@@ -151,7 +144,7 @@ Falls das Thema einen geographischen Bezug hat, fülle geo_data:
|
|||||||
article = _extract_json(result)
|
article = _extract_json(result)
|
||||||
# Doppelt-encodiertes JSON abfangen
|
# Doppelt-encodiertes JSON abfangen
|
||||||
if isinstance(article, str):
|
if isinstance(article, str):
|
||||||
article = json.loads(article)
|
article = json.loads(article, strict=False)
|
||||||
if not isinstance(article, dict):
|
if not isinstance(article, dict):
|
||||||
logger.error(f"Writer: Unerwarteter Typ {type(article).__name__}")
|
logger.error(f"Writer: Unerwarteter Typ {type(article).__name__}")
|
||||||
return None
|
return None
|
||||||
|
|||||||
In neuem Issue referenzieren
Einen Benutzer sperren