diff --git a/src/agents/blog/__init__.py b/src/agents/blog/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/agents/blog/blog_curator.py b/src/agents/blog/blog_curator.py
new file mode 100644
index 0000000..42df54e
--- /dev/null
+++ b/src/agents/blog/blog_curator.py
@@ -0,0 +1,125 @@
+"""BlogCurator -- selects the daily blog topics from the monitor DB."""
+import json
+import logging
+import sqlite3
+from datetime import datetime, timedelta, timezone
+
+logger = logging.getLogger("blog.curator")
+
+DB_PATH = "/mnt/gitea/osint-data/osint.db"
+
+
+def get_recent_data(hours: int = 24) -> dict:
+    """Fetches recent articles and fact checks from the monitor DB."""
+    cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+
+    # Recent articles
+    cursor = conn.execute(
+        """SELECT a.headline_de, a.headline, a.source, a.source_url,
+                  a.content_de, a.published_at, a.collected_at,
+                  i.title as incident_title, i.id as incident_id
+           FROM articles a
+           LEFT JOIN incidents i ON a.incident_id = i.id
+           WHERE a.collected_at > ? OR a.published_at > ?
+           ORDER BY a.collected_at DESC LIMIT 100""",
+        (cutoff, cutoff),
+    )
+    articles = [dict(r) for r in cursor.fetchall()]
+
+    # Active incidents
+    cursor = conn.execute(
+        "SELECT id, title, summary, type, status FROM incidents WHERE status = 'active' ORDER BY updated_at DESC LIMIT 10"
+    )
+    incidents = [dict(r) for r in cursor.fetchall()]
+
+    # Recent fact checks
+    cursor = conn.execute(
+        """SELECT claim, status, evidence, checked_at, incident_id
+           FROM fact_checks WHERE checked_at > ? ORDER BY checked_at DESC LIMIT 30""",
+        (cutoff,),
+    )
+    fact_checks = [dict(r) for r in cursor.fetchall()]
+
+    conn.close()
+    return {"articles": articles, "incidents": incidents, "fact_checks": fact_checks}
+
+
+async def curate_topics(call_claude_fn) -> list[dict]:
+    """Selects 2-4 blog-worthy topics."""
+    data = get_recent_data()
+
+    if not data["articles"]:
+        logger.warning("Keine neuen Artikel in den letzten 24h")
+        return []
+
+    # Summary for Claude
+    article_summary = []
+    for a in data["articles"][:50]:
+        title = a["headline_de"] or a["headline"] or ""
+        source = a["source"] or ""
+        incident = a["incident_title"] or ""
+        article_summary.append(f"- [{source}] {title} (Lage: {incident})")
+
+    fc_summary = []
+    for fc in data["fact_checks"][:15]:
+        fc_summary.append(f"- [{fc['status']}] {fc['claim'][:150]}")
+
+    incident_summary = []
+    for inc in data["incidents"]:
+        summary_short = (inc["summary"] or "")[:200]
+        incident_summary.append(f"- Lage #{inc['id']}: {inc['title']} -- {summary_short}")
+
+    prompt = f"""Du bist der Redaktionsleiter des OSINT-Blogs "AegisSight Mosaic".
+Wähle aus den folgenden aktuellen OSINT-Daten 2-4 Themen aus, die sich für fundierte Blog-Artikel eignen.
+
+KATEGORIEN (wähle passend):
+- OSINT: Nachrichtenanalyse, Quellenauswertung, Faktencheck
+- GEOINT: Karten, Satellitenbilder, räumliche Analyse
+- CYBINT: Cyber-Bedrohungen, digitale Infrastruktur
+- SOCMINT: Desinformation, Narrative, Social-Media-Trends
+
+AKTUELLE LAGEN:
+{chr(10).join(incident_summary)}
+
+AKTUELLE ARTIKEL (letzte 24h):
+{chr(10).join(article_summary[:30])}
+
+AKTUELLE FAKTENCHECKS:
+{chr(10).join(fc_summary)}
+
+REGELN:
+- Wähle Themen die für ein breites Publikum interessant sind
+- Keine rein technischen oder internen Themen
+- Bevorzuge Themen mit mehreren Quellen und Faktenchecks
+- Jedes Thema muss einer Kategorie zugeordnet werden
+- Gib relevante Artikel-IDs und Incident-IDs als Kontext mit
+
+Antworte als JSON-Array:
+[
+  {{
+    "topic": "Kurzer Thementitel",
+    "category": "OSINT|GEOINT|CYBINT|SOCMINT",
+    "angle": "Welcher Blickwinkel/welche Story?",
+    "key_sources": ["Quellenname 1", "Quellenname 2"],
+    "incident_ids": [6, 18],
+    "relevance": "Warum ist das jetzt relevant?"
+  }}
+]"""
+
+    result, usage = await call_claude_fn(prompt, tools=None, model="claude-haiku-4-5-20251001")
+
+    try:
+        # Extract the JSON from the response
+        text = result.strip()
+        if text.startswith("```"):
+            text = text.split("```")[1]
+            if text.startswith("json"):
+                text = text[4:]
+        topics = json.loads(text)
+        logger.info(f"Curator: {len(topics)} Themen ausgewählt (${usage.cost_usd:.4f})")
+        return topics
+    except (json.JSONDecodeError, IndexError) as e:
+        logger.error(f"Curator JSON-Parse-Fehler: {e}\nRaw: {result[:500]}")
+        return []
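Note: both agents are written against a `call_claude_fn` callable that returns a `(text, usage)` pair whose `usage` exposes a `cost_usd` attribute. Below is a minimal sketch of a stub that satisfies that contract and dry-runs the curator; the stub, its canned reply, and the use of SimpleNamespace are illustrative assumptions, and running it still requires the SQLite file at DB_PATH plus the repository root on sys.path.

    import asyncio
    from types import SimpleNamespace

    from src.agents.blog.blog_curator import curate_topics

    async def fake_call_claude(prompt, tools=None, model=None):
        # Canned curator reply; a real run would go through src.agents.claude_client.call_claude.
        reply = '[{"topic": "Beispielthema", "category": "OSINT", "angle": "", "key_sources": [], "incident_ids": [], "relevance": ""}]'
        return reply, SimpleNamespace(cost_usd=0.0)

    # Prints the parsed topic list (or [] if no articles were collected in the last 24h).
    print(asyncio.run(curate_topics(fake_call_claude)))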
diff --git a/src/agents/blog/blog_pipeline.py b/src/agents/blog/blog_pipeline.py
new file mode 100644
index 0000000..51b9bf3
--- /dev/null
+++ b/src/agents/blog/blog_pipeline.py
@@ -0,0 +1,103 @@
+"""Blog pipeline: curator -> writer -> push to the blog."""
+import asyncio
+import json
+import logging
+import ssl
+import sys
+import urllib.request
+from pathlib import Path
+
+# Add the project root to sys.path
+sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
+
+from src.agents.claude_client import call_claude
+from src.agents.blog.blog_curator import curate_topics
+from src.agents.blog.blog_writer import write_article
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
+    handlers=[
+        logging.StreamHandler(),
+    ],
+)
+logger = logging.getLogger("blog.pipeline")
+
+BLOG_API_URL = "https://blog.aegis-sight.de/api/ingest/drafts"
+API_KEY_FILE = "/home/claude-dev/.blog-api-key"
+
+
+def read_api_key() -> str:
+    try:
+        return open(API_KEY_FILE).read().strip()
+    except FileNotFoundError:
+        logger.error(f"API-Key-Datei nicht gefunden: {API_KEY_FILE}")
+        sys.exit(1)
+
+
+def push_to_blog(articles: list[dict], api_key: str) -> dict:
+    """Pushes article drafts to the blog ingest API."""
+    data = json.dumps({"articles": articles}).encode("utf-8")
+    req = urllib.request.Request(
+        BLOG_API_URL,
+        data=data,
+        headers={
+            "Content-Type": "application/json",
+            "X-API-Key": api_key,
+        },
+        method="POST",
+    )
+    ctx = ssl.create_default_context()
+    with urllib.request.urlopen(req, timeout=30, context=ctx) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+async def run_pipeline():
+    """Runs the complete blog pipeline."""
+    logger.info("=== Blog-Pipeline gestartet ===")
+
+    # 1. Select topics
+    logger.info("Schritt 1: Themen auswählen...")
+    topics = await curate_topics(call_claude)
+    if not topics:
+        logger.warning("Keine Themen ausgewählt -- Pipeline beendet")
+        return
+
+    logger.info(f"{len(topics)} Themen ausgewählt: {[t['topic'] for t in topics]}")
+
+    # 2. Write articles
+    logger.info("Schritt 2: Artikel schreiben...")
+    articles = []
+    for topic in topics:
+        logger.info(f"Schreibe: {topic['topic']} ({topic['category']})")
+        article = await write_article(topic, call_claude)
+        if article:
+            articles.append(article)
+        else:
+            logger.warning(f"Artikel fehlgeschlagen: {topic['topic']}")
+
+    if not articles:
+        logger.warning("Keine Artikel geschrieben -- Pipeline beendet")
+        return
+
+    logger.info(f"{len(articles)} Artikel geschrieben")
+
+    # 3. Push to the blog
+    logger.info("Schritt 3: An Blog pushen...")
+    api_key = read_api_key()
+    try:
+        result = push_to_blog(articles, api_key)
+        logger.info(f"Push-Ergebnis: {result['accepted']} akzeptiert, {result.get('rejected', 0)} abgelehnt")
+    except Exception as e:
+        logger.error(f"Push fehlgeschlagen: {e}")
+        return
+
+    logger.info("=== Blog-Pipeline abgeschlossen ===")
+
+
+def main():
+    asyncio.run(run_pipeline())
+
+
+if __name__ == "__main__":
+    main()
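Note: run_pipeline only reads `accepted` and, optionally, `rejected` from the ingest response, and push_to_blog wraps the drafts in an `articles` array sent to POST /api/ingest/drafts. The sketch below shows the request and response shapes this implies; the article fields mirror the JSON schema in the writer prompt plus the two keys added in write_article, and anything beyond those fields is an assumption, not specified by this change.

    # Request body that push_to_blog sends (one draft per article from blog_writer):
    request_body = {
        "articles": [
            {
                "title": "Aussagekräftiger Titel",
                "content_markdown": "## Kompletter Artikel in Markdown...",
                "meta_description": "Kurze Beschreibung für SEO",
                "sources": [{"title": "Quellenname", "url": "https://...", "accessed_at": "2025-01-01"}],
                "geo_data": None,
                "category": "OSINT",            # added in write_article
                "monitor_event_ids": [6, 18],   # added in write_article
            }
        ]
    }

    # Minimal response the pipeline can log without a KeyError:
    response_body = {"accepted": 1, "rejected": 0}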
diff --git a/src/agents/blog/blog_writer.py b/src/agents/blog/blog_writer.py
new file mode 100644
index 0000000..ed591ca
--- /dev/null
+++ b/src/agents/blog/blog_writer.py
@@ -0,0 +1,136 @@
+"""BlogWriter -- writes blog articles from curator topics."""
+import json
+import logging
+import sqlite3
+from datetime import datetime, timedelta, timezone
+
+logger = logging.getLogger("blog.writer")
+
+DB_PATH = "/mnt/gitea/osint-data/osint.db"
+
+
+def get_context_for_topic(topic: dict) -> str:
+    """Fetches relevant data from the DB for a topic."""
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+
+    context_parts = []
+
+    # Incident summaries
+    for inc_id in topic.get("incident_ids", []):
+        cursor = conn.execute(
+            "SELECT title, summary, updated_at FROM incidents WHERE id = ?", (inc_id,)
+        )
+        inc = cursor.fetchone()
+        if inc:
+            summary = (dict(inc).get("summary") or "")[:2000]
+            context_parts.append(f"## Lage: {inc['title']}\n{summary}")
+
+    # Relevant articles from the last 48 hours
+    cutoff = (datetime.now(timezone.utc) - timedelta(hours=48)).isoformat()
+    incident_ids = topic.get("incident_ids", [])
+    if incident_ids:
+        placeholders = ",".join("?" for _ in incident_ids)
+        cursor = conn.execute(
+            f"""SELECT headline_de, headline, source, source_url, content_de, published_at
+                FROM articles WHERE incident_id IN ({placeholders}) AND (collected_at > ? OR published_at > ?)
+                ORDER BY collected_at DESC LIMIT 20""",
+            (*incident_ids, cutoff, cutoff),
+        )
+    else:
+        cursor = conn.execute(
+            """SELECT headline_de, headline, source, source_url, content_de, published_at
+               FROM articles WHERE collected_at > ? OR published_at > ?
+               ORDER BY collected_at DESC LIMIT 20""",
+            (cutoff, cutoff),
+        )
+    articles = [dict(r) for r in cursor.fetchall()]
+
+    if articles:
+        art_text = []
+        for a in articles:
+            title = a["headline_de"] or a["headline"] or ""
+            content = (a["content_de"] or "")[:500]
+            source = a["source"] or ""
+            url = a["source_url"] or ""
+            art_text.append(f"### {title}\nQuelle: {source} ({url})\n{content}")
+        context_parts.append("## Aktuelle Berichte\n" + "\n\n".join(art_text[:10]))
+
+    # Relevant fact checks
+    if incident_ids:
+        placeholders = ",".join("?" for _ in incident_ids)
+        cursor = conn.execute(
+            f"""SELECT claim, status, evidence FROM fact_checks
+                WHERE incident_id IN ({placeholders}) ORDER BY checked_at DESC LIMIT 10""",
+            incident_ids,
+        )
+        fcs = [dict(r) for r in cursor.fetchall()]
+        if fcs:
+            fc_text = []
+            for fc in fcs:
+                evidence = (fc["evidence"] or "")[:300]
+                fc_text.append(f"- [{fc['status']}] {fc['claim']}\n  Evidenz: {evidence}")
+            context_parts.append("## Faktenchecks\n" + "\n".join(fc_text))
+
+    conn.close()
+    return "\n\n".join(context_parts)
+
+
+async def write_article(topic: dict, call_claude_fn) -> dict | None:
+    """Writes a blog article for a topic."""
+    context = get_context_for_topic(topic)
+    today = datetime.now(timezone.utc).strftime('%Y-%m-%d')
+
+    prompt = f"""Du bist Journalist beim OSINT-Blog "AegisSight Mosaic". Schreibe einen fundierten Artikel auf Deutsch.
+
+THEMA: {topic["topic"]}
+KATEGORIE: {topic["category"]}
+BLICKWINKEL: {topic.get("angle", "")}
+
+KONTEXT AUS DER OSINT-ANALYSE:
+{context}
+
+REGELN FÜR DEN ARTIKEL:
+1. Schreibe wie ein professioneller Journalist, KEIN Lagebericht-Stil
+2. Erzählerischer Fließtext mit Kontext und Einordnung
+3. Verwende echte Umlaute (ü, ä, ö, ß)
+4. Markdown-Format: ## für Zwischenüberschriften, **fett** für Betonung
+5. 800-1500 Wörter, gut strukturiert
+6. Nenne und verlinke Quellen im Text wo möglich
+7. Meta-Description: 1 Satz, max 155 Zeichen, für Suchmaschinen
+8. Am Ende: Einordnung/Ausblick (was bedeutet das?)
+
+Antworte als JSON:
+{{
+  "title": "Aussagekräftiger Titel (kein Clickbait)",
+  "content_markdown": "## Kompletter Artikel in Markdown...",
+  "meta_description": "Kurze Beschreibung für SEO (max 155 Zeichen)",
+  "sources": [
+    {{"title": "Quellenname", "url": "https://...", "accessed_at": "{today}"}}
+  ],
+  "geo_data": null
+}}
+
+Falls das Thema einen geographischen Bezug hat, fülle geo_data:
+{{
+  "center": [lat, lng],
+  "zoom": 5,
+  "markers": [{{"lat": 0, "lng": 0, "label": "Ort", "popup": "Beschreibung"}}]
+}}"""
+
+    result, usage = await call_claude_fn(prompt, tools="WebSearch,WebFetch", model=None)
+
+    try:
+        text = result.strip()
+        if text.startswith("```"):
+            text = text.split("```")[1]
+            if text.startswith("json"):
+                text = text[4:]
+        article = json.loads(text)
+        article["category"] = topic["category"]
+        article["monitor_event_ids"] = topic.get("incident_ids", [])
+        logger.info(f"Writer: Artikel '{article['title']}' geschrieben (${usage.cost_usd:.4f})")
+        return article
+    except (json.JSONDecodeError, IndexError, KeyError) as e:
+        logger.error(f"Writer JSON-Parse-Fehler: {e}\nRaw: {result[:500]}")
+        return None
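One follow-up worth considering: blog_curator.py and blog_writer.py duplicate the fence-stripping and JSON-parsing logic in their try blocks. A sketch of a shared helper both modules could import instead; the module name and placement are suggestions, not part of this change.

    # e.g. src/agents/blog/json_utils.py (hypothetical location)
    import json

    def parse_json_reply(text: str):
        """Strip an optional Markdown code fence from a model reply and parse the JSON."""
        text = text.strip()
        if text.startswith("```"):
            text = text.split("```")[1]
            if text.startswith("json"):
                text = text[4:]
        return json.loads(text)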