Geoparsing von spaCy auf Haiku umgestellt

- geoparsing.py: Komplett-Rewrite (spaCy NER + Nominatim -> Haiku + geonamescache)
- orchestrator.py: incident_context an geoparse_articles, category in INSERT
- incidents.py: incident_context aus DB laden und an Geoparsing uebergeben
- public_api.py: Locations aggregiert im Lagebild-Endpoint
- components.js: response-Kategorie neben retaliation (beide akzeptiert)
- requirements.txt: spaCy und geopy entfernt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 22:00:40 +01:00
Commit 5ae61a1379
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,4 @@ apscheduler==3.10.4
 websockets
 python-multipart
 aiosmtplib
-spacy>=3.7,<4.0
 geonamescache>=2.0
-geopy>=2.4
--- a/src/agents/geoparsing.py
+++ b/src/agents/geoparsing.py
@@ -1,361 +1,318 @@
-"""Geoparsing-Modul: NER-basierte Ortsextraktion und Geocoding fuer Artikel."""
+"""Geoparsing-Modul: Haiku-basierte Ortsextraktion und Geocoding fuer Artikel."""
-import asyncio
+import asyncio
-import logging
+import json
-import re
+import logging
-from difflib import SequenceMatcher
+import re
-from typing import Optional
+from typing import Optional
-
+
-logger = logging.getLogger("osint.geoparsing")
+from agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator
-
+from config import CLAUDE_MODEL_FAST
-# Lazy-loaded spaCy-Modelle (erst beim ersten Aufruf geladen)
+
-_nlp_de = None
+logger = logging.getLogger("osint.geoparsing")
-_nlp_en = None
+
-
+# Geocoding-Cache (in-memory, lebt solange der Prozess laeuft)
-# Stopwords: Entitaeten die von spaCy faelschlicherweise als Orte erkannt werden
+_geocode_cache: dict[str, Optional[dict]] = {}
-LOCATION_STOPWORDS = {
+
-    "EU", "UN", "NATO", "WHO", "OSZE", "OPEC", "G7", "G20", "BRICS",
+# geonamescache-Instanz (lazy)
-    "Nato", "Eu", "Un", "Onu",
+_gc = None
-    "Bundesregierung", "Bundestag", "Bundesrat", "Bundeskanzler",
+
-    "Kreml", "Weisses Haus", "White House", "Pentagon", "Elysee",
+
-    "Twitter", "Facebook", "Telegram", "Signal", "WhatsApp",
+def _get_geonamescache():
-    "Reuters", "AP", "AFP", "DPA", "dpa",
+    """Laedt geonamescache lazy."""
-    "Internet", "Online", "Web",
+    global _gc
-    # Regionale/vage Begriffe (kein einzelner Punkt auf der Karte)
+    if _gc is None:
-    "Naher Osten", "Mittlerer Osten", "Middle East", "Near East",
+        try:
-    "Golf-Staaten", "Golfstaaten", "Golfregion", "Gulf States", "Persian Gulf",
+            import geonamescache
-    "Nordafrika", "Subsahara", "Zentralasien", "Suedostasien",
+            _gc = geonamescache.GeonamesCache()
-    "Westeuropa", "Osteuropa", "Suedeuropa", "Nordeuropa",
+            logger.info("geonamescache geladen")
-    "Balkan", "Kaukasus", "Levante", "Maghreb", "Sahel",
+        except ImportError:
-    "Arabische Welt", "Arab World",
+            logger.error("geonamescache nicht installiert - pip install geonamescache")
-}
+            return None
-
+    return _gc
-# Maximale Textlaenge fuer NER-Verarbeitung
+
-MAX_TEXT_LENGTH = 10000
+
-
+def _geocode_offline(name: str, country_code: str = "") -> Optional[dict]:
-
+    """Geocoding ueber geonamescache (offline).
-# Marker-Kategorien fuer Karten-Klassifizierung
+
-CATEGORY_KEYWORDS = {
+    Args:
-    "target": [
+        name: Ortsname (normalisiert von Haiku)
-        "angriff", "angegriff", "bombardier", "luftschlag", "luftangriff",
+        country_code: ISO-2 Laendercode (von Haiku) fuer bessere Disambiguierung
-        "beschuss", "beschossen", "getroffen", "zerstoer", "einschlag",
+    """
-        "detonation", "explosion", "strike", "attack", "bombed", "hit",
+    gc = _get_geonamescache()
-        "shelled", "destroyed", "targeted", "missile hit", "air strike",
+    if gc is None:
-        "airstrike", "bombardment", "killed", "casualties", "dead",
+        return None
-        "tote", "opfer", "getoetet",
+
-    ],
+    name_lower = name.lower().strip()
-    "retaliation": [
+
-        "gegenschlag", "vergeltung", "reaktion", "gegenangriff",
+    # 1. Stadtsuche
-        "abgefeuert", "retaliat", "counter-attack", "counterattack",
+    cities = gc.get_cities()
-        "counter-strike", "response", "fired back", "launched",
+    matches = []
-        "rakete abgefeuert", "vergeltungsschlag", "abfangen",
+    for gid, city in cities.items():
-        "abgefangen", "intercepted", "eskalation", "escalat",
+        city_name = city.get("name", "")
-    ],
+        alt_names = city.get("alternatenames", "")
-    "actor": [
+        if isinstance(alt_names, list):
-        "regierung", "praesident", "ministerium", "hauptquartier",
+            alt_list = [n.strip().lower() for n in alt_names if n.strip()]
-        "kommando", "nato", "pentagon", "kongress", "senat", "parlament",
+        else:
-        "government", "president", "ministry", "headquarters", "command",
+            alt_list = [n.strip().lower() for n in str(alt_names).split(",") if n.strip()]
-        "congress", "senate", "parliament", "white house", "weisses haus",
+        if city_name.lower() == name_lower or name_lower in alt_list:
-        "verteidigungsminister", "aussenminister", "generalstab",
+            matches.append(city)
-        "defense secretary", "secretary of state", "general staff",
+
-        "un-sicherheitsrat", "security council", "summit", "gipfel",
+    if matches:
-        "diplomati", "botschaft", "embassy",
+        # Disambiguierung: country_code bevorzugen, dann Population
-    ],
+        if country_code:
-}
+            cc_matches = [c for c in matches if c.get("countrycode", "").upper() == country_code.upper()]
-
+            if cc_matches:
-
+                matches = cc_matches
-def _classify_location(source_text: str, article_text: str = "") -> str:
+        best = max(matches, key=lambda c: c.get("population", 0))
-    """Klassifiziert eine Location basierend auf dem Kontext.
+        return {
-
+            "lat": float(best["latitude"]),
-    Returns:
+            "lon": float(best["longitude"]),
-        Kategorie: 'target', 'retaliation', 'actor', oder 'mentioned'
+            "country_code": best.get("countrycode", ""),
-    """
+            "normalized_name": best["name"],
-    text = (source_text + " " + article_text[:500]).lower()
+            "confidence": min(1.0, 0.6 + (best.get("population", 0) / 10_000_000)),
-
+        }
-    scores = {"target": 0, "retaliation": 0, "actor": 0}
+
-    for category, keywords in CATEGORY_KEYWORDS.items():
+    # 2. Laendersuche
-        for kw in keywords:
+    countries = gc.get_countries()
-            if kw in text:
+    for code, country in countries.items():
-                scores[category] += 1
+        if country.get("name", "").lower() == name_lower:
-
+            capital = country.get("capital", "")
-    best = max(scores, key=scores.get)
+            if capital:
-    if scores[best] >= 1:
+                cap_result = _geocode_offline(capital)
-        return best
+                if cap_result:
-    return "mentioned"
+                    cap_result["normalized_name"] = country["name"]
-
+                    cap_result["confidence"] = 0.5
-
+                    return cap_result
-
+
-def _load_spacy_model(lang: str):
+    return None
-    """Laedt ein spaCy-Modell lazy (nur beim ersten Aufruf)."""
+
-    global _nlp_de, _nlp_en
+
-    try:
+def _geocode_location(name: str, country_code: str = "", haiku_coords: Optional[dict] = None) -> Optional[dict]:
-        import spacy
+    """Geocoded einen Ortsnamen. Prioritaet: geonamescache > Haiku-Koordinaten.
-    except ImportError:
+
-        logger.error("spaCy nicht installiert - pip install spacy")
+    Args:
-        return None
+        name: Ortsname
-
+        country_code: ISO-2 Code (von Haiku)
-    if lang == "de" and _nlp_de is None:
+        haiku_coords: {"lat": float, "lon": float} (Fallback von Haiku)
-        try:
+    """
-            _nlp_de = spacy.load("de_core_news_sm", disable=["parser", "lemmatizer", "textcat"])
+    cache_key = f"{name.lower().strip()}|{country_code.upper()}"
-            logger.info("spaCy-Modell de_core_news_sm geladen")
+    if cache_key in _geocode_cache:
-        except OSError:
+        return _geocode_cache[cache_key]
-            logger.warning("spaCy-Modell de_core_news_sm nicht gefunden - python -m spacy download de_core_news_sm")
+
-            return None
+    result = _geocode_offline(name, country_code)
-    elif lang == "en" and _nlp_en is None:
+
-        try:
+    # Fallback: Haiku-Koordinaten nutzen
-            _nlp_en = spacy.load("en_core_web_sm", disable=["parser", "lemmatizer", "textcat"])
+    if result is None and haiku_coords:
-            logger.info("spaCy-Modell en_core_web_sm geladen")
+        lat = haiku_coords.get("lat")
-        except OSError:
+        lon = haiku_coords.get("lon")
-            logger.warning("spaCy-Modell en_core_web_sm nicht gefunden - python -m spacy download en_core_web_sm")
+        if lat is not None and lon is not None:
-            return None
+            result = {
-
+                "lat": float(lat),
-    return _nlp_de if lang == "de" else _nlp_en
+                "lon": float(lon),
-
+                "country_code": country_code.upper() if country_code else "",
-
+                "normalized_name": name,
-def _extract_locations_from_text(text: str, language: str = "de") -> list[dict]:
+                "confidence": 0.45,
-    """Extrahiert Ortsnamen aus Text via spaCy NER.
+            }
-
+
-    Returns:
+    _geocode_cache[cache_key] = result
-        Liste von dicts: [{name: str, source_text: str}]
+    return result
-    """
+
-    if not text:
+
-        return []
+HAIKU_GEOPARSE_PROMPT = """Extrahiere alle geographischen Orte aus diesen Nachrichten-Headlines.
-
+
-    text = text[:MAX_TEXT_LENGTH]
+Kontext der Lage: "{incident_context}"
-
+
-    nlp = _load_spacy_model(language)
+Regeln:
-    if nlp is None:
+- Nur echte Orte (Staedte, Laender, Regionen)
-        # Fallback: anderes Modell versuchen
+- Keine Personen, Organisationen, Gebaeude, Alltagswoerter
-        fallback = "en" if language == "de" else "de"
+- Bei "US-Militaer" etc: Land (USA) extrahieren, nicht das Kompositum
-        nlp = _load_spacy_model(fallback)
+- HTML-Tags ignorieren
-        if nlp is None:
+- Jeder Ort nur einmal pro Headline
-            return []
+- Regionen wie "Middle East", "Gulf", "Naher Osten" NICHT extrahieren (kein einzelner Punkt auf der Karte)
-
+
-    doc = nlp(text)
+Klassifiziere basierend auf dem Lage-Kontext:
-
+- "target": Wo das Ereignis passiert / Schaden entsteht
-    locations = []
+- "response": Wo Reaktionen / Gegenmassnahmen stattfinden
-    seen = set()
+- "actor": Wo Entscheidungen getroffen werden / Entscheider sitzen
-    for ent in doc.ents:
+- "mentioned": Nur erwaehnt, kein direkter Bezug
-        if ent.label_ in ("LOC", "GPE"):
+
-            name = ent.text.strip()
+Headlines:
-            # Filter: zu kurz, Stopword, oder nur Zahlen/Sonderzeichen
+{headlines}
-            if len(name) < 2:
+
-                continue
+Antwort NUR als JSON-Array, kein anderer Text:
-            if name in LOCATION_STOPWORDS:
+[{{"headline_idx": 0, "locations": [
-                continue
+  {{"name": "Teheran", "normalized": "Tehran", "country_code": "IR",
-            if re.match(r'^[\d\W]+$', name):
+   "type": "city", "category": "target",
-                continue
+   "lat": 35.69, "lon": 51.42}}
-
+]}}]"""
-            name_lower = name.lower()
+
-            if name_lower not in seen:
+
-                seen.add(name_lower)
+async def _extract_locations_haiku(
-                # Kontext: 50 Zeichen um die Entitaet herum
+    headlines: list[dict], incident_context: str
-                start = max(0, ent.start_char - 25)
+) -> dict[int, list[dict]]:
-                end = min(len(text), ent.end_char + 25)
+    """Extrahiert Orte aus Headlines via Haiku.
-                source_text = text[start:end].strip()
+
-                locations.append({"name": name, "source_text": source_text})
+    Args:
-
+        headlines: [{"idx": article_id, "text": headline_text}, ...]
-    return locations
+        incident_context: Lage-Kontext fuer Klassifizierung
-
+
-
+    Returns:
-# Geocoding-Cache (in-memory, lebt solange der Prozess laeuft)
+        dict[article_id -> list[{name, normalized, country_code, type, category, lat, lon}]]
-_geocode_cache: dict[str, Optional[dict]] = {}
+    """
-
+    if not headlines:
-# geonamescache-Instanz (lazy)
+        return {}
-_gc = None
+
-
+    # Headlines formatieren
-
+    headline_lines = []
-def _get_geonamescache():
+    for i, h in enumerate(headlines):
-    """Laedt geonamescache lazy."""
+        headline_lines.append(f"[{i}] {h['text']}")
-    global _gc
+
-    if _gc is None:
+    prompt = HAIKU_GEOPARSE_PROMPT.format(
-        try:
+        incident_context=incident_context or "Allgemeine Nachrichtenlage",
-            import geonamescache
+        headlines="\n".join(headline_lines),
-            _gc = geonamescache.GeonamesCache()
+    )
-            logger.info("geonamescache geladen")
+
-        except ImportError:
+    try:
-            logger.error("geonamescache nicht installiert - pip install geonamescache")
+        result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
-            return None
+    except Exception as e:
-    return _gc
+        logger.error(f"Haiku-Geoparsing fehlgeschlagen: {e}")
-
+        return {}
-
+
-def _geocode_location(name: str) -> Optional[dict]:
+    # JSON parsen (mit Regex-Fallback)
-    """Geocoded einen Ortsnamen. Offline via geonamescache, Fallback Nominatim.
+    parsed = None
-
+    try:
-    Returns:
+        parsed = json.loads(result_text)
-        dict mit {lat, lon, country_code, normalized_name, confidence} oder None
+    except json.JSONDecodeError:
-    """
+        match = re.search(r'\[.*\]', result_text, re.DOTALL)
-    name_lower = name.lower().strip()
+        if match:
-    if name_lower in _geocode_cache:
+            try:
-        return _geocode_cache[name_lower]
+                parsed = json.loads(match.group())
-
+            except json.JSONDecodeError:
-    result = _geocode_offline(name)
+                logger.warning("Haiku-Geoparsing: JSON-Parse fehlgeschlagen auch mit Regex-Fallback")
-    if result is None:
+                return {}
-        result = _geocode_nominatim(name)
+
-
+    if not parsed or not isinstance(parsed, list):
-    _geocode_cache[name_lower] = result
+        logger.warning("Haiku-Geoparsing: Kein gueltiges JSON-Array erhalten")
-    return result
+        return {}
-
+
-
+    # Ergebnisse den Artikeln zuordnen
-def _geocode_offline(name: str) -> Optional[dict]:
+    results = {}
-    """Versucht Geocoding ueber geonamescache (offline)."""
+    for entry in parsed:
-    gc = _get_geonamescache()
+        if not isinstance(entry, dict):
-    if gc is None:
+            continue
-        return None
+        headline_idx = entry.get("headline_idx")
-
+        if headline_idx is None or headline_idx >= len(headlines):
-    name_lower = name.lower().strip()
+            continue
-
+
-    # 1. Direkte Suche in Staedten
+        article_id = headlines[headline_idx]["idx"]
-    cities = gc.get_cities()
+        locations = entry.get("locations", [])
-    matches = []
+
-    for gid, city in cities.items():
+        if not locations:
-        city_name = city.get("name", "")
+            continue
-        alt_names = city.get("alternatenames", "")
+
-        # alternatenames kann String (komma-getrennt) oder Liste sein
+        article_locs = []
-        if isinstance(alt_names, list):
+        for loc in locations:
-            alt_list = [n.strip().lower() for n in alt_names if n.strip()]
+            if not isinstance(loc, dict):
-        else:
+                continue
-            alt_list = [n.strip().lower() for n in str(alt_names).split(",") if n.strip()]
+            loc_type = loc.get("type", "city")
-        if city_name.lower() == name_lower:
+            # Regionen nicht speichern (kein sinnvoller Punkt auf der Karte)
-            matches.append(city)
+            if loc_type == "region":
-        elif name_lower in alt_list:
+                continue
-            matches.append(city)
+
-
+            name = loc.get("name", "")
-    if matches:
+            if not name:
-        # Disambiguierung: groesste Stadt gewinnt
+                continue
-        best = max(matches, key=lambda c: c.get("population", 0))
+
-        return {
+            article_locs.append({
-            "lat": float(best["latitude"]),
+                "name": name,
-            "lon": float(best["longitude"]),
+                "normalized": loc.get("normalized", name),
-            "country_code": best.get("countrycode", ""),
+                "country_code": loc.get("country_code", ""),
-            "normalized_name": best["name"],
+                "type": loc_type,
-            "confidence": min(1.0, 0.6 + (best.get("population", 0) / 10_000_000)),
+                "category": loc.get("category", "mentioned"),
-        }
+                "lat": loc.get("lat"),
-
+                "lon": loc.get("lon"),
-    # 2. Laendersuche
+            })
-    countries = gc.get_countries()
+
-    for code, country in countries.items():
+        if article_locs:
-        if country.get("name", "").lower() == name_lower:
+            results[article_id] = article_locs
-            # Hauptstadt-Koordinaten als Fallback
+
-            capital = country.get("capital", "")
+    return results
-            if capital:
+
-                cap_result = _geocode_offline(capital)
+
-                if cap_result:
+async def geoparse_articles(
-                    cap_result["normalized_name"] = country["name"]
+    articles: list[dict],
-                    cap_result["confidence"] = 0.5  # Land, nicht Stadt
+    incident_context: str = "",
-                    return cap_result
+) -> dict[int, list[dict]]:
-
+    """Geoparsing fuer eine Liste von Artikeln via Haiku + geonamescache.
-    return None
+
-
+    Args:
-
+        articles: Liste von Artikel-Dicts (mit id, headline, headline_de, language)
-def _geocode_nominatim(name: str) -> Optional[dict]:
+        incident_context: Lage-Kontext (Titel + Beschreibung) fuer kontextbewusste Klassifizierung
-    """Fallback-Geocoding ueber Nominatim (1 Request/Sekunde)."""
+
-    try:
+    Returns:
-        from geopy.geocoders import Nominatim
+        dict[article_id -> list[{location_name, location_name_normalized, country_code,
-        from geopy.exc import GeocoderTimedOut, GeocoderServiceError
+                                  lat, lon, confidence, source_text, category}]]
-    except ImportError:
+    """
-        return None
+    if not articles:
-
+        return {}
-    try:
+
-        geocoder = Nominatim(user_agent="aegissight-monitor/1.0", timeout=5)
+    # Headlines sammeln
-        location = geocoder.geocode(name, language="de", exactly_one=True)
+    headlines = []
-        if location:
+    for article in articles:
-            # Country-Code aus Address extrahieren falls verfuegbar
+        article_id = article.get("id")
-            raw = location.raw or {}
+        if not article_id:
-            country_code = ""
+            continue
-            if "address" in raw:
+
-                country_code = raw["address"].get("country_code", "").upper()
+        # Deutsche Headline bevorzugen
-
+        headline = article.get("headline_de") or article.get("headline") or ""
-            normalized_name = location.address.split(",")[0] if location.address else name
+        headline = headline.strip()
-
+        if not headline:
-            # Plausibilitaetspruefung: Nominatim-Ergebnis muss zum Suchbegriff passen
+            continue
-            similarity = SequenceMatcher(None, name.lower(), normalized_name.lower()).ratio()
+
-            if similarity < 0.3:
+        headlines.append({"idx": article_id, "text": headline})
-                logger.debug(f"Nominatim-Ergebnis verworfen: '{name}' -> '{normalized_name}' (Aehnlichkeit {similarity:.2f})")
+
-                return None
+    if not headlines:
-
+        return {}
-            return {
+
-                "lat": float(location.latitude),
+    # Batches bilden (max 50 Headlines pro Haiku-Call)
-                "lon": float(location.longitude),
+    batch_size = 50
-                "country_code": country_code,
+    all_haiku_results = {}
-                "normalized_name": normalized_name,
+    for i in range(0, len(headlines), batch_size):
-                "confidence": 0.4,  # Nominatim-Ergebnis = niedrigere Konfidenz
+        batch = headlines[i:i + batch_size]
-            }
+        batch_results = await _extract_locations_haiku(batch, incident_context)
-    except (GeocoderTimedOut, GeocoderServiceError) as e:
+        all_haiku_results.update(batch_results)
-        logger.debug(f"Nominatim-Fehler fuer '{name}': {e}")
+
-    except Exception as e:
+    if not all_haiku_results:
-        logger.debug(f"Geocoding-Fehler fuer '{name}': {e}")
+        return {}
-
+
-    return None
+    # Geocoding via geonamescache (mit Haiku-Koordinaten als Fallback)
-
+    result = {}
-
+    for article_id, haiku_locs in all_haiku_results.items():
-async def geoparse_articles(articles: list[dict]) -> dict[int, list[dict]]:
+        locations = []
-    """Geoparsing fuer eine Liste von Artikeln.
+        for loc in haiku_locs:
-
+            haiku_coords = None
-    Args:
+            if loc.get("lat") is not None and loc.get("lon") is not None:
-        articles: Liste von Artikel-Dicts (mit id, content_de, content_original, language, headline, headline_de)
+                haiku_coords = {"lat": loc["lat"], "lon": loc["lon"]}
-
+
-    Returns:
+            geo = _geocode_location(
-        dict[article_id -> list[{location_name, location_name_normalized, country_code, lat, lon, confidence, source_text}]]
+                loc["normalized"],
-    """
+                loc.get("country_code", ""),
-    if not articles:
+                haiku_coords,
-        return {}
+            )
-
+
-    result = {}
+            if geo:
-
+                locations.append({
-    for article in articles:
+                    "location_name": loc["name"],
-        article_id = article.get("id")
+                    "location_name_normalized": geo["normalized_name"],
-        if not article_id:
+                    "country_code": geo["country_code"],
-            continue
+                    "lat": geo["lat"],
-
+                    "lon": geo["lon"],
-        language = article.get("language", "de")
+                    "confidence": geo["confidence"],
-
+                    "source_text": loc["name"],
-        # Text zusammenbauen: Headline + Content
+                    "category": loc.get("category", "mentioned"),
-        text_parts = []
+                })
-        if language == "de":
+
-            if article.get("headline_de"):
+        if locations:
-                text_parts.append(article["headline_de"])
+            result[article_id] = locations
-            elif article.get("headline"):
+
-                text_parts.append(article["headline"])
+    return result
-            if article.get("content_de"):
-                text_parts.append(article["content_de"])
-            elif article.get("content_original"):
-                text_parts.append(article["content_original"])
-        else:
-            if article.get("headline"):
-                text_parts.append(article["headline"])
-            if article.get("content_original"):
-                text_parts.append(article["content_original"])
-        text = "\n".join(text_parts)
-        if not text.strip():
-            continue
-        # NER-Extraktion (CPU-bound, in Thread ausfuehren)
-        locations_raw = await asyncio.to_thread(
-            _extract_locations_from_text, text, language
-        )
-        if not locations_raw:
-            continue
-        # Geocoding (enthaelt potentiell Netzwerk-Calls)
-        locations = []
-        for loc in locations_raw:
-            geo = await asyncio.to_thread(_geocode_location, loc["name"])
-            if geo:
-                category = _classify_location(loc.get("source_text", ""), text)
-                locations.append({
-                    "location_name": loc["name"],
-                    "location_name_normalized": geo["normalized_name"],
-                    "country_code": geo["country_code"],
-                    "lat": geo["lat"],
-                    "lon": geo["lon"],
-                    "confidence": geo["confidence"],
-                    "source_text": loc.get("source_text", ""),
-                    "category": category,
-                })
-        if locations:
-            result[article_id] = locations
-    return result
--- a/src/agents/orchestrator.py
+++ b/src/agents/orchestrator.py
@@ -714,19 +714,20 @@ class AgentOrchestrator:
            if new_articles_for_analysis:
                try:
                    from agents.geoparsing import geoparse_articles
+                    incident_context = f"{title} - {description}"
                    logger.info(f"Geoparsing fuer {len(new_articles_for_analysis)} neue Artikel...")
-                    geo_results = await geoparse_articles(new_articles_for_analysis)
+                    geo_results = await geoparse_articles(new_articles_for_analysis, incident_context)
                    geo_count = 0
                    for art_id, locations in geo_results.items():
                        for loc in locations:
                            await db.execute(
                                """INSERT INTO article_locations
                                   (article_id, incident_id, location_name, location_name_normalized,
-                                    country_code, latitude, longitude, confidence, source_text, tenant_id)
+                                    country_code, latitude, longitude, confidence, source_text, tenant_id, category)
-                                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                                (art_id, incident_id, loc["location_name"], loc["location_name_normalized"],
                                 loc["country_code"], loc["lat"], loc["lon"], loc["confidence"],
-                                 loc.get("source_text", ""), tenant_id),
+                                 loc.get("source_text", ""), tenant_id, loc.get("category", "mentioned")),
                            )
                            geo_count += 1
                    if geo_count > 0:
--- a/src/routers/incidents.py
+++ b/src/routers/incidents.py
@@ -351,6 +351,15 @@ async def _run_geoparse_background(incident_id: int, tenant_id: int | None):
        from agents.geoparsing import geoparse_articles
        db = await get_db()
+        # Incident-Kontext fuer Haiku laden
+        cursor = await db.execute(
+            "SELECT title, description FROM incidents WHERE id = ?", (incident_id,)
+        )
+        inc_row = await cursor.fetchone()
+        incident_context = ""
+        if inc_row:
+            incident_context = f"{inc_row['title']} - {inc_row['description'] or ''}"
        cursor = await db.execute(
            """SELECT a.* FROM articles a
               WHERE a.incident_id = ?
@@ -373,7 +382,7 @@ async def _run_geoparse_background(incident_id: int, tenant_id: int | None):
        processed = 0
        for i in range(0, total, batch_size):
            batch = articles[i:i + batch_size]
-            geo_results = await geoparse_articles(batch)
+            geo_results = await geoparse_articles(batch, incident_context)
            for art_id, locations in geo_results.items():
                for loc in locations:
                    await db.execute(
--- a/src/routers/public_api.py
+++ b/src/routers/public_api.py
@@ -108,6 +108,23 @@ async def get_lagebild(db=Depends(db_dependency)):
    except (json.JSONDecodeError, TypeError):
        sources_json = []
+    # Locations aggregiert nach normalisierten Ortsnamen
+    cursor = await db.execute(
+        f"""SELECT
+                al.location_name_normalized as name,
+                al.latitude as lat,
+                al.longitude as lon,
+                al.country_code,
+                al.category,
+                COUNT(*) as article_count,
+                MAX(al.confidence) as confidence
+           FROM article_locations al
+           WHERE al.incident_id IN ({ids})
+           GROUP BY al.location_name_normalized
+           ORDER BY article_count DESC"""
+    )
+    locations = [dict(r) for r in await cursor.fetchall()]
    return {
        "generated_at": datetime.now(TIMEZONE).isoformat(),
        "incident": {
@@ -130,6 +147,7 @@ async def get_lagebild(db=Depends(db_dependency)):
        "articles": articles,
        "fact_checks": fact_checks,
        "available_snapshots": available_snapshots,
+        "locations": locations,
    }
--- a/src/static/js/components.js
+++ b/src/static/js/components.js
@@ -642,6 +642,7 @@ const UI = {
        this._markerIcons = {
            target: this._createSvgIcon('#dc3545', '#a71d2a'),
            retaliation: this._createSvgIcon('#f39c12', '#c47d0a'),
+            response: this._createSvgIcon('#f39c12', '#c47d0a'),
            actor: this._createSvgIcon('#2a81cb', '#1a5c8f'),
            mentioned: this._createSvgIcon('#7b7b7b', '#555555'),
        };
@@ -650,12 +651,14 @@ const UI = {
    _categoryLabels: {
        target: 'Angegriffene Ziele',
        retaliation: 'Vergeltung / Eskalation',
+        response: 'Reaktion / Gegenmassnahmen',
        actor: 'Strategische Akteure',
        mentioned: 'Erwaehnt',
    },
    _categoryColors: {
        target: '#cb2b3e',
        retaliation: '#f39c12',
+        response: '#f39c12',
        actor: '#2a81cb',
        mentioned: '#7b7b7b',
    },
@@ -799,7 +802,7 @@ const UI = {
            legend.onAdd = function() {
                const div = L.DomUtil.create('div', 'map-legend-ctrl');
                let html = '<strong style="display:block;margin-bottom:6px;">Legende</strong>';
-                ['target', 'retaliation', 'actor', 'mentioned'].forEach(cat => {
+                ['target', 'retaliation', 'response', 'actor', 'mentioned'].forEach(cat => {
                    if (usedCategories.has(cat)) {
                        html += `<div style="display:flex;align-items:center;gap:6px;margin:3px 0;"><span style="width:10px;height:10px;border-radius:50%;background:${self2._categoryColors[cat]};flex-shrink:0;"></span><span>${self2._categoryLabels[cat]}</span></div>`;
                    }