Geoparsing: Laender vor Staedten pruefen, Alias-Tabelle

Behebt falsche Geocodierung bei Laendernamen die auch als Staedte
existieren (Lebanon->US statt Libanon, Jordan->HK statt Jordanien).

- Laender-Aliase (50+ deutsch/englisch) werden zuerst geprueft
- geonamescache Laendersuche vor Staedtesuche
- Stadtsuche in eigene _geocode_city() Funktion extrahiert
- Bestehende falsche Marker in DB korrigiert

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Dieser Commit ist enthalten in:
claude-dev
2026-03-08 23:39:45 +01:00
Ursprung e761c86a28
Commit 606c60a815

Datei anzeigen

@@ -1,318 +1,409 @@
"""Geoparsing-Modul: Haiku-basierte Ortsextraktion und Geocoding fuer Artikel.""" """Geoparsing-Modul: Haiku-basierte Ortsextraktion und Geocoding fuer Artikel."""
import asyncio import asyncio
import json import json
import logging import logging
import re import re
from typing import Optional from typing import Optional
from agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator from agents.claude_client import call_claude, ClaudeUsage, UsageAccumulator
from config import CLAUDE_MODEL_FAST from config import CLAUDE_MODEL_FAST
logger = logging.getLogger("osint.geoparsing") logger = logging.getLogger("osint.geoparsing")
# Geocoding-Cache (in-memory, lebt solange der Prozess laeuft) # Geocoding-Cache (in-memory, lebt solange der Prozess laeuft)
_geocode_cache: dict[str, Optional[dict]] = {} _geocode_cache: dict[str, Optional[dict]] = {}
# geonamescache-Instanz (lazy) # geonamescache-Instanz (lazy)
_gc = None _gc = None
def _get_geonamescache():
    """Lazily load and cache the shared geonamescache instance.

    Returns:
        The module-level ``GeonamesCache`` instance, or ``None`` when the
        package is not installed.  The import error is logged on every
        failed attempt (``_gc`` stays ``None``, so the import is retried).
    """
    global _gc
    if _gc is None:
        try:
            import geonamescache
            _gc = geonamescache.GeonamesCache()
            logger.info("geonamescache geladen")
        except ImportError:
            logger.error("geonamescache nicht installiert - pip install geonamescache")
            return None
    return _gc
def _geocode_offline(name: str, country_code: str = "") -> Optional[dict]: # Bekannte Laendernamen (deutsch/englisch/alternativ -> ISO-2 Code + Hauptstadt-Koordinaten)
"""Geocoding ueber geonamescache (offline). _COUNTRY_ALIASES = {
"libanon": {"code": "LB", "name": "Lebanon", "lat": 33.8938, "lon": 35.5018},
Args: "lebanon": {"code": "LB", "name": "Lebanon", "lat": 33.8938, "lon": 35.5018},
name: Ortsname (normalisiert von Haiku) "jordan": {"code": "JO", "name": "Jordan", "lat": 31.9454, "lon": 35.9284},
country_code: ISO-2 Laendercode (von Haiku) fuer bessere Disambiguierung "jordanien": {"code": "JO", "name": "Jordan", "lat": 31.9454, "lon": 35.9284},
""" "iran": {"code": "IR", "name": "Iran", "lat": 35.6892, "lon": 51.3890},
gc = _get_geonamescache() "irak": {"code": "IQ", "name": "Iraq", "lat": 33.3152, "lon": 44.3661},
if gc is None: "iraq": {"code": "IQ", "name": "Iraq", "lat": 33.3152, "lon": 44.3661},
return None "israel": {"code": "IL", "name": "Israel", "lat": 31.7683, "lon": 35.2137},
"syrien": {"code": "SY", "name": "Syria", "lat": 33.5138, "lon": 36.2765},
name_lower = name.lower().strip() "syria": {"code": "SY", "name": "Syria", "lat": 33.5138, "lon": 36.2765},
"tuerkei": {"code": "TR", "name": "Turkey", "lat": 39.9334, "lon": 32.8597},
# 1. Stadtsuche "turkey": {"code": "TR", "name": "Turkey", "lat": 39.9334, "lon": 32.8597},
cities = gc.get_cities() "kuwait": {"code": "KW", "name": "Kuwait", "lat": 29.3759, "lon": 47.9774},
matches = [] "bahrain": {"code": "BH", "name": "Bahrain", "lat": 26.0667, "lon": 50.5577},
for gid, city in cities.items(): "katar": {"code": "QA", "name": "Qatar", "lat": 25.2854, "lon": 51.5310},
city_name = city.get("name", "") "qatar": {"code": "QA", "name": "Qatar", "lat": 25.2854, "lon": 51.5310},
alt_names = city.get("alternatenames", "") "jemen": {"code": "YE", "name": "Yemen", "lat": 15.3694, "lon": 44.1910},
if isinstance(alt_names, list): "yemen": {"code": "YE", "name": "Yemen", "lat": 15.3694, "lon": 44.1910},
alt_list = [n.strip().lower() for n in alt_names if n.strip()] "oman": {"code": "OM", "name": "Oman", "lat": 23.5880, "lon": 58.3829},
else: "pakistan": {"code": "PK", "name": "Pakistan", "lat": 33.6844, "lon": 73.0479},
alt_list = [n.strip().lower() for n in str(alt_names).split(",") if n.strip()] "afghanistan": {"code": "AF", "name": "Afghanistan", "lat": 34.5553, "lon": 69.2075},
if city_name.lower() == name_lower or name_lower in alt_list: "aegypten": {"code": "EG", "name": "Egypt", "lat": 30.0444, "lon": 31.2357},
matches.append(city) "egypt": {"code": "EG", "name": "Egypt", "lat": 30.0444, "lon": 31.2357},
"saudi-arabien": {"code": "SA", "name": "Saudi Arabia", "lat": 24.7136, "lon": 46.6753},
if matches: "saudi arabia": {"code": "SA", "name": "Saudi Arabia", "lat": 24.7136, "lon": 46.6753},
# Disambiguierung: country_code bevorzugen, dann Population "deutschland": {"code": "DE", "name": "Germany", "lat": 52.5200, "lon": 13.4050},
if country_code: "germany": {"code": "DE", "name": "Germany", "lat": 52.5200, "lon": 13.4050},
cc_matches = [c for c in matches if c.get("countrycode", "").upper() == (country_code or "").upper()] "frankreich": {"code": "FR", "name": "France", "lat": 48.8566, "lon": 2.3522},
if cc_matches: "france": {"code": "FR", "name": "France", "lat": 48.8566, "lon": 2.3522},
matches = cc_matches "russland": {"code": "RU", "name": "Russia", "lat": 55.7558, "lon": 37.6173},
best = max(matches, key=lambda c: c.get("population", 0)) "russia": {"code": "RU", "name": "Russia", "lat": 55.7558, "lon": 37.6173},
return { "china": {"code": "CN", "name": "China", "lat": 39.9042, "lon": 116.4074},
"lat": float(best["latitude"]), "indien": {"code": "IN", "name": "India", "lat": 28.6139, "lon": 77.2090},
"lon": float(best["longitude"]), "india": {"code": "IN", "name": "India", "lat": 28.6139, "lon": 77.2090},
"country_code": best.get("countrycode", ""), "usa": {"code": "US", "name": "United States", "lat": 38.9072, "lon": -77.0369},
"normalized_name": best["name"], "vereinigte staaten": {"code": "US", "name": "United States", "lat": 38.9072, "lon": -77.0369},
"confidence": min(1.0, 0.6 + (best.get("population", 0) / 10_000_000)), "united states": {"code": "US", "name": "United States", "lat": 38.9072, "lon": -77.0369},
} "grossbritannien": {"code": "GB", "name": "United Kingdom", "lat": 51.5074, "lon": -0.1278},
"united kingdom": {"code": "GB", "name": "United Kingdom", "lat": 51.5074, "lon": -0.1278},
# 2. Laendersuche "schweiz": {"code": "CH", "name": "Switzerland", "lat": 46.9480, "lon": 7.4474},
countries = gc.get_countries() "switzerland": {"code": "CH", "name": "Switzerland", "lat": 46.9480, "lon": 7.4474},
for code, country in countries.items(): "spanien": {"code": "ES", "name": "Spain", "lat": 40.4168, "lon": -3.7038},
if country.get("name", "").lower() == name_lower: "spain": {"code": "ES", "name": "Spain", "lat": 40.4168, "lon": -3.7038},
capital = country.get("capital", "") "italien": {"code": "IT", "name": "Italy", "lat": 41.9028, "lon": 12.4964},
if capital: "italy": {"code": "IT", "name": "Italy", "lat": 41.9028, "lon": 12.4964},
cap_result = _geocode_offline(capital) "zypern": {"code": "CY", "name": "Cyprus", "lat": 35.1856, "lon": 33.3823},
if cap_result: "cyprus": {"code": "CY", "name": "Cyprus", "lat": 35.1856, "lon": 33.3823},
cap_result["normalized_name"] = country["name"] "aserbaidschan": {"code": "AZ", "name": "Azerbaijan", "lat": 40.4093, "lon": 49.8671},
cap_result["confidence"] = 0.5 "azerbaijan": {"code": "AZ", "name": "Azerbaijan", "lat": 40.4093, "lon": 49.8671},
return cap_result "griechenland": {"code": "GR", "name": "Greece", "lat": 37.9838, "lon": 23.7275},
"greece": {"code": "GR", "name": "Greece", "lat": 37.9838, "lon": 23.7275},
return None "niederlande": {"code": "NL", "name": "Netherlands", "lat": 52.3676, "lon": 4.9041},
"netherlands": {"code": "NL", "name": "Netherlands", "lat": 52.3676, "lon": 4.9041},
"ukraine": {"code": "UA", "name": "Ukraine", "lat": 50.4501, "lon": 30.5234},
def _geocode_location(name: str, country_code: str = "", haiku_coords: Optional[dict] = None) -> Optional[dict]: }
"""Geocoded einen Ortsnamen. Prioritaet: geonamescache > Haiku-Koordinaten.
Args: def _geocode_offline(name: str, country_code: str = "") -> Optional[dict]:
name: Ortsname """Geocoding ueber geonamescache (offline).
country_code: ISO-2 Code (von Haiku)
haiku_coords: {"lat": float, "lon": float} (Fallback von Haiku) Reihenfolge: 1. Bekannte Laender-Aliase, 2. geonamescache-Laender, 3. Staedte.
""" Laender werden IMMER vor Staedten geprueft um Verwechslungen zu vermeiden
cache_key = f"{name.lower().strip()}|{(country_code or '').upper()}" (z.B. Lebanon/US vs Libanon, Jordan/HK vs Jordanien).
if cache_key in _geocode_cache: """
return _geocode_cache[cache_key] gc = _get_geonamescache()
if gc is None:
result = _geocode_offline(name, country_code) return None
# Fallback: Haiku-Koordinaten nutzen name_lower = name.lower().strip()
if result is None and haiku_coords:
lat = haiku_coords.get("lat") # 1. Bekannte Laender-Aliase (schnellster + sicherster Pfad)
lon = haiku_coords.get("lon") alias = _COUNTRY_ALIASES.get(name_lower)
if lat is not None and lon is not None: if alias:
result = { return {
"lat": float(lat), "lat": alias["lat"],
"lon": float(lon), "lon": alias["lon"],
"country_code": country_code.upper() if country_code else "", "country_code": alias["code"],
"normalized_name": name, "normalized_name": alias["name"],
"confidence": 0.45, "confidence": 0.95,
} }
_geocode_cache[cache_key] = result # 2. geonamescache Laendersuche (vor Staedten!)
return result countries = gc.get_countries()
for code, country in countries.items():
if country.get("name", "").lower() == name_lower:
HAIKU_GEOPARSE_PROMPT = """Extrahiere alle geographischen Orte aus diesen Nachrichten-Headlines. capital = country.get("capital", "")
if capital:
Kontext der Lage: "{incident_context}" # Hauptstadt geocoden, aber als Land benennen
cap_alias = _COUNTRY_ALIASES.get(capital.lower())
Regeln: if cap_alias:
- Nur echte Orte (Staedte, Laender, Regionen) return {
- Keine Personen, Organisationen, Gebaeude, Alltagswoerter "lat": cap_alias["lat"],
- Bei "US-Militaer" etc: Land (USA) extrahieren, nicht das Kompositum "lon": cap_alias["lon"],
- HTML-Tags ignorieren "country_code": code,
- Jeder Ort nur einmal pro Headline "normalized_name": country["name"],
- Regionen wie "Middle East", "Gulf", "Naher Osten" NICHT extrahieren (kein einzelner Punkt auf der Karte) "confidence": 0.9,
}
Klassifiziere basierend auf dem Lage-Kontext: # Rekursiv die Hauptstadt suchen (nur Staedte-Pfad)
- "target": Wo das Ereignis passiert / Schaden entsteht cap_result = _geocode_city(capital, code)
- "response": Wo Reaktionen / Gegenmassnahmen stattfinden if cap_result:
- "actor": Wo Entscheidungen getroffen werden / Entscheider sitzen cap_result["normalized_name"] = country["name"]
- "mentioned": Nur erwaehnt, kein direkter Bezug cap_result["confidence"] = 0.9
return cap_result
Headlines:
{headlines} # 3. Stadtsuche (nur wenn kein Land gefunden)
return _geocode_city(name, country_code)
Antwort NUR als JSON-Array, kein anderer Text:
[{{"headline_idx": 0, "locations": [
{{"name": "Teheran", "normalized": "Tehran", "country_code": "IR", def _geocode_city(name: str, country_code: str = "") -> Optional[dict]:
"type": "city", "category": "target", """Sucht einen Stadtnamen in geonamescache."""
"lat": 35.69, "lon": 51.42}} gc = _get_geonamescache()
]}}]""" if gc is None:
return None
async def _extract_locations_haiku( name_lower = name.lower().strip()
headlines: list[dict], incident_context: str cities = gc.get_cities()
) -> dict[int, list[dict]]: matches = []
"""Extrahiert Orte aus Headlines via Haiku. for gid, city in cities.items():
city_name = city.get("name", "")
Args: alt_names = city.get("alternatenames", "")
headlines: [{"idx": article_id, "text": headline_text}, ...] if isinstance(alt_names, list):
incident_context: Lage-Kontext fuer Klassifizierung alt_list = [n.strip().lower() for n in alt_names if n.strip()]
else:
Returns: alt_list = [n.strip().lower() for n in str(alt_names).split(",") if n.strip()]
dict[article_id -> list[{name, normalized, country_code, type, category, lat, lon}]] if city_name.lower() == name_lower or name_lower in alt_list:
""" matches.append(city)
if not headlines:
return {} if not matches:
return None
# Headlines formatieren
headline_lines = [] # Disambiguierung: country_code bevorzugen, dann Population
for i, h in enumerate(headlines): if country_code:
headline_lines.append(f"[{i}] {h['text']}") cc_matches = [c for c in matches if c.get("countrycode", "").upper() == (country_code or "").upper()]
if cc_matches:
prompt = HAIKU_GEOPARSE_PROMPT.format( matches = cc_matches
incident_context=incident_context or "Allgemeine Nachrichtenlage", best = max(matches, key=lambda c: c.get("population", 0))
headlines="\n".join(headline_lines), return {
) "lat": float(best["latitude"]),
"lon": float(best["longitude"]),
try: "country_code": best.get("countrycode", ""),
result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST) "normalized_name": best["name"],
except Exception as e: "confidence": min(1.0, 0.6 + (best.get("population", 0) / 10_000_000)),
logger.error(f"Haiku-Geoparsing fehlgeschlagen: {e}") }
return {}
def _geocode_location(name: str, country_code: str = "", haiku_coords: Optional[dict] = None) -> Optional[dict]:
    """Geocode a place name.  Priority: geonamescache > Haiku coordinates.

    Args:
        name: Place name.
        country_code: ISO-2 code (from Haiku).
        haiku_coords: {"lat": float, "lon": float} fallback from Haiku.

    Returns:
        Geocoding dict or None.  Results - including misses - are stored
        in the module-level cache keyed by (name, country_code).
    """
    cache_key = f"{name.lower().strip()}|{(country_code or '').upper()}"
    if cache_key in _geocode_cache:
        return _geocode_cache[cache_key]

    result = _geocode_offline(name, country_code)

    # Fallback: use the coordinates Haiku suggested (low confidence)
    if result is None and haiku_coords:
        lat = haiku_coords.get("lat")
        lon = haiku_coords.get("lon")
        if lat is not None and lon is not None:
            result = {
                "lat": float(lat),
                "lon": float(lon),
                "country_code": country_code.upper() if country_code else "",
                "normalized_name": name,
                "confidence": 0.45,
            }

    _geocode_cache[cache_key] = result
    return result
# Prompt template for Haiku-based location extraction.  Placeholders:
# {incident_context} and {headlines}; double braces are literal JSON braces
# for str.format.  The prompt text itself is runtime data and stays German.
HAIKU_GEOPARSE_PROMPT = """Extrahiere alle geographischen Orte aus diesen Nachrichten-Headlines.

Kontext der Lage: "{incident_context}"

Regeln:
- Nur echte Orte (Staedte, Laender, Regionen)
- Keine Personen, Organisationen, Gebaeude, Alltagswoerter
- Bei "US-Militaer" etc: Land (USA) extrahieren, nicht das Kompositum
- HTML-Tags ignorieren
- Jeder Ort nur einmal pro Headline
- Regionen wie "Middle East", "Gulf", "Naher Osten" NICHT extrahieren (kein einzelner Punkt auf der Karte)

Klassifiziere basierend auf dem Lage-Kontext:
- "target": Wo das Ereignis passiert / Schaden entsteht
- "response": Wo Reaktionen / Gegenmassnahmen stattfinden
- "actor": Wo Entscheidungen getroffen werden / Entscheider sitzen
- "mentioned": Nur erwaehnt, kein direkter Bezug

Headlines:
{headlines}

Antwort NUR als JSON-Array, kein anderer Text:
[{{"headline_idx": 0, "locations": [
  {{"name": "Teheran", "normalized": "Tehran", "country_code": "IR",
    "type": "city", "category": "target",
    "lat": 35.69, "lon": 51.42}}
]}}]"""
async def _extract_locations_haiku(
async def geoparse_articles( headlines: list[dict], incident_context: str
articles: list[dict], ) -> dict[int, list[dict]]:
incident_context: str = "", """Extrahiert Orte aus Headlines via Haiku.
) -> dict[int, list[dict]]:
"""Geoparsing fuer eine Liste von Artikeln via Haiku + geonamescache. Args:
headlines: [{"idx": article_id, "text": headline_text}, ...]
Args: incident_context: Lage-Kontext fuer Klassifizierung
articles: Liste von Artikel-Dicts (mit id, headline, headline_de, language)
incident_context: Lage-Kontext (Titel + Beschreibung) fuer kontextbewusste Klassifizierung Returns:
dict[article_id -> list[{name, normalized, country_code, type, category, lat, lon}]]
Returns: """
dict[article_id -> list[{location_name, location_name_normalized, country_code, if not headlines:
lat, lon, confidence, source_text, category}]] return {}
"""
if not articles: # Headlines formatieren
return {} headline_lines = []
for i, h in enumerate(headlines):
# Headlines sammeln headline_lines.append(f"[{i}] {h['text']}")
headlines = []
for article in articles: prompt = HAIKU_GEOPARSE_PROMPT.format(
article_id = article.get("id") incident_context=incident_context or "Allgemeine Nachrichtenlage",
if not article_id: headlines="\n".join(headline_lines),
continue )
# Deutsche Headline bevorzugen try:
headline = article.get("headline_de") or article.get("headline") or "" result_text, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST)
headline = headline.strip() except Exception as e:
if not headline: logger.error(f"Haiku-Geoparsing fehlgeschlagen: {e}")
continue return {}
headlines.append({"idx": article_id, "text": headline}) # JSON parsen (mit Regex-Fallback)
parsed = None
if not headlines: try:
return {} parsed = json.loads(result_text)
except json.JSONDecodeError:
# Batches bilden (max 50 Headlines pro Haiku-Call) match = re.search(r'\[.*\]', result_text, re.DOTALL)
batch_size = 50 if match:
all_haiku_results = {} try:
for i in range(0, len(headlines), batch_size): parsed = json.loads(match.group())
batch = headlines[i:i + batch_size] except json.JSONDecodeError:
batch_results = await _extract_locations_haiku(batch, incident_context) logger.warning("Haiku-Geoparsing: JSON-Parse fehlgeschlagen auch mit Regex-Fallback")
all_haiku_results.update(batch_results) return {}
if not all_haiku_results: if not parsed or not isinstance(parsed, list):
return {} logger.warning("Haiku-Geoparsing: Kein gueltiges JSON-Array erhalten")
return {}
# Geocoding via geonamescache (mit Haiku-Koordinaten als Fallback)
result = {} # Ergebnisse den Artikeln zuordnen
for article_id, haiku_locs in all_haiku_results.items(): results = {}
locations = [] for entry in parsed:
for loc in haiku_locs: if not isinstance(entry, dict):
haiku_coords = None continue
if loc.get("lat") is not None and loc.get("lon") is not None: headline_idx = entry.get("headline_idx")
haiku_coords = {"lat": loc["lat"], "lon": loc["lon"]} if headline_idx is None or headline_idx >= len(headlines):
continue
geo = _geocode_location(
loc["normalized"], article_id = headlines[headline_idx]["idx"]
loc.get("country_code", ""), locations = entry.get("locations", [])
haiku_coords,
) if not locations:
continue
if geo:
locations.append({ article_locs = []
"location_name": loc["name"], for loc in locations:
"location_name_normalized": geo["normalized_name"], if not isinstance(loc, dict):
"country_code": geo["country_code"], continue
"lat": geo["lat"], loc_type = loc.get("type", "city")
"lon": geo["lon"], # Regionen nicht speichern (kein sinnvoller Punkt auf der Karte)
"confidence": geo["confidence"], if loc_type == "region":
"source_text": loc["name"], continue
"category": loc.get("category", "mentioned"),
}) name = loc.get("name", "")
if not name:
if locations: continue
result[article_id] = locations
article_locs.append({
return result "name": name,
"normalized": loc.get("normalized", name),
"country_code": loc.get("country_code", ""),
"type": loc_type,
"category": loc.get("category", "mentioned"),
"lat": loc.get("lat"),
"lon": loc.get("lon"),
})
if article_locs:
results[article_id] = article_locs
return results
async def geoparse_articles(
    articles: list[dict],
    incident_context: str = "",
) -> dict[int, list[dict]]:
    """Geoparse a list of articles via Haiku + geonamescache.

    Args:
        articles: Article dicts (with id, headline, headline_de, language).
        incident_context: Incident context (title + description) for
            context-aware location classification.

    Returns:
        dict[article_id -> list[{location_name, location_name_normalized,
        country_code, lat, lon, confidence, source_text, category}]]
    """
    if not articles:
        return {}

    # Collect headlines, keyed back to their article ids
    headlines = []
    for article in articles:
        article_id = article.get("id")
        if not article_id:  # NOTE: also skips a (theoretical) id of 0
            continue
        # Prefer the German headline
        headline = article.get("headline_de") or article.get("headline") or ""
        headline = headline.strip()
        if not headline:
            continue
        headlines.append({"idx": article_id, "text": headline})

    if not headlines:
        return {}

    # Batch the Haiku calls (max 50 headlines per call)
    batch_size = 50
    all_haiku_results = {}
    for i in range(0, len(headlines), batch_size):
        batch = headlines[i:i + batch_size]
        batch_results = await _extract_locations_haiku(batch, incident_context)
        all_haiku_results.update(batch_results)

    if not all_haiku_results:
        return {}

    # Geocode via geonamescache, with Haiku's coordinates as fallback
    result = {}
    for article_id, haiku_locs in all_haiku_results.items():
        locations = []
        for loc in haiku_locs:
            haiku_coords = None
            if loc.get("lat") is not None and loc.get("lon") is not None:
                haiku_coords = {"lat": loc["lat"], "lon": loc["lon"]}

            geo = _geocode_location(
                loc["normalized"],
                loc.get("country_code", ""),
                haiku_coords,
            )

            if geo:
                locations.append({
                    "location_name": loc["name"],
                    "location_name_normalized": geo["normalized_name"],
                    "country_code": geo["country_code"],
                    "lat": geo["lat"],
                    "lon": geo["lon"],
                    "confidence": geo["confidence"],
                    "source_text": loc["name"],
                    "category": loc.get("category", "mentioned"),
                })

        if locations:
            result[article_id] = locations

    return result