Geoparsing: Laender vor Staedten pruefen, Alias-Tabelle
Behebt falsche Geocodierung bei Laendernamen, die auch als Staedtenamen existieren (Lebanon -> US statt Libanon, Jordan -> HK statt Jordanien).
- Laender-Aliase (50+, deutsch/englisch) werden zuerst geprueft
- geonamescache-Laendersuche laeuft vor der Staedtesuche
- Stadtsuche in eigene Funktion _geocode_city() extrahiert
- Bestehende falsche Marker in der DB korrigiert
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Dieser Commit ist enthalten in:
@@ -31,12 +31,71 @@ def _get_geonamescache():
|
||||
return _gc
|
||||
|
||||
|
||||
# Known country names (German / English / alternative spellings).
#
# One record per country: (ISO-2 code, English name, latitude, longitude,
# lower-cased aliases). Per the original table, the coordinates are those of
# the country's capital and serve as the marker position for the country.
# Keeping a single record per country avoids repeating (and accidentally
# de-synchronising) the same code/name/coordinates for every alias.
_COUNTRY_RECORDS = (
    ("LB", "Lebanon", 33.8938, 35.5018, ("libanon", "lebanon")),
    ("JO", "Jordan", 31.9454, 35.9284, ("jordan", "jordanien")),
    ("IR", "Iran", 35.6892, 51.3890, ("iran",)),
    ("IQ", "Iraq", 33.3152, 44.3661, ("irak", "iraq")),
    ("IL", "Israel", 31.7683, 35.2137, ("israel",)),
    ("SY", "Syria", 33.5138, 36.2765, ("syrien", "syria")),
    # "t\u00fcrkei" is the native spelling with u-umlaut.
    ("TR", "Turkey", 39.9334, 32.8597, ("tuerkei", "turkey", "t\u00fcrkei")),
    ("KW", "Kuwait", 29.3759, 47.9774, ("kuwait",)),
    ("BH", "Bahrain", 26.0667, 50.5577, ("bahrain",)),
    ("QA", "Qatar", 25.2854, 51.5310, ("katar", "qatar")),
    ("YE", "Yemen", 15.3694, 44.1910, ("jemen", "yemen")),
    ("OM", "Oman", 23.5880, 58.3829, ("oman",)),
    ("PK", "Pakistan", 33.6844, 73.0479, ("pakistan",)),
    ("AF", "Afghanistan", 34.5553, 69.2075, ("afghanistan",)),
    # "\u00e4gypten" is the native spelling with a-umlaut.
    ("EG", "Egypt", 30.0444, 31.2357, ("aegypten", "egypt", "\u00e4gypten")),
    ("SA", "Saudi Arabia", 24.7136, 46.6753, ("saudi-arabien", "saudi arabia")),
    ("DE", "Germany", 52.5200, 13.4050, ("deutschland", "germany")),
    ("FR", "France", 48.8566, 2.3522, ("frankreich", "france")),
    ("RU", "Russia", 55.7558, 37.6173, ("russland", "russia")),
    ("CN", "China", 39.9042, 116.4074, ("china",)),
    ("IN", "India", 28.6139, 77.2090, ("indien", "india")),
    (
        "US", "United States", 38.9072, -77.0369,
        ("usa", "vereinigte staaten", "united states"),
    ),
    # "gro\u00dfbritannien" is the native spelling with sharp s.
    (
        "GB", "United Kingdom", 51.5074, -0.1278,
        ("grossbritannien", "united kingdom", "gro\u00dfbritannien"),
    ),
    ("CH", "Switzerland", 46.9480, 7.4474, ("schweiz", "switzerland")),
    ("ES", "Spain", 40.4168, -3.7038, ("spanien", "spain")),
    ("IT", "Italy", 41.9028, 12.4964, ("italien", "italy")),
    ("CY", "Cyprus", 35.1856, 33.3823, ("zypern", "cyprus")),
    ("AZ", "Azerbaijan", 40.4093, 49.8671, ("aserbaidschan", "azerbaijan")),
    ("GR", "Greece", 37.9838, 23.7275, ("griechenland", "greece")),
    ("NL", "Netherlands", 52.3676, 4.9041, ("niederlande", "netherlands")),
    ("UA", "Ukraine", 50.4501, 30.5234, ("ukraine",)),
)

# Lookup table: lower-cased country name -> {"code", "name", "lat", "lon"}.
# Every alias gets its own dict so that a caller mutating one entry cannot
# silently change the entries of the other aliases of the same country.
# Non-ASCII aliases are written as escapes above to keep this source ASCII-only,
# matching the transliterated style used elsewhere in the file.
_COUNTRY_ALIASES = {
    alias: {"code": code, "name": name, "lat": lat, "lon": lon}
    for code, name, lat, lon, aliases in _COUNTRY_RECORDS
    for alias in aliases
}
|
||||
|
||||
|
||||
def _geocode_offline(name: str, country_code: str = "") -> Optional[dict]:
|
||||
"""Geocoding ueber geonamescache (offline).
|
||||
|
||||
Args:
|
||||
name: Ortsname (normalisiert von Haiku)
|
||||
country_code: ISO-2 Laendercode (von Haiku) fuer bessere Disambiguierung
|
||||
Reihenfolge: 1. Bekannte Laender-Aliase, 2. geonamescache-Laender, 3. Staedte.
|
||||
Laender werden IMMER vor Staedten geprueft um Verwechslungen zu vermeiden
|
||||
(z.B. Lebanon/US vs Libanon, Jordan/HK vs Jordanien).
|
||||
"""
|
||||
gc = _get_geonamescache()
|
||||
if gc is None:
|
||||
@@ -44,7 +103,51 @@ def _geocode_offline(name: str, country_code: str = "") -> Optional[dict]:
|
||||
|
||||
name_lower = name.lower().strip()
|
||||
|
||||
# 1. Stadtsuche
|
||||
# 1. Bekannte Laender-Aliase (schnellster + sicherster Pfad)
|
||||
alias = _COUNTRY_ALIASES.get(name_lower)
|
||||
if alias:
|
||||
return {
|
||||
"lat": alias["lat"],
|
||||
"lon": alias["lon"],
|
||||
"country_code": alias["code"],
|
||||
"normalized_name": alias["name"],
|
||||
"confidence": 0.95,
|
||||
}
|
||||
|
||||
# 2. geonamescache Laendersuche (vor Staedten!)
|
||||
countries = gc.get_countries()
|
||||
for code, country in countries.items():
|
||||
if country.get("name", "").lower() == name_lower:
|
||||
capital = country.get("capital", "")
|
||||
if capital:
|
||||
# Hauptstadt geocoden, aber als Land benennen
|
||||
cap_alias = _COUNTRY_ALIASES.get(capital.lower())
|
||||
if cap_alias:
|
||||
return {
|
||||
"lat": cap_alias["lat"],
|
||||
"lon": cap_alias["lon"],
|
||||
"country_code": code,
|
||||
"normalized_name": country["name"],
|
||||
"confidence": 0.9,
|
||||
}
|
||||
# Rekursiv die Hauptstadt suchen (nur Staedte-Pfad)
|
||||
cap_result = _geocode_city(capital, code)
|
||||
if cap_result:
|
||||
cap_result["normalized_name"] = country["name"]
|
||||
cap_result["confidence"] = 0.9
|
||||
return cap_result
|
||||
|
||||
# 3. Stadtsuche (nur wenn kein Land gefunden)
|
||||
return _geocode_city(name, country_code)
|
||||
|
||||
|
||||
def _geocode_city(name: str, country_code: str = "") -> Optional[dict]:
|
||||
"""Sucht einen Stadtnamen in geonamescache."""
|
||||
gc = _get_geonamescache()
|
||||
if gc is None:
|
||||
return None
|
||||
|
||||
name_lower = name.lower().strip()
|
||||
cities = gc.get_cities()
|
||||
matches = []
|
||||
for gid, city in cities.items():
|
||||
@@ -57,34 +160,22 @@ def _geocode_offline(name: str, country_code: str = "") -> Optional[dict]:
|
||||
if city_name.lower() == name_lower or name_lower in alt_list:
|
||||
matches.append(city)
|
||||
|
||||
if matches:
|
||||
# Disambiguierung: country_code bevorzugen, dann Population
|
||||
if country_code:
|
||||
cc_matches = [c for c in matches if c.get("countrycode", "").upper() == (country_code or "").upper()]
|
||||
if cc_matches:
|
||||
matches = cc_matches
|
||||
best = max(matches, key=lambda c: c.get("population", 0))
|
||||
return {
|
||||
"lat": float(best["latitude"]),
|
||||
"lon": float(best["longitude"]),
|
||||
"country_code": best.get("countrycode", ""),
|
||||
"normalized_name": best["name"],
|
||||
"confidence": min(1.0, 0.6 + (best.get("population", 0) / 10_000_000)),
|
||||
}
|
||||
if not matches:
|
||||
return None
|
||||
|
||||
# 2. Laendersuche
|
||||
countries = gc.get_countries()
|
||||
for code, country in countries.items():
|
||||
if country.get("name", "").lower() == name_lower:
|
||||
capital = country.get("capital", "")
|
||||
if capital:
|
||||
cap_result = _geocode_offline(capital)
|
||||
if cap_result:
|
||||
cap_result["normalized_name"] = country["name"]
|
||||
cap_result["confidence"] = 0.5
|
||||
return cap_result
|
||||
|
||||
return None
|
||||
# Disambiguierung: country_code bevorzugen, dann Population
|
||||
if country_code:
|
||||
cc_matches = [c for c in matches if c.get("countrycode", "").upper() == (country_code or "").upper()]
|
||||
if cc_matches:
|
||||
matches = cc_matches
|
||||
best = max(matches, key=lambda c: c.get("population", 0))
|
||||
return {
|
||||
"lat": float(best["latitude"]),
|
||||
"lon": float(best["longitude"]),
|
||||
"country_code": best.get("countrycode", ""),
|
||||
"normalized_name": best["name"],
|
||||
"confidence": min(1.0, 0.6 + (best.get("population", 0) / 10_000_000)),
|
||||
}
|
||||
|
||||
|
||||
def _geocode_location(name: str, country_code: str = "", haiku_coords: Optional[dict] = None) -> Optional[dict]:
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren