Geoparsing als Hintergrund-Task mit Fortschrittsanzeige

- Endpunkt startet async Background-Task statt synchron zu warten
- Batch-Verarbeitung (50 Artikel pro Batch)
- Neuer Status-Endpunkt GET /incidents/{id}/geoparse-status
- Frontend pollt alle 3s und zeigt Fortschritt im Button (z.B. "150/427 Artikel...")
- Kein Timeout-Problem mehr bei grossen Lagen

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Dieser Commit ist enthalten in:
claude-dev
2026-03-04 22:39:53 +01:00
Ursprung 6112cd5882
Commit 2f6dd97100
3 geänderte Dateien mit 135 neuen und 34 gelöschten Zeilen

Datei anzeigen

@@ -1,16 +1,20 @@
"""Incidents-Router: Lagen verwalten (Multi-Tenant).""" """Incidents-Router: Lagen verwalten (Multi-Tenant)."""
from fastapi import APIRouter, Depends, HTTPException, Query, status from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, status
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
from models import IncidentCreate, IncidentUpdate, IncidentResponse, SubscriptionUpdate, SubscriptionResponse from models import IncidentCreate, IncidentUpdate, IncidentResponse, SubscriptionUpdate, SubscriptionResponse
from auth import get_current_user from auth import get_current_user
from middleware.license_check import require_writable_license from middleware.license_check import require_writable_license
from database import db_dependency from database import db_dependency, get_db
from datetime import datetime, timezone from datetime import datetime, timezone
import asyncio
import aiosqlite import aiosqlite
import json import json
import logging
import re import re
import unicodedata import unicodedata
_geoparse_logger = logging.getLogger("osint.geoparse_bg")
router = APIRouter(prefix="/api/incidents", tags=["incidents"]) router = APIRouter(prefix="/api/incidents", tags=["incidents"])
INCIDENT_UPDATE_COLUMNS = { INCIDENT_UPDATE_COLUMNS = {
@@ -320,17 +324,18 @@ async def get_locations(
return list(loc_map.values()) return list(loc_map.values())
@router.post("/{incident_id}/geoparse") # Geoparse-Status pro Incident (in-memory)
async def trigger_geoparse( _geoparse_status: dict[int, dict] = {}
incident_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency), async def _run_geoparse_background(incident_id: int, tenant_id: int | None):
): """Hintergrund-Task: Geoparsing fuer alle Artikel einer Lage."""
"""Geoparsing fuer alle Artikel einer Lage nachholen (bestehende Orte werden uebersprungen).""" _geoparse_status[incident_id] = {"status": "running", "processed": 0, "total": 0, "locations": 0}
tenant_id = current_user.get("tenant_id") db = None
await _check_incident_access(db, incident_id, current_user["id"], tenant_id) try:
from agents.geoparsing import geoparse_articles
db = await get_db()
# Artikel laden, die noch keine Locations haben
cursor = await db.execute( cursor = await db.execute(
"""SELECT a.* FROM articles a """SELECT a.* FROM articles a
WHERE a.incident_id = ? WHERE a.incident_id = ?
@@ -340,12 +345,20 @@ async def trigger_geoparse(
articles = [dict(row) for row in await cursor.fetchall()] articles = [dict(row) for row in await cursor.fetchall()]
if not articles: if not articles:
return {"message": "Alle Artikel wurden bereits geoparsed", "new_locations": 0} _geoparse_status[incident_id] = {"status": "done", "processed": 0, "total": 0, "locations": 0}
return
try: total = len(articles)
from agents.geoparsing import geoparse_articles _geoparse_status[incident_id]["total"] = total
geo_results = await geoparse_articles(articles) _geoparse_logger.info(f"Geoparsing Hintergrund: {total} Artikel fuer Lage {incident_id}")
# In Batches verarbeiten (50 Artikel pro Batch)
batch_size = 50
geo_count = 0 geo_count = 0
processed = 0
for i in range(0, total, batch_size):
batch = articles[i:i + batch_size]
geo_results = await geoparse_articles(batch)
for art_id, locations in geo_results.items(): for art_id, locations in geo_results.items():
for loc in locations: for loc in locations:
await db.execute( await db.execute(
@@ -359,9 +372,60 @@ async def trigger_geoparse(
) )
geo_count += 1 geo_count += 1
await db.commit() await db.commit()
return {"message": f"{geo_count} Orte aus {len(geo_results)} Artikeln extrahiert", "new_locations": geo_count} processed += len(batch)
_geoparse_status[incident_id] = {"status": "running", "processed": processed, "total": total, "locations": geo_count}
_geoparse_status[incident_id] = {"status": "done", "processed": processed, "total": total, "locations": geo_count}
_geoparse_logger.info(f"Geoparsing fertig: {geo_count} Orte aus {processed} Artikeln (Lage {incident_id})")
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=f"Geoparsing fehlgeschlagen: {str(e)}") _geoparse_status[incident_id] = {"status": "error", "error": str(e)}
_geoparse_logger.error(f"Geoparsing Fehler (Lage {incident_id}): {e}")
finally:
if db:
await db.close()
@router.post("/{incident_id}/geoparse")
async def trigger_geoparse(
incident_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Geoparsing fuer alle Artikel einer Lage als Hintergrund-Task starten."""
tenant_id = current_user.get("tenant_id")
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
# Bereits laufend?
existing = _geoparse_status.get(incident_id, {})
if existing.get("status") == "running":
return {"status": "running", "message": f"Läuft bereits ({existing.get('processed', 0)}/{existing.get('total', 0)} Artikel)"}
# Pruefen ob es ueberhaupt ungeparsete Artikel gibt
cursor = await db.execute(
"""SELECT COUNT(*) as cnt FROM articles a
WHERE a.incident_id = ?
AND a.id NOT IN (SELECT DISTINCT article_id FROM article_locations WHERE incident_id = ?)""",
(incident_id, incident_id),
)
count = (await cursor.fetchone())["cnt"]
if count == 0:
return {"status": "done", "message": "Alle Artikel wurden bereits verarbeitet", "locations": 0}
# Hintergrund-Task starten
asyncio.create_task(_run_geoparse_background(incident_id, tenant_id))
return {"status": "started", "message": f"Geoparsing gestartet für {count} Artikel"}
@router.get("/{incident_id}/geoparse-status")
async def get_geoparse_status(
incident_id: int,
current_user: dict = Depends(get_current_user),
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Status des laufenden Geoparsing-Tasks abfragen."""
tenant_id = current_user.get("tenant_id")
await _check_incident_access(db, incident_id, current_user["id"], tenant_id)
return _geoparse_status.get(incident_id, {"status": "idle"})
@router.get("/{incident_id}/refresh-log") @router.get("/{incident_id}/refresh-log")

Datei anzeigen

@@ -110,6 +110,10 @@ const API = {
return this._request('POST', `/incidents/${incidentId}/geoparse`); return this._request('POST', `/incidents/${incidentId}/geoparse`);
}, },
getGeoparseStatus(incidentId) {
return this._request('GET', `/incidents/${incidentId}/geoparse-status`);
},
refreshIncident(id) { refreshIncident(id) {
return this._request('POST', `/incidents/${id}/refresh`); return this._request('POST', `/incidents/${id}/refresh`);
}, },

Datei anzeigen

@@ -1429,24 +1429,57 @@ const App = {
} }
}, },
_geoparsePolling: null,
async triggerGeoparse() { async triggerGeoparse() {
if (!this.currentIncidentId) return; if (!this.currentIncidentId) return;
const btn = document.getElementById('geoparse-btn'); const btn = document.getElementById('geoparse-btn');
if (btn) { btn.disabled = true; btn.textContent = 'Erkennung...'; } if (btn) { btn.disabled = true; btn.textContent = 'Wird gestartet...'; }
try { try {
const result = await API.triggerGeoparse(this.currentIncidentId); const result = await API.triggerGeoparse(this.currentIncidentId);
UI.showToast(result.message, result.new_locations > 0 ? 'success' : 'info'); if (result.status === 'done') {
if (result.new_locations > 0) { UI.showToast(result.message, 'info');
const locations = await API.getLocations(this.currentIncidentId).catch(() => []); if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; }
UI.renderMap(locations); return;
} }
UI.showToast(result.message, 'info');
this._pollGeoparse(this.currentIncidentId);
} catch (err) { } catch (err) {
UI.showToast('Geoparsing fehlgeschlagen: ' + err.message, 'error'); UI.showToast('Geoparsing fehlgeschlagen: ' + err.message, 'error');
} finally {
if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; } if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; }
} }
}, },
_pollGeoparse(incidentId) {
if (this._geoparsePolling) clearInterval(this._geoparsePolling);
const btn = document.getElementById('geoparse-btn');
this._geoparsePolling = setInterval(async () => {
try {
const st = await API.getGeoparseStatus(incidentId);
if (st.status === 'running') {
if (btn) btn.textContent = `${st.processed}/${st.total} Artikel...`;
} else {
clearInterval(this._geoparsePolling);
this._geoparsePolling = null;
if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; }
if (st.status === 'done' && st.locations > 0) {
UI.showToast(`${st.locations} Orte aus ${st.processed} Artikeln erkannt`, 'success');
const locations = await API.getLocations(incidentId).catch(() => []);
UI.renderMap(locations);
} else if (st.status === 'done') {
UI.showToast('Keine neuen Orte gefunden', 'info');
} else if (st.status === 'error') {
UI.showToast('Geoparsing fehlgeschlagen: ' + (st.error || ''), 'error');
}
}
} catch {
clearInterval(this._geoparsePolling);
this._geoparsePolling = null;
if (btn) { btn.disabled = false; btn.textContent = 'Orte erkennen'; }
}
}, 3000);
},
_formatInterval(minutes) { _formatInterval(minutes) {
if (minutes >= 10080 && minutes % 10080 === 0) { if (minutes >= 10080 && minutes % 10080 === 0) {
const w = minutes / 10080; const w = minutes / 10080;