diff --git a/src/agents/claude_client.py b/src/agents/claude_client.py
index e624c90..a79d72f 100644
--- a/src/agents/claude_client.py
+++ b/src/agents/claude_client.py
@@ -13,6 +13,35 @@ _cancel_event_var: contextvars.ContextVar[asyncio.Event | None] = contextvars.Co
logger = logging.getLogger("osint.claude_client")
+class ClaudeCliError(RuntimeError):
+ """Structured error raised for a failed Claude CLI call, tagged with a category.
+
+ error_type:
+ - "rate_limit": Anthropic rate limit or overload (transient, worth retrying)
+ - "auth_error": account problem (organization has no Claude access,
+ token expired/invalid) - retrying is pointless, an admin has to act
+ - "timeout": Claude CLI timeout (transient)
+ - "cli_error": any other CLI error (unspecific, default)
+
+ Attributes:
+ error_type: the category string passed to the constructor.
+ message: the raw error text passed to the constructor.
+
+ str(exc) yields "Claude CLI [<error_type>]: <message>".
+ """
+
+ def __init__(self, error_type: str, message: str):
+ # Keep category and raw text as attributes so handlers can branch on
+ # error_type without parsing the formatted string.
+ # NOTE(review): _classify_cli_error never returns "timeout"; presumably
+ # that category is meant to be set by callers elsewhere - confirm.
+ self.error_type = error_type
+ self.message = message
+ # NOTE(review): only the formatted string ends up in self.args, while
+ # __init__ requires two arguments; copy/pickle of this exception would
+ # re-invoke the constructor with one argument - confirm instances never
+ # need to be copied or cross a process boundary.
+ super().__init__(f"Claude CLI [{error_type}]: {message}")
+
+
+def _classify_cli_error(combined_output: str) -> str:
+ """Map Claude CLI error output to an error_type category.
+
+ The text is lower-cased and searched for fixed substrings. Rate-limit
+ keywords are checked before auth keywords, so output that matches both
+ lists is reported as "rate_limit".
+
+ Args:
+ combined_output: error text to inspect; matching is case-insensitive.
+
+ Returns:
+ "rate_limit", "auth_error", or "cli_error" when no keyword matches.
+ """
+ txt = combined_output.lower()
+ # NOTE(review): "resets" is a short, generic substring; it presumably targets
+ # "limit resets at ..." messages - confirm it cannot match unrelated output.
+ rate_limit_keywords = ["hit your limit", "rate limit", "resets", "rate_limit", "overloaded"]
+ auth_error_keywords = ["does not have access", "login again", "contact your administrator"]
+ # Order matters: the first matching category wins.
+ if any(kw in txt for kw in rate_limit_keywords):
+ return "rate_limit"
+ if any(kw in txt for kw in auth_error_keywords):
+ return "auth_error"
+ # Fallback category when nothing matched.
+ return "cli_error"
+
+
@dataclass
class ClaudeUsage:
"""Token-Verbrauch eines einzelnen Claude CLI Aufrufs."""
@@ -121,19 +150,20 @@ async def call_claude(prompt: str, tools: str | None = "WebSearch,WebFetch", mod
error_msg = stderr.decode("utf-8", errors="replace").strip()
stdout_msg = stdout.decode("utf-8", errors="replace").strip()
- # Rate-Limit-Fehler kommen als JSON auf stdout, nicht auf stderr
- error_type = "cli_error"
- rate_limit_keywords = ["hit your limit", "rate limit", "resets", "rate_limit", "overloaded"]
- combined_output = f"{error_msg} {stdout_msg}".lower()
- if any(kw in combined_output for kw in rate_limit_keywords):
- error_type = "rate_limit"
+ # Rate-Limit/Auth-Fehler kommen teils als JSON auf stdout, nicht auf stderr
+ combined_output = f"{error_msg} {stdout_msg}"
+ error_type = _classify_cli_error(combined_output)
+
+ if error_type == "rate_limit":
logger.warning(f"Claude CLI Rate-Limit (Exit {process.returncode}): {stdout_msg or error_msg}")
+ elif error_type == "auth_error":
+ logger.error(f"Claude CLI Auth-Fehler (Exit {process.returncode}): {stdout_msg or error_msg}")
else:
logger.error(f"Claude CLI Fehler (Exit {process.returncode}): {error_msg}")
if stdout_msg:
logger.error(f"Claude CLI stdout bei Fehler: {stdout_msg[:500]}")
- raise RuntimeError(f"Claude CLI Fehler [{error_type}]: {stdout_msg or error_msg}")
+ raise ClaudeCliError(error_type, stdout_msg or error_msg)
raw = stdout.decode("utf-8", errors="replace").strip()
usage = ClaudeUsage()
@@ -141,6 +171,19 @@ async def call_claude(prompt: str, tools: str | None = "WebSearch,WebFetch", mod
try:
data = json.loads(raw)
+ # CLI kann returncode=0 liefern und trotzdem is_error=true setzen
+ # (z.B. "Your organization does not have access to Claude")
+ if data.get("is_error"):
+ error_text = str(data.get("result", ""))
+ error_type = _classify_cli_error(error_text)
+ if error_type == "rate_limit":
+ logger.warning(f"Claude CLI Rate-Limit (is_error): {error_text}")
+ elif error_type == "auth_error":
+ logger.error(f"Claude CLI Auth-Fehler (is_error): {error_text}")
+ else:
+ logger.error(f"Claude CLI Fehler (is_error): {error_text}")
+ raise ClaudeCliError(error_type, error_text)
+
result_text = data.get("result", raw)
u = data.get("usage", {})
usage = ClaudeUsage(
diff --git a/src/agents/orchestrator.py b/src/agents/orchestrator.py
index 23c5247..997bfeb 100644
--- a/src/agents/orchestrator.py
+++ b/src/agents/orchestrator.py
@@ -527,8 +527,12 @@ class AgentOrchestrator:
RETRY_DELAYS = [0, 120, 300] # Sekunden: sofort, 2min, 5min
TRANSIENT_ERRORS = (asyncio.TimeoutError, TimeoutError, ConnectionError, OSError)
+ from agents.claude_client import ClaudeCliError
last_error = None
+ def _is_transient_cli(err: Exception) -> bool:
+ """Return True if err is a ClaudeCliError with error_type "rate_limit" or "timeout".
+
+ NOTE(review): no raise site visible in this patch produces "timeout";
+ presumably it is reserved for callers elsewhere - confirm.
+ """
+ return isinstance(err, ClaudeCliError) and err.error_type in ("rate_limit", "timeout")
+
try:
# Research-Lagen: Automatisch 3 Durchläufe nur beim ersten Refresh
incident_type, has_summary = await self._get_incident_info(incident_id)
@@ -557,32 +561,44 @@ class AgentOrchestrator:
}, _vis, _cb, _tid)
last_error = None
break
- except TRANSIENT_ERRORS as e:
- last_error = e
- logger.warning(f"Transienter Fehler bei Lage {incident_id} (Versuch {attempt + 1}/3): {e}")
- if attempt < 2:
- await self._mark_refresh_failed(incident_id, str(e))
- delay = RETRY_DELAYS[attempt + 1]
- logger.info(f"Retry in {delay}s für Lage {incident_id}")
- # Retry-Status per WebSocket senden
- if self._ws_manager:
- try:
- _vis, _cb, _tid = await self._get_incident_visibility(incident_id)
- except Exception:
- _vis, _cb, _tid = "public", None, None
- await self._ws_manager.broadcast_for_incident({
- "type": "status_update",
- "incident_id": incident_id,
- "data": {"status": "retrying", "attempt": attempt + 1, "delay": delay},
- }, _vis, _cb, _tid)
- await asyncio.sleep(delay)
- else:
- await self._mark_refresh_failed(incident_id, f"Endgültig fehlgeschlagen nach 3 Versuchen: {e}")
except Exception as e:
+ # Auth/CLI-Fehler: sofort abbrechen, kein Retry sinnvoll
+ if isinstance(e, ClaudeCliError) and e.error_type in ("auth_error", "cli_error"):
+ last_error = e
+ logger.error(f"Permanenter Claude-Fehler [{e.error_type}] bei Lage {incident_id}: {e}")
+ await self._mark_refresh_failed(incident_id, str(e))
+ break
+
+ # Transiente Fehler: Retry bis 3x
+ if isinstance(e, TRANSIENT_ERRORS) or _is_transient_cli(e):
+ last_error = e
+ kind = e.error_type if isinstance(e, ClaudeCliError) else type(e).__name__
+ logger.warning(f"Transienter Fehler [{kind}] bei Lage {incident_id} (Versuch {attempt + 1}/3): {e}")
+ if attempt < 2:
+ await self._mark_refresh_failed(incident_id, str(e))
+ delay = RETRY_DELAYS[attempt + 1]
+ logger.info(f"Retry in {delay}s für Lage {incident_id}")
+ if self._ws_manager:
+ try:
+ _vis, _cb, _tid = await self._get_incident_visibility(incident_id)
+ except Exception:
+ _vis, _cb, _tid = "public", None, None
+ await self._ws_manager.broadcast_for_incident({
+ "type": "status_update",
+ "incident_id": incident_id,
+ "data": {"status": "retrying", "attempt": attempt + 1, "delay": delay},
+ }, _vis, _cb, _tid)
+ await asyncio.sleep(delay)
+ continue
+ else:
+ await self._mark_refresh_failed(incident_id, f"Endgültig fehlgeschlagen nach 3 Versuchen: {e}")
+ break
+
+ # Alles andere: permanent
last_error = e
logger.error(f"Permanenter Fehler bei Refresh für Lage {incident_id}: {e}")
await self._mark_refresh_failed(incident_id, str(e))
- break # Permanenter Fehler, kein Retry
+ break
if last_error and self._ws_manager:
try:
diff --git a/src/routers/chat.py b/src/routers/chat.py
index 93fc0a1..30c1277 100644
--- a/src/routers/chat.py
+++ b/src/routers/chat.py
@@ -15,7 +15,7 @@ from config import CLAUDE_PATH, CLAUDE_MODEL_FAST
from database import db_dependency
from middleware.license_check import require_writable_license
from services.license_service import charge_usage_to_tenant
-from agents.claude_client import ClaudeUsage
+from agents.claude_client import ClaudeUsage, ClaudeCliError, _classify_cli_error
import aiosqlite
logger = logging.getLogger("osint.chat")
@@ -59,10 +59,11 @@ async def _call_claude_chat(prompt: str) -> tuple[str, int, ClaudeUsage]:
if process.returncode != 0:
err_msg = stderr.decode("utf-8", errors="replace").strip()
- logger.error(f"Chat Claude CLI Fehler (rc={process.returncode}): {err_msg[:500]}")
- if "rate_limit" in err_msg.lower() or "overloaded" in err_msg.lower():
- raise RuntimeError("rate_limit")
- raise RuntimeError(f"Claude CLI Fehler: {err_msg[:200]}")
+ stdout_msg = stdout.decode("utf-8", errors="replace").strip()
+ combined = f"{err_msg} {stdout_msg}"
+ error_type = _classify_cli_error(combined)
+ logger.error(f"Chat Claude CLI Fehler [{error_type}] (rc={process.returncode}): {(stdout_msg or err_msg)[:500]}")
+ raise ClaudeCliError(error_type, stdout_msg or err_msg)
raw = stdout.decode("utf-8", errors="replace").strip()
duration_ms = 0
@@ -71,6 +72,12 @@ async def _call_claude_chat(prompt: str) -> tuple[str, int, ClaudeUsage]:
try:
data = _json.loads(raw)
+ if data.get("is_error"):
+ error_text = str(data.get("result", ""))
+ error_type = _classify_cli_error(error_text)
+ logger.error(f"Chat Claude CLI Fehler [{error_type}] (is_error): {error_text[:500]}")
+ raise ClaudeCliError(error_type, error_text)
+
result_text = data.get("result", raw)
duration_ms = data.get("duration_ms", 0)
u = data.get("usage", {})
@@ -437,11 +444,15 @@ async def chat(
result, duration_ms, usage = await _call_claude_chat(prompt)
except TimeoutError:
raise HTTPException(status_code=504, detail="Der Assistent antwortet gerade nicht. Bitte versuche es erneut.")
- except RuntimeError as e:
- error_str = str(e)
- if "rate_limit" in error_str:
+ except ClaudeCliError as e:
+ if e.error_type == "rate_limit":
raise HTTPException(status_code=429, detail="Der Assistent ist gerade ausgelastet. Bitte versuche es in einer Minute erneut.")
- logger.error(f"Chat Claude-Fehler: {e}")
+ if e.error_type == "auth_error":
+ raise HTTPException(status_code=503, detail="KI-Zugang aktuell nicht verfuegbar. Bitte Administrator kontaktieren.")
+ logger.error(f"Chat Claude-Fehler [{e.error_type}]: {e}")
+ raise HTTPException(status_code=502, detail="Der Assistent ist voruebergehend nicht erreichbar.")
+ except RuntimeError as e:
+ logger.error(f"Chat Claude-Fehler (unspezifisch): {e}")
raise HTTPException(status_code=502, detail="Der Assistent ist voruebergehend nicht erreichbar.")
# Credits buchen
diff --git a/src/routers/incidents.py b/src/routers/incidents.py
index fd02e8c..42276ea 100644
--- a/src/routers/incidents.py
+++ b/src/routers/incidents.py
@@ -245,7 +245,7 @@ async def enhance_description(
db: aiosqlite.Connection = Depends(db_dependency),
):
"""Generiert eine strukturierte Beschreibung per KI aus dem Titel."""
- from agents.claude_client import call_claude
+ from agents.claude_client import call_claude, ClaudeCliError
from config import CLAUDE_MODEL_FAST
from services.license_service import charge_usage_to_tenant
@@ -255,17 +255,30 @@ async def enhance_description(
try:
result, usage = await call_claude(prompt, tools=None, model=CLAUDE_MODEL_FAST, raw_text=True)
- _enhance_logger.info(
- f"Beschreibung generiert fuer \"{data.title[:50]}\": "
- f"{usage.input_tokens}in/{usage.output_tokens}out"
- )
- await charge_usage_to_tenant(db, current_user.get("tenant_id"), usage, source="enhance")
- await db.commit()
- return {"description": result.strip()}
+ except ClaudeCliError as e:
+ _enhance_logger.error(f"Beschreibung generieren: ClaudeCliError [{e.error_type}]: {e.message}")
+ if e.error_type == "auth_error":
+ raise HTTPException(status_code=503, detail="KI-Zugang aktuell nicht verfuegbar. Bitte Administrator kontaktieren.")
+ if e.error_type == "rate_limit":
+ raise HTTPException(status_code=429, detail="KI ist gerade ausgelastet. Bitte in einer Minute erneut versuchen.")
+ raise HTTPException(status_code=500, detail="Beschreibung konnte nicht generiert werden")
+ except TimeoutError:
+ _enhance_logger.error("Beschreibung generieren: Timeout")
+ raise HTTPException(status_code=504, detail="Die KI antwortet gerade nicht. Bitte erneut versuchen.")
+ except HTTPException:
+ raise
except Exception as e:
_enhance_logger.error(f"Beschreibung generieren fehlgeschlagen: {e}")
raise HTTPException(status_code=500, detail="Beschreibung konnte nicht generiert werden")
+ _enhance_logger.info(
+ f"Beschreibung generiert fuer \"{data.title[:50]}\": "
+ f"{usage.input_tokens}in/{usage.output_tokens}out"
+ )
+ await charge_usage_to_tenant(db, current_user.get("tenant_id"), usage, source="enhance")
+ await db.commit()
+ return {"description": result.strip()}
+
@router.get("/{incident_id}", response_model=IncidentResponse)
async def get_incident(
diff --git a/src/static/dashboard.html b/src/static/dashboard.html
index 8e2af02..6f5defe 100644
--- a/src/static/dashboard.html
+++ b/src/static/dashboard.html
@@ -624,11 +624,11 @@
-
+
-
+
diff --git a/src/static/js/api.js b/src/static/js/api.js
index eb8e1a1..e841a05 100644
--- a/src/static/js/api.js
+++ b/src/static/js/api.js
@@ -1,6 +1,16 @@
/**
* API-Client für den OSINT Lagemonitor.
*/
+
+/**
+ * Error type that carries a status code and the detail text in addition
+ * to the regular Error message.
+ *
+ * The message is `detail` when it is truthy, otherwise "Fehler <status>".
+ * Callers can branch on `err.status` instead of parsing the message.
+ */
+class ApiError extends Error {
+ /**
+ * @param {number} status - status code of the failed response
+ * @param {string} [detail] - detail text; may be empty or undefined
+ */
+ constructor(status, detail) {
+ super(detail || `Fehler ${status}`);
+ // Explicit name so handlers can tell this apart from other errors via err.name.
+ this.name = 'ApiError';
+ this.status = status;
+ this.detail = detail;
+ }
+}
+
const API = {
baseUrl: '/api',
@@ -57,7 +67,7 @@ const API = {
} else if (typeof detail === 'object' && detail !== null) {
detail = JSON.stringify(detail);
}
- throw new Error(detail || `Fehler ${response.status}`);
+ throw new ApiError(response.status, detail);
}
if (response.status === 204) return null;
diff --git a/src/static/js/app.js b/src/static/js/app.js
index 82fd47a..2f0dc71 100644
--- a/src/static/js/app.js
+++ b/src/static/js/app.js
@@ -1816,8 +1816,15 @@ async generateDescription() {
textarea.value = result.description;
_autoResizeTextarea(textarea);
} catch (err) {
- if (err.name !== 'AbortError') {
- UI.showToast('Beschreibung konnte nicht generiert werden', 'error');
+ if (err.name === 'AbortError') {
+ // AbortError: deliberately silent, no toast is shown
+ } else {
+ let msg = 'Beschreibung konnte nicht generiert werden';
+ if (err.status === 503) msg = 'KI-Zugang aktuell nicht verfügbar. Bitte Administrator kontaktieren.';
+ else if (err.status === 429) msg = 'KI ist gerade ausgelastet. Bitte kurz warten und erneut versuchen.';
+ else if (err.status === 504) msg = 'KI antwortet gerade nicht. Bitte erneut versuchen.';
+ else if (err.status === 403) msg = err.detail || 'Zugriff verweigert.';
+ UI.showToast(msg, 'error');
}
} finally {
btnText.textContent = 'Beschreibung generieren';