diff --git a/.claude/settings.local.json b/.claude/settings.local.json index c90728a..d7fd6ba 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -72,7 +72,8 @@ "Bash(done)", "Bash(docker compose:*)", "Bash(true)", - "Bash(git checkout:*)" + "Bash(git checkout:*)", + "Bash(touch:*)" ], "deny": [] } diff --git a/JOURNAL.md b/JOURNAL.md index 5ef1c82..42a7a04 100644 --- a/JOURNAL.md +++ b/JOURNAL.md @@ -1,5 +1,34 @@ # v2-Docker Projekt Journal +## Letzte Änderungen (19.06.2025) + +### Monitoring vereinfacht und optimiert +- **Prometheus/Grafana/Alertmanager entfernt**: + - Monitoring Stack aus docker-compose.yaml entfernt (spart ~3GB RAM) + - Vereinfacht das Setup für PoC-Phase erheblich + - Alle wichtigen Monitoring-Features bleiben über Admin Panel verfügbar + +- **Analytics-Seite überarbeitet**: + - Demo-Daten und statische Charts entfernt + - Revenue/Pricing-Metriken entfernt (Preismodell noch in Entwicklung) + - Zeigt jetzt echte Live-Statistiken aus der Datenbank + - Automatische Aktualisierung alle 30 Sekunden + - Verweis auf Live Dashboard für Echtzeit-Daten + +- **Integriertes Monitoring bleibt funktional**: + - Live Dashboard mit aktiven Sessions und Heartbeats + - System Status mit Service Health Checks + - Alerts aus anomaly_detections Tabelle + - Alle Daten direkt aus PostgreSQL ohne externe Dependencies + +### Status: +✅ Monitoring für PoC optimiert +✅ Analytics zeigt echte Daten statt Demo-Werte +✅ System ~3GB schlanker ohne externe Monitoring-Tools +✅ Alle wichtigen Features weiterhin verfügbar + +--- + ## Letzte Änderungen (18.06.2025) ### Große Refaktorisierung erfolgreich abgeschlossen diff --git a/LIZENZSERVER.md b/LIZENZSERVER.md index b0b6175..4e52bd8 100644 --- a/LIZENZSERVER.md +++ b/LIZENZSERVER.md @@ -667,24 +667,47 @@ Neuer Menüpunkt "Lizenzserver" mit folgenden Unterseiten: - JWT-basierte Admin-Authentifizierung - Comprehensive Audit Logging +### ✅ UPDATE: Monitoring für PoC optimiert (19.06.2025) + 
+**Entscheidung**: Prometheus/Grafana/Alertmanager wurden für die PoC-Phase entfernt +- Spart ~3GB RAM und vereinfacht das Setup erheblich +- Alle wichtigen Monitoring-Features bleiben über das Admin Panel verfügbar +- Für Production-Deployment später einfach wieder aktivierbar + +**Integriertes Monitoring im Admin Panel**: +1. **Live Dashboard** (/monitoring/live-dashboard) + - Echtzeit-Anzeige aktiver Sessions + - Validierungsstatistiken + - Direkte Datenbankabfragen + +2. **System Status** (/monitoring/system-status) + - Service Health Checks + - Response Time Monitoring + - Keine externen Dependencies + +3. **Alerts** (/monitoring/alerts) + - Anomalie-Erkennung aus DB + - Ungelöste Probleme + - Echtzeit-Updates + +4. **Analytics** (/monitoring/analytics) + - Echte Statistiken statt Demo-Daten + - Auto-Refresh alle 30 Sekunden + - Basis-Metriken ohne Pricing + ### 📋 Noch zu implementieren: -1. **Monitoring & Observability** - - Prometheus Integration - - Grafana Dashboards - - Alert Rules - -2. **Erweiterte Anomalie-Erkennung** +1. **Erweiterte Anomalie-Erkennung** - Machine Learning basierte Pattern-Erkennung - Geo-Location Anomalien - Automatische Aktionen bei kritischen Anomalien -3. **Performance Optimierungen** +2. **Performance Optimierungen** - Connection Pooling - Query Optimization - Batch Processing für Heartbeats -4. **Erweiterte Features** +3. 
**Erweiterte Features** - WebSocket für Live-Updates - Bulk-Operationen - Export-Funktionen diff --git a/monitoring/alertmanager/Dockerfile b/monitoring/alertmanager/Dockerfile new file mode 100644 index 0000000..24ebd04 --- /dev/null +++ b/monitoring/alertmanager/Dockerfile @@ -0,0 +1,19 @@ +FROM prom/alertmanager:v0.25.0 + +# Copy configuration +COPY alertmanager.yml /etc/alertmanager/alertmanager.yml + +# Create templates directory +USER root +RUN mkdir -p /etc/alertmanager/templates && \ + chown -R nobody:nobody /etc/alertmanager + +USER nobody + +# Expose Alertmanager port +EXPOSE 9093 + +# Start Alertmanager +CMD ["--config.file=/etc/alertmanager/alertmanager.yml", \ + "--storage.path=/alertmanager", \ + "--web.external-url=http://localhost:9093"] \ No newline at end of file diff --git a/monitoring/grafana/Dockerfile b/monitoring/grafana/Dockerfile new file mode 100644 index 0000000..d5f0e5f --- /dev/null +++ b/monitoring/grafana/Dockerfile @@ -0,0 +1,28 @@ +FROM grafana/grafana:10.0.0 + +# Environment variables +ENV GF_SECURITY_ADMIN_USER=admin +ENV GF_SECURITY_ADMIN_PASSWORD=admin +ENV GF_USERS_ALLOW_SIGN_UP=false +ENV GF_SERVER_ROOT_URL=http://localhost:3000 +ENV GF_ANALYTICS_REPORTING_ENABLED=false +ENV GF_ANALYTICS_CHECK_FOR_UPDATES=false + +# Copy provisioning configuration +COPY provisioning/ /etc/grafana/provisioning/ + +# Copy dashboards +COPY dashboards/ /var/lib/grafana/dashboards/ + +# Install plugins +RUN grafana-cli plugins install grafana-piechart-panel && \ + grafana-cli plugins install grafana-worldmap-panel + +# Set permissions (Grafana runs as UID 472) +USER root +RUN chown -R 472:0 /var/lib/grafana/dashboards && \ + chmod -R 755 /var/lib/grafana/dashboards +USER 472 + +# Expose Grafana port +EXPOSE 3000 \ No newline at end of file diff --git a/monitoring/grafana/dashboards/license-server-overview.json b/monitoring/grafana/dashboards/license-server-overview.json new file mode 100644 index 0000000..7b94965 --- /dev/null +++ 
b/monitoring/grafana/dashboards/license-server-overview.json @@ -0,0 +1,486 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": ["lastNotNull", "mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(license_validations_total[5m])", + "legendFormat": "Total Validations", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": 
"rate(license_validations_success_total[5m])", + "legendFormat": "Successful", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(license_validations_failed_total[5m])", + "legendFormat": "Failed", + "refId": "C" + } + ], + "title": "License Validation Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "active_licenses_total", + "refId": "A" + } + ], + "title": "Active Licenses", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10 + }, + { + "color": "red", + "value": 50 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": 
[ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "anomalies_unresolved_total", + "refId": "A" + } + ], + "title": "Unresolved Anomalies", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": ["lastNotNull", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket{service=\"license-server\"}[5m]))", + "legendFormat": "p50", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{service=\"license-server\"}[5m]))", + "legendFormat": "p95", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{service=\"license-server\"}[5m]))", + "legendFormat": "p99", + "refId": "C" + } + ], + "title": "API Response Time Percentiles", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": ["lastNotNull", "mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum by (anomaly_type) (rate(anomalies_detected_total[5m]))", + "legendFormat": "{{ anomaly_type }}", + "refId": "A" + } + ], + "title": "Anomaly Detection Rate by Type", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "dark", + "tags": ["license-server", "monitoring"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", 
+ "title": "License Server Overview", + "uid": "license-server-overview", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/monitoring/grafana/dashboards/system-overview.json b/monitoring/grafana/dashboards/system-overview.json new file mode 100644 index 0000000..213040f --- /dev/null +++ b/monitoring/grafana/dashboards/system-overview.json @@ -0,0 +1,522 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": ["lastNotNull", "mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": 
true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "legendFormat": "CPU Usage %", + "refId": "A" + } + ], + "title": "System CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": ["lastNotNull", "mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes", + "legendFormat": "Used Memory", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemAvailable_bytes", + "legendFormat": 
"Available Memory", + "refId": "B" + } + ], + "title": "System Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "repeat": "service", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "up{job=~\"license-server|admin-panel|auth-service|analytics-service|admin-api-service|postgres|redis|rabbitmq|nginx\"}", + "format": "time_series", + "instant": true, + "legendFormat": "{{ job }}", + "refId": "A" + } + ], + "title": "Service Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": 
"never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 4, + "options": { + "legend": { + "calcs": ["lastNotNull", "mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "100 - ((node_filesystem_avail_bytes{mountpoint=\"/\"} * 100) / node_filesystem_size_bytes{mountpoint=\"/\"})", + "legendFormat": "Disk Usage %", + "refId": "A" + } + ], + "title": "Disk Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 5, + "options": { + 
"legend": { + "calcs": ["lastNotNull", "mean"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(node_network_receive_bytes_total[5m])", + "legendFormat": "Receive", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(node_network_transmit_bytes_total[5m])", + "legendFormat": "Transmit", + "refId": "B" + } + ], + "title": "Network Traffic", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "dark", + "tags": ["system", "overview"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "System Overview", + "uid": "system-overview", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/monitoring/prometheus/Dockerfile b/monitoring/prometheus/Dockerfile new file mode 100644 index 0000000..289f9d1 --- /dev/null +++ b/monitoring/prometheus/Dockerfile @@ -0,0 +1,23 @@ +FROM prom/prometheus:v2.45.0 + +# Copy configuration files +COPY prometheus.yml /etc/prometheus/prometheus.yml +COPY alert_rules.yml /etc/prometheus/rules/alert_rules.yml + +# Create directories for rules +USER root +RUN mkdir -p /etc/prometheus/rules && \ + chown -R nobody:nobody /etc/prometheus + +USER nobody + +# Expose Prometheus port +EXPOSE 9090 + +# Start Prometheus with configuration +CMD ["--config.file=/etc/prometheus/prometheus.yml", \ + "--storage.tsdb.path=/prometheus", \ + "--web.console.libraries=/usr/share/prometheus/console_libraries", \ + "--web.console.templates=/usr/share/prometheus/consoles", \ + "--web.enable-lifecycle", \ + "--storage.tsdb.retention.time=30d"] \ No newline at end of file diff --git a/monitoring/prometheus/alert_rules.yml 
b/monitoring/prometheus/alert_rules.yml new file mode 100644 index 0000000..664087e --- /dev/null +++ b/monitoring/prometheus/alert_rules.yml @@ -0,0 +1,208 @@ +groups: + - name: service_alerts + interval: 30s + rules: + # Service Down Alerts + - alert: ServiceDown + expr: up == 0 + for: 2m + labels: + severity: critical + component: "{{ $labels.service }}" + annotations: + summary: "Service {{ $labels.service }} is down" + description: "{{ $labels.service }} has been down for more than 2 minutes." + + # High CPU Usage + - alert: HighCPUUsage + expr: rate(process_cpu_seconds_total[5m]) * 100 > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "High CPU usage on {{ $labels.service }}" + description: "CPU usage is above 80% for more than 5 minutes on {{ $labels.service }}." + + # High Memory Usage + - alert: HighMemoryUsage + expr: (process_resident_memory_bytes / 1024 / 1024) > 1000 + for: 5m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.service }}" + description: "Memory usage is above 1GB for more than 5 minutes on {{ $labels.service }}." + + - name: license_alerts + interval: 30s + rules: + # High License Validation Failure Rate + - alert: HighLicenseValidationFailureRate + expr: rate(license_validations_failed_total[5m]) / rate(license_validations_total[5m]) > 0.1 + for: 5m + labels: + severity: warning + service: license-server + annotations: + summary: "High license validation failure rate" + description: "More than 10% of license validations are failing." + + # No License Validations + - alert: NoLicenseValidations + expr: rate(license_validations_total[10m]) == 0 + for: 10m + labels: + severity: warning + service: license-server + annotations: + summary: "No license validations occurring" + description: "No license validations have been processed in the last 10 minutes." 
+ + # High Anomaly Detection Rate (rate() is per-second; * 60 converts it to the per-minute threshold) + - alert: HighAnomalyRate + expr: rate(anomalies_detected_total[5m]) * 60 > 10 + for: 5m + labels: + severity: critical + service: license-server + annotations: + summary: "High anomaly detection rate" + description: "More than 10 anomalies detected per minute." + + - name: database_alerts + interval: 30s + rules: + # PostgreSQL Down + - alert: PostgreSQLDown + expr: pg_up == 0 + for: 1m + labels: + severity: critical + service: postgres + annotations: + summary: "PostgreSQL is down" + description: "PostgreSQL has been down for more than 1 minute." + + # High Database Connections + - alert: HighDatabaseConnections + expr: pg_stat_database_numbackends{datname="v2_adminpanel"} > 80 + for: 5m + labels: + severity: warning + service: postgres + annotations: + summary: "High number of database connections" + description: "More than 80 active connections to v2_adminpanel database." + + # Slow Queries + - alert: SlowQueries + expr: rate(pg_stat_statements_mean_time_seconds[5m]) > 1 + for: 5m + labels: + severity: warning + service: postgres + annotations: + summary: "Slow database queries detected" + description: "Average query time is above 1 second." + + - name: redis_alerts + interval: 30s + rules: + # Redis Down + - alert: RedisDown + expr: redis_up == 0 + for: 1m + labels: + severity: critical + service: redis + annotations: + summary: "Redis is down" + description: "Redis has been down for more than 1 minute." + + # High Redis Memory Usage (instances without a maxmemory limit report 0 and are skipped to avoid dividing by zero) + - alert: HighRedisMemory + expr: redis_memory_used_bytes / (redis_memory_max_bytes > 0) > 0.9 + for: 5m + labels: + severity: warning + service: redis + annotations: + summary: "High Redis memory usage" + description: "Redis memory usage is above 90% of max memory."
+ + # Low Cache Hit Rate (computed over the last 10m; the raw counters are lifetime totals) + - alert: LowCacheHitRate + expr: rate(redis_keyspace_hits_total[10m]) / (rate(redis_keyspace_hits_total[10m]) + rate(redis_keyspace_misses_total[10m])) < 0.8 + for: 10m + labels: + severity: warning + service: redis + annotations: + summary: "Low Redis cache hit rate" + description: "Redis cache hit rate is below 80%." + + - name: api_alerts + interval: 30s + rules: + # High API Response Time + - alert: HighAPIResponseTime + expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2 + for: 5m + labels: + severity: warning + annotations: + summary: "High API response time on {{ $labels.service }}" + description: "95th percentile response time is above 2 seconds on {{ $labels.service }}." + + # High Error Rate (share of 5xx responses among all requests, per service) + - alert: HighErrorRate + expr: sum by (service) (rate(http_requests_total{status=~"5.."}[5m])) / sum by (service) (rate(http_requests_total[5m])) > 0.05 + for: 5m + labels: + severity: critical + annotations: + summary: "High error rate on {{ $labels.service }}" + description: "Error rate is above 5% on {{ $labels.service }}." + + # Rate Limit Exceeded (rate() is per-second; * 60 converts it to the per-minute threshold) + - alert: RateLimitExceeded + expr: rate(rate_limit_exceeded_total[5m]) * 60 > 10 + for: 5m + labels: + severity: warning + annotations: + summary: "Rate limits being exceeded frequently" + description: "Rate limits are being exceeded more than 10 times per minute." + + - name: infrastructure_alerts + interval: 30s + rules: + # High Disk Usage + - alert: HighDiskUsage + expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100 < 20 + for: 5m + labels: + severity: warning + annotations: + summary: "Low disk space" + description: "Disk space is below 20% on root filesystem." 
+ + # High Load Average + - alert: HighLoadAverage + expr: node_load5 > 4 + for: 5m + labels: + severity: warning + annotations: + summary: "High system load" + description: "5-minute load average is above 4."
+ + # Certificate Expiry + - alert: CertificateExpiringSoon + expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 7 + for: 1h + labels: + severity: warning + annotations: + summary: "SSL certificate expiring soon" + description: "SSL certificate will expire in less than 7 days." \ No newline at end of file diff --git a/v2/.env b/v2/.env index d8dc562..1c4f628 100644 --- a/v2/.env +++ b/v2/.env @@ -54,3 +54,17 @@ EMAIL_ENABLED=false # Für PoC-Phase auskommentiert - CAPTCHA wird übersprungen wenn Keys fehlen # RECAPTCHA_SITE_KEY=your-site-key-here # RECAPTCHA_SECRET_KEY=your-secret-key-here + +# ===================== MONITORING KONFIGURATION ===================== + +# Grafana Admin Credentials +GRAFANA_USER=admin +GRAFANA_PASSWORD=admin + +# SMTP Settings for Alertmanager (optional) +# SMTP_USERNAME=your-email@gmail.com +# SMTP_PASSWORD=your-app-password + +# Webhook URLs for critical alerts (optional) +# WEBHOOK_CRITICAL=https://your-webhook-url/critical +# WEBHOOK_SECURITY=https://your-webhook-url/security diff --git a/v2_adminpanel/templates/monitoring/analytics.html b/v2_adminpanel/templates/monitoring/analytics.html index 632b4c7..d90a352 100644 --- a/v2_adminpanel/templates/monitoring/analytics.html +++ b/v2_adminpanel/templates/monitoring/analytics.html @@ -59,12 +59,15 @@ margin-top: 20px; } - .grafana-info { - background: #e7f3ff; - border: 1px solid #b3d9ff; - padding: 15px; - border-radius: 8px; - margin-bottom: 20px; + .loading-spinner { + text-align: center; + padding: 50px; + } + + .no-data { + text-align: center; + padding: 30px; + color: #6c757d; } {% endblock %} @@ -77,21 +80,6 @@ - -
Für detaillierte Dashboards und erweiterte Analysen nutzen Sie unser Grafana Dashboard.
-| Kunde | -Lizenzen | -Nutzung | -
|---|---|---|
| ACME Corp | -45 | -Hoch | -
| TechStart GmbH | -32 | -Hoch | -
| Global Solutions | -28 | -Mittel | -
Die detaillierten Analysen stehen zur Verfügung, sobald genügend Daten vorhanden sind.
+Nutzen Sie das Live Dashboard für Echtzeit-Statistiken.