Lizenzserver ist fertig

2025-06-18 23:22:38 +02:00
Commit 7017549fcd
--- a/lizenzserver/services/auth/app.py
+++ b/lizenzserver/services/auth/app.py
@@ -6,6 +6,7 @@ import jwt
 from datetime import datetime, timedelta
 import logging
 from functools import wraps
 from prometheus_flask_exporter import PrometheusMetrics
 # Add parent directory to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
@@ -23,6 +24,10 @@ config = get_config()
 # Copy the settings held by the `config` object into the app's configuration.
 app.config.from_object(config)
 # Enable CORS on the app; no options are passed, so the extension's defaults apply.
 CORS(app)
 # Initialize Prometheus metrics
 # NOTE(review): presumably this is what serves the /metrics path that the
 # 'auth-service' scrape job in monitoring/prometheus/prometheus.yml requests —
 # confirm against the prometheus_flask_exporter documentation.
 metrics = PrometheusMetrics(app)
 # Register a static info metric describing this service; the version is hard-coded.
 metrics.info('auth_service_info', 'Auth Service Information', version='1.0.0')
 # Initialize repository
 # The repository is constructed from the DATABASE_URL value of the loaded config.
 db_repo = BaseRepository(config.DATABASE_URL)
--- a/lizenzserver/services/auth/requirements.txt
+++ b/lizenzserver/services/auth/requirements.txt
@@ -5,4 +5,5 @@ psycopg2-binary==2.9.9
 redis==5.0.1
 python-dotenv==1.0.0
 gunicorn==21.2.0
-marshmallow==3.20.1
+marshmallow==3.20.1
 prometheus-flask-exporter==0.23.0
--- a/monitoring/README.md
+++ b/monitoring/README.md
@@ -0,0 +1,272 @@
 # V2 Docker Monitoring Stack
 ## Übersicht
 Die Monitoring-Lösung für V2 Docker basiert auf dem Prometheus-Stack und bietet umfassende Einblicke in die Performance und Gesundheit aller Services.
 ## Komponenten
 ### 1. **Prometheus** (Port 9090)
 - Zentrale Metrik-Sammlung
 - Konfigurierte Scrape-Jobs für alle Services
 - 30 Tage Datenaufbewahrung
 - Alert-Rules für kritische Ereignisse
 ### 2. **Grafana** (Port 3000)
 - Visualisierung der Metriken
 - Vorkonfigurierte Dashboards
 - Alerting-Integration
 - Standard-Login: admin/admin (beim ersten Login ändern)
 ### 3. **Alertmanager** (Port 9093)
 - Alert-Routing und -Gruppierung
 - Email-Benachrichtigungen
 - Webhook-Integration
 - Alert-Silencing und -Inhibition
 ### 4. **Exporters**
 - **PostgreSQL Exporter**: Datenbank-Metriken
 - **Redis Exporter**: Cache-Metriken
 - **Node Exporter**: System-Metriken
 - **Nginx Exporter**: Proxy-Metriken
 ## Installation
 ### 1. Monitoring-Stack starten
 ```bash
 cd monitoring
 docker-compose -f docker-compose.monitoring.yml up -d
 ```
 ### 2. Services überprüfen
 ```bash
 docker-compose -f docker-compose.monitoring.yml ps
 ```
 ### 3. Grafana-Zugang
 1. Öffnen Sie https://monitoring.v2-docker.com (oder http://localhost:3000)
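 2. Login mit den in `.env` gesetzten Zugangsdaten (`GRAFANA_USER`/`GRAFANA_PASSWORD`; ohne `.env` gilt der Standard admin/admin)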
 3. Neues Passwort setzen
 4. Dashboard "License Server Overview" öffnen
 ## Konfiguration
 ### Environment-Variablen
 Erstellen Sie eine `.env`-Datei im `monitoring`-Verzeichnis:
 ```env
 # Grafana
 GRAFANA_USER=admin
 GRAFANA_PASSWORD=secure-password
 # PostgreSQL Connection
 POSTGRES_PASSWORD=your-postgres-password
 # Alertmanager SMTP
 SMTP_USERNAME=alerts@yourdomain.com
 SMTP_PASSWORD=smtp-password
 # Webhook URLs
 WEBHOOK_CRITICAL=https://your-webhook-url/critical
 WEBHOOK_SECURITY=https://your-webhook-url/security
 ```
 ### Alert-Konfiguration
 Alerts sind in `prometheus/rules/license-server-alerts.yml` definiert:
 - **HighLicenseValidationErrorRate**: Fehlerrate > 5%
 - **PossibleLicenseAbuse**: Verdächtige Aktivitäten
 - **LicenseServerDown**: Service nicht erreichbar
 - **HighLicenseValidationLatency**: Antwortzeit > 500ms
 - **DatabaseConnectionPoolExhausted**: DB-Verbindungen > 90%
 ### Neue Alerts hinzufügen
 1. Editieren Sie `prometheus/rules/license-server-alerts.yml`
 2. Fügen Sie neue Alert-Regel hinzu:
 ```yaml
 - alert: YourAlertName
  expr: your_prometheus_query > threshold
  for: 5m
  labels:
    severity: warning
    service: your-service
  annotations:
    summary: "Alert summary"
    description: "Detailed description"
 ```
 3. Prometheus neu laden:
 ```bash
 curl -X POST http://localhost:9090/-/reload
 ```
 ## Dashboards
 ### License Server Overview
 Zeigt wichtige Metriken:
 - Aktive Lizenzen
 - Validierungen pro Sekunde
 - Fehlerrate
 - Response Time Percentiles
 - Anomalie-Erkennung
 - Top 10 aktivste Lizenzen
 ### Neue Dashboards erstellen
 1. In Grafana einloggen
 2. Create → Dashboard
 3. Panel hinzufügen
 4. Prometheus-Query eingeben
 5. Dashboard speichern
 6. Export als JSON für Backup
 ## Metriken
 ### License Server Metriken
 - `license_validation_total`: Anzahl der Validierungen
 - `license_validation_duration_seconds`: Validierungs-Dauer
 - `active_licenses_total`: Aktive Lizenzen
 - `anomaly_detections_total`: Erkannte Anomalien
 ### System Metriken
 - `node_cpu_seconds_total`: CPU-Auslastung
 - `node_memory_MemAvailable_bytes`: Verfügbarer Speicher
 - `node_filesystem_avail_bytes`: Verfügbarer Festplattenspeicher
 ### Datenbank Metriken
 - `pg_stat_database_numbackends`: Aktive DB-Verbindungen
 - `pg_stat_database_tup_fetched`: Abgerufene Tupel
 - `pg_stat_database_conflicts`: Konflikte
 ## Troubleshooting
 ### Prometheus erreicht Service nicht
 1. Netzwerk überprüfen:
 ```bash
 docker network inspect v2_internal_net
 ```
 2. Service-Discovery testen:
 ```bash
 docker exec prometheus wget -O- http://license-server:8443/metrics
 ```
 ### Keine Daten in Grafana
 1. Datasource überprüfen:
   - Settings → Data Sources → Prometheus
   - Test Connection
 2. Prometheus Targets checken:
   - http://localhost:9090/targets
   - Alle Targets sollten "UP" sein
 ### Alerts werden nicht gesendet
 1. Alertmanager Logs prüfen:
 ```bash
 docker logs alertmanager
 ```
 2. SMTP-Konfiguration verifizieren
 3. Webhook-URLs testen
 ## Wartung
 ### Backup
 1. Prometheus-Daten:
 ```bash
 # Archiv nach außen streamen, statt es in das gesicherte Verzeichnis /prometheus zu schreiben
 mkdir -p ./backups
 docker exec prometheus tar czf - -C / prometheus > ./backups/prometheus-backup.tar.gz
 ```
 2. Grafana-Dashboards:
   - Export über UI als JSON
   - Speichern in `grafana/dashboards/`
 ### Updates
 1. Images updaten:
 ```bash
 docker-compose -f docker-compose.monitoring.yml pull
 docker-compose -f docker-compose.monitoring.yml up -d
 ```
 2. Konfiguration neu laden:
 ```bash
 # Prometheus
 curl -X POST http://localhost:9090/-/reload
 # Alertmanager
 curl -X POST http://localhost:9093/-/reload
 ```
 ## Performance-Optimierung
 ### Retention anpassen
 In `docker-compose.monitoring.yml`:
 ```yaml
 command:
  - '--storage.tsdb.retention.time=15d'  # Reduzieren für weniger Speicher
 ```
 ### Scrape-Intervalle
 In `prometheus/prometheus.yml`:
 ```yaml
 global:
  scrape_interval: 30s  # Erhöhen für weniger Last
 ```
 ### Resource Limits
 Passen Sie die Limits in `docker-compose.monitoring.yml` an Ihre Umgebung an.
 ## Sicherheit
 1. **Grafana**: Ändern Sie das Standard-Passwort sofort
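 2. **Prometheus**: Kein öffentlicher Zugriff (nur intern) – `docker-compose.monitoring.yml` veröffentlicht Port 9090 auf dem Host; diesen per Firewall oder Reverse-Proxy abschirmen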
 3. **Alertmanager**: Webhook-URLs geheim halten
 4. **Exporters**: Nur im internen Netzwerk erreichbar
 ## Integration
 ### In CI/CD Pipeline
 ```bash
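 # Deployment-Metriken senden (setzt einen separat betriebenen Pushgateway voraus;
 # docker-compose.monitoring.yml startet keinen)
 echo 'deployment_status{version="1.2.3",environment="production"} 1' | \
  curl --data-binary @- http://prometheus-pushgateway:9091/metrics/job/deployment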
 ```
 ### Custom Metriken
 In Ihrer Anwendung:
 ```python
 from prometheus_client import Counter, Histogram
 custom_metric = Counter('my_custom_total', 'Description')
 custom_metric.inc()
 ```
 ## Support
 Bei Problemen:
 1. Logs überprüfen: `docker-compose -f docker-compose.monitoring.yml logs [service]`
 2. Dokumentation: https://prometheus.io/docs/
 3. Grafana Docs: https://grafana.com/docs/
--- a/monitoring/alertmanager/alertmanager.yml
+++ b/monitoring/alertmanager/alertmanager.yml
@@ -0,0 +1,94 @@
 # Global defaults: alert resolve timeout and the SMTP relay used by email receivers.
 # NOTE(review): the ${SMTP_USERNAME}/${SMTP_PASSWORD} placeholders only work if
 # something substitutes them before Alertmanager reads this file. The compose
 # file in this commit bind-mounts the file as-is, and Alertmanager itself is not
 # known to expand environment variables in its config — TODO confirm and add a
 # rendering step (e.g. envsubst) if needed.
 global:
  resolve_timeout: 5m
  smtp_from: 'alerts@v2-docker.com'
  smtp_smarthost: 'smtp.gmail.com:587'
  smtp_auth_username: '${SMTP_USERNAME}'
  smtp_auth_password: '${SMTP_PASSWORD}'
  smtp_require_tls: true
 # Templates for notifications
 # NOTE(review): docker-compose.monitoring.yml mounts no templates directory into
 # the alertmanager container — verify this glob is meant to match nothing.
 templates:
  - '/etc/alertmanager/templates/*.tmpl'
 # Route tree
 # Alerts are grouped by alertname/cluster/service. Anything that matches none of
 # the child routes below is delivered to the 'default' receiver.
 # NOTE(review): newer Alertmanager releases prefer `matchers:` over
 # `match`/`match_re`; since the image tag is ':latest', check these keys are
 # still accepted by the deployed version.
 route:
  group_by: ['alertname', 'cluster', 'service']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 12h
  receiver: 'default'
  routes:
    # Critical alerts
    # 'continue: true' lets evaluation proceed to the sibling routes below after
    # this one matches, so a critical alert can reach a second receiver as well.
    - match:
        severity: critical
      receiver: 'critical'
      continue: true
    # License abuse alerts
    # Re-notified every hour instead of the 12h default set on the root route.
    - match:
        alertname: PossibleLicenseAbuse
      receiver: 'security'
      repeat_interval: 1h
    # Database alerts
    # Matches the `service: postgres` label (the Prometheus 'postgres' scrape job
    # attaches a label with this value to its target).
    - match:
        service: postgres
      receiver: 'database'
    # Infrastructure alerts
    # Anchored regex: only these three exact alert names are routed here.
    - match_re:
        alertname: ^(HighCPUUsage|HighMemoryUsage|LowDiskSpace)$
      receiver: 'infrastructure'
 # Receivers
 # One entry per receiver name referenced from the route tree above.
 receivers:
  # Fallback receiver: email with a custom subject header and an HTML body that
  # lists summary, description and all labels of every alert in the group.
  - name: 'default'
    email_configs:
      - to: 'admin@v2-docker.com'
        headers:
          Subject: '[{{ .Status | toUpper }}] {{ .GroupLabels.alertname }}'
        html: |
          <h2>Alert: {{ .GroupLabels.alertname }}</h2>
          <p><b>Status:</b> {{ .Status }}</p>
          {{ range .Alerts }}
          <hr>
          <p><b>Summary:</b> {{ .Annotations.summary }}</p>
          <p><b>Description:</b> {{ .Annotations.description }}</p>
          <p><b>Labels:</b></p>
          <ul>
          {{ range .Labels.SortedPairs }}
            <li><b>{{ .Name }}:</b> {{ .Value }}</li>
          {{ end }}
          </ul>
          {{ end }}
  # Critical alerts: email plus webhook, both also notified on resolution.
  # NOTE(review): '${WEBHOOK_CRITICAL}' is a literal string unless the file is
  # rendered before start; presumably Alertmanager rejects a URL without an
  # http(s) scheme at config load — TODO confirm.
  - name: 'critical'
    email_configs:
      - to: 'critical-alerts@v2-docker.com'
        send_resolved: true
    webhook_configs:
      - url: '${WEBHOOK_CRITICAL}'
        send_resolved: true
  # Security team: email plus webhook (same placeholder caveat as above).
  - name: 'security'
    email_configs:
      - to: 'security@v2-docker.com'
    webhook_configs:
      - url: '${WEBHOOK_SECURITY}'
  # Database team: email only.
  - name: 'database'
    email_configs:
      - to: 'dba@v2-docker.com'
  # Operations team: email only.
  - name: 'infrastructure'
    email_configs:
      - to: 'ops@v2-docker.com'
 # Inhibition rules
 # While an alert with severity 'critical' is firing, mute alerts with severity
 # 'warning' that carry the same 'alertname' and 'instance' label values.
 inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']
--- a/monitoring/docker-compose.monitoring.yml
+++ b/monitoring/docker-compose.monitoring.yml
@@ -0,0 +1,149 @@
 version: '3.8'
 services:
  # Prometheus server: reads ./prometheus/prometheus.yml and ./prometheus/rules,
  # stores its TSDB in the prometheus_data volume with 30 days of retention.
  prometheus:
    # NOTE(review): ':latest' is unpinned; consider a fixed version tag so that
    # deployments are reproducible.
    image: prom/prometheus:latest
    container_name: prometheus
    restart: unless-stopped
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - ./prometheus/rules:/etc/prometheus/rules
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=30d'
      # The README's `curl -X POST http://localhost:9090/-/reload` relies on
      # this flag being set.
      - '--web.enable-lifecycle'
    networks:
      - v2_internal_net
    # NOTE(review): this publishes 9090 on the host although the README describes
    # Prometheus as internal-only — confirm access is restricted elsewhere.
    ports:
      - "9090:9090"
    # NOTE(review): verify the Compose version in use enforces `deploy.resources`
    # limits outside Swarm mode.
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 2g
  # Grafana UI on port 3000; provisioning files and dashboards come from the
  # bind-mounted ./grafana directories, state lives in the grafana_data volume.
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    restart: unless-stopped
    environment:
      # Admin credentials are taken from the environment/.env and fall back to
      # admin/admin when the variables are unset.
      # NOTE(review): set GRAFANA_PASSWORD for any non-local deployment.
      - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_SERVER_ROOT_URL=https://monitoring.v2-docker.com
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
      # Must match `options.path` in grafana/provisioning/dashboards/dashboards.yml.
      - ./grafana/dashboards:/var/lib/grafana/dashboards
    networks:
      - v2_internal_net
    ports:
      - "3000:3000"
    depends_on:
      - prometheus
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512m
  # Alertmanager: receives alerts from Prometheus (target alertmanager:9093 in
  # prometheus.yml) and keeps its state in the alertmanager_data volume.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    restart: unless-stopped
    volumes:
      # The config file is mounted unmodified; no environment variables are
      # passed to this container, so any ${...} placeholders inside it must be
      # rendered before start. NOTE(review): confirm how that happens.
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - alertmanager_data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    networks:
      - v2_internal_net
    ports:
      - "9093:9093"
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256m
  # PostgreSQL Exporter
  # Connects to host 'postgres', database 'v2_adminpanel', as user 'postgres'
  # with the password from POSTGRES_PASSWORD; TLS is disabled for the connection.
  # No port is published; Prometheus scrapes it as postgres-exporter:9187.
  postgres-exporter:
    image: prometheuscommunity/postgres-exporter:latest
    container_name: postgres-exporter
    restart: unless-stopped
    environment:
      # NOTE(review): a password containing URL-reserved characters would
      # presumably need percent-encoding inside this connection URI — verify.
      DATA_SOURCE_NAME: "postgresql://postgres:${POSTGRES_PASSWORD}@postgres:5432/v2_adminpanel?sslmode=disable"
    networks:
      - v2_internal_net
    deploy:
      resources:
        limits:
          cpus: '0.25'
          memory: 128m
  # Redis Exporter
  # Points at redis:6379 without credentials; not published on the host.
  # Prometheus scrapes it as redis-exporter:9121.
  # NOTE(review): if the Redis instance requires a password, the exporter needs
  # it configured as well — confirm against the Redis deployment.
  redis-exporter:
    image: oliver006/redis_exporter:latest
    container_name: redis-exporter
    restart: unless-stopped
    environment:
      REDIS_ADDR: "redis://redis:6379"
    networks:
      - v2_internal_net
    deploy:
      resources:
        limits:
          cpus: '0.25'
          memory: 128m
  # Node Exporter (for host metrics)
  # The host's /proc, /sys and / are bind-mounted read-only and handed to the
  # exporter through the --path.* flags so it reports host rather than container
  # values.
  node-exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    restart: unless-stopped
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # '$$' is the Compose escape for a literal '$' (the regex end anchor).
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    networks:
      - v2_internal_net
    deploy:
      resources:
        limits:
          cpus: '0.25'
          memory: 128m
  # Nginx Exporter
  # Reads nginx status from nginx-proxy:8080/nginx_status; Prometheus scrapes the
  # exporter as nginx-exporter:9113.
  # NOTE(review): presumably the nginx-proxy container must expose a status
  # endpoint on port 8080 at that path — confirm in the proxy configuration.
  nginx-exporter:
    image: nginx/nginx-prometheus-exporter:latest
    container_name: nginx-exporter
    restart: unless-stopped
    command:
      - '-nginx.scrape-uri=http://nginx-proxy:8080/nginx_status'
    networks:
      - v2_internal_net
    deploy:
      resources:
        limits:
          cpus: '0.25'
          memory: 128m
 # 'external: true' means this file does not create v2_internal_net; the network
 # must already exist when the stack is started.
 networks:
  v2_internal_net:
    external: true
 # Named volumes holding the persistent state of the three core services.
 volumes:
  prometheus_data:
  grafana_data:
  alertmanager_data:
--- a/monitoring/grafana/dashboards/license-server-dashboard.json
+++ b/monitoring/grafana/dashboards/license-server-dashboard.json
@@ -0,0 +1,562 @@
 {
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 1,
  "id": null,
  "links": [],
  "panels": [
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          },
          "unit": "short"
        }
      },
      "gridPos": {
        "h": 4,
        "w": 6,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "text": {},
        "textMode": "auto"
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "sum(active_licenses_total)",
          "refId": "A"
        }
      ],
      "title": "Active Licenses",
      "type": "stat"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          },
          "unit": "ops"
        }
      },
      "gridPos": {
        "h": 4,
        "w": 6,
        "x": 6,
        "y": 0
      },
      "id": 2,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "text": {},
        "textMode": "auto"
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "sum(rate(license_validation_total[5m]))",
          "refId": "A"
        }
      ],
      "title": "Validations/sec",
      "type": "stat"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 0.01
              },
              {
                "color": "red",
                "value": 0.05
              }
            ]
          },
          "unit": "percentunit"
        }
      },
      "gridPos": {
        "h": 4,
        "w": 6,
        "x": 12,
        "y": 0
      },
      "id": 3,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "text": {},
        "textMode": "auto"
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "sum(rate(license_validation_errors_total[5m])) / sum(rate(license_validation_total[5m]))",
          "refId": "A"
        }
      ],
      "title": "Error Rate",
      "type": "stat"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 200
              },
              {
                "color": "red",
                "value": 500
              }
            ]
          },
          "unit": "ms"
        }
      },
      "gridPos": {
        "h": 4,
        "w": 6,
        "x": 18,
        "y": 0
      },
      "id": 4,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "text": {},
        "textMode": "auto"
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "histogram_quantile(0.95, sum(rate(license_validation_duration_seconds_bucket[5m])) by (le)) * 1000",
          "refId": "A"
        }
      ],
      "title": "95th Percentile Latency",
      "type": "stat"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "tooltip": false,
              "viz": false,
              "legend": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          },
          "unit": "reqps"
        }
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 4
      },
      "id": 5,
      "options": {
        "tooltip": {
          "mode": "single"
        },
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        }
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "sum(rate(license_validation_total{result=\"success\"}[5m]))",
          "legendFormat": "Success",
          "refId": "A"
        },
        {
          "expr": "sum(rate(license_validation_total{result=\"invalid\"}[5m]))",
          "legendFormat": "Invalid",
          "refId": "B"
        },
        {
          "expr": "sum(rate(license_validation_total{result=\"expired\"}[5m]))",
          "legendFormat": "Expired",
          "refId": "C"
        }
      ],
      "title": "License Validation Rate",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "tooltip": false,
              "viz": false,
              "legend": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          },
          "unit": "ms"
        }
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 4
      },
      "id": 6,
      "options": {
        "tooltip": {
          "mode": "single"
        },
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        }
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "histogram_quantile(0.50, sum(rate(license_validation_duration_seconds_bucket[5m])) by (le)) * 1000",
          "legendFormat": "50th percentile",
          "refId": "A"
        },
        {
          "expr": "histogram_quantile(0.95, sum(rate(license_validation_duration_seconds_bucket[5m])) by (le)) * 1000",
          "legendFormat": "95th percentile",
          "refId": "B"
        },
        {
          "expr": "histogram_quantile(0.99, sum(rate(license_validation_duration_seconds_bucket[5m])) by (le)) * 1000",
          "legendFormat": "99th percentile",
          "refId": "C"
        }
      ],
      "title": "Response Time Percentiles",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "tooltip": false,
              "viz": false,
              "legend": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          },
          "unit": "short"
        }
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 12
      },
      "id": 7,
      "options": {
        "tooltip": {
          "mode": "single"
        },
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        }
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "sum(rate(anomaly_detections_total{severity=\"low\"}[5m]))",
          "legendFormat": "Low",
          "refId": "A"
        },
        {
          "expr": "sum(rate(anomaly_detections_total{severity=\"medium\"}[5m]))",
          "legendFormat": "Medium",
          "refId": "B"
        },
        {
          "expr": "sum(rate(anomaly_detections_total{severity=\"high\"}[5m]))",
          "legendFormat": "High",
          "refId": "C"
        },
        {
          "expr": "sum(rate(anomaly_detections_total{severity=\"critical\"}[5m]))",
          "legendFormat": "Critical",
          "refId": "D"
        }
      ],
      "title": "Anomaly Detection Rate by Severity",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "displayMode": "auto"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          }
        }
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 12
      },
      "id": 8,
      "options": {
        "showHeader": true
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "topk(10, sum by (license_id) (rate(license_validation_total[1h])))",
          "format": "table",
          "instant": true,
          "refId": "A"
        }
      ],
      "title": "Top 10 Most Active Licenses (Last Hour)",
      "type": "table"
    }
  ],
  "refresh": "10s",
  "schemaVersion": 27,
  "style": "dark",
  "tags": ["license-server", "monitoring"],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "License Server Overview",
  "uid": "license-server-overview",
  "version": 0
 }
--- a/monitoring/grafana/provisioning/dashboards/dashboards.yml
+++ b/monitoring/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,12 @@
 # Grafana dashboard provisioning: load dashboard files from a directory.
 apiVersion: 1
 providers:
  - name: 'V2 Docker Dashboards'
    orgId: 1
    # An empty folder name; NOTE(review): presumably this places the dashboards
    # in Grafana's top-level folder — confirm.
    folder: ''
    type: file
    disableDeletion: false
    # The directory is re-checked for changes every 10 seconds.
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      # Container path; docker-compose.monitoring.yml mounts ./grafana/dashboards here.
      path: /var/lib/grafana/dashboards
--- a/monitoring/grafana/provisioning/datasources/prometheus.yml
+++ b/monitoring/grafana/provisioning/datasources/prometheus.yml
@@ -0,0 +1,13 @@
 # Grafana data source provisioning: a single default Prometheus data source.
 apiVersion: 1
 datasources:
  # The dashboard JSON in this commit refers to the data source by this name.
  - name: Prometheus
    type: prometheus
    access: proxy
    # Uses the compose service name 'prometheus' as host name.
    url: http://prometheus:9090
    isDefault: true
    editable: true
    jsonData:
      # Same value as the global scrape_interval in prometheus/prometheus.yml.
      timeInterval: 15s
      queryTimeout: 60s
      httpMethod: POST
--- a/monitoring/prometheus/prometheus.yml
+++ b/monitoring/prometheus/prometheus.yml
@@ -0,0 +1,111 @@
 # Defaults for all jobs: scrape targets and evaluate rules every 15 seconds.
 global:
  scrape_interval: 15s
  evaluation_interval: 15s
  # Static labels identifying this Prometheus instance.
  # NOTE(review): presumably these are attached to outgoing alerts and other
  # externally sent data — confirm.
  external_labels:
    monitor: 'v2-docker-monitor'
    environment: 'production'
 # Alertmanager configuration
 # 'alertmanager' is the compose service name; 9093 is the port it publishes.
 alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093
 # Load rules once and periodically evaluate them
 # Container path; docker-compose.monitoring.yml mounts ./prometheus/rules here.
 rule_files:
  - '/etc/prometheus/rules/*.yml'
 # Scrape configurations
 # Every job uses a single static target and attaches 'service' and 'component'
 # labels to it. Jobs without an explicit metrics_path rely on the default.
 scrape_configs:
  # Prometheus itself
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          service: 'prometheus'
  # License Server metrics
  # The 'license-server' job name is what the LicenseServerDown alert rule
  # selects on (up{job="license-server"}).
  # NOTE(review): no `scheme` is set, so the default applies — if port 8443
  # serves TLS this job needs `scheme: https`. TODO confirm.
  - job_name: 'license-server'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['license-server:8443']
        labels:
          service: 'license-server'
          component: 'api'
  # Auth Service metrics
  - job_name: 'auth-service'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['auth-service:5001']
        labels:
          service: 'auth-service'
          component: 'authentication'
  # Analytics Service metrics
  - job_name: 'analytics-service'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['analytics-service:5003']
        labels:
          service: 'analytics-service'
          component: 'analytics'
  # Admin API Service metrics
  - job_name: 'admin-api-service'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['admin-api-service:5004']
        labels:
          service: 'admin-api-service'
          component: 'admin'
  # Admin Panel metrics
  - job_name: 'admin-panel'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['admin-panel:5000']
        labels:
          service: 'admin-panel'
          component: 'ui'
  # PostgreSQL Exporter
  # The 'service: postgres' label is what the Alertmanager 'database' route matches.
  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']
        labels:
          service: 'postgres'
          component: 'database'
  # Redis Exporter
  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']
        labels:
          service: 'redis'
          component: 'cache'
  # RabbitMQ metrics
  # NOTE(review): docker-compose.monitoring.yml defines no RabbitMQ service or
  # exporter; presumably the broker itself serves metrics on 15692 — confirm,
  # otherwise this target will be reported as down.
  - job_name: 'rabbitmq'
    static_configs:
      - targets: ['rabbitmq:15692']
        labels:
          service: 'rabbitmq'
          component: 'messaging'
  # Node Exporter for host metrics
  - job_name: 'node'
    static_configs:
      - targets: ['node-exporter:9100']
        labels:
          service: 'node-exporter'
          component: 'infrastructure'
  # Nginx metrics
  - job_name: 'nginx'
    static_configs:
      - targets: ['nginx-exporter:9113']
        labels:
          service: 'nginx'
          component: 'proxy'
--- a/monitoring/prometheus/rules/license-server-alerts.yml
+++ b/monitoring/prometheus/rules/license-server-alerts.yml
@@ -0,0 +1,174 @@
 groups:
  - name: license_server_alerts
    interval: 30s
    rules:
      # High error rate
      - alert: HighLicenseValidationErrorRate
        expr: |
          (
            sum(rate(license_validation_errors_total[5m]))
            /
            sum(rate(license_validation_total[5m]))
          ) > 0.05
        for: 5m
        labels:
          severity: warning
          service: license-server
        annotations:
          summary: "High license validation error rate ({{ $value | humanizePercentage }})"
          description: "License validation error rate is above 5% for the last 5 minutes"
      # License abuse detection
      - alert: PossibleLicenseAbuse
        expr: |
          rate(license_validation_total{result="multiple_ips"}[5m]) > 0.1
        for: 10m
        labels:
          severity: critical
          service: license-server
        annotations:
          summary: "Possible license abuse detected"
          description: "High rate of validations from multiple IPs for same license"
      # Service down
      - alert: LicenseServerDown
        expr: up{job="license-server"} == 0
        for: 2m
        labels:
          severity: critical
          service: license-server
        annotations:
          summary: "License server is down"
          description: "License server has been down for more than 2 minutes"
      # High response time
      - alert: HighLicenseValidationLatency
        expr: |
          histogram_quantile(0.95, 
            sum(rate(license_validation_duration_seconds_bucket[5m])) by (le)
          ) > 0.5
        for: 5m
        labels:
          severity: warning
          service: license-server
        annotations:
          summary: "High license validation latency"
          description: "95th percentile latency is above 500ms"
      # Anomaly detection
      - alert: HighAnomalyDetectionRate
        expr: |
          sum(rate(anomaly_detections_total{severity=~"high|critical"}[5m])) > 0.5
        for: 5m
        labels:
          severity: critical
          service: license-server
        annotations:
          summary: "High rate of critical anomalies detected"
          description: "More than 0.5 critical anomalies per second detected"
  - name: database_alerts
    interval: 30s
    rules:
      # Database connection pool exhaustion
      # The backend count carries per-database labels (datid/datname) that
      # pg_settings_max_connections does not have. Without explicit matching
      # the division finds no label-identical pair, returns nothing, and the
      # alert can never fire. Aggregate to `instance` and match on it.
      - alert: DatabaseConnectionPoolExhausted
        expr: |
          (
            sum by (instance) (pg_stat_database_numbackends{datname="v2_adminpanel"})
            / on (instance)
            pg_settings_max_connections
          ) > 0.9
        for: 5m
        labels:
          severity: critical
          service: postgres
        annotations:
          summary: "Database connection pool nearly exhausted"
          description: "PostgreSQL connection usage is above 90%"
      # Database replication lag
      - alert: DatabaseReplicationLag
        expr: |
          pg_replication_lag_seconds > 10
        for: 5m
        labels:
          severity: warning
          service: postgres
        annotations:
          summary: "Database replication lag detected"
          description: "Replication lag is {{ $value }} seconds"
  - name: infrastructure_alerts
    interval: 30s
    rules:
      # High CPU usage
      # Uses rate() instead of irate(): irate only considers the last two
      # samples in the window, so one brief idle sample would drop the value
      # below the threshold and reset the 10m `for` timer.
      - alert: HighCPUUsage
        expr: |
          (
            100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
          ) > 80
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage on {{ $labels.instance }}"
          description: "CPU usage is above 80% for 10 minutes"
      # High memory usage
      - alert: HighMemoryUsage
        expr: |
          (
            1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)
          ) > 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage on {{ $labels.instance }}"
          description: "Memory usage is above 90%"
      # Disk space
      - alert: LowDiskSpace
        expr: |
          (
            node_filesystem_avail_bytes{mountpoint="/"}
            /
            node_filesystem_size_bytes{mountpoint="/"}
          ) < 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space on {{ $labels.instance }}"
          description: "Less than 10% disk space remaining"
  - name: cache_alerts
    interval: 30s
    rules:
      # Redis connection errors
      - alert: RedisConnectionErrors
        expr: |
          rate(redis_connection_errors_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
          service: redis
        annotations:
          summary: "Redis connection errors detected"
          description: "Redis connection error rate is {{ $value }} per second"
      # Cache hit rate
      # Built from 5m rates so the ratio reflects recent traffic. Dividing the
      # raw counters would give the lifetime average since Redis started,
      # which barely moves once the counters are large. With no traffic the
      # result is NaN, which does not satisfy `< 0.7`, so the alert stays quiet.
      - alert: LowCacheHitRate
        expr: |
          (
            rate(redis_keyspace_hits_total[5m])
            /
            (rate(redis_keyspace_hits_total[5m]) + rate(redis_keyspace_misses_total[5m]))
          ) < 0.7
        for: 10m
        labels:
          severity: warning
          service: redis
        annotations:
          summary: "Low Redis cache hit rate"
          description: "Cache hit rate is below 70%"
--- a/v2/docker-compose.yaml
+++ b/v2/docker-compose.yaml
@@ -81,6 +81,30 @@ services:
          cpus: '2'
          memory: 4g
  auth-service:
    build:
      context: ../lizenzserver/services/auth
    container_name: auth-service
    restart: always
    # Port 5001 - reachable only internally (no `ports:` mapping is declared here)
    env_file: .env
    environment:
      TZ: Europe/Berlin
      DATABASE_URL: postgresql://postgres:${POSTGRES_PASSWORD}@postgres:5432/v2_adminpanel
      REDIS_URL: redis://redis:6379/1
      JWT_SECRET: ${JWT_SECRET}
      FLASK_ENV: production
    depends_on:
      - postgres
      - redis
    networks:
      - internal_net
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1g
  analytics-service:
    build:
      context: ../v2_lizenzserver/services/analytics
@@ -166,6 +190,7 @@ services:
    depends_on:
      - admin-panel
      - license-server
      - auth-service
      - analytics-service
      - admin-api-service
    networks:
--- a/v2_adminpanel/app.py
+++ b/v2_adminpanel/app.py
@@ -10,12 +10,17 @@ from flask import Flask, render_template, session
 from flask_session import Session
 from werkzeug.middleware.proxy_fix import ProxyFix
 from apscheduler.schedulers.background import BackgroundScheduler
 from prometheus_flask_exporter import PrometheusMetrics
 # Import our configuration and utilities
 import config
 from utils.backup import create_backup
 app = Flask(__name__)
 # Initialize Prometheus metrics
 metrics = PrometheusMetrics(app)
 metrics.info('admin_panel_info', 'Admin Panel Information', version='1.0.0')
 # Load configuration from config module
 app.config['SECRET_KEY'] = config.SECRET_KEY
 app.config['SESSION_TYPE'] = config.SESSION_TYPE
--- a/v2_adminpanel/requirements.txt
+++ b/v2_adminpanel/requirements.txt
@@ -13,3 +13,4 @@ bcrypt
 pyotp
 qrcode[pil]
 PyJWT
 prometheus-flask-exporter
--- a/v2_lizenzserver/app/core/metrics.py
+++ b/v2_lizenzserver/app/core/metrics.py
@@ -0,0 +1,175 @@
 from prometheus_client import Counter, Histogram, Gauge, Info
 from functools import wraps
 import time
 # License validation metrics
 license_validation_total = Counter(
    'license_validation_total',
    'Total number of license validations',
    ['result', 'license_type']
 )
 license_validation_errors_total = Counter(
    'license_validation_errors_total',
    'Total number of license validation errors',
    ['error_type']
 )
 license_validation_duration_seconds = Histogram(
    'license_validation_duration_seconds',
    'License validation duration in seconds',
    buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0]
 )
 # Active licenses gauge
 active_licenses_total = Gauge(
    'active_licenses_total',
    'Total number of active licenses',
    ['license_type']
 )
 # Heartbeat metrics
 license_heartbeat_total = Counter(
    'license_heartbeat_total',
    'Total number of license heartbeats received'
 )
 # Activation metrics
 license_activation_total = Counter(
    'license_activation_total',
    'Total number of license activations',
    ['result']
 )
 # Anomaly detection metrics
 anomaly_detections_total = Counter(
    'anomaly_detections_total',
    'Total number of anomalies detected',
    ['anomaly_type', 'severity']
 )
 # Concurrent sessions gauge
 concurrent_sessions_total = Gauge(
    'concurrent_sessions_total',
    'Total number of concurrent active sessions'
 )
 # Database connection pool metrics
 db_connection_pool_size = Gauge(
    'db_connection_pool_size',
    'Database connection pool size'
 )
 db_connection_pool_used = Gauge(
    'db_connection_pool_used',
    'Database connections currently in use'
 )
 # API client metrics
 api_requests_total = Counter(
    'api_requests_total',
    'Total number of API requests',
    ['method', 'endpoint', 'status']
 )
 api_request_duration_seconds = Histogram(
    'api_request_duration_seconds',
    'API request duration in seconds',
    ['method', 'endpoint'],
    buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
 )
 # Cache metrics
 cache_hits_total = Counter(
    'cache_hits_total',
    'Total number of cache hits',
    ['cache_type']
 )
 cache_misses_total = Counter(
    'cache_misses_total',
    'Total number of cache misses',
    ['cache_type']
 )
 # System info
 system_info = Info(
    'license_server_info',
    'License server information'
 )
 def track_request_metrics(method: str, endpoint: str):
    """Build a decorator that records request count and latency for a coroutine.

    Args:
        method: Value used for the ``method`` label on both metrics.
        endpoint: Value used for the ``endpoint`` label on both metrics.

    Returns:
        A decorator for ``async`` callables. Each call increments
        ``api_requests_total`` (``status`` is ``"success"`` or ``"error"``)
        and observes the elapsed time in ``api_request_duration_seconds``.
        Exceptions raised by the wrapped coroutine propagate unchanged.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed (or go negative) when the wall clock is adjusted.
            started = time.perf_counter()
            # Assume failure until the call returns normally. This also
            # covers BaseException subclasses such as asyncio.CancelledError,
            # which an ``except Exception`` clause would miss and which
            # would otherwise be counted as a success.
            status = "error"
            try:
                result = await func(*args, **kwargs)
                status = "success"
                return result
            finally:
                elapsed = time.perf_counter() - started
                api_requests_total.labels(
                    method=method,
                    endpoint=endpoint,
                    status=status
                ).inc()
                api_request_duration_seconds.labels(
                    method=method,
                    endpoint=endpoint
                ).observe(elapsed)
        return wrapper
    return decorator
 def track_validation_metrics():
    """Build a decorator that records metrics for an async license validation.

    The wrapped coroutine's return value is read through ``.get`` with the
    keys ``valid``, ``error`` and ``license_type``.

    Recorded metrics:
        * ``license_validation_total`` - one increment per completed call,
          labelled with ``result`` (``success``, ``expired``, ``invalid`` or
          ``error``) and ``license_type`` (``unknown`` when missing or empty).
        * ``license_validation_errors_total`` - one increment per call that
          raised, labelled with the exception class name.
        * ``license_validation_duration_seconds`` - elapsed time of every
          call, successful or not.

    Returns:
        A decorator for ``async`` callables. Exceptions raised by the wrapped
        coroutine propagate unchanged.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Monotonic clock: immune to wall-clock adjustments.
            started = time.perf_counter()
            try:
                result = await func(*args, **kwargs)
            except Exception as exc:
                license_validation_errors_total.labels(
                    error_type=type(exc).__name__
                ).inc()
                raise
            else:
                # Bookkeeping lives in ``else`` so a failure while reading
                # the result is not miscounted as a validation error.
                if result.get('valid'):
                    result_type = 'success'
                else:
                    error = result.get('error')
                    result_type = error if error in ('expired', 'invalid') else 'error'
                # ``or`` also replaces an explicit None, which would
                # otherwise be stringified into the label value "None".
                license_type = result.get('license_type') or 'unknown'
                license_validation_total.labels(
                    result=result_type,
                    license_type=license_type
                ).inc()
                return result
            finally:
                license_validation_duration_seconds.observe(
                    time.perf_counter() - started
                )
        return wrapper
    return decorator
 # Publish static service metadata
 def init_metrics(version: str = "1.0.0"):
    """Populate the ``system_info`` metric with the version and service name.

    Args:
        version: Version string stored under the ``version`` key.
    """
    service_labels = dict(version=version, service='license-server')
    system_info.info(service_labels)
--- a/v2_lizenzserver/app/main.py
+++ b/v2_lizenzserver/app/main.py
@@ -1,12 +1,14 @@
 from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, Response
 import uvicorn
 import logging
 from datetime import datetime
 from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
 from app.api import license, version
 from app.core.config import settings
 from app.core.metrics import init_metrics, track_request_metrics
 from app.db.database import engine, Base
 logging.basicConfig(level=logging.INFO)
@@ -14,6 +16,9 @@ logger = logging.getLogger(__name__)
 Base.metadata.create_all(bind=engine)
 # Initialize metrics
 init_metrics(version="1.0.0")
 app = FastAPI(
    title="License Server API",
    description="API for software license management",
@@ -53,6 +58,11 @@ async def health_check():
        "timestamp": datetime.utcnow().isoformat()
    }
@app.get("/metrics")
 async def metrics():
    """Prometheus scrape endpoint.

    Returns the output of ``generate_latest()`` as the response body, served
    with the ``CONTENT_TYPE_LATEST`` media type.
    """
    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)
 app.include_router(license.router, prefix="/api/license", tags=["license"])
 app.include_router(version.router, prefix="/api/version", tags=["version"])
--- a/v2_lizenzserver/requirements.txt
+++ b/v2_lizenzserver/requirements.txt
@@ -11,4 +11,5 @@ alembic==1.12.1
 python-dotenv==1.0.0
 httpx==0.25.2
 redis==5.0.1
-packaging==23.2
+packaging==23.2
 prometheus-client==0.19.0
--- a/v2_lizenzserver/services/admin/app.py
+++ b/v2_lizenzserver/services/admin/app.py
@@ -13,6 +13,7 @@ import jwt
 import uuid
 from typing import List, Dict, Optional
 import bcrypt
 from prometheus_flask_exporter import PrometheusMetrics
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -20,6 +21,10 @@ logger = logging.getLogger(__name__)
 app = Flask(__name__)
 CORS(app)
 # Initialize Prometheus metrics
 metrics = PrometheusMetrics(app)
 metrics.info('admin_api_service_info', 'Admin API Service Information', version='1.0.0')
 # Configuration
 DATABASE_URL = os.environ.get('DATABASE_URL', 'postgresql://postgres:postgres@postgres:5432/v2_adminpanel')
 REDIS_URL = os.environ.get('REDIS_URL', 'redis://redis:6379/3')
--- a/v2_lizenzserver/services/admin/requirements.txt
+++ b/v2_lizenzserver/services/admin/requirements.txt
@@ -6,4 +6,5 @@ PyJWT==2.8.0
 bcrypt==4.1.2
 requests==2.31.0
 python-dotenv==1.0.0
-gunicorn==21.2.0
+gunicorn==21.2.0
 prometheus-flask-exporter==0.23.0
--- a/v2_lizenzserver/services/analytics/app.py
+++ b/v2_lizenzserver/services/analytics/app.py
@@ -12,6 +12,7 @@ from functools import wraps
 import jwt
 from collections import defaultdict
 import numpy as np
 from prometheus_flask_exporter import PrometheusMetrics
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -19,6 +20,10 @@ logger = logging.getLogger(__name__)
 app = Flask(__name__)
 CORS(app)
 # Initialize Prometheus metrics
 metrics = PrometheusMetrics(app)
 metrics.info('analytics_service_info', 'Analytics Service Information', version='1.0.0')
 # Configuration
 DATABASE_URL = os.environ.get('DATABASE_URL', 'postgresql://postgres:postgres@postgres:5432/v2_adminpanel')
 REDIS_URL = os.environ.get('REDIS_URL', 'redis://redis:6379/2')
--- a/v2_lizenzserver/services/analytics/requirements.txt
+++ b/v2_lizenzserver/services/analytics/requirements.txt
@@ -6,4 +6,5 @@ PyJWT==2.8.0
 numpy==1.26.2
 requests==2.31.0
 python-dotenv==1.0.0
-gunicorn==21.2.0
+gunicorn==21.2.0
 prometheus-flask-exporter==0.23.0
--- a/v2_nginx/nginx.conf
+++ b/v2_nginx/nginx.conf
@@ -3,6 +3,19 @@ events {
 }
 http {
    # Enable nginx status page for monitoring
    # Listens on all container interfaces: a scraper running in a separate
    # container (reached over the Docker network) cannot connect to a
    # loopback-only listener, which also made the 172.16.0.0/12 rule below
    # unreachable. Access stays restricted by the allow/deny list; do not
    # publish port 8080 to the host.
    server {
        listen 8080;
        server_name localhost;
        location /nginx_status {
            stub_status on;
            access_log off;
            allow 127.0.0.1;
            allow 172.16.0.0/12;  # Docker networks
            deny all;
        }
    }
    # Moderne SSL-Einstellungen für maximale Sicherheit
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384';
@@ -60,6 +73,16 @@ http {
            proxy_set_header Connection "upgrade";
        }
        # Auth Service API (internal only)
        location /api/v1/auth/ {
            proxy_pass http://auth-service:5001/api/v1/auth/;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header Authorization $http_authorization;
        }
        # Analytics Service API (internal only)
        location /api/v1/analytics/ {
            proxy_pass http://analytics-service:5003/api/v1/analytics/;