Zwischenstand - ohne Prometheus
Dieser Commit ist enthalten in:
19
monitoring/alertmanager/Dockerfile
Normale Datei
19
monitoring/alertmanager/Dockerfile
Normale Datei
@@ -0,0 +1,19 @@
|
||||
FROM prom/alertmanager:v0.25.0
|
||||
|
||||
# Copy configuration
|
||||
COPY alertmanager.yml /etc/alertmanager/alertmanager.yml
|
||||
|
||||
# Create templates directory
|
||||
USER root
|
||||
RUN mkdir -p /etc/alertmanager/templates && \
|
||||
chown -R nobody:nobody /etc/alertmanager
|
||||
|
||||
USER nobody
|
||||
|
||||
# Expose Alertmanager port
|
||||
EXPOSE 9093
|
||||
|
||||
# Start Alertmanager
|
||||
CMD ["--config.file=/etc/alertmanager/alertmanager.yml", \
|
||||
"--storage.path=/alertmanager", \
|
||||
"--web.external-url=http://localhost:9093"]
|
||||
28
monitoring/grafana/Dockerfile
Normale Datei
28
monitoring/grafana/Dockerfile
Normale Datei
@@ -0,0 +1,28 @@
|
||||
FROM grafana/grafana:10.0.0
|
||||
|
||||
# Environment variables
|
||||
ENV GF_SECURITY_ADMIN_USER=admin
|
||||
ENV GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
ENV GF_USERS_ALLOW_SIGN_UP=false
|
||||
ENV GF_SERVER_ROOT_URL=http://localhost:3000
|
||||
ENV GF_ANALYTICS_REPORTING_ENABLED=false
|
||||
ENV GF_ANALYTICS_CHECK_FOR_UPDATES=false
|
||||
|
||||
# Copy provisioning configuration
|
||||
COPY provisioning/ /etc/grafana/provisioning/
|
||||
|
||||
# Copy dashboards
|
||||
COPY dashboards/ /var/lib/grafana/dashboards/
|
||||
|
||||
# Install plugins
|
||||
RUN grafana-cli plugins install grafana-piechart-panel && \
|
||||
grafana-cli plugins install grafana-worldmap-panel
|
||||
|
||||
# Set permissions (Grafana runs as UID 472)
|
||||
USER root
|
||||
RUN chown -R 472:0 /var/lib/grafana/dashboards && \
|
||||
chmod -R 755 /var/lib/grafana/dashboards
|
||||
USER 472
|
||||
|
||||
# Expose Grafana port
|
||||
EXPOSE 3000
|
||||
486
monitoring/grafana/dashboards/license-server-overview.json
Normale Datei
486
monitoring/grafana/dashboards/license-server-overview.json
Normale Datei
@@ -0,0 +1,486 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "mean"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(license_validations_total[5m])",
|
||||
"legendFormat": "Total Validations",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(license_validations_success_total[5m])",
|
||||
"legendFormat": "Successful",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(license_validations_failed_total[5m])",
|
||||
"legendFormat": "Failed",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "License Validation Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"values": false,
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": ""
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "active_licenses_total",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Active Licenses",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"values": false,
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": ""
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "anomalies_unresolved_total",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Unresolved Anomalies",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "mean", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket{service=\"license-server\"}[5m]))",
|
||||
"legendFormat": "p50",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{service=\"license-server\"}[5m]))",
|
||||
"legendFormat": "p95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{service=\"license-server\"}[5m]))",
|
||||
"legendFormat": "p99",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "API Response Time Percentiles",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "mean"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(anomalies_detected_total[5m]) by (anomaly_type)",
|
||||
"legendFormat": "{{ anomaly_type }}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Anomaly Detection Rate by Type",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["license-server", "monitoring"],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "License Server Overview",
|
||||
"uid": "license-server-overview",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
522
monitoring/grafana/dashboards/system-overview.json
Normale Datei
522
monitoring/grafana/dashboards/system-overview.json
Normale Datei
@@ -0,0 +1,522 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"max": 100,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 70
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 85
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "mean"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
|
||||
"legendFormat": "CPU Usage %",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "System CPU Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "mean"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes",
|
||||
"legendFormat": "Used Memory",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "node_memory_MemAvailable_bytes",
|
||||
"legendFormat": "Available Memory",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "System Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "red",
|
||||
"index": 1,
|
||||
"text": "Down"
|
||||
},
|
||||
"1": {
|
||||
"color": "green",
|
||||
"index": 0,
|
||||
"text": "Up"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"values": false,
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": ""
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"repeat": "service",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "up{job=~\"license-server|admin-panel|auth-service|analytics-service|admin-api-service|postgres|redis|rabbitmq|nginx\"}",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"legendFormat": "{{ job }}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Service Status",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "mean"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "100 - ((node_filesystem_avail_bytes{mountpoint=\"/\"} * 100) / node_filesystem_size_bytes{mountpoint=\"/\"})",
|
||||
"legendFormat": "Disk Usage %",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "binBps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "mean"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(node_network_receive_bytes_total[5m])",
|
||||
"legendFormat": "Receive",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(node_network_transmit_bytes_total[5m])",
|
||||
"legendFormat": "Transmit",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Network Traffic",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["system", "overview"],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "System Overview",
|
||||
"uid": "system-overview",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
23
monitoring/prometheus/Dockerfile
Normale Datei
23
monitoring/prometheus/Dockerfile
Normale Datei
@@ -0,0 +1,23 @@
|
||||
FROM prom/prometheus:v2.45.0
|
||||
|
||||
# Copy configuration files
|
||||
COPY prometheus.yml /etc/prometheus/prometheus.yml
|
||||
COPY alert_rules.yml /etc/prometheus/rules/alert_rules.yml
|
||||
|
||||
# Create directories for rules
|
||||
USER root
|
||||
RUN mkdir -p /etc/prometheus/rules && \
|
||||
chown -R nobody:nobody /etc/prometheus
|
||||
|
||||
USER nobody
|
||||
|
||||
# Expose Prometheus port
|
||||
EXPOSE 9090
|
||||
|
||||
# Start Prometheus with configuration
|
||||
CMD ["--config.file=/etc/prometheus/prometheus.yml", \
|
||||
"--storage.tsdb.path=/prometheus", \
|
||||
"--web.console.libraries=/usr/share/prometheus/console_libraries", \
|
||||
"--web.console.templates=/usr/share/prometheus/consoles", \
|
||||
"--web.enable-lifecycle", \
|
||||
"--storage.tsdb.retention.time=30d"]
|
||||
208
monitoring/prometheus/alert_rules.yml
Normale Datei
208
monitoring/prometheus/alert_rules.yml
Normale Datei
@@ -0,0 +1,208 @@
|
||||
groups:
|
||||
- name: service_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# Service Down Alerts
|
||||
- alert: ServiceDown
|
||||
expr: up == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
component: "{{ $labels.service }}"
|
||||
annotations:
|
||||
summary: "Service {{ $labels.service }} is down"
|
||||
description: "{{ $labels.service }} has been down for more than 2 minutes."
|
||||
|
||||
# High CPU Usage
|
||||
- alert: HighCPUUsage
|
||||
expr: rate(process_cpu_seconds_total[5m]) * 100 > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High CPU usage on {{ $labels.service }}"
|
||||
description: "CPU usage is above 80% for more than 5 minutes on {{ $labels.service }}."
|
||||
|
||||
# High Memory Usage
|
||||
- alert: HighMemoryUsage
|
||||
expr: (process_resident_memory_bytes / 1024 / 1024) > 1000
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High memory usage on {{ $labels.service }}"
|
||||
description: "Memory usage is above 1GB for more than 5 minutes on {{ $labels.service }}."
|
||||
|
||||
- name: license_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# High License Validation Failure Rate
|
||||
- alert: HighLicenseValidationFailureRate
|
||||
expr: rate(license_validations_failed_total[5m]) / rate(license_validations_total[5m]) > 0.1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
service: license-server
|
||||
annotations:
|
||||
summary: "High license validation failure rate"
|
||||
description: "More than 10% of license validations are failing."
|
||||
|
||||
# No License Validations
|
||||
- alert: NoLicenseValidations
|
||||
expr: rate(license_validations_total[10m]) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
service: license-server
|
||||
annotations:
|
||||
summary: "No license validations occurring"
|
||||
description: "No license validations have been processed in the last 10 minutes."
|
||||
|
||||
# High Anomaly Detection Rate
|
||||
- alert: HighAnomalyRate
|
||||
expr: rate(anomalies_detected_total[5m]) > 10
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
service: license-server
|
||||
annotations:
|
||||
summary: "High anomaly detection rate"
|
||||
description: "More than 10 anomalies detected per minute."
|
||||
|
||||
- name: database_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# PostgreSQL Down
|
||||
- alert: PostgreSQLDown
|
||||
expr: pg_up == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
service: postgres
|
||||
annotations:
|
||||
summary: "PostgreSQL is down"
|
||||
description: "PostgreSQL has been down for more than 1 minute."
|
||||
|
||||
# High Database Connections
|
||||
- alert: HighDatabaseConnections
|
||||
expr: pg_stat_database_numbackends{datname="v2_adminpanel"} > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
service: postgres
|
||||
annotations:
|
||||
summary: "High number of database connections"
|
||||
description: "More than 80 active connections to v2_adminpanel database."
|
||||
|
||||
# Slow Queries
|
||||
- alert: SlowQueries
|
||||
expr: rate(pg_stat_statements_mean_time_seconds[5m]) > 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
service: postgres
|
||||
annotations:
|
||||
summary: "Slow database queries detected"
|
||||
description: "Average query time is above 1 second."
|
||||
|
||||
- name: redis_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# Redis Down
|
||||
- alert: RedisDown
|
||||
expr: redis_up == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
service: redis
|
||||
annotations:
|
||||
summary: "Redis is down"
|
||||
description: "Redis has been down for more than 1 minute."
|
||||
|
||||
# High Redis Memory Usage
|
||||
- alert: HighRedisMemory
|
||||
expr: redis_memory_used_bytes / redis_memory_max_bytes > 0.9
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
service: redis
|
||||
annotations:
|
||||
summary: "High Redis memory usage"
|
||||
description: "Redis memory usage is above 90% of max memory."
|
||||
|
||||
# Low Cache Hit Rate
|
||||
- alert: LowCacheHitRate
|
||||
expr: redis_keyspace_hits_total / (redis_keyspace_hits_total + redis_keyspace_misses_total) < 0.8
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
service: redis
|
||||
annotations:
|
||||
summary: "Low Redis cache hit rate"
|
||||
description: "Redis cache hit rate is below 80%."
|
||||
|
||||
- name: api_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# High API Response Time
|
||||
- alert: HighAPIResponseTime
|
||||
expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High API response time on {{ $labels.service }}"
|
||||
description: "95th percentile response time is above 2 seconds on {{ $labels.service }}."
|
||||
|
||||
# High Error Rate
|
||||
- alert: HighErrorRate
|
||||
expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.05
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "High error rate on {{ $labels.service }}"
|
||||
description: "Error rate is above 5% on {{ $labels.service }}."
|
||||
|
||||
# Rate Limit Exceeded
|
||||
- alert: RateLimitExceeded
|
||||
expr: rate(rate_limit_exceeded_total[5m]) > 10
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Rate limits being exceeded frequently"
|
||||
description: "Rate limits are being exceeded more than 10 times per minute."
|
||||
|
||||
- name: infrastructure_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# High Disk Usage
|
||||
- alert: HighDiskUsage
|
||||
expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100 < 20
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Low disk space"
|
||||
description: "Disk space is below 20% on root filesystem."
|
||||
|
||||
# High Load Average
|
||||
- alert: HighLoadAverage
|
||||
expr: node_load5 > 4
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High system load"
|
||||
description: "5-minute load average is above 4."
|
||||
|
||||
# Certificate Expiry
|
||||
- alert: CertificateExpiringSoon
|
||||
expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 7
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "SSL certificate expiring soon"
|
||||
description: "SSL certificate will expire in less than 7 days."
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren