Initial commit
Dieser Commit ist enthalten in:
306
infrastructure/repositories/analytics_repository.py
Normale Datei
306
infrastructure/repositories/analytics_repository.py
Normale Datei
@ -0,0 +1,306 @@
|
||||
"""
|
||||
Analytics Repository - Persistierung von Analytics und Events
|
||||
"""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from typing import List, Optional, Dict, Any, Union
|
||||
from datetime import datetime, timedelta
|
||||
from collections import defaultdict
|
||||
|
||||
from infrastructure.repositories.base_repository import BaseRepository
|
||||
from domain.entities.account_creation_event import AccountCreationEvent, WorkflowStep
|
||||
from domain.entities.error_event import ErrorEvent, ErrorType
|
||||
from domain.value_objects.error_summary import ErrorSummary
|
||||
|
||||
|
||||
class AnalyticsRepository(BaseRepository):
    """Repository for analytics events and reporting.

    Persists account-creation events (table ``account_creation_analytics``)
    and error events (table ``error_events``) via the ``BaseRepository``
    helpers, and exposes aggregate queries: success rate, most common
    errors, hourly timeline data, per-platform statistics and cleanup of
    old events.
    """
|
||||
|
||||
    def save_account_creation_event(self, event: AccountCreationEvent) -> None:
        """Persist a single account-creation event.

        Complex sub-structures (workflow steps, account metadata) are
        JSON-serialized before insertion. The ``params`` tuple order must
        match the 19-column list of the INSERT statement exactly.

        Args:
            event: The AccountCreationEvent to store.
        """
        query = """
            INSERT INTO account_creation_analytics (
                event_id, timestamp, account_id, session_id, fingerprint_id,
                duration_seconds, success, error_type, error_message,
                workflow_steps, metadata, total_retry_count, network_requests,
                screenshots_taken, proxy_used, proxy_type, browser_type,
                headless, success_rate
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """

        # Serialize complex data structures to JSON for storage.
        workflow_steps_json = self._serialize_json([
            step.to_dict() for step in event.steps_completed
        ])

        # Account metadata is stored as a JSON blob; all fields fall back to
        # None / {} when no account data is attached to the event.
        metadata = {
            'platform': event.account_data.platform if event.account_data else None,
            'username': event.account_data.username if event.account_data else None,
            'email': event.account_data.email if event.account_data else None,
            'additional': event.account_data.metadata if event.account_data else {}
        }

        params = (
            event.event_id,
            event.timestamp,
            # NOTE(review): the account_id column is filled with the username,
            # not a distinct account identifier — confirm this is intentional.
            event.account_data.username if event.account_data else None,
            event.session_id,
            event.fingerprint_id,
            event.duration.total_seconds() if event.duration else 0,
            event.success,
            # NOTE(review): save_error_event stores error_type.value; here the
            # raw attribute is stored — verify error_details.error_type is a
            # plain string rather than an enum, or SQLite binding may fail.
            event.error_details.error_type if event.error_details else None,
            event.error_details.error_message if event.error_details else None,
            workflow_steps_json,
            self._serialize_json(metadata),
            event.total_retry_count,
            event.network_requests,
            event.screenshots_taken,
            event.proxy_used,
            event.proxy_type,
            event.browser_type,
            event.headless,
            event.get_success_rate()
        )

        self._execute_insert(query, params)
|
||||
|
||||
def save_error_event(self, event: ErrorEvent) -> None:
|
||||
"""Speichert ein Error Event"""
|
||||
query = """
|
||||
INSERT INTO error_events (
|
||||
error_id, timestamp, error_type, error_message, stack_trace,
|
||||
context, recovery_attempted, recovery_successful, recovery_attempts,
|
||||
severity, platform, session_id, account_id, correlation_id,
|
||||
user_impact, system_impact, data_loss
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
"""
|
||||
|
||||
context_json = self._serialize_json({
|
||||
'url': event.context.url,
|
||||
'action': event.context.action,
|
||||
'step_name': event.context.step_name,
|
||||
'screenshot_path': event.context.screenshot_path,
|
||||
'additional_data': event.context.additional_data
|
||||
})
|
||||
|
||||
recovery_attempts_json = self._serialize_json([
|
||||
{
|
||||
'strategy': attempt.strategy,
|
||||
'timestamp': attempt.timestamp.isoformat(),
|
||||
'successful': attempt.successful,
|
||||
'error_message': attempt.error_message,
|
||||
'duration_seconds': attempt.duration_seconds
|
||||
}
|
||||
for attempt in event.recovery_attempts
|
||||
])
|
||||
|
||||
params = (
|
||||
event.error_id,
|
||||
event.timestamp,
|
||||
event.error_type.value,
|
||||
event.error_message,
|
||||
event.stack_trace,
|
||||
context_json,
|
||||
event.recovery_attempted,
|
||||
event.recovery_successful,
|
||||
recovery_attempts_json,
|
||||
event.severity.value,
|
||||
event.platform,
|
||||
event.session_id,
|
||||
event.account_id,
|
||||
event.correlation_id,
|
||||
event.user_impact,
|
||||
event.system_impact,
|
||||
event.data_loss
|
||||
)
|
||||
|
||||
self._execute_insert(query, params)
|
||||
|
||||
def get_success_rate(self, timeframe: Optional[timedelta] = None,
|
||||
platform: Optional[str] = None) -> float:
|
||||
"""Berechnet die Erfolgsrate"""
|
||||
query = """
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful
|
||||
FROM account_creation_analytics
|
||||
WHERE 1=1
|
||||
"""
|
||||
params = []
|
||||
|
||||
if timeframe:
|
||||
query += " AND timestamp > datetime('now', '-' || ? || ' seconds')"
|
||||
params.append(int(timeframe.total_seconds()))
|
||||
|
||||
if platform:
|
||||
query += " AND json_extract(metadata, '$.platform') = ?"
|
||||
params.append(platform)
|
||||
|
||||
row = self._execute_query(query, tuple(params))[0]
|
||||
|
||||
if row['total'] > 0:
|
||||
return row['successful'] / row['total']
|
||||
return 0.0
|
||||
|
||||
    def get_common_errors(self, limit: int = 10,
                          timeframe: Optional[timedelta] = None) -> List[ErrorSummary]:
        """Return the most frequent error types as ErrorSummary objects.

        Aggregates error_events by error_type (count, first/last occurrence,
        recovery rate, affected sessions/accounts, impact sums), then runs a
        follow-up query per error type for its most common URLs, actions and
        steps (an N+1 pattern — acceptable since ``limit`` is small).

        Args:
            limit: Maximum number of error types to return (most frequent first).
            timeframe: Optional window; only events newer than now-timeframe
                are aggregated.
        """
        query = """
            SELECT
                error_type,
                COUNT(*) as error_count,
                MIN(timestamp) as first_occurrence,
                MAX(timestamp) as last_occurrence,
                AVG(CASE WHEN recovery_successful = 1 THEN 1.0 ELSE 0.0 END) as recovery_rate,
                GROUP_CONCAT(DISTINCT session_id) as sessions,
                GROUP_CONCAT(DISTINCT account_id) as accounts,
                SUM(user_impact) as total_user_impact,
                SUM(system_impact) as total_system_impact,
                SUM(data_loss) as data_loss_incidents
            FROM error_events
            WHERE 1=1
        """
        params = []

        if timeframe:
            query += " AND timestamp > datetime('now', '-' || ? || ' seconds')"
            params.append(int(timeframe.total_seconds()))

        query += " GROUP BY error_type ORDER BY error_count DESC LIMIT ?"
        params.append(limit)

        rows = self._execute_query(query, tuple(params))

        summaries = []
        for row in rows:
            # Fetch the top-5 context combinations for this error type.
            # NOTE(review): this detail query does not apply the timeframe
            # filter, so URLs/actions/steps may come from outside the window —
            # confirm whether that is intended.
            detail_query = """
                SELECT
                    json_extract(context, '$.url') as url,
                    json_extract(context, '$.action') as action,
                    json_extract(context, '$.step_name') as step,
                    COUNT(*) as count
                FROM error_events
                WHERE error_type = ?
                GROUP BY url, action, step
                ORDER BY count DESC
                LIMIT 5
            """
            details = self._execute_query(detail_query, (row['error_type'],))

            urls = []
            actions = []
            steps = []

            # Collect only non-NULL context values.
            for detail in details:
                if detail['url']:
                    urls.append(detail['url'])
                if detail['action']:
                    actions.append(detail['action'])
                if detail['step']:
                    steps.append(detail['step'])

            summary = ErrorSummary(
                error_type=row['error_type'],
                error_count=row['error_count'],
                first_occurrence=self._parse_datetime(row['first_occurrence']),
                last_occurrence=self._parse_datetime(row['last_occurrence']),
                # GROUP_CONCAT yields a comma-separated string (or NULL).
                affected_sessions=row['sessions'].split(',') if row['sessions'] else [],
                affected_accounts=row['accounts'].split(',') if row['accounts'] else [],
                avg_recovery_time=0.0,  # TODO: compute from recovery_attempts
                recovery_success_rate=row['recovery_rate'] or 0.0,
                most_common_urls=urls,
                most_common_actions=actions,
                most_common_steps=steps,
                total_user_impact=row['total_user_impact'] or 0,
                total_system_impact=row['total_system_impact'] or 0,
                data_loss_incidents=row['data_loss_incidents'] or 0
            )

            summaries.append(summary)

        return summaries
|
||||
|
||||
def get_timeline_data(self, metric: str, hours: int = 24,
|
||||
platform: Optional[str] = None) -> List[Dict[str, Any]]:
|
||||
"""Holt Timeline-Daten für Graphen"""
|
||||
# Erstelle stündliche Buckets
|
||||
query = """
|
||||
SELECT
|
||||
strftime('%Y-%m-%d %H:00:00', timestamp) as hour,
|
||||
COUNT(*) as total,
|
||||
SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
|
||||
AVG(duration_seconds) as avg_duration
|
||||
FROM account_creation_analytics
|
||||
WHERE timestamp > datetime('now', '-' || ? || ' hours')
|
||||
"""
|
||||
params = [hours]
|
||||
|
||||
if platform:
|
||||
query += " AND json_extract(metadata, '$.platform') = ?"
|
||||
params.append(platform)
|
||||
|
||||
query += " GROUP BY hour ORDER BY hour"
|
||||
|
||||
rows = self._execute_query(query, tuple(params))
|
||||
|
||||
timeline = []
|
||||
for row in rows:
|
||||
data = {
|
||||
'timestamp': row['hour'],
|
||||
'total': row['total'],
|
||||
'successful': row['successful'],
|
||||
'success_rate': row['successful'] / row['total'] if row['total'] > 0 else 0,
|
||||
'avg_duration': row['avg_duration']
|
||||
}
|
||||
timeline.append(data)
|
||||
|
||||
return timeline
|
||||
|
||||
def get_platform_stats(self, timeframe: Optional[timedelta] = None) -> Dict[str, Dict[str, Any]]:
|
||||
"""Holt Statistiken pro Plattform"""
|
||||
query = """
|
||||
SELECT
|
||||
json_extract(metadata, '$.platform') as platform,
|
||||
COUNT(*) as total_attempts,
|
||||
SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
|
||||
AVG(duration_seconds) as avg_duration,
|
||||
AVG(total_retry_count) as avg_retries
|
||||
FROM account_creation_analytics
|
||||
WHERE json_extract(metadata, '$.platform') IS NOT NULL
|
||||
"""
|
||||
params = []
|
||||
|
||||
if timeframe:
|
||||
query += " AND timestamp > datetime('now', '-' || ? || ' seconds')"
|
||||
params.append(int(timeframe.total_seconds()))
|
||||
|
||||
query += " GROUP BY platform"
|
||||
|
||||
rows = self._execute_query(query, tuple(params))
|
||||
|
||||
stats = {}
|
||||
for row in rows:
|
||||
stats[row['platform']] = {
|
||||
'total_attempts': row['total_attempts'],
|
||||
'successful_accounts': row['successful'],
|
||||
'failed_attempts': row['total_attempts'] - row['successful'],
|
||||
'success_rate': row['successful'] / row['total_attempts'] if row['total_attempts'] > 0 else 0,
|
||||
'avg_duration_seconds': row['avg_duration'],
|
||||
'avg_retries': row['avg_retries']
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def cleanup_old_events(self, older_than: datetime) -> int:
|
||||
"""Bereinigt alte Events"""
|
||||
count1 = self._execute_delete(
|
||||
"DELETE FROM account_creation_analytics WHERE timestamp < ?",
|
||||
(older_than,)
|
||||
)
|
||||
count2 = self._execute_delete(
|
||||
"DELETE FROM error_events WHERE timestamp < ?",
|
||||
(older_than,)
|
||||
)
|
||||
return count1 + count2
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren