"""
Analytics Repository - Persistierung von Analytics und Events
"""
|
|
|
|
import json
|
|
import sqlite3
|
|
from typing import List, Optional, Dict, Any, Union
|
|
from datetime import datetime, timedelta
|
|
from collections import defaultdict
|
|
|
|
from infrastructure.repositories.base_repository import BaseRepository
|
|
from domain.entities.account_creation_event import AccountCreationEvent, WorkflowStep
|
|
from domain.entities.error_event import ErrorEvent, ErrorType
|
|
from domain.value_objects.error_summary import ErrorSummary
|
|
|
|
|
|
class AnalyticsRepository(BaseRepository):
    """Repository for analytics events and reporting."""

    def save_account_creation_event(self, event: AccountCreationEvent) -> None:
        """Persist a single account-creation event.

        Complex sub-structures (workflow steps, account metadata) are
        serialized to JSON before being written to the
        ``account_creation_analytics`` table.

        Args:
            event: The fully populated creation event to store.
        """
        sql = """
            INSERT INTO account_creation_analytics (
                event_id, timestamp, account_id, session_id, fingerprint_id,
                duration_seconds, success, error_type, error_message,
                workflow_steps, metadata, total_retry_count, network_requests,
                screenshots_taken, proxy_used, proxy_type, browser_type,
                headless, success_rate
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """

        account = event.account_data
        error = event.error_details

        # Workflow steps are stored as one JSON array of step dicts.
        steps_json = self._serialize_json(
            [step.to_dict() for step in event.steps_completed]
        )

        # Platform/user info travels in a single JSON metadata column.
        meta_json = self._serialize_json({
            'platform': account.platform if account else None,
            'username': account.username if account else None,
            'email': account.email if account else None,
            'additional': account.metadata if account else {}
        })

        self._execute_insert(sql, (
            event.event_id,
            event.timestamp,
            account.username if account else None,  # account_id column
            event.session_id,
            event.fingerprint_id,
            event.duration.total_seconds() if event.duration else 0,
            event.success,
            error.error_type if error else None,
            error.error_message if error else None,
            steps_json,
            meta_json,
            event.total_retry_count,
            event.network_requests,
            event.screenshots_taken,
            event.proxy_used,
            event.proxy_type,
            event.browser_type,
            event.headless,
            event.get_success_rate()
        ))
|
|
|
|
def save_error_event(self, event: ErrorEvent) -> None:
|
|
"""Speichert ein Error Event"""
|
|
query = """
|
|
INSERT INTO error_events (
|
|
error_id, timestamp, error_type, error_message, stack_trace,
|
|
context, recovery_attempted, recovery_successful, recovery_attempts,
|
|
severity, platform, session_id, account_id, correlation_id,
|
|
user_impact, system_impact, data_loss
|
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
"""
|
|
|
|
context_json = self._serialize_json({
|
|
'url': event.context.url,
|
|
'action': event.context.action,
|
|
'step_name': event.context.step_name,
|
|
'screenshot_path': event.context.screenshot_path,
|
|
'additional_data': event.context.additional_data
|
|
})
|
|
|
|
recovery_attempts_json = self._serialize_json([
|
|
{
|
|
'strategy': attempt.strategy,
|
|
'timestamp': attempt.timestamp.isoformat(),
|
|
'successful': attempt.successful,
|
|
'error_message': attempt.error_message,
|
|
'duration_seconds': attempt.duration_seconds
|
|
}
|
|
for attempt in event.recovery_attempts
|
|
])
|
|
|
|
params = (
|
|
event.error_id,
|
|
event.timestamp,
|
|
event.error_type.value,
|
|
event.error_message,
|
|
event.stack_trace,
|
|
context_json,
|
|
event.recovery_attempted,
|
|
event.recovery_successful,
|
|
recovery_attempts_json,
|
|
event.severity.value,
|
|
event.platform,
|
|
event.session_id,
|
|
event.account_id,
|
|
event.correlation_id,
|
|
event.user_impact,
|
|
event.system_impact,
|
|
event.data_loss
|
|
)
|
|
|
|
self._execute_insert(query, params)
|
|
|
|
def get_success_rate(self, timeframe: Optional[timedelta] = None,
|
|
platform: Optional[str] = None) -> float:
|
|
"""Berechnet die Erfolgsrate"""
|
|
query = """
|
|
SELECT
|
|
COUNT(*) as total,
|
|
SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful
|
|
FROM account_creation_analytics
|
|
WHERE 1=1
|
|
"""
|
|
params = []
|
|
|
|
if timeframe:
|
|
query += " AND timestamp > datetime('now', '-' || ? || ' seconds')"
|
|
params.append(int(timeframe.total_seconds()))
|
|
|
|
if platform:
|
|
query += " AND json_extract(metadata, '$.platform') = ?"
|
|
params.append(platform)
|
|
|
|
row = self._execute_query(query, tuple(params))[0]
|
|
|
|
if row['total'] > 0:
|
|
return row['successful'] / row['total']
|
|
return 0.0
|
|
|
|
def get_common_errors(self, limit: int = 10,
|
|
timeframe: Optional[timedelta] = None) -> List[ErrorSummary]:
|
|
"""Holt die häufigsten Fehler"""
|
|
query = """
|
|
SELECT
|
|
error_type,
|
|
COUNT(*) as error_count,
|
|
MIN(timestamp) as first_occurrence,
|
|
MAX(timestamp) as last_occurrence,
|
|
AVG(CASE WHEN recovery_successful = 1 THEN 1.0 ELSE 0.0 END) as recovery_rate,
|
|
GROUP_CONCAT(DISTINCT session_id) as sessions,
|
|
GROUP_CONCAT(DISTINCT account_id) as accounts,
|
|
SUM(user_impact) as total_user_impact,
|
|
SUM(system_impact) as total_system_impact,
|
|
SUM(data_loss) as data_loss_incidents
|
|
FROM error_events
|
|
WHERE 1=1
|
|
"""
|
|
params = []
|
|
|
|
if timeframe:
|
|
query += " AND timestamp > datetime('now', '-' || ? || ' seconds')"
|
|
params.append(int(timeframe.total_seconds()))
|
|
|
|
query += " GROUP BY error_type ORDER BY error_count DESC LIMIT ?"
|
|
params.append(limit)
|
|
|
|
rows = self._execute_query(query, tuple(params))
|
|
|
|
summaries = []
|
|
for row in rows:
|
|
# Hole zusätzliche Details für diesen Fehlertyp
|
|
detail_query = """
|
|
SELECT
|
|
json_extract(context, '$.url') as url,
|
|
json_extract(context, '$.action') as action,
|
|
json_extract(context, '$.step_name') as step,
|
|
COUNT(*) as count
|
|
FROM error_events
|
|
WHERE error_type = ?
|
|
GROUP BY url, action, step
|
|
ORDER BY count DESC
|
|
LIMIT 5
|
|
"""
|
|
details = self._execute_query(detail_query, (row['error_type'],))
|
|
|
|
urls = []
|
|
actions = []
|
|
steps = []
|
|
|
|
for detail in details:
|
|
if detail['url']:
|
|
urls.append(detail['url'])
|
|
if detail['action']:
|
|
actions.append(detail['action'])
|
|
if detail['step']:
|
|
steps.append(detail['step'])
|
|
|
|
summary = ErrorSummary(
|
|
error_type=row['error_type'],
|
|
error_count=row['error_count'],
|
|
first_occurrence=self._parse_datetime(row['first_occurrence']),
|
|
last_occurrence=self._parse_datetime(row['last_occurrence']),
|
|
affected_sessions=row['sessions'].split(',') if row['sessions'] else [],
|
|
affected_accounts=row['accounts'].split(',') if row['accounts'] else [],
|
|
avg_recovery_time=0.0, # TODO: Berechnen aus recovery_attempts
|
|
recovery_success_rate=row['recovery_rate'] or 0.0,
|
|
most_common_urls=urls,
|
|
most_common_actions=actions,
|
|
most_common_steps=steps,
|
|
total_user_impact=row['total_user_impact'] or 0,
|
|
total_system_impact=row['total_system_impact'] or 0,
|
|
data_loss_incidents=row['data_loss_incidents'] or 0
|
|
)
|
|
|
|
summaries.append(summary)
|
|
|
|
return summaries
|
|
|
|
def get_timeline_data(self, metric: str, hours: int = 24,
|
|
platform: Optional[str] = None) -> List[Dict[str, Any]]:
|
|
"""Holt Timeline-Daten für Graphen"""
|
|
# Erstelle stündliche Buckets
|
|
query = """
|
|
SELECT
|
|
strftime('%Y-%m-%d %H:00:00', timestamp) as hour,
|
|
COUNT(*) as total,
|
|
SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
|
|
AVG(duration_seconds) as avg_duration
|
|
FROM account_creation_analytics
|
|
WHERE timestamp > datetime('now', '-' || ? || ' hours')
|
|
"""
|
|
params = [hours]
|
|
|
|
if platform:
|
|
query += " AND json_extract(metadata, '$.platform') = ?"
|
|
params.append(platform)
|
|
|
|
query += " GROUP BY hour ORDER BY hour"
|
|
|
|
rows = self._execute_query(query, tuple(params))
|
|
|
|
timeline = []
|
|
for row in rows:
|
|
data = {
|
|
'timestamp': row['hour'],
|
|
'total': row['total'],
|
|
'successful': row['successful'],
|
|
'success_rate': row['successful'] / row['total'] if row['total'] > 0 else 0,
|
|
'avg_duration': row['avg_duration']
|
|
}
|
|
timeline.append(data)
|
|
|
|
return timeline
|
|
|
|
def get_platform_stats(self, timeframe: Optional[timedelta] = None) -> Dict[str, Dict[str, Any]]:
|
|
"""Holt Statistiken pro Plattform"""
|
|
query = """
|
|
SELECT
|
|
json_extract(metadata, '$.platform') as platform,
|
|
COUNT(*) as total_attempts,
|
|
SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
|
|
AVG(duration_seconds) as avg_duration,
|
|
AVG(total_retry_count) as avg_retries
|
|
FROM account_creation_analytics
|
|
WHERE json_extract(metadata, '$.platform') IS NOT NULL
|
|
"""
|
|
params = []
|
|
|
|
if timeframe:
|
|
query += " AND timestamp > datetime('now', '-' || ? || ' seconds')"
|
|
params.append(int(timeframe.total_seconds()))
|
|
|
|
query += " GROUP BY platform"
|
|
|
|
rows = self._execute_query(query, tuple(params))
|
|
|
|
stats = {}
|
|
for row in rows:
|
|
stats[row['platform']] = {
|
|
'total_attempts': row['total_attempts'],
|
|
'successful_accounts': row['successful'],
|
|
'failed_attempts': row['total_attempts'] - row['successful'],
|
|
'success_rate': row['successful'] / row['total_attempts'] if row['total_attempts'] > 0 else 0,
|
|
'avg_duration_seconds': row['avg_duration'],
|
|
'avg_retries': row['avg_retries']
|
|
}
|
|
|
|
return stats
|
|
|
|
def cleanup_old_events(self, older_than: datetime) -> int:
|
|
"""Bereinigt alte Events"""
|
|
count1 = self._execute_delete(
|
|
"DELETE FROM account_creation_analytics WHERE timestamp < ?",
|
|
(older_than,)
|
|
)
|
|
count2 = self._execute_delete(
|
|
"DELETE FROM error_events WHERE timestamp < ?",
|
|
(older_than,)
|
|
)
|
|
return count1 + count2 |