"""
Analytics repository - persistence of analytics and events.
"""

import json
import sqlite3
from typing import List, Optional, Dict, Any, Union
from datetime import datetime, timedelta
from collections import defaultdict

from infrastructure.repositories.base_repository import BaseRepository
from domain.entities.account_creation_event import AccountCreationEvent, WorkflowStep
from domain.entities.error_event import ErrorEvent, ErrorType
from domain.value_objects.error_summary import ErrorSummary


# SQL fragments shared by the reporting queries. Each one binds exactly one
# positional parameter (window length in seconds / platform name).
_TIMEFRAME_FILTER = " AND timestamp > datetime('now', '-' || ? || ' seconds')"
_PLATFORM_FILTER = " AND json_extract(metadata, '$.platform') = ?"


class AnalyticsRepository(BaseRepository):
    """Repository for analytics events and reporting.

    Writes to and reads from the ``account_creation_analytics`` and
    ``error_events`` tables. The ``_execute_*``, ``_serialize_json`` and
    ``_parse_datetime`` helpers are not defined in this class (presumably
    inherited from ``BaseRepository`` - confirm there).
    """

    def save_account_creation_event(self, event: AccountCreationEvent) -> None:
        """Store an account creation event.

        Args:
            event: The event to persist. ``account_data``, ``duration`` and
                ``error_details`` may be falsy; the dependent columns then
                fall back to ``None`` (or ``0`` for the duration).
        """
        query = """
            INSERT INTO account_creation_analytics (
                event_id, timestamp, account_id, session_id,
                fingerprint_id, duration_seconds, success,
                error_type, error_message, workflow_steps,
                metadata, total_retry_count, network_requests,
                screenshots_taken, proxy_used, proxy_type,
                browser_type, headless, success_rate
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """

        # Serialize complex data
        workflow_steps_json = self._serialize_json([
            step.to_dict() for step in event.steps_completed
        ])

        account = event.account_data
        metadata = {
            'platform': account.platform if account else None,
            'username': account.username if account else None,
            'email': account.email if account else None,
            'additional': account.metadata if account else {},
        }

        error = event.error_details
        params = (
            event.event_id,
            event.timestamp,
            # The account_id column is filled with the account's username.
            account.username if account else None,
            event.session_id,
            event.fingerprint_id,
            event.duration.total_seconds() if event.duration else 0,
            event.success,
            error.error_type if error else None,
            error.error_message if error else None,
            workflow_steps_json,
            self._serialize_json(metadata),
            event.total_retry_count,
            event.network_requests,
            event.screenshots_taken,
            event.proxy_used,
            event.proxy_type,
            event.browser_type,
            event.headless,
            event.get_success_rate(),
        )

        self._execute_insert(query, params)

    def save_error_event(self, event: ErrorEvent) -> None:
        """Store an error event.

        The error context and the list of recovery attempts are stored as
        JSON text in the ``context`` and ``recovery_attempts`` columns.

        Args:
            event: The error event to persist.
        """
        query = """
            INSERT INTO error_events (
                error_id, timestamp, error_type, error_message,
                stack_trace, context, recovery_attempted,
                recovery_successful, recovery_attempts, severity,
                platform, session_id, account_id, correlation_id,
                user_impact, system_impact, data_loss
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """

        context = event.context
        context_json = self._serialize_json({
            'url': context.url,
            'action': context.action,
            'step_name': context.step_name,
            'screenshot_path': context.screenshot_path,
            'additional_data': context.additional_data,
        })

        recovery_attempts_json = self._serialize_json([
            {
                'strategy': attempt.strategy,
                'timestamp': attempt.timestamp.isoformat(),
                'successful': attempt.successful,
                'error_message': attempt.error_message,
                'duration_seconds': attempt.duration_seconds,
            }
            for attempt in event.recovery_attempts
        ])

        params = (
            event.error_id,
            event.timestamp,
            event.error_type.value,
            event.error_message,
            event.stack_trace,
            context_json,
            event.recovery_attempted,
            event.recovery_successful,
            recovery_attempts_json,
            event.severity.value,
            event.platform,
            event.session_id,
            event.account_id,
            event.correlation_id,
            event.user_impact,
            event.system_impact,
            event.data_loss,
        )

        self._execute_insert(query, params)

    def get_success_rate(self,
                         timeframe: Optional[timedelta] = None,
                         platform: Optional[str] = None) -> float:
        """Compute the success rate of account creation attempts.

        Args:
            timeframe: If given (and non-zero), only events newer than
                ``now - timeframe`` are counted.
            platform: If given, only events whose metadata ``platform``
                equals this value are counted.

        Returns:
            ``successful / total`` for the matching rows, or ``0.0`` when
            no rows match.
        """
        query = """
            SELECT
                COUNT(*) as total,
                SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful
            FROM account_creation_analytics
            WHERE 1=1
        """
        params = []

        if timeframe:
            query += _TIMEFRAME_FILTER
            params.append(int(timeframe.total_seconds()))

        if platform:
            query += _PLATFORM_FILTER
            params.append(platform)

        rows = self._execute_query(query, tuple(params))
        if not rows:
            # An aggregate query normally yields one row; do not crash with
            # an IndexError if the helper returns nothing.
            return 0.0

        row = rows[0]
        total = row['total'] or 0
        if total > 0:
            # SUM() is NULL only when no rows matched, which total > 0
            # already excludes; the fallback is purely defensive.
            return (row['successful'] or 0) / total
        return 0.0

    def get_common_errors(self,
                          limit: int = 10,
                          timeframe: Optional[timedelta] = None) -> List[ErrorSummary]:
        """Fetch the most frequent errors, grouped by error type.

        Args:
            limit: Maximum number of error types to return.
            timeframe: If given (and non-zero), only errors newer than
                ``now - timeframe`` are considered, both for the per-type
                aggregates and for the most-common URL/action/step details.

        Returns:
            One ``ErrorSummary`` per error type, ordered by descending
            occurrence count.
        """
        query = """
            SELECT
                error_type,
                COUNT(*) as error_count,
                MIN(timestamp) as first_occurrence,
                MAX(timestamp) as last_occurrence,
                AVG(CASE WHEN recovery_successful = 1 THEN 1.0 ELSE 0.0 END) as recovery_rate,
                GROUP_CONCAT(DISTINCT session_id) as sessions,
                GROUP_CONCAT(DISTINCT account_id) as accounts,
                SUM(user_impact) as total_user_impact,
                SUM(system_impact) as total_system_impact,
                SUM(data_loss) as data_loss_incidents
            FROM error_events
            WHERE 1=1
        """
        params = []
        window_seconds = int(timeframe.total_seconds()) if timeframe else None

        if timeframe:
            query += _TIMEFRAME_FILTER
            params.append(window_seconds)

        query += " GROUP BY error_type ORDER BY error_count DESC LIMIT ?"
        params.append(limit)

        rows = self._execute_query(query, tuple(params))

        # Fetch additional details for each error type. The detail query is
        # restricted to the same time window as the summary query so the
        # reported URLs/actions/steps describe the same set of events.
        detail_query = """
            SELECT
                json_extract(context, '$.url') as url,
                json_extract(context, '$.action') as action,
                json_extract(context, '$.step_name') as step,
                COUNT(*) as count
            FROM error_events
            WHERE error_type = ?
        """
        if timeframe:
            detail_query += _TIMEFRAME_FILTER
        detail_query += " GROUP BY url, action, step ORDER BY count DESC LIMIT 5"

        summaries = []
        for row in rows:
            detail_params = [row['error_type']]
            if timeframe:
                detail_params.append(window_seconds)
            details = self._execute_query(detail_query, tuple(detail_params))

            # Rows are grouped by the (url, action, step) triple, so a single
            # URL/action/step can occur in several rows. dict.fromkeys drops
            # the repeats while keeping the most-frequent-first order.
            urls = list(dict.fromkeys(
                d['url'] for d in details if d['url']
            ))
            actions = list(dict.fromkeys(
                d['action'] for d in details if d['action']
            ))
            steps = list(dict.fromkeys(
                d['step'] for d in details if d['step']
            ))

            sessions = row['sessions']
            accounts = row['accounts']
            summaries.append(ErrorSummary(
                error_type=row['error_type'],
                error_count=row['error_count'],
                first_occurrence=self._parse_datetime(row['first_occurrence']),
                last_occurrence=self._parse_datetime(row['last_occurrence']),
                affected_sessions=sessions.split(',') if sessions else [],
                affected_accounts=accounts.split(',') if accounts else [],
                avg_recovery_time=0.0,  # TODO: compute from recovery_attempts
                recovery_success_rate=row['recovery_rate'] or 0.0,
                most_common_urls=urls,
                most_common_actions=actions,
                most_common_steps=steps,
                total_user_impact=row['total_user_impact'] or 0,
                total_system_impact=row['total_system_impact'] or 0,
                data_loss_incidents=row['data_loss_incidents'] or 0,
            ))

        return summaries

    def get_timeline_data(self,
                          metric: str,
                          hours: int = 24,
                          platform: Optional[str] = None) -> List[Dict[str, Any]]:
        """Fetch hourly timeline data for graphs.

        Args:
            metric: Currently unused by this method; every call returns the
                same set of fields regardless of its value.
            hours: Size of the look-back window in hours.
            platform: If given, only events whose metadata ``platform``
                equals this value are included.

        Returns:
            One dict per hour bucket that contains events, ordered by hour,
            with the keys ``timestamp``, ``total``, ``successful``,
            ``success_rate`` and ``avg_duration``.
        """
        # Build hourly buckets
        query = """
            SELECT
                strftime('%Y-%m-%d %H:00:00', timestamp) as hour,
                COUNT(*) as total,
                SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
                AVG(duration_seconds) as avg_duration
            FROM account_creation_analytics
            WHERE timestamp > datetime('now', '-' || ? || ' hours')
        """
        params = [hours]

        if platform:
            query += _PLATFORM_FILTER
            params.append(platform)

        query += " GROUP BY hour ORDER BY hour"

        rows = self._execute_query(query, tuple(params))

        return [
            {
                'timestamp': row['hour'],
                'total': row['total'],
                'successful': row['successful'],
                'success_rate': row['successful'] / row['total'] if row['total'] > 0 else 0,
                'avg_duration': row['avg_duration'],
            }
            for row in rows
        ]

    def get_platform_stats(self,
                           timeframe: Optional[timedelta] = None) -> Dict[str, Dict[str, Any]]:
        """Fetch statistics per platform.

        Rows whose metadata has no ``platform`` value are excluded.

        Args:
            timeframe: If given (and non-zero), only events newer than
                ``now - timeframe`` are counted.

        Returns:
            A mapping from platform name to a dict with the keys
            ``total_attempts``, ``successful_accounts``, ``failed_attempts``,
            ``success_rate``, ``avg_duration_seconds`` and ``avg_retries``.
        """
        query = """
            SELECT
                json_extract(metadata, '$.platform') as platform,
                COUNT(*) as total_attempts,
                SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
                AVG(duration_seconds) as avg_duration,
                AVG(total_retry_count) as avg_retries
            FROM account_creation_analytics
            WHERE json_extract(metadata, '$.platform') IS NOT NULL
        """
        params = []

        if timeframe:
            query += _TIMEFRAME_FILTER
            params.append(int(timeframe.total_seconds()))

        query += " GROUP BY platform"

        rows = self._execute_query(query, tuple(params))

        stats = {}
        for row in rows:
            attempts = row['total_attempts']
            successful = row['successful']
            stats[row['platform']] = {
                'total_attempts': attempts,
                'successful_accounts': successful,
                'failed_attempts': attempts - successful,
                'success_rate': successful / attempts if attempts > 0 else 0,
                'avg_duration_seconds': row['avg_duration'],
                'avg_retries': row['avg_retries'],
            }

        return stats

    def cleanup_old_events(self, older_than: datetime) -> int:
        """Delete old events from both analytics tables.

        Args:
            older_than: Rows with a ``timestamp`` earlier than this value
                are deleted.

        Returns:
            The sum of the two values returned by ``_execute_delete``
            (presumably the number of deleted rows per table - confirm in
            the base class).
        """
        count1 = self._execute_delete(
            "DELETE FROM account_creation_analytics WHERE timestamp < ?",
            (older_than,)
        )
        count2 = self._execute_delete(
            "DELETE FROM error_events WHERE timestamp < ?",
            (older_than,)
        )
        return count1 + count2