"""
Rate limit repository: persistence of rate limit events and policies.
"""

import json
import sqlite3
from typing import List, Optional, Dict, Any
from datetime import datetime, timedelta
from collections import defaultdict

from infrastructure.repositories.base_repository import BaseRepository
from domain.entities.rate_limit_policy import RateLimitPolicy
from domain.value_objects.action_timing import ActionTiming, ActionType


class RateLimitRepository(BaseRepository):
    """Repository for rate limit data.

    Reads and writes two tables: ``rate_limit_events`` (one row per
    recorded action timing) and ``rate_limit_policies`` (one row per action
    type). All database access goes through the ``_execute_*`` helpers,
    which are expected to be provided by ``BaseRepository``.

    NOTE(review): the time-window filters compare the stored ``timestamp``
    column against SQLite's ``datetime('now', ...)``, which is UTC. Whether
    the stored timestamps are UTC is not visible from this class - confirm
    against the code that creates ``ActionTiming`` objects.
    """

    @staticmethod
    def _policy_from_row(row: Any) -> RateLimitPolicy:
        """Build a ``RateLimitPolicy`` from a ``rate_limit_policies`` row."""
        return RateLimitPolicy(
            min_delay=row['min_delay'],
            max_delay=row['max_delay'],
            adaptive=bool(row['adaptive']),
            backoff_multiplier=row['backoff_multiplier'],
            max_retries=row['max_retries'],
        )

    @staticmethod
    def _stats_from_row(row: Any) -> Dict[str, Any]:
        """Convert one aggregated statistics row into a plain dict."""
        total = row['total_actions']
        return {
            'total_actions': total,
            'avg_duration_ms': row['avg_duration_ms'],
            'min_duration_ms': row['min_duration_ms'],
            'max_duration_ms': row['max_duration_ms'],
            'success_rate': row['successful_actions'] / total if total > 0 else 0,
            'failed_actions': row['failed_actions'],
            'avg_retry_count': row['avg_retry_count'],
            'max_retry_count': row['max_retry_count'],
        }

    def save_timing(self, timing: ActionTiming) -> None:
        """Insert a single action timing event into ``rate_limit_events``.

        ``response_code`` and ``session_id`` are copied out of
        ``timing.metadata`` into their own columns; the complete metadata is
        additionally stored in serialized form. Empty or missing metadata is
        stored as NULL in all three columns.

        Args:
            timing: The event to persist. ``duration_ms`` is truncated to an
                integer before it is stored.
        """
        query = """
            INSERT INTO rate_limit_events (
                timestamp, action_type, duration_ms, success, response_code,
                session_id, url, element_selector, error_message, retry_count,
                metadata
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
        # Evaluate the "has metadata" condition once; the lookups below then
        # yield None for missing/empty metadata exactly as before.
        metadata = timing.metadata or {}

        # NOTE(review): timing.timestamp is bound as-is. If it is a datetime
        # and the driver is sqlite3, this relies on the default datetime
        # adapter (deprecated since Python 3.12) - confirm in BaseRepository.
        params = (
            timing.timestamp,
            timing.action_type.value,
            int(timing.duration_ms),
            timing.success,
            metadata.get('response_code'),
            metadata.get('session_id'),
            timing.url,
            timing.element_selector,
            timing.error_message,
            timing.retry_count,
            self._serialize_json(timing.metadata) if timing.metadata else None,
        )
        self._execute_insert(query, params)

    def get_recent_timings(self, action_type: Optional[ActionType] = None,
                           hours: int = 1) -> List[ActionTiming]:
        """Return timing events recorded within the last ``hours`` hours.

        Args:
            action_type: If given, only events of this action type are
                returned; ``None`` returns events of every type.
            hours: Size of the look-back window in hours.

        Returns:
            The matching events, newest first. ``duration`` is the stored
            ``duration_ms`` divided by 1000.
        """
        query = """
            SELECT * FROM rate_limit_events
            WHERE timestamp > datetime('now', '-' || ? || ' hours')
        """
        params: List[Any] = [hours]

        # Explicit None check: the filter must not depend on the truthiness
        # of the enum member.
        if action_type is not None:
            query += " AND action_type = ?"
            params.append(action_type.value)

        query += " ORDER BY timestamp DESC"
        rows = self._execute_query(query, tuple(params))

        return [
            ActionTiming(
                action_type=ActionType(row['action_type']),
                timestamp=self._parse_datetime(row['timestamp']),
                duration=row['duration_ms'] / 1000.0,
                success=bool(row['success']),
                url=row['url'],
                element_selector=row['element_selector'],
                error_message=row['error_message'],
                retry_count=row['retry_count'],
                metadata=self._deserialize_json(row['metadata']),
            )
            for row in rows
        ]

    def save_policy(self, action_type: ActionType,
                    policy: RateLimitPolicy) -> None:
        """Insert or replace the rate limit policy of an action type.

        Uses ``INSERT OR REPLACE``, so an existing row for the same key is
        overwritten. ``updated_at`` is set to ``datetime.now()``.

        Args:
            action_type: The action type the policy applies to.
            policy: The policy values to store.
        """
        query = """
            INSERT OR REPLACE INTO rate_limit_policies (
                action_type, min_delay, max_delay, adaptive,
                backoff_multiplier, max_retries, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?)
        """
        params = (
            action_type.value,
            policy.min_delay,
            policy.max_delay,
            policy.adaptive,
            policy.backoff_multiplier,
            policy.max_retries,
            datetime.now(),
        )
        self._execute_insert(query, params)

    def get_policy(self, action_type: ActionType) -> Optional[RateLimitPolicy]:
        """Return the stored policy for ``action_type``.

        Returns:
            The policy built from the first matching row, or ``None`` when
            no row exists for this action type.
        """
        query = "SELECT * FROM rate_limit_policies WHERE action_type = ?"
        rows = self._execute_query(query, (action_type.value,))

        if not rows:
            return None
        return self._policy_from_row(rows[0])

    def get_all_policies(self) -> Dict[ActionType, RateLimitPolicy]:
        """Return every stored policy, keyed by action type.

        Rows that raise ``ValueError`` while being converted (for example
        because ``action_type`` is not a known ``ActionType`` value) are
        skipped on purpose, so one stale row does not make all policies
        unavailable.
        """
        query = "SELECT * FROM rate_limit_policies"
        rows = self._execute_query(query)

        policies: Dict[ActionType, RateLimitPolicy] = {}
        for row in rows:
            try:
                action_type = ActionType(row['action_type'])
                policies[action_type] = self._policy_from_row(row)
            except ValueError:
                # Unknown ActionType: best-effort, skip this row.
                continue
        return policies

    def get_statistics(self, action_type: Optional[ActionType] = None,
                       timeframe: Optional[timedelta] = None) -> Dict[str, Any]:
        """Compute aggregated rate limiting statistics per action type.

        Args:
            action_type: If given, restrict the statistics to this action
                type.
            timeframe: If given, only events newer than ``now - timeframe``
                are considered (resolution: whole seconds). ``None`` means
                no time restriction.

        Returns:
            With ``action_type`` given and at least one matching event: a
            flat dict of statistics that also carries an ``'action_type'``
            key. Otherwise: a dict mapping each action type value to its
            statistics dict, which is empty when no events match.
        """
        query = """
            SELECT
                action_type,
                COUNT(*) as total_actions,
                AVG(duration_ms) as avg_duration_ms,
                MIN(duration_ms) as min_duration_ms,
                MAX(duration_ms) as max_duration_ms,
                SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful_actions,
                SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as failed_actions,
                AVG(retry_count) as avg_retry_count,
                MAX(retry_count) as max_retry_count
            FROM rate_limit_events
            WHERE 1=1
        """
        params: List[Any] = []

        # "is not None" rather than truthiness: timedelta(0) is falsy, and an
        # explicit zero-length window must not turn into "no time filter".
        if timeframe is not None:
            query += " AND timestamp > datetime('now', '-' || ? || ' seconds')"
            params.append(int(timeframe.total_seconds()))

        if action_type is not None:
            query += " AND action_type = ?"
            params.append(action_type.value)

        # Grouping is needed in both modes; with an action_type filter it
        # yields at most one row.
        query += " GROUP BY action_type"

        rows = self._execute_query(query, tuple(params))

        if action_type is not None and rows:
            # Statistics for a single action type.
            row = rows[0]
            return {'action_type': row['action_type'],
                    **self._stats_from_row(row)}

        # Statistics for all action types.
        return {row['action_type']: self._stats_from_row(row) for row in rows}

    def detect_anomalies(self, action_type: ActionType,
                         threshold_multiplier: float = 2.0) -> List[Dict[str, Any]]:
        """Find unusually slow events of one action type in the last hour.

        The baseline (mean and standard deviation of ``duration_ms``) is
        taken from *successful* events of the last hour. An event counts as
        an anomaly when its duration exceeds
        ``mean + threshold_multiplier * std_dev``; the search for anomalies
        includes failed events as well.

        Args:
            action_type: The action type to analyse.
            threshold_multiplier: Number of standard deviations above the
                mean at which an event is reported.

        Returns:
            Up to 10 anomalies, slowest first. Empty when there is no
            baseline data (or the mean duration is 0).
        """
        # Mean and population variance, the latter as E[x^2] - E[x]^2.
        query = """
            SELECT
                AVG(duration_ms) as avg_duration,
                AVG(duration_ms * duration_ms) - AVG(duration_ms) * AVG(duration_ms) as variance
            FROM rate_limit_events
            WHERE action_type = ?
              AND timestamp > datetime('now', '-1 hour')
              AND success = 1
        """
        baseline_rows = self._execute_query(query, (action_type.value,))
        # An aggregate query normally yields exactly one row; guard anyway so
        # an empty result cannot raise IndexError.
        if not baseline_rows:
            return []

        baseline = baseline_rows[0]
        if not baseline['avg_duration']:
            return []

        avg_duration = baseline['avg_duration']
        variance = baseline['variance']
        # Floating point rounding can make the variance slightly negative;
        # treat anything non-positive as "no spread".
        std_dev = (variance ** 0.5) if variance > 0 else 0
        threshold = avg_duration + (std_dev * threshold_multiplier)

        # Find the anomalies.
        query = """
            SELECT * FROM rate_limit_events
            WHERE action_type = ?
              AND timestamp > datetime('now', '-1 hour')
              AND duration_ms > ?
            ORDER BY duration_ms DESC
            LIMIT 10
        """
        rows = self._execute_query(query, (action_type.value, threshold))

        return [
            {
                'timestamp': row['timestamp'],
                'duration_ms': row['duration_ms'],
                # Distance from the mean in standard deviations.
                'deviation': (row['duration_ms'] - avg_duration) / std_dev if std_dev > 0 else 0,
                'success': bool(row['success']),
                'url': row['url'],
                'error_message': row['error_message'],
            }
            for row in rows
        ]

    def cleanup_old_events(self, older_than: datetime) -> int:
        """Delete rate limit events with a timestamp before ``older_than``.

        Returns:
            The value returned by ``_execute_delete`` - presumably the
            number of deleted rows (TODO confirm in ``BaseRepository``).
        """
        query = "DELETE FROM rate_limit_events WHERE timestamp < ?"
        return self._execute_delete(query, (older_than,))