251 Zeilen
14 KiB
Python
251 Zeilen
14 KiB
Python
"""
Cookie consent handler for browser sessions.

Handles cookie consent pages during session restoration.
"""
import logging
from typing import Optional

from playwright.sync_api import Page

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class CookieConsentHandler:
    """Handle cookie consent dialogs of different platforms.

    All methods are static; the class only serves as a namespace.
    """

    # Upper bound in milliseconds for every wait and click issued here.
    _TIMEOUT_MS = 5000

    # Buttons that reject optional cookies (German and English labels).
    _DECLINE_BUTTONS = (
        "button:has-text('Optionale Cookies ablehnen')",
        "button:has-text('Nur erforderliche Cookies erlauben')",
        "button:has-text('Decline optional cookies')",
        "button:has-text('Only allow essential cookies')",
    )

    # Selectors whose visibility is taken as "a consent page is shown".
    _CONSENT_INDICATORS = (
        # German texts
        "text=/.*cookies erlauben.*/i",
        "text=/.*optionale cookies ablehnen.*/i",
        "button:has-text('Optionale Cookies ablehnen')",
        "button:has-text('Nur erforderliche Cookies erlauben')",
        # English texts
        "button:has-text('Decline optional cookies')",
        "button:has-text('Only allow essential cookies')",
        # Generic selectors
        "[aria-label*='cookie']",
        "text=/.*verwendung von cookies.*/i",
    )

    # Buttons that accept all cookies; only used as a last resort.
    _ACCEPT_BUTTONS = (
        "button:has-text('Alle Cookies erlauben')",
        "button:has-text('Allow all cookies')",
        "button:has-text('Accept all')",
        # More specific Instagram selector based on the div role
        "div[role='button']:has-text('Alle Cookies erlauben')",
        # Fallback with partial text
        "[role='button']:has-text('Cookies erlauben')",
        # XPath as the final fallback
        "xpath=//div[@role='button' and contains(text(),'Alle Cookies erlauben')]",
    )

    # NOTE(review): the storage keys/values below are carried over unchanged;
    # whether the site actually honours them cannot be verified from this file.
    _ESSENTIAL_ONLY_STORAGE_JS = """
        () => {
            // Instagram Consent Storage für "nur erforderliche Cookies"
            localStorage.setItem('ig_cb', '2'); // 2 = nur erforderliche Cookies
            localStorage.setItem('ig_consent_timestamp', Date.now().toString());

            // Meta Consent
            localStorage.setItem('consent_status', 'essential_only');
            localStorage.setItem('cookie_consent', JSON.stringify({
                necessary: true,
                analytics: false,
                marketing: false,
                timestamp: Date.now()
            }));
        }
    """

    _ALL_ACCEPTED_STORAGE_JS = """
        () => {
            // Instagram Consent Storage für "alle Cookies"
            localStorage.setItem('ig_cb', '1'); // 1 = alle Cookies akzeptiert
            localStorage.setItem('ig_consent_timestamp', Date.now().toString());

            // Meta Consent
            localStorage.setItem('consent_status', 'all_accepted');
            localStorage.setItem('cookie_consent', JSON.stringify({
                necessary: true,
                analytics: true,
                marketing: true,
                timestamp: Date.now()
            }));
        }
    """

    @staticmethod
    def _wait_for_idle(page: Page) -> None:
        """Wait for the 'networkidle' load state without failing on timeout.

        A page may keep background requests open and never become idle, so a
        timeout here must not be treated as a failure of consent handling.
        """
        try:
            page.wait_for_load_state(
                'networkidle', timeout=CookieConsentHandler._TIMEOUT_MS
            )
        except Exception as wait_error:
            logger.debug("networkidle wait did not complete: %s", wait_error)

    @staticmethod
    def _persist_consent(page: Page, script: str) -> None:
        """Run a localStorage script; a failure is logged, not raised.

        The consent button has already been clicked when this is called, so a
        storage error must not turn a handled dialog into a reported failure.
        """
        try:
            page.evaluate(script)
        except Exception as storage_error:
            logger.warning(
                "Could not persist consent state in localStorage: %s",
                storage_error,
            )

    @staticmethod
    def _click_with_fallbacks(button, failure_suffix: str,
                              js_success_message: str) -> bool:
        """Click a locator using three increasingly forceful strategies.

        Args:
            button: Playwright locator of the element to click.
            failure_suffix: Text appended to the failure log messages.
            js_success_message: Message logged when the JavaScript click works.

        Returns:
            bool: True as soon as one strategy succeeds, False if all fail.
        """
        timeout = CookieConsentHandler._TIMEOUT_MS

        # Strategy 1: standard click (with Playwright's actionability checks).
        try:
            button.click(timeout=timeout)
            return True
        except Exception as click_error:
            logger.warning(
                "Standard click fehlgeschlagen%s: %s", failure_suffix, click_error
            )

        # Strategy 2: forced click (skips the actionability checks).
        try:
            button.click(force=True, timeout=timeout)
            return True
        except Exception as force_error:
            logger.warning(
                "Force click fehlgeschlagen%s: %s", failure_suffix, force_error
            )

        # Strategy 3: DOM click on the element the locator resolved to.
        # Evaluating on the locator avoids passing a Playwright-only selector
        # (e.g. ':has-text') to document.querySelector, which cannot parse it.
        try:
            button.evaluate("element => element.click()", timeout=timeout)
        except Exception as js_error:
            logger.warning(
                "JavaScript click fehlgeschlagen%s: %s", failure_suffix, js_error
            )
            return False

        logger.info(js_success_message)
        return True

    @staticmethod
    def _decline_optional_cookies(page: Page) -> bool:
        """Click the first visible 'decline optional cookies' button.

        Returns:
            bool: True if a decline button was clicked, False otherwise.
        """
        for button_selector in CookieConsentHandler._DECLINE_BUTTONS:
            try:
                button = page.locator(button_selector).first
                if not button.is_visible():
                    continue

                logger.info("Found consent decline button: %s", button_selector)
                clicked = CookieConsentHandler._click_with_fallbacks(
                    button,
                    "",
                    "JavaScript click erfolgreich für Cookie-Consent",
                )
                if not clicked:
                    logger.error(
                        "Alle Click-Strategien für Cookie-Consent Button "
                        "fehlgeschlagen: %s",
                        button_selector,
                    )
                    continue

                logger.info("Clicked decline optional cookies button")
                CookieConsentHandler._wait_for_idle(page)
                CookieConsentHandler._persist_consent(
                    page, CookieConsentHandler._ESSENTIAL_ONLY_STORAGE_JS
                )
                return True
            except Exception as e:
                logger.debug("Consent check failed for %s: %s", button_selector, e)

        return False

    @staticmethod
    def _accept_all_cookies(page: Page) -> bool:
        """Click the first visible 'accept all cookies' button.

        Returns:
            bool: True if an accept button was clicked, False otherwise.
        """
        for accept_button in CookieConsentHandler._ACCEPT_BUTTONS:
            try:
                button = page.locator(accept_button).first
                if not button.is_visible():
                    continue

                logger.info("Fallback: Accepting all cookies with %s", accept_button)
                clicked = CookieConsentHandler._click_with_fallbacks(
                    button,
                    " für Accept",
                    "JavaScript click erfolgreich für Cookie Accept Button",
                )
                if not clicked:
                    continue

                CookieConsentHandler._wait_for_idle(page)
                CookieConsentHandler._persist_consent(
                    page, CookieConsentHandler._ALL_ACCEPTED_STORAGE_JS
                )
                return True
            except Exception as e:
                logger.error("Fehler bei Accept-Button %s: %s", accept_button, e)

        return False

    @staticmethod
    def handle_instagram_consent(page: Page) -> bool:
        """Handle Instagram's cookie consent page.

        Declining optional cookies is tried first (privacy-friendly). Only if
        a consent page is detected and no decline button could be clicked are
        all cookies accepted as a last resort.

        Args:
            page: Playwright page object.

        Returns:
            bool: True if the consent was handled, False otherwise.
        """
        try:
            # Give the page a short moment to load; not reaching idle is fine.
            CookieConsentHandler._wait_for_idle(page)

            if CookieConsentHandler._decline_optional_cookies(page):
                return True

            # Fallback: check whether a consent page is displayed at all.
            for indicator in CookieConsentHandler._CONSENT_INDICATORS:
                try:
                    indicator_visible = page.locator(indicator).first.is_visible()
                except Exception as indicator_error:
                    logger.debug(
                        "Consent indicator check failed for %s: %s",
                        indicator,
                        indicator_error,
                    )
                    continue

                if not indicator_visible:
                    continue

                logger.warning(
                    "Cookie consent page detected but couldn't find decline button"
                )
                # The outcome of the first visible indicator is final.
                return CookieConsentHandler._accept_all_cookies(page)

            logger.debug("No cookie consent page detected")
            return False

        except Exception as e:
            logger.error("Error handling cookie consent: %s", e)
            return False

    @staticmethod
    def check_and_handle_consent(page: Page, platform: str = "instagram") -> bool:
        """Check for and handle the cookie consent of the given platform.

        Args:
            page: Playwright page object.
            platform: Platform name, compared case-insensitively
                (default: "instagram").

        Returns:
            bool: True if the consent was handled, False otherwise (including
            platforms without a handler).
        """
        if platform.lower() != "instagram":
            logger.warning(
                "No consent handler implemented for platform: %s", platform
            )
            return False

        return CookieConsentHandler.handle_instagram_consent(page)