# robots.txt - AegisSight UG
#
# Policy summary:
#   - Crawling is generally allowed, except for API and internal paths.
#   - Content must not be used as AI training data: training crawlers are blocked.
#   - Live-search AI bots (OAI-SearchBot, ChatGPT-User, ClaudeBot, PerplexityBot)
#     are allowed.
#
# Format notes:
#   - Each directive must be on its own line; a "#" comments out the rest of
#     its line, so directives must never share a line with a comment banner.
#   - A crawler obeys only the most specific matching User-agent group, so every
#     bot named below ignores the "*" group and is governed solely by its own.

# Default group: applies to every crawler that has no dedicated group below.
User-agent: *
Allow: /
Disallow: /api/
Disallow: /_archiv/
Disallow: /insights/

# Sitemap (independent of any User-agent group)
Sitemap: https://aegis-sight.de/sitemap.xml

# ----------------------------------------------------------------------
# AI training crawlers -- BLOCKED (no training on our content)
# ----------------------------------------------------------------------
User-agent: GPTBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: Omgilibot
Disallow: /

# ----------------------------------------------------------------------
# AI live-search crawlers -- ALLOWED (visibility in AI-generated answers)
# OAI-SearchBot, ChatGPT-User, ClaudeBot and PerplexityBot are intentionally
# NOT blocked. They have no group of their own and therefore fall under the
# "User-agent: *" rules. The intent is that they crawl for live answers,
# not for training.
#
# NOTE(review): Anthropic's crawler documentation appears to describe
# ClaudeBot as its training-data crawler, with Claude-User and
# Claude-SearchBot as the user-fetch / search agents. If that is accurate,
# allowing ClaudeBot contradicts the "no training" policy above -- confirm
# against the vendor documentation and adjust the lists if needed.
# ----------------------------------------------------------------------

# ----------------------------------------------------------------------
# Archive bots
# ----------------------------------------------------------------------
User-agent: ia_archiver
Disallow: /

User-agent: archive.org_bot
Disallow: /

# ----------------------------------------------------------------------
# SEO / spam crawlers
# ----------------------------------------------------------------------
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: SEOkicks-Robot
Disallow: /

User-agent: MauiBot
Disallow: /

User-agent: Majestic-12
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: SerendeputyBot
Disallow: /

# ----------------------------------------------------------------------
# Download managers / site copiers
# ----------------------------------------------------------------------
User-agent: HTTrack
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebCopier
Disallow: /