# robots.txt - AegisSight UG
# Crawling allgemein erlaubt, ausser API/interne Pfade
# Keine Trainingsdaten-Verwendung durch AI-Crawler (Training-Bots geblockt)
# Live-Search-AI-Bots (OAI-SearchBot, ChatGPT-User, ClaudeBot, PerplexityBot) sind erlaubt

# Default rules for every crawler that has no dedicated group further down.
# The Disallow lines are listed first on purpose: parsers that apply the
# first matching rule (instead of the longest match defined by RFC 9309)
# would otherwise stop at "Allow: /" and ignore the exclusions entirely.
User-agent: *
Disallow: /api/
Disallow: /_archiv/
Disallow: /insights/
Allow: /

# Sitemap (applies to all crawlers, independent of the user-agent groups)
Sitemap: https://aegis-sight.de/sitemap.xml

# ----------------------------------------------------------------------
# AI training crawlers -- BLOCKED (no model training on our content)
# Each group below denies the whole site to one training-data crawler.
# ----------------------------------------------------------------------
User-agent: GPTBot
Disallow: /

User-agent: CCBot
Disallow: /

# ClaudeBot is Anthropic's training-data crawler. It must be blocked here;
# Anthropic's live retrieval agents (Claude-SearchBot, Claude-User) are
# separate tokens and stay governed by the "User-agent: *" group.
User-agent: ClaudeBot
Disallow: /

# Presumably legacy Anthropic tokens -- kept in case they are still in use.
User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: Omgilibot
Disallow: /

# ----------------------------------------------------------------------
# AI-Live-Search-Crawler -- ALLOWED (Sichtbarkeit in KI-Antworten)
# OAI-SearchBot, ChatGPT-User, ClaudeBot, PerplexityBot werden NICHT
# blockiert. Sie crawlen fuer Live-Antworten, nicht fuer Training.
# ----------------------------------------------------------------------

# ----------------------------------------------------------------------
# Archive bots -- denied site-wide
# One shared group: every User-agent line below is bound to the single
# Disallow rule that follows.
# ----------------------------------------------------------------------
User-agent: ia_archiver
User-agent: archive.org_bot
Disallow: /

# ----------------------------------------------------------------------
# SEO / spam crawlers -- denied site-wide
# One shared group: every User-agent line below is bound to the single
# Disallow rule that follows.
# ----------------------------------------------------------------------
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: SEOkicks-Robot
User-agent: MauiBot
User-agent: Majestic-12
User-agent: BLEXBot
User-agent: SerendeputyBot
Disallow: /

# ----------------------------------------------------------------------
# Download managers / offline site copiers -- denied site-wide
# One shared group: every User-agent line below is bound to the single
# Disallow rule that follows.
# ----------------------------------------------------------------------
User-agent: HTTrack
User-agent: SiteSnagger
User-agent: WebCopier
Disallow: /
