# c8h4.io/static/robots.txt

# Primarily based on https://git.sr.ht/~sircmpwn/sr.ht-nginx/tree/master/item/robots.txt
# All credit for collecting these goes to Drew, the sourcehut crew, and its contributors!
# Too aggressive, marketing/SEO
User-agent: SemrushBot
Disallow: /
# Too aggressive, marketing/SEO
User-agent: SemrushBot-SA
Disallow: /
# Marketing/SEO
User-agent: AhrefsBot
Disallow: /
# Marketing/SEO
User-agent: dotbot
Disallow: /
# Marketing/SEO
User-agent: rogerbot
Disallow: /
User-agent: BLEXBot
Disallow: /
# Huawei something-or-other, badly behaved
User-agent: AspiegelBot
Disallow: /
# Marketing/SEO
User-agent: ZoominfoBot
Disallow: /
# YandexBot is a dickhead, too aggressive
User-agent: Yandex
Disallow: /
# Marketing/SEO
User-agent: MJ12bot
Disallow: /
# Marketing/SEO
User-agent: DataForSeoBot
Disallow: /
# Used for Alexa, I guess, who cares
User-agent: Amazonbot
Disallow: /
# No
User-agent: turnitinbot
Disallow: /
User-agent: Turnitin
Disallow: /
# Does not respect wildcard (*) directives
User-agent: Seekport Crawler
Disallow: /
# No thanks
User-agent: GPTBot
Disallow: /
# Fairly certain that this is an LLM data vacuum
User-agent: ClaudeBot
Disallow: /
# Same
User-agent: Google-Extended
Disallow: /
# Marketing
User-agent: serpstatbot
Disallow: /
#
# Thanks for the additional list, you AI shills!
# https://github.com/samber/the-great-gpt-firewall
#
# ChatGPT plugins
User-agent: ChatGPT-User
Disallow: /
# Common Crawl, used by e.g. OpenAI ... blargh
User-agent: CCBot
Disallow: /
# Claude
User-agent: anthropic-ai
Disallow: /
# Many thanks to
# https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
# for the next few!
User-agent: Omgilibot
Disallow: /
User-agent: Omgili
Disallow: /
User-agent: FacebookBot
Disallow: /
User-agent: Bytespider
Disallow: /