static: add proper robots.txt

Signed-off-by: Christoph Heiss <christoph@c8h4.io>
This commit is contained in:
Christoph Heiss 2024-01-25 12:26:50 +01:00
parent 0040b1fc0b
commit b032e9590a
Signed by: c8h4
GPG key ID: 1538094429952F86
2 changed files with 77 additions and 1 deletions

View file

@ -3,7 +3,7 @@ baseURL: https://c8h4.io/
languageCode: en-us
title: Christoph Heiss
theme: hacker
enableRobotsTXT: true
enableRobotsTXT: false
markup:
highlight:

76
static/robots.txt Normal file
View file

@ -0,0 +1,76 @@
# Based on https://git.sr.ht/~sircmpwn/sr.ht-nginx/tree/master/item/robots.txt
# All credit for collecting to Drew, the sourcehut crew and its contributors!
# Too aggressive, marketing/SEO
User-agent: SemrushBot
Disallow: /
# Too aggressive, marketing/SEO
User-agent: SemrushBot-SA
Disallow: /
# Marketing/SEO
User-agent: AhrefsBot
Disallow: /
# Marketing/SEO
User-agent: dotbot
Disallow: /
# Marketing/SEO
User-agent: rogerbot
Disallow: /
User-agent: BLEXBot
Disallow: /
# Huawei something or other, badly behaved
User-agent: AspiegelBot
Disallow: /
# Marketing/SEO
User-agent: ZoominfoBot
Disallow: /
# YandexBot is a dickhead, too aggressive
User-agent: Yandex
Disallow: /
# Marketing/SEO
User-agent: MJ12bot
Disallow: /
# Marketing/SEO
User-agent: DataForSeoBot
Disallow: /
# Used for Alexa, I guess, who cares
User-agent: Amazonbot
Disallow: /
# No
User-agent: turnitinbot
Disallow: /
User-agent: Turnitin
Disallow: /
# Does not respect * directives
User-agent: Seekport Crawler
Disallow: /
# No thanks
User-agent: GPTBot
Disallow: /
# Fairly certain that this is an LLM data vacuum
User-agent: ClaudeBot
Disallow: /
# Same
User-agent: Google-Extended
Disallow: /
# Marketing
User-agent: serpstatbot
Disallow: /