From b032e9590a0c721979d28e1ab5e1407cd04a4466 Mon Sep 17 00:00:00 2001 From: Christoph Heiss Date: Thu, 25 Jan 2024 12:26:50 +0100 Subject: [PATCH] static: add proper `robots.txt` Signed-off-by: Christoph Heiss --- config.yml | 2 +- static/robots.txt | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 static/robots.txt diff --git a/config.yml b/config.yml index 088ff68..eb70cb9 100644 --- a/config.yml +++ b/config.yml @@ -3,7 +3,7 @@ baseURL: https://c8h4.io/ languageCode: en-us title: Christoph Heiss theme: hacker -enableRobotsTXT: true +enableRobotsTXT: false markup: highlight: diff --git a/static/robots.txt b/static/robots.txt new file mode 100644 index 0000000..7cae39e --- /dev/null +++ b/static/robots.txt @@ -0,0 +1,76 @@ +# Based on https://git.sr.ht/~sircmpwn/sr.ht-nginx/tree/master/item/robots.txt +# All credit for collecting to Drew, the sourcehut crew and its contributors! + +# Too aggressive, marketing/SEO +User-agent: SemrushBot +Disallow: / + +# Too aggressive, marketing/SEO +User-agent: SemrushBot-SA +Disallow: / + +# Marketing/SEO +User-agent: AhrefsBot +Disallow: / + +# Marketing/SEO +User-agent: dotbot +Disallow: / + +# Marketing/SEO +User-agent: rogerbot +Disallow: / + +User-agent: BLEXBot +Disallow: / + +# Huawei something or another, badly behaved +User-agent: AspiegelBot +Disallow: / + +# Marketing/SEO +User-agent: ZoominfoBot +Disallow: / + +# YandexBot is a dickhead, too aggressive +User-agent: Yandex +Disallow: / + +# Marketing/SEO +User-agent: MJ12bot +Disallow: / + +# Marketing/SEO +User-agent: DataForSeoBot +Disallow: / + +# Used for Alexa, I guess, who cares +User-agent: Amazonbot +Disallow: / + +# No +User-agent: turnitinbot +Disallow: / + +User-agent: Turnitin +Disallow: / + +# Does not respect * directives +User-agent: Seekport Crawler +Disallow: / + +# No thanks +User-agent: GPTBot +Disallow: / + +# Fairly certain that this is an LLM data vacuum +User-agent: 
ClaudeBot +Disallow: / + +# Same +User-agent: Google-Extended +Disallow: / + +# Marketing +User-agent: serpstatbot +Disallow: /