services: nginx: rate limit crawlers via nginx instead, for forgejo
All checks were successful
flake / build (push) Successful in 3m2s
.. as these requests are actually expensive; for other services it's fine.

Signed-off-by: Christoph Heiss <christoph@c8h4.io>
parent 4a3fc75793 · commit 0cdf73b58c
2 changed files with 28 additions and 49 deletions
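For context, once rendered into nginx configuration the change works roughly as follows. The directives are the ones added in the diff below; the comments describe nginx's documented limit_req behaviour and are not part of the commit:

    # Classify clients by User-Agent: a listed crawler maps to its name,
    # everything else maps to the empty string.
    map $http_user_agent $git_bad_crawlers {
        default "";
        ~*(?<name>Amazonbot|Bytespider|meta-externalagent|ClaudeBot|YandexBot) $name;
    }

    # Shared-memory zone keyed by that value. Requests with an empty key are
    # not accounted at all, so ordinary clients never touch the zone; each
    # listed crawler shares one 1 request/minute budget across all of its IPs,
    # because the key is the crawler name rather than the client address.
    limit_req_zone $git_bad_crawlers zone=gitbadcrawlers:32m rate=1r/m;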
First changed file (the Forgejo service module):

@@ -146,7 +146,18 @@ in {
     };
   };
 
-  services.nginx.virtualHosts.${fqdn} =
+  services.nginx = {
+    appendHttpConfig = ''
+      map $http_user_agent $git_bad_crawlers {
+        default "";
+        ~*(?<name>Amazonbot|Bytespider|meta-externalagent|ClaudeBot|YandexBot) $name;
+      }
+
+      # heavily limit these crawlers to 1 request per minute, since requests
+      # on git repositories are quite heavy in comparison
+      limit_req_zone $git_bad_crawlers zone=gitbadcrawlers:32m rate=1r/m;
+    '';
+
+    virtualHosts.${fqdn} =
       let inherit (config.services.forgejo.settings.server) HTTP_ADDR HTTP_PORT;
       in {
         forceSSL = true;
@@ -156,10 +167,13 @@ in {
           proxyPass = "http://[${HTTP_ADDR}]:${toString HTTP_PORT}";
           proxyWebsockets = true;
           extraConfig = ''
+            limit_req_status 429;
+            limit_req zone=gitbadcrawlers burst=2 nodelay;
             client_max_body_size 256M;
           '';
         };
       };
+  };
 
   users.groups.${cfg.group} = { };
   users.users.${cfg.user} = {
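The zone is then enforced inside the Forgejo virtual host by the two directives added above; again the directives are taken from the diff, and the comments are only a reading of standard limit_req semantics:

    # Reject over-limit requests with 429 Too Many Requests instead of the
    # default 503.
    limit_req_status 429;

    # Apply the zone: a matched crawler gets up to 2 requests above the
    # 1 request/minute rate served immediately (nodelay); anything beyond
    # that is rejected until the rate frees a slot again.
    limit_req zone=gitbadcrawlers burst=2 nodelay;

In practice a matched user agent should see roughly its fourth request within a minute answered with 429, while ordinary clients are never limited.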
Second changed file (the shared nginx module):

@@ -19,7 +19,7 @@
     recommendedTlsSettings = true;
     recommendedZstdSettings = true;
     clientMaxBodySize = lib.mkDefault "16M";
-    appendHttpConfig = ''
+    commonHttpConfig = ''
       # avoid hitting the disk
       proxy_max_temp_file_size 0;
     '';
@@ -48,39 +48,4 @@
       after = [ "network-online.target" ];
       wants = [ "network-online.target" ];
     };
-
-    services.fail2ban.jails = lib.mkIf config.services.fail2ban.enable {
-      apache-badbots.settings = {
-        enabled = true;
-        backend = "pyinotify";
-        port = "http,https";
-        filter = "apache-badbots";
-        logpath = "/var/log/nginx/access.log";
-        maxretry = 1;
-        bantime = "72h";
-      };
-      disrespectful-crawlers = {
-        filter = {
-          Definition = {
-            badcrawlers =
-              ".*(Amazonbot|Bytespider|meta-externalagent|ClaudeBot).*";
-            failregex =
-              ''^<HOST> -.*"(GET|POST|HEAD).*HTTP.*"(?:%(badcrawlers)s)"$'';
-            ignoreregex = "";
-            datepattern = ''
-              ^[^\[]*\[({DATE})
-              {^LN-BEG}'';
-          };
-        };
-        settings = {
-          enabled = true;
-          backend = "pyinotify";
-          port = "http,https";
-          filter = "disrespectful-crawlers";
-          logpath = "/var/log/nginx/access.log";
-          maxretry = 1;
-          bantime = "72h";
-        };
-      };
-    };
 }