47 changes: 2 additions & 45 deletions data/botPolicies.yaml
@@ -11,51 +11,8 @@
 ## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
 
 bots:
-  # Pathological bots to deny
-  - # This correlates to data/bots/deny-pathological.yaml in the source tree
-    # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
-    import: (data)/bots/_deny-pathological.yaml
-  - import: (data)/bots/aggressive-brazilian-scrapers.yaml
-
-  # Aggressively block AI/LLM related bots/agents by default
-  - import: (data)/meta/ai-block-aggressive.yaml
-
-  # Consider replacing the aggressive AI policy with more selective policies:
-  # - import: (data)/meta/ai-block-moderate.yaml
-  # - import: (data)/meta/ai-block-permissive.yaml
-
-  # Search engine crawlers to allow, defaults to:
-  # - Google (so they don't try to bypass Anubis)
-  # - Apple
-  # - Bing
-  # - DuckDuckGo
-  # - Qwant
-  # - The Internet Archive
-  # - Kagi
-  # - Marginalia
-  # - Mojeek
-  - import: (data)/crawlers/_allow-good.yaml
-  # Challenge Firefox AI previews
-  - import: (data)/clients/x-firefox-ai.yaml
-
-  # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
-  - import: (data)/common/keep-internet-working.yaml
-
-  # # Punish any bot with "bot" in the user-agent string
-  # # This is known to have a high false-positive rate, use at your own risk
-  # - name: generic-bot-catchall
-  #   user_agent_regex: (?i:bot|crawler)
-  #   action: CHALLENGE
-  #   challenge:
-  #     difficulty: 16 # impossible
-  #     report_as: 4 # lie to the operator
-  #     algorithm: slow # intentionally waste CPU cycles and time
-
-  # Generic catchall rule
-  - name: generic-browser
-    user_agent_regex: >-
-      Mozilla|Opera
-    action: CHALLENGE
+  - # load the default rules
+    import: (data)/bots.yaml
 
 dnsbl: false
 
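Net effect on data/botPolicies.yaml: everything except the dnsbl setting is replaced by a single import of the new defaults file. For reference, a sketch of the resulting file, assembled from the context and added lines above (the header comments before line 11 are omitted here):

bots:
  - # load the default rules
    import: (data)/bots.yaml

dnsbl: false
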
45 changes: 45 additions & 0 deletions data/bots.yaml
@@ -0,0 +1,45 @@
+# Pathological bots to deny
+- # This correlates to data/bots/deny-pathological.yaml in the source tree
+  # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
+  import: (data)/bots/_deny-pathological.yaml
+- import: (data)/bots/aggressive-brazilian-scrapers.yaml
+
+# Aggressively block AI/LLM related bots/agents by default
+- import: (data)/meta/ai-block-aggressive.yaml
+
+# Consider replacing the aggressive AI policy with more selective policies:
+# - import: (data)/meta/ai-block-moderate.yaml
+# - import: (data)/meta/ai-block-permissive.yaml
+
+# Search engine crawlers to allow, defaults to:
+# - Google (so they don't try to bypass Anubis)
+# - Apple
+# - Bing
+# - DuckDuckGo
+# - Qwant
+# - The Internet Archive
+# - Kagi
+# - Marginalia
+# - Mojeek
+- import: (data)/crawlers/_allow-good.yaml
+# Challenge Firefox AI previews
+- import: (data)/clients/x-firefox-ai.yaml
+
+# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
+- import: (data)/common/keep-internet-working.yaml
+
+# # Punish any bot with "bot" in the user-agent string
+# # This is known to have a high false-positive rate, use at your own risk
+# - name: generic-bot-catchall
+#   user_agent_regex: (?i:bot|crawler)
+#   action: CHALLENGE
+#   challenge:
+#     difficulty: 16 # impossible
+#     report_as: 4 # lie to the operator
+#     algorithm: slow # intentionally waste CPU cycles and time
+
+# Generic catchall rule
+- name: generic-browser
+  user_agent_regex: >-
+    Mozilla|Opera
+  action: CHALLENGE
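
With the defaults split out into data/bots.yaml, an operator's botPolicies.yaml can import them and then layer site-specific rules on top. A minimal sketch, assuming the same import and rule syntax shown in this diff; the my-uptime-monitor rule and its ALLOW action are illustrative assumptions, not part of this change:

bots:
  # load the default rules shipped in data/bots.yaml
  - import: (data)/bots.yaml
  # hypothetical site-specific rule layered after the defaults
  - name: my-uptime-monitor
    user_agent_regex: MyUptimeBot
    action: ALLOW # assumed action value; this diff itself only shows CHALLENGE

dnsbl: false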