47 changes: 2 additions & 45 deletions data/botPolicies.yaml
@@ -11,51 +11,8 @@
 ## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
 
 bots:
-  # Pathological bots to deny
-  - # This correlates to data/bots/deny-pathological.yaml in the source tree
-    # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
-    import: (data)/bots/_deny-pathological.yaml
-  - import: (data)/bots/aggressive-brazilian-scrapers.yaml
-
-  # Aggressively block AI/LLM related bots/agents by default
-  - import: (data)/meta/ai-block-aggressive.yaml
-
-  # Consider replacing the aggressive AI policy with more selective policies:
-  # - import: (data)/meta/ai-block-moderate.yaml
-  # - import: (data)/meta/ai-block-permissive.yaml
-
-  # Search engine crawlers to allow, defaults to:
-  # - Google (so they don't try to bypass Anubis)
-  # - Apple
-  # - Bing
-  # - DuckDuckGo
-  # - Qwant
-  # - The Internet Archive
-  # - Kagi
-  # - Marginalia
-  # - Mojeek
-  - import: (data)/crawlers/_allow-good.yaml
-  # Challenge Firefox AI previews
-  - import: (data)/clients/x-firefox-ai.yaml
-
-  # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
-  - import: (data)/common/keep-internet-working.yaml
-
-  # # Punish any bot with "bot" in the user-agent string
-  # # This is known to have a high false-positive rate, use at your own risk
-  # - name: generic-bot-catchall
-  #   user_agent_regex: (?i:bot|crawler)
-  #   action: CHALLENGE
-  #   challenge:
-  #     difficulty: 16 # impossible
-  #     report_as: 4 # lie to the operator
-  #     algorithm: slow # intentionally waste CPU cycles and time
-
-  # Generic catchall rule
-  - name: generic-browser
-    user_agent_regex: >-
-      Mozilla|Opera
-    action: CHALLENGE
+  - # load the default rules
+    import: (data)/bots.yaml
 
 dnsbl: false
 
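Net effect on data/botPolicies.yaml: everything except the dnsbl setting is replaced by a single import of the new defaults file. For reference, a sketch of the resulting file, assembled from the context and added lines above (the header comments before line 11 are omitted here):

bots:
  - # load the default rules
    import: (data)/bots.yaml

dnsbl: false
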
45 changes: 45 additions & 0 deletions data/bots.yaml
@@ -0,0 +1,45 @@
+# Pathological bots to deny
+- # This correlates to data/bots/deny-pathological.yaml in the source tree
+  # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
+  import: (data)/bots/_deny-pathological.yaml
+- import: (data)/bots/aggressive-brazilian-scrapers.yaml
+
+# Aggressively block AI/LLM related bots/agents by default
+- import: (data)/meta/ai-block-aggressive.yaml
+
+# Consider replacing the aggressive AI policy with more selective policies:
+# - import: (data)/meta/ai-block-moderate.yaml
+# - import: (data)/meta/ai-block-permissive.yaml
+
+# Search engine crawlers to allow, defaults to:
+# - Google (so they don't try to bypass Anubis)
+# - Apple
+# - Bing
+# - DuckDuckGo
+# - Qwant
+# - The Internet Archive
+# - Kagi
+# - Marginalia
+# - Mojeek
+- import: (data)/crawlers/_allow-good.yaml
+# Challenge Firefox AI previews
+- import: (data)/clients/x-firefox-ai.yaml
+
+# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
+- import: (data)/common/keep-internet-working.yaml
+
+# # Punish any bot with "bot" in the user-agent string
+# # This is known to have a high false-positive rate, use at your own risk
+# - name: generic-bot-catchall
+#   user_agent_regex: (?i:bot|crawler)
+#   action: CHALLENGE
+#   challenge:
+#     difficulty: 16 # impossible
+#     report_as: 4 # lie to the operator
+#     algorithm: slow # intentionally waste CPU cycles and time
+
+# Generic catchall rule
+- name: generic-browser
+  user_agent_regex: >-
+    Mozilla|Opera
+  action: CHALLENGE
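
With the defaults split out into data/bots.yaml, an operator's botPolicies.yaml can import them and then layer site-specific rules on top. A minimal sketch, assuming the same import and rule syntax shown in this diff; the my-uptime-monitor rule and its ALLOW action are illustrative assumptions, not part of this change:

bots:
  # load the default rules shipped in data/bots.yaml
  - import: (data)/bots.yaml
  # hypothetical site-specific rule layered after the defaults
  - name: my-uptime-monitor
    user_agent_regex: MyUptimeBot
    action: ALLOW # assumed action value; this diff itself only shows CHALLENGE

dnsbl: false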