summary refs log tree commit diff
path: root/static/robots.txt
diff options
context:
space:
mode:
authorvenomade <venomade@venomade.com>2025-08-11 21:16:43 +0100
committervenomade <venomade@venomade.com>2025-08-11 21:16:43 +0100
commit189e8c37b0ff5655d472fa0a0226ee940281c5c9 (patch)
tree4e4284e7f2d2d894202de340721731105133e666 /static/robots.txt
parent91b64f5127befe36974e6c9b69e8f5563f9fbf0e (diff)
Switch to Hugo and add Portuguese
Diffstat (limited to 'static/robots.txt')
-rw-r--r--static/robots.txt67
1 files changed, 67 insertions, 0 deletions
diff --git a/static/robots.txt b/static/robots.txt
new file mode 100644
index 0000000..eab12f6
--- /dev/null
+++ b/static/robots.txt
@@ -0,0 +1,67 @@
+# Block all known AI crawlers and assistants
+# from using content for training AI models.
+# Source: https://robotstxt.com/ai
+User-Agent: GPTBot
+User-Agent: ClaudeBot
+User-Agent: Claude-User
+User-Agent: Claude-SearchBot
+User-Agent: CCBot
+User-Agent: Google-Extended
+User-Agent: Applebot-Extended
+User-Agent: Facebookbot
+User-Agent: Meta-ExternalAgent
+User-Agent: Meta-ExternalFetcher
+User-Agent: diffbot
+User-Agent: PerplexityBot
+User-Agent: Perplexity-User
+User-Agent: Omgili
+User-Agent: Omgilibot
+User-Agent: webzio-extended
+User-Agent: ImagesiftBot
+User-Agent: Bytespider
+User-Agent: TikTokSpider
+User-Agent: Amazonbot
+User-Agent: Youbot
+User-Agent: SemrushBot-OCOB
+User-Agent: Petalbot
+User-Agent: VelenPublicWebCrawler
+User-Agent: TurnitinBot
+User-Agent: Timpibot
+User-Agent: OAI-SearchBot
+User-Agent: ICC-Crawler
+User-Agent: AI2Bot
+User-Agent: AI2Bot-Dolma
+User-Agent: DataForSeoBot
+User-Agent: AwarioBot
+User-Agent: AwarioSmartBot
+User-Agent: AwarioRssBot
+User-Agent: Google-CloudVertexBot
+User-Agent: PanguBot
+User-Agent: Kangaroo Bot
+User-Agent: Sentibot
+User-Agent: img2dataset
+User-Agent: Meltwater
+User-Agent: Seekr
+User-Agent: peer39_crawler
+User-Agent: cohere-ai
+User-Agent: cohere-training-data-crawler
+User-Agent: DuckAssistBot
+User-Agent: Scrapy
+User-Agent: Cotoyogi
+User-Agent: aiHitBot
+User-Agent: Factset_spyderbot
+User-Agent: FirecrawlAgent
+
+Disallow: /
+DisallowAITraining: /
+
+# Block any non-specified AI crawlers (e.g., new
+# or unknown bots) from using content for training
+# AI models, while allowing the website to be
+# indexed and accessed by bots.  These directives
+# are still experimental and may not be supported
+# by all AI crawlers.
+User-Agent: *
+DisallowAITraining: /
+Content-Usage: ai=n
+Allow: /