diff --git a/root/robots.txt b/root/robots.txt index 5deb55de66..70fd28846b 100644 --- a/root/robots.txt +++ b/root/robots.txt @@ -1,5 +1,4 @@ -# http://www.robotstxt.org/wc/norobots.html - +# Keep all crawlers out of these locations User-agent: * Disallow: /login/ Disallow: */diff/ @@ -14,3 +13,70 @@ Disallow: /*?*size=* Sitemap: https://metacpan.org/sitemap-authors.xml.gz Sitemap: https://metacpan.org/sitemap-releases.xml.gz + +# Block the crawlers below from the whole site, using the AI-bot list from: +# https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.txt +User-agent: AI2Bot +User-agent: Ai2Bot-Dolma +User-agent: aiHitBot +User-agent: Amazonbot +User-agent: anthropic-ai +User-agent: Applebot +User-agent: Applebot-Extended +User-agent: Brightbot 1.0 +User-agent: Bytespider +User-agent: CCBot +User-agent: ChatGPT-User +User-agent: Claude-SearchBot +User-agent: Claude-User +User-agent: Claude-Web +User-agent: ClaudeBot +User-agent: cohere-ai +User-agent: cohere-training-data-crawler +User-agent: Cotoyogi +User-agent: Crawlspace +User-agent: Diffbot +User-agent: DuckAssistBot +User-agent: FacebookBot +User-agent: Factset_spyderbot +User-agent: FirecrawlAgent +User-agent: FriendlyCrawler +User-agent: Google-CloudVertexBot +User-agent: Google-Extended +User-agent: GoogleOther +User-agent: GoogleOther-Image +User-agent: GoogleOther-Video +User-agent: GPTBot +User-agent: iaskspider/2.0 +User-agent: ICC-Crawler +User-agent: ImagesiftBot +User-agent: img2dataset +User-agent: imgproxy +User-agent: ISSCyberRiskCrawler +User-agent: Kangaroo Bot +User-agent: meta-externalagent +User-agent: Meta-ExternalAgent +User-agent: meta-externalfetcher +User-agent: Meta-ExternalFetcher +User-agent: MistralAI-User/1.0 +User-agent: NovaAct +User-agent: OAI-SearchBot +User-agent: omgili +User-agent: omgilibot +User-agent: Operator +User-agent: PanguBot +User-agent: Perplexity-User +User-agent: PerplexityBot +User-agent: PetalBot +User-agent: QualifiedBot +User-agent: Scrapy +User-agent: SemrushBot-OCOB +User-agent: 
SemrushBot-SWA +User-agent: Sidetrade indexer bot +User-agent: TikTokSpider +User-agent: Timpibot +User-agent: VelenPublicWebCrawler +User-agent: Webzio-Extended +User-agent: wpbot +User-agent: YouBot +Disallow: /