Skip to content

Commit 13ca3a3

Browse files
chore(ua-blocker): update robots.json from upstream (#1570)
Co-authored-by: yusukebe <10682+yusukebe@users.noreply.github.com>
1 parent 41f2241 commit 13ca3a3

File tree

3 files changed

+9
-4
lines changed

3 files changed

+9
-4
lines changed

.changeset/auto-sync-robots.md

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@hono/ua-blocker': patch
3+
---
4+
5+
chore(ua-blocker): sync `robots.json` with upstream

packages/ua-blocker/src/data/robots.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@
4141
"frequency": "No information provided.",
4242
"description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
4343
},
44-
"amazon-kendra-": {
44+
"amazon-kendra": {
4545
"operator": "Amazon",
4646
"respect": "Yes",
4747
"function": "Collects data for AI natural language search",

packages/ua-blocker/src/generated.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ User-agent: Ai2Bot-Dolma
66
User-agent: aiHitBot
77
User-agent: AmazonBuyForMe
88
User-agent: atlassian-bot
9-
User-agent: amazon-kendra-
9+
User-agent: amazon-kendra
1010
User-agent: Amazonbot
1111
User-agent: Andibot
1212
User-agent: Anomura
@@ -113,10 +113,10 @@ User-agent: YouBot
113113
Disallow: /
114114
`;
115115
// prettier-ignore
116-
export const ALL_BOTS = ["AddSearchBot", "AI2Bot", "Ai2Bot-Dolma", "aiHitBot", "AmazonBuyForMe", "atlassian-bot", "amazon-kendra-", "Amazonbot", "Andibot", "Anomura", "anthropic-ai", "Applebot", "Applebot-Extended", "Awario", "bedrockbot", "bigsur.ai", "Bravebot", "Brightbot 1.0", "BuddyBot", "Bytespider", "CCBot", "ChatGPT Agent", "ChatGPT-User", "Claude-SearchBot", "Claude-User", "Claude-Web", "ClaudeBot", "Cloudflare-AutoRAG", "CloudVertexBot", "cohere-ai", "cohere-training-data-crawler", "Cotoyogi", "Crawlspace", "Datenbank Crawler", "DeepSeekBot", "Devin", "Diffbot", "DuckAssistBot", "Echobot Bot", "EchoboxBot", "FacebookBot", "facebookexternalhit", "Factset_spyderbot", "FirecrawlAgent", "FriendlyCrawler", "Gemini-Deep-Research", "Google-CloudVertexBot", "Google-Extended", "Google-Firebase", "Google-NotebookLM", "GoogleAgent-Mariner", "GoogleOther", "GoogleOther-Image", "GoogleOther-Video", "GPTBot", "iaskspider/2.0", "IbouBot", "ICC-Crawler", "ImagesiftBot", "img2dataset", "ISSCyberRiskCrawler", "Kangaroo Bot", "KlaviyoAIBot", "LinerBot", "Linguee Bot", "meta-externalagent", "Meta-ExternalAgent", "meta-externalfetcher", "Meta-ExternalFetcher", "meta-webindexer", "MistralAI-User", "MistralAI-User/1.0", "MyCentralAIScraperBot", "netEstate Imprint Crawler", "NotebookLM", "NovaAct", "OAI-SearchBot", "omgili", "omgilibot", "OpenAI", "Operator", "PanguBot", "Panscient", "panscient.com", "Perplexity-User", "PerplexityBot", "PetalBot", "PhindBot", "Poseidon Research Crawler", "QualifiedBot", "QuillBot", "quillbot.com", "SBIntuitionsBot", "Scrapy", "SemrushBot-OCOB", "SemrushBot-SWA", "ShapBot", "Sidetrade indexer bot", "TerraCotta", "Thinkbot", "TikTokSpider", "Timpibot", "VelenPublicWebCrawler", "WARDBot", "Webzio-Extended", "wpbot", "YaK", "YandexAdditional", "YandexAdditionalBot", "YouBot"];
116+
export const ALL_BOTS = ["AddSearchBot", "AI2Bot", "Ai2Bot-Dolma", "aiHitBot", "AmazonBuyForMe", "atlassian-bot", "amazon-kendra", "Amazonbot", "Andibot", "Anomura", "anthropic-ai", "Applebot", "Applebot-Extended", "Awario", "bedrockbot", "bigsur.ai", "Bravebot", "Brightbot 1.0", "BuddyBot", "Bytespider", "CCBot", "ChatGPT Agent", "ChatGPT-User", "Claude-SearchBot", "Claude-User", "Claude-Web", "ClaudeBot", "Cloudflare-AutoRAG", "CloudVertexBot", "cohere-ai", "cohere-training-data-crawler", "Cotoyogi", "Crawlspace", "Datenbank Crawler", "DeepSeekBot", "Devin", "Diffbot", "DuckAssistBot", "Echobot Bot", "EchoboxBot", "FacebookBot", "facebookexternalhit", "Factset_spyderbot", "FirecrawlAgent", "FriendlyCrawler", "Gemini-Deep-Research", "Google-CloudVertexBot", "Google-Extended", "Google-Firebase", "Google-NotebookLM", "GoogleAgent-Mariner", "GoogleOther", "GoogleOther-Image", "GoogleOther-Video", "GPTBot", "iaskspider/2.0", "IbouBot", "ICC-Crawler", "ImagesiftBot", "img2dataset", "ISSCyberRiskCrawler", "Kangaroo Bot", "KlaviyoAIBot", "LinerBot", "Linguee Bot", "meta-externalagent", "Meta-ExternalAgent", "meta-externalfetcher", "Meta-ExternalFetcher", "meta-webindexer", "MistralAI-User", "MistralAI-User/1.0", "MyCentralAIScraperBot", "netEstate Imprint Crawler", "NotebookLM", "NovaAct", "OAI-SearchBot", "omgili", "omgilibot", "OpenAI", "Operator", "PanguBot", "Panscient", "panscient.com", "Perplexity-User", "PerplexityBot", "PetalBot", "PhindBot", "Poseidon Research Crawler", "QualifiedBot", "QuillBot", "quillbot.com", "SBIntuitionsBot", "Scrapy", "SemrushBot-OCOB", "SemrushBot-SWA", "ShapBot", "Sidetrade indexer bot", "TerraCotta", "Thinkbot", "TikTokSpider", "Timpibot", "VelenPublicWebCrawler", "WARDBot", "Webzio-Extended", "wpbot", "YaK", "YandexAdditional", "YandexAdditionalBot", "YouBot"];
117117
// prettier-ignore
118118
export const NON_RESPECTING_BOTS = ["AddSearchBot", "AmazonBuyForMe", "Andibot", "anthropic-ai", "Applebot", "Awario", "bigsur.ai", "Brightbot 1.0", "BuddyBot", "Bytespider", "Claude-Web", "CloudVertexBot", "cohere-ai", "cohere-training-data-crawler", "Datenbank Crawler", "DeepSeekBot", "Diffbot", "DuckAssistBot", "Echobot Bot", "EchoboxBot", "facebookexternalhit", "Factset_spyderbot", "Gemini-Deep-Research", "Google-Firebase", "Google-NotebookLM", "GoogleAgent-Mariner", "iaskspider/2.0", "img2dataset", "ISSCyberRiskCrawler", "Kangaroo Bot", "LinerBot", "Linguee Bot", "Meta-ExternalAgent", "meta-externalfetcher", "Meta-ExternalFetcher", "meta-webindexer", "MistralAI-User", "MyCentralAIScraperBot", "netEstate Imprint Crawler", "NotebookLM", "NovaAct", "Operator", "PanguBot", "Perplexity-User", "PhindBot", "Poseidon Research Crawler", "QualifiedBot", "QuillBot", "quillbot.com", "Scrapy", "Sidetrade indexer bot", "Thinkbot", "TikTokSpider", "Timpibot", "WARDBot", "Webzio-Extended", "wpbot", "YaK"];
119119
// prettier-ignore
120-
export const ALL_BOTS_REGEX = /(ADDSEARCHBOT|AI2BOT|AI2BOT-DOLMA|AIHITBOT|AMAZONBUYFORME|ATLASSIAN-BOT|AMAZON-KENDRA-|AMAZONBOT|ANDIBOT|ANOMURA|ANTHROPIC-AI|APPLEBOT|APPLEBOT-EXTENDED|AWARIO|BEDROCKBOT|BIGSUR.AI|BRAVEBOT|BRIGHTBOT 1.0|BUDDYBOT|BYTESPIDER|CCBOT|CHATGPT AGENT|CHATGPT-USER|CLAUDE-SEARCHBOT|CLAUDE-USER|CLAUDE-WEB|CLAUDEBOT|CLOUDFLARE-AUTORAG|CLOUDVERTEXBOT|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|COTOYOGI|CRAWLSPACE|DATENBANK CRAWLER|DEEPSEEKBOT|DEVIN|DIFFBOT|DUCKASSISTBOT|ECHOBOT BOT|ECHOBOXBOT|FACEBOOKBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|FIRECRAWLAGENT|FRIENDLYCRAWLER|GEMINI-DEEP-RESEARCH|GOOGLE-CLOUDVERTEXBOT|GOOGLE-EXTENDED|GOOGLE-FIREBASE|GOOGLE-NOTEBOOKLM|GOOGLEAGENT-MARINER|GOOGLEOTHER|GOOGLEOTHER-IMAGE|GOOGLEOTHER-VIDEO|GPTBOT|IASKSPIDER\/2.0|IBOUBOT|ICC-CRAWLER|IMAGESIFTBOT|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|KLAVIYOAIBOT|LINERBOT|LINGUEE BOT|META-EXTERNALAGENT|META-EXTERNALAGENT|META-EXTERNALFETCHER|META-EXTERNALFETCHER|META-WEBINDEXER|MISTRALAI-USER|MISTRALAI-USER\/1.0|MYCENTRALAISCRAPERBOT|NETESTATE IMPRINT CRAWLER|NOTEBOOKLM|NOVAACT|OAI-SEARCHBOT|OMGILI|OMGILIBOT|OPENAI|OPERATOR|PANGUBOT|PANSCIENT|PANSCIENT.COM|PERPLEXITY-USER|PERPLEXITYBOT|PETALBOT|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT.COM|SBINTUITIONSBOT|SCRAPY|SEMRUSHBOT-OCOB|SEMRUSHBOT-SWA|SHAPBOT|SIDETRADE INDEXER BOT|TERRACOTTA|THINKBOT|TIKTOKSPIDER|TIMPIBOT|VELENPUBLICWEBCRAWLER|WARDBOT|WEBZIO-EXTENDED|WPBOT|YAK|YANDEXADDITIONAL|YANDEXADDITIONALBOT|YOUBOT)/;
120+
export const ALL_BOTS_REGEX = /(ADDSEARCHBOT|AI2BOT|AI2BOT-DOLMA|AIHITBOT|AMAZONBUYFORME|ATLASSIAN-BOT|AMAZON-KENDRA|AMAZONBOT|ANDIBOT|ANOMURA|ANTHROPIC-AI|APPLEBOT|APPLEBOT-EXTENDED|AWARIO|BEDROCKBOT|BIGSUR.AI|BRAVEBOT|BRIGHTBOT 1.0|BUDDYBOT|BYTESPIDER|CCBOT|CHATGPT AGENT|CHATGPT-USER|CLAUDE-SEARCHBOT|CLAUDE-USER|CLAUDE-WEB|CLAUDEBOT|CLOUDFLARE-AUTORAG|CLOUDVERTEXBOT|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|COTOYOGI|CRAWLSPACE|DATENBANK CRAWLER|DEEPSEEKBOT|DEVIN|DIFFBOT|DUCKASSISTBOT|ECHOBOT BOT|ECHOBOXBOT|FACEBOOKBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|FIRECRAWLAGENT|FRIENDLYCRAWLER|GEMINI-DEEP-RESEARCH|GOOGLE-CLOUDVERTEXBOT|GOOGLE-EXTENDED|GOOGLE-FIREBASE|GOOGLE-NOTEBOOKLM|GOOGLEAGENT-MARINER|GOOGLEOTHER|GOOGLEOTHER-IMAGE|GOOGLEOTHER-VIDEO|GPTBOT|IASKSPIDER\/2.0|IBOUBOT|ICC-CRAWLER|IMAGESIFTBOT|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|KLAVIYOAIBOT|LINERBOT|LINGUEE BOT|META-EXTERNALAGENT|META-EXTERNALAGENT|META-EXTERNALFETCHER|META-EXTERNALFETCHER|META-WEBINDEXER|MISTRALAI-USER|MISTRALAI-USER\/1.0|MYCENTRALAISCRAPERBOT|NETESTATE IMPRINT CRAWLER|NOTEBOOKLM|NOVAACT|OAI-SEARCHBOT|OMGILI|OMGILIBOT|OPENAI|OPERATOR|PANGUBOT|PANSCIENT|PANSCIENT.COM|PERPLEXITY-USER|PERPLEXITYBOT|PETALBOT|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT.COM|SBINTUITIONSBOT|SCRAPY|SEMRUSHBOT-OCOB|SEMRUSHBOT-SWA|SHAPBOT|SIDETRADE INDEXER BOT|TERRACOTTA|THINKBOT|TIKTOKSPIDER|TIMPIBOT|VELENPUBLICWEBCRAWLER|WARDBOT|WEBZIO-EXTENDED|WPBOT|YAK|YANDEXADDITIONAL|YANDEXADDITIONALBOT|YOUBOT)/;
121121
// prettier-ignore
122122
export const NON_RESPECTING_BOTS_REGEX = /(ADDSEARCHBOT|AMAZONBUYFORME|ANDIBOT|ANTHROPIC-AI|APPLEBOT|AWARIO|BIGSUR.AI|BRIGHTBOT 1.0|BUDDYBOT|BYTESPIDER|CLAUDE-WEB|CLOUDVERTEXBOT|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|DATENBANK CRAWLER|DEEPSEEKBOT|DIFFBOT|DUCKASSISTBOT|ECHOBOT BOT|ECHOBOXBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|GEMINI-DEEP-RESEARCH|GOOGLE-FIREBASE|GOOGLE-NOTEBOOKLM|GOOGLEAGENT-MARINER|IASKSPIDER\/2.0|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|LINERBOT|LINGUEE BOT|META-EXTERNALAGENT|META-EXTERNALFETCHER|META-EXTERNALFETCHER|META-WEBINDEXER|MISTRALAI-USER|MYCENTRALAISCRAPERBOT|NETESTATE IMPRINT CRAWLER|NOTEBOOKLM|NOVAACT|OPERATOR|PANGUBOT|PERPLEXITY-USER|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT.COM|SCRAPY|SIDETRADE INDEXER BOT|THINKBOT|TIKTOKSPIDER|TIMPIBOT|WARDBOT|WEBZIO-EXTENDED|WPBOT|YAK)/;

0 commit comments

Comments
 (0)