@@ -63,6 +63,7 @@ User-agent: img2dataset
6363User-agent: ISSCyberRiskCrawler
6464User-agent: Kangaroo Bot
6565User-agent: KlaviyoAIBot
66+ User-agent: LAIONDownloader
6667User-agent: LinerBot
6768User-agent: Linguee Bot
6869User-agent: meta-externalagent
@@ -113,10 +114,10 @@ User-agent: YouBot
113114Disallow: /
114115` ;
115116// prettier-ignore
116- export const ALL_BOTS = [ "AddSearchBot" , "AI2Bot" , "Ai2Bot-Dolma" , "aiHitBot" , "AmazonBuyForMe" , "atlassian-bot" , "amazon-kendra" , "Amazonbot" , "Andibot" , "Anomura" , "anthropic-ai" , "Applebot" , "Applebot-Extended" , "Awario" , "bedrockbot" , "bigsur.ai" , "Bravebot" , "Brightbot 1.0" , "BuddyBot" , "Bytespider" , "CCBot" , "ChatGPT Agent" , "ChatGPT-User" , "Claude-SearchBot" , "Claude-User" , "Claude-Web" , "ClaudeBot" , "Cloudflare-AutoRAG" , "CloudVertexBot" , "cohere-ai" , "cohere-training-data-crawler" , "Cotoyogi" , "Crawlspace" , "Datenbank Crawler" , "DeepSeekBot" , "Devin" , "Diffbot" , "DuckAssistBot" , "Echobot Bot" , "EchoboxBot" , "FacebookBot" , "facebookexternalhit" , "Factset_spyderbot" , "FirecrawlAgent" , "FriendlyCrawler" , "Gemini-Deep-Research" , "Google-CloudVertexBot" , "Google-Extended" , "Google-Firebase" , "Google-NotebookLM" , "GoogleAgent-Mariner" , "GoogleOther" , "GoogleOther-Image" , "GoogleOther-Video" , "GPTBot" , "iaskspider/2.0" , "IbouBot" , "ICC-Crawler" , "ImagesiftBot" , "img2dataset" , "ISSCyberRiskCrawler" , "Kangaroo Bot" , "KlaviyoAIBot" , "LinerBot" , "Linguee Bot" , "meta-externalagent" , "Meta-ExternalAgent" , "meta-externalfetcher" , "Meta-ExternalFetcher" , "meta-webindexer" , "MistralAI-User" , "MistralAI-User/1.0" , "MyCentralAIScraperBot" , "netEstate Imprint Crawler" , "NotebookLM" , "NovaAct" , "OAI-SearchBot" , "omgili" , "omgilibot" , "OpenAI" , "Operator" , "PanguBot" , "Panscient" , "panscient.com" , "Perplexity-User" , "PerplexityBot" , "PetalBot" , "PhindBot" , "Poseidon Research Crawler" , "QualifiedBot" , "QuillBot" , "quillbot.com" , "SBIntuitionsBot" , "Scrapy" , "SemrushBot-OCOB" , "SemrushBot-SWA" , "ShapBot" , "Sidetrade indexer bot" , "TerraCotta" , "Thinkbot" , "TikTokSpider" , "Timpibot" , "VelenPublicWebCrawler" , "WARDBot" , "Webzio-Extended" , "wpbot" , "YaK" , "YandexAdditional" , "YandexAdditionalBot" , "YouBot" ] ;
117+ export const ALL_BOTS = [ "AddSearchBot" , "AI2Bot" , "Ai2Bot-Dolma" , "aiHitBot" , "AmazonBuyForMe" , "atlassian-bot" , "amazon-kendra" , "Amazonbot" , "Andibot" , "Anomura" , "anthropic-ai" , "Applebot" , "Applebot-Extended" , "Awario" , "bedrockbot" , "bigsur.ai" , "Bravebot" , "Brightbot 1.0" , "BuddyBot" , "Bytespider" , "CCBot" , "ChatGPT Agent" , "ChatGPT-User" , "Claude-SearchBot" , "Claude-User" , "Claude-Web" , "ClaudeBot" , "Cloudflare-AutoRAG" , "CloudVertexBot" , "cohere-ai" , "cohere-training-data-crawler" , "Cotoyogi" , "Crawlspace" , "Datenbank Crawler" , "DeepSeekBot" , "Devin" , "Diffbot" , "DuckAssistBot" , "Echobot Bot" , "EchoboxBot" , "FacebookBot" , "facebookexternalhit" , "Factset_spyderbot" , "FirecrawlAgent" , "FriendlyCrawler" , "Gemini-Deep-Research" , "Google-CloudVertexBot" , "Google-Extended" , "Google-Firebase" , "Google-NotebookLM" , "GoogleAgent-Mariner" , "GoogleOther" , "GoogleOther-Image" , "GoogleOther-Video" , "GPTBot" , "iaskspider/2.0" , "IbouBot" , "ICC-Crawler" , "ImagesiftBot" , "img2dataset" , "ISSCyberRiskCrawler" , "Kangaroo Bot" , "KlaviyoAIBot" , "LAIONDownloader" , "LinerBot" , "Linguee Bot" , "meta-externalagent" , "Meta-ExternalAgent" , "meta-externalfetcher" , "Meta-ExternalFetcher" , "meta-webindexer" , "MistralAI-User" , "MistralAI-User/1.0" , "MyCentralAIScraperBot" , "netEstate Imprint Crawler" , "NotebookLM" , "NovaAct" , "OAI-SearchBot" , "omgili" , "omgilibot" , "OpenAI" , "Operator" , "PanguBot" , "Panscient" , "panscient.com" , "Perplexity-User" , "PerplexityBot" , "PetalBot" , "PhindBot" , "Poseidon Research Crawler" , "QualifiedBot" , "QuillBot" , "quillbot.com" , "SBIntuitionsBot" , "Scrapy" , "SemrushBot-OCOB" , "SemrushBot-SWA" , "ShapBot" , "Sidetrade indexer bot" , "TerraCotta" , "Thinkbot" , "TikTokSpider" , "Timpibot" , "VelenPublicWebCrawler" , "WARDBot" , "Webzio-Extended" , "wpbot" , "YaK" , "YandexAdditional" , "YandexAdditionalBot" , "YouBot" ] ;
117118// prettier-ignore
118- export const NON_RESPECTING_BOTS = [ "AddSearchBot" , "AmazonBuyForMe" , "Andibot" , "anthropic-ai" , "Applebot" , "Awario" , "bigsur.ai" , "Brightbot 1.0" , "BuddyBot" , "Bytespider" , "Claude-Web" , "CloudVertexBot" , "cohere-ai" , "cohere-training-data-crawler" , "Datenbank Crawler" , "DeepSeekBot" , "Diffbot" , "DuckAssistBot" , "Echobot Bot" , "EchoboxBot" , "facebookexternalhit" , "Factset_spyderbot" , "Gemini-Deep-Research" , "Google-Firebase" , "Google-NotebookLM" , "GoogleAgent-Mariner" , "iaskspider/2.0" , "img2dataset" , "ISSCyberRiskCrawler" , "Kangaroo Bot" , "LinerBot" , "Linguee Bot" , "Meta-ExternalAgent" , "meta-externalfetcher" , "Meta-ExternalFetcher" , "meta-webindexer" , "MistralAI-User" , "MyCentralAIScraperBot" , "netEstate Imprint Crawler" , "NotebookLM" , "NovaAct" , "Operator" , "PanguBot" , "Perplexity-User" , "PhindBot" , "Poseidon Research Crawler" , "QualifiedBot" , "QuillBot" , "quillbot.com" , "Scrapy" , "Sidetrade indexer bot" , "Thinkbot" , "TikTokSpider" , "Timpibot" , "WARDBot" , "Webzio-Extended" , "wpbot" , "YaK" ] ;
119+ export const NON_RESPECTING_BOTS = [ "AddSearchBot" , "AmazonBuyForMe" , "Andibot" , "anthropic-ai" , "Applebot" , "Awario" , "bigsur.ai" , "Brightbot 1.0" , "BuddyBot" , "Bytespider" , "Claude-Web" , "CloudVertexBot" , "cohere-ai" , "cohere-training-data-crawler" , "Datenbank Crawler" , "DeepSeekBot" , "Diffbot" , "DuckAssistBot" , "Echobot Bot" , "EchoboxBot" , "facebookexternalhit" , "Factset_spyderbot" , "Gemini-Deep-Research" , "Google-Firebase" , "Google-NotebookLM" , "GoogleAgent-Mariner" , "iaskspider/2.0" , "img2dataset" , "ISSCyberRiskCrawler" , "Kangaroo Bot" , "LAIONDownloader" , "LinerBot" , "Linguee Bot" , "Meta-ExternalAgent" , "meta-externalfetcher" , "Meta-ExternalFetcher" , "meta-webindexer" , "MistralAI-User" , "MyCentralAIScraperBot" , "netEstate Imprint Crawler" , "NotebookLM" , "NovaAct" , "Operator" , "PanguBot" , "Perplexity-User" , "PhindBot" , "Poseidon Research Crawler" , "QualifiedBot" , "QuillBot" , "quillbot.com" , "Scrapy" , "Sidetrade indexer bot" , "Thinkbot" , "TikTokSpider" , "Timpibot" , "WARDBot" , "Webzio-Extended" , "wpbot" , "YaK" ] ;
119120// prettier-ignore
120- export const ALL_BOTS_REGEX = / ( A D D S E A R C H B O T | A I 2 B O T | A I 2 B O T - D O L M A | A I H I T B O T | A M A Z O N B U Y F O R M E | A T L A S S I A N - B O T | A M A Z O N - K E N D R A | A M A Z O N B O T | A N D I B O T | A N O M U R A | A N T H R O P I C - A I | A P P L E B O T | A P P L E B O T - E X T E N D E D | A W A R I O | B E D R O C K B O T | B I G S U R .A I | B R A V E B O T | B R I G H T B O T 1 .0 | B U D D Y B O T | B Y T E S P I D E R | C C B O T | C H A T G P T A G E N T | C H A T G P T - U S E R | C L A U D E - S E A R C H B O T | C L A U D E - U S E R | C L A U D E - W E B | C L A U D E B O T | C L O U D F L A R E - A U T O R A G | C L O U D V E R T E X B O T | C O H E R E - A I | C O H E R E - T R A I N I N G - D A T A - C R A W L E R | C O T O Y O G I | C R A W L S P A C E | D A T E N B A N K C R A W L E R | D E E P S E E K B O T | D E V I N | D I F F B O T | D U C K A S S I S T B O T | E C H O B O T B O T | E C H O B O X B O T | F A C E B O O K B O T | F A C E B O O K E X T E R N A L H I T | F A C T S E T _ S P Y D E R B O T | F I R E C R A W L A G E N T | F R I E N D L Y C R A W L E R | G E M I N I - D E E P - R E S E A R C H | G O O G L E - C L O U D V E R T E X B O T | G O O G L E - E X T E N D E D | G O O G L E - F I R E B A S E | G O O G L E - N O T E B O O K L M | G O O G L E A G E N T - M A R I N E R | G O O G L E O T H E R | G O O G L E O T H E R - I M A G E | G O O G L E O T H E R - V I D E O | G P T B O T | I A S K S P I D E R \/ 2 .0 | I B O U B O T | I C C - C R A W L E R | I M A G E S I F T B O T | I M G 2 D A T A S E T | I S S C Y B E R R I S K C R A W L E R | K A N G A R O O B O T | K L A V I Y O A I B O T | L I N E R B O T | L I N G U E E B O T | M E T A - E X T E R N A L A G E N T | M E T A - E X T E R N A L A G E N T | M E T A - E X T E R N A L F E T C H E R | M E T A - E X T E R N A L F E T C H E R | M E T A - W E B I N D E X E R | M I S T R A L A I - U S E R | M I S T R A L A I - U S E R \/ 1 .0 | M Y C E N T R A L A I S C R A P E R B O T | N E T E S T A T E I M P R I N T C R A W L E R | N O T E B O O K L M | N O V A A C T | O A I - S E A R C H B O T | O M G I L I | O M G I L I B O T | O P E N A I | O P E R A T O R | P A N G U B O T | P A N S C I E N T | P A N S C I E N T .C O M | P E R P L E X I T Y - U S E R | P E R P L E X I T Y B O T | P E T A L B O T | P H I N D B O T | P O S E I D O N R E S E A R C H C R A W L E R | Q U A L I F I E D B O T | Q U I L L B O T | Q U I L L B O T .C O M | S B I N T U I T I O N S B O T | S C R A P Y | S E M R U S H B O T - O C O B | S E M R U S H B O T - S W A | S H A P B O T | S I D E T R A D E I N D E X E R B O T | T E R R A C O T T A | T H I N K B O T | T I K T O K S P I D E R | T I M P I B O T | V E L E N P U B L I C W E B C R A W L E R | W A R D B O T | W E B Z I O - E X T E N D E D | W P B O T | Y A K | Y A N D E X A D D I T I O N A L | Y A N D E X A D D I T I O N A L B O T | Y O U B O T ) / ;
121+ export const ALL_BOTS_REGEX = / ( A D D S E A R C H B O T | A I 2 B O T | A I 2 B O T - D O L M A | A I H I T B O T | A M A Z O N B U Y F O R M E | A T L A S S I A N - B O T | A M A Z O N - K E N D R A | A M A Z O N B O T | A N D I B O T | A N O M U R A | A N T H R O P I C - A I | A P P L E B O T | A P P L E B O T - E X T E N D E D | A W A R I O | B E D R O C K B O T | B I G S U R .A I | B R A V E B O T | B R I G H T B O T 1 .0 | B U D D Y B O T | B Y T E S P I D E R | C C B O T | C H A T G P T A G E N T | C H A T G P T - U S E R | C L A U D E - S E A R C H B O T | C L A U D E - U S E R | C L A U D E - W E B | C L A U D E B O T | C L O U D F L A R E - A U T O R A G | C L O U D V E R T E X B O T | C O H E R E - A I | C O H E R E - T R A I N I N G - D A T A - C R A W L E R | C O T O Y O G I | C R A W L S P A C E | D A T E N B A N K C R A W L E R | D E E P S E E K B O T | D E V I N | D I F F B O T | D U C K A S S I S T B O T | E C H O B O T B O T | E C H O B O X B O T | F A C E B O O K B O T | F A C E B O O K E X T E R N A L H I T | F A C T S E T _ S P Y D E R B O T | F I R E C R A W L A G E N T | F R I E N D L Y C R A W L E R | G E M I N I - D E E P - R E S E A R C H | G O O G L E - C L O U D V E R T E X B O T | G O O G L E - E X T E N D E D | G O O G L E - F I R E B A S E | G O O G L E - N O T E B O O K L M | G O O G L E A G E N T - M A R I N E R | G O O G L E O T H E R | G O O G L E O T H E R - I M A G E | G O O G L E O T H E R - V I D E O | G P T B O T | I A S K S P I D E R \/ 2 .0 | I B O U B O T | I C C - C R A W L E R | I M A G E S I F T B O T | I M G 2 D A T A S E T | I S S C Y B E R R I S K C R A W L E R | K A N G A R O O B O T | K L A V I Y O A I B O T | L A I O N D O W N L O A D E R | L I N E R B O T | L I N G U E E B O T | M E T A - E X T E R N A L A G E N T | M E T A - E X T E R N A L A G E N T | M E T A - E X T E R N A L F E T C H E R | M E T A - E X T E R N A L F E T C H E R | M E T A - W E B I N D E X E R | M I S T R A L A I - U S E R | M I S T R A L A I - U S E R \/ 1 .0 | M Y C E N T R A L A I S C R A P E R B O T | N E T E S T A T E I M P R I N T C R A W L E R | N O T E B O O K L M | N O V A A C T | O A I - S E A R C H B O T | O M G I L I | O M G I L I B O T | O P E N A I | O P E R A T O R | P A N G U B O T | P A N S C I E N T | P A N S C I E N T .C O M | P E R P L E X I T Y - U S E R | P E R P L E X I T Y B O T | P E T A L B O T | P H I N D B O T | P O S E I D O N R E S E A R C H C R A W L E R | Q U A L I F I E D B O T | Q U I L L B O T | Q U I L L B O T .C O M | S B I N T U I T I O N S B O T | S C R A P Y | S E M R U S H B O T - O C O B | S E M R U S H B O T - S W A | S H A P B O T | S I D E T R A D E I N D E X E R B O T | T E R R A C O T T A | T H I N K B O T | T I K T O K S P I D E R | T I M P I B O T | V E L E N P U B L I C W E B C R A W L E R | W A R D B O T | W E B Z I O - E X T E N D E D | W P B O T | Y A K | Y A N D E X A D D I T I O N A L | Y A N D E X A D D I T I O N A L B O T | Y O U B O T ) / ;
121122// prettier-ignore
122- export const NON_RESPECTING_BOTS_REGEX = / ( A D D S E A R C H B O T | A M A Z O N B U Y F O R M E | A N D I B O T | A N T H R O P I C - A I | A P P L E B O T | A W A R I O | B I G S U R .A I | B R I G H T B O T 1 .0 | B U D D Y B O T | B Y T E S P I D E R | C L A U D E - W E B | C L O U D V E R T E X B O T | C O H E R E - A I | C O H E R E - T R A I N I N G - D A T A - C R A W L E R | D A T E N B A N K C R A W L E R | D E E P S E E K B O T | D I F F B O T | D U C K A S S I S T B O T | E C H O B O T B O T | E C H O B O X B O T | F A C E B O O K E X T E R N A L H I T | F A C T S E T _ S P Y D E R B O T | G E M I N I - D E E P - R E S E A R C H | G O O G L E - F I R E B A S E | G O O G L E - N O T E B O O K L M | G O O G L E A G E N T - M A R I N E R | I A S K S P I D E R \/ 2 .0 | I M G 2 D A T A S E T | I S S C Y B E R R I S K C R A W L E R | K A N G A R O O B O T | L I N E R B O T | L I N G U E E B O T | M E T A - E X T E R N A L A G E N T | M E T A - E X T E R N A L F E T C H E R | M E T A - E X T E R N A L F E T C H E R | M E T A - W E B I N D E X E R | M I S T R A L A I - U S E R | M Y C E N T R A L A I S C R A P E R B O T | N E T E S T A T E I M P R I N T C R A W L E R | N O T E B O O K L M | N O V A A C T | O P E R A T O R | P A N G U B O T | P E R P L E X I T Y - U S E R | P H I N D B O T | P O S E I D O N R E S E A R C H C R A W L E R | Q U A L I F I E D B O T | Q U I L L B O T | Q U I L L B O T .C O M | S C R A P Y | S I D E T R A D E I N D E X E R B O T | T H I N K B O T | T I K T O K S P I D E R | T I M P I B O T | W A R D B O T | W E B Z I O - E X T E N D E D | W P B O T | Y A K ) / ;
123+ export const NON_RESPECTING_BOTS_REGEX = / ( A D D S E A R C H B O T | A M A Z O N B U Y F O R M E | A N D I B O T | A N T H R O P I C - A I | A P P L E B O T | A W A R I O | B I G S U R .A I | B R I G H T B O T 1 .0 | B U D D Y B O T | B Y T E S P I D E R | C L A U D E - W E B | C L O U D V E R T E X B O T | C O H E R E - A I | C O H E R E - T R A I N I N G - D A T A - C R A W L E R | D A T E N B A N K C R A W L E R | D E E P S E E K B O T | D I F F B O T | D U C K A S S I S T B O T | E C H O B O T B O T | E C H O B O X B O T | F A C E B O O K E X T E R N A L H I T | F A C T S E T _ S P Y D E R B O T | G E M I N I - D E E P - R E S E A R C H | G O O G L E - F I R E B A S E | G O O G L E - N O T E B O O K L M | G O O G L E A G E N T - M A R I N E R | I A S K S P I D E R \/ 2 .0 | I M G 2 D A T A S E T | I S S C Y B E R R I S K C R A W L E R | K A N G A R O O B O T | L A I O N D O W N L O A D E R | L I N E R B O T | L I N G U E E B O T | M E T A - E X T E R N A L A G E N T | M E T A - E X T E R N A L F E T C H E R | M E T A - E X T E R N A L F E T C H E R | M E T A - W E B I N D E X E R | M I S T R A L A I - U S E R | M Y C E N T R A L A I S C R A P E R B O T | N E T E S T A T E I M P R I N T C R A W L E R | N O T E B O O K L M | N O V A A C T | O P E R A T O R | P A N G U B O T | P E R P L E X I T Y - U S E R | P H I N D B O T | P O S E I D O N R E S E A R C H C R A W L E R | Q U A L I F I E D B O T | Q U I L L B O T | Q U I L L B O T .C O M | S C R A P Y | S I D E T R A D E I N D E X E R B O T | T H I N K B O T | T I K T O K S P I D E R | T I M P I B O T | W A R D B O T | W E B Z I O - E X T E N D E D | W P B O T | Y A K ) / ;
0 commit comments