|
8 | 8 | * License: GNU/GPLv2 |
9 | 9 | * @see LICENSE.txt |
10 | 10 | * |
11 | | - * This file: Bot user agents module (last modified: 2025.07.19). |
| 11 | + * This file: Bot user agents module (last modified: 2025.07.24). |
12 | 12 | * |
13 | 13 | * False positive risk (an approximate, rough estimate only): « [ ]Low [x]Medium [ ]High » |
14 | 14 | */ |
|
151 | 151 | $Trigger(preg_match( |
152 | 152 | '~^(?:wp-iphone$|\'?test|-|default|foo)|_sitemapper|3mir|' . |
153 | 153 | 'a(?:boundex|dmantx|dnormcrawler|dvbot|lphaserver|thens|ttache)|' . |
154 | | - 'blekko|blogsnowbot|bytespider|' . |
| 154 | + 'blekko|blogsnowbot|' . |
155 | 155 | 'cmscrawler|co(?:ccoc|llect|modo-webinspector-crawler|mpspy)|crawler(?:4j|\.feedback)|' . |
156 | 156 | 'd(?:atacha|igout4uagent|ioscout|kimrepbot|sarobot)|' . |
157 | 157 | 'easou|exabot|' . |
|
167 | 167 | 'user-agent|visaduhoc|vonchimpenfurlr|webtarantula|wolf|' . |
168 | 168 | 'y(?:acy|isouspider|[ry]spider|unrang|unyun)|zoominfobot~', |
169 | 169 | $UANoSpace |
170 | | - ) || strpos($UA, ' ') !== false, 'Banned UA'); // 2021.07.08 mod 2025.03.31 |
| 170 | + ) || strpos($UA, ' ') !== false, 'Banned UA'); // 2021.07.08 mod 2025.07.24 |
171 | 171 |
|
172 | 172 | if (!$Trigger(( |
173 | 173 | preg_match('~^python-requests/2\.27~', $UANoSpace) && |
174 | 174 | preg_match('~admin|config\.php~', $CIDRAM['BlockInfo']['rURI']) |
175 | 175 | ), 'Hack attempt')) { // 2022.05.08 — UA starts with "python-requests/2.27" and the request URI contains "admin" or "config.php"; the scraper check below runs only when this call returns a falsy value |
176 | 176 | $Trigger(preg_match( |
177 | 177 | '~c(?:copyright|enturyb|9hilkat|olly)|fetch/|flipboard|googlealerts|grub|' . |
178 | | - 'indeedbot|quick-crawler|scrapinghub|ttd-content|zgrab|^(?:abot|python-re' . |
179 | | - 'quests/|spider)~', |
| 178 | + 'indeedbot|quick-crawler|scrapinghub|ttd-content|^(?:abot|python-requests' . |
| 179 | + '/|spider)~', |
180 | 180 | $UANoSpace |
181 | | - ), 'Scraper UA'); // 2022.05.11 |
| 181 | + ), 'Scraper UA'); // 2022.05.11 mod 2025.07.24 |
182 | 182 | } |
183 | 183 |
|
184 | 184 | $Trigger(preg_match('~^mozila/~', $UANoSpace), 'Hack attempt'); // 2022.05.31 — UA begins with "mozila/" (spelled with a single "l") |
185 | 185 |
|
186 | 186 | $Trigger(preg_match( |
187 | 187 | '~007ac9|200please|360spider|3d-ftp|' . |
188 | 188 | 'a(?:6-indexer|ccelo|ffinity|ghaven|href|ipbot|naly(?:ticsseo|zer)|pp3lewebkit|rtviper|wcheck)|' . |
189 | | - 'b(?:acklink|azqux|ender|inlar|itvo|ixo|lex|nf.fr|ogahn|oitho|pimagewalker)|' . |
190 | | - 'c(?:cbot|ent(?:iverse|ric)|ityreview|msworldmap|omment|ommoncrawl|overscout|r4nk|rawl(?:erbotalpha|fire)|razywebcrawler|uriousgeorge|ydral)|' . |
191 | | - 'd(?:ataprovider|atenbank|aylife|ebate|igext|(?:cp|isco|ot|ouban|ownload)bot|otcomdotnet|otnetdotcom|owjones|tsagent)|' . |
| 189 | + 'b(?:abbar\.tech|acklink|arkrowler|azqux|ender|inlar|itvo|ixo|lex|nf.fr|ogahn|oitho|pimagewalker)|' . |
| 190 | + 'c(?:ent(?:iverse|ric)|ityreview|msworldmap|omment|ommoncrawl|overscout|r4nk|rawl(?:erbotalpha|fire)|razywebcrawler|uriousgeorge|ydral)|' . |
| 191 | + 'd(?:ataprovider|aylife|ebate|igext|(?:cp|isco|ot|ouban|ownload)bot|otcomdotnet|otnetdotcom|owjones|tsagent)|' . |
192 | 192 | 'e(?:(?:na|uro|xperi)bot|nvolk|stimatewebstats|vaal|zoom)|' . |
193 | 193 | 'f(?:dm|etch(?:er.0|or)|ibgen)|' . |
194 | 194 | 'g(?:alaxydownloads|et(?:download\.ws|ty|url11)|slfbot|umgum|urujibot)|' . |
|
197 | 197 | 'k(?:eywenbot|eywordsearchtool|imengi|kman)|' . |
198 | 198 | 'l(?:abjs\.pro|arbin|ink(?:dex|walker)|iperhey|(?:t|ush)bot)|' . |
199 | 199 | 'm(?:ahiti|ahonie|attters|egaindex|iabot|lbot|oreover|ormor|ot-v980|oz\.com|rchrome|ulticrawler)|' . |
200 | | - 'n(?:eofonie|etestate|ewsbot|extgensearchbot|ineconnections)|' . |
| 200 | + 'n(?:eofonie|ewsbot|extgensearchbot|ineconnections)|' . |
201 | 201 | 'o(?:afcrawl|fflinenavigator|odlebot|ptimizer)|' . |
202 | | - 'p(?:age(?:fetch|gett|_verifi)er|agesinventory|anscient|ath2|ic(?:grabber|s|tsnapshot|turefinder)|i(?:pl|xmatch|xray)|oe-component-client-|owermarks|rofiler|roximic|(?:s|ure)bot|urity)|qqdownload|' . |
| 202 | + 'p(?:age(?:fetch|gett|_verifi)er|agesinventory|ath2|ic(?:grabber|s|tsnapshot|turefinder)|i(?:pl|xmatch|xray)|oe-component-client-|owermarks|rofiler|roximic|(?:s|ure)bot|urity)|qqdownload|' . |
203 | 203 | 'r(?:6_|adian6|ankivabot|ebi-shoveler|everseget|ganalytics|ocketcrawler|ogerbot|sscrawl|ulinki)|' . |
204 | | - 's(?:afeassign|bider|bl[.-]bot|crap[ey]|creamingfrog|earchmetricsbot|emrush|eo(?:bulls|eng|hunt|kicks|mon|profiler|stat|tool)|erpstat|istrix|ite(?:bot|intel)|n[iy]per|olomono|pbot|p(?:hi|y)der|search|webot)|' . |
| 204 | + 's(?:afeassign|bider|bl[.-]bot|creamingfrog|earchmetricsbot|emrush|eo(?:bulls|eng|hunt|kicks|mon|profiler|stat|tool)|erpstat|istrix|ite(?:bot|intel)|n[iy]per|olomono|pbot|search|webot)|' . |
205 | 205 | 't(?:-h-u-n|agsdir|ineye|opseo|raumacadx|urnitinbot)|' . |
206 | 206 | 'u(?:12bot|p(?:downer|ictobot))|' . |
207 | 207 | 'v(?:agabondo|bseo|isbot|oyager)|' . |
208 | 208 | 'w(?:arebay|auuu|bsearchbot|eb(?:alta|capture|download|mastercoffee|meup|ripper)|ikio|indows(?:3|seven)|ise-guys|khtmlto|orldbot|otbox)|' . |
209 | | - 'xtractorpro|' . |
210 | 209 | 'yoofind~', |
211 | 210 | $UANoSpace |
212 | | - ), 'Backlink/SEO/Scraper UA'); // 2022.09.19 mod 2025.07.19 |
| 211 | + ), 'Backlink/SEO/Scraper UA'); // 2022.09.19 mod 2025.07.24 |
213 | 212 |
|
214 | 213 | $Trigger(strpos($UANoSpace, 'catch') !== false, 'Risky UA'); // 2017.01.13 — any UA containing the substring "catch" |
215 | 214 |
|
|
228 | 227 | '~\.buzz|(?<!amazona)dbot/|(?:\W|^)(?:cu|pe)rl(?:\W|$)|#boss#|' . |
229 | 228 | '^(?:[aim]$|(?!linkedinbot).*http-?(?:agent|client))|-xpanse|' . |
230 | 229 | 'a(?:bonti|ccserver|cme.spider|dreview/\d|jbaxy|nthill$|nyevent-http|ppengine|xios)|' . |
231 | | - 'b(?:abbar\.tech|igbozz|itsight|lackbird|logsearch|logbot|salsa)|' . |
| 230 | + 'b(?:igbozz|itsight|lackbird|logsearch|logbot|salsa)|' . |
232 | 231 | 'c(?:astlebot|atexplorador|k=\{\}|lickagy|liqzbot|ms-?checker|ontextad|orporama|ortex/\d|rowsnest|yberpatrol)|' . |
233 | 232 | 'd(?:eepfield|le_spider|nbcrawler|omainappender|umprendertree)|' . |
234 | 233 | 'expanse|' . |
|
243 | 242 | 'p(?:4bot|4load|acrawler|ageglimpse|aloalto(?:company|network)|arsijoo|egasusmonitoring|hantomjs|hpcrawl|ingdom|rlog)|' . |
244 | 243 | 'r(?:arelyused|obo(?:cop|spider)|yze)|' . |
245 | 244 | 's(?:/got|can\.lol|caninfo|creener|eekport|itedomain|mut|nap(?:preview)?bot|oapclient|ocial(?:ayer|searcher)|oso|pyglass|quider|treetbot|ynapse)|' . |
246 | | - 't(?:impi|omba|weezler|ryghost)|' . |
| 245 | + 't(?:omba|weezler|ryghost)|' . |
247 | 246 | 'urlappendbot|urltest|' . |
248 | 247 | 'w(?:asalive|atchmouse|eb(?:-monitoring|bot|masteraid|money|pros|site-info\.net|thumbnail)|hatweb|ikiapiary|ininet|maid\.com|pbot/1\.|sr-agent|wwtype)|' . |
249 | 248 | 'xenu|xovi|' . |
|
252 | 251 | ) || preg_match( |
253 | 252 | '~^Mozilla/5\.0( [A-Za-z]{2,5}/0\..)?$~', |
254 | 253 | $CIDRAM['BlockInfo']['UA'] |
255 | | - ), 'Unauthorised'); // 2023.09.15 mod 2025.07.19 |
| 254 | + ), 'Unauthorised'); // 2023.09.15 mod 2025.07.24 |
256 | 255 |
|
257 | 256 | if ($Trigger(preg_match('~ivre-|masscan~', $UANoSpace), 'Port scanner and synflood tool detected')) { |
258 | 257 | $CIDRAM['Reporter']->report([14, 15, 19], ['MASSCAN port scanner and synflood tool detected.'], $CIDRAM['BlockInfo']['IPAddr']); |
|
292 | 291 | $CIDRAM['Reporter']->report([4, 19], ['BitTorrent user agent seen at HTTP server endpoint (possible flood/DDoS attempt).'], $CIDRAM['BlockInfo']['IPAddr']); |
293 | 292 | } // 2017.02.25 |
294 | 293 |
|
295 | | - $Trigger(preg_match( |
296 | | - '~foregenix|modat|nuclei|projectdiscovery|sslyze|threatview~', |
| 294 | + if ($Trigger(preg_match( |
| 295 | + '~foregenix|modat|nuclei|isscyberrisk|projectdiscovery|sslyze|threatview~', |
297 | 296 | $UA |
298 | | - ), 'Vulnerability scanner detected; Unauthorised'); // 2023.06.16 mod 2025.01.12 |
| 297 | + ), 'Unauthorised vulnerability scanner detected')) { |
| 298 | + $CIDRAM['Reporter']->report([15, 19, 21], ['Unauthorised vulnerability scanner detected.'], $CIDRAM['BlockInfo']['IPAddr']); |
| 299 | + $CIDRAM['Tracking options override'] = 'extended'; |
| 300 | + } // 2023.06.16 mod 2025.07.24 — "isscyberrisk" added; the pattern is tested against $UA, and a match is now reported ([15, 19, 21]) and sets the tracking override to 'extended' directly in this block |
299 | 301 |
|
300 | 302 | $Trigger(preg_match('~^python/|aiohttp/|\.post0~', $UANoSpace), 'Bad context (Python/AIO clients not permitted here)'); // 2021.05.18 — UA starts with "python/", or contains "aiohttp/" or ".post0" |
301 | 303 |
|
|
314 | 316 | } // 2022.05.08 |
315 | 317 |
|
316 | | - $Trigger(preg_match( |
317 | | - '~adbar|anonymous-?coward|' . |
318 | | - 'banana-?bot|bot-?test|brands-?bot|' . |
319 | | - 'clark-?crawler|' . |
320 | | - 'fidget-?spinner-?bot|friendly-?spider|' . |
321 | | - 'imagesift|' . |
| 318 | + if ($Trigger(preg_match( // Scraper UAs; wrapped in if() so that a match also sets the 'Tracking options override' to 'extended' (the closing line ends with ")) {") |
| 319 | + '~80legs|' . |
| 320 | + 'a(?:dbar|i2bot|ihitbot|i.?searchbot|liyun|ndibot|nonymous-?coward|wario)|' . |
| 321 | + 'b(?:anana-?bot|edrockbot|ot-?test|rands-?bot|rightbot|ytespider)|' . |
| 322 | + 'c(?:asperbot|cbot|hinaclaw|lark-?crawler|ohere-)|' . |
| 323 | + 'd(?:atenbank|eep-?research)|' . |
| 324 | + 'echobo[tx]|' . |
| 325 | + 'f(?:idget-?spinner-?bot|irecrawl|riendly-?(?:crawler|spider))|' . |
| 326 | + 'i(?:askspider|magesift|mg2dataset)|' . |
322 | 327 | 'jaddjabot|' . |
323 | | - 'keys-?so-?bot|' . |
| 328 | + 'k(?:angaroobot|eys-?so-?bot)|' . |
| 329 | + 'm(?:amac(?:asper|yber)|istral|ozilla/0|ycentralai)|' . |
| 330 | + 'n(?:etestate|ovaact)|' . |
324 | 331 | 'orbbot|' . |
325 | | - 'phxbot|' . |
326 | | - 'storm-?crawler|' . |
327 | | - 't(?:est-?bot|hesis-?research-?bot|hinkchaos|iny-?(?:bot|test)|rafilatura)|' . |
328 | | - 'whatstuffwherebot|winhttp|' . |
329 | | - 'zephuli-?bot~', |
| 332 | + 'p(?:angubot|anscient|erplexity|hindbot|hxbot|oseidon|ublicwebcrawler)|' . |
| 333 | + 'q(?:ualifiedbot|uillbot)|' . |
| 334 | + 'research.?crawler|' . |
| 335 | + 's(?:bintuition|crap[ey]|idetrade|p(?:hi|y)der|torm-?crawler|ummalybot)|' . |
| 336 | + 't(?:est-?bot|heknowledgeai|hesis-?research-?bot|hinkchaos|impi|iny-?(?:bot|test)|rafilatura)|' . |
| 337 | + 'velenpublic|' . |
| 338 | + 'w(?:ardbot|ebzio|hatstuffwherebot|inhttp)|' . |
| 339 | + 'xtractorpro|' . |
| 340 | + 'z(?:ephuli-?bot|grab)~', |
330 | 341 | $UANoSpace |
331 | | - ), 'Scraper UA'); // 2023.11.17 mod 2025.07.19 |
| 342 | + ), 'Scraper UA')) { |
| 343 | + $CIDRAM['Tracking options override'] = 'extended'; |
| 344 | + } // 2023.11.17 mod 2025.07.24 |
332 | 345 |
|
333 | 346 | $Trigger(preg_match('~ct‑git‑scanner/~i', $CIDRAM['BlockInfo']['UA']), 'Unauthorised Git scanner'); // 2025.07.05 — NOTE(review): the hyphens in this pattern appear to be U+2011 (non-breaking hyphen), not ASCII "-"; confirm this matches the UA exactly as it is sent |
334 | 347 |
|
|
357 | 370 | strpos($UANoSpace, '}__') !== false || |
358 | 371 | preg_match('~0wn[3e]d|dkemdif.\d|f' . 'uck|:(?:\{[\w]:|[\w\d][;:]\})~', $UANoSpace) |
359 | 372 | ), 'Hack UA') || // 2021.06.28 |
360 | | - $Trigger(preg_match('~(?:(aihit|casper)bot|mamac(asper|yber)|mozilla/0)~', $UANoSpace), 'Probe UA') || // 2017.02.25 |
361 | 373 | $Trigger(strpos($UANoSpace, 'wopbot') !== false, 'Bash/Shellshock UA') || // 2017.01.06 |
362 | 374 | $Trigger(preg_match('/(?:x(rumer|pymep)|хрумер)/', $UANoSpace), 'Spam UA') || // 2017.01.02 |
363 | | - $Trigger(preg_match('~loadimpact|re-?animator|root|theknowledgeai|webster~', $UANoSpace), 'Banned UA') || // 2021.02.10 |
| 375 | + $Trigger(preg_match('~loadimpact|re-?animator|root|webster~', $UANoSpace), 'Banned UA') || // 2021.02.10 mod 2025.07.24 |
364 | 376 | $Trigger(strpos($UANoSpace, '(somename)') !== false, 'Banned UA') || // 2017.02.02 |
365 | | - $Trigger(preg_match('~80legs|chinaclaw~', $UANoSpace), 'Scraper UA') || // 2017.01.08 mod 2021.06.28 |
366 | 377 | $Trigger(preg_match('~brandwatch|magpie~', $UANoSpace), 'Snoop UA') || // 2017.01.13 mod 2021.06.28 |
367 | 378 | $Trigger(strpos($CIDRAM['BlockInfo']['UA'], 'MSIECrawler') !== false, 'Hostile / Fake IE') // 2017.02.25 mod 2021.06.28 |
368 | 379 | ) { |
|
389 | 400 | $CIDRAM['Reporter']->report([15], ['Shell upload attempt detected in user agent.'], $CIDRAM['BlockInfo']['IPAddr']); |
390 | 401 | } elseif (strpos($CIDRAM['BlockInfo']['WhyReason'], 'Hack UA') !== false) { |
391 | 402 | $CIDRAM['Reporter']->report([15, 19, 21], ['Hack identifier detected in user agent.'], $CIDRAM['BlockInfo']['IPAddr']); |
392 | | - } elseif (strpos($CIDRAM['BlockInfo']['WhyReason'], 'Vulner') !== false) { |
393 | | - $CIDRAM['Reporter']->report([15, 19, 21], ['Caught looking for vulnerabilities.'], $CIDRAM['BlockInfo']['IPAddr']); |
394 | 403 | } elseif (strpos($CIDRAM['BlockInfo']['WhyReason'], 'UASQLi') !== false) { |
395 | 404 | $CIDRAM['Reporter']->report([16], ['SQLi attempt detected in user agent.'], $CIDRAM['BlockInfo']['IPAddr']); |
396 | 405 | } elseif (strpos($CIDRAM['BlockInfo']['WhyReason'], 'Probe UA') !== false) { |
|
430 | 439 | * @link https://www.reddit.com/r/singularity/comments/1cdm97j/anthropics_claudebot_is_aggressively_scraping_the/ |
431 | 440 | * @link https://www.linode.com/community/questions/24842/ddos-from-anthropic-ai |
432 | 441 | */ |
433 | | - if ($Trigger((strpos($UANoSpace, 'anthropic') !== false || strpos($UANoSpace, 'claudebot') !== false), 'Unauthorised AI scanner')) { |
| 442 | + if ($Trigger(preg_match('~anthropic|claude-?(?:bot|searchbot|user|web)~', $UANoSpace), 'Unauthorised AI scanner')) { |
434 | 443 | $CIDRAM['Reporter']->report([4, 19], ['AI scanner notorious for flooding and DDoS attacks detected.'], $CIDRAM['BlockInfo']['IPAddr']); |
435 | 444 | $CIDRAM['Tracking options override'] = 'extended'; |
436 | | - } // 2023.08.10 mod 2024.04.27 |
| 445 | + } // 2023.08.10 mod 2025.07.24 — besides "anthropic" and "claudebot", now also matches "claude" followed by an optional hyphen and "bot", "searchbot", "user" or "web" |
437 | 446 |
|
438 | 447 | /** |
439 | 448 | * @link https://github.com/CIDRAM/CIDRAM/issues/606 |
|
0 commit comments