Skip to content

Commit adc3d83

Browse files
authored
add more bots (#55)
1 parent e29d24e commit adc3d83

File tree

3 files changed

+35
-3
lines changed

3 files changed

+35
-3
lines changed

src/MyCSharp.HttpUserAgentParser/HttpUserAgentStatics.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,21 @@ private static Regex CreateDefaultBrowserRegex(string key)
214214
public static readonly (string Key, string Value)[] Robots =
215215
[
216216
( "googlebot", "Googlebot" ),
217+
( "meta-externalagent", "meta-externalagent" ),
218+
( "openai.com/searchbot", "OAI-SearchBot" ),
219+
( "CCBot", "CCBot" ),
220+
( "archive.org/details/archive.org_bot", "archive.org" ),
221+
( "opensiteexplorer.org/dotbot", "DotBot" ),
222+
( "awario.com/bots.html", "AwarioBot" ),
223+
( "Turnitin", "Turnitin" ),
224+
( "openai.com/gptbot", "GPTBot" ),
225+
( "perplexity.ai/perplexitybot", "PerplexityBot" ),
226+
( "developer.amazon.com/support/amazonbot", "Amazonbot" ),
227+
( "trendictionbot", "trendictionbot" ),
228+
( "openai.com/searchbot", "OAI-SearchBot" ),
229+
( "Bytespider", "Bytespider" ),
230+
( "MojeekBot", "MojeekBot" ),
231+
( "SeekportBot", "SeekportBot" ),
217232
( "googleweblight", "Google Web Light" ),
218233
( "PetalBot", "PetalBot"),
219234
( "DuplexWeb-Google", "DuplexWeb-Google"),

tests/MyCSharp.HttpUserAgentParser.UnitTests/HttpUserAgentParserTests.cs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ public void BrowserTests(string ua, string name, string version, string platform
109109
[InlineData("Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +http://megaindex.com/crawler)", "MegaIndex")]
110110
[InlineData("Mozilla/5.0 (compatible; AhrefsBot/5.2; +http://ahrefs.com/robot/)", "Ahrefs")]
111111
[InlineData("Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)", "SEMRush")]
112-
[InlineData("Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)", "OpenSite")]
113112
[InlineData("Mozilla/5.0 (X11; U; Linux Core i7-4980HQ; de; rv:32.0; compatible; JobboerseBot; http://www.jobboerse.com/bot.htm) Gecko/20100101 Firefox/38.0", "Jobboerse")]
114113
[InlineData("Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)", "Majestic")]
115114
[InlineData("Mozilla/5.0 (compatible; SemrushBot/2~bl; +http://www.semrush.com/bot.html)", "SEMRush")]
@@ -132,6 +131,21 @@ public void BrowserTests(string ua, string name, string version, string platform
132131
[InlineData("WhatsApp/2.22.20.72 A", "WhatsApp")]
133132
[InlineData("WhatsApp/2.22.19.78 I", "WhatsApp")]
134133
[InlineData("WhatsApp/2.2236.3 N", "WhatsApp")]
134+
[InlineData("Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36", "Amazonbot")]
135+
[InlineData("Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +openai.com/gptbot)", "GPTBot")]
136+
[InlineData("Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/116.0.1938.76 Safari/537.36", "BingBot")]
137+
[InlineData("Mozilla/5.0 (compatible; AwarioBot/1.0; +awario.com/bots.html)", "AwarioBot")]
138+
[InlineData("Mozilla/5.0 (compatible; DotBot/1.2; +opensiteexplorer.org/dotbot; help@moz.com)", "DotBot")]
139+
[InlineData("Mozilla/5.0 (Windows NT 10.0; Win64; x64; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20100101 Firefox/125.0", "trendictionbot")]
140+
[InlineData("Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; Bytespider; spider-feedback@bytedance.com)", "Bytespider")]
141+
[InlineData("Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +perplexity.ai/perplexitybot)", "PerplexityBot")]
142+
[InlineData("Turnitin (bit.ly/2UvnfoQ)", "Turnitin")]
143+
[InlineData("meta-externalagent/1.1 (+developers.facebook.com/docs/sharing/webmasters/crawler)", "meta-externalagent")]
144+
[InlineData("CCBot/2.0 (commoncrawl.org/faq)", "CCBot")]
145+
[InlineData("Mozilla/5.0 (compatible; SeekportBot; +bot.seekport.com)", "SeekportBot")]
146+
[InlineData("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36; compatible; OAI-SearchBot/1.0; +openai.com/searchbot", "OAI-SearchBot")]
147+
[InlineData("Mozilla/5.0 (compatible; archive.org_bot +http://archive.org/details/archive.org_bot)", "archive.org")]
148+
[InlineData("Mozilla/5.0 (compatible; MojeekBot/0.11; +mojeek.com/bot.html)", "MojeekBot")]
135149
public void BotTests(string ua, string name)
136150
{
137151
HttpUserAgentInformation uaInfo = HttpUserAgentInformation.Parse(ua);

tests/MyCSharp.HttpUserAgentParser.UnitTests/HttpUserAgentPlatformInformationTests.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,22 @@
55

66
namespace MyCSharp.HttpUserAgentParser.UnitTests;
77

8-
public class HttpUserAgentPlatformInformationTests
8+
public partial class HttpUserAgentPlatformInformationTests
99
{
1010
[Theory]
1111
[InlineData("Batman", HttpUserAgentPlatformType.Android)]
1212
[InlineData("Robin", HttpUserAgentPlatformType.Windows)]
1313
public void Ctor(string name, HttpUserAgentPlatformType platform)
1414
{
15-
Regex regex = new("");
15+
Regex regex = EmptyRegex();
1616

1717
HttpUserAgentPlatformInformation info = new(regex, name, platform);
1818

1919
Assert.Equal(regex, info.Regex);
2020
Assert.Equal(name, info.Name);
2121
Assert.Equal(platform, info.PlatformType);
2222
}
23+
24+
[GeneratedRegex("")]
25+
private static partial Regex EmptyRegex();
2326
}

0 commit comments

Comments
 (0)