Skip to content

Commit 99d5f6d

Browse files
committed
feat(parser): enhance user agent parsing logic and add tests for invalid inputs
1 parent ce92376 commit 99d5f6d

File tree

3 files changed

+108
-27
lines changed

3 files changed

+108
-27
lines changed

src/HttpUserAgentParser/HttpUserAgentParser.cs

Lines changed: 51 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ public static class HttpUserAgentParser
1818
/// </summary>
1919
public static HttpUserAgentInformation Parse(string userAgent)
2020
{
21-
// prepare
2221
userAgent = Cleanup(userAgent);
2322

2423
// analyze
@@ -78,6 +77,11 @@ public static bool TryGetPlatform(string userAgent, [NotNullWhen(true)] out Http
7877
public static (string Name, string? Version)? GetBrowser(string userAgent)
7978
{
8079
ReadOnlySpan<char> ua = userAgent.AsSpan();
80+
// Require a realistic browser UA prefix to avoid classifying truncated tokens as browsers
81+
if (!ContainsIgnoreCase(ua, "Mozilla/"))
82+
{
83+
return null;
84+
}
8185
foreach ((string Name, string DetectToken, string? VersionToken) browserRule in HttpUserAgentStatics.s_browserRules)
8286
{
8387
if (!TryIndexOf(ua, browserRule.DetectToken, out int detectIndex))
@@ -86,7 +90,19 @@ public static (string Name, string? Version)? GetBrowser(string userAgent)
8690
}
8791

8892
// Version token may differ (e.g., Safari uses "Version/")
89-
int versionSearchStart = detectIndex;
93+
// Keep full span immutable across iterations
94+
ReadOnlySpan<char> uaFull = userAgent.AsSpan();
95+
int versionSearchStart;
96+
// For rules without a specific version token, ensure pattern Token/<digits>
97+
if (string.IsNullOrEmpty(browserRule.VersionToken))
98+
{
99+
int afterDetect = detectIndex + browserRule.DetectToken.Length;
100+
if (afterDetect >= uaFull.Length || uaFull[afterDetect] != '/')
101+
{
102+
// Likely a misspelling or partial token (e.g., Edgg, Oprea, Chromee)
103+
continue;
104+
}
105+
}
90106
if (!string.IsNullOrEmpty(browserRule.VersionToken))
91107
{
92108
if (TryIndexOf(ua, browserRule.VersionToken!, out int vtIndex))
@@ -104,14 +120,22 @@ public static (string Name, string? Version)? GetBrowser(string userAgent)
104120
versionSearchStart = detectIndex + browserRule.DetectToken.Length;
105121
}
106122

107-
string? version = null;
108-
ua = ua.Slice(versionSearchStart);
109-
if (TryExtractVersion(ua, out Range range))
123+
// Work on a local slice to avoid mutating the main span for following rules
124+
if (versionSearchStart < 0 || versionSearchStart >= uaFull.Length)
125+
{
126+
// Nothing to search; try next rule
127+
continue;
128+
}
129+
130+
ReadOnlySpan<char> search = uaFull.Slice(versionSearchStart);
131+
if (TryExtractVersion(search, out Range range))
110132
{
111-
version = ua[range].ToString();
133+
string? version = search[range].ToString();
134+
return (browserRule.Name, version);
112135
}
113136

114-
return (browserRule.Name, version);
137+
// If we didn't find a version for this rule, try next rule
138+
continue;
115139
}
116140

117141
return null;
@@ -198,39 +222,43 @@ private static bool TryExtractVersion(ReadOnlySpan<char> haystack, out Range ran
198222

199223
// Limit search window to avoid scanning entire UA string unnecessarily
200224
const int Window = 128;
201-
if (haystack.Length >= Window)
225+
if (haystack.Length > Window)
202226
{
203227
haystack = haystack.Slice(0, Window);
204228
}
205229

206-
int i = 0;
207-
for (; i < haystack.Length; ++i)
230+
// Find first digit
231+
int start = -1;
232+
for (int i = 0; i < haystack.Length; i++)
208233
{
209234
char c = haystack[i];
210-
if (char.IsBetween(c, '0', '9'))
235+
if (c >= '0' && c <= '9')
211236
{
237+
start = i;
212238
break;
213239
}
214240
}
215241

216-
int s = i;
217-
haystack = haystack.Slice(i + 1);
218-
for (i = 0; i < haystack.Length; ++i)
242+
if (start < 0)
219243
{
220-
char c = haystack[i];
221-
if (!(char.IsBetween(c, '0', '9') || c == '.'))
222-
{
223-
break;
224-
}
244+
// No digit found => no version
245+
return false;
225246
}
226-
i += s + 1; // shift back the previous domain
227247

228-
if (i == s)
248+
// Consume digits and dots after first digit
249+
int end = start + 1;
250+
while (end < haystack.Length)
229251
{
230-
return false;
252+
char c = haystack[end];
253+
if (!((c >= '0' && c <= '9') || c == '.'))
254+
{
255+
break;
256+
}
257+
end++;
231258
}
232259

233-
range = new Range(s, i);
260+
// Create exclusive end range
261+
range = new Range(start, end);
234262
return true;
235263
}
236264
}

src/HttpUserAgentParser/HttpUserAgentStatics.cs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,7 @@ private static Regex CreateDefaultBrowserRegex(string key)
165165
{ CreateDefaultBrowserRegex("Camino"), "Camino" },
166166
{ CreateDefaultBrowserRegex("Netscape"), "Netscape" },
167167
{ CreateDefaultBrowserRegex("OmniWeb"), "OmniWeb" },
168-
{ CreateDefaultBrowserRegex("Version"), "Safari" }, // https://github.com/mycsharp/HttpUserAgentParser/issues/34
169-
{ CreateDefaultBrowserRegex("Mozilla"), "Mozilla" },
168+
{ CreateDefaultBrowserRegex("Version"), "Safari" }, // https://github.com/mycsharp/HttpUserAgentParser/issues/34
170169
{ CreateDefaultBrowserRegex("Konqueror"), "Konqueror" },
171170
{ CreateDefaultBrowserRegex("icab"), "iCab" },
172171
{ CreateDefaultBrowserRegex("Lynx"), "Lynx" },
@@ -207,8 +206,7 @@ internal static readonly (string Name, string DetectToken, string? VersionToken)
207206
("Camino", "Camino", null),
208207
("Netscape", "Netscape", null),
209208
("OmniWeb", "OmniWeb", null),
210-
("Safari", "Version/", "Version/"),
211-
("Mozilla", "Mozilla", null),
209+
("Safari", "Version/", "Version/"),
212210
("Konqueror", "Konqueror", null),
213211
("iCab", "icab", null),
214212
("Lynx", "Lynx", null),

tests/HttpUserAgentParser.UnitTests/HttpUserAgentParserTests.cs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,4 +173,59 @@ public void BotTests(string ua, string name)
173173
Assert.False(uaInfo.IsMobile());
174174
Assert.True(uaInfo.IsRobot());
175175
}
176+
177+
[Theory]
178+
[InlineData("")]
179+
[InlineData("???")]
180+
[InlineData("NotAUserAgent")]
181+
[InlineData("Mozilla")]
182+
[InlineData("Mozilla/")]
183+
[InlineData("()")]
184+
[InlineData("UserAgent/")]
185+
[InlineData("Bot/123 (")]
186+
[InlineData("123456")]
187+
[InlineData("curl")]
188+
[InlineData("invalid/useragent")]
189+
[InlineData("Mozilla (Windows)")]
190+
[InlineData("Chrome/ABC")]
191+
[InlineData(";;!!##")]
192+
[InlineData("Safari/ ")]
193+
[InlineData("Opera( )")]
194+
[InlineData("Mozilla/5.0 (X11; ) Gecko")]
195+
[InlineData("FakeUA/1.0 (Test)???")]
196+
[InlineData("Mozilla/ (iPhone; U; CPU iPhone OS like Mac OS X) AppleWebKit/ (KHTML, like Gecko) Version/ Mobile/ Safari/")]
197+
[InlineData("Mozzila/5.0 (Windows NT 10.0; Win64; x64)")]
198+
[InlineData("Chorme/91.0.4472.124 (Windows NT 10.0; Win64; x64)")]
199+
[InlineData("FireFoxx/89.0 (Macintosh; Intel Mac OS X 10_15_7)")]
200+
[InlineData("Safarii/14.1 (iPhone; CPU iPhone OS 14_6 like Mac OS X)")]
201+
[InlineData("Edg/91.0.864.59 (Windows NT 10.0; Win64; x64)")] // incorrectly truncated
202+
[InlineData("Mozila/5.0 (Linux; Android 11; Pixel 4) AppleWebkit/537.36")]
203+
[InlineData("InternetExploder/11.0 (Windows NT 6.1; WOW64)")]
204+
[InlineData("Operaa/77.0.4054.172 (Windows NT 10.0; Win64; x64)")]
205+
[InlineData("Bravee/1.25.72 (Windows NT 10.0; Win64; x64)")]
206+
[InlineData("Mozzila/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0)")]
207+
[InlineData("Chromee/99.0.4758.102 (X11; Linux x86_64)")]
208+
[InlineData("FirreFox/100.0 (Windows NT 10.0; rv:100.0)")]
209+
[InlineData("Saffari/605.1.15 (iPad; CPU OS 14_6 like Mac OS X)")]
210+
[InlineData("Edgg/103.0.1264.37 (Macintosh; Intel Mac OS X 11_5_2)")]
211+
[InlineData("Mozillaa/4.0 (compatible; MSIE 6.0; Windows NT 5.1)")]
212+
[InlineData("Chorome/91.0.4472.124 (Linux; Android 10; SM-G973F)")]
213+
[InlineData("Mozila/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0)")]
214+
[InlineData("Safarrii/537.36 (KHTML, like Gecko) Chrome/99.0.4758.102")]
215+
[InlineData("Oprea/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14")]
216+
[InlineData("Edgee/18.18363 (Windows 10 1909; Win64; x64)")]
217+
public void InvalidUserAgent(string userAgent)
218+
{
219+
HttpUserAgentInformation info = HttpUserAgentInformation.Parse(userAgent);
220+
221+
// Invalid or malformed UAs must be classified as Unknown
222+
Assert.Equal(HttpUserAgentType.Unknown, info.Type);
223+
Assert.Null(info.Name);
224+
Assert.Null(info.Version);
225+
Assert.Equal(userAgent, info.UserAgent);
226+
227+
// Should not be considered a browser or a robot
228+
Assert.False(info.IsBrowser());
229+
Assert.False(info.IsRobot());
230+
}
176231
}

0 commit comments

Comments
 (0)