Skip to content

Commit ef396d1

Browse files
committed
feat(parser): enhance user agent parsing with zero-allocation checks
1 parent 9f87e8b commit ef396d1

File tree

4 files changed

+209
-17
lines changed

4 files changed

+209
-17
lines changed

perf/Directory,Build.props

Lines changed: 0 additions & 9 deletions
This file was deleted.

perf/HttpUserAgentParser.Benchmarks/HttpUserAgentParser.Benchmarks.csproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
<Nullable>disable</Nullable>
66
</PropertyGroup>
77

8+
<!-- Use the MSBuild project name as root namespace and assembly name to satisfy BenchmarkDotNet -->
9+
<PropertyGroup>
10+
<RootNamespace>$(MSBuildProjectName)</RootNamespace>
11+
<AssemblyName>$(MSBuildProjectName)</AssemblyName>
12+
</PropertyGroup>
13+
814
<PropertyGroup Condition="'$(OS)' == 'Windows_NT'">
915
<DefineConstants>$(DefineConstants);OS_WIN</DefineConstants>
1016
</PropertyGroup>

src/HttpUserAgentParser/HttpUserAgentParser.cs

Lines changed: 105 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright © https://myCSharp.de - all rights reserved
22

33
using System.Diagnostics.CodeAnalysis;
4-
using System.Text.RegularExpressions;
4+
using System.Runtime.CompilerServices;
55

66
namespace MyCSharp.HttpUserAgentParser;
77

@@ -48,11 +48,16 @@ public static HttpUserAgentInformation Parse(string userAgent)
4848
/// </summary>
4949
public static HttpUserAgentPlatformInformation? GetPlatform(string userAgent)
5050
{
51-
foreach (HttpUserAgentPlatformInformation item in HttpUserAgentStatics.Platforms)
51+
// Fast, allocation-free token scan (keeps public statics untouched)
52+
ReadOnlySpan<char> ua = userAgent.AsSpan();
53+
foreach ((string Token, string Name, HttpUserAgentPlatformType PlatformType) p in HttpUserAgentStatics.s_platformRules)
5254
{
53-
if (item.Regex.IsMatch(userAgent))
55+
if (ContainsIgnoreCase(ua, p.Token))
5456
{
55-
return item;
57+
return new HttpUserAgentPlatformInformation(
58+
regex: HttpUserAgentStatics.GetPlatformRegexForToken(p.Token),
59+
name: p.Name,
60+
platformType: p.PlatformType);
5661
}
5762
}
5863

@@ -73,13 +78,40 @@ public static bool TryGetPlatform(string userAgent, [NotNullWhen(true)] out Http
7378
/// </summary>
7479
public static (string Name, string? Version)? GetBrowser(string userAgent)
7580
{
76-
foreach ((Regex key, string? value) in HttpUserAgentStatics.Browsers)
81+
ReadOnlySpan<char> ua = userAgent.AsSpan();
82+
foreach ((string Name, string DetectToken, string? VersionToken) rule in HttpUserAgentStatics.s_browserRules)
7783
{
78-
Match match = key.Match(userAgent);
79-
if (match.Success)
84+
if (!TryIndexOf(ua, rule.DetectToken, out int detectIndex))
8085
{
81-
return (value, match.Groups[1].Value);
86+
continue;
8287
}
88+
89+
// Version token may differ (e.g., Safari uses "Version/")
90+
int versionSearchStart = detectIndex;
91+
if (!string.IsNullOrEmpty(rule.VersionToken))
92+
{
93+
if (TryIndexOf(ua, rule.VersionToken!, out int vtIndex))
94+
{
95+
versionSearchStart = vtIndex + rule.VersionToken!.Length;
96+
}
97+
else
98+
{
99+
// If specific version token wasn't found, fall back to detect token area
100+
versionSearchStart = detectIndex + rule.DetectToken.Length;
101+
}
102+
}
103+
else
104+
{
105+
versionSearchStart = detectIndex + rule.DetectToken.Length;
106+
}
107+
108+
string? version = null;
109+
if (TryExtractVersion(ua, versionSearchStart, out Range range))
110+
{
111+
version = userAgent.AsSpan(range.Start.Value, range.End.Value - range.Start.Value).ToString();
112+
}
113+
114+
return (rule.Name, version);
83115
}
84116

85117
return null;
@@ -143,4 +175,69 @@ public static bool TryGetMobileDevice(string userAgent, [NotNullWhen(true)] out
143175
device = GetMobileDevice(userAgent);
144176
return device is not null;
145177
}
178+
179+
/// <summary>
/// Reports whether <paramref name="needle"/> occurs anywhere in <paramref name="haystack"/>,
/// compared ordinally and without regard to case.
/// </summary>
/// <param name="haystack">Text to search.</param>
/// <param name="needle">Token to look for.</param>
/// <returns><see langword="true"/> when the token is present; otherwise <see langword="false"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool ContainsIgnoreCase(ReadOnlySpan<char> haystack, string needle)
{
    return haystack.Contains(needle.AsSpan(), StringComparison.OrdinalIgnoreCase);
}
182+
183+
/// <summary>
/// Searches <paramref name="haystack"/> for the first ordinal, case-insensitive occurrence of
/// <paramref name="needle"/>.
/// </summary>
/// <param name="haystack">Text to search.</param>
/// <param name="needle">Token to look for.</param>
/// <param name="index">Zero-based position of the first match, or -1 when there is none.</param>
/// <returns><see langword="true"/> when a match was found; otherwise <see langword="false"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool TryIndexOf(ReadOnlySpan<char> haystack, string needle, out int index)
{
    int position = haystack.IndexOf(needle.AsSpan(), StringComparison.OrdinalIgnoreCase);
    index = position;
    return position >= 0;
}
189+
190+
/// <summary>
/// Locates the first dotted numeric version at or after <paramref name="startIndex"/>.
/// </summary>
/// <remarks>
/// Every non-digit character in front of the first digit is skipped (separators such as
/// '/', ' ', ':' and '=' as well as any other text). The version then extends over the
/// following run of digits and dots, so a trailing dot is part of the result. Only the first
/// 128 characters starting at <paramref name="startIndex"/> are examined.
/// </remarks>
/// <param name="haystack">Text to scan.</param>
/// <param name="startIndex">Index at which scanning begins; negative or past-the-end values yield <see langword="false"/>.</param>
/// <param name="range">On success, the half-open range of the version inside <paramref name="haystack"/>; otherwise <c>default</c>.</param>
/// <returns><see langword="true"/> if a digit was found inside the search window; otherwise <see langword="false"/>.</returns>
private static bool TryExtractVersion(ReadOnlySpan<char> haystack, int startIndex, out Range range)
{
    // Limit search window to avoid scanning entire UA string unnecessarily
    const int Window = 128;

    range = default;

    // A single unsigned comparison rejects both negative and past-the-end start indices
    if ((uint)startIndex >= (uint)haystack.Length)
    {
        return false;
    }

    int end = Math.Min(haystack.Length, startIndex + Window);

    // Advance to the first digit inside the window
    int versionStart = startIndex;
    while (versionStart < end && !IsDigit(haystack[versionStart]))
    {
        versionStart++;
    }

    if (versionStart == end)
    {
        return false;
    }

    // haystack[versionStart] is a digit, so the version can never be empty;
    // no separate empty-result check is needed after this loop
    int versionEnd = versionStart + 1;
    while (versionEnd < end && (IsDigit(haystack[versionEnd]) || haystack[versionEnd] == '.'))
    {
        versionEnd++;
    }

    range = new Range(versionStart, versionEnd);
    return true;

    // Unsigned wrap-around turns every char below '0' into a large value, so one compare suffices
    static bool IsDigit(char c) => (uint)(c - '0') <= 9;
}
146243
}

src/HttpUserAgentParser/HttpUserAgentStatics.cs

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,62 @@ public static class HttpUserAgentStatics
7070
new(CreateDefaultPlatformRegex("symbian"), "Symbian OS", HttpUserAgentPlatformType.Symbian),
7171
];
7272

73+
/// <summary>
74+
/// Fast-path platform token rules for zero-allocation Contains checks.
/// Each entry pairs a lowercase token with the platform name and platform type that are
/// reported when the token occurs in a user agent.
75+
/// </summary>
/// <remarks>
/// Order is significant: HttpUserAgentParser.GetPlatform walks this table from the top and
/// returns the first entry whose token is contained in the user agent (ordinal,
/// case-insensitive), so a specific token such as "windows nt 10.0" has to stay ahead of a
/// generic token it contains, such as "windows".
/// </remarks>
76+
internal static readonly (string Token, string Name, HttpUserAgentPlatformType PlatformType)[] s_platformRules =
77+
[
78+
("windows nt 10.0", "Windows 10", HttpUserAgentPlatformType.Windows),
79+
("windows nt 6.3", "Windows 8.1", HttpUserAgentPlatformType.Windows),
80+
("windows nt 6.2", "Windows 8", HttpUserAgentPlatformType.Windows),
81+
("windows nt 6.1", "Windows 7", HttpUserAgentPlatformType.Windows),
82+
("windows nt 6.0", "Windows Vista", HttpUserAgentPlatformType.Windows),
83+
("windows nt 5.2", "Windows 2003", HttpUserAgentPlatformType.Windows),
84+
("windows nt 5.1", "Windows XP", HttpUserAgentPlatformType.Windows),
85+
("windows nt 5.0", "Windows 2000", HttpUserAgentPlatformType.Windows),
86+
("windows nt 4.0", "Windows NT 4.0", HttpUserAgentPlatformType.Windows),
87+
("winnt4.0", "Windows NT 4.0", HttpUserAgentPlatformType.Windows),
88+
("winnt 4.0", "Windows NT", HttpUserAgentPlatformType.Windows),
89+
("winnt", "Windows NT", HttpUserAgentPlatformType.Windows),
90+
("windows 98", "Windows 98", HttpUserAgentPlatformType.Windows),
91+
("win98", "Windows 98", HttpUserAgentPlatformType.Windows),
92+
("windows 95", "Windows 95", HttpUserAgentPlatformType.Windows),
93+
("win95", "Windows 95", HttpUserAgentPlatformType.Windows),
94+
("windows phone", "Windows Phone", HttpUserAgentPlatformType.Windows),
95+
// Generic catch-all for any remaining Windows user agent; must follow every specific Windows token
("windows", "Unknown Windows OS", HttpUserAgentPlatformType.Windows),
96+
("android", "Android", HttpUserAgentPlatformType.Android),
97+
("blackberry", "BlackBerry", HttpUserAgentPlatformType.BlackBerry),
98+
("iphone", "iOS", HttpUserAgentPlatformType.IOS),
99+
("ipad", "iOS", HttpUserAgentPlatformType.IOS),
100+
("ipod", "iOS", HttpUserAgentPlatformType.IOS),
101+
("cros", "ChromeOS", HttpUserAgentPlatformType.ChromeOS),
102+
("os x", "Mac OS X", HttpUserAgentPlatformType.MacOS),
103+
("ppc mac", "Power PC Mac", HttpUserAgentPlatformType.MacOS),
104+
("freebsd", "FreeBSD", HttpUserAgentPlatformType.Linux),
105+
// NOTE(review): reported as "Macintosh" but typed as Linux - confirm this pairing is intended
("ppc", "Macintosh", HttpUserAgentPlatformType.Linux),
106+
("linux", "Linux", HttpUserAgentPlatformType.Linux),
107+
("debian", "Debian", HttpUserAgentPlatformType.Linux),
108+
("sunos", "Sun Solaris", HttpUserAgentPlatformType.Generic),
109+
("beos", "BeOS", HttpUserAgentPlatformType.Generic),
110+
("apachebench", "ApacheBench", HttpUserAgentPlatformType.Generic),
111+
("aix", "AIX", HttpUserAgentPlatformType.Generic),
112+
("irix", "Irix", HttpUserAgentPlatformType.Generic),
113+
("osf", "DEC OSF", HttpUserAgentPlatformType.Generic),
114+
// NOTE(review): HP-UX is typed as Windows while the neighbouring entries use Generic/Unix - confirm
("hp-ux", "HP-UX", HttpUserAgentPlatformType.Windows),
115+
("netbsd", "NetBSD", HttpUserAgentPlatformType.Generic),
116+
("bsdi", "BSDi", HttpUserAgentPlatformType.Generic),
117+
("openbsd", "OpenBSD", HttpUserAgentPlatformType.Unix),
118+
("gnu", "GNU/Linux", HttpUserAgentPlatformType.Linux),
119+
("unix", "Unknown Unix OS", HttpUserAgentPlatformType.Unix),
120+
("symbian", "Symbian OS", HttpUserAgentPlatformType.Symbian),
121+
];
122+
123+
// Token-to-regex lookup built once during type initialization, so a PlatformInformation can be
// handed a ready-made regex without per-call allocations.
private static readonly Dictionary<string, Regex> s_platformRegexMap = BuildPlatformRegexMap();

// Creates one default platform regex per fast-path token; keys compare ordinally, ignoring case.
private static Dictionary<string, Regex> BuildPlatformRegexMap()
{
    Dictionary<string, Regex> map = new(StringComparer.OrdinalIgnoreCase);
    foreach ((string token, _, _) in s_platformRules)
    {
        // Add (like ToDictionary) throws on a duplicate token, surfacing table mistakes early
        map.Add(token, CreateDefaultPlatformRegex(token));
    }

    return map;
}

/// <summary>
/// Returns the regex registered for <paramref name="token"/>.
/// </summary>
/// <param name="token">A token taken from <c>s_platformRules</c>; unknown tokens make the dictionary indexer throw.</param>
internal static Regex GetPlatformRegexForToken(string token)
{
    return s_platformRegexMap[token];
}
128+
73129
/// <summary>
74130
/// Regex defaults for browser mappings
75131
/// </summary>
@@ -122,6 +178,48 @@ private static Regex CreateDefaultBrowserRegex(string key)
122178
{ CreateDefaultBrowserRegex("Ubuntu"), "Ubuntu Web Browser" },
123179
};
124180

181+
/// <summary>
182+
/// Fast-path browser token rules, listed in match-priority order.
183+
/// </summary>
/// <remarks>
/// HttpUserAgentParser.GetBrowser walks this table from the top and returns the first rule
/// whose DetectToken occurs in the user agent (ordinal, case-insensitive). When VersionToken
/// is set and found, version extraction starts right after it; otherwise it starts right
/// after DetectToken.
/// NOTE(review): this summary previously said that a failed version extraction falls back to
/// the regex rules, but GetBrowser as shown in this change returns the rule name with a null
/// version instead - confirm whether a regex fallback exists elsewhere.
/// </remarks>
184+
internal static readonly (string Name, string DetectToken, string? VersionToken)[] s_browserRules =
185+
[
186+
("Opera", "OPR", null),
187+
("Flock", "Flock", null),
188+
("Edge", "Edge", null),
189+
("Edge", "EdgA", null),
190+
("Edge", "Edg", null),
191+
("Vivaldi", "Vivaldi", null),
192+
("Brave", "Brave Chrome", null),
193+
("Chrome", "Chrome", null),
194+
("Chrome", "CriOS", null),
195+
// Opera reports its version after "Version/" when that token is present
("Opera", "Opera", "Version/"),
196+
// NOTE(review): never selected by GetBrowser - the rule above has the same DetectToken and
// already falls back to the text after "Opera" when "Version/" is absent
("Opera", "Opera", null),
197+
("Internet Explorer", "MSIE", "MSIE "),
198+
("Internet Explorer", "Internet Explorer", null),
199+
// Detected via the Trident token; the version is read after "rv:"
("Internet Explorer", "Trident", "rv:"),
200+
("Shiira", "Shiira", null),
201+
("Firefox", "Firefox", null),
202+
("Firefox", "FxiOS", null),
203+
("Chimera", "Chimera", null),
204+
("Phoenix", "Phoenix", null),
205+
("Firebird", "Firebird", null),
206+
("Camino", "Camino", null),
207+
("Netscape", "Netscape", null),
208+
("OmniWeb", "OmniWeb", null),
209+
// Safari is detected by "Version/" itself, so it must stay behind every rule above
("Safari", "Version/", "Version/"),
210+
("Mozilla", "Mozilla", null),
211+
("Konqueror", "Konqueror", null),
212+
("iCab", "icab", null),
213+
("Lynx", "Lynx", null),
214+
("Links", "Links", null),
215+
("HotJava", "hotjava", null),
216+
("Amaya", "amaya", null),
217+
("IBrowse", "IBrowse", null),
218+
("Maxthon", "Maxthon", null),
219+
("Apple iPod", "ipod touch", null),
220+
("Ubuntu Web Browser", "Ubuntu", null),
221+
];
222+
125223
/// <summary>
126224
/// Mobiles
127225
/// </summary>

0 commit comments

Comments
 (0)