@@ -15,6 +15,9 @@ namespace SimdUnicodeBenchmarks
15
15
public class Checker
16
16
{
17
17
List < char [ ] > names ;
18
+ List < char [ ] > nonAsciichars ;
19
+ public List < byte [ ] > nonAsciiByteArrays ; // Declare at the class level
20
+
18
21
List < bool > results ;
19
22
20
23
public static bool RuntimeIsAsciiApproach ( ReadOnlySpan < char > s )
@@ -50,23 +53,45 @@ public static char[] GetRandomASCIIString(uint n)
50
53
return chars ;
51
54
}
52
55
56
// Generator shared by every GetRandomNonASCIIString call. It is seeded exactly once with a
// fixed value, so the sequence of generated strings is reproducible from run to run while
// successive calls still produce different content. Not thread-safe (System.Random is not).
private static readonly Random nonAsciiRandom = new Random(12345);

/// <summary>
/// Builds an array of <paramref name="n"/> characters picked at random from an alphabet that
/// mixes ASCII letters and digits with accented Latin characters.
/// </summary>
/// <remarks>
/// Each position is drawn independently from the whole alphabet, so a non-ASCII character is
/// very likely but not guaranteed to appear, in particular for small <paramref name="n"/>.
/// Previously a new <c>Random(12345)</c> was created on every call, which made every call
/// with the same length return identical content; callers that invoke this in a loop (as
/// Setup does) ended up with many copies of one string.
/// </remarks>
/// <param name="n">Number of characters to generate; 0 yields an empty array.</param>
/// <returns>A newly allocated array of length <paramref name="n"/>.</returns>
public static char[] GetRandomNonASCIIString(uint n)
{
    // ASCII letters and digits mixed with 'é' (Latin-1 Supplement) and a few Latin Extended-A characters.
    const string allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89šžŸũŭůűųŷŹźŻżŽ";

    var chars = new char[n];

    // The index has the same unsigned type as n, avoiding a mixed int/uint comparison.
    for (uint i = 0; i < n; i++)
    {
        chars[i] = allowedChars[nonAsciiRandom.Next(0, allowedChars.Length)];
    }

    return chars;
}
71
+
72
+
53
73
54
- [ Params ( 100 , 200 , 500 ) ]
74
+ [ Params ( 100 , 200 , 500 , 1000 , 2000 ) ]
55
75
public uint N ;
56
76
77
+
57
78
/// <summary>
/// Benchmark-wide setup: recreates the input collections and fills them with 100 entries
/// generated for the current value of N.
/// </summary>
/// <remarks>
/// For every entry this stores one random ASCII char array in <c>names</c>, the UTF-8
/// encoding of one string from GetRandomNonASCIIString in <c>nonAsciiByteArrays</c>, and a
/// <c>false</c> placeholder in <c>results</c>.
/// </remarks>
[GlobalSetup]
public void Setup()
{
    const int inputCount = 100;

    // Start from empty collections each time setup runs.
    results = new List<bool>();
    nonAsciiByteArrays = new List<byte[]>();
    names = new List<char[]>();

    int produced = 0;
    while (produced < inputCount)
    {
        char[] asciiInput = GetRandomASCIIString(N);
        names.Add(asciiInput);

        // Encode up front so the benchmarks can consume ready-made byte buffers.
        char[] mixedInput = GetRandomNonASCIIString(N);
        byte[] encoded = Encoding.UTF8.GetBytes(mixedInput);
        nonAsciiByteArrays.Add(encoded);

        results.Add(false);
        produced++;
    }
}
69
93
94
+
70
95
[ Benchmark ]
71
96
public void FastUnicodeIsAscii ( )
72
97
{
@@ -99,33 +124,23 @@ public void RuntimeIsAscii()
99
124
count += 1 ;
100
125
}
101
126
}
102
-
103
-
104
127
/// <summary>
/// Benchmark that calls <c>Ascii.GetIndexOfFirstNonAsciiByte</c> once for every buffer in
/// <c>nonAsciiByteArrays</c>.
/// </summary>
/// <remarks>
/// The buffers are used exactly as stored; no encoding or mutation happens inside the
/// benchmark. The index returned by each call is not used.
/// </remarks>
[Benchmark]
public void TestErrorGetIndexOfFirstNonAsciiByteBenchmark()
{
    unsafe
    {
        foreach (byte[] buffer in nonAsciiByteArrays)
        {
            nuint byteCount = (nuint)buffer.Length;

            // Pin the managed array so a raw pointer can be handed to the scanner.
            fixed (byte* start = buffer)
            {
                nuint firstNonAscii = Ascii.GetIndexOfFirstNonAsciiByte(start, byteCount);
            }
        }
    }
}
127
141
128
142
143
+
129
144
}
130
145
131
146
public class Program
0 commit comments