Skip to content

Commit 8722467

Browse files
committed
Added tests for ASCII validation
1 parent 0bf6362 commit 8722467

File tree

2 files changed

+58
-5
lines changed

2 files changed

+58
-5
lines changed

src/Ascii.cs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,20 @@
66
using System.Runtime.InteropServices;
77

88

9+
/* PAR:
10+
| Method | N | Mean | Error | StdDev |
11+
|----------------------- |---- |-----------:|---------:|---------:|
12+
| FastUnicodeIsAscii | 100 | 652.6 ns | 2.20 ns | 1.95 ns |
13+
| StandardUnicodeIsAscii | 100 | 2,466.5 ns | 21.77 ns | 20.36 ns |
14+
| RuntimeIsAscii | 100 | 2,502.7 ns | 29.81 ns | 27.89 ns |
15+
| FastUnicodeIsAscii | 200 | 1,300.8 ns | 17.95 ns | 14.99 ns |
16+
| StandardUnicodeIsAscii | 200 | 5,216.6 ns | 62.48 ns | 55.38 ns |
17+
| RuntimeIsAscii | 200 | 5,293.2 ns | 41.50 ns | 38.82 ns |
18+
| FastUnicodeIsAscii | 500 | 2,978.6 ns | 34.99 ns | 32.73 ns |
19+
| StandardUnicodeIsAscii | 500 | 6,172.9 ns | 74.53 ns | 69.71 ns |
20+
| RuntimeIsAscii | 500 | 6,210.8 ns | 80.82 ns | 63.10 ns | */
21+
22+
923
// Ideally, we would want to implement something that looks like
1024
// https://learn.microsoft.com/en-us/dotnet/api/system.text.asciiencoding?view=net-7.0
1125
//
@@ -63,7 +77,7 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
6377
{
6478
// instead of a load, we could have set it to zero, like so...
6579
// total = Vector128<ushort>.Zero;
66-
// or to a custome value like this:
80+
// or to a custom value like this:
6781
// total = DuplicateToVector128((char)0);
6882
Vector128<ushort> total = AdvSimd.LoadVector128((ushort*)pStart);
6983
i += 8;

test/AsciiTest.cs

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,48 @@ public class AsciiTest
66
[Fact]
77
public void Test1()
88
{
9-
Assert.True(SimdUnicode.Ascii.IsAscii("absads12323123232131231232132132132312321321312321"));
10-
Assert.False(SimdUnicode.Ascii.IsAscii("absaé12323123232131231232132132132312321321312321"));
11-
Assert.True(SimdUnicode.Ascii.SIMDIsAscii("absads12323123232131231232132132132312321321312321"));
12-
Assert.False(SimdUnicode.Ascii.SIMDIsAscii("absaé12323123232131231232132132132312321321312321"));
9+
// Assert.True(SimdUnicode.Ascii.IsAscii("absads12323123232131231232132132132312321321312321"));
10+
// Assert.False(SimdUnicode.Ascii.IsAscii("absaé12323123232131231232132132132312321321312321"));
11+
// Assert.True(SimdUnicode.Ascii.SIMDIsAscii("absads12323123232131231232132132132312321321312321"));
12+
// Assert.True(SimdUnicode.Ascii.SIMDIsAscii("12345678")); // 8 characters pass
13+
// Assert.True(SimdUnicode.Ascii.SIMDIsAscii("123456789")); // 9 characters fails
14+
Assert.True(SimdUnicode.Ascii.SIMDIsAscii("1234567890123456")); // 16 characters fails
15+
// Assert.False(SimdUnicode.Ascii.SIMDIsAscii("absaé12323123232131231232132132132312321321312321"));
16+
// Assert.False(SimdUnicode.Ascii.SIMDIsAscii("absa12323123232131231232132132132312321321312321é")); // pass
1317
}
18+
19+
/* [Fact]
20+
public void HardCodedSequencesTest()
21+
{
22+
string[] goodsequences = {
23+
"a",
24+
"abcde12345",
25+
"\x71",
26+
"\x75\x4c",
27+
"\x7f\x4c\x23\x3c\x3a\x6f\x5d\x44\x13\x70"
28+
};
29+
30+
string[] badsequences = {
31+
"\xc3\x28",
32+
"\xa0\xa1",
33+
"\xe2\x28\xa1",
34+
"\xe2\x82\x28",
35+
"\xf0\x28\x8c\xbc",
36+
// ... (continue with all sequences)
37+
};
38+
39+
foreach (var sequence in goodsequences)
40+
{
41+
Assert.True(SimdUnicode.Ascii.IsAscii(sequence), "Expected valid ASCII sequence");
42+
Assert.True(SimdUnicode.Ascii.SIMDIsAscii(sequence), "Expected SIMDIsAscii to validate ASCII sequence");
43+
}
44+
45+
foreach (var sequence in badsequences)
46+
{
47+
Assert.False(SimdUnicode.Ascii.IsAscii(sequence), "Expected non-valid ASCII sequence");
48+
Assert.False(SimdUnicode.Ascii.SIMDIsAscii(sequence), "Expected SIMDIsAscii to invalidate non-ASCII sequence");
49+
}
50+
} */
51+
52+
1453
}

0 commit comments

Comments
 (0)