11using System ;
2+ using System . Runtime . Intrinsics ;
3+ using System . Runtime . Intrinsics . X86 ;
4+ using System . Runtime . Intrinsics . Arm ;
5+ using System . Runtime . CompilerServices ;
6+ using System . Runtime . InteropServices ;
27
3- namespace SimdUnicode {
4- public static class Ascii {
8+
9+ // Ideally, we would want to implement something that looks like
10+ // https://learn.microsoft.com/en-us/dotnet/api/system.text.asciiencoding?view=net-7.0
11+ //
12+ // See https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs
13+ //
14+ // See https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Transcoding.cs
namespace SimdUnicode
{
    /// <summary>
    /// ASCII validation helpers for UTF-16 text, with scalar and SIMD-accelerated variants.
    /// </summary>
    public static unsafe class Ascii
    {
        /// <summary>Number of UTF-16 code units held by one 128-bit vector.</summary>
        private const int CharsPerVector128 = 8;

        /// <summary>Largest code unit that is still ASCII.</summary>
        private const ushort MaxAsciiValue = 127;

        /// <summary>
        /// Value returned by <c>Sse2.MoveMask</c> on a 16-byte vector when every byte has its
        /// top bit set: one bit per byte, hence 16 bits.
        /// </summary>
        private const int AllBytesSetMask = 0xFFFF;

        /// <summary>Returns <c>true</c> when <paramref name="c"/> is in the ASCII range (below 128).</summary>
        /// <param name="c">The UTF-16 code unit to test.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static bool IsAscii(this char c) => c < 128;

        /// <summary>Returns <c>true</c> when every character of <paramref name="s"/> is ASCII.</summary>
        /// <param name="s">The string to test. An empty string is considered ASCII.</param>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
        public static bool IsAscii(this string s)
        {
            if (s is null)
            {
                throw new ArgumentNullException(nameof(s));
            }

            foreach (var c in s)
            {
                if (!c.IsAscii())
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>Returns <c>true</c> when every character of <paramref name="s"/> is ASCII (scalar loop).</summary>
        /// <param name="s">The characters to test. An empty span is considered ASCII.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static bool IsAscii(this ReadOnlySpan<char> s)
        {
            foreach (var c in s)
            {
                if (!c.IsAscii())
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Returns <c>true</c> when every character of <paramref name="s"/> is ASCII, using
        /// 128-bit SIMD instructions when the hardware supports them (ARM64 AdvSimd or x86 SSE4.1)
        /// and a scalar loop otherwise.
        /// </summary>
        /// <param name="s">The characters to test. An empty span is considered ASCII.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static bool SIMDIsAscii(this ReadOnlySpan<char> s)
        {
            if (s.IsEmpty)
            {
                return true;
            }

            // The guard must name the instruction set that is actually executed:
            // the ARM helper uses AdvSimd.Or and AdvSimd.Arm64.MaxAcross.
            if (AdvSimd.Arm64.IsSupported)
            {
                fixed (char* pStart = &MemoryMarshal.GetReference(s))
                {
                    return IsAsciiAdvSimd(pStart, s.Length);
                }
            }

            if (Sse41.IsSupported)
            {
                fixed (char* pStart = &MemoryMarshal.GetReference(s))
                {
                    return IsAsciiSse41(pStart, s.Length);
                }
            }

            // Fallback code
            foreach (var c in s)
            {
                if (!c.IsAscii())
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>ARM64 implementation: ORs all full vectors together, then checks the largest lane.</summary>
        private static bool IsAsciiAdvSimd(char* pStart, int length)
        {
            // We OR together all the loaded vectors and then use the maximum lane value to
            // determine if any of the characters exceeds the ASCII range. See
            // https://github.com/simdutf/simdutf/blob/master/src/arm64/implementation.cpp
            //
            // There is not a lot of documentation, but we can read the code at
            // https://github.com/dotnet/runtime/tree/main/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm
            // and see examples at
            // https://github.com/dotnet/runtime/blob/main/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/OptimizedInboxTextEncoder.AdvSimd64.cs
            // See also
            // https://learn.microsoft.com/en-us/dotnet/api/system.runtime.intrinsics.arm.advsimd.arm64.maxacross?view=net-8.0

            // Starting from zero (instead of the first load) removes the need to special-case
            // short inputs: with no full vector the accumulator stays zero and passes the check.
            // A custom starting value could be set with AdvSimd.DuplicateToVector128.
            Vector128<ushort> total = Vector128<ushort>.Zero;
            int i = 0;

            // unrolling could be useful here:
            for (; i + CharsPerVector128 <= length; i += CharsPerVector128)
            {
                Vector128<ushort> raw = AdvSimd.LoadVector128((ushort*)pStart + i);
                total = AdvSimd.Or(total, raw);
            }

            // A bit above the ASCII range set in any input lane survives the OR, so the
            // largest accumulated lane exceeds 127 exactly when some character did.
            if (AdvSimd.Arm64.MaxAcross(total).ToScalar() > MaxAsciiValue)
            {
                return false;
            }

            return IsAsciiScalarTail(pStart, i, length);
        }

        /// <summary>x86 SSE4.1 implementation: ORs all full vectors together, then checks every lane is at most 127.</summary>
        private static bool IsAsciiSse41(char* pStart, int length)
        {
            // See https://learn.microsoft.com/en-us/dotnet/api/system.runtime.intrinsics.x86.sse2.comparelessthan?view=net-8.0
            Vector128<ushort> total = Vector128<ushort>.Zero;
            int i = 0;

            // unrolling could be useful here:
            for (; i + CharsPerVector128 <= length; i += CharsPerVector128)
            {
                Vector128<ushort> raw = Sse41.LoadDquVector128((ushort*)pStart + i);
                total = Sse2.Or(total, raw);
            }

            // max(127, lane) == 127 holds only for lanes that are <= 127.
            Vector128<ushort> b127 = Vector128.Create(MaxAsciiValue);
            Vector128<ushort> clamped = Sse41.Max(b127, total);
            Vector128<ushort> laneIsAscii = Sse2.CompareEqual(clamped, b127);

            // MoveMask yields one bit per byte (16 bits for a 128-bit vector); all of them
            // must be set for the accumulated vector to be ASCII-only.
            if (Sse2.MoveMask(laneIsAscii.AsByte()) != AllBytesSetMask)
            {
                return false;
            }

            return IsAsciiScalarTail(pStart, i, length);
        }

        /// <summary>Scalar check of the characters in <c>[start, length)</c> left over after the vector loop.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool IsAsciiScalarTail(char* pStart, int start, int length)
        {
            for (int i = start; i < length; i++)
            {
                if (pStart[i] > MaxAsciiValue)
                {
                    return false;
                }
            }
            return true;
        }
    }
}
129+ // Further reading:
130+ // https://github.com/dotnet/runtime/blob/main/src/libraries/System.Text.Encodings.Web/src/System/Text/Unicode/UnicodeHelpers.cs
131+
0 commit comments