@@ -16,7 +16,7 @@ public static partial class Base64
1616 // If needed for debugging, you can do the following:
1717 /*static string VectorToString(Vector128<byte> vector)
1818 {
19- Span<byte> bytes = new byte[16];
19+ Span<byte> bytes = stackalloc byte[16];
2020 vector.CopyTo(bytes);
2121 StringBuilder sb = new StringBuilder();
2222 foreach (byte b in bytes)
@@ -216,21 +216,20 @@ private static unsafe ulong ToBase64MaskUrl(Block64* b, ref bool error)
216216 b ->chunk3 += roll3 ;
217217 return badCharmask ;
218218 }
219-
220219 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
221- private unsafe static ulong CompressBlock ( ref Block64 b , ulong mask , byte * output )
220+ private unsafe static ulong CompressBlock ( ref Block64 b , ulong mask , byte * output , byte * tablePtr )
222221 {
223222 ulong nmask = ~ mask ;
224- Compress ( b . chunk0 , ( ushort ) mask , output ) ;
225- Compress ( b . chunk1 , ( ushort ) ( mask >> 16 ) , output + UInt64 . PopCount ( nmask & 0xFFFF ) ) ;
226- Compress ( b . chunk2 , ( ushort ) ( mask >> 32 ) , output + UInt64 . PopCount ( nmask & 0xFFFFFFFF ) ) ;
227- Compress ( b . chunk3 , ( ushort ) ( mask >> 48 ) , output + UInt64 . PopCount ( nmask & 0xFFFFFFFFFFFFUL ) ) ;
223+ Compress ( b . chunk0 , ( ushort ) mask , output , tablePtr ) ;
224+ Compress ( b . chunk1 , ( ushort ) ( mask >> 16 ) , output + UInt64 . PopCount ( nmask & 0xFFFF ) , tablePtr ) ;
225+ Compress ( b . chunk2 , ( ushort ) ( mask >> 32 ) , output + UInt64 . PopCount ( nmask & 0xFFFFFFFF ) , tablePtr ) ;
226+ Compress ( b . chunk3 , ( ushort ) ( mask >> 48 ) , output + UInt64 . PopCount ( nmask & 0xFFFFFFFFFFFFUL ) , tablePtr ) ;
228227
229228 return UInt64 . PopCount ( nmask ) ;
230229 }
231230
232231 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
233- private static unsafe void Compress ( Vector128 < byte > data , ushort mask , byte * output )
232+ private static unsafe void Compress ( Vector128 < byte > data , ushort mask , byte * output , byte * tablePtr )
234233 {
235234 if ( mask == 0 )
236235 {
@@ -246,8 +245,8 @@ private static unsafe void Compress(Vector128<byte> data, ushort mask, byte* out
246245 // thintable_epi8[mask2] into a 128-bit register, using only
247246 // two instructions on most compilers.
248247
249- ulong value1 = Tables . thintableEpi8 [ mask1 ] ;
250- ulong value2 = Tables . thintableEpi8 [ mask2 ] ;
248+ ulong value1 = Tables . GetThintableEpi8 ( mask1 ) ;
249+ ulong value2 = Tables . GetThintableEpi8 ( mask2 ) ;
251250
252251 Vector128 < sbyte > shufmask = Vector128 . Create ( value2 , value1 ) . AsSByte ( ) ;
253252
@@ -259,19 +258,15 @@ private static unsafe void Compress(Vector128<byte> data, ushort mask, byte* out
259258 Vector128 < sbyte > pruned = AdvSimd . Arm64 . VectorTableLookup ( data . AsSByte ( ) , shufmask ) ;
260259 // we still need to put the two halves together.
261260 // we compute the popcount of the first half:
262- int pop1 = Tables . BitsSetTable256mul2 [ mask1 ] ;
261+ int pop1 = Tables . GetBitsSetTable256mul2 ( mask1 ) ;
263262 // then load the corresponding mask, what it does is to write
264263 // only the first pop1 bytes from the first 8 bytes, and then
265264 // it fills in with the bytes from the second 8 bytes + some filling
266- // at the end.
265+ // at the end.
266+ Vector128 < byte > compactmask = Vector128 . Load ( tablePtr + pop1 * 8 ) ;
267267
268- fixed ( byte * tablePtr = Tables . pshufbCombineTable )
269- {
270- Vector128 < byte > compactmask = Vector128 . Load ( tablePtr + pop1 * 8 ) ;
271-
272- Vector128 < byte > answer = AdvSimd . Arm64 . VectorTableLookup ( pruned . AsByte ( ) , compactmask ) ;
273- Vector128 . Store ( answer , output ) ;
274- }
268+ Vector128 < byte > answer = AdvSimd . Arm64 . VectorTableLookup ( pruned . AsByte ( ) , compactmask ) ;
269+ Vector128 . Store ( answer , output ) ;
275270 }
276271
277272 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
@@ -341,7 +336,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
341336 {
342337 // translation from ASCII to 6 bit values
343338 bool isUrl = false ;
344- byte [ ] toBase64 = Tables . ToBase64Value ;
345339 bytesConsumed = 0 ;
346340 bytesWritten = 0 ;
347341 const int blocksSize = 6 ;
@@ -352,6 +346,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
352346 fixed ( byte * srcInit = source )
353347 fixed ( byte * dstInit = dest )
354348 fixed ( byte * startOfBuffer = buffer )
349+ fixed ( byte * tablePtr = Tables . pshufbCombineTable )
355350 {
356351 byte * srcEnd = srcInit + source . Length ;
357352 byte * src = srcInit ;
@@ -429,7 +424,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
429424 // optimization opportunity: check for simple masks like those made of
430425 // continuous 1s followed by continuous 0s. And masks containing a
431426 // single bad character.
432- ulong compressedBytesCount = CompressBlock ( ref b , badCharMask , bufferPtr ) ;
427+ ulong compressedBytesCount = CompressBlock ( ref b , badCharMask , bufferPtr , tablePtr ) ;
433428 bufferPtr += compressedBytesCount ;
434429 bufferBytesConsumed += compressedBytesCount ;
435430 }
@@ -469,7 +464,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
469464 int lastBlockSrcCount = 0 ;
470465 while ( ( bufferPtr - startOfBuffer ) % 64 != 0 && src < srcEnd )
471466 {
472- byte val = toBase64 [ ( int ) * src ] ;
467+ byte val = SimdBase64 . Tables . GetToBase64Value ( ( uint ) * src ) ;
473468 * bufferPtr = val ;
474469 if ( val > 64 )
475470 {
@@ -533,7 +528,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
533528
534529 while ( leftover < 4 && src < srcEnd )
535530 {
536- byte val = toBase64 [ ( byte ) * src ] ;
531+ byte val = SimdBase64 . Tables . GetToBase64Value ( ( byte ) * src ) ;
537532 if ( val > 64 )
538533 {
539534 bytesConsumed = ( int ) ( src - srcInit ) ;
@@ -645,7 +640,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
645640 {
646641 // translation from ASCII to 6 bit values
647642 bool isUrl = false ;
648- byte [ ] toBase64 = Tables . ToBase64Value ;
649643 bytesConsumed = 0 ;
650644 bytesWritten = 0 ;
651645 const int blocksSize = 6 ;
@@ -656,6 +650,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
656650 fixed ( char * srcInit = source )
657651 fixed ( byte * dstInit = dest )
658652 fixed ( byte * startOfBuffer = buffer )
653+ fixed ( byte * tablePtr = Tables . pshufbCombineTable )
659654 {
660655 char * srcEnd = srcInit + source . Length ;
661656 char * src = srcInit ;
@@ -730,7 +725,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
730725 // optimization opportunity: check for simple masks like those made of
731726 // continuous 1s followed by continuous 0s. And masks containing a
732727 // single bad character.
733- ulong compressedBytesCount = CompressBlock ( ref b , badCharMask , bufferPtr ) ;
728+ ulong compressedBytesCount = CompressBlock ( ref b , badCharMask , bufferPtr , tablePtr ) ;
734729 bufferPtr += compressedBytesCount ;
735730 bufferBytesConsumed += compressedBytesCount ;
736731 }
@@ -787,7 +782,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
787782 bytesWritten += remainderBytesWritten ;
788783 return result ;
789784 }
790- byte val = toBase64 [ ( int ) * src ] ;
785+ byte val = SimdBase64 . Tables . GetToBase64Value ( ( uint ) * src ) ;
791786 * bufferPtr = val ;
792787 if ( val > 64 )
793788 {
@@ -858,7 +853,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMRegular(ReadOnlySp
858853 return OperationStatus . InvalidData ;
859854 }
860855
861- byte val = toBase64 [ ( byte ) * src ] ;
856+ byte val = SimdBase64 . Tables . GetToBase64Value ( ( uint ) * src ) ;
862857 if ( val > 64 )
863858 {
864859 bytesConsumed = ( int ) ( src - srcInit ) ;
@@ -971,7 +966,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<b
971966 {
972967 // translation from ASCII to 6 bit values
973968 bool isUrl = true ;
974- byte [ ] toBase64 = Tables . ToBase64UrlValue ;
975969 bytesConsumed = 0 ;
976970 bytesWritten = 0 ;
977971 const int blocksSize = 6 ;
@@ -982,6 +976,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<b
982976 fixed ( byte * srcInit = source )
983977 fixed ( byte * dstInit = dest )
984978 fixed ( byte * startOfBuffer = buffer )
979+ fixed ( byte * tablePtr = Tables . pshufbCombineTable )
985980 {
986981 byte * srcEnd = srcInit + source . Length ;
987982 byte * src = srcInit ;
@@ -1056,7 +1051,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<b
10561051 // optimization opportunity: check for simple masks like those made of
10571052 // continuous 1s followed by continuous 0s. And masks containing a
10581053 // single bad character.
1059- ulong compressedBytesCount = CompressBlock ( ref b , badCharMask , bufferPtr ) ;
1054+ ulong compressedBytesCount = CompressBlock ( ref b , badCharMask , bufferPtr , tablePtr ) ;
10601055 bufferPtr += compressedBytesCount ;
10611056 bufferBytesConsumed += compressedBytesCount ;
10621057
@@ -1098,7 +1093,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<b
10981093 int lastBlockSrcCount = 0 ;
10991094 while ( ( bufferPtr - startOfBuffer ) % 64 != 0 && src < srcEnd )
11001095 {
1101- byte val = toBase64 [ ( int ) * src ] ;
1096+ byte val = Tables . GetToBase64UrlValue ( ( byte ) * src ) ;
11021097 * bufferPtr = val ;
11031098 if ( val > 64 )
11041099 {
@@ -1163,7 +1158,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<b
11631158
11641159 while ( leftover < 4 && src < srcEnd )
11651160 {
1166- byte val = toBase64 [ ( byte ) * src ] ;
1161+ byte val = Tables . GetToBase64UrlValue ( ( byte ) * src ) ;
11671162 if ( val > 64 )
11681163 {
11691164 bytesConsumed = ( int ) ( src - srcInit ) ;
@@ -1277,7 +1272,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<c
12771272 {
12781273 // translation from ASCII to 6 bit values
12791274 bool isUrl = true ;
1280- byte [ ] toBase64 = Tables . ToBase64UrlValue ;
12811275 bytesConsumed = 0 ;
12821276 bytesWritten = 0 ;
12831277 const int blocksSize = 6 ;
@@ -1288,6 +1282,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<c
12881282 fixed ( char * srcInit = source )
12891283 fixed ( byte * dstInit = dest )
12901284 fixed ( byte * startOfBuffer = buffer )
1285+ fixed ( byte * tablePtr = Tables . pshufbCombineTable )
12911286 {
12921287 char * srcEnd = srcInit + source . Length ;
12931288 char * src = srcInit ;
@@ -1365,7 +1360,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<c
13651360 // optimization opportunity: check for simple masks like those made of
13661361 // continuous 1s followed by continuous 0s. And masks containing a
13671362 // single bad character.
1368- ulong compressedBytesCount = CompressBlock ( ref b , badCharMask , bufferPtr ) ;
1363+ ulong compressedBytesCount = CompressBlock ( ref b , badCharMask , bufferPtr , tablePtr ) ;
13691364 bufferPtr += compressedBytesCount ;
13701365 bufferBytesConsumed += compressedBytesCount ;
13711366
@@ -1424,7 +1419,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<c
14241419 bytesWritten += remainderBytesWritten ;
14251420 return result ;
14261421 }
1427- byte val = toBase64 [ ( int ) * src ] ;
1422+ byte val = Tables . GetToBase64UrlValue ( ( byte ) * src ) ;
14281423 * bufferPtr = val ;
14291424 if ( val > 64 )
14301425 {
@@ -1495,7 +1490,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64ARMUrl(ReadOnlySpan<c
14951490 bytesWritten = ( int ) ( dst - dstInit ) ;
14961491 return OperationStatus . InvalidData ;
14971492 }
1498- byte val = toBase64 [ ( byte ) * src ] ;
1493+ byte val = Tables . GetToBase64UrlValue ( ( byte ) * src ) ;
14991494 if ( val > 64 )
15001495 {
15011496 bytesConsumed = ( int ) ( src - srcInit ) ;
0 commit comments