66using System . Buffers ;
77using System . Buffers . Binary ;
88
9+
10+ using System . Text ;
911namespace SimdBase64
1012{
1113 namespace AVX2
1214 {
1315 public static partial class Base64
1416 {
15- /*
17+
1618 // If needed for debugging, you can do the following:
19+ /*
1720 static string VectorToString(Vector256<byte> vector)
1821 {
19- Span<byte> bytes = new byte[16 ];
22+ Span<byte> bytes = new byte[32 ];
2023 vector.CopyTo(bytes);
2124 StringBuilder sb = new StringBuilder();
2225 foreach (byte b in bytes)
2326 {
2427 sb.Append(b.ToString("X2") + " ");
2528 }
2629 return sb.ToString().TrimEnd();
27- }*/
30+ }
31+
32+ static string VectorToStringChar(Vector256<byte> vector)
33+ {
34+ Span<byte> bytes = new byte[32];
35+ vector.CopyTo(bytes);
36+ StringBuilder sb = new StringBuilder();
37+ foreach (byte b in bytes)
38+ {
39+ sb.Append((char)b);
40+ }
41+ return sb.ToString().TrimEnd();
42+ }
43+ */
2844
2945 [ StructLayout ( LayoutKind . Sequential ) ]
3046 private struct Block64
@@ -60,14 +76,15 @@ private unsafe static void LoadBlock(Block64* b, char* src)
// Builds the combined 64-bit mask for a whole Block64.
// Each chunk (chunk0, chunk1) is handed by reference to the Vector256<byte> overload of
// ToBase64Mask, which replaces the chunk's contents in place and may set `error`.
// The two per-chunk results are then merged into a single UInt64.
6076 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
6177 private static unsafe UInt64 ToBase64Mask ( bool base64Url , Block64 * b , ref bool error )
6278 {
63- ulong m0 = ToBase64Mask ( base64Url , ref b ->chunk0 , ref error ) ;
64- ulong m1 = ToBase64Mask ( base64Url , ref b ->chunk1 , ref error ) ;
79+ UInt64 m0 = ToBase64Mask ( base64Url , ref b ->chunk0 , ref error ) ;
80+ UInt64 m1 = ToBase64Mask ( base64Url , ref b ->chunk1 , ref error ) ;
// chunk0's mask stays in the low bits; chunk1's mask is shifted up to start at bit 32.
6581 return m0 | ( m1 << 32 ) ;
6682 }
6783
6884 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
69- private static ushort ToBase64Mask ( bool base64Url , ref Vector256 < byte > src , ref bool error )
85+ private static UInt64 ToBase64Mask ( bool base64Url , ref Vector256 < byte > src , ref bool error )
7086 {
87+
7188 Vector256 < sbyte > asciiSpaceTbl = Vector256 . Create (
7289 0x20 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x9 , 0xa ,
7390 0x0 , 0xc , 0xd , 0x0 , 0x0 , 0x20 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 ,
@@ -137,18 +154,19 @@ private static ushort ToBase64Mask(bool base64Url, ref Vector256<byte> src, ref
137154 Vector256 < sbyte > outVector = Avx2 . AddSaturate ( Avx2 . Shuffle ( deltaValues . AsByte ( ) , deltaHash ) . AsSByte ( ) ,
138155 src . AsSByte ( ) ) ;
139156
140- Vector256 < byte > chkVector = Avx2 . AddSaturate ( Avx2 . Shuffle ( checkValues . AsByte ( ) , checkHash ) . AsByte ( ) ,
141- src . AsByte ( ) ) ;
157+ Vector256 < sbyte > chkVector = Avx2 . AddSaturate ( Avx2 . Shuffle ( checkValues . AsByte ( ) , checkHash ) . AsSByte ( ) ,
158+ src . AsSByte ( ) ) ;
142159
143- int mask = Avx2 . MoveMask ( chkVector . AsByte ( ) ) ;
160+ UInt32 mask = ( uint ) Avx2 . MoveMask ( chkVector . AsByte ( ) ) ;
144161 if ( mask != 0 )
145162 {
146163 Vector256 < byte > asciiSpace = Avx2 . CompareEqual ( Avx2 . Shuffle ( asciiSpaceTbl . AsByte ( ) , src ) , src ) ;
147- error |= ( mask != Avx2 . MoveMask ( asciiSpace ) ) ;
164+ UInt32 spaces = ( uint ) Avx2 . MoveMask ( asciiSpace ) ;
165+ error |= ( mask != spaces ) ;
148166 }
149167
150168 src = outVector . AsByte ( ) ;
151- return ( ushort ) mask ;
169+ return ( UInt64 ) mask ;
152170 }
153171
154172 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
@@ -260,7 +278,6 @@ private unsafe static void Base64Decode(byte* output, Vector256<byte> input)
260278 Vector256 < byte > t2 = Avx2 . Shuffle ( t1 . AsSByte ( ) , packShuffle ) . AsByte ( ) ;
261279
262280 // Store the output. This writes 16 bytes, but we only need 12.
263- // Avx2.Store(output, t2);
264281 Sse2 . Store ( output , t2 . GetLower ( ) ) ;
265282 Sse2 . Store ( output + 12 , t2 . GetUpper ( ) ) ;
266283 }
@@ -290,7 +307,7 @@ private static unsafe void Base64DecodeBlockSafe(byte* outPtr, byte* srcPtr)
290307 {
291308 // Copy the 24 decoded bytes held in the temporary buffer into the output buffer, at an offset of 24 bytes.
292309 // This step is necessary because the last chunk must not write past its 24 output bytes (a direct vector store writes more bytes than are needed).
293- Buffer . MemoryCopy ( bufferPtr , outPtr + 24 , 24 , 24 ) ; // DEGUG:Uncomment
310+ Buffer . MemoryCopy ( bufferPtr , outPtr + 24 , 24 , 24 ) ;
294311 }
295312 }
296313
@@ -412,7 +429,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
412429 }
413430 else if ( bufferPtr != startOfBuffer )
414431 {
415-
416432 CopyBlock ( & b , bufferPtr ) ;
417433 bufferPtr += 64 ;
418434 bufferBytesConsumed += 64 ;
@@ -421,14 +437,10 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
421437 {
422438 if ( dst >= endOfSafe64ByteZone )
423439 {
424-
425-
426440 Base64DecodeBlockSafe ( dst , & b ) ;
427441 }
428442 else
429443 {
430-
431-
432444 Base64DecodeBlock ( dst , & b ) ;
433445 }
434446 bufferBytesWritten += 48 ;
@@ -452,8 +464,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
452464 Base64DecodeBlock ( dst , startOfBuffer + ( blocksSize - 2 ) * 64 ) ;
453465 }
454466
455-
456-
457467 dst += 48 ;
458468 Buffer . MemoryCopy ( startOfBuffer + ( blocksSize - 1 ) * 64 , startOfBuffer , 64 , 64 ) ;
459469 bufferPtr -= ( blocksSize - 1 ) * 64 ;
@@ -467,14 +477,11 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
467477 // Optimization note: if this is almost full, then it is worth our
468478 // time, otherwise, we should just decode directly.
469479
470-
471480 int lastBlock = ( int ) ( ( bufferPtr - startOfBuffer ) % 64 ) ;
472481 int lastBlockSrcCount = 0 ;
473482 // There are some bytes remaining beyond the last complete 64-byte block
474483 if ( lastBlock != 0 && srcEnd - src + lastBlock >= 64 ) // We first check if there is any error and eliminate white spaces?:
475484 {
476-
477- // int lastBlockSrcCount = 0;
478485 while ( ( bufferPtr - startOfBuffer ) % 64 != 0 && src < srcEnd )
479486 {
480487 byte val = toBase64 [ ( int ) * src ] ;
@@ -513,8 +520,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
513520 {
514521 Base64DecodeBlock ( dst , subBufferPtr ) ;
515522 }
516- // bufferBytesWritten += 48;
517- dst += 48 ; // 64 bits of base64 decodes to 48 bits
523+ dst += 48 ; // 64 bytes of base64 decode to 48 bytes
518524 }
519525 if ( ( bufferPtr - subBufferPtr ) % 64 != 0 )
520526 {
@@ -528,29 +534,24 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
528534 << 8 ;
529535 triple = BinaryPrimitives . ReverseEndianness ( triple ) ;
530536 Buffer . MemoryCopy ( & triple , dst , 4 , 4 ) ;
531-
532537 dst += 3 ;
533538 subBufferPtr += 4 ;
534539 }
535540 if ( subBufferPtr + 4 <= bufferPtr ) // this may be the very last element, might be incomplete
536541 {
537-
538-
539542 UInt32 triple = ( ( ( UInt32 ) ( ( byte ) ( subBufferPtr [ 0 ] ) ) << 3 * 6 ) +
540543 ( ( UInt32 ) ( ( byte ) ( subBufferPtr [ 1 ] ) ) << 2 * 6 ) +
541544 ( ( UInt32 ) ( ( byte ) ( subBufferPtr [ 2 ] ) ) << 1 * 6 ) +
542545 ( ( UInt32 ) ( ( byte ) ( subBufferPtr [ 3 ] ) ) << 0 * 6 ) )
543546 << 8 ;
544547 triple = BinaryPrimitives . ReverseEndianness ( triple ) ;
545548 Buffer . MemoryCopy ( & triple , dst , 3 , 3 ) ;
546-
547549 dst += 3 ;
548550 subBufferPtr += 4 ;
549551 }
550552 int leftover = ( int ) ( bufferPtr - subBufferPtr ) ;
551553 if ( leftover > 0 )
552554 {
553-
554555 while ( leftover < 4 && src < srcEnd )
555556 {
556557 byte val = toBase64 [ ( byte ) * src ] ;
@@ -562,8 +563,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
562563 }
563564 subBufferPtr [ leftover ] = ( byte ) ( val ) ;
564565 leftover += ( val <= 63 ) ? 1 : 0 ;
565-
566- // bufferBytesConsumed +=1;
567566 src ++ ;
568567 }
569568
@@ -610,7 +609,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
610609 }
611610 }
612611
613-
614612 if ( src < srcEnd + equalsigns ) // We finished processing 64-bit blocks, we're not quite at the end yet
615613 {
616614 bytesConsumed = ( int ) ( src - srcInit ) ;
0 commit comments