Skip to content

Commit b185207

Browse files
author
Daniel Lemire
committed
some fixes to the AVX kernel
1 parent a408bd8 commit b185207

File tree

3 files changed

+32
-47
lines changed

3 files changed

+32
-47
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ To run just one benchmark, use a filter:
7171

7272
```
7373
cd benchmark
74-
dotnet run --configuration Release --filter "*somefilter*"
74+
dotnet run -c Release --filter "SimdUnicodeBenchmarks.RealDataBenchmark.AVX2DecodingRealDataUTF8(FileName: \"data/email/\")"
7575
```
7676

7777
If you are under macOS or Linux, you may want to run the benchmarks in privileged mode:

src/Base64AVX2UTF16.cs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
192192
// There are some bytes remaining beyond the last full 64-byte block
193193
if (lastBlock != 0 && srcEnd - src + lastBlock >= 64) // We first check if there is any error and eliminate white spaces?:
194194
{
195-
196-
// int lastBlockSrcCount = 0;
197195
while ((bufferPtr - startOfBuffer) % 64 != 0 && src < srcEnd)
198196
{
199197
if (!SimdBase64.Scalar.Base64.IsValidBase64Index(*src))
@@ -247,7 +245,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
247245
{
248246
Base64DecodeBlock(dst, subBufferPtr);
249247
}
250-
// bufferBytesWritten += 48;
251248
dst += 48;// 64 bits of base64 decodes to 48 bits
252249
}
253250
if ((bufferPtr - subBufferPtr) % 64 != 0)
@@ -302,8 +299,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
302299
}
303300
subBufferPtr[leftover] = (byte)(val);
304301
leftover += (val <= 63) ? 1 : 0;
305-
306-
// bufferBytesConsumed +=1;
307302
src++;
308303
}
309304

@@ -356,20 +351,12 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
356351
bytesConsumed = (int)(src - srcInit);
357352
bytesWritten = (int)(dst - dstInit);
358353

359-
// bytesConsumed = Math.Max(0,(int)(src - srcInit) - (int)bufferBytesConsumed);
360-
// bytesWritten = Math.Max(0,(int)(dst - dstInit) - (int)bufferBytesWritten);
361-
362-
363-
364354
int remainderBytesConsumed = 0;
365355
int remainderBytesWritten = 0;
366356

367357
OperationStatus result =
368358
SimdBase64.Scalar.Base64.Base64WithWhiteSpaceToBinaryScalar(source.Slice(bytesConsumed), dest.Slice(bytesWritten), out remainderBytesConsumed, out remainderBytesWritten, isUrl);
369359

370-
371-
372-
373360
if (result == OperationStatus.InvalidData)
374361
{
375362
bytesConsumed += remainderBytesConsumed;

src/Base64AVX2UTF8.cs

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,41 @@
66
using System.Buffers;
77
using System.Buffers.Binary;
88

9+
10+
using System.Text;
911
namespace SimdBase64
1012
{
1113
namespace AVX2
1214
{
1315
public static partial class Base64
1416
{
15-
/*
17+
1618
// If needed for debugging, you can do the following:
19+
/*
1720
static string VectorToString(Vector256<byte> vector)
1821
{
19-
Span<byte> bytes = new byte[16];
22+
Span<byte> bytes = new byte[32];
2023
vector.CopyTo(bytes);
2124
StringBuilder sb = new StringBuilder();
2225
foreach (byte b in bytes)
2326
{
2427
sb.Append(b.ToString("X2") + " ");
2528
}
2629
return sb.ToString().TrimEnd();
27-
}*/
30+
}
31+
32+
static string VectorToStringChar(Vector256<byte> vector)
33+
{
34+
Span<byte> bytes = new byte[32];
35+
vector.CopyTo(bytes);
36+
StringBuilder sb = new StringBuilder();
37+
foreach (byte b in bytes)
38+
{
39+
sb.Append((char)b);
40+
}
41+
return sb.ToString().TrimEnd();
42+
}
43+
*/
2844

2945
[StructLayout(LayoutKind.Sequential)]
3046
private struct Block64
@@ -60,14 +76,15 @@ private unsafe static void LoadBlock(Block64* b, char* src)
6076
[MethodImpl(MethodImplOptions.AggressiveInlining)]
6177
private static unsafe UInt64 ToBase64Mask(bool base64Url, Block64* b, ref bool error)
6278
{
63-
ulong m0 = ToBase64Mask(base64Url, ref b->chunk0, ref error);
64-
ulong m1 = ToBase64Mask(base64Url, ref b->chunk1, ref error);
79+
UInt64 m0 = ToBase64Mask(base64Url, ref b->chunk0, ref error);
80+
UInt64 m1 = ToBase64Mask(base64Url, ref b->chunk1, ref error);
6581
return m0 | (m1 << 32);
6682
}
6783

6884
[MethodImpl(MethodImplOptions.AggressiveInlining)]
69-
private static ushort ToBase64Mask(bool base64Url, ref Vector256<byte> src, ref bool error)
85+
private static UInt64 ToBase64Mask(bool base64Url, ref Vector256<byte> src, ref bool error)
7086
{
87+
7188
Vector256<sbyte> asciiSpaceTbl = Vector256.Create(
7289
0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa,
7390
0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0,
@@ -137,18 +154,19 @@ private static ushort ToBase64Mask(bool base64Url, ref Vector256<byte> src, ref
137154
Vector256<sbyte> outVector = Avx2.AddSaturate(Avx2.Shuffle(deltaValues.AsByte(), deltaHash).AsSByte(),
138155
src.AsSByte());
139156

140-
Vector256<byte> chkVector = Avx2.AddSaturate(Avx2.Shuffle(checkValues.AsByte(), checkHash).AsByte(),
141-
src.AsByte());
157+
Vector256<sbyte> chkVector = Avx2.AddSaturate(Avx2.Shuffle(checkValues.AsByte(), checkHash).AsSByte(),
158+
src.AsSByte());
142159

143-
int mask = Avx2.MoveMask(chkVector.AsByte());
160+
UInt32 mask = (uint)Avx2.MoveMask(chkVector.AsByte());
144161
if (mask != 0)
145162
{
146163
Vector256<byte> asciiSpace = Avx2.CompareEqual(Avx2.Shuffle(asciiSpaceTbl.AsByte(), src), src);
147-
error |= (mask != Avx2.MoveMask(asciiSpace));
164+
UInt32 spaces = (uint)Avx2.MoveMask(asciiSpace);
165+
error |= (mask != spaces);
148166
}
149167

150168
src = outVector.AsByte();
151-
return (ushort)mask;
169+
return (UInt64)mask;
152170
}
153171

154172
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -260,7 +278,6 @@ private unsafe static void Base64Decode(byte* output, Vector256<byte> input)
260278
Vector256<byte> t2 = Avx2.Shuffle(t1.AsSByte(), packShuffle).AsByte();
261279

262280
// Store the output. Each store below writes 16 bytes, but only 12 of them are needed.
263-
// Avx2.Store(output, t2);
264281
Sse2.Store(output, t2.GetLower());
265282
Sse2.Store(output + 12, t2.GetUpper());
266283
}
@@ -290,7 +307,7 @@ private static unsafe void Base64DecodeBlockSafe(byte* outPtr, byte* srcPtr)
290307
{
291308
// Copy only the first 24 bytes from bufferPtr into the output buffer, offset by 24 bytes.
292309
// This step is necessary because the fourth block may not need all 16 bytes if it contains padding characters.
293-
Buffer.MemoryCopy(bufferPtr, outPtr + 24, 24, 24);// DEGUG:Uncomment
310+
Buffer.MemoryCopy(bufferPtr, outPtr + 24, 24, 24);
294311
}
295312
}
296313

@@ -412,7 +429,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
412429
}
413430
else if (bufferPtr != startOfBuffer)
414431
{
415-
416432
CopyBlock(&b, bufferPtr);
417433
bufferPtr += 64;
418434
bufferBytesConsumed += 64;
@@ -421,14 +437,10 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
421437
{
422438
if (dst >= endOfSafe64ByteZone)
423439
{
424-
425-
426440
Base64DecodeBlockSafe(dst, &b);
427441
}
428442
else
429443
{
430-
431-
432444
Base64DecodeBlock(dst, &b);
433445
}
434446
bufferBytesWritten += 48;
@@ -452,8 +464,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
452464
Base64DecodeBlock(dst, startOfBuffer + (blocksSize - 2) * 64);
453465
}
454466

455-
456-
457467
dst += 48;
458468
Buffer.MemoryCopy(startOfBuffer + (blocksSize - 1) * 64, startOfBuffer, 64, 64);
459469
bufferPtr -= (blocksSize - 1) * 64;
@@ -467,14 +477,11 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
467477
// Optimization note: if this is almost full, then it is worth our
468478
// time, otherwise, we should just decode directly.
469479

470-
471480
int lastBlock = (int)((bufferPtr - startOfBuffer) % 64);
472481
int lastBlockSrcCount = 0;
473482
// There are some bytes remaining beyond the last full 64-byte block
474483
if (lastBlock != 0 && srcEnd - src + lastBlock >= 64) // We first check if there is any error and eliminate white spaces?:
475484
{
476-
477-
// int lastBlockSrcCount = 0;
478485
while ((bufferPtr - startOfBuffer) % 64 != 0 && src < srcEnd)
479486
{
480487
byte val = toBase64[(int)*src];
@@ -513,8 +520,7 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
513520
{
514521
Base64DecodeBlock(dst, subBufferPtr);
515522
}
516-
// bufferBytesWritten += 48;
517-
dst += 48;// 64 bits of base64 decodes to 48 bits
523+
dst += 48; // 64 bits of base64 decodes to 48 bits
518524
}
519525
if ((bufferPtr - subBufferPtr) % 64 != 0)
520526
{
@@ -528,29 +534,24 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
528534
<< 8;
529535
triple = BinaryPrimitives.ReverseEndianness(triple);
530536
Buffer.MemoryCopy(&triple, dst, 4, 4);
531-
532537
dst += 3;
533538
subBufferPtr += 4;
534539
}
535540
if (subBufferPtr + 4 <= bufferPtr) // this may be the very last element, might be incomplete
536541
{
537-
538-
539542
UInt32 triple = (((UInt32)((byte)(subBufferPtr[0])) << 3 * 6) +
540543
((UInt32)((byte)(subBufferPtr[1])) << 2 * 6) +
541544
((UInt32)((byte)(subBufferPtr[2])) << 1 * 6) +
542545
((UInt32)((byte)(subBufferPtr[3])) << 0 * 6))
543546
<< 8;
544547
triple = BinaryPrimitives.ReverseEndianness(triple);
545548
Buffer.MemoryCopy(&triple, dst, 3, 3);
546-
547549
dst += 3;
548550
subBufferPtr += 4;
549551
}
550552
int leftover = (int)(bufferPtr - subBufferPtr);
551553
if (leftover > 0)
552554
{
553-
554555
while (leftover < 4 && src < srcEnd)
555556
{
556557
byte val = toBase64[(byte)*src];
@@ -562,8 +563,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
562563
}
563564
subBufferPtr[leftover] = (byte)(val);
564565
leftover += (val <= 63) ? 1 : 0;
565-
566-
// bufferBytesConsumed +=1;
567566
src++;
568567
}
569568

@@ -610,7 +609,6 @@ private unsafe static OperationStatus InnerDecodeFromBase64AVX2Regular(ReadOnlyS
610609
}
611610
}
612611

613-
614612
if (src < srcEnd + equalsigns) // We finished processing 64-bit blocks, we're not quite at the end yet
615613
{
616614
bytesConsumed = (int)(src - srcInit);

0 commit comments

Comments
 (0)