Skip to content

Commit 1ffc7d7

Browse files
authored
Merge pull request #31 from simdutf/AVX2_decode_fix
Avx2 decode fix
2 parents b6839f5 + 9e813e7 commit 1ffc7d7

15 files changed

+5669
-3488
lines changed

README.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ really fast base64 decoding function. The initial work that led to the fast fun
66
was carried out by [gfoidl](https://github.com/gfoidl/Base64).
77

88
- There are accelerated base64 functions for UTF-8 inputs in the .NET runtime, but they are not optimal:
9-
we can make them 50% to 2x or 3x faster.
9+
we can make them 50% faster.
1010
- There are no accelerated base64 functions for UTF-16 inputs (e.g., `string` types). We can be 2x faster
1111
or more.
1212

@@ -18,6 +18,18 @@ of the presence of allowable white space characters and the need to validate the
1818
inputs are valid for encoding, but only some inputs are valid for decoding. Having to skip white space
1919
characters makes accelerated decoding somewhat difficult.
2020

21+
## Results (SimdBase64 vs. fast .NET functions)
22+
23+
We use the Enron base64 data for benchmarking; see benchmark/data/email.
24+
We process the data as UTF-8 (ASCII) using the .NET accelerated functions
25+
as a reference (`System.Buffers.Text.Base64.DecodeFromUtf8`).
26+
27+
28+
| processor | SimdBase64(GB/s) | .NET speed (GB/s) | speed up |
29+
|:----------------|:------------------------|:-------------------|:-------------------|
30+
| Apple M2 processor (ARM) | 6.2 | 3.8 | 1.6 x |
31+
| Intel Ice Lake (AVX2) | 5.3 | 3.4 | 1.6 x |
32+
2133

2234
## Requirements
2335

@@ -71,7 +83,7 @@ To run just one benchmark, use a filter:
7183

7284
```
7385
cd benchmark
74-
dotnet run --configuration Release --filter "*somefilter*"
86+
dotnet run -c Release --filter "SimdUnicodeBenchmarks.RealDataBenchmark.AVX2DecodingRealDataUTF8(FileName: \"data/email/\")"
7587
```
7688

7789
If you are under macOS or Linux, you may want to run the benchmarks in privileged mode:

benchmark/Benchmark.cs

Lines changed: 133 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
6666
public string Legend { get; } = "The speed in gigabytes per second";
6767
}
6868

69-
[SimpleJob(launchCount: 1, warmupCount: 5, iterationCount: 5)]
69+
[SimpleJob(launchCount: 1, warmupCount: 10, iterationCount: 10)]
7070
[Config(typeof(Config))]
7171
#pragma warning disable CA1515
7272
public class RealDataBenchmark
@@ -153,8 +153,8 @@ public Config()
153153
}
154154
// Parameters and variables for real data
155155
[Params(
156-
@"data/email/",
157-
@"data/dns/swedenzonebase.txt"
156+
@"data/email/" //,
157+
//@"data/dns/swedenzonebase.txt"
158158
)]
159159
#pragma warning disable CA1051
160160
public string? FileName;
@@ -305,17 +305,53 @@ public unsafe void RunSSEDecodingBenchmarkUTF16(string[] data, int[] lengths)
305305
}
306306
}
307307

308+
public unsafe void RunSSEDecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
309+
{
310+
for (int i = 0; i < FileContent.Length; i++)
311+
{
312+
Span<byte> base64 = input[i].AsSpan();
313+
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64)];
314+
int bytesConsumed = 0;
315+
int bytesWritten = 0;
316+
SimdBase64.SSE.Base64.DecodeFromBase64SSE(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
317+
if (bytesWritten != lengths[i])
318+
{
319+
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
320+
#pragma warning disable CA2201
321+
throw new Exception("Error");
322+
}
323+
}
324+
}
308325

326+
public unsafe void RunSSEDecodingBenchmarkWithAllocUTF16(string[] data, int[] lengths)
327+
{
328+
for (int i = 0; i < FileContent.Length; i++)
329+
{
330+
string s = FileContent[i];
331+
Span<char> base64 = input16[i].AsSpan();
332+
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64)];
333+
int bytesConsumed = 0;
334+
int bytesWritten = 0;
335+
SimdBase64.SSE.Base64.DecodeFromBase64SSE(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
336+
if (bytesWritten != lengths[i])
337+
{
338+
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
339+
#pragma warning disable CA2201
340+
throw new Exception("Error");
341+
}
342+
}
343+
}
309344

310-
public unsafe void RunSSEDecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
345+
public unsafe void RunAVX2DecodingBenchmarkUTF8(string[] data, int[] lengths)
311346
{
312347
for (int i = 0; i < FileContent.Length; i++)
313348
{
349+
//string s = FileContent[i];
314350
byte[] base64 = input[i];
315-
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64.AsSpan())];
351+
byte[] dataoutput = output[i];
316352
int bytesConsumed = 0;
317353
int bytesWritten = 0;
318-
SimdBase64.SSE.Base64.DecodeFromBase64SSE(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
354+
SimdBase64.AVX2.Base64.DecodeFromBase64AVX2(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
319355
if (bytesWritten != lengths[i])
320356
{
321357
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
@@ -325,16 +361,53 @@ public unsafe void RunSSEDecodingBenchmarkWithAllocUTF8(string[] data, int[] len
325361
}
326362
}
327363

328-
public unsafe void RunSSEDecodingBenchmarkWithAllocUTF16(string[] data, int[] lengths)
364+
public unsafe void RunAVX2DecodingBenchmarkUTF16(string[] data, int[] lengths)
329365
{
330366
for (int i = 0; i < FileContent.Length; i++)
331367
{
332368
string s = FileContent[i];
333-
char[] base64 = input16[i];
334-
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64.AsSpan())];
369+
ReadOnlySpan<char> base64 = s.AsSpan();
370+
byte[] dataoutput = output[i];
335371
int bytesConsumed = 0;
336372
int bytesWritten = 0;
337-
SimdBase64.SSE.Base64.DecodeFromBase64SSE(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
373+
SimdBase64.AVX2.Base64.DecodeFromBase64AVX2(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
374+
if (bytesWritten != lengths[i])
375+
{
376+
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
377+
#pragma warning disable CA2201
378+
throw new Exception("Error");
379+
}
380+
}
381+
}
382+
383+
public unsafe void RunAVX2DecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
384+
{
385+
for (int i = 0; i < FileContent.Length; i++)
386+
{
387+
Span<byte> base64 = input[i].AsSpan();
388+
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64)];
389+
int bytesConsumed = 0;
390+
int bytesWritten = 0;
391+
SimdBase64.AVX2.Base64.DecodeFromBase64AVX2(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
392+
if (bytesWritten != lengths[i])
393+
{
394+
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
395+
#pragma warning disable CA2201
396+
throw new Exception("Error");
397+
}
398+
}
399+
}
400+
401+
public unsafe void RunAVX2DecodingBenchmarkWithAllocUTF16(string[] data, int[] lengths)
402+
{
403+
for (int i = 0; i < FileContent.Length; i++)
404+
{
405+
string s = FileContent[i];
406+
Span<char> base64 = input16[i].AsSpan();
407+
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64)];
408+
int bytesConsumed = 0;
409+
int bytesWritten = 0;
410+
SimdBase64.AVX2.Base64.DecodeFromBase64AVX2(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
338411
if (bytesWritten != lengths[i])
339412
{
340413
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
@@ -383,17 +456,15 @@ public unsafe void RunARMDecodingBenchmarkUTF16(string[] data, int[] lengths)
383456
}
384457
}
385458

386-
387-
388459
public unsafe void RunARMDecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
389460
{
390461
for (int i = 0; i < FileContent.Length; i++)
391462
{
392-
byte[] base64 = input[i];
393-
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64.AsSpan())];
463+
Span<byte> base64 = input[i].AsSpan();
464+
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64)];
394465
int bytesConsumed = 0;
395466
int bytesWritten = 0;
396-
SimdBase64.Arm.Base64.DecodeFromBase64ARM(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
467+
SimdBase64.Arm.Base64.DecodeFromBase64ARM(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
397468
if (bytesWritten != lengths[i])
398469
{
399470
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
@@ -408,11 +479,11 @@ public unsafe void RunARMDecodingBenchmarkWithAllocUTF16(string[] data, int[] le
408479
for (int i = 0; i < FileContent.Length; i++)
409480
{
410481
string s = FileContent[i];
411-
char[] base64 = input16[i];
412-
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64.AsSpan())];
482+
Span<char> base64 = input16[i].AsSpan();
483+
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64)];
413484
int bytesConsumed = 0;
414485
int bytesWritten = 0;
415-
SimdBase64.Arm.Base64.DecodeFromBase64ARM(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
486+
SimdBase64.Arm.Base64.DecodeFromBase64ARM(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
416487
if (bytesWritten != lengths[i])
417488
{
418489
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
@@ -474,15 +545,15 @@ public unsafe void DotnetRuntimeSIMDBase64RealDataUTF8()
474545
RunRuntimeSIMDDecodingBenchmarkUTF8(FileContent, DecodedLengths);
475546
}
476547

477-
[Benchmark]
478-
[BenchmarkCategory("default", "runtime")]
548+
//[Benchmark]
549+
//[BenchmarkCategory("default", "runtime")]
479550
public unsafe void DotnetRuntimeSIMDBase64RealDataWithAllocUTF8()
480551
{
481552
RunRuntimeSIMDDecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);
482553
}
483554

484-
[Benchmark]
485-
[BenchmarkCategory("default", "runtime")]
555+
//[Benchmark]
556+
//[BenchmarkCategory("default", "runtime")]
486557
public unsafe void DotnetRuntimeBase64RealDataUTF16()
487558
{
488559
RunRuntimeDecodingBenchmarkUTF16(FileContent, DecodedLengths);
@@ -495,47 +566,77 @@ public unsafe void SSEDecodingRealDataUTF8()
495566
RunSSEDecodingBenchmarkUTF8(FileContent, DecodedLengths);
496567
}
497568

498-
[Benchmark]
499-
[BenchmarkCategory("SSE")]
569+
//[Benchmark]
570+
//[BenchmarkCategory("SSE")]
500571
public unsafe void SSEDecodingRealDataWithAllocUTF8()
501572
{
502573
RunSSEDecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);
503574
}
504575

576+
[Benchmark]
577+
[BenchmarkCategory("AVX")]
578+
public unsafe void AVX2DecodingRealDataUTF8()
579+
{
580+
RunAVX2DecodingBenchmarkUTF8(FileContent, DecodedLengths);
581+
}
582+
583+
//[Benchmark]
584+
//[BenchmarkCategory("AVX")]
585+
public unsafe void AVX2DecodingRealDataWithAllocUTF8()
586+
{
587+
RunAVX2DecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);
588+
}
589+
590+
505591
[Benchmark]
506592
[BenchmarkCategory("arm64")]
507593
public unsafe void ARMDecodingRealDataUTF8()
508594
{
509595
RunARMDecodingBenchmarkUTF8(FileContent, DecodedLengths);
510596
}
511597

512-
[Benchmark]
513-
[BenchmarkCategory("arm64")]
598+
//[Benchmark]
599+
//[BenchmarkCategory("arm64")]
514600
public unsafe void ARMDecodingRealDataWithAllocUTF8()
515601
{
516602
RunARMDecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);
517603
}
518604

519-
[Benchmark]
520-
[BenchmarkCategory("arm64")]
605+
//[Benchmark]
606+
//[BenchmarkCategory("arm64")]
521607
public unsafe void ARMDecodingRealDataUTF16()
522608
{
523609
RunARMDecodingBenchmarkUTF16(FileContent, DecodedLengths);
524610
}
525611

526-
[Benchmark]
527-
[BenchmarkCategory("SSE")]
612+
//[Benchmark]
613+
//[BenchmarkCategory("SSE")]
528614
public unsafe void SSEDecodingRealDataUTF16()
529615
{
530616
RunSSEDecodingBenchmarkUTF16(FileContent, DecodedLengths);
531617
}
532618

533-
[Benchmark]
534-
[BenchmarkCategory("SSE")]
619+
//[Benchmark]
620+
//[BenchmarkCategory("SSE")]
535621
public unsafe void SSEDecodingRealDataWithAllocUTF16()
536622
{
537623
RunSSEDecodingBenchmarkWithAllocUTF16(FileContent, DecodedLengths);
538624
}
625+
626+
//[Benchmark]
627+
//[BenchmarkCategory("AVX")]
628+
public unsafe void AVX2DecodingRealDataUTF16()
629+
{
630+
RunAVX2DecodingBenchmarkUTF16(FileContent, DecodedLengths);
631+
}
632+
633+
//[Benchmark]
634+
//[BenchmarkCategory("AVX")]
635+
public unsafe void AVX2DecodingRealDataWithAllocUTF16()
636+
{
637+
RunAVX2DecodingBenchmarkWithAllocUTF16(FileContent, DecodedLengths);
638+
}
639+
539640
}
540641
#pragma warning disable CA1515
541642
public class Program

benchmark/benchmark.csproj

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,4 @@
3030
</None>
3131
</ItemGroup>
3232

33-
34-
3533
</Project>

src/Base64.cs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
11
using System;
22
using System.Buffers;
33
using System.Runtime.CompilerServices;
4-
using System.Runtime.Intrinsics;
54
using System.Runtime.Intrinsics.Arm;
65
using System.Runtime.Intrinsics.X86;
76

87

98
namespace SimdBase64
109
{
11-
public static class Base64 {
10+
public static class Base64
11+
{
1212
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1313
public static int MaximalBinaryLengthFromBase64<T>(ReadOnlySpan<T> input)
1414
{
1515
return Scalar.Base64.MaximalBinaryLengthFromBase64Scalar(input);
1616
}
17-
public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<byte> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false) {
17+
public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<byte> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false)
18+
{
1819

1920
if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)
2021
{
@@ -38,12 +39,13 @@ public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<byte> source,
3839

3940
}
4041

41-
public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<char> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false) {
42+
public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<char> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false)
43+
{
4244

4345
if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)
4446
{
4547
return Arm.Base64.DecodeFromBase64ARM(source, dest, out bytesConsumed, out bytesWritten, isUrl);
46-
}
48+
}
4749
// To be completed
4850
//if (Vector512.IsHardwareAccelerated && Avx512Vbmi.IsSupported)
4951
//{

0 commit comments

Comments
 (0)