Skip to content

Commit 7d83b23

Browse files
author
Daniel Lemire
committed
minor tweaks
1 parent df351b8 commit 7d83b23

File tree

4 files changed

+120
-17
lines changed

4 files changed

+120
-17
lines changed

README.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ of the presence of allowable white space characters and the need to validate the
1717
inputs are valid for encoding, but only some inputs are valid for decoding. Having to skip white space
1818
characters makes accelerated decoding somewhat difficult.
1919

20+
2021
## Results (SimdBase64 vs. fast .NET functions)
2122

23+
2224
We use the enron base64 data for benchmarking, see benchmark/data/email.
2325
We process the data as UTF-8 (ASCII) using the .NET accelerated functions
2426
as a reference (`System.Buffers.Text.Base64.DecodeFromUtf8`).
@@ -35,6 +37,12 @@ function (`Convert.FromBase64String(mystring)`), but it is explained in part by
3537
that the .NET team did not accelerate them using SIMD instructions. Thus we omit them, only
3638
comparing with the SIMD-accelerated .NET functions.
3739

40+
41+
## AVX-512
42+
43+
As of .NET 9, support for AVX-512 remains incomplete in C#. In particular, important
44+
VBMI2 instructions are missing. Hence, we do not use AVX-512 on x64 systems.
45+
3846
## Requirements
3947

4048
We require .NET 9 or better: https://dotnet.microsoft.com/en-us/download/dotnet/9.0
@@ -46,14 +54,14 @@ The library only provides Base64 decoding functions, because the .NET library al
4654
fast Base64 encoding functions.
4755

4856
```c#
49-
string base64 = "SGVsbG8sIFdvcmxkIQ==";
57+
string base64 = "SGVsbG8sIFdvcmxkIQ=="; // could be span<byte> in UTF-8 as well
5058
byte[] buffer = new byte[SimdBase64.Base64.MaximalBinaryLengthFromBase64(base64.AsSpan())];
5159
int bytesConsumed; // gives you the number of characters consumed
5260
int bytesWritten;
5361
var result = SimdBase64.Base64.DecodeFromBase64(base64.AsSpan(), buffer, out bytesConsumed, out bytesWritten, false); // false is for regular base64, true for base64url
5462
// result == OperationStatus.Done
55-
// Encoding.UTF8.GetString(buffer.AsSpan().Slice(0, bytesWritten)) == "Hello, World!"
56-
63+
var answer = buffer.AsSpan().Slice(0, bytesWritten); // decoded result
64+
// Encoding.UTF8.GetString(answer) == "Hello, World!"
5765
```
5866

5967

benchmark/Benchmark.cs

Lines changed: 89 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ public Config()
153153
}
154154
// Parameters and variables for real data
155155
[Params(
156-
@"data/email/" //,
157-
//@"data/dns/swedenzonebase.txt"
156+
@"data/email/",
157+
@"data/dns/swedenzonebase.txt"
158158
)]
159159
#pragma warning disable CA1051
160160
public string? FileName;
@@ -361,6 +361,26 @@ public unsafe void RunAVX2DecodingBenchmarkUTF8(string[] data, int[] lengths)
361361
}
362362
}
363363

364+
365+
public unsafe void RunOurDecodingBenchmarkUTF8(string[] data, int[] lengths)
366+
{
367+
for (int i = 0; i < FileContent.Length; i++)
368+
{
369+
//string s = FileContent[i];
370+
byte[] base64 = input[i];
371+
byte[] dataoutput = output[i];
372+
int bytesConsumed = 0;
373+
int bytesWritten = 0;
374+
SimdBase64.Base64.DecodeFromBase64(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
375+
if (bytesWritten != lengths[i])
376+
{
377+
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
378+
#pragma warning disable CA2201
379+
throw new Exception("Error");
380+
}
381+
}
382+
}
383+
364384
public unsafe void RunAVX2DecodingBenchmarkUTF16(string[] data, int[] lengths)
365385
{
366386
for (int i = 0; i < FileContent.Length; i++)
@@ -379,6 +399,24 @@ public unsafe void RunAVX2DecodingBenchmarkUTF16(string[] data, int[] lengths)
379399
}
380400
}
381401
}
402+
public unsafe void RunOurDecodingBenchmarkUTF16(string[] data, int[] lengths)
403+
{
404+
for (int i = 0; i < FileContent.Length; i++)
405+
{
406+
string s = FileContent[i];
407+
ReadOnlySpan<char> base64 = s.AsSpan();
408+
byte[] dataoutput = output[i];
409+
int bytesConsumed = 0;
410+
int bytesWritten = 0;
411+
SimdBase64.Base64.DecodeFromBase64(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
412+
if (bytesWritten != lengths[i])
413+
{
414+
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
415+
#pragma warning disable CA2201
416+
throw new Exception("Error");
417+
}
418+
}
419+
}
382420

383421
public unsafe void RunAVX2DecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
384422
{
@@ -418,6 +456,43 @@ public unsafe void RunAVX2DecodingBenchmarkWithAllocUTF16(string[] data, int[] l
418456
}
419457

420458

459+
public unsafe void RunOurDecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
460+
{
461+
for (int i = 0; i < FileContent.Length; i++)
462+
{
463+
Span<byte> base64 = input[i].AsSpan();
464+
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64)];
465+
int bytesConsumed = 0;
466+
int bytesWritten = 0;
467+
SimdBase64.Base64.DecodeFromBase64(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
468+
if (bytesWritten != lengths[i])
469+
{
470+
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
471+
#pragma warning disable CA2201
472+
throw new Exception("Error");
473+
}
474+
}
475+
}
476+
477+
public unsafe void RunOurDecodingBenchmarkWithAllocUTF16(string[] data, int[] lengths)
478+
{
479+
for (int i = 0; i < FileContent.Length; i++)
480+
{
481+
string s = FileContent[i];
482+
Span<char> base64 = input16[i].AsSpan();
483+
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64)];
484+
int bytesConsumed = 0;
485+
int bytesWritten = 0;
486+
SimdBase64.Base64.DecodeFromBase64(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
487+
if (bytesWritten != lengths[i])
488+
{
489+
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
490+
#pragma warning disable CA2201
491+
throw new Exception("Error");
492+
}
493+
}
494+
}
495+
421496
public unsafe void RunARMDecodingBenchmarkUTF8(string[] data, int[] lengths)
422497
{
423498
for (int i = 0; i < FileContent.Length; i++)
@@ -559,8 +634,8 @@ public unsafe void DotnetRuntimeBase64RealDataUTF16()
559634
RunRuntimeDecodingBenchmarkUTF16(FileContent, DecodedLengths);
560635
}
561636

562-
[Benchmark]
563-
[BenchmarkCategory("SSE")]
637+
//[Benchmark]
638+
//[BenchmarkCategory("SSE")]
564639
public unsafe void SSEDecodingRealDataUTF8()
565640
{
566641
RunSSEDecodingBenchmarkUTF8(FileContent, DecodedLengths);
@@ -573,13 +648,21 @@ public unsafe void SSEDecodingRealDataWithAllocUTF8()
573648
RunSSEDecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);
574649
}
575650

576-
[Benchmark]
577-
[BenchmarkCategory("AVX")]
651+
//[Benchmark]
652+
//[BenchmarkCategory("AVX")]
578653
public unsafe void AVX2DecodingRealDataUTF8()
579654
{
580655
RunAVX2DecodingBenchmarkUTF8(FileContent, DecodedLengths);
581656
}
582657

658+
[Benchmark]
659+
[BenchmarkCategory("default", "runtime")]
660+
public unsafe void SimdBase64DecodingRealDataUTF8()
661+
{
662+
RunOurDecodingBenchmarkUTF8(FileContent, DecodedLengths);
663+
}
664+
665+
583666
//[Benchmark]
584667
//[BenchmarkCategory("AVX")]
585668
public unsafe void AVX2DecodingRealDataWithAllocUTF8()

src/Base64.cs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,14 @@ public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<byte> source,
2121
{
2222
return Arm.Base64.DecodeFromBase64ARM(source, dest, out bytesConsumed, out bytesWritten, isUrl);
2323
}
24-
// To be comleted
25-
//if (Vector512.IsHardwareAccelerated && Avx512Vbmi.IsSupported)
24+
            // To be completed; this may have to wait for .NET 10.
25+
//if (Vector512.IsHardwareAccelerated && Avx512Vbmi2.IsSupported)
2626
//{
27-
// return GetPointerToFirstInvalidByteAvx512(pInputBuffer, inputLength, out Utf16CodeUnitCountAdjustment, out ScalarCodeUnitCountAdjustment);
28-
//}
29-
//if (Avx2.IsSupported)
30-
//{
31-
// return GetPointerToFirstInvalidByteAvx2(pInputBuffer, inputLength, out Utf16CodeUnitCountAdjustment, out ScalarCodeUnitCountAdjustment);
3227
//}
28+
if (Avx2.IsSupported)
29+
{
30+
return AVX2.Base64.DecodeFromBase64AVX2(source, dest, out bytesConsumed, out bytesWritten, isUrl);
31+
}
3332
if (Ssse3.IsSupported && Popcnt.IsSupported)
3433
{
3534
return SSE.Base64.DecodeFromBase64SSE(source, dest, out bytesConsumed, out bytesWritten, isUrl);

test/Base64DecodingTestsUTF8.cs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,20 @@ protected static void DecodeBase64CasesUTF8(DecodeFromBase64DelegateFnc DecodeFr
3838
}
3939
}
4040

41-
41+
[Fact]
42+
[Trait("Category", "scalar")]
43+
public void DecodeBase64README() {
44+
string base64 = "SGVsbG8sIFdvcmxkIQ=="; // could be span<byte> as well
45+
// allocate buffer for the decoded data
46+
byte[] dataoutput = new byte[SimdBase64.Base64.MaximalBinaryLengthFromBase64(base64.AsSpan())];
47+
int bytesConsumed;
48+
int bytesWritten;
49+
var result = SimdBase64.Base64.DecodeFromBase64(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
50+
Assert.Equal(OperationStatus.Done, result);
51+
var answer = dataoutput.AsSpan().Slice(0, bytesWritten);
52+
string utf8String = System.Text.Encoding.UTF8.GetString(answer);
53+
Assert.Equal("Hello, World!", utf8String);
54+
}
4255

4356
[Fact]
4457
[Trait("Category", "scalar")]

0 commit comments

Comments
 (0)