Skip to content

Commit df351b8

Browse files
authored
Merge pull request #33 from simdutf/notsure
Performance tuning
2 parents 9bd7981 + 369e70c commit df351b8

File tree

4 files changed

+108
-232
lines changed

4 files changed

+108
-232
lines changed

README.md

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ really fast base64 decoding function. The initial work that lead to the fast fun
66
was carried out by [gfoidl](https://github.com/gfoidl/Base64).
77

88
- There are accelerated base64 functions for UTF-8 inputs in the .NET runtime, but they are not optimal:
9-
we can make them 50% faster.
10-
- There is no accelerated base64 functions for UTF-16 inputs (e.g., `string` types). We can be 2x faster
11-
or more.
9+
we can make them 50% faster or more.
10+
- There are no accelerated base64 functions for UTF-16 inputs (e.g., `string` types). We can be several times faster.
1211

1312
The goal of this project is to provide the fast WHATWG forgiving-base64 algorithm already
1413
used in major JavaScript runtimes (Node.js and Bun) to C#.
@@ -27,12 +26,14 @@ as a reference (`System.Buffers.Text.Base64.DecodeFromUtf8`).
2726

2827
| processor | SimdBase64(GB/s) | .NET speed (GB/s) | speed up |
2928
|:----------------|:------------------------|:-------------------|:-------------------|
30-
| Apple M2 processor (ARM) | 6.3 | 3.8 | 1.7 x |
31-
| Intel Ice Lake (AVX2) | 5.8 | 3.4 | 1.7 x |
32-
| Intel Ice Lake (SSSE3) | 4.7 | 3.4 | 1.4 x |
29+
| Apple M2 processor (ARM) | 6.5 | 3.8 | 1.7 x |
30+
| Intel Ice Lake (AVX2) | 6.6 | 3.4 | 1.9 x |
31+
| Intel Ice Lake (SSSE3) | 4.9 | 3.4 | 1.4 x |
3332

3433
Our results are more impressive when comparing against the standard base64 string decoding
35-
function (`Convert.FromBase64String(mystring)`), but we omit these results for simplicity.
34+
function (`Convert.FromBase64String(mystring)`), but it is explained in part by the fact
35+
that the .NET team did not accelerate them using SIMD instructions. Thus we omit them, only
36+
comparing with the SIMD-accelerated .NET functions.
3637

3738
## Requirements
3839

benchmark/Benchmark.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ public Config()
153153
}
154154
// Parameters and variables for real data
155155
[Params(
156-
//@"data/email/" //,
157-
@"data/dns/swedenzonebase.txt"
156+
@"data/email/" //,
157+
//@"data/dns/swedenzonebase.txt"
158158
)]
159159
#pragma warning disable CA1051
160160
public string? FileName;

src/Base64Scalar.cs

Lines changed: 20 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,6 @@ public static int MaximalBinaryLengthFromBase64Scalar<T>(ReadOnlySpan<T> input)
6464
public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<byte> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false)
6565
{
6666

67-
uint[] d0 = isUrl ? Base64Url.d0 : Base64Default.d0;
68-
uint[] d1 = isUrl ? Base64Url.d1 : Base64Default.d1;
69-
uint[] d2 = isUrl ? Base64Url.d2 : Base64Default.d2;
70-
uint[] d3 = isUrl ? Base64Url.d3 : Base64Default.d3;
71-
7267
int length = source.Length;
7368

7469
fixed (byte* srcInit = source)
@@ -89,7 +84,7 @@ public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<byte> s
8984
{
9085
// fastpath
9186
while (src + 4 <= srcEnd &&
92-
(x = d0[*src] | d1[src[1]] | d2[src[2]] | d3[src[3]]) < 0x01FFFFFF)
87+
(x = isUrl ? Base64Url.GetD(src) : Base64Default.GetD(src)) < 0x01FFFFFF)
9388
{
9489
if (MatchSystem(Endianness.BIG))
9590
{
@@ -156,7 +151,7 @@ public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<byte> s
156151
if (MatchSystem(Endianness.BIG))
157152
{
158153
triple <<= 8;
159-
Marshal.Copy(BitConverter.GetBytes(triple), 0, (IntPtr)dst, 2);
154+
Buffer.MemoryCopy(&triple, dst, 2, 2);
160155
}
161156
else
162157
{
@@ -185,7 +180,7 @@ public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<byte> s
185180
if (MatchSystem(Endianness.BIG))
186181
{
187182
triple <<= 8;
188-
Marshal.Copy(BitConverter.GetBytes(triple), 0, (IntPtr)dst, 3);
183+
Buffer.MemoryCopy(&triple, dst, 3, 3);
189184
}
190185
else
191186
{
@@ -208,11 +203,6 @@ public static bool IsValidBase64Index(char b)
208203
public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<char> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false)
209204
{
210205

211-
uint[] d0 = isUrl ? Base64Url.d0 : Base64Default.d0;
212-
uint[] d1 = isUrl ? Base64Url.d1 : Base64Default.d1;
213-
uint[] d2 = isUrl ? Base64Url.d2 : Base64Default.d2;
214-
uint[] d3 = isUrl ? Base64Url.d3 : Base64Default.d3;
215-
216206
int length = source.Length;
217207

218208
fixed (char* srcInit = source)
@@ -234,9 +224,7 @@ public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<char> s
234224
{
235225
// fastpath
236226
while (src + 4 <= srcEnd &&
237-
IsValidBase64Index(*src) && IsValidBase64Index(src[1]) &&
238-
IsValidBase64Index(src[2]) && IsValidBase64Index(src[3]) &&
239-
(x = d0[*src] | d1[src[1]] | d2[src[2]] | d3[src[3]]) < 0x01FFFFFF)
227+
(x = isUrl ? Base64Url.GetD(src) : Base64Default.GetD(src)) < 0x01FFFFFF)
240228
{
241229
if (MatchSystem(Endianness.BIG))
242230
{
@@ -312,7 +300,7 @@ public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<char> s
312300
if (MatchSystem(Endianness.BIG))
313301
{
314302
triple <<= 8;
315-
Marshal.Copy(BitConverter.GetBytes(triple), 0, (IntPtr)dst, 2);
303+
Buffer.MemoryCopy(&triple, dst, 2, 2);
316304
}
317305
else
318306
{
@@ -341,7 +329,7 @@ public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<char> s
341329
if (MatchSystem(Endianness.BIG))
342330
{
343331
triple <<= 8;
344-
Marshal.Copy(BitConverter.GetBytes(triple), 0, (IntPtr)dst, 3);
332+
Buffer.MemoryCopy(&triple, dst, 3, 3);
345333
}
346334
else
347335
{
@@ -358,18 +346,13 @@ public unsafe static OperationStatus DecodeFromBase64Scalar(ReadOnlySpan<char> s
358346
// like DecodeFromBase64Scalar, but it will not write past the end of the output buffer.
359347
public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<byte> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false)
360348
{
361-
362-
363-
uint[] d0 = isUrl ? Base64Url.d0 : Base64Default.d0;
364-
uint[] d1 = isUrl ? Base64Url.d1 : Base64Default.d1;
365-
uint[] d2 = isUrl ? Base64Url.d2 : Base64Default.d2;
366-
uint[] d3 = isUrl ? Base64Url.d3 : Base64Default.d3;
367-
368349
int length = source.Length;
350+
Span<byte> buffer = [0, 0, 0, 0];
369351

370352
// Define pointers within the fixed blocks
371353
fixed (byte* srcInit = source)
372354
fixed (byte* dstInit = dest)
355+
fixed (byte* bufferPtr = buffer)
373356
{
374357
byte* srcEnd = srcInit + length;
375358
byte* src = srcInit;
@@ -382,13 +365,12 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<byt
382365
int idx;
383366
// Should be
384367
// Span<byte> buffer = stackalloc byte[4];
385-
Span<byte> buffer = [0, 0, 0, 0];
386368

387369
while (true)
388370
{
389371
// fastpath
390372
while (src + 4 <= srcEnd &&
391-
(x = d0[*src] | d1[src[1]] | d2[src[2]] | d3[src[3]]) < 0x01FFFFFF)
373+
(x = isUrl ? Base64Url.GetD(src) : Base64Default.GetD(src)) < 0x01FFFFFF)
392374
{
393375

394376

@@ -402,7 +384,7 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<byt
402384
bytesWritten = (int)(dst - dstInit);
403385
return OperationStatus.DestinationTooSmall;
404386
}
405-
Marshal.Copy(buffer, 0, (IntPtr)dst, 3); // optimization opportunity: copy 4 bytes
387+
Buffer.MemoryCopy(bufferPtr, dst, 3, 3);
406388
dst += 3;
407389
src += 4;
408390
}
@@ -475,7 +457,7 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<byt
475457
if (MatchSystem(Endianness.BIG))
476458
{
477459
triple <<= 8;
478-
Marshal.Copy(BitConverter.GetBytes(triple), 0, (IntPtr)dst, 2);
460+
Buffer.MemoryCopy(&triple, dst, 2, 2);
479461
}
480462
else
481463
{
@@ -510,7 +492,7 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<byt
510492
if (MatchSystem(Endianness.BIG))
511493
{
512494
triple <<= 8;
513-
Marshal.Copy(BitConverter.GetBytes(triple), 0, (IntPtr)dst, 3);
495+
Buffer.MemoryCopy(&triple, dst, 3, 3);
514496
}
515497
else
516498
{
@@ -528,16 +510,15 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<byt
528510
public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<char> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false)
529511
{
530512

531-
uint[] d0 = isUrl ? Base64Url.d0 : Base64Default.d0;
532-
uint[] d1 = isUrl ? Base64Url.d1 : Base64Default.d1;
533-
uint[] d2 = isUrl ? Base64Url.d2 : Base64Default.d2;
534-
uint[] d3 = isUrl ? Base64Url.d3 : Base64Default.d3;
535-
536513
int length = source.Length;
537514

515+
// Should be
516+
// Span<byte> buffer = stackalloc byte[4];
517+
Span<byte> buffer = [0, 0, 0, 0];
538518
// Define pointers within the fixed blocks
539519
fixed (char* srcInit = source)
540520
fixed (byte* dstInit = dest)
521+
fixed (byte* bufferPtr = buffer)
541522

542523
{
543524
char* srcEnd = srcInit + length;
@@ -549,18 +530,13 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<cha
549530
uint x;
550531
uint triple;
551532
int idx;
552-
// Should be
553-
// Span<byte> buffer = stackalloc byte[4];
554-
Span<byte> buffer = [0, 0, 0, 0];
555533

556534
while (true)
557535
{
558536
// fastpath
559537
while (src + 4 <= srcEnd &&
560-
(x = d0[*src] | d1[src[1]] | d2[src[2]] | d3[src[3]]) < 0x01FFFFFF)
538+
(x = isUrl ? Base64Url.GetD(src) : Base64Default.GetD(src)) < 0x01FFFFFF)
561539
{
562-
563-
564540
if (MatchSystem(Endianness.BIG))
565541
{
566542
x = BinaryPrimitives.ReverseEndianness(x);
@@ -571,7 +547,7 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<cha
571547
bytesWritten = (int)(dst - dstInit);
572548
return OperationStatus.DestinationTooSmall;
573549
}
574-
Marshal.Copy(buffer, 0, (IntPtr)dst, 3); // optimization opportunity: copy 4 bytes
550+
Buffer.MemoryCopy(bufferPtr, dst, 3, 3);
575551
dst += 3;
576552
src += 4;
577553
}
@@ -644,7 +620,7 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<cha
644620
if (MatchSystem(Endianness.BIG))
645621
{
646622
triple <<= 8;
647-
Marshal.Copy(BitConverter.GetBytes(triple), 0, (IntPtr)dst, 2);
623+
Buffer.MemoryCopy(&triple, dst, 2, 2);
648624
}
649625
else
650626
{
@@ -679,7 +655,7 @@ public unsafe static OperationStatus SafeDecodeFromBase64Scalar(ReadOnlySpan<cha
679655
if (MatchSystem(Endianness.BIG))
680656
{
681657
triple <<= 8;
682-
Marshal.Copy(BitConverter.GetBytes(triple), 0, (IntPtr)dst, 3);
658+
Buffer.MemoryCopy(&triple, dst, 3, 3);
683659
}
684660
else
685661
{

0 commit comments

Comments
 (0)