Skip to content

Commit ef900f6

Browse files
Copilot and HMBSbige authored
feat: Add XtsMode128<TDataCipher, TTweakCipher> with zero-abstraction overhead (#110)
Co-authored-by: HMBSbige <[email protected]>
Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: Bruce Wayne <[email protected]>
1 parent 9b2b1e7 commit ef900f6

File tree

8 files changed: +239 additions, -372 deletions (lines changed)

.editorconfig

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ resharper_for_simple_types = use_explicit_type
2727
resharper_fsharp_insert_final_newline = false
2828
resharper_html_insert_final_newline = false
2929
resharper_instance_members_qualify_declared_in =
30+
resharper_keep_existing_attribute_arrangement = true
3031
resharper_keep_existing_initializer_arrangement = false
3132
resharper_max_initializer_elements_on_line = 1
3233
resharper_place_accessorholder_attribute_on_same_line = false

src/CryptoBase.Abstractions/Vectors/VectorBuffer128.cs

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,6 @@ public ref struct VectorBuffer128
1717
[FieldOffset(2 * 32)] public Vector256<byte> V256_2;
1818
[FieldOffset(3 * 32)] public Vector256<byte> V256_3;
1919

20-
[FieldOffset(0 * 64)] public Vector512<byte> V512_0;
21-
[FieldOffset(1 * 64)] public Vector512<byte> V512_1;
22-
2320
[FieldOffset(0 * 64)] public VectorBuffer64 Lower;
2421
[FieldOffset(1 * 64)] public VectorBuffer64 Upper;
2522

@@ -32,15 +29,6 @@ public static implicit operator Span<byte>(in VectorBuffer128 value)
3229
[MethodImpl(MethodImplOptions.AggressiveInlining)]
3330
public static VectorBuffer128 operator ^(scoped in VectorBuffer128 left, scoped in VectorBuffer128 right)
3431
{
35-
if (Vector512.IsHardwareAccelerated)
36-
{
37-
return new VectorBuffer128
38-
{
39-
V512_0 = left.V512_0 ^ right.V512_0,
40-
V512_1 = left.V512_1 ^ right.V512_1
41-
};
42-
}
43-
4432
if (Vector256.IsHardwareAccelerated)
4533
{
4634
return new VectorBuffer128

src/CryptoBase.Abstractions/Vectors/VectorBuffer64.cs

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@ public ref struct VectorBuffer64
1111
[FieldOffset(0 * 32)] public Vector256<byte> V256_0;
1212
[FieldOffset(1 * 32)] public Vector256<byte> V256_1;
1313

14-
[FieldOffset(0 * 64)] public Vector512<byte> V512;
15-
1614
[FieldOffset(0 * 32)] public VectorBuffer32 Lower;
1715
[FieldOffset(1 * 32)] public VectorBuffer32 Upper;
1816

@@ -25,11 +23,6 @@ public static implicit operator Span<byte>(in VectorBuffer64 value)
2523
[MethodImpl(MethodImplOptions.AggressiveInlining)]
2624
public static VectorBuffer64 operator ^(scoped in VectorBuffer64 left, scoped in VectorBuffer64 right)
2725
{
28-
if (Vector512.IsHardwareAccelerated)
29-
{
30-
return new VectorBuffer64 { V512 = left.V512 ^ right.V512 };
31-
}
32-
3326
if (Vector256.IsHardwareAccelerated)
3427
{
3528
return new VectorBuffer64

src/CryptoBase/SymmetricCryptos/BlockCryptoModes/XtsMode.Avx2.cs

Lines changed: 46 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
namespace CryptoBase.SymmetricCryptos.BlockCryptoModes;
22

3-
public sealed partial class XtsMode
3+
public sealed partial class XtsMode<TBlockCipher>
44
{
55
[MethodImpl(MethodImplOptions.AggressiveInlining)]
6-
private static Vector256<byte> Gf128Mul(in Vector256<byte> tweak, [ConstantExpected(Min = 1, Max = 64)] int x)
6+
private static Vector256<byte> Gf128MulAvx2(Vector256<byte> tweak, [ConstantExpected(Min = 1, Max = 64)] int x)
77
{
88
Vector256<ulong> tmp1 = tweak.AsUInt64() >>> 64 - x;
99

@@ -14,99 +14,88 @@ private static Vector256<byte> Gf128Mul(in Vector256<byte> tweak, [ConstantExpec
1414
return (tweak.AsUInt64() << x ^ tmp1 ^ tmp2).AsByte();
1515
}
1616

17+
[SkipLocalsInit]
1718
[MethodImpl(MethodImplOptions.AggressiveInlining)]
18-
private static void GetInitTweak8Avx2(in ReadOnlySpan<byte> tweakBuffer, in Span<byte> buffer)
19+
private static VectorBuffer128 GetInitTweak8Avx2(Vector128<byte> tweak)
1920
{
20-
ref byte ptr = ref buffer.GetReference();
21-
ref readonly Vector128<byte> x0 = ref Unsafe.As<byte, Vector128<byte>>(ref tweakBuffer.GetReference());
22-
ref Vector256<byte> tweak0 = ref Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref ptr, 0 * 32));
23-
ref Vector256<byte> tweak1 = ref Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref ptr, 1 * 32));
24-
ref Vector256<byte> tweak2 = ref Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref ptr, 2 * 32));
25-
ref Vector256<byte> tweak3 = ref Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref ptr, 3 * 32));
26-
27-
Vector128<byte> x1 = Gf128Mul(x0);
28-
tweak0 = Vector256.Create(x0, x1);
29-
30-
tweak1 = Gf128Mul(tweak0, 2);
31-
tweak2 = Gf128Mul(tweak0, 4);
32-
tweak3 = Gf128Mul(tweak0, 6);
21+
Unsafe.SkipInit(out VectorBuffer128 r);
22+
r.V128_0 = tweak;
23+
r.V128_1 = Gf128MulSse2(tweak, 1);
24+
25+
r.V256_1 = Gf128MulAvx2(r.V256_0, 2);
26+
r.V256_2 = Gf128MulAvx2(r.V256_0, 4);
27+
r.V256_3 = Gf128MulAvx2(r.V256_0, 6);
28+
29+
return r;
3330
}
3431

3532
[MethodImpl(MethodImplOptions.AggressiveInlining)]
36-
private static void Gf128Mul8Avx2(in Span<byte> buffer)
33+
private static void Gf128Mul8Avx2(ref VectorBuffer128 tweak)
3734
{
38-
ref byte ptr = ref buffer.GetReference();
39-
ref Vector256<byte> tweak0 = ref Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref ptr, 0 * 32));
40-
ref Vector256<byte> tweak1 = ref Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref ptr, 1 * 32));
41-
ref Vector256<byte> tweak2 = ref Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref ptr, 2 * 32));
42-
ref Vector256<byte> tweak3 = ref Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref ptr, 3 * 32));
43-
44-
tweak0 = Gf128Mul(tweak0, 8);
45-
tweak1 = Gf128Mul(tweak1, 8);
46-
tweak2 = Gf128Mul(tweak2, 8);
47-
tweak3 = Gf128Mul(tweak3, 8);
35+
tweak.V256_0 = Gf128MulAvx2(tweak.V256_0, 8);
36+
tweak.V256_1 = Gf128MulAvx2(tweak.V256_1, 8);
37+
tweak.V256_2 = Gf128MulAvx2(tweak.V256_2, 8);
38+
tweak.V256_3 = Gf128MulAvx2(tweak.V256_3, 8);
4839
}
4940

5041
[MethodImpl(MethodImplOptions.AggressiveInlining)]
51-
private int Encrypt8Avx2(in Span<byte> tweak, in ReadOnlySpan<byte> source, in Span<byte> destination)
42+
private int Encrypt8Avx2(ref Vector128<byte> tweak, ReadOnlySpan<byte> source, Span<byte> destination)
5243
{
5344
int length = source.Length;
5445
int offset = 0;
5546

56-
const int blockSize = 8 * 16;
57-
using CryptoBuffer<byte> buffer = new(blockSize);
58-
Span<byte> tweakBuffer = buffer.Span;
47+
ref readonly byte sourceRef = ref source.GetReference();
48+
ref byte destinationRef = ref destination.GetReference();
5949

60-
GetInitTweak8Avx2(tweak, tweakBuffer);
50+
VectorBuffer128 tweakBuffer = GetInitTweak8Avx2(tweak);
6151

62-
while (length >= 8 * BlockSize)
52+
while (length >= 8 * BlockBytesSize)
6353
{
64-
ReadOnlySpan<byte> src = source.Slice(offset, blockSize);
65-
Span<byte> dst = destination.Slice(offset, blockSize);
54+
VectorBuffer128 src = Unsafe.Add(ref Unsafe.AsRef(in sourceRef), offset).AsVectorBuffer128();
55+
ref VectorBuffer128 dst = ref Unsafe.Add(ref destinationRef, offset).AsVectorBuffer128();
6656

67-
FastUtils.Xor(src, tweakBuffer, dst, blockSize);
68-
_dataCrypto.Encrypt8(dst, dst);
69-
FastUtils.Xor(dst, tweakBuffer, blockSize);
57+
VectorBuffer128 tmp = src ^ tweakBuffer;
58+
tmp = _dataCipher.Encrypt(tmp);
59+
dst = tmp ^ tweakBuffer;
7060

71-
Gf128Mul8Avx2(tweakBuffer);
61+
Gf128Mul8Avx2(ref tweakBuffer);
7262

73-
offset += blockSize;
74-
length -= blockSize;
63+
offset += 8 * BlockBytesSize;
64+
length -= 8 * BlockBytesSize;
7565
}
7666

77-
tweakBuffer.Slice(0, BlockSize).CopyTo(tweak);
67+
tweak = tweakBuffer.V128_0;
7868

7969
return offset;
8070
}
8171

8272
[MethodImpl(MethodImplOptions.AggressiveInlining)]
83-
private int Decrypt8Avx2(in Span<byte> tweak, in ReadOnlySpan<byte> source, in Span<byte> destination)
73+
private int Decrypt8Avx2(ref Vector128<byte> tweak, ReadOnlySpan<byte> source, Span<byte> destination)
8474
{
8575
int length = source.Length;
8676
int offset = 0;
8777

88-
const int blockSize = 8 * 16;
89-
using CryptoBuffer<byte> buffer = new(blockSize);
90-
Span<byte> tweakBuffer = buffer.Span;
78+
ref readonly byte sourceRef = ref source.GetReference();
79+
ref byte destinationRef = ref destination.GetReference();
9180

92-
GetInitTweak8Avx2(tweak, tweakBuffer);
81+
VectorBuffer128 tweakBuffer = GetInitTweak8Avx2(tweak);
9382

94-
while (length >= 8 * BlockSize)
83+
while (length >= 8 * BlockBytesSize)
9584
{
96-
ReadOnlySpan<byte> src = source.Slice(offset, blockSize);
97-
Span<byte> dst = destination.Slice(offset, blockSize);
85+
VectorBuffer128 src = Unsafe.Add(ref Unsafe.AsRef(in sourceRef), offset).AsVectorBuffer128();
86+
ref VectorBuffer128 dst = ref Unsafe.Add(ref destinationRef, offset).AsVectorBuffer128();
9887

99-
FastUtils.Xor(src, tweakBuffer, dst, blockSize);
100-
_dataCrypto.Decrypt8(dst, dst);
101-
FastUtils.Xor(dst, tweakBuffer, blockSize);
88+
VectorBuffer128 tmp = src ^ tweakBuffer;
89+
tmp = _dataCipher.Decrypt(tmp);
90+
dst = tmp ^ tweakBuffer;
10291

103-
Gf128Mul8Avx2(tweakBuffer);
92+
Gf128Mul8Avx2(ref tweakBuffer);
10493

105-
offset += blockSize;
106-
length -= blockSize;
94+
offset += 8 * BlockBytesSize;
95+
length -= 8 * BlockBytesSize;
10796
}
10897

109-
tweakBuffer.Slice(0, BlockSize).CopyTo(tweak);
98+
tweak = tweakBuffer.V128_0;
11099

111100
return offset;
112101
}

0 commit comments

Comments (0)