Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b245b3b
Initial plan
Copilot Oct 24, 2025
20ddbfb
Convert IntrinsicsUtils.Xor and CRC32 files to use Span instead of po…
Copilot Oct 24, 2025
69cdf56
Address code review feedback - remove unnecessary blank lines
Copilot Oct 24, 2025
4685f7b
Remove all fixed pointers from CTR mode implementations
Copilot Oct 24, 2025
30d0bd7
Complete Phase 2: Convert ChaCha20 classes and most pointer methods t…
Copilot Oct 24, 2025
07d306a
Complete Phase 2: Convert all remaining pointer methods to Span/Unsaf…
Copilot Oct 24, 2025
b0dc714
Fix macOS CI error: SF classes now use non-SIMD methods to avoid Plat…
Copilot Oct 24, 2025
a90e8d4
Fix infinite loop in ChaCha20/Salsa20 256-byte processing methods - r…
Copilot Oct 24, 2025
6652c4c
Fix infinite loop in ChaChaCore512 - add missing offset increments
Copilot Oct 24, 2025
0fa6dc0
Optimize UpdateKeyStream methods to use zero-copy Unsafe.As operations
Copilot Oct 24, 2025
1f0df01
Revert previous change and replace Unsafe.ReadUnaligned with zero-cop…
Copilot Oct 24, 2025
a6b0e5d
Simplify UpdateBlocks signature - remove ref parameters, return bytes…
Copilot Oct 24, 2025
ec27e39
Fix code formatting - convert expression-bodied methods to block bodi…
Copilot Oct 24, 2025
7270be2
refactor: crc32
HMBSbige Oct 25, 2025
76d8ff2
refactor: ctr
HMBSbige Oct 25, 2025
ed5fb05
refactor: IncrementCounter
HMBSbige Oct 25, 2025
d2e8299
refactor: chacha20
HMBSbige Oct 26, 2025
1c2fc84
fix: chacha20
HMBSbige Oct 26, 2025
cff05c9
fix: rc4
HMBSbige Oct 26, 2025
e1d836e
refactor: FastUtils
HMBSbige Oct 26, 2025
597763e
refactor: chacha20 and salsa20
HMBSbige Oct 26, 2025
af9e32c
fix: test
HMBSbige Oct 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ jobs:
working-directory: test/${{ env.ProjectName }}.Tests
run: dotnet test -c Release

- name: Run tests (Disable AVX)
if: matrix.os != 'macos-latest'
working-directory: test/${{ env.ProjectName }}.Tests
run: dotnet test -c Release
env:
DOTNET_EnableAVX: 0

build:
needs: [check_format, test]
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') }}
Expand Down
1 change: 0 additions & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<IsPackable>false</IsPackable>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<Authors>HMBSbige</Authors>
<Copyright>Copyright © HMBSbige</Copyright>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
Expand Down
1 change: 1 addition & 0 deletions src/CryptoBase/CryptoBase.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<IsPackable>true</IsPackable>
<Description>A fast crypto library for .NET.</Description>
</PropertyGroup>
Expand Down
54 changes: 28 additions & 26 deletions src/CryptoBase/Digests/CRC32/Crc32Table.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ private Crc32Table(uint poly)

for (uint i = 0; i < 256; ++i)
{
var res = i;
for (var j = 0; j < 16; ++j)
uint res = i;

for (int j = 0; j < 16; ++j)
{
for (var k = 0; k < 8; ++k)
for (int k = 0; k < 8; ++k)
{
res = (res & 1) == 1 ? poly ^ res >> 1 : res >> 1;
}
Expand All @@ -28,32 +29,33 @@ private Crc32Table(uint poly)

public uint Append(uint crc, ReadOnlySpan<byte> source)
{
var offset = 0;
var length = source.Length;
var crcLocal = uint.MaxValue ^ crc;
int offset = 0;
int length = source.Length;
uint crcLocal = uint.MaxValue ^ crc;

uint[] table = _table;

var table = _table;
while (length >= 16)
{
var a = table[3 * 256 + source[offset + 12]]
^ table[2 * 256 + source[offset + 13]]
^ table[1 * 256 + source[offset + 14]]
^ table[0 * 256 + source[offset + 15]];

var b = table[7 * 256 + source[offset + 8]]
^ table[6 * 256 + source[offset + 9]]
^ table[5 * 256 + source[offset + 10]]
^ table[4 * 256 + source[offset + 11]];

var c = table[11 * 256 + source[offset + 4]]
^ table[10 * 256 + source[offset + 5]]
^ table[9 * 256 + source[offset + 6]]
^ table[8 * 256 + source[offset + 7]];

var d = table[15 * 256 + ((byte)crcLocal ^ source[offset])]
^ table[14 * 256 + ((byte)(crcLocal >> 8) ^ source[offset + 1])]
^ table[13 * 256 + ((byte)(crcLocal >> 16) ^ source[offset + 2])]
^ table[12 * 256 + (crcLocal >> 24 ^ source[offset + 3])];
uint a = table[3 * 256 + source[offset + 12]]
^ table[2 * 256 + source[offset + 13]]
^ table[1 * 256 + source[offset + 14]]
^ table[0 * 256 + source[offset + 15]];

uint b = table[7 * 256 + source[offset + 8]]
^ table[6 * 256 + source[offset + 9]]
^ table[5 * 256 + source[offset + 10]]
^ table[4 * 256 + source[offset + 11]];

uint c = table[11 * 256 + source[offset + 4]]
^ table[10 * 256 + source[offset + 5]]
^ table[9 * 256 + source[offset + 6]]
^ table[8 * 256 + source[offset + 7]];

uint d = table[15 * 256 + ((byte)crcLocal ^ source[offset])]
^ table[14 * 256 + ((byte)(crcLocal >> 8) ^ source[offset + 1])]
^ table[13 * 256 + ((byte)(crcLocal >> 16) ^ source[offset + 2])]
^ table[12 * 256 + (crcLocal >> 24 ^ source[offset + 3])];

crcLocal = d ^ c ^ b ^ a;
offset += 16;
Expand Down
56 changes: 29 additions & 27 deletions src/CryptoBase/Digests/CRC32/Crc32X86.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,12 @@ public void UpdateFinal(ReadOnlySpan<byte> origin, Span<byte> destination)
GetHash(destination);
}

public unsafe void Update(ReadOnlySpan<byte> source)
public void Update(ReadOnlySpan<byte> source)
{
if (source.Length >= 64)
{
fixed (byte* p = source)
{
_state = Update(p, source.Length, _state);
source = source[^(source.Length % 0x10)..];
}
_state = Update(source, _state);
source = source.Slice(source.Length - source.Length % 0x10);
}

_state = ~Crc32Table.Crc32.Append(~_state, source);
Expand All @@ -62,24 +59,26 @@ public void Reset()
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe uint Update(byte* buffer, int length, uint crc)
private static uint Update(ReadOnlySpan<byte> buffer, uint crc)
{
var x1 = Sse2.LoadVector128(buffer).AsUInt64();
var x2 = Sse2.LoadVector128(buffer + 0x10).AsUInt64();
var x3 = Sse2.LoadVector128(buffer + 0x20).AsUInt64();
var x4 = Sse2.LoadVector128(buffer + 0x30).AsUInt64();
var vCrc = Vector128.CreateScalar(crc).AsUInt64();
int length = buffer.Length;

// Load the first four 16-byte lanes of the input by value.
// x1..x4 are reassigned below as folding accumulators; binding them as `ref` locals
// would store every intermediate result through the reference, i.e. into the
// caller's ReadOnlySpan<byte> buffer, silently overwriting its first 64 bytes.
Vector128<ulong> x1 = Unsafe.As<byte, Vector128<ulong>>(ref MemoryMarshal.GetReference(buffer));
Vector128<ulong> x2 = Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(0x10));
Vector128<ulong> x3 = Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(0x20));
Vector128<ulong> x4 = Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(0x30));
Vector128<ulong> vCrc = Vector128.CreateScalar(crc).AsUInt64();
x1 = Sse2.Xor(x1, vCrc);

length -= 0x40;
buffer += 0x40;
int offset = 0x40;

while (length >= 0x40)
{
var t1 = Pclmulqdq.CarrylessMultiply(x1, K1K2, 0x11);
var t2 = Pclmulqdq.CarrylessMultiply(x2, K1K2, 0x11);
var t3 = Pclmulqdq.CarrylessMultiply(x3, K1K2, 0x11);
var t4 = Pclmulqdq.CarrylessMultiply(x4, K1K2, 0x11);
Vector128<ulong> t1 = Pclmulqdq.CarrylessMultiply(x1, K1K2, 0x11);
Vector128<ulong> t2 = Pclmulqdq.CarrylessMultiply(x2, K1K2, 0x11);
Vector128<ulong> t3 = Pclmulqdq.CarrylessMultiply(x3, K1K2, 0x11);
Vector128<ulong> t4 = Pclmulqdq.CarrylessMultiply(x4, K1K2, 0x11);

x1 = Pclmulqdq.CarrylessMultiply(x1, K1K2, 0x00);
x2 = Pclmulqdq.CarrylessMultiply(x2, K1K2, 0x00);
Expand All @@ -91,16 +90,19 @@ private static unsafe uint Update(byte* buffer, int length, uint crc)
x3 = Sse2.Xor(x3, t3);
x4 = Sse2.Xor(x4, t4);

x1 = Sse2.Xor(x1, Sse2.LoadVector128(buffer).AsUInt64());
x2 = Sse2.Xor(x2, Sse2.LoadVector128(buffer + 0x10).AsUInt64());
x3 = Sse2.Xor(x3, Sse2.LoadVector128(buffer + 0x20).AsUInt64());
x4 = Sse2.Xor(x4, Sse2.LoadVector128(buffer + 0x30).AsUInt64());
x1 = Sse2.Xor(x1, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));
offset += 0x10;
x2 = Sse2.Xor(x2, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));
offset += 0x10;
x3 = Sse2.Xor(x3, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));
offset += 0x10;
x4 = Sse2.Xor(x4, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));
offset += 0x10;

length -= 0x40;
buffer += 0x40;
}

var t = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x11);
Vector128<ulong> t = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x11);
x1 = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x00);
x1 = Sse2.Xor(x1, t);
x1 = Sse2.Xor(x1, x2);
Expand All @@ -120,13 +122,13 @@ private static unsafe uint Update(byte* buffer, int length, uint crc)
t = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x11);
x1 = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x00);
x1 = Sse2.Xor(x1, t);
x1 = Sse2.Xor(x1, Sse2.LoadVector128(buffer).AsUInt64());
x1 = Sse2.Xor(x1, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));

length -= 0x10;
buffer += 0x10;
offset += 0x10;
}

var r4 = Pclmulqdq.CarrylessMultiply(K3K4, x1, 0x01);
Vector128<ulong> r4 = Pclmulqdq.CarrylessMultiply(K3K4, x1, 0x01);
x1 = Sse2.ShiftRightLogical128BitLane(x1, 0x08);
x1 = Sse2.Xor(x1, r4);

Expand All @@ -141,7 +143,7 @@ private static unsafe uint Update(byte* buffer, int length, uint crc)
x1 = Sse2.And(x1, Mask32);
x1 = Pclmulqdq.CarrylessMultiply(x1, RU, 0x00);
x1 = Sse2.Xor(x1, t);
return x1.AsUInt32().GetElement(1); // pextrd eax, x1, 1
return x1.AsUInt32().GetElement(1);// pextrd eax, x1, 1
}

public void Dispose()
Expand Down
78 changes: 39 additions & 39 deletions src/CryptoBase/Digests/CRC32C/Crc32CX86.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
namespace CryptoBase.Digests.CRC32C;

/// <summary>
/// Same as <see cref="Crc32X86"/> , but different constants.
/// Abstraction will cause performance issue!
/// WTF.NET
/// Same as <see cref="Crc32X86" /> , but different constants.
/// </summary>
public class Crc32CX86 : IHash
{
Expand Down Expand Up @@ -38,15 +36,12 @@ public void UpdateFinal(ReadOnlySpan<byte> origin, Span<byte> destination)
GetHash(destination);
}

public unsafe void Update(ReadOnlySpan<byte> source)
public void Update(ReadOnlySpan<byte> source)
{
if (Sse2.IsSupported && Pclmulqdq.IsSupported && source.Length >= 64)
{
fixed (byte* p = source)
{
_state = Update(p, source.Length, _state);
source = source[^(source.Length % 0x10)..];
}
_state = Update(source, _state);
source = source.Slice(source.Length - source.Length % 0x10);
}

if (Sse42.IsSupported)
Expand All @@ -71,24 +66,26 @@ public void Reset()
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe uint Update(byte* buffer, int length, uint crc)
private static uint Update(ReadOnlySpan<byte> buffer, uint crc)
{
var x1 = Sse2.LoadVector128(buffer).AsUInt64();
var x2 = Sse2.LoadVector128(buffer + 0x10).AsUInt64();
var x3 = Sse2.LoadVector128(buffer + 0x20).AsUInt64();
var x4 = Sse2.LoadVector128(buffer + 0x30).AsUInt64();
var vCrc = Vector128.CreateScalar(crc).AsUInt64();
int length = buffer.Length;

// Load the first four 16-byte lanes of the input by value.
// x1..x4 are reassigned below as folding accumulators; binding them as `ref` locals
// would store every intermediate result through the reference, i.e. into the
// caller's ReadOnlySpan<byte> buffer, silently overwriting its first 64 bytes.
Vector128<ulong> x1 = Unsafe.As<byte, Vector128<ulong>>(ref MemoryMarshal.GetReference(buffer));
Vector128<ulong> x2 = Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(0x10));
Vector128<ulong> x3 = Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(0x20));
Vector128<ulong> x4 = Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(0x30));
Vector128<ulong> vCrc = Vector128.CreateScalar(crc).AsUInt64();
x1 = Sse2.Xor(x1, vCrc);

length -= 0x40;
buffer += 0x40;
int offset = 0x40;

while (length >= 0x40)
{
var t1 = Pclmulqdq.CarrylessMultiply(x1, K1K2, 0x11);
var t2 = Pclmulqdq.CarrylessMultiply(x2, K1K2, 0x11);
var t3 = Pclmulqdq.CarrylessMultiply(x3, K1K2, 0x11);
var t4 = Pclmulqdq.CarrylessMultiply(x4, K1K2, 0x11);
Vector128<ulong> t1 = Pclmulqdq.CarrylessMultiply(x1, K1K2, 0x11);
Vector128<ulong> t2 = Pclmulqdq.CarrylessMultiply(x2, K1K2, 0x11);
Vector128<ulong> t3 = Pclmulqdq.CarrylessMultiply(x3, K1K2, 0x11);
Vector128<ulong> t4 = Pclmulqdq.CarrylessMultiply(x4, K1K2, 0x11);

x1 = Pclmulqdq.CarrylessMultiply(x1, K1K2, 0x00);
x2 = Pclmulqdq.CarrylessMultiply(x2, K1K2, 0x00);
Expand All @@ -100,16 +97,19 @@ private static unsafe uint Update(byte* buffer, int length, uint crc)
x3 = Sse2.Xor(x3, t3);
x4 = Sse2.Xor(x4, t4);

x1 = Sse2.Xor(x1, Sse2.LoadVector128(buffer).AsUInt64());
x2 = Sse2.Xor(x2, Sse2.LoadVector128(buffer + 0x10).AsUInt64());
x3 = Sse2.Xor(x3, Sse2.LoadVector128(buffer + 0x20).AsUInt64());
x4 = Sse2.Xor(x4, Sse2.LoadVector128(buffer + 0x30).AsUInt64());
x1 = Sse2.Xor(x1, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));
offset += 0x10;
x2 = Sse2.Xor(x2, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));
offset += 0x10;
x3 = Sse2.Xor(x3, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));
offset += 0x10;
x4 = Sse2.Xor(x4, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));
offset += 0x10;

length -= 0x40;
buffer += 0x40;
}

var t = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x11);
Vector128<ulong> t = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x11);
x1 = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x00);
x1 = Sse2.Xor(x1, t);
x1 = Sse2.Xor(x1, x2);
Expand All @@ -129,13 +129,13 @@ private static unsafe uint Update(byte* buffer, int length, uint crc)
t = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x11);
x1 = Pclmulqdq.CarrylessMultiply(x1, K3K4, 0x00);
x1 = Sse2.Xor(x1, t);
x1 = Sse2.Xor(x1, Sse2.LoadVector128(buffer).AsUInt64());
x1 = Sse2.Xor(x1, Unsafe.As<byte, Vector128<ulong>>(ref buffer.GetRef(offset)));

length -= 0x10;
buffer += 0x10;
offset += 0x10;
}

var r4 = Pclmulqdq.CarrylessMultiply(K3K4, x1, 0x01);
Vector128<ulong> r4 = Pclmulqdq.CarrylessMultiply(K3K4, x1, 0x01);
x1 = Sse2.ShiftRightLogical128BitLane(x1, 0x08);
x1 = Sse2.Xor(x1, r4);

Expand All @@ -150,7 +150,7 @@ private static unsafe uint Update(byte* buffer, int length, uint crc)
x1 = Sse2.And(x1, Mask32);
x1 = Pclmulqdq.CarrylessMultiply(x1, RU, 0x00);
x1 = Sse2.Xor(x1, t);
return x1.AsUInt32().GetElement(1); // pextrd eax, x1, 1
return x1.AsUInt32().GetElement(1);// pextrd eax, x1, 1
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand All @@ -160,36 +160,36 @@ private void UpdateSse42(ReadOnlySpan<byte> source)
{
while (source.Length >= 8)
{
var data = BinaryPrimitives.ReadUInt64LittleEndian(source);
ref ulong data = ref Unsafe.As<byte, ulong>(ref MemoryMarshal.GetReference(source));
_state = (uint)Sse42.X64.Crc32(_state, data);
source = source[8..];
source = source.Slice(8);
}

if (source.Length >= 4)
{
var data = BinaryPrimitives.ReadUInt32LittleEndian(source);
ref uint data = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
_state = Sse42.Crc32(_state, data);
source = source[4..];
source = source.Slice(4);
}
}
else
{
while (source.Length >= 4)
{
var data = BinaryPrimitives.ReadUInt32LittleEndian(source);
ref uint data = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
_state = Sse42.Crc32(_state, data);
source = source[4..];
source = source.Slice(4);
}
}

if (source.Length >= 2)
{
var data = BinaryPrimitives.ReadUInt16LittleEndian(source);
ref ushort data = ref Unsafe.As<byte, ushort>(ref MemoryMarshal.GetReference(source));
_state = Sse42.Crc32(_state, data);
source = source[2..];
source = source.Slice(2);
}

foreach (var b in source)
foreach (ref readonly byte b in source)
{
_state = Sse42.Crc32(_state, b);
}
Expand Down
3 changes: 2 additions & 1 deletion src/CryptoBase/Extensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ namespace CryptoBase;

public static class Extensions
{
/// <inheritdoc cref="BitOperations.RotateLeft(uint,int)" />
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint RotateLeft(this uint value, int offset)
{
Expand All @@ -23,7 +24,7 @@ public static int GetDeterministicHashCode<T>(this ReadOnlySpan<T> span) where T
{
int hash = 5381;

foreach (T t in span)
foreach (ref readonly T t in span)
{
hash = (hash << 5) + hash ^ t.GetHashCode();
}
Expand Down
Loading