Skip to content

Commit 91b72f2

Browse files
committed
Improve performance for XXH32 and XXH64
1 parent 9ab0dd4 commit 91b72f2

File tree

5 files changed

+56
-42
lines changed

5 files changed

+56
-42
lines changed

README.md

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -36,12 +36,12 @@ Runtime=.NET 6.0
3636

3737
| Method | x64 |
3838
|:---------------|-----------:|
39-
| Hash32 Array | 5.87 GB/s |
40-
| Hash64 Array | 9.07 GB/s |
39+
| Hash32 Array | 6.65 GB/s |
40+
| Hash64 Array | 12.28 GB/s |
4141
| Hash128 Array | 12.04 GB/s |
4242
| Hash3 Array | 12.08 GB/s |
43-
| Hash32 Span | 5.87 GB/s |
44-
| Hash64 Span | 9.07 GB/s |
43+
| Hash32 Span | 6.65 GB/s |
44+
| Hash64 Span | 12.28 GB/s |
4545
| Hash128 Span | 12.04 GB/s |
4646
| Hash3 Span | 12.08 GB/s |
4747
| Hash32 Stream | 3.22 GB/s |
@@ -51,10 +51,10 @@ Runtime=.NET 6.0
5151

5252
| Method | Platform | Language | 1KB Time | 1MB Time | 1GB Time | Speed |
5353
|:-------------------|---------:|---------:|----------:|----------:|----------:|-----------:|
54-
| Hash32 | x64 | C# | 151.5 ns | 143.4 us | 170.3 ms | 5.87 GB/s |
55-
| Hash32 | x64 | C | 138.5 ns | 129.5 us | 152.4 ms | 6.56 GB/s |
56-
| Hash64 | x64 | C# | 84.6 ns | 77.9 us | 110.2 ms | 9.07 GB/s |
57-
| Hash64 | x64 | C | 74.2 ns | 64.8 us | 83.0 ms | 12.04 GB/s |
54+
| Hash32 | x64 | C# | 138.0 ns | 130.2 us | 150.3 ms | 6.65 GB/s |
55+
| Hash32 | x64 | C | 140.2 ns | 129.6 us | 150.3 ms | 6.65 GB/s |
56+
| Hash64 | x64 | C# | 73.9 ns | 64.6 us | 81.4 ms | 12.28 GB/s |
57+
| Hash64 | x64 | C | 75.5 ns | 65.2 us | 84.5 ms | 11.83 GB/s |
5858
| Hash128 (SSE2/AVX2)| x64 | C# | 151.6 ns | 64.5 us | 80.5 ms | 12.04 GB/s |
5959
| Hash128 (SSE2/AVX2)| x64 | C | 84.4 ns | 38.3 us | 57.4 ms | 17.42 GB/s |
6060
| Hash3 (SSE2/AVX2)| x64 | C# | 77.6 ns | 62.1 us | 78.5 ms | 12.08 GB/s |

src/Standart.Hash.xxHash/__inline__xxHash32.cs

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -27,23 +27,28 @@ private static unsafe uint __inline__XXH32(byte* input, int len, uint seed)
2727

2828
do
2929
{
30+
var reg1 = *((uint*)(input + 0));
31+
var reg2 = *((uint*)(input + 4));
32+
var reg3 = *((uint*)(input + 8));
33+
var reg4 = *((uint*)(input + 12));
34+
3035
// XXH32_round
31-
v1 += *((uint*) (input+0)) * XXH_PRIME32_2;
36+
v1 += reg1 * XXH_PRIME32_2;
3237
v1 = (v1 << 13) | (v1 >> (32 - 13));
3338
v1 *= XXH_PRIME32_1;
3439

3540
// XXH32_round
36-
v2 += *((uint*) (input+4)) * XXH_PRIME32_2;
41+
v2 += reg2 * XXH_PRIME32_2;
3742
v2 = (v2 << 13) | (v2 >> (32 - 13));
3843
v2 *= XXH_PRIME32_1;
3944

4045
// XXH32_round
41-
v3 += *((uint*) (input+8)) * XXH_PRIME32_2;
46+
v3 += reg3 * XXH_PRIME32_2;
4247
v3 = (v3 << 13) | (v3 >> (32 - 13));
4348
v3 *= XXH_PRIME32_1;
4449

4550
// XXH32_round
46-
v4 += *((uint*) (input+12)) * XXH_PRIME32_2;
51+
v4 += reg4 * XXH_PRIME32_2;
4752
v4 = (v4 << 13) | (v4 >> (32 - 13));
4853
v4 *= XXH_PRIME32_1;
4954

@@ -100,23 +105,28 @@ private static unsafe void __inline__XXH32_stream_process(byte[] input, int len,
100105

101106
do
102107
{
108+
var reg1 = *((uint*)(ptr + 0));
109+
var reg2 = *((uint*)(ptr + 4));
110+
var reg3 = *((uint*)(ptr + 8));
111+
var reg4 = *((uint*)(ptr + 12));
112+
103113
// XXH32_round
104-
v1 += *((uint*)(ptr + 0)) * XXH_PRIME32_2;
114+
v1 += reg1 * XXH_PRIME32_2;
105115
v1 = (v1 << 13) | (v1 >> (32 - 13));
106116
v1 *= XXH_PRIME32_1;
107117

108118
// XXH32_round
109-
v2 += *((uint*)(ptr + 4)) * XXH_PRIME32_2;
119+
v2 += reg2 * XXH_PRIME32_2;
110120
v2 = (v2 << 13) | (v2 >> (32 - 13));
111121
v2 *= XXH_PRIME32_1;
112122

113123
// XXH32_round
114-
v3 += *((uint*)(ptr + 8)) * XXH_PRIME32_2;
124+
v3 += reg3 * XXH_PRIME32_2;
115125
v3 = (v3 << 13) | (v3 >> (32 - 13));
116126
v3 *= XXH_PRIME32_1;
117127

118128
// XXH32_round
119-
v4 += *((uint*)(ptr + 12)) * XXH_PRIME32_2;
129+
v4 += reg4 * XXH_PRIME32_2;
120130
v4 = (v4 << 13) | (v4 >> (32 - 13));
121131
v4 *= XXH_PRIME32_1;
122132

src/Standart.Hash.xxHash/__inline__xxHash64.cs

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -27,29 +27,31 @@ private static unsafe ulong __inline__XXH64(byte* input, int len, ulong seed)
2727

2828
do
2929
{
30+
var reg1 = *((ulong*)(input + 0));
31+
var reg2 = *((ulong*)(input + 8));
32+
var reg3 = *((ulong*)(input + 16));
33+
var reg4 = *((ulong*)(input + 24));
34+
3035
// XXH64_round
31-
v1 += *((ulong*) input) * XXH_PRIME64_2;
36+
v1 += reg1 * XXH_PRIME64_2;
3237
v1 = (v1 << 31) | (v1 >> (64 - 31));
3338
v1 *= XXH_PRIME64_1;
34-
input += 8;
3539

3640
// XXH64_round
37-
v2 += *((ulong*) input) * XXH_PRIME64_2;
41+
v2 += reg2 * XXH_PRIME64_2;
3842
v2 = (v2 << 31) | (v2 >> (64 - 31));
3943
v2 *= XXH_PRIME64_1;
40-
input += 8;
4144

4245
// XXH64_round
43-
v3 += *((ulong*) input) * XXH_PRIME64_2;
46+
v3 += reg3 * XXH_PRIME64_2;
4447
v3 = (v3 << 31) | (v3 >> (64 - 31));
4548
v3 *= XXH_PRIME64_1;
46-
input += 8;
4749

4850
// XXH64_round
49-
v4 += *((ulong*) input) * XXH_PRIME64_2;
51+
v4 += reg4 * XXH_PRIME64_2;
5052
v4 = (v4 << 31) | (v4 >> (64 - 31));
5153
v4 *= XXH_PRIME64_1;
52-
input += 8;
54+
input += 32;
5355
} while (input < limit);
5456

5557
h64 = ((v1 << 1) | (v1 >> (64 - 1))) +
@@ -134,29 +136,31 @@ private static unsafe void __inline__XXH64_stream_process(byte[] input, int len,
134136

135137
do
136138
{
139+
var reg1 = *((ulong*)(ptr + 0));
140+
var reg2 = *((ulong*)(ptr + 8));
141+
var reg3 = *((ulong*)(ptr + 16));
142+
var reg4 = *((ulong*)(ptr + 24));
143+
137144
// XXH64_round
138-
v1 += *((ulong*) ptr) * XXH_PRIME64_2;
145+
v1 += reg1 * XXH_PRIME64_2;
139146
v1 = (v1 << 31) | (v1 >> (64 - 31));
140147
v1 *= XXH_PRIME64_1;
141-
ptr += 8;
142148

143149
// XXH64_round
144-
v2 += *((ulong*) ptr) * XXH_PRIME64_2;
150+
v2 += reg2 * XXH_PRIME64_2;
145151
v2 = (v2 << 31) | (v2 >> (64 - 31));
146152
v2 *= XXH_PRIME64_1;
147-
ptr += 8;
148153

149154
// XXH64_round
150-
v3 += *((ulong*) ptr) * XXH_PRIME64_2;
155+
v3 += reg3 * XXH_PRIME64_2;
151156
v3 = (v3 << 31) | (v3 >> (64 - 31));
152157
v3 *= XXH_PRIME64_1;
153-
ptr += 8;
154158

155159
// XXH64_round
156-
v4 += *((ulong*) ptr) * XXH_PRIME64_2;
160+
v4 += reg4 * XXH_PRIME64_2;
157161
v4 = (v4 << 31) | (v4 >> (64 - 31));
158162
v4 *= XXH_PRIME64_1;
159-
ptr += 8;
163+
ptr += 32;
160164
} while (ptr < limit);
161165
}
162166
}

src/Standart.Hash.xxHash/xxHash32.XXH.cs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,11 @@ namespace Standart.Hash.xxHash;
66

77
public static partial class xxHash32
88
{
9-
private const uint XXH_PRIME32_1 = 2654435761U;
10-
private const uint XXH_PRIME32_2 = 2246822519U;
11-
private const uint XXH_PRIME32_3 = 3266489917U;
12-
private const uint XXH_PRIME32_4 = 668265263U;
13-
private const uint XXH_PRIME32_5 = 374761393U;
9+
private static readonly uint XXH_PRIME32_1 = 2654435761U;
10+
private static readonly uint XXH_PRIME32_2 = 2246822519U;
11+
private static readonly uint XXH_PRIME32_3 = 3266489917U;
12+
private static readonly uint XXH_PRIME32_4 = 668265263U;
13+
private static readonly uint XXH_PRIME32_5 = 374761393U;
1414

1515
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1616
private static uint XXH_rotl32(uint x, int r)

src/Standart.Hash.xxHash/xxHash64.XXH.cs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,11 @@ namespace Standart.Hash.xxHash;
66

77
public static partial class xxHash64
88
{
9-
private const ulong XXH_PRIME64_1 = 11400714785074694791UL;
10-
private const ulong XXH_PRIME64_2 = 14029467366897019727UL;
11-
private const ulong XXH_PRIME64_3 = 1609587929392839161UL;
12-
private const ulong XXH_PRIME64_4 = 9650029242287828579UL;
13-
private const ulong XXH_PRIME64_5 = 2870177450012600261UL;
9+
private static readonly ulong XXH_PRIME64_1 = 11400714785074694791UL;
10+
private static readonly ulong XXH_PRIME64_2 = 14029467366897019727UL;
11+
private static readonly ulong XXH_PRIME64_3 = 1609587929392839161UL;
12+
private static readonly ulong XXH_PRIME64_4 = 9650029242287828579UL;
13+
private static readonly ulong XXH_PRIME64_5 = 2870177450012600261UL;
1414

1515
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1616
private static ulong XXH_rotl64(ulong x, int r)

0 commit comments

Comments
 (0)