Skip to content

Commit af03d89

Browse files
authored
Merge pull request #2673 from fireice-uk/xmr-stak-rx-dev
[RX] release 1.0.5-rx
2 parents 65ade74 + cd2e233 commit af03d89

28 files changed

+772
-538
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ option(CMAKE_LINK_STATIC "link as much as possible libraries static" OFF)
7171
#option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" OFF)
7272
#set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "Use the static version of the CUDA runtime library if available" FORCE)
7373

74-
option(CUDA_ENABLE "Enable or disable CUDA support (NVIDIA backend)" ON)
74+
option(CUDA_ENABLE "Enable or disable CUDA support (NVIDIA backend)" OFF)
7575
if(CUDA_ENABLE)
7676
find_package(CUDA 9.0)
7777

@@ -205,7 +205,7 @@ endif()
205205
# Find OpenCL
206206
###############################################################################
207207

208-
option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" ON)
208+
option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" OFF)
209209
if(OpenCL_ENABLE)
210210
# try to find AMD OpenCL before NVIDIA OpenCL
211211
find_path(OpenCL_INCLUDE_DIR

xmrstak/backend/cpu/cpuType.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,5 +107,12 @@ Model getModel()
107107
return result;
108108
}
109109

110+
/** Query the CPU for BMI2 support.
 *
 * Issues cpuid with arguments (7, 0) and tests bit 8 of the second returned word.
 * NOTE(review): presumably cpu_info[1] holds EBX and has_feature() tests a single
 * bit index; CPUID leaf 7 / sub-leaf 0, EBX bit 8 is the BMI2 flag -- confirm
 * against the cpuid()/has_feature() helpers defined elsewhere in this file.
 *
 * @return result of has_feature(cpu_info[1], 8)
 */
bool firstHasBMI2()
111+
{
112+
int32_t cpu_info[4];
113+
cpuid(7, 0, cpu_info);
114+
return has_feature(cpu_info[1], 8);
115+
}
116+
110117
} // namespace cpu
111118
} // namespace xmrstak

xmrstak/backend/cpu/cpuType.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,13 @@ Model getModel();
2727
*/
2828
int32_t get_masked(int32_t val, int32_t h, int32_t l);
2929

30+
bool firstHasBMI2();
31+
32+
/** Cached variant of firstHasBMI2().
 *
 * The result of firstHasBMI2() is stored in a function-local static, so
 * firstHasBMI2() is evaluated only when the static is initialized (on the
 * first call) and later calls return the stored value.
 *
 * @return the value firstHasBMI2() returned when the static was initialized
 */
inline bool hasBMI2()
33+
{
34+
static bool bmi2 = firstHasBMI2();
35+
return bmi2;
36+
}
37+
3038
} // namespace cpu
3139
} // namespace xmrstak

xmrstak/backend/cpu/crypto/cryptonight.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ struct randomX_global_ctx
124124
}
125125
printer::inst()->print_msg(LDEBUG,"allocate dataset/cache for numa %u", uint32_t(numaId));
126126
#ifdef __linux__
127-
randomx_dataset* dataset = randomx_alloc_dataset(static_cast<randomx_flags>(RANDOMX_FLAG_LARGE_PAGES | RANDOMX_FLAG_LARGE_PAGES_1G));
127+
randomx_dataset* dataset = randomx_alloc_dataset(static_cast<randomx_flags>(RANDOMX_FLAG_LARGE_PAGES | RANDOMX_FLAG_1GB_PAGES));
128128
if (!dataset)
129129
{
130130
printer::inst()->print_msg(LDEBUG,"Warning: dataset allocation with 1 GiB pages failed");

xmrstak/backend/cpu/crypto/cryptonight_aesni.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ struct RandomX_generator
150150
randomx_apply_config(RandomX_WowneroConfig);
151151
else if(ALGO == randomX_arqma)
152152
randomx_apply_config(RandomX_ArqmaConfig);
153+
else if(ALGO == randomX_safex)
154+
randomx_apply_config(RandomX_SafexConfig);
155+
else if(ALGO == randomX_keva)
156+
randomx_apply_config(RandomX_KevaConfig);
153157
}
154158

155159
for(size_t i = 0; i < N; i++)

xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
234234
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
235235
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
236236

237-
constexpr int PREFETCH_DISTANCE = 4096;
237+
constexpr int PREFETCH_DISTANCE = 7168;
238238
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
239239
scratchpadEnd -= PREFETCH_DISTANCE;
240240

@@ -258,8 +258,25 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
258258

259259
rx_prefetch_t0(prefetchPtr);
260260

261-
scratchpadPtr += 64;
262-
prefetchPtr += 64;
261+
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
262+
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
263+
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
264+
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));
265+
266+
fill_state0 = aesdec<softAes>(fill_state0, key0);
267+
fill_state1 = aesenc<softAes>(fill_state1, key1);
268+
fill_state2 = aesdec<softAes>(fill_state2, key2);
269+
fill_state3 = aesenc<softAes>(fill_state3, key3);
270+
271+
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
272+
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
273+
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
274+
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);
275+
276+
rx_prefetch_t0(prefetchPtr + 64);
277+
278+
scratchpadPtr += 128;
279+
prefetchPtr += 128;
263280
}
264281
prefetchPtr = (const char*) scratchpad;
265282
scratchpadEnd += PREFETCH_DISTANCE;

xmrstak/backend/cpu/crypto/randomx/asm/program_epilogue_store.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
;# save VM register values
2+
add rsp, 40
23
pop rcx
34
mov qword ptr [rcx+0], r8
45
mov qword ptr [rcx+8], r9
Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
lea rcx, [rsi+rax]
2-
push rcx
2+
mov [rsp+16], rcx
33
xor r8, qword ptr [rcx+0]
44
xor r9, qword ptr [rcx+8]
55
xor r10, qword ptr [rcx+16]
@@ -9,7 +9,7 @@
99
xor r14, qword ptr [rcx+48]
1010
xor r15, qword ptr [rcx+56]
1111
lea rcx, [rsi+rdx]
12-
push rcx
12+
mov [rsp+24], rcx
1313
cvtdq2pd xmm0, qword ptr [rcx+0]
1414
cvtdq2pd xmm1, qword ptr [rcx+8]
1515
cvtdq2pd xmm2, qword ptr [rcx+16]
@@ -18,11 +18,11 @@
1818
cvtdq2pd xmm5, qword ptr [rcx+40]
1919
cvtdq2pd xmm6, qword ptr [rcx+48]
2020
cvtdq2pd xmm7, qword ptr [rcx+56]
21-
andps xmm4, xmm13
22-
andps xmm5, xmm13
23-
andps xmm6, xmm13
24-
andps xmm7, xmm13
25-
orps xmm4, xmm14
26-
orps xmm5, xmm14
27-
orps xmm6, xmm14
28-
orps xmm7, xmm14
21+
andpd xmm4, xmm13
22+
andpd xmm5, xmm13
23+
andpd xmm6, xmm13
24+
andpd xmm7, xmm13
25+
orpd xmm4, xmm14
26+
orpd xmm5, xmm14
27+
orpd xmm6, xmm14
28+
orpd xmm7, xmm14
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
;# Loop-load snippet using the XOP vpcmov instruction.
;# rcx = rsi+rax; the address is kept in the stack slot [rsp+16]
;# (program_loop_store.inc reloads rcx from [rsp+16] and [rsp+24]).
lea rcx, [rsi+rax]
2+
mov [rsp+16], rcx
3+
;# xor the eight qwords at [rcx+0 .. rcx+56] into r8..r15
xor r8, qword ptr [rcx+0]
4+
xor r9, qword ptr [rcx+8]
5+
xor r10, qword ptr [rcx+16]
6+
xor r11, qword ptr [rcx+24]
7+
xor r12, qword ptr [rcx+32]
8+
xor r13, qword ptr [rcx+40]
9+
xor r14, qword ptr [rcx+48]
10+
xor r15, qword ptr [rcx+56]
11+
;# rcx = rsi+rdx; the second address is kept in the stack slot [rsp+24]
lea rcx, [rsi+rdx]
12+
mov [rsp+24], rcx
13+
;# convert each pair of packed 32-bit integers at [rcx+0 .. rcx+56]
;# into two doubles in xmm0..xmm7
cvtdq2pd xmm0, qword ptr [rcx+0]
14+
cvtdq2pd xmm1, qword ptr [rcx+8]
15+
cvtdq2pd xmm2, qword ptr [rcx+16]
16+
cvtdq2pd xmm3, qword ptr [rcx+24]
17+
cvtdq2pd xmm4, qword ptr [rcx+32]
18+
cvtdq2pd xmm5, qword ptr [rcx+40]
19+
cvtdq2pd xmm6, qword ptr [rcx+48]
20+
cvtdq2pd xmm7, qword ptr [rcx+56]
21+
;# bitwise select on xmm4..xmm7 with xmm13 as the selector:
;# bits where xmm13 is 1 are kept, the remaining bits are taken from xmm14.
;# NOTE(review): looks like the single-instruction counterpart of the
;# andpd xmm13 / orpd xmm14 pair used in the non-XOP load snippet; the two
;# are equivalent only if xmm14 has no bits set where xmm13 is 1 -- confirm.
vpcmov xmm4, xmm4, xmm14, xmm13
22+
vpcmov xmm5, xmm5, xmm14, xmm13
23+
vpcmov xmm6, xmm6, xmm14, xmm13
24+
vpcmov xmm7, xmm7, xmm14, xmm13

xmrstak/backend/cpu/crypto/randomx/asm/program_loop_store.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
pop rcx
1+
mov rcx, [rsp+24]
22
mov qword ptr [rcx+0], r8
33
mov qword ptr [rcx+8], r9
44
mov qword ptr [rcx+16], r10
@@ -7,7 +7,7 @@
77
mov qword ptr [rcx+40], r13
88
mov qword ptr [rcx+48], r14
99
mov qword ptr [rcx+56], r15
10-
pop rcx
10+
mov rcx, [rsp+16]
1111
xorpd xmm0, xmm4
1212
xorpd xmm1, xmm5
1313
xorpd xmm2, xmm6

0 commit comments

Comments
 (0)