Skip to content

Commit 4005160

Browse files
committed
upgrade cryptolight algo for AEON support
they also forked to variant 1 system...
1 parent defb7de commit 4005160

File tree

8 files changed

+129
-22
lines changed

8 files changed

+129
-22
lines changed

ccminer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2380,7 +2380,7 @@ static void *miner_thread(void *userdata)
23802380
rc = scanhash_c11(thr_id, &work, max_nonce, &hashes_done);
23812381
break;
23822382
case ALGO_CRYPTOLIGHT:
2383-
rc = scanhash_cryptolight(thr_id, &work, max_nonce, &hashes_done);
2383+
rc = scanhash_cryptolight(thr_id, &work, max_nonce, &hashes_done, 1);
23842384
break;
23852385
case ALGO_CRYPTONIGHT:
23862386
{

crypto/cryptolight-core.cu

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ void cryptolight_core_gpu_phase1(int threads, uint32_t * long_state, uint32_t *
5757
}
5858
}
5959

60+
// --------------------------------------------------------------------------------------------------------------
61+
6062
__global__
61-
void cryptolight_core_gpu_phase2(const int threads, const int bfactor, const int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b)
63+
void cryptolight_old_gpu_phase2(const int threads, const int bfactor, const int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b)
6264
{
6365
__shared__ uint32_t __align__(16) sharedMemory[1024];
6466

@@ -209,6 +211,70 @@ void cryptolight_core_gpu_phase2(const int threads, const int bfactor, const int
209211
#endif // __CUDA_ARCH__ >= 300
210212
}
211213

214+
/**
 * Variant-1 store tweak, applied in place to one scratchpad block.
 *
 * The block is viewed as a uint4 and only the most significant byte of its
 * .z component (bits 24..31 of the third 32-bit word) is rewritten; the
 * remaining 24 bits of .z and the other three words are left untouched.
 *
 * @param long_state  pointer to the first 32-bit word of the block. It is
 *                    reinterpreted as uint4*, so the caller must pass the
 *                    start of a 4-word block.
 */
__device__ __forceinline__ void store_variant1(uint32_t* long_state)
{
	uint4* Z = (uint4*) long_state;
	// Top byte of the third word.
	const uint32_t tmp = (Z->z >> 24); // __byte_perm(src, 0, 0x7773);
	// Bits 5, 4 and 0 of that byte form an even shift amount in 0..14.
	const uint32_t index = (((tmp >> 3) & 6u) | (tmp & 1u)) << 1;
	// 0x75310 acts as a packed lookup table: after the shift, bits 4..5 give
	// the mask XORed into the byte, which is then merged back into .z.
	Z->z = (Z->z & 0x00ffffffu) | ((tmp ^ ((0x75310u >> index) & 0x30u)) << 24);
}
221+
222+
/*
 * MUL_SUM_XOR_DST_1(a, c, dst, tweak)
 *
 * a, c and dst are pointers to 16-byte blocks, accessed as two uint64_t each;
 * tweak is a 64-bit value.
 *
 *   lo   = cuda_mul128(a[0], dst[0], &hi) + c[1];   hi += c[0];
 *   c    = { dst[0] ^ hi, dst[1] ^ lo };
 *   dst  = { hi, lo ^ tweak };
 *
 * cuda_mul128 is defined elsewhere; from this usage it returns one 64-bit half
 * of the product and writes the other through its third argument.
 *
 * Every argument is evaluated exactly once and the body is wrapped in
 * do { } while (0), so the macro behaves like a single statement and is safe
 * with compound argument expressions and inside unbraced if/else.
 */
#define MUL_SUM_XOR_DST_1(a,c,dst,tweak) do { \
	const uint64_t * const msx_a_ = (const uint64_t *)(a); \
	uint64_t * const msx_c_ = (uint64_t *)(c); \
	uint64_t * const msx_d_ = (uint64_t *)(dst); \
	uint64_t msx_hi_; \
	uint64_t msx_lo_ = cuda_mul128(msx_a_[0], msx_d_[0], &msx_hi_) + msx_c_[1]; \
	msx_hi_ += msx_c_[0]; \
	msx_c_[0] = msx_d_[0] ^ msx_hi_; \
	msx_c_[1] = msx_d_[1] ^ msx_lo_; \
	msx_d_[0] = msx_hi_; \
	msx_d_[1] = msx_lo_ ^ (tweak); \
} while (0)
229+
230+
/**
 * Phase 2 (scratchpad mixing loop) with the variant-1 tweak.
 *
 * Layout: 1D grid of 1D blocks, one thread per hash; threads whose global
 * index is >= `threads` only help initialise shared memory and then idle.
 * Shared memory: 1024 x uint32_t (4 KB) static, filled by cn_aes_gpu_init.
 *
 * @param threads       number of hashes processed by this launch
 * @param bfactor       splits the loop over several launches: each launch
 *                      runs ITER >> (2 + bfactor) loop passes
 * @param partidx       index of the part executed by this launch
 * @param d_long_state  scratchpads; each thread's region starts at
 *                      thread << LONG_SHL_IDX (in uint32_t units)
 * @param d_ctx_a       per-thread A state, 4 x uint32_t per thread
 * @param d_ctx_b       per-thread B state, 4 x uint32_t per thread
 * @param d_tweak       per-thread 64-bit tweak, one entry per thread
 *
 * ITER, LONG_SHL_IDX, E2I_MASK2, AS_UINT4, XOR_BLOCKS_DST, cn_aes_gpu_init,
 * cn_aes_single_round and cuda_mul128 are defined outside this block.
 */
__global__
void cryptolight_gpu_phase2(const uint32_t threads, const uint16_t bfactor, const uint32_t partidx,
	uint32_t * __restrict__ d_long_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b,
	uint64_t * __restrict__ d_tweak)
{
	__shared__ __align__(16) uint32_t sharedMemory[1024];
	// Must stay outside the bounds guard: __syncthreads() has to be reached
	// by every thread of the block.
	cn_aes_gpu_init(sharedMemory);
	__syncthreads();

	const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x;
	if (thread < threads)
	{
		const uint32_t batchsize = ITER >> (2 + bfactor);
		const uint32_t start = partidx * batchsize;
		const uint32_t end = start + batchsize;
		// Offset computed in size_t so that thread << LONG_SHL_IDX cannot
		// wrap around 32 bits for large launches.
		const size_t longptr = (size_t) thread << LONG_SHL_IDX;
		uint32_t * long_state = &d_long_state[longptr];
		const uint64_t tweak = d_tweak[thread];

		void * ctx_a = (void*)(&d_ctx_a[thread << 2]);
		void * ctx_b = (void*)(&d_ctx_b[thread << 2]);
		uint4 A = AS_UINT4(ctx_a); // ld.global.u32.v4
		uint4 B = AS_UINT4(ctx_b);
		// Word-wise views of A and B for the helpers/macros below.
		uint32_t* a = (uint32_t*)&A;
		uint32_t* b = (uint32_t*)&B;

		// Unsigned counter: same type as start/end, no signed/unsigned compare.
		for (uint32_t i = start; i < end; i++)
		{
			// Each pass performs two half-iterations; c and B swap roles
			// between the first and the second half.
			uint32_t c[4];
			uint32_t j = (A.x >> 2) & E2I_MASK2;
			cn_aes_single_round(sharedMemory, &long_state[j], c, a);
			XOR_BLOCKS_DST(c, b, &long_state[j]);
			store_variant1(&long_state[j]);
			MUL_SUM_XOR_DST_1(c, a, &long_state[(c[0] >> 2) & E2I_MASK2], tweak);

			j = (A.x >> 2) & E2I_MASK2;
			cn_aes_single_round(sharedMemory, &long_state[j], b, a);
			XOR_BLOCKS_DST(b, c, &long_state[j]);
			store_variant1(&long_state[j]);
			MUL_SUM_XOR_DST_1(b, a, &long_state[(b[0] >> 2) & E2I_MASK2], tweak);
		}
		// A and B are written back only when bfactor is non-zero, i.e. when
		// the loop is split over several launches and a later part has to
		// reload them.
		if (bfactor) {
			AS_UINT4(ctx_a) = A;
			AS_UINT4(ctx_b) = B;
		}
	}
}
277+
212278
__global__
213279
void cryptolight_core_gpu_phase3(int threads, const uint32_t * long_state, uint32_t * ctx_state, uint32_t * ctx_key2)
214280
{
@@ -252,7 +318,7 @@ extern int device_bfactor[MAX_GPUS];
252318

253319
__host__
254320
void cryptolight_core_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state,
255-
uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2)
321+
uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint64_t *d_ctx_tweak)
256322
{
257323
dim3 grid(blocks);
258324
dim3 block(threads);
@@ -271,7 +337,11 @@ void cryptolight_core_hash(int thr_id, int blocks, int threads, uint32_t *d_long
271337

272338
for(i = 0; i < partcount; i++)
273339
{
274-
cryptolight_core_gpu_phase2 <<<grid, (device_sm[dev_id] >= 300 ? block4 : block)>>>(blocks*threads, bfactor, i, d_long_state, d_ctx_a, d_ctx_b);
340+
dim3 b = device_sm[dev_id] >= 300 ? block4 : block;
341+
if (variant == 0)
342+
cryptolight_old_gpu_phase2 <<<grid, b>>> (blocks*threads, bfactor, i, d_long_state, d_ctx_a, d_ctx_b);
343+
else
344+
cryptolight_gpu_phase2 <<<grid, b>>> (blocks*threads, bfactor, i, d_long_state, d_ctx_a, d_ctx_b, d_ctx_tweak);
275345
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
276346
if(partcount > 1) usleep(bsleep);
277347
}

crypto/cryptolight-cpu.cpp

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,16 @@ struct cryptonight_ctx {
2222
oaes_ctx* aes_ctx;
2323
};
2424

25+
26+
/**
 * Apply the variant-1 store tweak to a scratchpad block, in place.
 *
 * Only byte 11 of `state` is modified, and only when `variant` is exactly 1;
 * for any other value the block is left untouched.
 *
 * @param state    pointer to the block (at least 12 readable/writable bytes)
 * @param variant  algorithm variant selector
 */
static void cryptolight_store_variant(void* state, int variant) {
	if (variant != 1)
		return;

	// use variant 1 like monero since june 2018
	uint8_t* const bytes = (uint8_t*) state;
	const uint8_t tmp = bytes[11];
	// Bits 5, 4 and 0 of the byte form an even shift amount in 0..14.
	const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1;
	// 0x75310 is a packed lookup table; bits 4..5 after the shift are the
	// mask XORed into the byte.
	bytes[11] = tmp ^ ((0x75310 >> index) & 0x30);
}
34+
2535
static void do_blake_hash(const void* input, int len, void* output)
2636
{
2737
uchar hash[32];
@@ -132,14 +142,14 @@ static void mul_sum_dst(const uint8_t* a, const uint8_t* b, const uint8_t* c, ui
132142
((uint64_t*) dst)[0] += ((uint64_t*) c)[0];
133143
}
134144

135-
static void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
145+
static void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst, const int variant, const uint64_t tweak) {
136146
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
137147
hi += ((uint64_t*) c)[0];
138148

139149
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
140150
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
141151
((uint64_t*) dst)[0] = hi;
142-
((uint64_t*) dst)[1] = lo;
152+
((uint64_t*) dst)[1] = variant ? lo ^ tweak : lo;
143153
}
144154

145155
static void copy_block(uint8_t* dst, const uint8_t* src) {
@@ -157,13 +167,18 @@ static void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
157167
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
158168
}
159169

160-
static void cryptolight_hash_ctx(void* output, const void* input, const int len, struct cryptonight_ctx* ctx)
170+
static int cryptolight_hash_ctx(void* output, const void* input, const int len, struct cryptonight_ctx* ctx, const int variant)
161171
{
162172
size_t i, j;
173+
if (variant && len < 43)
174+
return 0;
175+
163176
keccak_hash_process(&ctx->state.hs, (const uint8_t*) input, len);
164177
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
165178
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
166179

180+
const uint64_t tweak = variant ? *((uint64_t*) (((uint8_t*)input) + 35)) ^ ctx->state.hs.w[24] : 0;
181+
167182
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
168183
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
169184
#undef RND
@@ -186,14 +201,16 @@ static void cryptolight_hash_ctx(void* output, const void* input, const int len,
186201
j = e2i(ctx->a);
187202
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
188203
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
204+
cryptolight_store_variant(&ctx->long_state[j], variant);
189205

190-
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
206+
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)], variant, tweak);
191207

192208
j = e2i(ctx->a);
193209
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
194210
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
211+
cryptolight_store_variant(&ctx->long_state[j], variant);
195212

196-
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
213+
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)], variant, tweak);
197214
}
198215

199216
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
@@ -219,11 +236,19 @@ static void cryptolight_hash_ctx(void* output, const void* input, const int len,
219236
if (opt_debug) applog(LOG_DEBUG, "extra algo=%d", extra_algo);
220237

221238
oaes_free((OAES_CTX **) &ctx->aes_ctx);
239+
return 1;
222240
}
223241

224-
void cryptolight_hash(void* output, const void* input, int len)
242+
int cryptolight_hash_variant(void* output, const void* input, int len, int variant)
225243
{
226244
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
227-
cryptolight_hash_ctx(output, input, len, ctx);
245+
int rc = cryptolight_hash_ctx(output, input, len, ctx, variant);
228246
free(ctx);
247+
return rc;
229248
}
249+
250+
void cryptolight_hash(void* output, const void* input)
251+
{
252+
cryptolight_hash_variant(output, input, 76, 1);
253+
}
254+

crypto/cryptolight.cu

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ static uint32_t *d_ctx_state[MAX_GPUS];
1111
static uint32_t *d_ctx_key1[MAX_GPUS];
1212
static uint32_t *d_ctx_key2[MAX_GPUS];
1313
static uint32_t *d_ctx_text[MAX_GPUS];
14+
static uint64_t *d_ctx_tweak[MAX_GPUS];
1415
static uint32_t *d_ctx_a[MAX_GPUS];
1516
static uint32_t *d_ctx_b[MAX_GPUS];
1617

1718
static bool init[MAX_GPUS] = { 0 };
1819

19-
extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
20+
extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int variant)
2021
{
2122
int res = 0;
2223
uint32_t throughput = 0;
@@ -26,13 +27,18 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
2627
uint32_t *nonceptr = (uint32_t*) (&pdata[39]);
2728
const uint32_t first_nonce = *nonceptr;
2829
uint32_t nonce = first_nonce;
30+
int dev_id = device_map[thr_id];
2931

3032
if(opt_benchmark) {
3133
ptarget[7] = 0x00ff;
3234
}
3335

3436
if(!init[thr_id])
3537
{
38+
if (!device_config[thr_id] && strcmp(device_name[dev_id], "TITAN V") == 0) {
39+
device_config[thr_id] = strdup("80x32");
40+
}
41+
3642
if (device_config[thr_id]) {
3743
sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
3844
throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
@@ -79,6 +85,7 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
7985
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
8086
cudaMalloc(&d_ctx_b[thr_id], 4 * sizeof(uint32_t) * throughput);
8187
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
88+
cudaMalloc(&d_ctx_tweak[thr_id], sizeof(uint64_t) * throughput);
8289

8390
init[thr_id] = true;
8491
}
@@ -91,8 +98,8 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
9198
uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
9299

93100
cryptonight_extra_setData(thr_id, pdata, ptarget);
94-
cryptonight_extra_prepare(thr_id, throughput, nonce, d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id], 0, NULL);
95-
cryptolight_core_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]);
101+
cryptonight_extra_prepare(thr_id, throughput, nonce, d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id], variant, d_ctx_tweak[thr_id]);
102+
cryptolight_core_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id], variant, d_ctx_tweak[thr_id]);
96103
cryptonight_extra_final(thr_id, throughput, nonce, resNonces, d_ctx_state[thr_id]);
97104

98105
*hashes_done = nonce - first_nonce + throughput;
@@ -104,7 +111,7 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
104111
uint32_t *tempnonceptr = (uint32_t*)(((char*)tempdata) + 39);
105112
memcpy(tempdata, pdata, 76);
106113
*tempnonceptr = resNonces[0];
107-
cryptolight_hash(vhash, tempdata, 76);
114+
cryptolight_hash_variant(vhash, tempdata, 76, variant);
108115
if(vhash[7] <= Htarg && fulltest(vhash, ptarget))
109116
{
110117
res = 1;
@@ -114,7 +121,7 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
114121
if(resNonces[1] != UINT32_MAX)
115122
{
116123
*tempnonceptr = resNonces[1];
117-
cryptolight_hash(vhash, tempdata, 76);
124+
cryptolight_hash_variant(vhash, tempdata, 76, variant);
118125
if(vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
119126
res++;
120127
work->nonces[1] = resNonces[1];
@@ -157,6 +164,7 @@ void free_cryptolight(int thr_id)
157164
cudaFree(d_ctx_key1[thr_id]);
158165
cudaFree(d_ctx_key2[thr_id]);
159166
cudaFree(d_ctx_text[thr_id]);
167+
cudaFree(d_ctx_tweak[thr_id]);
160168
cudaFree(d_ctx_a[thr_id]);
161169
cudaFree(d_ctx_b[thr_id]);
162170

crypto/cryptolight.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ static inline void exit_if_cudaerror(int thr_id, const char *src, int line)
135135
}
136136
}
137137

138-
void cryptolight_core_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2);
138+
void cryptolight_core_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint64_t *d_ctx_tweak);
139139

140140
void cryptonight_extra_setData(int thr_id, const void *data, const void *ptarget);
141141
void cryptonight_extra_init(int thr_id/*, uint32_t threads*/);

crypto/xmr-rpc.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,16 +550,19 @@ bool rpc2_stratum_submit(struct pool_infos *pool, struct work *work)
550550
}
551551

552552
else if (opt_algo == ALGO_CRYPTOLIGHT) {
553+
int variant = 1;
553554
uint32_t nonce = work->nonces[idnonce];
554555
noncestr = bin2hex((unsigned char*) &nonce, 4);
555556
last_found_nonce = nonce;
556-
cryptolight_hash(hash, data, 76);
557+
//if (cryptonight_fork > 1 && ((unsigned char*)work->data)[0] >= cryptonight_fork)
558+
// variant = ((unsigned char*)work->data)[0] - cryptonight_fork + 1;
559+
cryptolight_hash_variant(hash, data, 76, variant);
557560
work_set_target_ratio(work, (uint32_t*) hash);
558561
}
559562

560563
else if (opt_algo == ALGO_CRYPTONIGHT) {
561-
uint32_t nonce = work->nonces[idnonce];
562564
int variant = 0;
565+
uint32_t nonce = work->nonces[idnonce];
563566
noncestr = bin2hex((unsigned char*) &nonce, 4);
564567
last_found_nonce = nonce;
565568
if (cryptonight_fork > 1 && ((unsigned char*)work->data)[0] >= cryptonight_fork)

miner.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ extern int scanhash_blake256(int thr_id, struct work* work, uint32_t max_nonce,
279279
extern int scanhash_blake2s(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
280280
extern int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
281281
extern int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
282-
extern int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
282+
extern int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int variant);
283283
extern int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int variant);
284284
extern int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
285285
extern int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@@ -900,7 +900,8 @@ void blake2b_hash(void *output, const void *input);
900900
void blake2s_hash(void *output, const void *input);
901901
void bmw_hash(void *state, const void *input);
902902
void c11hash(void *output, const void *input);
903-
void cryptolight_hash(void* output, const void* input, int len);
903+
int cryptolight_hash_variant(void* output, const void* input, int len, int variant);
904+
void cryptolight_hash(void* output, const void* input);
904905
int cryptonight_hash_variant(void* output, const void* input, size_t len, int variant);
905906
void cryptonight_hash(void* output, const void* input);
906907
void monero_hash(void* output, const void* input);

util.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2193,7 +2193,7 @@ void print_hash_tests(void)
21932193
c11hash(&hash[0], &buf[0]);
21942194
printpfx("c11", hash);
21952195

2196-
cryptolight_hash(&hash[0], &buf[0], 76);
2196+
cryptolight_hash(&hash[0], &buf[0]);
21972197
printpfx("cryptolight", hash);
21982198

21992199
cryptonight_hash(&hash[0], &buf[0]);

0 commit comments

Comments
 (0)