@@ -391,14 +391,15 @@ static bool init[MAX_GPUS] = { 0 };
391391static __thread uint32_t throughput = 0 ;
392392static __thread bool gtx750ti = false ;
393393
394+ static uint32_t *h_GNonces[16 ]; // this needs to get fixed, as does the rest of that routine
395+
394396extern " C" int scanhash_allium (int thr_id, struct work * work, uint32_t max_nonce, unsigned long *hashes_done)
395397{
396398 uint32_t *pdata = work->data ;
397399 uint32_t *ptarget = work->target ;
398400 uint32_t _ALIGN (64 ) endiandata[20 ];
399401 const uint32_t Htarg = ptarget[7 ];
400402 const uint32_t first_nonce = pdata[19 ];
401- uint32_t nonce = first_nonce;
402403
403404 int dev_id = device_map[thr_id];
404405 int rc = 0 ;
@@ -446,67 +447,73 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
446447
447448 CUDA_SAFE_CALL (cudaMalloc (&d_hash[thr_id], (size_t )32 * throughput));
448449
450+ // pinned host buffer that receives the result nonce copied back from the device
451+ cudaMallocHost (&h_GNonces[thr_id], 2 * sizeof (uint32_t ));
452+
449453 init[thr_id] = true ;
450454 }
451455 resNonces = h_resNonce[thr_id];
452456
453457 for (int k = 0 ; k < 19 ; k++)
454458 be32enc (&endiandata[k], pdata[k]);
459+
455460 allium_blake2s_setBlock (endiandata, ptarget[7 ]);
456461
462+ cudaMemset (d_resNonce[thr_id], 0x00 , maxResults*sizeof (uint32_t ));
457463 uint32_t _ALIGN (64 ) hash[8 ];
464+
458465 do {
459- be32enc (&endiandata[19 ], nonce);
466+ // be32enc(&endiandata[19], nonce);
460467
461468 if (ptarget[7 ]) {
462- allium_blake2s_gpu_hash_nonce<<<grid, block>>> (throughput, nonce , d_resNonce[thr_id], ptarget[7 ]);
469+ allium_blake2s_gpu_hash_nonce<<<grid, block>>> (throughput, pdata[ 19 ] , d_resNonce[thr_id], ptarget[7 ]);
463470 }
464471 else {
465- allium_blake2s_gpu_hash_nonce<<<grid, block>>> (throughput, nonce , d_resNonce[thr_id]);
472+ allium_blake2s_gpu_hash_nonce<<<grid, block>>> (throughput, pdata[ 19 ] , d_resNonce[thr_id]);
466473 }
467474
475+ *hashes_done = pdata[19 ] - first_nonce + throughput;
468476
469- be32enc (&d_hash[thr_id], (uint32_t ) d_resNonce[thr_id]);
470- // d_hash[thr_id] = (uint32_t)d_resNonce[thr_id];
471-
472- lyra2_cpu_hash_32 (thr_id, throughput, nonce, d_hash[thr_id], gtx750ti);
477+ cudaMemcpy (&d_hash[thr_id], d_resNonce[thr_id], sizeof (uint32_t ), cudaMemcpyHostToHost);
473478
474- cudaMemcpy (resNonces, d_hash[thr_id], sizeof ( uint32_t ), cudaMemcpyDeviceToHost );
479+ lyra2_cpu_hash_32 (thr_id, throughput, pdata[ 19 ], d_hash[thr_id], gtx750ti );
475480
476- if (resNonces[0 ])
481+ cudaMemcpy (h_GNonces[thr_id], d_hash[thr_id], 1 * sizeof (uint32_t ), cudaMemcpyDeviceToHost);
482+ work->nonces [0 ] = *h_GNonces[thr_id];
483+ if (work->nonces [0 ])
477484 {
478- cudaMemcpy (resNonces, d_hash[thr_id], maxResults*sizeof (uint32_t ), cudaMemcpyDeviceToHost);
479- cudaMemset (d_hash[thr_id], 0x00 , sizeof (uint32_t ));
485+ // gpulog(LOG_INFO, thr_id, "Running on nonce %u", work->nonces[0]);
486+ // cudaMemcpy(resNonces, d_hash[thr_id], maxResults*sizeof(uint32_t), cudaMemcpyDeviceToHost);
487+ // cudaMemset(d_hash[thr_id], 0x00, sizeof(uint32_t));
480488
481- if (resNonces[0 ] >= maxResults) {
482- gpulog (LOG_WARNING, thr_id, " candidates flood: %u" , resNonces[0 ]);
483- resNonces[0 ] = maxResults - 1 ;
484- }
489+ // if (resNonces[0] >= maxResults) {
490+ // gpulog(LOG_WARNING, thr_id, "candidates flood: %u", resNonces[0]);
491+ // resNonces[0] = maxResults - 1;
492+ // }
485493
486- nonce = sph_bswap32 (resNonces[ 1 ]) ;
487- be32enc (&endiandata[19 ], nonce );
494+ pdata[ 19 ] = work-> nonces [ 0 ] ;
495+ be32enc (&endiandata[19 ], pdata[ 19 ] );
488496 allium_hash (hash, endiandata);
489497
490498 if (hash[7 ] <= Htarg && fulltest (hash, ptarget)) {
491499 gpulog (LOG_INFO, thr_id, " Found valid nonce" );
492- work->nonces [0 ] = nonce ;
500+ // work->nonces[0] = pdata[19] ;
493501 work->valid_nonces = 1 ;
494502 work_set_target_ratio (work, hash);
495- pdata[19 ] = nonce;
496503 *hashes_done = pdata[19 ] - first_nonce;
497504 return work->valid_nonces ;
498505 }
499506 }
500507
501- if (nonce + throughput > max_nonce) {
502- nonce = max_nonce;
508+ if (pdata[ 19 ] + throughput > max_nonce) {
509+ pdata[ 19 ] = max_nonce;
503510 break ;
504511 }
505512
506- nonce += throughput;
513+ pdata[ 19 ] += throughput;
507514 } while (!work_restart[thr_id].restart );
508515
509- pdata[19 ] = nonce;
516+ // pdata[19] = nonce;
510517 *hashes_done = pdata[19 ] - first_nonce + 1 ;
511518
512519 return 0 ;
@@ -524,7 +531,8 @@ extern "C" void free_allium(int thr_id)
524531 cudaFree (d_hash[thr_id]);
525532 if (device_sm[dev_id] >= 350 )
526533 cudaFree (d_matrix[thr_id]);
527- // lyra2Z_cpu_free(thr_id);
534+ // release the pinned host nonce buffer
535+ cudaFreeHost (h_GNonces[thr_id]);
528536
529537 init[thr_id] = false ;
530538
0 commit comments