Skip to content

Commit 1a82efe

Browse files
authored
Merge pull request #4 from bogdanadnan/dev
OpenCL optimizations.
2 parents 5440805 + d00b91d commit 1a82efe

File tree

2 files changed, with 12 additions and 7 deletions.

hash/gpu/opencl/opencl_hasher.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -205,8 +205,9 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
205205
}
206206

207207
device->profile_info.threads = (uint32_t)(max_threads * intensity / 100.0);
208+
device->profile_info.threads = (device->profile_info.threads / 4) * 4; // make it divisible by 4
208209
if(max_threads > 0 && device->profile_info.threads == 0 && intensity > 0)
209-
device->profile_info.threads = 1;
210+
device->profile_info.threads = 4;
210211

211212
double counter = (double)device->profile_info.threads / (double)device->profile_info.threads_per_chunk;
212213
size_t allocated_mem_for_current_chunk = 0;
@@ -651,8 +652,8 @@ bool opencl_kernel_prehasher(void *memory, int threads, argon2profile *profile,
651652

652653
cl_int error;
653654

654-
size_t total_work_items = threads * 8 * profile->thr_cost;
655-
size_t local_work_items = 8 * profile->thr_cost;
655+
size_t total_work_items = 64 * threads / 4;
656+
size_t local_work_items = 64;
656657

657658
device->device_lock.lock();
658659

@@ -666,7 +667,7 @@ bool opencl_kernel_prehasher(void *memory, int threads, argon2profile *profile,
666667

667668
clSetKernelArg(device->kernel_prehash, 0, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]);
668669
clSetKernelArg(device->kernel_prehash, 1, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]);
669-
clSetKernelArg(device->kernel_prehash, 2, 4 * sizeof(cl_ulong) * 60, NULL);
670+
clSetKernelArg(device->kernel_prehash, 2, 16 * sizeof(cl_ulong) * 60, NULL);
670671

671672
error=clEnqueueNDRangeKernel(device->queue, device->kernel_prehash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
672673
if(error != CL_SUCCESS) {

hash/gpu/opencl/opencl_kernel.cpp

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -799,15 +799,19 @@ __kernel void prehash (
799799
__global uint *seed,
800800
__local ulong *blake_shared) {
801801
802-
int hash = get_group_id(0);
803-
int id = get_local_id(0);
802+
int hash = get_group_id(0) * 4;
803+
int id = get_local_id(0); // 64 threads
804+
805+
int hash_idx = id >> 4;
806+
hash += hash_idx;
807+
id = id & 0xF;
804808
805809
int thr_id = id % 4; // thread id in session
806810
int session = id / 4; // 4 blake2b hashing session
807811
int lane = session / 2; // 2 lanes
808812
int idx = session % 2; // idx in lane
809813
810-
__local uint *local_mem = (__local uint *)&blake_shared[session * BLAKE_SHARED_MEM_ULONG];
814+
__local uint *local_mem = (__local uint *)&blake_shared[(hash_idx * 4 + session) * BLAKE_SHARED_MEM_ULONG];
811815
__global uint *local_preseed = preseed + hash * IXIAN_SEED_SIZE_UINT;
812816
__global uint *local_seed = seed + (hash * 4 + session) * BLOCK_SIZE_UINT;
813817

0 commit comments

Comments (0)