@@ -205,8 +205,9 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
205205 }
206206
207207 device->profile_info .threads = (uint32_t )(max_threads * intensity / 100.0 );
208+ device->profile_info .threads = (device->profile_info .threads / 4 ) * 4 ; // make it divisible by 4
208209 if (max_threads > 0 && device->profile_info .threads == 0 && intensity > 0 )
209- device->profile_info .threads = 1 ;
210+ device->profile_info .threads = 4 ;
210211
211212 double counter = (double )device->profile_info .threads / (double )device->profile_info .threads_per_chunk ;
212213 size_t allocated_mem_for_current_chunk = 0 ;
@@ -651,8 +652,8 @@ bool opencl_kernel_prehasher(void *memory, int threads, argon2profile *profile,
651652
652653 cl_int error;
653654
654- size_t total_work_items = threads * 8 * profile-> thr_cost ;
655- size_t local_work_items = 8 * profile-> thr_cost ;
655+ size_t total_work_items = 64 * threads / 4 ;
656+ size_t local_work_items = 64 ;
656657
657658 device->device_lock .lock ();
658659
@@ -666,7 +667,7 @@ bool opencl_kernel_prehasher(void *memory, int threads, argon2profile *profile,
666667
667668 clSetKernelArg (device->kernel_prehash , 0 , sizeof (device->arguments .preseed_memory [gpumgmt_thread->thread_id ]), &device->arguments .preseed_memory [gpumgmt_thread->thread_id ]);
668669 clSetKernelArg (device->kernel_prehash , 1 , sizeof (device->arguments .seed_memory [gpumgmt_thread->thread_id ]), &device->arguments .seed_memory [gpumgmt_thread->thread_id ]);
669- clSetKernelArg (device->kernel_prehash , 2 , 4 * sizeof (cl_ulong) * 60 , NULL );
670+ clSetKernelArg (device->kernel_prehash , 2 , 16 * sizeof (cl_ulong) * 60 , NULL );
670671
671672 error=clEnqueueNDRangeKernel (device->queue , device->kernel_prehash , 1 , NULL , &total_work_items, &local_work_items, 0 , NULL , NULL );
672673 if (error != CL_SUCCESS) {
0 commit comments