@@ -165,10 +165,22 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
165165 return false ;
166166 }
167167
168- device->kernel = clCreateKernel (device->program , " fill_blocks " , &error);
168+ device->kernel_prehash = clCreateKernel (device->program , " prehash " , &error);
169169 if (error != CL_SUCCESS) {
170170 device->error = error;
171- device->error_message = " Error creating opencl kernel for device." ;
171+ device->error_message = " Error creating opencl prehash kernel for device." ;
172+ return false ;
173+ }
174+ device->kernel_fill_blocks = clCreateKernel (device->program , " fill_blocks" , &error);
175+ if (error != CL_SUCCESS) {
176+ device->error = error;
177+ device->error_message = " Error creating opencl main kernel for device." ;
178+ return false ;
179+ }
180+ device->kernel_posthash = clCreateKernel (device->program , " posthash" , &error);
181+ if (error != CL_SUCCESS) {
182+ device->error = error;
183+ device->error_message = " Error creating opencl posthash kernel for device." ;
172184 return false ;
173185 }
174186
@@ -321,28 +333,56 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
321333 return false ;
322334 }
323335
324- device->arguments .seed_memory [0 ] = clCreateBuffer (device->context , CL_MEM_READ_ONLY, max_threads * 4 * ARGON2_BLOCK_SIZE, NULL , &error);
336+ device->arguments .preseed_memory [0 ] = clCreateBuffer (device->context , CL_MEM_READ_ONLY, device->profile_info .threads * IXIAN_SEED_SIZE, NULL , &error);
337+ if (error != CL_SUCCESS) {
338+ device->error = error;
339+ device->error_message = " Error creating memory buffer." ;
340+ return false ;
341+ }
342+
343+ device->arguments .preseed_memory [1 ] = clCreateBuffer (device->context , CL_MEM_READ_ONLY, device->profile_info .threads * IXIAN_SEED_SIZE, NULL , &error);
344+ if (error != CL_SUCCESS) {
345+ device->error = error;
346+ device->error_message = " Error creating memory buffer." ;
347+ return false ;
348+ }
349+
350+ device->arguments .seed_memory [0 ] = clCreateBuffer (device->context , CL_MEM_READ_WRITE, device->profile_info .threads * 4 * ARGON2_BLOCK_SIZE, NULL , &error);
351+ if (error != CL_SUCCESS) {
352+ device->error = error;
353+ device->error_message = " Error creating memory buffer." ;
354+ return false ;
355+ }
356+
357+ device->arguments .seed_memory [1 ] = clCreateBuffer (device->context , CL_MEM_READ_WRITE, device->profile_info .threads * 4 * ARGON2_BLOCK_SIZE, NULL , &error);
358+ if (error != CL_SUCCESS) {
359+ device->error = error;
360+ device->error_message = " Error creating memory buffer." ;
361+ return false ;
362+ }
363+
364+ device->arguments .out_memory [0 ] = clCreateBuffer (device->context , CL_MEM_READ_WRITE, device->profile_info .threads * ARGON2_BLOCK_SIZE, NULL , &error);
325365 if (error != CL_SUCCESS) {
326366 device->error = error;
327367 device->error_message = " Error creating memory buffer." ;
328368 return false ;
329369 }
330370
331- device->arguments .seed_memory [1 ] = clCreateBuffer (device->context , CL_MEM_READ_ONLY, max_threads * 4 * ARGON2_BLOCK_SIZE, NULL , &error);
371+ device->arguments .out_memory [1 ] = clCreateBuffer (device->context , CL_MEM_READ_WRITE, device-> profile_info . threads * ARGON2_BLOCK_SIZE, NULL , &error);
332372 if (error != CL_SUCCESS) {
333373 device->error = error;
334374 device->error_message = " Error creating memory buffer." ;
335375 return false ;
336376 }
337377
338- device->arguments .out_memory [0 ] = clCreateBuffer (device->context , CL_MEM_WRITE_ONLY, max_threads * 4 * ARGON2_BLOCK_SIZE , NULL , &error);
378+ device->arguments .hash_memory [0 ] = clCreateBuffer (device->context , CL_MEM_WRITE_ONLY, device-> profile_info . threads * ARGON2_RAW_LENGTH , NULL , &error);
339379 if (error != CL_SUCCESS) {
340380 device->error = error;
341381 device->error_message = " Error creating memory buffer." ;
342382 return false ;
343383 }
344384
345- device->arguments .out_memory [1 ] = clCreateBuffer (device->context , CL_MEM_WRITE_ONLY, max_threads * 4 * ARGON2_BLOCK_SIZE , NULL , &error);
385+ device->arguments .hash_memory [1 ] = clCreateBuffer (device->context , CL_MEM_WRITE_ONLY, device-> profile_info . threads * ARGON2_RAW_LENGTH , NULL , &error);
346386 if (error != CL_SUCCESS) {
347387 device->error = error;
348388 device->error_message = " Error creating memory buffer." ;
@@ -381,15 +421,15 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
381421 }
382422 free (segments);
383423
384- clSetKernelArg (device->kernel , 0 , sizeof (device->arguments .memory_chunk_0 ), &device->arguments .memory_chunk_0 );
385- clSetKernelArg (device->kernel , 1 , sizeof (device->arguments .memory_chunk_1 ), &device->arguments .memory_chunk_1 );
386- clSetKernelArg (device->kernel , 2 , sizeof (device->arguments .memory_chunk_2 ), &device->arguments .memory_chunk_2 );
387- clSetKernelArg (device->kernel , 3 , sizeof (device->arguments .memory_chunk_3 ), &device->arguments .memory_chunk_3 );
388- clSetKernelArg (device->kernel , 4 , sizeof (device->arguments .memory_chunk_4 ), &device->arguments .memory_chunk_4 );
389- clSetKernelArg (device->kernel , 5 , sizeof (device->arguments .memory_chunk_5 ), &device->arguments .memory_chunk_5 );
390- clSetKernelArg (device->kernel , 8 , sizeof (device->arguments .address ), &device->arguments .address );
391- clSetKernelArg (device->kernel , 9 , sizeof (device->arguments .segments ), &device->arguments .segments );
392- clSetKernelArg (device->kernel , 10 , sizeof (int32_t ), &device->profile_info .threads_per_chunk );
424+ clSetKernelArg (device->kernel_fill_blocks , 0 , sizeof (device->arguments .memory_chunk_0 ), &device->arguments .memory_chunk_0 );
425+ clSetKernelArg (device->kernel_fill_blocks , 1 , sizeof (device->arguments .memory_chunk_1 ), &device->arguments .memory_chunk_1 );
426+ clSetKernelArg (device->kernel_fill_blocks , 2 , sizeof (device->arguments .memory_chunk_2 ), &device->arguments .memory_chunk_2 );
427+ clSetKernelArg (device->kernel_fill_blocks , 3 , sizeof (device->arguments .memory_chunk_3 ), &device->arguments .memory_chunk_3 );
428+ clSetKernelArg (device->kernel_fill_blocks , 4 , sizeof (device->arguments .memory_chunk_4 ), &device->arguments .memory_chunk_4 );
429+ clSetKernelArg (device->kernel_fill_blocks , 5 , sizeof (device->arguments .memory_chunk_5 ), &device->arguments .memory_chunk_5 );
430+ clSetKernelArg (device->kernel_fill_blocks , 8 , sizeof (device->arguments .address ), &device->arguments .address );
431+ clSetKernelArg (device->kernel_fill_blocks , 9 , sizeof (device->arguments .segments ), &device->arguments .segments );
432+ clSetKernelArg (device->kernel_fill_blocks , 10 , sizeof (int32_t ), &device->profile_info .threads_per_chunk );
393433
394434 return true ;
395435}
@@ -605,68 +645,112 @@ struct opencl_gpumgmt_thread_data {
605645 opencl_device_info *device;
606646};
607647
648+ bool opencl_kernel_prehasher (void *memory, int threads, argon2profile *profile, void *user_data) {
649+ opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data;
650+ opencl_device_info *device = gpumgmt_thread->device ;
651+
652+ cl_int error;
653+
654+ size_t total_work_items = threads * 8 * profile->thr_cost ;
655+ size_t local_work_items = 8 * profile->thr_cost ;
656+
657+ device->device_lock .lock ();
658+
659+ error = clEnqueueWriteBuffer (device->queue , device->arguments .preseed_memory [gpumgmt_thread->thread_id ], CL_FALSE, 0 , threads * IXIAN_SEED_SIZE, memory, 0 , NULL , NULL );
660+ if (error != CL_SUCCESS) {
661+ device->error = error;
662+ device->error_message = " Error writing to gpu memory." ;
663+ device->device_lock .unlock ();
664+ return false ;
665+ }
666+
667+ clSetKernelArg (device->kernel_prehash , 0 , sizeof (device->arguments .preseed_memory [gpumgmt_thread->thread_id ]), &device->arguments .preseed_memory [gpumgmt_thread->thread_id ]);
668+ clSetKernelArg (device->kernel_prehash , 1 , sizeof (device->arguments .seed_memory [gpumgmt_thread->thread_id ]), &device->arguments .seed_memory [gpumgmt_thread->thread_id ]);
669+ clSetKernelArg (device->kernel_prehash , 2 , 4 * sizeof (cl_ulong) * 60 , NULL );
670+
671+ error=clEnqueueNDRangeKernel (device->queue , device->kernel_prehash , 1 , NULL , &total_work_items, &local_work_items, 0 , NULL , NULL );
672+ if (error != CL_SUCCESS) {
673+ device->error = error;
674+ device->error_message = " Error running the kernel." ;
675+ device->device_lock .unlock ();
676+ return false ;
677+ }
678+
679+ return true ;
680+ }
681+
608682void *opencl_kernel_filler (void *memory, int threads, argon2profile *profile, void *user_data) {
609- // uint64_t start_log = microseconds();
610- // printf("Waiting for lock: %lld\n", microseconds() - start_log);
611- // start_log = microseconds();
612683 opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data;
613684 opencl_device_info *device = gpumgmt_thread->device ;
614685
615686 cl_int error;
616687
617- int mem_seed_count = profile->thr_cost ;
618688 size_t total_work_items = threads * KERNEL_WORKGROUP_SIZE * profile->thr_cost ;
619689 size_t local_work_items = KERNEL_WORKGROUP_SIZE * profile->thr_cost ;
620690
621- device->device_lock .lock ();
691+ clSetKernelArg (device->kernel_fill_blocks , 6 , sizeof (device->arguments .seed_memory [gpumgmt_thread->thread_id ]), &device->arguments .seed_memory [gpumgmt_thread->thread_id ]);
692+ clSetKernelArg (device->kernel_fill_blocks , 7 , sizeof (device->arguments .out_memory [gpumgmt_thread->thread_id ]), &device->arguments .out_memory [gpumgmt_thread->thread_id ]);
693+ error=clEnqueueNDRangeKernel (device->queue , device->kernel_fill_blocks , 1 , NULL , &total_work_items, &local_work_items, 0 , NULL , NULL );
694+ if (error != CL_SUCCESS) {
695+ device->error = error;
696+ device->error_message = " Error running the kernel." ;
697+ device->device_lock .unlock ();
698+ return NULL ;
699+ }
622700
623- error = clEnqueueWriteBuffer (device->queue , device->arguments .seed_memory [gpumgmt_thread->thread_id ], CL_FALSE, 0 , threads * 2 * mem_seed_count * ARGON2_BLOCK_SIZE, memory, 0 , NULL , NULL );
624- if (error != CL_SUCCESS) {
625- device->error = error;
626- device->error_message = " Error writing to gpu memory." ;
627- device->device_lock .unlock ();
628- return NULL ;
629- }
701+ return memory;
702+ }
703+
704+ bool opencl_kernel_posthasher (void *memory, int threads, argon2profile *profile, void *user_data) {
705+ opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data;
706+ opencl_device_info *device = gpumgmt_thread->device ;
707+
708+ cl_int error;
709+
710+ size_t total_work_items = threads * 4 ;
711+ size_t local_work_items = 4 ;
630712
631- clSetKernelArg (device->kernel , 6 , sizeof (device->arguments .seed_memory [gpumgmt_thread->thread_id ]), &device->arguments .seed_memory [gpumgmt_thread->thread_id ]);
632- clSetKernelArg (device->kernel , 7 , sizeof (device->arguments .out_memory [gpumgmt_thread->thread_id ]), &device->arguments .out_memory [gpumgmt_thread->thread_id ]);
633- error=clEnqueueNDRangeKernel (device->queue , device->kernel , 1 , NULL , &total_work_items, &local_work_items, 0 , NULL , NULL );
713+ clSetKernelArg (device->kernel_posthash , 0 , sizeof (device->arguments .hash_memory [gpumgmt_thread->thread_id ]), &device->arguments .hash_memory [gpumgmt_thread->thread_id ]);
714+ clSetKernelArg (device->kernel_posthash , 1 , sizeof (device->arguments .out_memory [gpumgmt_thread->thread_id ]), &device->arguments .out_memory [gpumgmt_thread->thread_id ]);
715+ clSetKernelArg (device->kernel_posthash , 2 , sizeof (cl_ulong) * 60 , NULL );
716+
717+ error=clEnqueueNDRangeKernel (device->queue , device->kernel_posthash , 1 , NULL , &total_work_items, &local_work_items, 0 , NULL , NULL );
634718 if (error != CL_SUCCESS) {
635719 device->error = error;
636720 device->error_message = " Error running the kernel." ;
637721 device->device_lock .unlock ();
638- return NULL ;
722+ return false ;
639723 }
640724
641- error = clEnqueueReadBuffer (device->queue , device->arguments .out_memory [gpumgmt_thread->thread_id ], CL_FALSE, 0 , threads * 2 * mem_seed_count * ARGON2_BLOCK_SIZE , memory, 0 , NULL , NULL );
642- if (error != CL_SUCCESS) {
643- device->error = error;
644- device->error_message = " Error reading gpu memory." ;
645- device->device_lock .unlock ();
646- return NULL ;
647- }
648-
649- error=clFinish (device->queue );
725+ error = clEnqueueReadBuffer (device->queue , device->arguments .hash_memory [gpumgmt_thread->thread_id ], CL_FALSE, 0 , threads * ARGON2_RAW_LENGTH , memory, 0 , NULL , NULL );
726+ if (error != CL_SUCCESS) {
727+ device->error = error;
728+ device->error_message = " Error reading gpu memory." ;
729+ device->device_lock .unlock ();
730+ return false ;
731+ }
732+
733+ error=clFinish (device->queue );
650734 if (error != CL_SUCCESS) {
651735 device->error = error;
652736 device->error_message = " Error flushing GPU queue." ;
653737 device->device_lock .unlock ();
654- return NULL ;
738+ return false ;
655739 }
656740
657741 device->device_lock .unlock ();
658742
659- return memory ;
743+ return true ;
660744}
661745
662746void opencl_hasher::__run (opencl_device_info *device, int thread_id) {
663- void *memory = malloc (4 * ARGON2_BLOCK_SIZE * device->profile_info .threads );
747+ void *memory = malloc (IXIAN_SEED_SIZE * device->profile_info .threads );
664748
665749 opencl_gpumgmt_thread_data thread_data;
666750 thread_data.device = device;
667751 thread_data.thread_id = thread_id;
668752
669- argon2 hash_factory (NULL , opencl_kernel_filler, NULL , memory, &thread_data);
753+ argon2 hash_factory (opencl_kernel_prehasher , opencl_kernel_filler, opencl_kernel_posthasher , memory, &thread_data);
670754 hash_factory.set_lane_length (2 );
671755
672756 while (__running) {
@@ -721,12 +805,18 @@ void opencl_hasher::cleanup() {
721805 clReleaseMemObject ((*it)->arguments .memory_chunk_5 );
722806 clReleaseMemObject ((*it)->arguments .address );
723807 clReleaseMemObject ((*it)->arguments .segments );
724- clReleaseMemObject ((*it)->arguments .seed_memory [0 ]);
725- clReleaseMemObject ((*it)->arguments .seed_memory [1 ]);
726- clReleaseMemObject ((*it)->arguments .out_memory [0 ]);
727- clReleaseMemObject ((*it)->arguments .out_memory [1 ]);
728-
729- clReleaseKernel ((*it)->kernel );
808+ clReleaseMemObject ((*it)->arguments .preseed_memory [0 ]);
809+ clReleaseMemObject ((*it)->arguments .preseed_memory [1 ]);
810+ clReleaseMemObject ((*it)->arguments .seed_memory [0 ]);
811+ clReleaseMemObject ((*it)->arguments .seed_memory [1 ]);
812+ clReleaseMemObject ((*it)->arguments .out_memory [0 ]);
813+ clReleaseMemObject ((*it)->arguments .out_memory [1 ]);
814+ clReleaseMemObject ((*it)->arguments .hash_memory [0 ]);
815+ clReleaseMemObject ((*it)->arguments .hash_memory [1 ]);
816+
817+ clReleaseKernel ((*it)->kernel_prehash );
818+ clReleaseKernel ((*it)->kernel_fill_blocks );
819+ clReleaseKernel ((*it)->kernel_posthash );
730820 clReleaseProgram ((*it)->program );
731821 clReleaseCommandQueue ((*it)->queue );
732822 clReleaseContext ((*it)->context );
0 commit comments