@@ -158,7 +158,7 @@ bool operator<(const kernel_map_key & l, const kernel_map_key & r) {
158
158
*****************************************************************************/
159
159
// FIXME: This function should be returning an error.
160
160
void makeGemmKernel (
161
- cl_kernel *clKernel, // ignored as input; returns as output
161
+ cl_kernel *clKernel, // ignored as input; returns as output only
162
162
cl_command_queue clQueue,
163
163
const char *kernelSource,
164
164
const char *sourceBuildOptions,
@@ -461,10 +461,10 @@ clblasGemm(
461
461
size_t *colKernelBinarySize = 0 ;
462
462
size_t *cornerKernelBinarySize = 0 ;
463
463
const char *binaryBuildOptions = NULL ;
464
- cl_kernel *tileClKernel = NULL ;
465
- cl_kernel *rowClKernel = NULL ;
466
- cl_kernel *colClKernel = NULL ;
467
- cl_kernel *cornerClKernel = NULL ;
464
+ cl_kernel *tileClKernelDummy = NULL ; // no longer used; broke thread safety
465
+ cl_kernel *rowClKernelDummy = NULL ; // no longer used; broke thread safety
466
+ cl_kernel *colClKernelDummy = NULL ; // no longer used; broke thread safety
467
+ cl_kernel *cornerClKernelDummy = NULL ; // no longer used; broke thread safety
468
468
unsigned int workGroupNumRows;
469
469
unsigned int workGroupNumCols;
470
470
unsigned int microTileNumRows;
@@ -489,10 +489,10 @@ clblasGemm(
489
489
&colKernelBinarySize,
490
490
&cornerKernelBinarySize,
491
491
&binaryBuildOptions,
492
- &tileClKernel ,
493
- &rowClKernel ,
494
- &colClKernel ,
495
- &cornerClKernel ,
492
+ &tileClKernelDummy ,
493
+ &rowClKernelDummy ,
494
+ &colClKernelDummy ,
495
+ &cornerClKernelDummy ,
496
496
&workGroupNumRows,
497
497
&workGroupNumCols,
498
498
&microTileNumRows,
@@ -530,10 +530,10 @@ clblasGemm(
530
530
&colKernelBinarySize,
531
531
&cornerKernelBinarySize,
532
532
&binaryBuildOptions,
533
- &tileClKernel ,
534
- &rowClKernel ,
535
- &colClKernel ,
536
- &cornerClKernel ,
533
+ &tileClKernelDummy ,
534
+ &rowClKernelDummy ,
535
+ &colClKernelDummy ,
536
+ &cornerClKernelDummy ,
537
537
&workGroupNumRows,
538
538
&workGroupNumCols,
539
539
&microTileNumRows,
@@ -567,14 +567,15 @@ clblasGemm(
567
567
* Build kernels
568
568
*****************************************************************************/
569
569
570
- tileClKernel = NULL ;
571
- rowClKernel = NULL ;
572
- colClKernel = NULL ;
573
- cornerClKernel = NULL ;
574
- if (needTileKernel) makeGemmKernel ( tileClKernel, commandQueues[0 ], tileKernelSource, sourceBuildOptions, &tileKernelBinary, tileKernelBinarySize, binaryBuildOptions);
575
- if (needRowKernel) makeGemmKernel ( rowClKernel, commandQueues[0 ], rowKernelSource, sourceBuildOptions, &rowKernelBinary, rowKernelBinarySize, binaryBuildOptions);
576
- if (needColKernel) makeGemmKernel ( colClKernel, commandQueues[0 ], colKernelSource, sourceBuildOptions, &colKernelBinary, colKernelBinarySize, binaryBuildOptions);
577
- if (needCornerKernel) makeGemmKernel (cornerClKernel, commandQueues[0 ], cornerKernelSource, sourceBuildOptions, &cornerKernelBinary, cornerKernelBinarySize, binaryBuildOptions);
570
+
571
+ cl_kernel tileClKernel = NULL ;
572
+ cl_kernel rowClKernel = NULL ;
573
+ cl_kernel colClKernel = NULL ;
574
+ cl_kernel cornerClKernel = NULL ;
575
+ if (needTileKernel) makeGemmKernel ( &tileClKernel, commandQueues[0 ], tileKernelSource, sourceBuildOptions, &tileKernelBinary, tileKernelBinarySize, binaryBuildOptions);
576
+ if (needRowKernel) makeGemmKernel ( &rowClKernel, commandQueues[0 ], rowKernelSource, sourceBuildOptions, &rowKernelBinary, rowKernelBinarySize, binaryBuildOptions);
577
+ if (needColKernel) makeGemmKernel ( &colClKernel, commandQueues[0 ], colKernelSource, sourceBuildOptions, &colKernelBinary, colKernelBinarySize, binaryBuildOptions);
578
+ if (needCornerKernel) makeGemmKernel (&cornerClKernel, commandQueues[0 ], cornerKernelSource, sourceBuildOptions, &cornerKernelBinary, cornerKernelBinarySize, binaryBuildOptions);
578
579
const size_t localWorkSize[2 ] = { workGroupNumRows, workGroupNumCols };
579
580
unsigned int numKernelsEnqueued = 0 ;
580
581
@@ -603,7 +604,7 @@ clblasGemm(
603
604
if (needTileKernel) {
604
605
// printf("enqueueing tile kernel\n");
605
606
size_t globalWorkSize[2 ] = {(M/macroTileNumRows)*workGroupNumRows, (N/macroTileNumCols)*workGroupNumCols };
606
- err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], * tileClKernel,
607
+ err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], tileClKernel,
607
608
gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
608
609
globalWorkSize, localWorkSize,
609
610
numEventsInWaitList, eventWaitList,
@@ -618,7 +619,7 @@ clblasGemm(
618
619
if (needRowKernel) {
619
620
// printf("enqueueing row kernel\n");
620
621
size_t globalWorkSize[2 ] = {1 *workGroupNumRows, (N/macroTileNumCols)*workGroupNumCols };
621
- err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], * rowClKernel,
622
+ err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], rowClKernel,
622
623
gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
623
624
globalWorkSize, localWorkSize,
624
625
numEventsInWaitList, eventWaitList,
@@ -633,7 +634,7 @@ clblasGemm(
633
634
if (needColKernel) {
634
635
// printf("enqueueing col kernel\n");
635
636
size_t globalWorkSize[2 ] = { (M/macroTileNumRows)*workGroupNumRows, 1 *workGroupNumCols };
636
- err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], * colClKernel,
637
+ err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], colClKernel,
637
638
gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
638
639
globalWorkSize, localWorkSize,
639
640
numEventsInWaitList, eventWaitList,
@@ -648,7 +649,7 @@ clblasGemm(
648
649
if (needCornerKernel) {
649
650
// printf("enqueueing corner kernel\n");
650
651
size_t globalWorkSize[2 ] = { 1 *workGroupNumRows, 1 *workGroupNumCols };
651
- err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], * cornerClKernel,
652
+ err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], cornerClKernel,
652
653
gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
653
654
globalWorkSize, localWorkSize,
654
655
numEventsInWaitList, eventWaitList,
0 commit comments