@@ -25,6 +25,13 @@ def makeOpenCLKernelString(kernel):
25
25
kStr += "/* %s */" % kernel .getName ()
26
26
kStr += endLine
27
27
28
+ ####################################
29
+ # Double precision pragma
30
+ prec = kernel .getName ()[0 ].lower ()
31
+ if prec == "d" or prec == "z" :
32
+ kStr += endLine
33
+ kStr += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" + endLine
34
+
28
35
####################################
29
36
# kernel parameters
30
37
kStr += endLine
@@ -168,14 +175,14 @@ def makeOpenCLKernelString(kernel):
168
175
kStr += endLine
169
176
kStr += "/* %dx%d micro-tile */%s" % (kernel .microTileNumRows , kernel .microTileNumCols , endLine )
170
177
kStr += "#define MICRO_TILE \\ \\ " + endLine
171
- for a in range (0 , kernel .microTileNumRows ):
178
+ for a in range (0 , int ( kernel .microTileNumRows ) ):
172
179
kStr += " rA[%d] = localA[offA + %d*WG_NUM_ROWS]; \\ \\ %s" % (a , a , endLine )
173
- for b in range (0 , kernel .microTileNumCols ):
180
+ for b in range (0 , int ( kernel .microTileNumCols ) ):
174
181
kStr += " rB[%d] = localB[offB + %d*WG_NUM_COLS]; \\ \\ %s" % (b , b , endLine )
175
182
kStr += " offA += (MACRO_TILE_NUM_ROWS+LOCAL_COL_PAD); \\ \\ " + endLine
176
183
kStr += " offB += (MACRO_TILE_NUM_COLS+LOCAL_ROW_PAD); \\ \\ " + endLine
177
- for a in range (0 , kernel .microTileNumRows ):
178
- for b in range (0 , kernel .microTileNumCols ):
184
+ for a in range (0 , int ( kernel .microTileNumRows ) ):
185
+ for b in range (0 , int ( kernel .microTileNumCols ) ):
179
186
kStr += " TYPE_MAD(rA[%d],rB[%d],rC[%d][%d]); \\ \\ %s" % (a , b , a , b , endLine )
180
187
kStr += " mem_fence(CLK_LOCAL_MEM_FENCE);" + endLine
181
188
kStr += endLine
@@ -365,7 +372,7 @@ def makeOpenCLKernelString(kernel):
365
372
zeroString = "(double2)(0.0, 0.0)"
366
373
else :
367
374
zeroString = "0.0"
368
- for a in range (0 , numALoads ):
375
+ for a in range (0 , int ( numALoads ) ):
369
376
kStr += " lA[ %d*localAStride ] = " % a
370
377
if kernel .isRowKernel ():
371
378
kStr += "( globalARow(%d) >= M) ? %s : " % ( a , zeroString )
@@ -378,7 +385,7 @@ def makeOpenCLKernelString(kernel):
378
385
kStr += "A[ GET_GLOBAL_INDEX_A( globalARow(%d), globalACol(%d) ) ];%s" % (numALoads , numALoads , endLine )
379
386
kStr += " }" + endLine
380
387
381
- for b in range (0 , numBLoads ):
388
+ for b in range (0 , int ( numBLoads ) ):
382
389
kStr += " lB[ %d*localBStride ] = " % b
383
390
if kernel .isColKernel ():
384
391
kStr += "( globalBCol(%d) >= N) ? %s : " % ( b , zeroString )
@@ -399,7 +406,7 @@ def makeOpenCLKernelString(kernel):
399
406
# do mads
400
407
kStr += endLine
401
408
kStr += " /* do mads */" + endLine
402
- for u in range (0 , kernel .unroll ):
409
+ for u in range (0 , int ( kernel .unroll ) ):
403
410
kStr += " MICRO_TILE" + endLine
404
411
405
412
####################################
@@ -437,8 +444,8 @@ def makeOpenCLKernelString(kernel):
437
444
if kernel .precision == "z" :
438
445
kStr += " double type_mad_tmp;" + endLine
439
446
440
- for a in range (0 , kernel .microTileNumRows ):
441
- for b in range (0 , kernel .microTileNumCols ):
447
+ for a in range (0 , int ( kernel .microTileNumRows ) ):
448
+ for b in range (0 , int ( kernel .microTileNumCols ) ):
442
449
if kernel .isRowKernel ():
443
450
kStr += " if (globalCRow+%d*WG_NUM_ROWS < M)" % a
444
451
if kernel .isColKernel ():
@@ -534,7 +541,7 @@ def writeOpenCLKernels():
534
541
cornerKernel .macroTileNumCols = 1
535
542
writeOpenCLKernelToFile (cornerKernel )
536
543
numKernels += 4
537
- print "AutoGemm.py: generated %d kernels" % numKernels
544
+ print ( "AutoGemm.py: generated %d kernels" % numKernels )
538
545
539
546
540
547
@@ -583,5 +590,4 @@ def writeOpenCLKernels():
583
590
584
591
kernelName = kernel .getName ()
585
592
kernelFileName = Common .getKernelSourcePath () + kernelName + "_src.cpp"
586
- print "kernel \" %s\" written to %s" % (kernelName , kernelFileName )
587
-
593
+ print ("kernel \" %s\" written to %s" % (kernelName , kernelFileName ))
0 commit comments