@@ -162,12 +162,6 @@ def makeOpenCLKernelString(kernel):
162
162
" REG.s0 = mad( -ALPHA.s1, REG.s1, REG.s0 ); \\ \\ " + endLine +
163
163
" REG.s1 *= ALPHA.s0; \\ \\ " + endLine +
164
164
" REG.s1 = mad( ALPHA.s1, type_mad_tmp, REG.s1 ); \\ \\ " + endLine +
165
- " /* (2) */ \\ \\ " + endLine +
166
- " REG.s0 = mad( BETA.s0, DST.s0, REG.s0 ); \\ \\ " + endLine +
167
- " REG.s0 = mad( -BETA.s1, DST.s1, REG.s0 ); \\ \\ " + endLine +
168
- " REG.s1 = mad( BETA.s1, DST.s0, REG.s1 ); \\ \\ " + endLine +
169
- " REG.s1 = mad( BETA.s0, DST.s1, REG.s1 ); \\ \\ " + endLine +
170
- " /* (3) */ \\ \\ " + endLine +
171
165
" DST = REG;" + endLine )
172
166
173
167
####################################
@@ -357,11 +351,11 @@ def makeOpenCLKernelString(kernel):
357
351
kStr += endLine
358
352
kStr += " /* load global -> local */" + endLine
359
353
numALoads = (kernel .workGroupNumRows * kernel .microTileNumRows * kernel .unroll ) \
360
- / (kernel .workGroupNumRows * kernel .workGroupNumCols )
354
+ // (kernel .workGroupNumRows * kernel .workGroupNumCols ) # // -- integer divide
361
355
numALoadsR = (kernel .workGroupNumRows * kernel .microTileNumRows * kernel .unroll ) \
362
356
% (kernel .workGroupNumRows * kernel .workGroupNumCols )
363
357
numBLoads = (kernel .workGroupNumCols * kernel .microTileNumCols * kernel .unroll ) \
364
- / (kernel .workGroupNumRows * kernel .workGroupNumCols )
358
+ // (kernel .workGroupNumRows * kernel .workGroupNumCols ) # // - integer divide
365
359
numBLoadsR = (kernel .workGroupNumCols * kernel .microTileNumCols * kernel .unroll ) \
366
360
% (kernel .workGroupNumRows * kernel .workGroupNumCols )
367
361
0 commit comments