Skip to content

Commit c355d02

Browse files
committed
Merge pull request #202 from arrayfire/arrayfire-release-test
Fixing issue with beta == 0 in AutoGemm kernels
2 parents 590b47d + c41cc5d commit c355d02

File tree

3 files changed

+7
-13
lines changed

3 files changed

+7
-13
lines changed

src/library/blas/AutoGemm/KernelOpenCL.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -162,12 +162,6 @@ def makeOpenCLKernelString(kernel):
162162
" REG.s0 = mad( -ALPHA.s1, REG.s1, REG.s0 ); \\\\" + endLine +
163163
" REG.s1 *= ALPHA.s0; \\\\" + endLine +
164164
" REG.s1 = mad( ALPHA.s1, type_mad_tmp, REG.s1 ); \\\\" + endLine +
165-
" /* (2) */ \\\\" + endLine +
166-
" REG.s0 = mad( BETA.s0, DST.s0, REG.s0 ); \\\\" + endLine +
167-
" REG.s0 = mad( -BETA.s1, DST.s1, REG.s0 ); \\\\" + endLine +
168-
" REG.s1 = mad( BETA.s1, DST.s0, REG.s1 ); \\\\" + endLine +
169-
" REG.s1 = mad( BETA.s0, DST.s1, REG.s1 ); \\\\" + endLine +
170-
" /* (3) */ \\\\" + endLine +
171165
" DST = REG;" + endLine )
172166

173167
####################################
@@ -357,11 +351,11 @@ def makeOpenCLKernelString(kernel):
357351
kStr += endLine
358352
kStr += " /* load global -> local */" + endLine
359353
numALoads = (kernel.workGroupNumRows*kernel.microTileNumRows*kernel.unroll) \
360-
/ (kernel.workGroupNumRows*kernel.workGroupNumCols)
354+
// (kernel.workGroupNumRows*kernel.workGroupNumCols) # // -- integer divide
361355
numALoadsR = (kernel.workGroupNumRows*kernel.microTileNumRows*kernel.unroll) \
362356
% (kernel.workGroupNumRows*kernel.workGroupNumCols)
363357
numBLoads = (kernel.workGroupNumCols*kernel.microTileNumCols*kernel.unroll) \
364-
/ (kernel.workGroupNumRows*kernel.workGroupNumCols)
358+
// (kernel.workGroupNumRows*kernel.workGroupNumCols) # // - integer divide
365359
numBLoadsR = (kernel.workGroupNumCols*kernel.microTileNumCols*kernel.unroll) \
366360
% (kernel.workGroupNumRows*kernel.workGroupNumCols)
367361

src/library/blas/AutoGemm/KernelParameters.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -89,11 +89,11 @@ def isValid(self):
8989
return True
9090
"""
9191
numALoads = (self.workGroupNumRows*self.microTileNumRows*self.unroll) \
92-
/ (self.workGroupNumRows*self.workGroupNumCols)
92+
// (self.workGroupNumRows*self.workGroupNumCols)
9393
numALoadsR = (self.workGroupNumRows*self.microTileNumRows*self.unroll) \
9494
% (self.workGroupNumRows*self.workGroupNumCols)
9595
numBLoads = (self.workGroupNumCols*self.microTileNumCols*self.unroll) \
96-
/ (self.workGroupNumRows*self.workGroupNumCols)
96+
// (self.workGroupNumRows*self.workGroupNumCols)
9797
numBLoadsR = (self.workGroupNumCols*self.microTileNumCols*self.unroll) \
9898
% (self.workGroupNumRows*self.workGroupNumCols)
9999
if (numALoads>0 and numALoadsR>0):

src/library/blas/trtri/TrtriKernelSourceIncludes.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,11 @@
66

77
//**** compiler flags
88
//**** online compilation flags
9-
const char * const TrtriBuildOptions = "-cl-std=CL2.0";
10-
const char * const TrtribinBuildOptions = "-cl-std=CL2.0";
9+
const char * const TrtriBuildOptions = "-cl-std=CL" OPENCL_VERSION;
10+
const char * const TrtribinBuildOptions = "-cl-std=CL" OPENCL_VERSION;
1111

1212
/*mod 192 dtrsm*/
13-
extern const char * const diag_dtrtri_upper_192_12_src;
13+
extern const char * const diag_dtrtri_upper_192_12_src;
1414
extern unsigned char *diag_dtrtri_upper_192_12_bin;
1515
extern size_t diag_dtrtri_upper_192_12_binSize;
1616

0 commit comments

Comments
 (0)