Skip to content

Commit 4447bfe

Browse files
author
Timmy
committed
Merge pull request #100 from TimmyLiu/develop
better handle sgemm NT where M and N are mod32 and not mod64. M and N…
2 parents 6d1e3c4 + f4af838 commit 4447bfe

File tree

7 files changed

+1026
-2
lines changed

7 files changed

+1026
-2
lines changed

src/library/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ set(SRC_BLAS
7373
blas/functor/hawaii_dgemmChannelConflict.cc
7474
blas/functor/hawaii_dgemmSplitKernel.cc
7575
blas/functor/hawaii_sgemmSplitKernel.cc
76+
blas/functor/hawaii_sgemmSplit64_32.cc
7677
blas/functor/gcn_dgemmCommon.cc
7778
blas/functor/gcn_sgemm.cc
7879
blas/functor/gcn_dgemmSmallMatrices.cc
@@ -103,6 +104,7 @@ set(SRC_BLAS_HEADERS
103104
blas/functor/include/hawaii_dgemmChannelConflict.h
104105
blas/functor/include/hawaii_dgemmSplitKernel.h
105106
blas/functor/include/hawaii_sgemmSplitKernel.h
107+
blas/functor/include/hawaii_sgemmSplit64_32.h
106108
blas/functor/include/gcn_dgemmCommon.h
107109
blas/functor/include/gcn_sgemm.h
108110
blas/functor/include/gcn_dgemmSmallMatrices.h
@@ -248,6 +250,7 @@ set(SRC_CL_TEMPLATES_GEN
248250
sgemm_gcn.clHawaii_64.bin.cl
249251
sgemm_gcn.clBonaire_64.bin.cl
250252
sgemm_gcn.clTahiti_64.bin.cl
253+
sgemm_hawaiiSplit64_32.clHawaii_64.bin.cl
251254
)
252255

253256
set(SRC_BLAS_GENERIC_HEADERS

src/library/bingen.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ ${CLTEMPLATE_PATH}/dgemm_hawaiiSplitKernel.cl
1414
${CLTEMPLATE_PATH}/sgemm_hawaiiSplitKernel.cl
1515
${CLTEMPLATE_PATH}/sgemm_gcn.cl
1616
${CLTEMPLATE_PATH}/sgemm_gcn_SmallMatrices.cl
17+
${CLTEMPLATE_PATH}/sgemm_hawaiiSplit64_32.cl
1718
)
1819

1920

src/library/blas/functor/hawaii.cc

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "gcn_dgemmSmallMatrices.h"
2525
#include "gcn_sgemmSmallMatrices.h"
2626
#include "hawaii_sgemmBranchKernel.h"
27+
#include "hawaii_sgemmSplit64_32.h"
2728

2829
FunctorSelectorHawaii FunctorSelectorHawaii::instance ;
2930

@@ -106,8 +107,24 @@ clblasSgemmFunctor * FunctorSelectorHawaii::select_sgemm_specific(clblasSgemmFun
106107
SmallMatricesMod32 = SmallMatricesMod32&&Not_TT&&args.K % 16 == 0;
107108
//SmallMatrices= false;
108109

109-
bool useSpliKernel=((args.M%96==0 && args.N%96==0) || !(args.M%64==0 && args.N%64==0&& args.M<4000 &&args.N<4000)) /*&&args.K%16==0*/;
110+
bool useSpliKernel=((args.M%96==0 && args.N%96==0) || !(args.M%64==0 && args.N%64==0&& args.M<4000 &&args.N<4000)) ;
110111
useSpliKernel=useSpliKernel&&Not_TT;
112+
113+
//functor = clBlashawaiiSgemmSplit64_32Functor::provide(args, "Hawaii");
114+
//if (functor)
115+
// return functor;
116+
117+
if ((args.M >= 1184 && args.N >= 1184) && (args.M <= 3872 && args.N <= 3872) && (args.M % 64 != 0 && args.N % 64 != 0) && (args.M % 96 != 0 && args.N % 96 != 0) && (args.K % 16 == 0))
118+
{
119+
//all the mod32 sizes that are not mod64 or mod96, ranging from 1184 to 3872
120+
//non mod32 cases are not implemented in this approach and are of less interest
121+
if ((args.M % 32 == 0 && args.N % 32 == 0) && (args.transA == clblasNoTrans && args.transB == clblasTrans))
122+
{
123+
functor = clBlashawaiiSgemmSplit64_32Functor::provide(args, "Hawaii");
124+
if (functor)
125+
return functor;
126+
}
127+
}
111128

112129
//in plain English, the condition below is: a small matrix that is (not mod32) and (not_TT), and K has to be mod 16
113130
if (SmallMatrices && (!SmallMatricesMod32) && (Not_TT) && (args.K%16 == 0))
@@ -141,6 +158,7 @@ clblasSgemmFunctor * FunctorSelectorHawaii::select_sgemm_specific(clblasSgemmFun
141158

142159
// else use the fallback implementation
143160
return this->clblasFunctorSelector::select_sgemm_specific(args);
161+
144162
#endif
145163
}
146164

0 commit comments

Comments
 (0)