File tree Expand file tree Collapse file tree 4 files changed +455
-0
lines changed
Expand file tree Collapse file tree 4 files changed +455
-0
lines changed Original file line number Diff line number Diff line change @@ -81,6 +81,7 @@ set(SRC_BLAS
8181 blas/functor/gcn_dgemmSmallMatrices.cc
8282 blas/functor/gcn_sgemmSmallMatrices.cc
8383 blas/functor/hawaii_sgemmBranchKernel.cc
84+ blas/functor/hawaii_sgemmBig1024Kernel.cc
8485)
8586
8687set (SRC_BLAS_HEADERS
@@ -114,6 +115,7 @@ set(SRC_BLAS_HEADERS
114115 blas/functor/include /gcn_dgemmSmallMatrices.h
115116 blas/functor/include /gcn_sgemmSmallMatrices.h
116117 blas/functor/include /hawaii_sgemmBranchKernel.h
118+ blas/functor/include /hawaii_sgemmBig1024Kernel.h
117119)
118120
119121set (SRC_BLAS_GENERIC
Original file line number Diff line number Diff line change 2727#include " hawaii_sgemmSplit64_32.h"
2828#include " gcn_zgemm.h"
2929#include " gpu_dtrsm192.h"
30+ #include " hawaii_sgemmBig1024Kernel.h"
3031
3132FunctorSelectorHawaii FunctorSelectorHawaii::instance ;
3233
@@ -116,6 +117,22 @@ clblasSgemmFunctor * FunctorSelectorHawaii::select_sgemm_specific(clblasSgemmFun
116117 // if (functor)
117118 // return functor;
118119
120+ if ((args.lda % 1024 == 0 ) && (args.ldb % 1024 == 0 ) && (args.K > args.lda / 4 ))
121+ {
122+ if ((args.lda == args.ldb ) && (args.lda >= 4096 ) && (args.lda <= 8192 )) // between 4096 and 8192 for now
123+ {
124+ if (args.lda != 6144 )// 6144 is handled by a special case split
125+ {
126+ if (args.M % 128 == 0 && args.N % 128 == 0 && args.K % 64 == 0 )
127+ {
128+ functor = clBlashawaiiSgemmBig1024KernelFunctor::provide (args, " Hawaii" );
129+ if (functor)
130+ return functor;
131+ }
132+ }
133+ }
134+ }
135+
119136 if ((args.M >= 1184 && args.N >= 1184 ) && (args.M <= 3872 && args.N <= 3872 ) && (args.M % 64 != 0 && args.N % 64 != 0 ) && (args.M % 96 != 0 && args.N % 96 != 0 ) && (args.K % 16 == 0 ))
120137 {
121138 // all the mod32 sizes that are not mod64 or mod96, ranging from 1184 to 3872
You can’t perform that action at this time.
0 commit comments