File tree Expand file tree Collapse file tree 4 files changed +455
-0
lines changed Expand file tree Collapse file tree 4 files changed +455
-0
lines changed Original file line number Diff line number Diff line change @@ -81,6 +81,7 @@ set(SRC_BLAS
81
81
blas/functor/gcn_dgemmSmallMatrices.cc
82
82
blas/functor/gcn_sgemmSmallMatrices.cc
83
83
blas/functor/hawaii_sgemmBranchKernel.cc
84
+ blas/functor/hawaii_sgemmBig1024Kernel.cc
84
85
)
85
86
86
87
set (SRC_BLAS_HEADERS
@@ -114,6 +115,7 @@ set(SRC_BLAS_HEADERS
114
115
blas/functor/include/gcn_dgemmSmallMatrices.h
115
116
blas/functor/include/gcn_sgemmSmallMatrices.h
116
117
blas/functor/include/hawaii_sgemmBranchKernel.h
118
+ blas/functor/include/hawaii_sgemmBig1024Kernel.h
117
119
)
118
120
119
121
set (SRC_BLAS_GENERIC
Original file line number Diff line number Diff line change 27
27
#include " hawaii_sgemmSplit64_32.h"
28
28
#include " gcn_zgemm.h"
29
29
#include " gpu_dtrsm192.h"
30
+ #include " hawaii_sgemmBig1024Kernel.h"
30
31
31
32
FunctorSelectorHawaii FunctorSelectorHawaii::instance ;
32
33
@@ -116,6 +117,22 @@ clblasSgemmFunctor * FunctorSelectorHawaii::select_sgemm_specific(clblasSgemmFun
116
117
// if (functor)
117
118
// return functor;
118
119
120
+ if ((args.lda % 1024 == 0 ) && (args.ldb % 1024 == 0 ) && (args.K > args.lda / 4 ))
121
+ {
122
+ if ((args.lda == args.ldb ) && (args.lda >= 4096 ) && (args.lda <= 8192 )) // between 4096 and 8192 for now
123
+ {
124
+ if (args.lda != 6144 )// 6144 is handled by a special case split
125
+ {
126
+ if (args.M % 128 == 0 && args.N % 128 == 0 && args.K % 64 == 0 )
127
+ {
128
+ functor = clBlashawaiiSgemmBig1024KernelFunctor::provide (args, " Hawaii" );
129
+ if (functor)
130
+ return functor;
131
+ }
132
+ }
133
+ }
134
+ }
135
+
119
136
if ((args.M >= 1184 && args.N >= 1184 ) && (args.M <= 3872 && args.N <= 3872 ) && (args.M % 64 != 0 && args.N % 64 != 0 ) && (args.M % 96 != 0 && args.N % 96 != 0 ) && (args.K % 16 == 0 ))
120
137
{
121
138
// all the mod32 sizes that are not mod64 or mod96, ranging from 1184 to 3872
You can’t perform that action at this time.
0 commit comments