|
24 | 24 | #include "gcn_dgemmSmallMatrices.h"
|
25 | 25 | #include "gcn_sgemmSmallMatrices.h"
|
26 | 26 | #include "hawaii_sgemmBranchKernel.h"
|
| 27 | +#include "hawaii_sgemmSplit64_32.h" |
27 | 28 |
|
28 | 29 | FunctorSelectorHawaii FunctorSelectorHawaii::instance ;
|
29 | 30 |
|
@@ -106,8 +107,24 @@ clblasSgemmFunctor * FunctorSelectorHawaii::select_sgemm_specific(clblasSgemmFun
|
106 | 107 | SmallMatricesMod32 = SmallMatricesMod32&&Not_TT&&args.K % 16 == 0;
|
107 | 108 | //SmallMatrices= false;
|
108 | 109 |
|
109 |
| - bool useSpliKernel=((args.M%96==0 && args.N%96==0) || !(args.M%64==0 && args.N%64==0&& args.M<4000 &&args.N<4000)) /*&&args.K%16==0*/; |
| 110 | + bool useSpliKernel=((args.M%96==0 && args.N%96==0) || !(args.M%64==0 && args.N%64==0&& args.M<4000 &&args.N<4000)) ; |
110 | 111 | useSpliKernel=useSpliKernel&&Not_TT;
|
| 112 | + |
| 113 | + //functor = clBlashawaiiSgemmSplit64_32Functor::provide(args, "Hawaii"); |
| 114 | + //if (functor) |
| 115 | + // return functor; |
| 116 | + |
| 117 | + if ((args.M >= 1184 && args.N >= 1184) && (args.M <= 3872 && args.N <= 3872) && (args.M % 64 != 0 && args.N % 64 != 0) && (args.M % 96 != 0 && args.N % 96 != 0) && (args.K % 16 == 0)) |
| 118 | + { |
| 119 | + //all the mod32 sizes that are not mod64 or mod96, ranging from 1184 to 3872 |
| 120 | + //non mod32 cases are not implemented in this approach and are of less interest |
| 121 | + if ((args.M % 32 == 0 && args.N % 32 == 0) && (args.transA == clblasNoTrans && args.transB == clblasTrans)) |
| 122 | + { |
| 123 | + functor = clBlashawaiiSgemmSplit64_32Functor::provide(args, "Hawaii"); |
| 124 | + if (functor) |
| 125 | + return functor; |
| 126 | + } |
| 127 | + } |
111 | 128 |
|
112 | 129 | //in plain English: a small matrix that is (not mod32) and (not_TT), and K must be a multiple of 16
|
113 | 130 | if (SmallMatrices && (!SmallMatricesMod32) && (Not_TT) && (args.K%16 == 0))
|
@@ -141,6 +158,7 @@ clblasSgemmFunctor * FunctorSelectorHawaii::select_sgemm_specific(clblasSgemmFun
|
141 | 158 |
|
142 | 159 | // else use the fallback implementation
|
143 | 160 | return this->clblasFunctorSelector::select_sgemm_specific(args);
|
| 161 | + |
144 | 162 | #endif
|
145 | 163 | }
|
146 | 164 |
|
|
0 commit comments