Skip to content

Commit 961e754

Browse files
committed
mkl split gemm for better perf
1 parent 4a07617 commit 961e754

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ else()
136136
set(THIRD_PARTY_BUILD_TYPE Release)
137137
endif()
138138

139+
if(WITH_MKL)  # The split-GEMM switch is only declared when WITH_MKL is enabled.
140+
option(MKL_SPLIT_GEMM "PaddlePaddle MKL gemm would split to small ones" OFF)  # Opt-in cache option; defaults to OFF.
141+
if (MKL_SPLIT_GEMM)
142+
add_definitions(-DPADDLE_MKL_SPLIT_GEMM)  # Defines the PADDLE_MKL_SPLIT_GEMM macro that blas_impl.h tests with #ifdef.
143+
endif()
144+
endif()
139145
set(WITH_MKLML ${WITH_MKL})
140146
if (NOT DEFINED WITH_MKLDNN)
141147
if (WITH_MKL AND AVX2_FOUND)

paddle/fluid/operators/math/blas_impl.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,23 @@ void Blas<platform::CPUDeviceContext>::GEMM(CBLAS_TRANSPOSE transA,
209209
&beta, C, &ldc);
210210
} else {
211211
#endif
212-
CBlas<T>::GEMM(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B,
213-
ldb, beta, C, ldc);
212+
213+
#ifdef PADDLE_MKL_SPLIT_GEMM  // Opt-in path: issue one GEMM per bs-row panel of A/C instead of a single call.
214+
constexpr int bs = 2;  // Number of rows of A (and C) handled by each sub-GEMM.
215+
if (M % bs == 0 && transA == CblasNoTrans && transB == CblasNoTrans) {  // Split only when M divides evenly by bs and neither operand is transposed.
216+
for (int off = 0; off < M; off += bs) {
217+
CBlas<T>::GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, bs, N, K,  // Each panel has bs rows (was `off`: first call computed zero rows, later calls used a growing row count).
218+
alpha, A + off * lda, lda, B, ldb, beta, C + off * ldc,  // Row `off` of row-major C starts at off * ldc (was off * ldb).
219+
ldc);
220+
}
221+
} else {
222+
#endif
223+
CBlas<T>::GEMM(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B,  // Fallback: a single GEMM over the whole problem.
224+
ldb, beta, C, ldc);
225+
#ifdef PADDLE_MKL_SPLIT_GEMM
226+
}  // Closes the else opened in the split-GEMM branch above.
227+
#endif
228+
214229
#ifdef PADDLE_WITH_LIBXSMM
215230
}
216231
#endif

0 commit comments

Comments
 (0)