Skip to content

Commit 828aff1

Browse files
committed
Merge pull request #112 from TimmyLiu/develop
add codepath for dtrsm when M and N are mod192
2 parents f9a2250 + f7c6536 commit 828aff1

File tree

6 files changed

+1680
-1
lines changed

6 files changed

+1680
-1
lines changed

src/library/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ set(SRC_BLAS
6969
blas/functor/bonaire.cc
7070
blas/functor/gcn_dgemm.cc
7171
blas/functor/gpu_dtrsm.cc
72+
blas/functor/gpu_dtrsm192.cc
7273
blas/functor/functor_fill.cc
7374
blas/functor/hawaii_dgemmChannelConflict.cc
7475
blas/functor/hawaii_dgemmSplitKernel.cc
@@ -101,6 +102,7 @@ set(SRC_BLAS_HEADERS
101102
blas/functor/include/bonaire.h
102103
blas/functor/include/gcn_dgemm.h
103104
blas/functor/include/gpu_dtrsm.h
105+
blas/functor/include/gpu_dtrsm192.h
104106
blas/functor/include/BinaryBuild.h
105107
blas/functor/include/hawaii_dgemmChannelConflict.h
106108
blas/functor/include/hawaii_dgemmSplitKernel.h
@@ -229,7 +231,7 @@ set (SRC_CL_TEMPLATES
229231
dgemm_hawaiiSplitKernel.cl
230232
sgemm_hawaiiSplitKernel.cl
231233
dtrsm_gpu.cl
232-
234+
dtrsm_gpu192.cl
233235
dgemm_gcn_SmallMatrices.cl
234236
sgemm_gcn_SmallMatrices.cl
235237
sgemm_gcn.cl
@@ -239,6 +241,7 @@ set (SRC_CL_TEMPLATES
239241
set(SRC_CL_TEMPLATES_GEN
240242
dgemm_hawai.clHawaii_64.bin.cl
241243
dtrsm_gpu.clHawaii_64.bin.cl
244+
dtrsm_gpu192.clHawaii_64.bin.cl
242245
dgemm_hawaiiChannelConfilct.clHawaii_64.bin.cl
243246
dgemm_hawaiiSplitKernel.clHawaii_64.bin.cl
244247
sgemm_hawaiiSplitKernel.clHawaii_64.bin.cl

src/library/bingen.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ ${CLTEMPLATE_PATH}/sgemm_gcn.cl
1616
${CLTEMPLATE_PATH}/zgemm_gcn.cl
1717
${CLTEMPLATE_PATH}/sgemm_gcn_SmallMatrices.cl
1818
${CLTEMPLATE_PATH}/sgemm_hawaiiSplit64_32.cl
19+
${CLTEMPLATE_PATH}/dtrsm_gpu192.cl
1920
)
2021

2122

0 commit comments

Comments
 (0)