Skip to content

Commit e7e01ad

Browse files
committed
AutoGemm performance data; sgemm add unroll=8 for benchmarking; gemm compile kernel prints build log
1 parent 4b34283 commit e7e01ad

13 files changed

+5797
-11
lines changed

doc/performance/clBLAS_2.6.0/S9150/cgemmNT_S9150_14.50.2_2.6.0_8.csv

Lines changed: 721 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.6.0/S9150/dgemmNT_S9150_14.50.2_2.6.0_8.csv

Lines changed: 721 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.6.0/S9150/sgemmNT_S9150_14.50.2_2.6.0_8.csv

Lines changed: 721 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.6.0/S9150/zgemmNT_S9150_14.50.2_2.6.0_8.csv

Lines changed: 721 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.7.1/S9150/cgemmNT_S9150_14.50.2_2.7.1_8.csv

Lines changed: 721 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.7.1/S9150/dgemmNT_S9150_14.50.2_2.7.1_8.csv

Lines changed: 721 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.7.1/S9150/sgemmNT_S9150_14.50.2_2.7.1_8.csv

Lines changed: 721 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.7.1/S9150/zgemmNT_S9150_14.50.2_2.7.1_8.csv

Lines changed: 721 additions & 0 deletions
Large diffs are not rendered by default.

src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ if( NOT DEFINED clBLAS_VERSION_MINOR )
112112
endif( )
113113

114114
if( NOT DEFINED clBLAS_VERSION_PATCH )
115-
set( clBLAS_VERSION_PATCH 0 )
115+
set( clBLAS_VERSION_PATCH 1 )
116116
endif( )
117117

118118
set( clBLAS_VERSION "${clBLAS_VERSION_MAJOR}.${clBLAS_VERSION_MINOR}.${clBLAS_VERSION_PATCH}")

src/library/blas/AutoGemm/AutoGemmParameters.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,15 @@
99
# [ size, fallback tile, [ valid tiles ] ],
1010
"s":[
1111
[ 4000, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6] ] ],
12-
[ 2496, [ 16, 16, 4, 4], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4] ] ],
12+
[ 2496, [ 16, 16, 4, 4], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4] ] ],
1313
[ 2448, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6] ] ],
1414
[ 1600, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4], [ 16, 16, 5, 5] ] ],
1515
[ 1008, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
16-
[ 960, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
17-
[ 896, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
18-
[ 864, [ 16, 16, 2, 2], [ [ 16, 16, 6, 6], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 4, 4], [ 16, 16, 2, 2] ] ],
19-
[ 784, [ 16, 16, 2, 2], [ [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
20-
[ 768, [ 16, 16, 2, 2], [ [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
16+
[ 960, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
17+
[ 896, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
18+
[ 864, [ 16, 16, 2, 2], [ [ 16, 16, 6, 6], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 4, 4], [ 16, 16, 2, 2] ] ],
19+
[ 784, [ 16, 16, 2, 2], [ [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
20+
[ 768, [ 16, 16, 2, 2], [ [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
2121
[ 720, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 3, 3] ] ],
2222
[ 464, [ 16, 16, 3, 3], [ [ 16, 16, 3, 3], [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 5, 5] ] ],
2323
[ 304, [ 16, 16, 2, 2], [ [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
@@ -76,7 +76,7 @@
7676
transposes = { "s":["N", "T"], "d":["N", "T"], \
7777
"c":["N", "T", "C"], "z":["N", "T", "C"] }
7878

79-
unrolls = { "s":[16, 1], "d":[8, 1], "c":[8, 1], "z":[8, 1] }
79+
unrolls = { "s":[16, 8, 1], "d":[8, 1], "c":[8, 1], "z":[8, 1] }
8080

8181
betas = [ 0, 1 ]
8282

0 commit comments

Comments
 (0)