Skip to content

Commit 99b3931

Browse files
committed
adding AutoGemm kernel selection logic for Fiji
1 parent 57e1e65 commit 99b3931

File tree

4 files changed

+62
-16
lines changed

4 files changed

+62
-16
lines changed

src/library/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,10 @@ option( PRECOMPILE_GEMM_TRANS_CN "AutoGemm: pre-compile CN transpose cases" OFF)
9191
option( PRECOMPILE_GEMM_TRANS_CT "AutoGemm: pre-compile CT transpose cases" OFF)
9292
option( PRECOMPILE_GEMM_TRANS_CC "AutoGemm: pre-compile CC transpose cases" OFF)
9393

94+
set( AUTOGEMM_ARCHITECTURE "Hawaii" CACHE STRING "AutoGemm: device for kernel selection logic" )
95+
set_property( CACHE AUTOGEMM_ARCHITECTURE PROPERTY STRINGS "Hawaii" "Fiji" )
96+
97+
9498

9599
# opencl compiler version
96100
#set( PRECOMPILE_GEMM_OPENCL_VERSION "2.0" CACHE STRING "OpenCL compiler version supported by device driver." )
@@ -239,7 +243,7 @@ endif()#endif precompile active
239243
################################################################################
240244
add_custom_command(
241245
OUTPUT ${AUTOGEMM_HEADERS} ${AUTOGEMM_SRC}
242-
COMMAND python ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/AutoGemm.py --output-path ${CMAKE_BINARY_DIR}/include --opencl-compiler-version ${OPENCL_VERSION}
246+
COMMAND python ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/AutoGemm.py --output-path ${CMAKE_BINARY_DIR}/include --opencl-compiler-version ${OPENCL_VERSION} --architecture ${AUTOGEMM_ARCHITECTURE}
243247
DEPENDS ${AUTOGEMM_SCRIPTS}
244248
)
245249

src/library/blas/AutoGemm/AutoGemm.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import Common
2222
import Includes
2323
import KernelSelection
24+
import AutoGemmParameters
2425
import KernelOpenCL
2526

2627

@@ -32,6 +33,7 @@
3233
ap = argparse.ArgumentParser(description="AutoGemm")
3334
ap.add_argument("--output-path", dest="output" )
3435
ap.add_argument("--opencl-compiler-version", dest="clCompilerVersion", action="store", choices=["1.1", "1.2", "2.0" ])
36+
ap.add_argument("--architecture", dest="architecture", action="store", choices=["Hawaii", "Fiji" ])
3537
args = ap.parse_args()
3638
if args.output:
3739
Common.setOutputPath(args.output)
@@ -40,6 +42,7 @@
4042

4143
print "AutoGemm.py: using OpenCL " + args.clCompilerVersion + " compiler"
4244
Common.setClCompilerVersion(args.clCompilerVersion)
45+
AutoGemmParameters.setArchitecture(args.architecture)
4346

4447
KernelOpenCL.writeOpenCLKernels()
4548
KernelSelection.writeKernelSelection()

src/library/blas/AutoGemm/AutoGemmParameters.py

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Tile Parameters for Kernel Selection Data
66
################################################################################
77

8-
kernelSelectionData = {
8+
kernelSelectionDataHawaii = {
99
# [ size, fallback tile, [ valid tiles ] ],
1010
"s":[
1111
[ 4000, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6] ] ],
@@ -54,17 +54,55 @@
5454
],
5555
}
5656

57-
"""
58-
for testing all micro-tile sizes
59-
[ 128, [ 16, 16, 8, 8], [ [ 16, 16, 8, 8] ] ],
60-
[ 112, [ 16, 16, 7, 7], [ [ 16, 16, 7, 7] ] ],
61-
[ 96, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6] ] ],
62-
[ 80, [ 16, 16, 5, 5], [ [ 16, 16, 5, 5] ] ],
63-
[ 64, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4] ] ],
64-
[ 48, [ 16, 16, 3, 3], [ [ 16, 16, 3, 3] ] ],
65-
[ 32, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2] ] ],
66-
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
67-
"""
57+
kernelSelectionDataFiji = {
58+
"s":[
59+
[ 3072, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 4, 4] ] ],
60+
[ 2240, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
61+
[ 1760, [ 16, 16, 4, 4], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
62+
[ 1600, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
63+
[ 1056, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 3, 3], [ 16, 16, 2, 2] ] ],
64+
[ 960, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3], [ 16, 16, 2, 2] ] ],
65+
[ 736, [ 16, 16, 3, 3], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
66+
[ 528, [ 16, 16, 3, 3], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
67+
[ 432, [ 16, 16, 2, 2], [ [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
68+
[ 320, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
69+
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
70+
],
71+
"d":[
72+
[ 3200, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 2, 2], [ 8, 8, 6, 6 ] ] ],
73+
[ 1632, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 5, 5], [ 8, 8, 6, 6 ] ] ],
74+
[ 1280, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 5, 5], [ 8, 8, 6, 6 ], [ 16, 16, 1, 1] ] ],
75+
[ 1056, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
76+
[ 672, [ 16, 16, 2, 2], [ [ 16, 16, 1, 1] ] ],
77+
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
78+
],
79+
"c":[
80+
[ 2240, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], ] ],
81+
[ 1440, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
82+
[ 1088, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 5, 5] ] ],
83+
[ 704, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 5, 5] ] ],
84+
[ 528, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 1, 1] ] ],
85+
[ 336, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
86+
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
87+
],
88+
"z":[
89+
[ 2528, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 3, 3] ] ],
90+
[ 1872, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 1, 1] ] ],
91+
[ 1040, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
92+
[ 768, [ 16, 16, 1, 1], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
93+
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
94+
]
95+
}
96+
97+
kernelSelectionData = kernelSelectionDataHawaii
98+
def setArchitecture(architecture):
99+
global kernelSelectionData, kernelSelectionDataHawaii, kernelSelectionDataFiji
100+
101+
if architecture == "Fiji":
102+
kernelSelectionData = kernelSelectionDataFiji
103+
else:
104+
kernelSelectionData = kernelSelectionDataHawaii
105+
68106

69107
################################################################################
70108
# Non-Tile Parameters

src/library/blas/AutoGemm/KernelSelection.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -675,9 +675,10 @@ def writeKernelSelection():
675675
# Main
676676
################################################################################
677677
if __name__ == "__main__":
678-
if len(sys.argv) == 2:
678+
if len(sys.argv) == 3:
679679
Common.setOutputPath(sys.argv[1])
680+
AutoGemmParameters.setArchitecture(sys.argv[2])
681+
writeKernelSelection()
680682
else:
681-
print "Warning: No output path specified; default is working directory."
682-
writeKernelSelection()
683+
print "USAGE: python KernelSelection.py output_path architecture"
683684

0 commit comments

Comments
 (0)