@@ -90,6 +90,7 @@ option( PRECOMPILE_GEMM_TRANS_CN "AutoGemm: pre-compile CN transpose cases" OFF)
90
90
option(PRECOMPILE_GEMM_TRANS_CT "AutoGemm: pre-compile CT transpose cases" OFF) # off unless the user enables it
91
91
option(PRECOMPILE_GEMM_TRANS_CC "AutoGemm: pre-compile CC transpose cases" OFF) # off unless the user enables it
92
92
93
+
93
94
# opencl compiler version
94
95
#set( PRECOMPILE_GEMM_OPENCL_VERSION "2.0" CACHE STRING "OpenCL compiler version supported by device driver." )
95
96
#set_property( CACHE PRECOMPILE_GEMM_OPENCL_VERSION PROPERTY STRINGS 2.0 1.2 1.1 )
@@ -291,7 +292,25 @@ source_group(AutoGemm\\src FILES ${AUTOGEMM_SRC} ${AUTOGEMM_PRECOMPILED_KERNELS}
291
292
# AutoGemm End
292
293
################################################################################
293
294
295
+ ################################################################################
296
+ # BEGIN Pre Compile General (static) Kernels
297
+ ################################################################################
298
+ # Options for pre-compiling TRSM kernels: both default to OFF; when enabled, each adds its CLBLAS_OFFLINE_COMPILE_* definition below.
299
+ option ( PRECOMPILE_TRSM_STRSM "pre-compile available strsm kernels" OFF )
300
+ option ( PRECOMPILE_TRSM_DTRSM "pre-compile available dtrsm kernels" OFF )
301
+ if (PRECOMPILE_TRSM_DTRSM)
302
+ add_definitions (-DCLBLAS_OFFLINE_COMPILE_DTRSM)
303
+ message (STATUS "precompile DTRSM kernels." )
304
+ endif ()
305
+ if (PRECOMPILE_TRSM_STRSM)
306
+ add_definitions (-DCLBLAS_OFFLINE_COMPILE_STRSM)
307
+ message (STATUS "precompile STRSM kernels. (not yet implemented)" )
308
+ endif ()
309
+
294
310
311
+ ################################################################################
312
+ # END Pre Compile General (static) Kernels
313
+ ################################################################################
295
314
296
315
set (SRC_BLAS
297
316
blas/init.c
@@ -670,6 +689,40 @@ ExternalProject_Add( tplgen
670
689
INSTALL_COMMAND ""
671
690
)
672
691
692
+ ################ OCLBinaryGenerator: only configured when a TRSM pre-compile option is enabled
693
+ if (PRECOMPILE_TRSM_DTRSM OR PRECOMPILE_TRSM_STRSM)
694
+ # Build the OCLBinaryGenerator tool from library/tools as an external project,
695
+ # forwarding the OpenCL library and include settings; nothing is installed (INSTALL_COMMAND is empty).
696
+ ExternalProject_Add( OCLBinaryGenerator
697
+ URL "${CMAKE_SOURCE_DIR}/library/tools/OCLBinaryGenerator"
698
+ CMAKE_ARGS -DOPENCL_LIBRARIES=${OPENCL_LIBRARIES} -DOPENCL_INCLUDE_DIRS=${OPENCL_INCLUDE_DIRS}
699
+ INSTALL_COMMAND ""
700
+ )
701
+ ExternalProject_Get_Property( OCLBinaryGenerator binary_dir )
702
+ message (STATUS "OCLBinaryGenerator binary_dir =${binary_dir} " )
703
+ set ( OCLBinaryGeneratorBinaryDir "${binary_dir}/staging" )
704
+
705
+ # OCLBinaryGenerator requires at least three inputs
706
+ # 1, path to the kernel file
707
+ # 2, file name
708
+ # 3, output directory
709
+ # 4, [optional] compiler flags
710
+ # 5, [optional] targeted hardware. If this is not supplied OCLBinaryGenerator will generate binary for the first device on system
711
+ set ( OCL_COMPILER_FLAGS " " )
712
+ if ( OPENCL_VERSION STREQUAL "2.0" )
713
+ set ( OCL_COMPILER_FLAGS "-cl-std=CL2.0" )
714
+ endif ()
715
+ # OCLBinaryGenerator_GEN runs library/OCLBinaryGenerator.cmake in script mode (-P) and is ordered after the tool's external project.
716
+ add_custom_target ( OCLBinaryGenerator_GEN )
717
+ add_custom_command (TARGET OCLBinaryGenerator_GEN
718
+ PRE_BUILD
719
+ COMMAND ${CMAKE_COMMAND} -DOCLBinaryGeneratorBinaryDir=${OCLBinaryGeneratorBinaryDir} -DSOURCE_DIR=${CMAKE_SOURCE_DIR} -DBINARY_DIR=${CMAKE_BINARY_DIR} -DOCL_COMPILER_FLAGS=${OCL_COMPILER_FLAGS}
720
+ -P "${CMAKE_SOURCE_DIR}/library/OCLBinaryGenerator.cmake"
721
+ )
722
+ add_dependencies ( OCLBinaryGenerator_GEN OCLBinaryGenerator )
723
+
724
+ endif ()
725
+
673
726
# if offline compilation is not chosen, bingen should not be built
674
727
if (OPENCL_OFFLINE_BUILD_TAHITI_KERNEL OR OPENCL_OFFLINE_BUILD_HAWAII_KERNEL OR OPENCL_OFFLINE_BUILD_BONAIRE_KERNEL)
675
728
ExternalProject_Add( bingen
@@ -801,6 +854,10 @@ add_library(clBLAS
801
854
)
802
855
add_dependencies (clBLAS GENERATE_CLT)
803
856
857
+ if(PRECOMPILE_TRSM_STRSM OR PRECOMPILE_TRSM_DTRSM) # either TRSM pre-compile option enables the generator target
858
+ add_dependencies(clBLAS OCLBinaryGenerator_GEN) # build OCLBinaryGenerator_GEN before clBLAS
859
+ endif()
860
+
804
861
# AutoGemm needs compiler flag to utilize pre-compiled kernels
805
862
if ( ${PRECOMPILE_GEMM_ACTIVE} )
806
863
set_target_properties (clBLAS PROPERTIES COMPILE_FLAGS -DAUTOGEMM_USE_PRE_COMPILED_KERNELS)
0 commit comments