1- cmake_minimum_required (VERSION 3.10)
1+ cmake_minimum_required (VERSION 3.19) # at least 3.19 in order to have the compression-level in Zstd
22project (hiprt)
33
44#
@@ -15,6 +15,7 @@ option(HIPRTEW "Use hiprtew" OFF)
# Leave the kernel source and binaries unencrypted.
option(NO_ENCRYPT "Don't encrypt kernel source and binaries" OFF)

# Skip building the unit tests.
option(NO_UNITTEST "Don't build unit tests" OFF)

# Prefer HIP 5.
option(HIPRT_PREFER_HIP_5 "Prefer HIP 5" OFF)

# Compress the compiled kernels. Only used when BAKE_COMPILED_KERNEL is enabled.
# Leaving it ON is advised, as that is the path tested by the HIPRT team.
option(COMPILED_COMPRESSION "enable compression of compiled kernels" ON)

# CUDA support is added automatically when a CUDA install is detected;
# turn this ON to force CUDA to be disabled.
option(FORCE_DISABLE_CUDA "By default Cuda support is automatically added if a Cuda install is detected. Turn this flag to ON to force Cuda to be disabled." OFF)
2021
@@ -388,6 +389,10 @@ set(KERNEL_HIPRT_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/hiprt${version_
# Paths of the precompiled kernel binaries. The path components are joined
# without any whitespace so that each value is a single, valid file path.
set(KERNEL_UNITTEST_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/hiprt${version_str_}_${HIP_VERSION_STR}_precompiled_bitcode_${KERNEL_OS_POSTFIX}.hipfb") # example: hiprt02005_6.2_precompiled_bitcode_win.hipfb
set(KERNEL_OROCHI_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/oro_compiled_kernels.hipfb")
390391
# Temporary files: the compiled kernels after the archive/compression step.
# They live in the build directory; the path components are joined without
# any whitespace so that each value is a single, valid file path.
set(KERNEL_HIPRT_COMP_COMPRESSED "${CMAKE_BINARY_DIR}/hiprt${version_str_}_${HIP_VERSION_STR}_amd.zstd")
set(KERNEL_OROCHI_COMP_COMPRESSED "${CMAKE_BINARY_DIR}/oro_compiled_kernels.zstd")
395+
391396
392397# precompile kernels:
393398if (PRECOMPILE)
@@ -479,11 +484,16 @@ if ( BAKE_COMPILED_KERNEL )
479484
# Python script invoked by the custom commands below to turn a compiled
# kernel into a header file.
set(PYTHON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/scripts/convert_binary_to_array.py")

# CMake script (run with `cmake -P`) that creates the Zstd archive of a compiled kernel.
set(ARCHIVE_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/scripts/create_archive.cmake")

# HIPRT binary
# Header generated from the HIPRT compiled kernel.
set(KERNEL_HIPRT_H "${CMAKE_CURRENT_SOURCE_DIR}/hiprt/impl/bvh_build_array.h")
484491 add_custom_command (
485492 OUTPUT ${KERNEL_HIPRT_H}
486- COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_HIPRT_COMP} ${KERNEL_HIPRT_H}
493+ # 1) Create the Zstd archive
494+ COMMAND ${CMAKE_COMMAND} -DINPUT_FILE=${KERNEL_HIPRT_COMP} -DOUTPUT_FILE=${KERNEL_HIPRT_COMP_COMPRESSED} -DDO_COMPRESS=${COMPILED_COMPRESSION} -P ${ARCHIVE_SCRIPT}
495+ # 2) Run the Python converter on that archive
496+ COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_HIPRT_COMP} ${KERNEL_HIPRT_COMP_COMPRESSED} ${KERNEL_HIPRT_H} ${COMPILED_COMPRESSION}
487497 DEPENDS ${KERNEL_HIPRT_COMP} # Ensure compile.py has already run.
488498 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
489499 COMMENT "Converting HIPRT compiled kernel to header"
@@ -494,7 +504,10 @@ if ( BAKE_COMPILED_KERNEL )
# Header generated from the Orochi compiled kernel. The path is written
# without embedded whitespace so it resolves to a single, valid file.
set(KERNEL_OROCHI_H "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/ParallelPrimitives/cache/oro_compiled_kernels.h")
495505 add_custom_command (
496506 OUTPUT ${KERNEL_OROCHI_H}
497- COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_OROCHI_COMP} ${KERNEL_OROCHI_H}
507+ # 1) Create the Zstd archive
508+ COMMAND ${CMAKE_COMMAND} -DINPUT_FILE=${KERNEL_OROCHI_COMP} -DOUTPUT_FILE=${KERNEL_OROCHI_COMP_COMPRESSED} -DDO_COMPRESS=${COMPILED_COMPRESSION} -P ${ARCHIVE_SCRIPT}
509+ # 2) Run the Python converter on that archive
510+ COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_OROCHI_COMP} ${KERNEL_OROCHI_COMP_COMPRESSED} ${KERNEL_OROCHI_H} ${COMPILED_COMPRESSION}
498511 DEPENDS ${KERNEL_OROCHI_COMP} # Ensure compile.py has already run.
499512 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
500513 COMMENT "Converting Orochi compiled kernel to header"
@@ -533,8 +546,40 @@ endif()
533546
534547
535548if ( BAKE_COMPILED_KERNEL )
549+
550+
if(COMPILED_COMPRESSION)
    # Build the embedded Zstd static library and link it into ${HIPRT_NAME}.
    # Only the 'common' and 'decompress' parts of Zstd are compiled.

    # Gather the minimal Zstd sources.
    # CONFIGURE_DEPENDS makes the build system re-check the glob, so a change
    # in the Zstd source list triggers a re-configure instead of a stale build.
    file(GLOB ZSTD_SRCS CONFIGURE_DEPENDS
        "${CMAKE_CURRENT_SOURCE_DIR}/contrib/zstd/lib/common/*.c"
        "${CMAKE_CURRENT_SOURCE_DIR}/contrib/zstd/lib/decompress/*.c"
    )

    # Fail early with an actionable message instead of a late
    # "no sources given to target" error when the Zstd tree is missing.
    if(NOT ZSTD_SRCS)
        message(FATAL_ERROR
            "COMPILED_COMPRESSION is ON but no Zstd sources were found under "
            "${CMAKE_CURRENT_SOURCE_DIR}/contrib/zstd/lib. "
            "Make sure contrib/zstd is present, or configure with -DCOMPILED_COMPRESSION=OFF.")
    endif()

    # Build a static lib zstd_embedded.
    add_library(zstd_embedded STATIC ${ZSTD_SRCS})

    # Include Zstd headers (PUBLIC: also visible to targets linking zstd_embedded).
    target_include_directories(zstd_embedded
        PUBLIC
            "${CMAKE_CURRENT_SOURCE_DIR}/contrib/zstd/lib"
    )

    # -fPIC
    set_target_properties(zstd_embedded PROPERTIES POSITION_INDEPENDENT_CODE ON)

    # Disable ASM for an easier build.
    target_compile_definitions(zstd_embedded PRIVATE ZSTD_DISABLE_ASM)

    # Link against zstd_embedded.
    # NOTE(review): kept in the keyword-less signature on purpose. CMake does not
    # allow mixing keyword and plain signatures on one target, and the other
    # target_link_libraries calls for ${HIPRT_NAME} are not visible from here.
    target_link_libraries(${HIPRT_NAME} zstd_embedded)

    # The 'ORO_LINK_ZSTD' flag enables use of the ZSTD API in the source code.
    target_compile_definitions(${HIPRT_NAME} PRIVATE ORO_LINK_ZSTD)
endif()
578+
579+
580+
536581 # enable the 'BAKE_COMPILED_KERNEL' on Orochi: this mode is activated by adding those 2 defines.
537- target_compile_definitions (${HIPRT_NAME} PRIVATE ORO_PP_LOAD_FROM_STRING ORO_PRECOMPILED)
582+ target_compile_definitions (${HIPRT_NAME} PRIVATE ORO_PP_LOAD_FROM_STRING HIPRT_BITCODE_LINKING ORO_PRECOMPILED)
538583
539584 #enable the 'BAKE_COMPILED_KERNEL' on HIPRT:
540585 target_compile_definitions (${HIPRT_NAME} PRIVATE HIPRT_BAKE_COMPILED_KERNEL )
@@ -592,12 +637,17 @@ if(PRECOMPILE AND NOT BAKE_COMPILED_KERNEL)
592637 DESTINATION bin)
593638endif ()
594639
640+
641+
642+
643+
644+
595645# Project: Unit Test
596646if (NOT NO_UNITTEST)
597647
598648 add_executable (unittest)
599649
600- if (BITCODE)
650+ if (BITCODE OR BAKE_COMPILED_KERNEL )
601651 target_compile_definitions (unittest PRIVATE HIPRT_BITCODE_LINKING)
602652 endif ()
603653 if (WIN32 )
0 commit comments