Skip to content

Commit 8071587

Browse files
authored
Merge pull request #23 from GPUOpen-LibrariesAndSDKs/next-release-2
Update to 2.5
2 parents 3a8b836 + 003d10d commit 8071587

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+2564
-3399
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ hiprt/hiprtew.h
1414
hiprt/cache/Kernels.h
1515
hiprt/cache/KernelArgs.h
1616
PUBLIC_OUT/
17+
hiprt/impl/bvh_build_array.h

CMakeLists.txt

Lines changed: 65 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,31 @@
11
cmake_minimum_required(VERSION 3.10)
22
project(hiprt)
33

4+
#
45
# Options
5-
option(BAKE_KERNEL "Enable the use of encrypted and baked kernels" OFF)
6-
option(BITCODE "Enable bitcode linking" OFF)
6+
#
7+
8+
# Only one of those 3 flags should be ON at a time, or none. Multiple flags ON are untested and may cause issues.
9+
option(BAKE_KERNEL "Enable the use of encrypted and baked kernels source code" OFF)
10+
option(BAKE_COMPILED_KERNEL "If enabled, the kernels will be compiled and embedded as a buffer inside the binary. if you do that, you should also enable the flag 'PRECOMPILE'" OFF)
11+
option(BITCODE "Enable bitcode linking: when running the program, it's expected to find precompiled kernels as external files." OFF)
12+
713
option(PRECOMPILE "Precompile kernels" OFF)
814
option(HIPRTEW "Use hiprtew" OFF)
915
option(NO_ENCRYPT "Don't encrypt kernel source and binaries" OFF)
1016
option(NO_UNITTEST "Don't build unit tests" OFF)
1117
option(HIPRT_PREFER_HIP_5 "Prefer HIP 5" OFF)
1218

1319

20+
21+
find_program(PYTHON_EXECUTABLE
22+
NAMES python
23+
PATHS /usr/bin /usr/local/bin /opt/local/bin
24+
)
25+
message(STATUS "Python path = ${PYTHON_EXECUTABLE}")
26+
27+
28+
1429
# GENERATE_BAKE_KERNEL is enabled by default if we use the flags 'BAKE_KERNEL' or 'BITCODE'.
1530
# It can be forced to OFF, but in this case, some building functions from the HIPRT API, like hiprtBuildTraceKernelsFromBitcode will fail.
1631
if(BAKE_KERNEL OR BITCODE)
@@ -34,6 +49,7 @@ add_definitions(-DGTEST_HAS_TR1_TUPLE=0)
3449

3550

3651

52+
3753
# Functions
3854
function(copy_dir src_dir dst_dir pattern)
3955
file(GLOB_RECURSE files "${src_dir}/${pattern}")
@@ -183,6 +199,10 @@ function(get_hip_sdk_version result)
183199
endfunction()
184200

185201

202+
# fill the HIP_VERSION_STR variable, it will look like: "6.2"
203+
get_hip_sdk_version(HIP_VERSION_STR)
204+
message(STATUS "HIP_VERSION_STR = ${HIP_VERSION_STR}")
205+
186206

187207
function(write_version_info in_file header_file version_file version_str_out)
188208

@@ -228,7 +248,6 @@ function(write_version_info in_file header_file version_file version_str_out)
228248
string(REPLACE "@HIPRT_API_VERSION@" "${HIPRT_API_VERSION}" header_content "${header_content}")
229249

230250
# Get HIP SDK version and replace placeholder
231-
get_hip_sdk_version(HIP_VERSION_STR)
232251
string(REPLACE "@HIP_VERSION_STR@" "\"${HIP_VERSION_STR}\"" header_content "${header_content}")
233252

234253
# Write the modified content to the header file
@@ -325,23 +344,51 @@ if(HIPRT_PREFER_HIP_5)
325344
endif()
326345

327346

328-
# precompile kernels
347+
# precompile kernels:
329348
if(PRECOMPILE)
330349
execute_process(
331-
COMMAND python compile.py --nvidia --hipSdkPath ${HIP_PATH}
350+
COMMAND ${PYTHON_EXECUTABLE} compile.py --nvidia --hipSdkPath ${HIP_PATH}
332351
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts/bitcodes
333352
)
334-
353+
335354
if(NOT NO_UNITTEST)
336355
execute_process(
337-
COMMAND python precompile_bitcode.py --nvidia --hipSdkPath ${HIP_PATH}
356+
COMMAND ${PYTHON_EXECUTABLE} precompile_bitcode.py --nvidia --hipSdkPath ${HIP_PATH}
338357
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts/bitcodes
339358
)
340359
endif()
360+
341361

342362
endif()
343363

344364

365+
366+
# convert the precompiled binary to a buffer that will be embedded inside the library binary
367+
# it's expected the step 'PRECOMPILE' has been executed.
368+
if ( BAKE_COMPILED_KERNEL )
369+
370+
message(">> Generate embedded precompiled")
371+
372+
set(PYTHON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/scripts/convert_binary_to_array.py")
373+
374+
set(KERNEL_HIPRT_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/hiprt${version_str_}_${HIP_VERSION_STR}_amd.hipfb")
375+
set(KERNEL_HIPRT_H "${CMAKE_CURRENT_SOURCE_DIR}/hiprt/impl/bvh_build_array.h")
376+
execute_process(
377+
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_HIPRT_COMP} ${KERNEL_HIPRT_H}
378+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
379+
)
380+
381+
set(KERNEL_OROCHI_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/oro_compiled_kernels.hipfb")
382+
set(KERNEL_OROCHI_H "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/ParallelPrimitives/cache/oro_compiled_kernels.h")
383+
execute_process(
384+
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_OROCHI_COMP} ${KERNEL_OROCHI_H}
385+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
386+
)
387+
388+
endif()
389+
390+
391+
345392
if(BAKE_KERNEL OR GENERATE_BAKE_KERNEL)
346393
message(">> BakeKernel Executed")
347394
if(WIN32)
@@ -368,6 +415,17 @@ if(BAKE_KERNEL OR GENERATE_BAKE_KERNEL)
368415

369416
endif()
370417

418+
419+
420+
if ( BAKE_COMPILED_KERNEL )
421+
# enable the 'BAKE_COMPILED_KERNEL' on Orochi: this mode is activated by adding those 2 defines.
422+
target_compile_definitions(${HIPRT_NAME} PRIVATE ORO_PP_LOAD_FROM_STRING ORO_PRECOMPILED)
423+
424+
#enable the 'BAKE_COMPILED_KERNEL' on HIPRT:
425+
target_compile_definitions(${HIPRT_NAME} PRIVATE HIPRT_BAKE_COMPILED_KERNEL )
426+
endif()
427+
428+
371429
if(BAKE_KERNEL)
372430
target_compile_definitions(${HIPRT_NAME} PRIVATE HIPRT_LOAD_FROM_STRING ORO_PP_LOAD_FROM_STRING)
373431
endif()

contrib/Orochi/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,7 @@ build/
2525
result.xml
2626
UnitTest/bitcodes/*.fatbin
2727
Test/SimpleD3D12/cache/**
28+
29+
ParallelPrimitives/cache/KernelArgs.h
30+
ParallelPrimitives/cache/Kernels.h
31+
ParallelPrimitives/cache/oro_compiled_kernels.h

contrib/Orochi/Orochi/OrochiUtils.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,41 @@ oroFunction OrochiUtils::getFunctionFromString( oroDevice device, const char* so
558558
return f;
559559
}
560560

561+
oroFunction OrochiUtils::getFunctionFromPrecompiledBinary_asData( const unsigned char* precompData, size_t dataSizeInBytes, const std::string& funcName )
562+
{
563+
std::lock_guard<std::recursive_mutex> lock( m_mutex );
564+
565+
const std::string cacheName = OrochiUtilsImpl::getCacheName( "___BAKED_BIN___", funcName );
566+
if( m_kernelMap.find( cacheName.c_str() ) != m_kernelMap.end() )
567+
{
568+
return m_kernelMap[cacheName].function;
569+
}
570+
571+
oroModule module = nullptr;
572+
oroError e = oroModuleLoadData( &module, precompData );
573+
if ( e != oroSuccess )
574+
{
575+
// add some verbose info to help debugging missing data
576+
printf("oroModuleLoadData FAILED (error = %d) loading baked precomp data: %s\n", e, funcName.c_str());
577+
return nullptr;
578+
}
579+
580+
oroFunction functionOut{};
581+
e = oroModuleGetFunction( &functionOut, module, funcName.c_str() );
582+
if ( e != oroSuccess )
583+
{
584+
// add some verbose info to help debugging missing data
585+
printf("oroModuleGetFunction FAILED (error = %d) loading baked precomp data: %s\n", e, funcName.c_str());
586+
return nullptr;
587+
}
588+
OROASSERT( e == oroSuccess, 0 );
589+
590+
m_kernelMap[cacheName].function = functionOut;
591+
m_kernelMap[cacheName].module = module;
592+
593+
return functionOut;
594+
}
595+
561596
oroFunction OrochiUtils::getFunctionFromPrecompiledBinary( const std::string& path, const std::string& funcName )
562597
{
563598
std::lock_guard<std::recursive_mutex> lock( m_mutex );

contrib/Orochi/Orochi/OrochiUtils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ class OrochiUtils
6969

7070
oroFunction getFunctionFromPrecompiledBinary( const std::string& path, const std::string& funcName );
7171

72+
// this function is like 'getFunctionFromPrecompiledBinary' but instead of giving a path to a file, we give the data directly.
73+
// ( use the script convert_binary_to_array.py to convert the .hipfb to a C-array. )
74+
oroFunction getFunctionFromPrecompiledBinary_asData( const unsigned char* data, size_t dataSizeInBytes, const std::string& funcName );
75+
7276
oroFunction getFunctionFromFile( oroDevice device, const char* path, const char* funcName, std::vector<const char*>* opts );
7377
oroFunction getFunctionFromString( oroDevice device, const char* source, const char* path, const char* funcName, std::vector<const char*>* opts, int numHeaders, const char** headers, const char** includeNames );
7478
oroFunction getFunction( oroDevice device, const char* code, const char* path, const char* funcName, std::vector<const char*>* opts, int numHeaders = 0, const char** headers = 0, const char** includeNames = 0, oroModule* loadedModule = 0 );

contrib/Orochi/ParallelPrimitives/RadixSort.cpp

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,47 @@
4040
#include <dlfcn.h>
4141
#endif
4242

43-
namespace
44-
{
45-
#if defined( ORO_PRECOMPILED )
46-
constexpr auto useBitCode = true;
43+
#if defined( ORO_PRECOMPILED ) && defined( ORO_PP_LOAD_FROM_STRING )
44+
#include <ParallelPrimitives/cache/oro_compiled_kernels.h> // generate this header with 'convert_binary_to_array.py'
4745
#else
48-
constexpr auto useBitCode = false;
46+
const unsigned char oro_compiled_kernels_h[] = "";
47+
const size_t oro_compiled_kernels_h_size = 0;
4948
#endif
5049

51-
#if defined( ORO_PP_LOAD_FROM_STRING )
52-
constexpr auto useBakeKernel = true;
53-
#else
54-
constexpr auto useBakeKernel = false;
55-
static const char* hip_RadixSortKernels = nullptr;
56-
namespace hip
50+
namespace
5751
{
58-
static const char** RadixSortKernelsArgs = nullptr;
59-
static const char** RadixSortKernelsIncludes = nullptr;
60-
} // namespace hip
52+
53+
// if these 2 preprocessor macros are both defined, the 'usePrecompiledAndBakedKernel' mode is activated.
54+
#if defined( ORO_PRECOMPILED ) && defined( ORO_PP_LOAD_FROM_STRING )
55+
56+
// this flag means that we bake the precompiled kernels
57+
constexpr auto usePrecompiledAndBakedKernel = true;
58+
59+
constexpr auto useBitCode = false;
60+
constexpr auto useBakeKernel = false;
61+
62+
#else
63+
64+
constexpr auto usePrecompiledAndBakedKernel = false;
65+
66+
#if defined( ORO_PRECOMPILED )
67+
constexpr auto useBitCode = true; // this flag means we use the bitcode file
68+
#else
69+
constexpr auto useBitCode = false;
70+
#endif
71+
72+
#if defined( ORO_PP_LOAD_FROM_STRING )
73+
constexpr auto useBakeKernel = true; // this flag means we use the HIP source code embeded in the binary ( as a string )
74+
#else
75+
constexpr auto useBakeKernel = false;
76+
static const char* hip_RadixSortKernels = nullptr;
77+
namespace hip
78+
{
79+
static const char** RadixSortKernelsArgs = nullptr;
80+
static const char** RadixSortKernelsIncludes = nullptr;
81+
} // namespace hip
82+
#endif
83+
6184
#endif
6285

6386
static_assert( !( useBitCode && useBakeKernel ), "useBitCode and useBakeKernel cannot coexist" );
@@ -211,9 +234,14 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
211234
opts.push_back( sort_block_size_param.c_str() );
212235
opts.push_back( sort_num_warps_param.c_str() );
213236

237+
214238
for( const auto& record : records )
215239
{
216-
if constexpr( useBakeKernel )
240+
if constexpr( usePrecompiledAndBakedKernel )
241+
{
242+
oroFunctions[record.kernelType] = m_oroutils.getFunctionFromPrecompiledBinary_asData(oro_compiled_kernels_h, oro_compiled_kernels_h_size, record.kernelName.c_str() );
243+
}
244+
else if constexpr( useBakeKernel )
217245
{
218246
oroFunctions[record.kernelType] = m_oroutils.getFunctionFromString( m_device, hip_RadixSortKernels, currentKernelPath.c_str(), record.kernelName.c_str(), &opts, 1, hip::RadixSortKernelsArgs, hip::RadixSortKernelsIncludes );
219247
}
@@ -231,6 +259,8 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
231259
printKernelInfo( record.kernelName, oroFunctions[record.kernelType] );
232260
}
233261
}
262+
263+
return;
234264
}
235265

236266
int RadixSort::calculateWGsToExecute( const int blockSize ) const noexcept
contrib/Orochi/scripts/convert_binary_to_array.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# convert_binary_to_array.py
2+
import sys
3+
from pathlib import Path
4+
5+
def binary_to_c_array(bin_file, array_name):
6+
with open(bin_file, 'rb') as f:
7+
binary_data = f.read()
8+
9+
hex_array = ', '.join(f'0x{b:02x}' for b in binary_data)
10+
c_array = f'const unsigned char {array_name}[] = {{\n {hex_array}\n}};\n'
11+
c_array += f'const size_t {array_name}_size = sizeof({array_name});\n'
12+
return c_array
13+
14+
if __name__ == "__main__":
15+
if len(sys.argv) != 3:
16+
print(f"Usage: {sys.argv[0]} <input_binary_file> <output_header_file>")
17+
sys.exit(1)
18+
19+
bin_file = sys.argv[1]
20+
header_file_path = sys.argv[2]
21+
header_file = Path(header_file_path).name
22+
array_name = header_file.replace('.', '_')
23+
24+
c_array = binary_to_c_array(bin_file, array_name)
25+
with open(header_file_path, 'w') as f:
26+
f.write("// generated by convert_binary_to_header.py\n")
27+
f.write(c_array)

hiprt/hiprt_device.h

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ HIPRT_DEVICE hiprtFrameMatrix hiprtGetWorldToObjectFrameMatrix( hiprtScene scene
545545
* \param instanceID Instance ID.
546546
* \param time The time.
547547
*/
548-
HIPRT_DEVICE hiprtFloat3 hiprtPointObjectToWorld( hiprtFloat3 point, hiprtScene scene, uint32_t instanceID, float time = 0.0f );
548+
HIPRT_DEVICE float3 hiprtPointObjectToWorld( float3 point, hiprtScene scene, uint32_t instanceID, float time = 0.0f );
549549

550550
/** \brief Transforms a point from the world space to the object space.
551551
*
@@ -554,7 +554,7 @@ HIPRT_DEVICE hiprtFloat3 hiprtPointObjectToWorld( hiprtFloat3 point, hiprtScene
554554
* \param instanceID Instance ID.
555555
* \param time The time.
556556
*/
557-
HIPRT_DEVICE hiprtFloat3 hiprtPointWorldToObject( hiprtFloat3 point, hiprtScene scene, uint32_t instanceID, float time = 0.0f );
557+
HIPRT_DEVICE float3 hiprtPointWorldToObject( float3 point, hiprtScene scene, uint32_t instanceID, float time = 0.0f );
558558

559559
/** \brief Transforms a vector from the object space to the world space.
560560
*
@@ -563,8 +563,7 @@ HIPRT_DEVICE hiprtFloat3 hiprtPointWorldToObject( hiprtFloat3 point, hiprtScene
563563
* \param instanceID Instance ID.
564564
* \param time The time.
565565
*/
566-
HIPRT_DEVICE hiprtFloat3
567-
hiprtVectorObjectToWorld( hiprtFloat3 vector, hiprtScene scene, uint32_t instanceID, float time = 0.0f );
566+
HIPRT_DEVICE float3 hiprtVectorObjectToWorld( float3 vector, hiprtScene scene, uint32_t instanceID, float time = 0.0f );
568567

569568
/** \brief Transforms a vector from the world space to the object space.
570569
*
@@ -573,8 +572,7 @@ hiprtVectorObjectToWorld( hiprtFloat3 vector, hiprtScene scene, uint32_t instanc
573572
* \param instanceID Instance ID.
574573
* \param time The time.
575574
*/
576-
HIPRT_DEVICE hiprtFloat3
577-
hiprtVectorWorldToObject( hiprtFloat3 vector, hiprtScene scene, uint32_t instanceID, float time = 0.0f );
575+
HIPRT_DEVICE float3 hiprtVectorWorldToObject( float3 vector, hiprtScene scene, uint32_t instanceID, float time = 0.0f );
578576

579577
/** \brief Returns the object to world transformation for a given instance and time in the form of the SRT frame.
580578
*
@@ -619,8 +617,8 @@ HIPRT_DEVICE hiprtFrameMatrix hiprtGetWorldToObjectFrameMatrix(
619617
* \param instanceIDs Instance IDs (multi-level instancing).
620618
* \param time The time.
621619
*/
622-
HIPRT_DEVICE hiprtFloat3 hiprtPointObjectToWorld(
623-
hiprtFloat3 point, hiprtScene scene, const uint32_t ( &instanceIDs )[hiprtMaxInstanceLevels], float time = 0.0f );
620+
HIPRT_DEVICE float3 hiprtPointObjectToWorld(
621+
float3 point, hiprtScene scene, const uint32_t ( &instanceIDs )[hiprtMaxInstanceLevels], float time = 0.0f );
624622

625623
/** \brief Transforms a point from the world space to the object space.
626624
*
@@ -629,8 +627,8 @@ HIPRT_DEVICE hiprtFloat3 hiprtPointObjectToWorld(
629627
* \param instanceIDs Instance IDs (multi-level instancing).
630628
* \param time The time.
631629
*/
632-
HIPRT_DEVICE hiprtFloat3 hiprtPointWorldToObject(
633-
hiprtFloat3 point, hiprtScene scene, const uint32_t ( &instanceIDs )[hiprtMaxInstanceLevels], float time = 0.0f );
630+
HIPRT_DEVICE float3 hiprtPointWorldToObject(
631+
float3 point, hiprtScene scene, const uint32_t ( &instanceIDs )[hiprtMaxInstanceLevels], float time = 0.0f );
634632

635633
/** \brief Transforms a vector from the object space to the world space.
636634
*
@@ -639,8 +637,8 @@ HIPRT_DEVICE hiprtFloat3 hiprtPointWorldToObject(
639637
* \param instanceIDs Instance IDs (multi-level instancing).
640638
* \param time The time.
641639
*/
642-
HIPRT_DEVICE hiprtFloat3 hiprtVectorObjectToWorld(
643-
hiprtFloat3 vector, hiprtScene scene, const uint32_t ( &instanceIDs )[hiprtMaxInstanceLevels], float time = 0.0f );
640+
HIPRT_DEVICE float3 hiprtVectorObjectToWorld(
641+
float3 vector, hiprtScene scene, const uint32_t ( &instanceIDs )[hiprtMaxInstanceLevels], float time = 0.0f );
644642

645643
/** \brief Transforms a vector from the world space to the object space.
646644
*
@@ -649,5 +647,5 @@ HIPRT_DEVICE hiprtFloat3 hiprtVectorObjectToWorld(
649647
* \param instanceIDs Instance IDs (multi-level instancing).
650648
* \param time The time.
651649
*/
652-
HIPRT_DEVICE hiprtFloat3 hiprtVectorWorldToObject(
653-
hiprtFloat3 vector, hiprtScene scene, const uint32_t ( &instanceIDs )[hiprtMaxInstanceLevels], float time = 0.0f );
650+
HIPRT_DEVICE float3 hiprtVectorWorldToObject(
651+
float3 vector, hiprtScene scene, const uint32_t ( &instanceIDs )[hiprtMaxInstanceLevels], float time = 0.0f );

0 commit comments

Comments
 (0)