diff --git a/.github/workflows/rocprofiler-sdk-continuous_integration.yml b/.github/workflows/rocprofiler-sdk-continuous_integration.yml index 250d2ea798a..a6d133c7caa 100644 --- a/.github/workflows/rocprofiler-sdk-continuous_integration.yml +++ b/.github/workflows/rocprofiler-sdk-continuous_integration.yml @@ -180,6 +180,7 @@ jobs: update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11 --slave /usr/bin/gcov gcov /usr/bin/gcov-11 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12 --slave /usr/bin/gcov gcov /usr/bin/gcov-12 python3 -m pip install -U --user -r requirements.txt + python3 -m pip install -U --user cmake==3.24.0 rm -rf \ ${{ env.ROCM_PATH }}/lib/*rocprofiler-sdk* \ ${{ env.ROCM_PATH }}/lib/cmake/*rocprofiler-sdk* \ @@ -192,6 +193,7 @@ jobs: shell: bash run: | echo "PKG_CONFIG_PATH=/opt/amdgpu/lib64/pkgconfig:/opt/amdgpu/lib/pkgconfig:/opt/amdgpu/lib/$(uname -p)-linux-gnu/pkgconfig:${PKG_CONFIG_PATH}" >> $GITHUB_ENV + echo "PATH=$HOME/.local/bin:$PATH" >> $GITHUB_ENV - name: Setup ccache uses: hendrikmuhs/ccache-action@63069e3931dedbf3b63792097479563182fe70d1 # v1.2.18 @@ -341,7 +343,6 @@ jobs: ${{ matrix.system.ci-flags }} -- \ -DROCPROFILER_DEP_ROCMCORE=ON \ -DROCPROFILER_BUILD_DOCS=OFF \ - -DROCPROFILER_BUILD_FMT=OFF \ -DROCPROFILER_INTERNAL_RCCL_API_TRACE=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} \ -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk \ @@ -520,6 +521,7 @@ jobs: shell: bash run: | echo "PKG_CONFIG_PATH=/opt/amdgpu/lib64/pkgconfig:/opt/amdgpu/lib/pkgconfig:/opt/amdgpu/lib/$(uname -p)-linux-gnu/pkgconfig:${PKG_CONFIG_PATH}" >> $GITHUB_ENV + echo "PATH=$HOME/.local/bin:$PATH" >> $GITHUB_ENV - name: Install Curl for RHEL 8.8 if: ${{ matrix.system.os == 'rhel-8.8' }} @@ -656,7 +658,6 @@ jobs: -- \ -DROCPROFILER_DEP_ROCMCORE=ON \ -DROCPROFILER_BUILD_DOCS=OFF \ - -DROCPROFILER_BUILD_FMT=OFF \ -DROCPROFILER_INTERNAL_RCCL_API_TRACE=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} \ -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' \ @@ -757,12 +758,14 @@ jobs: apt-get install -y gcc-${{ env.GCC_COMPILER_VERSION }} g++-${{ env.GCC_COMPILER_VERSION }} update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${{ env.GCC_COMPILER_VERSION }} 100 --slave /usr/bin/g++ g++ /usr/bin/g++-${{ env.GCC_COMPILER_VERSION }} --slave /usr/bin/gcov gcov /usr/bin/gcov-${{ env.GCC_COMPILER_VERSION }} python3 -m pip install -U --user -r requirements.txt + python3 -m pip install -U --user cmake==3.24.0 rm -rf ${{ env.ROCM_PATH }}/lib/*rocprofiler-sdk* ${{ env.ROCM_PATH }}/lib/cmake/*rocprofiler-sdk* ${{ env.ROCM_PATH }}/share/*rocprofiler-sdk* ${{ env.ROCM_PATH }}/libexec/*rocprofiler-sdk* ${{ env.ROCM_PATH }}*/lib/python*/site-packages/roctx ${{ env.ROCM_PATH }}*/lib/python*/site-packages/rocpd - name: Setup environment variables shell: bash run: | echo "PKG_CONFIG_PATH=/opt/amdgpu/lib64/pkgconfig:/opt/amdgpu/lib/pkgconfig:/opt/amdgpu/lib/$(uname -p)-linux-gnu/pkgconfig:${PKG_CONFIG_PATH}" >> $GITHUB_ENV + echo "PATH=$HOME/.local/bin:$PATH" >> $GITHUB_ENV - name: List Files shell: bash @@ -877,7 +880,6 @@ jobs: --memcheck ${{ matrix.system.sanitizer }} --run-attempt ${{ github.run_attempt }} -- - -DROCPROFILER_BUILD_FMT=OFF \ -DROCPROFILER_INTERNAL_RCCL_API_TRACE=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} -DCMAKE_INSTALL_PREFIX="${{ env.ROCM_PATH }}" diff --git a/projects/rocprofiler-sdk/VERSION b/projects/rocprofiler-sdk/VERSION index 26aaba0e866..6085e946503 100644 --- a/projects/rocprofiler-sdk/VERSION +++ b/projects/rocprofiler-sdk/VERSION @@ -1 +1 @@ -1.2.0 +1.2.1 diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp index 2f7a78ae12a..584a7266bcc 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp @@ -401,6 +401,9 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_ais_file_read); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_counted_queue_acquire); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_counted_queue_release); # endif +# if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x0A +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_memory_async_batch_copy); +# endif #endif #if HSA_AMD_EXT_API_TABLE_MAJOR_VERSION == 0x01 @@ -426,6 +429,8 @@ static_assert(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST == 74); static_assert(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST == 76); # elif HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x09 static_assert(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST == 78); +# elif HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x0A +static_assert(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST == 79); # else # if !defined(ROCPROFILER_UNSAFE_NO_VERSION_CHECK) && \ (defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h index d6431ee166f..5c92c93abe0 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h @@ -129,6 +129,9 @@ typedef enum rocprofiler_hsa_amd_ext_api_id_t // NOLINT(performance-enum-size) ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_counted_queue_acquire, ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_counted_queue_release, # endif +# if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x0A + ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_memory_async_batch_copy, +# endif #endif ROCPROFILER_HSA_AMD_EXT_API_ID_LAST, diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/api_args.h index 2e204aba31a..0798a1be0eb 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/api_args.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/api_args.h @@ -1480,6 +1480,15 @@ typedef union rocprofiler_hsa_api_args_t hsa_queue_t* queue; } hsa_amd_counted_queue_release; # endif +# if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x0A + struct + { + const hsa_amd_memory_copy_op_t* copy_ops; + uint32_t num_copy_ops; + uint32_t num_dep_signals; + const hsa_signal_t* dep_signals; + } hsa_amd_memory_async_batch_copy; +# endif #endif } rocprofiler_hsa_api_args_t; diff --git a/projects/rocprofiler-sdk/source/lib/common/logging.cpp b/projects/rocprofiler-sdk/source/lib/common/logging.cpp index 24aecbc23db..98fbd8e7620 100644 --- a/projects/rocprofiler-sdk/source/lib/common/logging.cpp +++ b/projects/rocprofiler-sdk/source/lib/common/logging.cpp @@ -58,7 +58,7 @@ struct log_level_info env_store get_glog_env_config(const logging_config& cfg) { - auto as_env_config = [](std::string_view _var, auto _val) { + auto as_env_config = [](std::string_view _var, const auto& _val) { return env_config{std::string{_var}, fmt::format("{}", _val), 1}; }; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/abi.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/abi.cpp index 9efd5c6d567..598da18c39c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/abi.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/abi.cpp @@ -61,6 +61,8 @@ ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 75); ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 77); #elif HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x09 ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 79); +#elif HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x0A +ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 80); #else INTERNAL_CI_ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 0); #endif @@ -305,6 +307,9 @@ ROCP_SDK_ENFORCE_ABI(::AmdExtTable, hsa_amd_ais_file_read_fn, 76); ROCP_SDK_ENFORCE_ABI(::AmdExtTable, hsa_amd_counted_queue_acquire_fn, 77); ROCP_SDK_ENFORCE_ABI(::AmdExtTable, hsa_amd_counted_queue_release_fn, 78); #endif +#if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x0A +ROCP_SDK_ENFORCE_ABI(::AmdExtTable, hsa_amd_memory_async_batch_copy_fn, 79); +#endif ROCP_SDK_ENFORCE_ABI(::ImageExtTable, hsa_ext_image_get_capability_fn, 1); ROCP_SDK_ENFORCE_ABI(::ImageExtTable, hsa_ext_image_data_get_info_fn, 2); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/fmt.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/fmt.hpp index f0d7d9e94f0..e849c48493d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/fmt.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/fmt.hpp @@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "lib/common/logging.hpp" #include "lib/rocprofiler-sdk/hsa/queue.hpp" #include @@ -188,4 +189,106 @@ struct formatter } }; #endif +#if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x0A +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(hsa_amd_memory_copy_op_type_t const& op, FormatContext& ctx) const + { + switch(op) + { + case HSA_AMD_MEMORY_COPY_OP_LINEAR: + return fmt::format_to(ctx.out(), "HSA_AMD_MEMORY_COPY_OP_LINEAR"); + case HSA_AMD_MEMORY_COPY_OP_LINEAR_BROADCAST: + return fmt::format_to(ctx.out(), "HSA_AMD_MEMORY_COPY_OP_LINEAR_BROADCAST"); + case HSA_AMD_MEMORY_COPY_OP_LINEAR_SWAP: + return fmt::format_to(ctx.out(), "HSA_AMD_MEMORY_COPY_OP_LINEAR_SWAP"); + case HSA_AMD_MEMORY_COPY_OP_LINEAR_INDIRECT: + return fmt::format_to(ctx.out(), "HSA_AMD_MEMORY_COPY_OP_LINEAR_INDIRECT"); + } + + auto value = static_cast>(op); + ROCP_CI_LOG(INFO) << fmt::format("Unknown hsa_amd_memory_copy_op_type_t {}", value); + return fmt::format_to(ctx.out(), "hsa_amd_memory_copy_op_type_t({})", value); + } +}; + +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(hsa_amd_memory_copy_op_t const& op, FormatContext& ctx) const + { + auto out = fmt::format_to(ctx.out(), + "[MEMORY_COPY_OP type={}, version={}, num_dsts={}, " + "traffic_class={}, completion_signal={}", + op.type, + op.version, + op.num_dsts, + op.traffic_class, + op.completion_signal.handle); + + switch(op.type) + { + case HSA_AMD_MEMORY_COPY_OP_LINEAR_BROADCAST: + out = fmt::format_to( + out, + ", src={}, src_agent={}, dst_list={}, dst_agent_list={}, size={}", + fmt::ptr(op.src), + op.src_agent.handle, + fmt::ptr(op.dst_list), + fmt::ptr(op.dst_agent_list), + op.size); + break; + + case HSA_AMD_MEMORY_COPY_OP_LINEAR_SWAP: + out = fmt::format_to( + out, + ", src={}, src_agent={}, dst={}, dst_agent={}, src_size={}, dst_size={}", + fmt::ptr(op.src), + op.src_agent.handle, + fmt::ptr(op.dst), + op.dst_agent.handle, + op.src_size, + op.dst_size); + break; + + case HSA_AMD_MEMORY_COPY_OP_LINEAR: + case HSA_AMD_MEMORY_COPY_OP_LINEAR_INDIRECT: + default: + out = fmt::format_to( + out, + ", src={}, src_agent={}, dst={}, dst_agent={}, size={}, unused_size={}", + fmt::ptr(op.src), + op.src_agent.handle, + fmt::ptr(op.dst), + op.dst_agent.handle, + op.size, + op.unused_size); + break; + } + + if(op.reserved[0] != 0 || op.reserved[1] != 0 || op.reserved[2] != 0) + { + out = fmt::format_to( + out, ", reserved=[{},{},{}]", op.reserved[0], op.reserved[1], op.reserved[2]); + } + + return fmt::format_to(out, "]"); + } +}; +#endif } // namespace fmt diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp index 3af3ae42b7b..b98f0b4cbfa 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp @@ -536,6 +536,16 @@ HSA_API_INFO_DEFINITION_V(ROCPROFILER_HSA_TABLE_ID_AmdExt, hsa_amd_counted_queue_release_fn, queue) # endif +# if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x0A +HSA_API_INFO_DEFINITION_V(ROCPROFILER_HSA_TABLE_ID_AmdExt, + ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_memory_async_batch_copy, + hsa_amd_memory_async_batch_copy, + hsa_amd_memory_async_batch_copy_fn, + copy_ops, + num_copy_ops, + num_dep_signals, + dep_signals) +# endif # endif #elif defined(ROCPROFILER_LIB_ROCPROFILER_HSA_ASYNC_COPY_CPP_IMPL) && \