diff --git a/.gitmodules b/.gitmodules index 7bbab5a35..49adf8313 100644 --- a/.gitmodules +++ b/.gitmodules @@ -39,4 +39,4 @@ url = https://github.com/pybind/pybind11.git [submodule "external/gotcha"] path = external/gotcha - url = https://jrmadsen@github.com/jrmadsen/GOTCHA + url = https://github.com/ROCm/GOTCHA diff --git a/benchmark/source/bin/mandelbrot/CMakeLists.txt b/benchmark/source/bin/mandelbrot/CMakeLists.txt index 5f1274484..f01b73b2c 100644 --- a/benchmark/source/bin/mandelbrot/CMakeLists.txt +++ b/benchmark/source/bin/mandelbrot/CMakeLists.txt @@ -29,6 +29,8 @@ set(CMAKE_HIP_STANDARD 17) set(CMAKE_HIP_EXTENSIONS OFF) set(CMAKE_HIP_STANDARD_REQUIRED ON) +find_package(rocprofiler-sdk-roctx REQUIRED) + set_source_files_properties(mandelbrot.cpp PROPERTIES LANGUAGE HIP) set_source_files_properties(utils.cpp PROPERTIES LANGUAGE HIP) @@ -37,6 +39,7 @@ target_sources(mandelbrot PRIVATE mandelbrot.cpp utils.cpp) target_compile_options(mandelbrot PRIVATE -W -Wall -Wextra -Wpedantic -Werror -ffp-contract=fast) target_include_directories(mandelbrot PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(mandelbrot PRIVATE rocprofiler-sdk-roctx::rocprofiler-sdk-roctx) install( TARGETS mandelbrot diff --git a/benchmark/source/bin/mandelbrot/mandelbrot.cpp b/benchmark/source/bin/mandelbrot/mandelbrot.cpp index e77947dc5..f487dc27b 100644 --- a/benchmark/source/bin/mandelbrot/mandelbrot.cpp +++ b/benchmark/source/bin/mandelbrot/mandelbrot.cpp @@ -25,6 +25,8 @@ #include "utils.hpp" +#include + #include #include #include @@ -473,6 +475,19 @@ hipPerfMandelBrot::printResults() std::cout << std::endl; } +struct roctx_range +{ + template + roctx_range(Args&&... 
args) + { + auto _ss = std::stringstream{}; + ((_ss << args), ...); + roctxRangePush(_ss.str().c_str()); + } + + ~roctx_range() { roctxRangePop(); } +}; + // Wrappers for the kernel launches void hipPerfMandelBrot::float_mad(uint* out, @@ -487,6 +502,9 @@ hipPerfMandelBrot::float_mad(uint* out, int threads_per_block, int kernelCnt) { + auto _range = + roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"}; + int streamCnt = getNumStreams(); hipLaunchKernelGGL(float_mad_kernel, dim3(blocks), @@ -515,6 +533,9 @@ hipPerfMandelBrot::float_mandel_unroll(uint* out, int threads_per_block, int kernelCnt) { + auto _range = + roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"}; + int streamCnt = getNumStreams(); hipLaunchKernelGGL(float_mandel_unroll_kernel, dim3(blocks), @@ -543,6 +564,9 @@ hipPerfMandelBrot::double_mad(uint* out, int threads_per_block, int kernelCnt) { + auto _range = + roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"}; + int streamCnt = getNumStreams(); hipLaunchKernelGGL(double_mad_kernel, dim3(blocks), @@ -571,6 +595,9 @@ hipPerfMandelBrot::double_mandel_unroll(uint* out, int threads_per_block, int kernelCnt) { + auto _range = + roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"}; + int streamCnt = getNumStreams(); hipLaunchKernelGGL(float_mandel_unroll_kernel, dim3(blocks), @@ -589,6 +616,8 @@ hipPerfMandelBrot::double_mandel_unroll(uint* out, void hipPerfMandelBrot::run(unsigned int testCase, unsigned int /* deviceId */) { + auto _run_range = roctx_range{__FUNCTION__, "(testCase=", testCase, ")"}; + unsigned int numStreams = getNumStreams(); coordIdx = testCase % numCoords; @@ -667,6 +696,8 @@ hipPerfMandelBrot::run(unsigned int testCase, unsigned int /* deviceId */) for(unsigned int k = 0; k < numLoops; k++) { + auto _loop_range = roctx_range{__FUNCTION__, "(testCase=", testCase, ") :: loop #", k}; + if((testCase == 0 
|| testCase == 1 || testCase == 2 || testCase == 5 || testCase == 6 || testCase == 7 || testCase == 10 || testCase == 11 || testCase == 12)) { @@ -805,6 +836,8 @@ hipPerfMandelBrot::checkData(uint* ptr) int main(int argc, char* argv[]) { + auto _range = roctx_range{argv[0]}; + // Default values for kernels and streams unsigned int numStreamsWarmup = 1, numKernelsWarmup = 1; unsigned int numStreamsSync = 1, numKernelsSync = 1; diff --git a/cmake/rocprofiler_build_settings.cmake b/cmake/rocprofiler_build_settings.cmake index 24c34bc0c..b11e8cfe7 100644 --- a/cmake/rocprofiler_build_settings.cmake +++ b/cmake/rocprofiler_build_settings.cmake @@ -93,10 +93,12 @@ rocprofiler_target_compile_options( rocprofiler-sdk-debug-flags INTERFACE "-g3" "-fno-omit-frame-pointer" "-fno-optimize-sibling-calls") -target_compile_options( - rocprofiler-sdk-debug-flags - INTERFACE $<$:$<$:-rdynamic>> - $<$:$<$:-rdynamic>>) +if(NOT ROCPROFILER_ENABLE_CLANG_TIDY) + target_compile_options( + rocprofiler-sdk-debug-flags + INTERFACE $<$:$<$:-rdynamic>> + $<$:$<$:-rdynamic>>) +endif() if(NOT APPLE AND NOT ROCPROFILER_ENABLE_CLANG_TIDY) target_link_options(rocprofiler-sdk-debug-flags INTERFACE diff --git a/cmake/rocprofiler_options.cmake b/cmake/rocprofiler_options.cmake index 08d103263..6c7243179 100644 --- a/cmake/rocprofiler_options.cmake +++ b/cmake/rocprofiler_options.cmake @@ -51,6 +51,11 @@ rocprofiler_add_option( "Use (internal) instead of RCCL-provided . Note: this should never be used in production" OFF ADVANCED) +rocprofiler_add_option( + ROCPROFILER_BUILD_PYTHON + "Enable building the Python bindings for roctx and rocpd. 
Note: this should not be disabled unless absolutely necessary" + ON + ADVANCED) rocprofiler_add_option( ROCPROFILER_BUILD_GHC_FS @@ -162,3 +167,30 @@ set(ROCPROFILER_DEFAULT_FAIL_REGEX # this should be defaulted to OFF by ROCm 7.0.1 or 7.1 this should only used to disable # sample tests in extreme circumstances option(ROCPROFILER_DISABLE_UNSTABLE_CTESTS "Disable unstable tests" ON) + +if(ROCPROFILER_BUILD_PYTHON) + # make sure we have all python version candidates + set(ROCPROFILER_PYTHON_VERSION_CANDIDATES + "3.20;3.19;3.18;3.17;3.16;3.15;3.14;3.13;3.12;3.11;3.10;3.9;3.8;3.7;3.6" + CACHE STRING "Python versions to search for, newest first") + + if(NOT ROCPROFILER_PYTHON_VERSIONS) + unset(ROCPROFILER_PYTHON_VERSIONS CACHE) + rocprofiler_get_default_python_versions(DEFAULT_PYTHON_VERSIONS) + set(ROCPROFILER_PYTHON_VERSIONS + "${DEFAULT_PYTHON_VERSIONS}" + CACHE STRING "") + endif() + + if(NOT ROCPROFILER_PYTHON_VERSIONS) + message( + FATAL_ERROR + "No python3 versions found for building rocprofiler-sdk Python bindings. Either install Python3 development package(s) (i.e. 
Python.h + python library) or set ROCPROFILER_BUILD_PYTHON=OFF" + ) + endif() +else() + set(ROCPROFILER_PYTHON_VERSIONS "") +endif() + +rocprofiler_add_feature(ROCPROFILER_PYTHON_VERSIONS + "ROCTx and ROCpd Python bindings build versions") diff --git a/cmake/rocprofiler_utilities.cmake b/cmake/rocprofiler_utilities.cmake index 4bd5e7062..e916d57b1 100644 --- a/cmake/rocprofiler_utilities.cmake +++ b/cmake/rocprofiler_utilities.cmake @@ -1026,4 +1026,79 @@ function(rocprofiler_install_env_setup_files) COMPONENT ${RIEF_COMPONENT}) endfunction() +macro(rocprofiler_reset_python3_cache) + foreach( + _VAR + _Python3_Compiler_REASON_FAILURE + _Python3_Development_REASON_FAILURE + _Python3_EXECUTABLE + _Python3_INCLUDE_DIR + _Python3_INTERPRETER_PROPERTIES + _Python3_INTERPRETER_SIGNATURE + _Python3_LIBRARY_RELEASE + _Python3_NumPy_REASON_FAILURE + Python3_EXECUTABLE + Python3_INCLUDE_DIR + Python3_INTERPRETER_ID + Python3_STDLIB + Python3_STDARCH + Python3_SITELIB + Python3_SOABI + ${ARGN}) + unset(${_VAR} CACHE) + unset(${_VAR}) + endforeach() +endmacro() + +macro(rocprofiler_find_python3 _VERSION) + rocprofiler_reset_python3_cache() + + if("${_VERSION}" MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$") + find_package(Python3 ${_VERSION} EXACT ${ARGN} REQUIRED MODULE + COMPONENTS Interpreter Development) + elseif("${_VERSION}" MATCHES "^([0-9]+)\\.([0-9]+)$") + find_package(Python3 ${_VERSION}.0...${_VERSION}.999 ${ARGN} REQUIRED MODULE + COMPONENTS Interpreter Development) + else() + message( + FATAL_ERROR + "Invalid Python3 version (${_VERSION}). Specify . or .." 
+ ) + endif() +endmacro() + +function(rocprofiler_get_default_python_versions _VAR) + set(_PYTHON_FOUND_VERSIONS) + + foreach(_VER IN LISTS ROCPROFILER_PYTHON_VERSION_CANDIDATES) + rocprofiler_reset_python3_cache() + find_package(Python3 ${_VER} EXACT QUIET COMPONENTS Interpreter Development) + + if(Python3_FOUND) + list(APPEND _PYTHON_FOUND_VERSIONS + "${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}") + endif() + endforeach() + + # If none found, do one last check for 3.6 (no EXACT) + if(NOT _PYTHON_FOUND_VERSIONS) + rocprofiler_reset_python3_cache() + find_package(Python3 3.6 COMPONENTS Interpreter Development) + + if(Python3_FOUND) + list(APPEND _PYTHON_FOUND_VERSIONS + "${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}") + endif() + endif() + + # Set the output variable to the first found version, if any + if(_PYTHON_FOUND_VERSIONS) + set(${_VAR} + "${_PYTHON_FOUND_VERSIONS}" + PARENT_SCOPE) + endif() + + rocprofiler_reset_python3_cache() +endfunction() + cmake_policy(POP) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 4756b8177..4ba66074c 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -185,6 +185,8 @@ target_sources( rocprofiler-sdk-perfetto-static-library PRIVATE ${PROJECT_SOURCE_DIR}/external/perfetto/sdk/perfetto.h ${PROJECT_SOURCE_DIR}/external/perfetto/sdk/perfetto.cc) +target_compile_definitions(rocprofiler-sdk-perfetto-static-library + PRIVATE $) target_include_directories( rocprofiler-sdk-perfetto-static-library SYSTEM INTERFACE $) diff --git a/external/gotcha b/external/gotcha index 9afbaf0ae..b944da10f 160000 --- a/external/gotcha +++ b/external/gotcha @@ -1 +1 @@ -Subproject commit 9afbaf0ae5858098d50f79bf494b6333da17da6b +Subproject commit b944da10ff9b3364ef2e4b12e02cb2464e05dd48 diff --git a/source/include/rocprofiler-sdk-rocpd/sql.h b/source/include/rocprofiler-sdk-rocpd/sql.h index 31b301cbd..978ca7d3d 100644 --- a/source/include/rocprofiler-sdk-rocpd/sql.h +++ 
b/source/include/rocprofiler-sdk-rocpd/sql.h @@ -57,7 +57,7 @@ typedef enum ROCPD_EXPERIMENTAL rocpd_sql_schema_kind_t ROCPD_SQL_SCHEMA_ROCPD_VIEWS, ROCPD_SQL_SCHEMA_ROCPD_DATA_VIEWS, ROCPD_SQL_SCHEMA_ROCPD_SUMMARY_VIEWS, - ROCPD_SQL_SCHEMA_ROCPD_MARKER_VIEWS, + ROCPD_SQL_SCHEMA_ROCPD_METADATA, ROCPD_SQL_SCHEMA_LAST, } rocpd_sql_schema_kind_t; diff --git a/source/lib/common/logging.hpp b/source/lib/common/logging.hpp index ffac6239b..dcbd74b6c 100644 --- a/source/lib/common/logging.hpp +++ b/source/lib/common/logging.hpp @@ -52,10 +52,10 @@ #if defined(ROCPROFILER_CI) # define ROCP_CI_LOG_IF(NON_CI_LEVEL, ...) ROCP_FATAL_IF(__VA_ARGS__) -# define ROCP_CI_LOG(NON_CI_LEVEL, ...) ROCP_FATAL +# define ROCP_CI_LOG(NON_CI_LEVEL) ROCP_FATAL #else # define ROCP_CI_LOG_IF(NON_CI_LEVEL, ...) ROCP_##NON_CI_LEVEL##_IF(__VA_ARGS__) -# define ROCP_CI_LOG(NON_CI_LEVEL, ...) ROCP_##NON_CI_LEVEL +# define ROCP_CI_LOG(NON_CI_LEVEL) ROCP_##NON_CI_LEVEL #endif namespace rocprofiler diff --git a/source/lib/common/simple_timer.cpp b/source/lib/common/simple_timer.cpp index 2eadc294c..17d063224 100644 --- a/source/lib/common/simple_timer.cpp +++ b/source/lib/common/simple_timer.cpp @@ -22,6 +22,7 @@ #include "lib/common/simple_timer.hpp" #include "lib/common/logging.hpp" +#include "lib/common/synchronized.hpp" #include @@ -35,14 +36,16 @@ namespace rocprofiler { namespace common { -simple_timer::simple_timer(std::string&& label) +simple_timer::simple_timer(std::string&& label, int log_level) : m_label{std::move(label)} +, m_log_level{log_level} { start(); } -simple_timer::simple_timer(std::string&& label, defer_start) +simple_timer::simple_timer(std::string&& label, defer_start, int log_level) : m_label{std::move(label)} +, m_log_level{log_level} {} simple_timer::~simple_timer() @@ -52,19 +55,21 @@ simple_timer::~simple_timer() else if(m_end <= m_beg) stop(); - ROCP_WARNING << fmt::format("{} :: {:12.6f} sec", m_label, get()); + report(); } -void +simple_timer& simple_timer::start() { 
m_beg = clock_type::now(); + return *this; } -void +simple_timer& simple_timer::stop() { m_end = clock_type::now(); + return *this; } double @@ -81,6 +86,51 @@ simple_timer::get_nsec() const return std::chrono::duration_cast(m_end - m_beg).count(); } +simple_timer& +simple_timer::report() +{ + static auto max_width = Synchronized{0}; + max_width.wlock( + [](auto& _max_width, auto _w) { + if(_w > _max_width) _max_width = _w; + if(_max_width > 120) _max_width = 120; + }, + m_label.size()); + + auto _width = max_width.get(); + + switch(m_log_level) + { + case ROCP_LOG_LEVEL_WARNING: + { + ROCP_WARNING << fmt::format("{:<{}} :: {:12.6f} sec", m_label, _width, get()); + break; + } + case ROCP_LOG_LEVEL_ERROR: + { + ROCP_ERROR << fmt::format("{:<{}} :: {:12.6f} sec", m_label, _width, get()); + break; + } + case ROCP_LOG_LEVEL_INFO: + { + ROCP_INFO << fmt::format("{:<{}} :: {:12.6f} sec", m_label, _width, get()); + break; + } + case ROCP_LOG_LEVEL_TRACE: + { + ROCP_TRACE << fmt::format("{:<{}} :: {:12.6f} sec", m_label, _width, get()); + break; + } + case ROCP_LOG_LEVEL_NONE: + default: + { + break; + } + } + + return *this; +} + std::ostream& operator<<(std::ostream& _os, const simple_timer& _val) { diff --git a/source/lib/common/simple_timer.hpp b/source/lib/common/simple_timer.hpp index f919ad5f2..472057d97 100644 --- a/source/lib/common/simple_timer.hpp +++ b/source/lib/common/simple_timer.hpp @@ -23,6 +23,7 @@ #pragma once #include "lib/common/defines.hpp" +#include "lib/common/logging.hpp" #include #include @@ -41,16 +42,17 @@ struct simple_timer { using duration_t = std::chrono::duration; - explicit simple_timer(std::string&& label); - explicit simple_timer(std::string&& label, defer_start); + explicit simple_timer(std::string&& label, int log_level = ROCP_LOG_LEVEL_WARNING); + explicit simple_timer(std::string&& label, defer_start, int log_level = ROCP_LOG_LEVEL_WARNING); ~simple_timer(); - void start(); - void stop(); + simple_timer& start(); + simple_timer& 
stop(); double get() const; size_t get_nsec() const; std::string_view label() const { return std::string_view{m_label}; } void set_quiet(bool v) const { m_quiet = v; } + simple_timer& report(); friend std::ostream& operator<<(std::ostream& _os, const simple_timer& _val); @@ -58,10 +60,11 @@ struct simple_timer using clock_type = std::chrono::steady_clock; using time_point_t = std::chrono::time_point; - std::string m_label = {}; - time_point_t m_beg = {}; - time_point_t m_end = {}; - mutable bool m_quiet = false; + std::string m_label = {}; + time_point_t m_beg = {}; + time_point_t m_end = {}; + int m_log_level = ROCP_LOG_LEVEL_WARNING; + mutable bool m_quiet = false; }; } // namespace common } // namespace rocprofiler diff --git a/source/lib/output/counter_info.hpp b/source/lib/output/counter_info.hpp index b57a7c961..bc01feb4e 100644 --- a/source/lib/output/counter_info.hpp +++ b/source/lib/output/counter_info.hpp @@ -40,21 +40,56 @@ namespace tool { constexpr uint32_t lds_block_size = 128 * 4; +struct tool_counter_dimension_instance_info +{ + using parent_type = rocprofiler_counter_record_dimension_instance_info_t; + + tool_counter_dimension_instance_info() = default; + ~tool_counter_dimension_instance_info() = default; + + explicit tool_counter_dimension_instance_info(parent_type _data) + : instance_id(_data.instance_id) + , counter_id(_data.counter_id) + { + if(_data.dimensions) + { + dimensions.reserve(_data.dimensions_count); + for(size_t i = 0; i < _data.dimensions_count; ++i) + { + if(_data.dimensions[i]) dimensions.emplace_back(*_data.dimensions[i]); + } + } + } + + auto size() const { return dimensions.size(); } + auto begin() { return dimensions.begin(); } + auto begin() const { return dimensions.begin(); } + auto end() { return dimensions.end(); } + auto end() const { return dimensions.end(); } + + rocprofiler_counter_instance_id_t instance_id = 0; + uint64_t counter_id = 0; + std::vector dimensions = {}; +}; + using counter_dimension_id_vec_t = 
std::vector; using counter_dimension_info_vec_t = std::vector; +using counter_dimension_instance_info_vec_t = std::vector; struct tool_counter_info : rocprofiler_counter_info_v1_t { using parent_type = rocprofiler_counter_info_v1_t; - tool_counter_info(rocprofiler_agent_id_t _agent_id, - parent_type _info, - counter_dimension_id_vec_t&& _dim_ids, - counter_dimension_info_vec_t&& _dim_info) + tool_counter_info(rocprofiler_agent_id_t _agent_id, + parent_type _info, + counter_dimension_id_vec_t&& _dim_ids, + counter_dimension_info_vec_t&& _dim_info, + counter_dimension_instance_info_vec_t&& _dim_instances) : parent_type{_info} , agent_id{_agent_id} , dimension_ids{std::move(_dim_ids)} , dimensions{std::move(_dim_info)} + , dimension_instances{std::move(_dim_instances)} {} ~tool_counter_info() = default; @@ -63,9 +98,10 @@ struct tool_counter_info : rocprofiler_counter_info_v1_t tool_counter_info& operator=(const tool_counter_info&) = default; tool_counter_info& operator=(tool_counter_info&&) noexcept = default; - rocprofiler_agent_id_t agent_id = {}; - counter_dimension_id_vec_t dimension_ids = {}; - counter_dimension_info_vec_t dimensions = {}; + rocprofiler_agent_id_t agent_id = {}; + counter_dimension_id_vec_t dimension_ids = {}; + counter_dimension_info_vec_t dimensions = {}; + counter_dimension_instance_info_vec_t dimension_instances = {}; }; using counter_info_vec_t = std::vector; @@ -73,13 +109,15 @@ using agent_counter_info_map_t = std::unordered_map void save(ArchiveT& ar) const { ar(cereal::make_nvp("counter_id", id)); + ar(cereal::make_nvp("counter_instance_id", instance_id)); ar(cereal::make_nvp("value", value)); } }; diff --git a/source/lib/output/generateRocpd.cpp b/source/lib/output/generateRocpd.cpp index b1cf68c99..5c6f7f966 100644 --- a/source/lib/output/generateRocpd.cpp +++ b/source/lib/output/generateRocpd.cpp @@ -21,7 +21,6 @@ // SOFTWARE. 
#include "generateRocpd.hpp" -#include "lib/common/uuid_v7.hpp" #include "metadata.hpp" #include "output_stream.hpp" #include "stream_info.hpp" @@ -39,6 +38,7 @@ #include "lib/common/scope_destructor.hpp" #include "lib/common/simple_timer.hpp" #include "lib/common/utility.hpp" +#include "lib/common/uuid_v7.hpp" #include "lib/output/sql/common.hpp" #include "lib/output/sql/deferred_transaction.hpp" @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -67,7 +68,10 @@ #include #include #include +#include #include +#include +#include #include #include #include @@ -291,12 +295,6 @@ iterate_args_callback(rocprofiler_buffer_tracing_kind_t /*kind*/, return 0; } -struct sql_insert_value -{ - std::string_view name = {}; - std::string value = {}; -}; - struct allow_empty_string {}; @@ -408,29 +406,14 @@ create_event_impl(sqlite3* conn, std::initializer_list&& _data } uint64_t -get_track_id_impl(sqlite3* conn, - uint64_t node_id, - pid_t pid, - pid_t tid, - uint64_t name_id, - std::string_view extdata, - int line) +get_track_id_impl(int line, sqlite3* conn, track_data&& track) { - auto _track = track_data{node_id, pid, tid, name_id}; - auto itr = get_tracks().find(_track); + auto itr = get_tracks().find(track); if(itr == get_tracks().end()) { auto idx = get_tracks().size() + 1; - itr = get_tracks().emplace(_track, idx).first; - auto stmt = get_insert_statement("rocpd_track{{uuid}}", - { - insert_value("id", idx), - insert_value("nid", node_id), - insert_value("pid", pid), - insert_value("tid", tid), - insert_value("name_id", name_id), - insert_value("extdata", extdata), - }); + itr = get_tracks().emplace(track, idx).first; + auto stmt = get_insert_statement("rocpd_track{{uuid}}", track.get_insert_values()); sql::execute_raw_sql_statements_impl(conn, stmt, line); return idx; @@ -439,111 +422,109 @@ get_track_id_impl(sqlite3* conn, return itr->second; } -// so that execute_raw_sql_statements returns the correct line -#define create_event(...) 
create_event_impl(__VA_ARGS__, __LINE__) -#define get_track_id(...) get_track_id_impl(__VA_ARGS__, __LINE__) -} // namespace - -size_t -track_data::hash() const +constexpr auto phase_none = ROCPROFILER_CALLBACK_PHASE_NONE; +constexpr auto phase_enter = ROCPROFILER_CALLBACK_PHASE_ENTER; +constexpr auto phase_exit = ROCPROFILER_CALLBACK_PHASE_EXIT; +constexpr auto phase_last = ROCPROFILER_CALLBACK_PHASE_LAST; + +int64_t +get_timestamp_id_impl(sqlite3* conn, + rocprofiler_timestamp_t timestamp, + rocprofiler_callback_phase_t phase, + uint64_t track_id, + int line) { - return get_hash_id(fmt::format("{:#018x}{:#018x}{:#018x}{:#018x}", node_id, pid, tid, name_id)); -} + using phase_value_type = std::underlying_type_t; -bool -operator==(const track_data& lhs, const track_data& rhs) -{ - return std::tie(lhs.node_id, lhs.pid, lhs.tid, lhs.name_id) == - std::tie(rhs.node_id, rhs.pid, rhs.tid, rhs.name_id); -} - -namespace -{ -constexpr auto MEMORY_PREFIX = std::string_view{"MEMORY_ALLOCATION_"}; -constexpr auto SCRATCH_PREFIX = std::string_view{"SCRATCH_MEMORY_"}; -constexpr auto VMEM_PREFIX = std::string_view{"VMEM_"}; -constexpr auto ASYNC_PREFIX = std::string_view{"ASYNC_"}; - -std::pair -memtype_to_db(std::string_view memory_type) -{ - std::string _type; - std::string _level; - if(memory_type.find(MEMORY_PREFIX) == 0) + if(phase < phase_none || phase >= phase_last) { - _type = memory_type.substr(MEMORY_PREFIX.length()); - if(_type.find(VMEM_PREFIX) == 0) - { - _type = _type.substr(VMEM_PREFIX.length()); - _level = "VIRTUAL"; - } - else - { - _level = "REAL"; - } - } - else if(memory_type.find(SCRATCH_PREFIX) == 0) - { - _type = memory_type.substr(SCRATCH_PREFIX.length()); - _level = "SCRATCH"; - if(memory_type.find(ASYNC_PREFIX) == 0) - { - _type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM - } + ROCP_CI_LOG(WARNING) << fmt::format( + "timestamp {} in track {} has an unsupported phase {} (expected {} <= x < {}). " + "Treating the phase as NONE (i.e. 
0)", + timestamp, + track_id, + static_cast(phase), + static_cast(phase_none), + static_cast(phase_last)); + + phase = phase_none; } - if(_type == "ALLOCATE") - { - _type = "ALLOC"; - } + auto stmt = get_insert_statement("rocpd_timestamp{{uuid}}", + {insert_value("value", timestamp), + insert_value("phase", static_cast(phase)), + insert_value("track_id", track_id)}); + sql::execute_raw_sql_statements_impl(conn, stmt, line); - return std::make_pair(_type, _level); + return sqlite3_last_insert_rowid(conn); } -template -auto -extract_flags_field(const Tp& _data, int) -> decltype(std::declval().flags, std::string{}) +// so that execute_raw_sql_statements returns the correct line +#define create_event(...) create_event_impl(__VA_ARGS__, __LINE__) +#define get_track_id(...) get_track_id_impl(__LINE__, __VA_ARGS__) +#define get_timestamp_id(...) get_timestamp_id_impl(__VA_ARGS__, __LINE__) +} // namespace + +size_t +track_data::hash() const { - return std::to_string(static_cast(_data.flags)); + return common::fnv1a_hasher::combine( + nid, ppid, pid, tid, agent_id, queue_id, stream_id, name_id, extdata); } -template -std::string -extract_flags_field(const Tp&, long) +std::vector +track_data::get_insert_values() const { - return ""; + return std::vector{ + insert_value("nid", nid), + insert_value("ppid", ppid), + insert_value("pid", pid), + insert_value("tid", tid), + insert_value("agent_id", agent_id), + insert_value("queue_id", queue_id), + insert_value("stream_id", stream_id), + insert_value("name_id", name_id), + insert_value("extdata", extdata), + }; } -template -std::string -extract_flags_field(const Tp& _data) +bool +operator==(const track_data& lhs, const track_data& rhs) { - return extract_flags_field(_data, 0); + return (lhs.tie() == rhs.tie()); } -#define GENERATE_FIELD_ACCESSOR(FUNC_NAME, FIELD_NAME, DATA_TYPE, ...) 
\ +namespace +{ +#define GENERATE_FIELD_ACCESSOR(FUNC_NAME, FIELD_NAME, DATA_TYPE) \ template \ - auto FUNC_NAME(const Tp& _data, int)->decltype(std::declval().FIELD_NAME, DATA_TYPE{}) \ + auto FUNC_NAME(const Tp& _data, int) \ + ->decltype(std::declval().FIELD_NAME, std::optional{}) \ { \ return _data.FIELD_NAME; \ } \ \ template \ - DATA_TYPE FUNC_NAME(const Tp&, long) \ + std::optional FUNC_NAME(const Tp&, long) \ { \ - return DATA_TYPE{__VA_ARGS__}; \ + return std::nullopt; \ } \ \ template \ - DATA_TYPE FUNC_NAME(const Tp& _data) \ + std::optional FUNC_NAME(const Tp& _data) \ { \ return FUNC_NAME(_data, 0); \ } -GENERATE_FIELD_ACCESSOR(extract_stream_field, stream_id, rocprofiler_stream_id_t, 0) -GENERATE_FIELD_ACCESSOR(extract_queue_field, queue_id, rocprofiler_queue_id_t, 0) -GENERATE_FIELD_ACCESSOR(extract_allocation_size_field, allocation_size, uint64_t, 0) -GENERATE_FIELD_ACCESSOR(extract_address_field, address, rocprofiler_address_t, 0) +GENERATE_FIELD_ACCESSOR(extract_stream_field, stream_id, rocprofiler_stream_id_t) +GENERATE_FIELD_ACCESSOR(extract_queue_field, queue_id, rocprofiler_queue_id_t) +GENERATE_FIELD_ACCESSOR(extract_allocation_size_field, allocation_size, uint64_t) +GENERATE_FIELD_ACCESSOR(extract_address_field, address, rocprofiler_address_t) +GENERATE_FIELD_ACCESSOR(extract_flags_field, flags, rocprofiler_scratch_alloc_flag_t) + +constexpr auto null_stream_id = rocprofiler_stream_id_t{.handle = 0}; +constexpr auto null_queue_id = rocprofiler_queue_id_t{.handle = 0}; +constexpr auto null_agent_id = rocprofiler_agent_id_t{.handle = 0}; } // namespace void @@ -604,10 +585,12 @@ write_rocpd( execute_raw_sql_statements(conn, table_schema); - for(auto itr : {ROCPD_SQL_SCHEMA_ROCPD_VIEWS, - ROCPD_SQL_SCHEMA_ROCPD_DATA_VIEWS, - ROCPD_SQL_SCHEMA_ROCPD_MARKER_VIEWS, - ROCPD_SQL_SCHEMA_ROCPD_SUMMARY_VIEWS}) + for(auto itr : { + ROCPD_SQL_SCHEMA_ROCPD_VIEWS, + ROCPD_SQL_SCHEMA_ROCPD_DATA_VIEWS, + ROCPD_SQL_SCHEMA_ROCPD_SUMMARY_VIEWS, + 
ROCPD_SQL_SCHEMA_ROCPD_METADATA, + }) { auto views_schema = read_schema_file(itr); execute_raw_sql_statements(conn, views_schema); @@ -743,6 +726,7 @@ write_rocpd( // use this to lookup indexes of strings auto string_entries = _metadata.get_string_entries(); + auto category_ids = std::unordered_map{}; { auto _sqlgenperf_rocpd = get_simple_timer("rocpd_string"); @@ -760,6 +744,29 @@ write_rocpd( } } + auto insert_category_data = [&conn, &tool_metadata](auto& _category_ids) { + auto _sqlgenperf_rocpd = get_simple_timer("rocpd_info_category"); + auto _existing_categories = std::unordered_map{}; + for(const auto& itr : tool_metadata.buffer_names) + { + if(auto _category = std::string_view{sdk::get_perfetto_category(itr.value)}; + _existing_categories.count(_category) == 0) + { + auto stmt = get_insert_statement("rocpd_info_category{{uuid}}", + { + insert_value("name", _category), + }); + + _category_ids[itr.value] = execute_raw_sql_statements(conn, stmt); + } + else + { + _category_ids[itr.value] = + _existing_categories[_category]; // reuse existing category ID + } + }; + }; + auto insert_node_data = [&conn, &tool_metadata, node_id, node_hash]() { auto _sqlgenperf_rocpd = get_simple_timer("rocpd_info_node"); const auto& _info = tool_metadata.node_data; @@ -843,6 +850,10 @@ write_rocpd( else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU) type = "GPU"; + auto _name = (itr.product_name && !std::string_view{itr.product_name}.empty()) + ? 
std::make_optional(std::string_view{itr.product_name}) + : std::nullopt; + auto stmt = get_insert_statement( "rocpd_info_agent{{uuid}}", { @@ -854,11 +865,11 @@ write_rocpd( insert_value("logical_index", itr.logical_node_id), insert_value("type_index", itr.logical_node_type_id), insert_value("uuid", itr.device_id), - insert_value("name", itr.name), + insert_value("name", _name), + insert_value("generic_name", itr.name), insert_value("model_name", itr.model_name, allow_empty_string{}), insert_value("vendor_name", itr.vendor_name, allow_empty_string{}), insert_value("product_name", itr.product_name, allow_empty_string{}), - insert_value("user_name", itr.product_name, allow_empty_string{}), insert_value("extdata", json_info), }); @@ -933,51 +944,91 @@ write_rocpd( { for(const auto& aitr : itr.second) { + ROCP_CI_LOG_IF(WARNING, itr.first != aitr.agent_id) + << fmt::format("Agent ID mismatch for counter {}: {} vs {}", + aitr.name, + itr.first.handle, + aitr.agent_id.handle); + const auto* agent = tool_metadata.get_agent(itr.first); if(agent == nullptr || !recorded.emplace(aitr.id).second) continue; - auto json_data = get_json_string([agent](auto& ar) { - if(agent) cereal::save(ar, *agent); - }); + auto json_data = get_json_string( + [](auto& ar, const auto& counter_data) { cereal::save(ar, counter_data); }, + aitr); auto _name = sanitize_sql_string(aitr.name); auto _description = sanitize_sql_string(aitr.description); auto _block = sanitize_sql_string(aitr.block); auto _expression = sanitize_sql_string(aitr.expression); - auto stmt = get_insert_statement( - "rocpd_info_pmc{{uuid}}", + for(const auto& ditr : aitr.dimension_instances) + { + // auto _counter_id = ditr.counter_id; + auto _instance_id = ditr.instance_id; + auto _qualifiers = std::vector{}; + _qualifiers.reserve(ditr.size() + 1); + + for(const auto& iitr : ditr) { - insert_value("id", aitr.id.handle), - insert_value("nid", node_id), - insert_value("pid", this_pid), - insert_value("target_arch", 
std::string_view{"GPU"}), - insert_value("agent_id", agent->node_id), - insert_value("name", _name, allow_empty_string{}), - insert_value("symbol", _name, allow_empty_string{}), - insert_value("description", _description, allow_empty_string{}), - insert_value("component", std::string_view{"rocm"}), - insert_value("value_type", std::string_view{"ABS"}), - insert_value("block", _block, allow_empty_string{}), - insert_value("expression", _expression, allow_empty_string{}), - insert_value("is_constant", aitr.is_constant), - insert_value("is_derived", aitr.is_derived), - insert_value("extdata", json_data), - }); + constexpr auto _prefix = std::string_view{"dimension_"}; + auto _qualifier_name = sanitize_sql_string(iitr.dimension_name); + for(auto& qitr : _qualifier_name) + qitr = std::tolower(qitr); // convert to lowercase - execute_raw_sql_statements(conn, stmt); + // remove "dimension_" prefix + if(auto _pos = _qualifier_name.find(_prefix); + _pos == 0 && _pos + _prefix.length() < _qualifier_name.length()) + _qualifier_name = _qualifier_name.substr(_pos + _prefix.length()); + + _qualifiers.emplace_back(fmt::format("{}={}", _qualifier_name, iitr.index)); + } + + auto _qualifier = + fmt::format("{}", fmt::join(_qualifiers.begin(), _qualifiers.end(), ",")); + auto _instance_name = fmt::format("{}:{}", _name, _qualifier); + auto _dim_stmt = get_insert_statement( + "rocpd_info_pmc{{uuid}}", + { + insert_value("id", _instance_id), + insert_value("nid", node_id), + insert_value("pid", this_pid), + insert_value("target_arch", std::string_view{"GPU"}), + insert_value("agent_id", agent->node_id), + insert_value("name", _name, allow_empty_string{}), + insert_value("symbol", _instance_name, allow_empty_string{}), + insert_value("qualifier", _qualifier, allow_empty_string{}), + insert_value("description", _description, allow_empty_string{}), + insert_value("component", std::string_view{"rocm"}), + insert_value("value_type", std::string_view{"ABS"}), + insert_value("block", _block, 
allow_empty_string{}), + insert_value("expression", _expression, allow_empty_string{}), + insert_value("is_constant", aitr.is_constant), + insert_value("is_derived", aitr.is_derived), + }); + + execute_raw_sql_statements(conn, _dim_stmt); + } } } }; - auto insert_kernel_dispatch_data = [&, node_id, this_pid](auto& dispatch_evt_ids) { + auto insert_kernel_dispatch_data = [&conn, + &tool_metadata, + &category_ids, + &string_entries, + &kernel_dispatch_gen, + &counter_collection_gen, + node_id, + this_ppid, + this_pid](auto& dispatch_evt_ids) { auto _sqlgenperf_rocpd = get_simple_timer("rocpd_kernel_dispatch"); auto process_dispatch = [&](uint64_t dispatch_id, uint64_t kernel_id, const auto& corr_id, const auto& info, - const auto& kind, + auto kind, uint32_t thread_id, uint64_t queue_id, uint64_t stream_id, @@ -995,7 +1046,7 @@ write_rocpd( auto evt_id = create_event(conn, { - insert_value("category_id", string_entries.at(kind)), + insert_value("category_id", category_ids.at(kind)), insert_value("stack_id", corr_id.internal), insert_value("parent_stack_id", corr_id.internal), insert_value("correlation_id", corr_id.external.value), @@ -1025,23 +1076,33 @@ write_rocpd( ? 
tool_metadata.get_kernel_name(kernel_id, corr_id.external.value) : std::string_view{}; - auto agent_node_id = tool_metadata.get_agent(info.agent_id)->node_id; + auto track_id = + get_track_id(conn, + track_data{ + .nid = node_id, + .ppid = this_ppid, + .pid = this_pid, + .tid = thread_id, + .agent_id = tool_metadata.get_agent(info.agent_id)->node_id, + .queue_id = queue_id, + .stream_id = stream_id, + .name_id = std::nullopt, + .extdata = std::nullopt, + }); + + auto start_id = get_timestamp_id(conn, start_timestamp, phase_enter, track_id); + auto end_id = get_timestamp_id(conn, end_timestamp, phase_exit, track_id); // Insert into kernel dispatch table auto stmt = get_insert_statement( "rocpd_kernel_dispatch{{uuid}}", { insert_value("id", dispatch_id), - insert_value("nid", node_id), - insert_value("pid", this_pid), - insert_value("tid", thread_id), - insert_value("agent_id", agent_node_id), + insert_value("track_id", track_id), insert_value("kernel_id", kernel_id), insert_value("dispatch_id", dispatch_id), - insert_value("queue_id", queue_id), - insert_value("stream_id", stream_id), - insert_value("start", start_timestamp), - insert_value("end", end_timestamp), + insert_value("start_id", start_id), + insert_value("end_id", end_id), insert_value("private_segment_size", info.private_segment_size), insert_value("group_segment_size", info.group_segment_size), insert_value("workgroup_size_x", workgroup.x), @@ -1071,8 +1132,7 @@ write_rocpd( get_thread_id(record.thread_id); // Use buffer category for kernel dispatches - auto kind = - tool_metadata.buffer_names.at(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH); + auto kind = ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH; // Process this dispatch process_dispatch(info.dispatch_id, // dispatch_id @@ -1107,7 +1167,7 @@ write_rocpd( itr.dispatch_info.kernel_id, // kernel_id itr.correlation_id, // corr_id itr.dispatch_info, // info - tool_metadata.buffer_names.at(itr.kind), // kind + itr.kind, // kind itr.thread_id, // thread_id 
get_queue_id(itr.dispatch_info.queue_id), // queue_id get_stream_id(itr.stream_id), // stream_id @@ -1122,27 +1182,33 @@ write_rocpd( } }; - auto insert_pmc_event_data = [&conn, &tool_metadata, &counter_collection_gen]( - auto& dispatch_evt_ids) { - auto _sqlgenperf_rocpd = get_simple_timer("rocpd_pmc_event"); - size_t idx = tool_metadata.pmc_event_offset; + auto insert_pmc_event_data = [&conn, &counter_collection_gen](auto& dispatch_evt_ids) { + auto _sqlgenperf_rocpd = get_simple_timer("rocpd_pmc_event"); for(auto ditr : counter_collection_gen) { auto _deferred = sql::deferred_transaction{conn}; - for(const auto& record : counter_collection_gen.get(ditr)) + for(const auto& itr : counter_collection_gen.get(ditr)) { - const auto& info = record.dispatch_data.dispatch_info; + const auto& info = itr.dispatch_data.dispatch_info; auto dispatch_id = info.dispatch_id; + auto has_evt_id = (dispatch_id < dispatch_evt_ids.size()); + + if(!has_evt_id) + { + ROCP_CI_LOG(WARNING) << fmt::format( + "dispatch counter collection is missing event id for dispatch_id={}", + dispatch_id); + continue; + } auto evt_id = dispatch_evt_ids.at(dispatch_id); - for(const auto& count : record.read()) + for(const auto& ritr : itr.read()) { auto stmt = get_insert_statement("rocpd_pmc_event{{uuid}}", { - insert_value("id", idx++), insert_value("event_id", evt_id), - insert_value("pmc_id", count.id.handle), - insert_value("value", count.value), + insert_value("pmc_id", ritr.instance_id), + insert_value("value", ritr.value), }); execute_raw_sql_statements(conn, stmt); @@ -1152,7 +1218,8 @@ write_rocpd( }; auto insert_memory_copy_data = - [&conn, &tool_metadata, &string_entries, node_id, this_pid](const auto& _gen) { + [&conn, &tool_metadata, &category_ids, &string_entries, node_id, this_ppid, this_pid]( + const auto& _gen) { auto _sqlgenperf_rocpd = get_simple_timer("rocpd_memory_copy"); size_t copy_idx = 1; @@ -1164,27 +1231,42 @@ write_rocpd( // insert thread info if it doesn't already exist 
get_thread_id(itr.thread_id); - auto kind = tool_metadata.buffer_names.at(itr.kind); auto name = tool_metadata.buffer_names.at(itr.kind, itr.operation); auto evt_id = create_event( conn, { - insert_value("category_id", string_entries.at(kind)), + insert_value("category_id", category_ids.at(itr.kind)), insert_value("stack_id", itr.correlation_id.internal), insert_value("parent_stack_id", itr.correlation_id.internal), insert_value("correlation_id", itr.correlation_id.external.value), }); + auto track_id = get_track_id( + conn, + track_data{ + .nid = node_id, + .ppid = this_ppid, + .pid = this_pid, + .tid = itr.thread_id, + .agent_id = tool_metadata.get_agent(itr.dst_agent_id)->node_id, + .queue_id = std::nullopt, + .stream_id = get_stream_id(itr.stream_id), + .name_id = std::nullopt, + .extdata = std::nullopt, + }); + + auto start_id = + get_timestamp_id(conn, itr.start_timestamp, phase_enter, track_id); + auto end_id = get_timestamp_id(conn, itr.end_timestamp, phase_exit, track_id); + auto stmt = get_insert_statement( "rocpd_memory_copy{{uuid}}", { insert_value("id", copy_idx++), - insert_value("nid", node_id), - insert_value("pid", this_pid), - insert_value("tid", itr.thread_id), - insert_value("start", itr.start_timestamp), - insert_value("end", itr.end_timestamp), + insert_value("track_id", track_id), + insert_value("start_id", start_id), + insert_value("end_id", end_id), insert_value("name_id", string_entries.at(name)), insert_value("dst_agent_id", tool_metadata.get_agent(itr.dst_agent_id)->node_id), @@ -1193,7 +1275,6 @@ write_rocpd( insert_value("dst_address", itr.dst_address.value), insert_value("src_address", itr.src_address.value), insert_value("size", itr.bytes), - insert_value("stream_id", get_stream_id(itr.stream_id)), insert_value("event_id", evt_id), }); @@ -1203,7 +1284,8 @@ write_rocpd( }; auto insert_memory_alloc_data = - [&conn, &tool_metadata, &string_entries, node_id, this_pid](const auto& _gen) { + [&conn, &tool_metadata, &category_ids, 
&string_entries, node_id, this_ppid, this_pid]( + const auto& _gen) { for(auto pitr : _gen) { auto _deferred = sql::deferred_transaction{conn}; @@ -1212,60 +1294,150 @@ write_rocpd( // insert thread info if it doesn't already exist get_thread_id(itr.thread_id); - auto _kind = tool_metadata.buffer_names.at(itr.kind); - auto _cpptype = tool_metadata.buffer_names.at(itr.kind, itr.operation); - auto [_type, _level] = memtype_to_db(_cpptype); + auto _kind = tool_metadata.buffer_names.at(itr.kind); + auto _name = tool_metadata.buffer_names.at(itr.kind, itr.operation); - ROCP_FATAL_IF(_type != "ALLOC" && _type != "FREE" && _type != "RECLAIM" && - _type != "REALLOC") - << "erroneous db type: " << _type; + auto _type = std::string{}; + auto _level = std::string{}; - ROCP_FATAL_IF(_level != "REAL" && _level != "VIRTUAL" && _level != "SCRATCH") - << "erroneous db level: " << _level; + auto _emit_warning = [_kind, _name]() { + ROCP_CI_LOG(WARNING) + << fmt::format("rocpd does not know how to classify memory allocation " + "of kind={} and operation={}", + _kind, + _name); + }; - auto _node_id = std::optional{}; - if(_type == "ALLOC") + if(itr.kind == ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION) + { + auto _operation = + static_cast(itr.operation); + if(_operation == ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE) + { + _type = "ALLOC"; + _level = "REAL"; + } + else if(_operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_ALLOCATE) + { + _type = "ALLOC"; + _level = "VIRTUAL"; + } + else if(_operation == ROCPROFILER_MEMORY_ALLOCATION_FREE) + { + _type = "FREE"; + _level = "REAL"; + } + else if(_operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_FREE) + { + _type = "FREE"; + _level = "VIRTUAL"; + } + else + { + _emit_warning(); + continue; + } + } + else if(itr.kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY) { - _node_id = tool_metadata.get_agent(itr.agent_id)->node_id; + auto _operation = + static_cast(itr.operation); + if(_operation == ROCPROFILER_SCRATCH_MEMORY_ALLOC) + { + _type = 
"ALLOC"; + _level = "SCRATCH"; + } + else if(_operation == ROCPROFILER_SCRATCH_MEMORY_FREE) + { + _type = "FREE"; + _level = "SCRATCH"; + } + else if(_operation == ROCPROFILER_SCRATCH_MEMORY_ASYNC_RECLAIM) + { + _type = "RECLAIM"; + _level = "SCRATCH"; + } + else + { + _emit_warning(); + continue; + } + } + else + { + _emit_warning(); + continue; } - auto _stream_id = get_stream_id(extract_stream_field(itr)); - auto _queue_id = get_queue_id(extract_queue_field(itr)); - auto _address = extract_address_field(itr); + auto _stream_id = + get_stream_id(extract_stream_field(itr).value_or(null_stream_id)); + auto _queue_id = get_queue_id(extract_queue_field(itr).value_or(null_queue_id)); + auto _address = extract_address_field(itr); auto _allocation_size = extract_allocation_size_field(itr); + auto _flags = extract_flags_field(itr); + auto _node_id = std::optional{}; + auto _address_value = (_address) ? std::make_optional(_address->value) + : std::optional{}; + auto _extdata = (_flags) + ? std::make_optional(get_json_string([&_flags](auto& ar) { + ar(cereal::make_nvp("flags", *_flags)); + })) + : std::optional{}; + + if(itr.agent_id != null_agent_id) + { + if(const auto* _agent = tool_metadata.get_agent(itr.agent_id); _agent) + _node_id = _agent->node_id; + else + { + ROCP_CI_LOG(ERROR) + << fmt::format("nullptr to rocprofiler_agent_id_t{}.handle={}{}", + '{', + itr.agent_id.handle, + '}'); + } + } auto evt_id = create_event( conn, { - insert_value("category_id", string_entries.at(_kind)), + insert_value("category_id", category_ids.at(itr.kind)), insert_value("stack_id", itr.correlation_id.internal), insert_value("parent_stack_id", itr.correlation_id.ancestor), insert_value("correlation_id", itr.correlation_id.external.value), }); - auto flags = extract_flags_field(itr); + auto track_id = get_track_id(conn, + track_data{ + .nid = node_id, + .ppid = this_ppid, + .pid = this_pid, + .tid = itr.thread_id, + .agent_id = _node_id, + .queue_id = _queue_id, + .stream_id = 
_stream_id, + .name_id = std::nullopt, + .extdata = std::nullopt, + }); - auto stmt = get_insert_statement( - "rocpd_memory_allocate{{uuid}}", - { - insert_value("nid", node_id), - insert_value("pid", this_pid), - insert_value("tid", itr.thread_id), - insert_value("start", itr.start_timestamp), - insert_value("end", itr.end_timestamp), - insert_value("agent_id", _node_id), - insert_value("type", _type), - insert_value("level", _level), - insert_value("queue_id", _queue_id), - insert_value("stream_id", _stream_id), - insert_value("event_id", evt_id), - insert_value("address", _address.value), - insert_value("size", _allocation_size), - insert_value("extdata", - flags == "" ? "{}" : get_json_string([&flags](auto& ar) { - ar(cereal::make_nvp("flags", flags)); - })), - }); + auto start_id = + get_timestamp_id(conn, itr.start_timestamp, phase_enter, track_id); + auto end_id = get_timestamp_id(conn, itr.end_timestamp, phase_exit, track_id); + + auto stmt = + get_insert_statement("rocpd_memory_allocate{{uuid}}", + { + insert_value("track_id", track_id), + insert_value("start_id", start_id), + insert_value("end_id", end_id), + insert_value("name_id", string_entries.at(_name)), + insert_value("type", _type), + insert_value("level", _level), + insert_value("event_id", evt_id), + insert_value("address", _address_value), + insert_value("size", _allocation_size), + insert_value("extdata", _extdata), + }); execute_raw_sql_statements(conn, stmt); } @@ -1273,123 +1445,121 @@ write_rocpd( }; // new string entries argument types and names can be added to _metadata - auto insert_api_data = [&conn, &tool_metadata, &string_entries, node_id, this_pid]( - const auto& _gen) { - for(auto pitr : _gen) - { - auto _deferred = sql::deferred_transaction{conn}; - - for(auto itr : _gen.get(pitr)) + auto insert_api_data = + [&conn, &tool_metadata, &category_ids, &string_entries, node_id, this_ppid, this_pid]( + const auto& _gen) { + for(auto pitr : _gen) { - auto category = 
tool_metadata.buffer_names.at(itr.kind); - auto name = tool_metadata.buffer_names.at(itr.kind, itr.operation); + auto _deferred = sql::deferred_transaction{conn}; - auto msg = std::string{"{}"}; - if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API) + for(auto itr : _gen.get(pitr)) { - if(static_cast(itr.operation) != - ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId) + auto name = tool_metadata.buffer_names.at(itr.kind, itr.operation); + + auto msg = std::string{"{}"}; + if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API) { - // check generatePerfetto.cpp and generateOTF2.cpp, and the marker name in - // the view - auto message = - tool_metadata.get_marker_message(itr.correlation_id.internal); - if(!message.empty()) + if(static_cast(itr.operation) != + ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId) + { + // check generatePerfetto.cpp and generateOTF2.cpp, and the marker name + // in the view + auto message = + tool_metadata.get_marker_message(itr.correlation_id.internal); + if(!message.empty()) + { + msg = get_json_string( + [](auto& ar, std::string_view _msg) { + ar(cereal::make_nvp("message", std::string{_msg})); + }, + name); + name = message; + } + } + else { msg = get_json_string( [](auto& ar, std::string_view _msg) { ar(cereal::make_nvp("message", std::string{_msg})); }, - message); + name); } } - else + + auto args = function_args_t{}; { - msg = get_json_string( - [](auto& ar, std::string_view _msg) { - ar(cereal::make_nvp("message", std::string{_msg})); - }, - name); - } - } + auto _record = rocprofiler_record_header_t{ + .hash = rocprofiler_record_header_compute_hash( + ROCPROFILER_BUFFER_CATEGORY_TRACING, itr.kind), + .payload = &itr}; - auto args = function_args_t{}; - { - auto _record = rocprofiler_record_header_t{ - .hash = rocprofiler_record_header_compute_hash( - ROCPROFILER_BUFFER_CATEGORY_TRACING, itr.kind), - .payload = &itr}; + rocprofiler_iterate_buffer_tracing_record_args( + _record, 
iterate_args_callback, &args); + } - rocprofiler_iterate_buffer_tracing_record_args( - _record, iterate_args_callback, &args); - } + // insert thread info if it doesn't already exist + get_thread_id(itr.thread_id); - // insert thread info if it doesn't already exist - get_thread_id(itr.thread_id); + auto evt_id = create_event( + conn, + { + insert_value("category_id", category_ids.at(itr.kind)), + insert_value("stack_id", itr.correlation_id.internal), + insert_value("parent_stack_id", itr.correlation_id.ancestor), + insert_value("correlation_id", itr.correlation_id.external.value), + insert_value("extdata", msg), + }); - auto evt_id = create_event( - conn, + // insert arguments into rocpd_arg table + for(const auto& arg_info : args) { - insert_value("category_id", string_entries.at(category)), - insert_value("stack_id", itr.correlation_id.internal), - insert_value("parent_stack_id", itr.correlation_id.ancestor), - insert_value("correlation_id", itr.correlation_id.external.value), - insert_value("extdata", msg), - }); - - // insert arguments into rocpd_arg table - for(const auto& arg_info : args) - { - auto demangled_type = common::cxx_demangle(arg_info.arg_type); + auto demangled_type = common::cxx_demangle(arg_info.arg_type); - auto args_stmt = - get_insert_statement("rocpd_arg{{uuid}}", - { - insert_value("event_id", evt_id), - insert_value("position", arg_info.arg_number), - insert_value("type", demangled_type), - insert_value("name", arg_info.arg_name), - insert_value("value", arg_info.arg_value), - }); + auto args_stmt = + get_insert_statement("rocpd_arg{{uuid}}", + { + insert_value("event_id", evt_id), + insert_value("position", arg_info.arg_number), + insert_value("type", demangled_type), + insert_value("name", arg_info.arg_name), + insert_value("value", arg_info.arg_value), + }); - execute_raw_sql_statements(conn, args_stmt); - } + execute_raw_sql_statements(conn, args_stmt); + } + + auto track_id = get_track_id(conn, + track_data{ + .nid = node_id, + .ppid 
= this_ppid, + .pid = this_pid, + .tid = itr.thread_id, + .agent_id = std::nullopt, + .queue_id = std::nullopt, + .stream_id = std::nullopt, + .name_id = std::nullopt, + .extdata = std::nullopt, + }); + + auto start_id = + get_timestamp_id(conn, itr.start_timestamp, phase_enter, track_id); + auto end_id = get_timestamp_id(conn, itr.end_timestamp, phase_exit, track_id); - if(itr.start_timestamp != itr.end_timestamp) - { auto region_stmt = get_insert_statement("rocpd_region{{uuid}}", { insert_value("id", itr.correlation_id.internal), - insert_value("nid", node_id), - insert_value("pid", this_pid), - insert_value("tid", itr.thread_id), - insert_value("start", itr.start_timestamp), - insert_value("end", itr.end_timestamp), + insert_value("track_id", track_id), insert_value("name_id", string_entries.at(name)), + insert_value("start_id", start_id), + insert_value("end_id", end_id), insert_value("event_id", evt_id), }); execute_raw_sql_statements(conn, region_stmt); } - else - { - auto track_id = get_track_id( - conn, node_id, this_pid, itr.thread_id, string_entries.at(category), "{}"); - auto sample_stmt = - get_insert_statement("rocpd_sample{{uuid}}", - { - insert_value("id", itr.correlation_id.internal), - insert_value("track_id", track_id), - insert_value("timestamp", itr.start_timestamp), - insert_value("event_id", evt_id), - }); - - execute_raw_sql_statements(conn, sample_stmt); - } } - } - }; + }; auto dispatch_to_evt_id = common::container::stable_vector{}; @@ -1399,6 +1569,7 @@ write_rocpd( get_stream_id(rocprofiler_stream_id_t{.handle = 0}); get_queue_id(rocprofiler_queue_id_t{.handle = 0}); + insert_category_data(category_ids); insert_agent_data(); insert_pmc_data(); insert_kernel_code_object_data(); diff --git a/source/lib/output/generateRocpd.hpp b/source/lib/output/generateRocpd.hpp index 1ed8fac9d..125e4ea57 100644 --- a/source/lib/output/generateRocpd.hpp +++ b/source/lib/output/generateRocpd.hpp @@ -28,8 +28,14 @@ #include "output_config.hpp" #include 
"stream_info.hpp" +#include + #include #include +#include +#include +#include +#include namespace rocprofiler { @@ -60,16 +66,36 @@ struct argument_info std::string arg_value = {}; }; +struct sql_insert_value +{ + std::string_view name = {}; + std::string value = {}; +}; + struct track_data { - uint64_t node_id = 0; - pid_t pid = 0; - pid_t tid = 0; - uint64_t name_id = 0; + uint64_t nid = 0; + std::optional ppid = {}; + std::optional pid = {}; + std::optional tid = {}; + std::optional agent_id = {}; + std::optional queue_id = {}; + std::optional stream_id = {}; + std::optional name_id = {}; + std::optional extdata = {}; - size_t hash() const; + size_t hash() const; + decltype(auto) tie() const; + + std::vector get_insert_values() const; }; +inline decltype(auto) +track_data::tie() const +{ + return std::tie(nid, ppid, pid, tid, agent_id, queue_id, stream_id); +} + bool operator==(const track_data& lhs, const track_data& rhs); } // namespace tool diff --git a/source/lib/output/metadata.cpp b/source/lib/output/metadata.cpp index e7ec5cebc..2cd72a050 100644 --- a/source/lib/output/metadata.cpp +++ b/source/lib/output/metadata.cpp @@ -115,9 +115,10 @@ process_agent_counters(rocprofiler_agent_id_t agent_id, for(size_t i = 0; i < num_counters; ++i) { - auto _info = rocprofiler_counter_info_v1_t{}; - auto _dim_ids = std::vector{}; - auto _dim_info = std::vector{}; + auto _info = rocprofiler_counter_info_v1_t{}; + auto _dim_ids = std::vector{}; + auto _dim_info = std::vector{}; + auto _dim_instances = std::vector{}; ROCPROFILER_CHECK(rocprofiler_query_counter_info( counters[i], ROCPROFILER_COUNTER_INFO_VERSION_1, &_info)); @@ -140,8 +141,25 @@ process_agent_counters(rocprofiler_agent_id_t agent_id, _dim_info.emplace_back(*_info.dimensions[j]); } - agent_counter_info_data->at(id).emplace_back( - id, _info, std::move(_dim_ids), std::move(_dim_info)); + for(uint64_t j = 0; j < _info.dimensions_instances_count; ++j) + { + if(_info.dimensions_instances[j] == nullptr) + { + 
ROCP_WARNING << fmt::format( + "nullptr dimension instance encountered for counter '{}' at index {}", + _info.name, + j); + continue; + } + + _dim_instances.emplace_back(*_info.dimensions_instances[j]); + } + + agent_counter_info_data->at(id).emplace_back(id, + _info, + std::move(_dim_ids), + std::move(_dim_info), + std::move(_dim_instances)); } return ROCPROFILER_STATUS_SUCCESS; diff --git a/source/lib/output/metadata.hpp b/source/lib/output/metadata.hpp index 300b4324c..7bc0460d7 100644 --- a/source/lib/output/metadata.hpp +++ b/source/lib/output/metadata.hpp @@ -170,9 +170,6 @@ struct metadata node_info node_data = {}; std::vector command_line = {}; - // PMC event ids start at this number - uint64_t pmc_event_offset = 1; - metadata() = default; metadata(inprocess); diff --git a/source/lib/output/output_config.cpp b/source/lib/output/output_config.cpp index 806337fe9..9b9dbb241 100644 --- a/source/lib/output/output_config.cpp +++ b/source/lib/output/output_config.cpp @@ -63,6 +63,8 @@ output_config::parse_env() tmp_directory = common::get_env("ROCPROF_TMPDIR", tmp_directory); kernel_rename = common::get_env("ROCPROF_KERNEL_RENAME", false); group_by_queue = common::get_env("ROCPROF_GROUP_BY_QUEUE", false); + annotate_args = common::get_env("ROCPROF_ANNOTATE_ARGS", false); + annotate_pmc = common::get_env("ROCPROF_ANNOTATE_PMC", false); auto to_upper = [](std::string val) { for(auto& vitr : val) vitr = toupper(vitr); diff --git a/source/lib/output/output_config.hpp b/source/lib/output/output_config.hpp index 4284f9bdf..e7e44bc5e 100644 --- a/source/lib/output/output_config.hpp +++ b/source/lib/output/output_config.hpp @@ -71,6 +71,8 @@ struct output_config bool summary_output = false; bool kernel_rename = false; bool group_by_queue = false; + bool annotate_args = false; + bool annotate_pmc = false; uint64_t stats_summary_unit_value = 1; size_t perfetto_shmem_size_hint = defaults::perfetto_shmem_size_hint_kb; size_t perfetto_buffer_size = 
defaults::perfetto_buffer_size_kb; @@ -133,6 +135,8 @@ output_config::save(ArchiveT& ar) const CFG_SERIALIZE_MEMBER(rocpd_output); CFG_SERIALIZE_MEMBER(kernel_rename); CFG_SERIALIZE_MEMBER(group_by_queue); + CFG_SERIALIZE_MEMBER(annotate_args); + CFG_SERIALIZE_MEMBER(annotate_pmc); #undef CFG_SERIALIZE_MEMBER #undef CFG_SERIALIZE_NAMED_MEMBER diff --git a/source/lib/output/output_stream.cpp b/source/lib/output/output_stream.cpp index 7c1307e5e..26adb35ec 100644 --- a/source/lib/output/output_stream.cpp +++ b/source/lib/output/output_stream.cpp @@ -68,6 +68,10 @@ get_output_filename(const output_config& cfg, std::string_view fname, std::strin fs::create_directories(output_path); } + if(auto extpos = fname.rfind(_ext); + extpos < fname.size() && extpos + _ext.size() == fname.size()) + fname = fname.substr(0, extpos); + auto _ofname = tool::format_path(output_path / fmt::format("{}_{}{}", output_prefix, fname, _ext)); @@ -95,11 +99,11 @@ get_output_stream(const output_config& cfg, auto cfg_output_path = tool::format_path(cfg.output_path); if(stdout_names.count(cfg_output_path) > 0 || stdout_names.count(fname) > 0) - return {&std::cout, [](auto*&) {}}; + return {"stdout", &std::cout, [](auto*&) {}}; else if(stderr_names.count(cfg_output_path) > 0 || stderr_names.count(fname) > 0) - return {&std::cout, [](auto*&) {}}; + return {"stderr", &std::cerr, [](auto*&) {}}; else if(cfg_output_path.empty() || fname.empty()) - return {&std::clog, [](auto*&) {}}; + return {"stdlog", &std::clog, [](auto*&) {}}; auto output_file = get_output_filename(cfg, fname, ext); auto* _ofs = new(std::nothrow) std::ofstream{output_file, mode}; @@ -110,7 +114,7 @@ get_output_stream(const output_config& cfg, ROCP_ERROR << "Opened result file: " << output_file; - return {_ofs, [](std::ostream*& v) { + return {std::move(output_file), _ofs, [](std::ostream*& v) { if(v) dynamic_cast(v)->close(); delete v; v = nullptr; diff --git a/source/lib/output/output_stream.hpp 
b/source/lib/output/output_stream.hpp index d5123c81e..07f84d1c0 100644 --- a/source/lib/output/output_stream.hpp +++ b/source/lib/output/output_stream.hpp @@ -49,6 +49,11 @@ struct output_stream : stream{_os} , dtor{_dtor} {} + output_stream(std::string&& _name, std::ostream* _os, ostream_dtor_t _dtor) + : name{std::move(_name)} + , stream{_os} + , dtor{_dtor} + {} ~output_stream() { close(); } output_stream(const output_stream&) = delete; @@ -68,10 +73,18 @@ struct output_stream { if(stream) (*stream) << std::flush; if(dtor) dtor(stream); + stream = nullptr; + } + + bool is_open() const { return (stream != nullptr); } + void flush() const + { + if(stream) stream->flush(); } bool writes_to_file() const { return (dynamic_cast(stream) != nullptr); } + std::string name = {}; std::ostream* stream = nullptr; ostream_dtor_t dtor = nullptr; }; diff --git a/source/lib/output/sql/common.cpp b/source/lib/output/sql/common.cpp index 32587752d..d4cb4439e 100644 --- a/source/lib/output/sql/common.cpp +++ b/source/lib/output/sql/common.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "lib/output/sql/common.hpp" +#include "lib/common/simple_timer.hpp" #include "lib/output/kernel_symbol_info.hpp" #include "lib/common/logging.hpp" @@ -141,6 +142,9 @@ extract_column_name(sqlite3_stmt* stmt, int32_t col) int64_t extract_row_count(sqlite3* conn, std::string_view query) { + auto _rowcnt_perf = common::simple_timer{ + fmt::format("{} for SQL query '{}'", __FUNCTION__, query), ROCP_LOG_LEVEL_TRACE}; + auto _pos = query.find(';'); auto _query = (_pos == std::string_view::npos) ? 
query : query.substr(0, _pos); auto _count_query = fmt::format("SELECT COUNT(*) AS count FROM ({}) x;", _query); diff --git a/source/lib/output/sql/extract_data_type.hpp b/source/lib/output/sql/extract_data_type.hpp index ff29871e3..00b8100f1 100644 --- a/source/lib/output/sql/extract_data_type.hpp +++ b/source/lib/output/sql/extract_data_type.hpp @@ -59,6 +59,8 @@ struct extract_data_type::value>> { if constexpr(sizeof(Tp) > sizeof(int32_t)) return Tp{sqlite3_column_int64(stmt, col)}; + else if constexpr(sizeof(Tp) < sizeof(int32_t)) + return static_cast(sqlite3_column_int(stmt, col)); else return Tp{sqlite3_column_int(stmt, col)}; } diff --git a/source/lib/python/CMakeLists.txt b/source/lib/python/CMakeLists.txt index 0c0d8755c..c6c3007cb 100644 --- a/source/lib/python/CMakeLists.txt +++ b/source/lib/python/CMakeLists.txt @@ -7,12 +7,5 @@ set(DEFAULT_PYTHON_RPATH "\$ORIGIN:\$ORIGIN/../../..:\$ORIGIN/../../../rocprofil include("${CMAKE_CURRENT_LIST_DIR}/utilities.cmake") -if(NOT DEFINED ROCPROFILER_PYTHON_VERSIONS) - get_default_python_versions(DEFAULT_PYTHON_VERSIONS) - set(ROCPROFILER_PYTHON_VERSIONS - "${DEFAULT_PYTHON_VERSIONS}" - CACHE STRING "") -endif() - add_subdirectory(roctx) add_subdirectory(rocpd) diff --git a/source/lib/python/rocpd/__init__.py b/source/lib/python/rocpd/__init__.py index dc9f79856..acc6d4490 100644 --- a/source/lib/python/rocpd/__init__.py +++ b/source/lib/python/rocpd/__init__.py @@ -32,6 +32,13 @@ except Exception: pass +try: + import sqlite3 + + sqlite3.connect(":memory:") # Test if sqlite3 is available +except Exception: + pass + from . import libpyrocpd from .importer import RocpdImportData diff --git a/source/lib/python/rocpd/__main__.py b/source/lib/python/rocpd/__main__.py index 92f5eaf4e..0b7b72e46 100644 --- a/source/lib/python/rocpd/__main__.py +++ b/source/lib/python/rocpd/__main__.py @@ -34,7 +34,7 @@ def main(argv=None, config=None): """Main entry point for the rocpd command line tool. 
Args: - argv (list, optional): List of command line arguments. Defaults to None. + argv (list, optional): List of command line options. Defaults to None. """ import argparse @@ -44,6 +44,7 @@ def main(argv=None, config=None): from . import pftrace from . import query from . import summary + from . import filter from . import time_window from . import version_info from .importer import RocpdImportData @@ -100,6 +101,7 @@ def main(argv=None, config=None): prog="rocpd", description="Aggregate and/or analyze ROCm Profiling Data (rocpd)", allow_abbrev=False, + exit_on_error=True, ) parser.add_argument( @@ -109,6 +111,18 @@ def main(argv=None, config=None): help="Print the version information and exit", ) + def add_required_args(_parser): + _required_params = _parser.add_argument_group("Required options") + _required_params.add_argument( + "-i", + "--input", + required=True, + type=output_config.check_file_exists, + nargs="+", + help="Input path and filename to one or more database(s), separated by spaces", + ) + return _required_params + subparsers = parser.add_subparsers(dest="command") converter = subparsers.add_parser( "convert", @@ -137,16 +151,7 @@ def main(argv=None, config=None): def get_output_type(val): return val.lower().replace("perfetto", "pftrace") - # add required options for each subparser - converter_required_params = converter.add_argument_group("Required options") - converter_required_params.add_argument( - "-i", - "--input", - required=True, - type=output_config.check_file_exists, - nargs="+", - help="Input path and filename to one or more database(s)", - ) + converter_required_params = add_required_args(converter) converter_required_params.add_argument( "-f", "--output-format", @@ -158,43 +163,28 @@ def get_output_type(val): required=True, ) - query_required_params = query_reporter.add_argument_group("Required options") - query_required_params.add_argument( - "-i", - "--input", - required=True, - type=output_config.check_file_exists, - nargs="+", - 
help="Input path and filename to one or more database(s)", - ) - - summary_required_params = generate_summary.add_argument_group("Required options") - summary_required_params.add_argument( - "-i", - "--input", - required=True, - type=output_config.check_file_exists, - nargs="+", - help="Input path and filename to one or more database(s)", - ) + query_required_params = add_required_args(query_reporter) + summary_required_params = add_required_args(generate_summary) # converter: add args from any sub-modules valid_out_config_args = output_config.add_args(converter) - valid_generic_args = output_config.add_generic_args(converter) valid_pftrace_args = pftrace.add_args(converter) valid_csv_args = csv.add_args(converter) valid_otf2_args = otf2.add_args(converter) valid_time_window_args = time_window.add_args(converter) + valid_filter_args = filter.add_args(converter) # query: subparser args valid_out_config_args = output_config.add_args(query_reporter) valid_query_args = query.add_args(query_reporter) valid_time_window_args = time_window.add_args(query_reporter) + valid_filter_args = filter.add_args(query_reporter) # summary: subparser args valid_io_args = summary.add_io_args(generate_summary) valid_summary_args = summary.add_args(generate_summary) valid_time_window_args = time_window.add_args(generate_summary) + valid_filter_args = filter.add_args(generate_summary) # parse the command line arguments args = parser.parse_args(argv) @@ -215,13 +205,11 @@ def get_output_type(val): if args.command == "convert": # process the args out_cfg_args = output_config.process_args(args, valid_out_config_args) - generic_out_cfg_args = output_config.process_generic_args( - args, valid_generic_args - ) pftrace_args = pftrace.process_args(args, valid_pftrace_args) csv_args = csv.process_args(args, valid_csv_args) otf2_args = otf2.process_args(args, valid_otf2_args) window_args = time_window.process_args(args, valid_time_window_args) + filter_args = filter.process_args(args, 
valid_filter_args) # now start processing the data. Import the data and merge the views importData = RocpdImportData(args.input) @@ -230,9 +218,13 @@ def get_output_type(val): if window_args is not None: time_window.apply_time_window(importData, **window_args) + # apply filtering if requested + if filter_args is not None: + filter.check_args(importData, **filter_args) + filter.apply_filter(importData, **filter_args) + all_args = { **out_cfg_args, - **generic_out_cfg_args, **pftrace_args, **csv_args, **otf2_args, @@ -253,7 +245,7 @@ def get_output_type(val): for out_format in args.output_format: if out_format in format_handlers: - print(f"Converting database(s) to {out_format} format:") + print(f"\nConverting database(s) to {out_format} format...") format_handlers[out_format](importData, config) else: print(f"Warning: Unsupported output format '{out_format}'") @@ -264,13 +256,18 @@ def get_output_type(val): query_args = query.process_args(args, valid_query_args) out_cfg_args = output_config.process_args(args, valid_out_config_args) window_args = time_window.process_args(args, valid_time_window_args) + filter_args = filter.process_args(args, valid_filter_args) - all_args = {**query_args, **out_cfg_args} + all_args = { + **query_args, + **out_cfg_args, + } query.execute( args.input, args, window_args=window_args, + filter_args=filter_args, **all_args, ) @@ -280,6 +277,7 @@ def get_output_type(val): summary_args = summary.process_args(args, valid_summary_args) io_args = output_config.process_args(args, valid_io_args) window_args = time_window.process_args(args, valid_time_window_args) + filter_args = filter.process_args(args, valid_filter_args) # now start processing the data. 
Import the data and merge the views importData = RocpdImportData(args.input) @@ -288,10 +286,15 @@ def get_output_type(val): if window_args is not None: time_window.apply_time_window(importData, **window_args) + # apply filtering if requested + if filter_args is not None: + filter.check_args(importData, **filter_args) + filter.apply_filter(importData, **filter_args) + all_args = {**summary_args, **io_args} summary.generate_all_summaries(importData, **all_args) - print("Done. Exiting...") + print("Done. Exiting...") if __name__ == "__main__": diff --git a/source/lib/python/rocpd/bindings.py b/source/lib/python/rocpd/bindings.py new file mode 100644 index 000000000..2c0257400 --- /dev/null +++ b/source/lib/python/rocpd/bindings.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +############################################################################### +# MIT License +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + + +__all__ = ["RocpdImportData", "connect"] + +import sqlite3 + +try: + from . import libpyrocpd + + RocpdImportData = libpyrocpd.RocpdImportData + connect = libpyrocpd.connect + +except ImportError: + + class RocpdImportData(sqlite3.Connection): + """Fallback class replicating the interface of libpyrocpd.RocpdImportData.""" + + def __init__(self, input, dbname=":memory:"): + pass + + connect = sqlite3.connect diff --git a/source/lib/python/rocpd/csv.py b/source/lib/python/rocpd/csv.py index 0c6a5d3d9..42826ddef 100644 --- a/source/lib/python/rocpd/csv.py +++ b/source/lib/python/rocpd/csv.py @@ -25,6 +25,7 @@ from .importer import RocpdImportData from .time_window import apply_time_window +from .filter import apply_filter from . import output_config from . 
import libpyrocpd @@ -33,11 +34,12 @@ def write_csv(importData, config): return libpyrocpd.write_csv(importData, config) -def execute(input, config=None, window_args=None, **kwargs): +def execute(input, config=None, window_args=None, filter_args=None, **kwargs): importData = RocpdImportData(input) apply_time_window(importData, **window_args) + apply_filter(importData, **filter_args) config = ( output_config.output_config(**kwargs) @@ -65,7 +67,8 @@ def main(argv=None): from .time_window import process_args as process_args_time_window from .output_config import add_args as add_args_output_config from .output_config import process_args as process_args_output_config - from .output_config import add_generic_args, process_generic_args + from .filter import add_args as add_args_filter + from .filter import process_args as process_args_filter parser = argparse.ArgumentParser( description="Convert rocPD to CSV files", @@ -85,24 +88,23 @@ def main(argv=None): ) valid_out_config_args = add_args_output_config(parser) - valid_generic_args = add_generic_args(parser) + valid_filter_args = add_args_filter(parser) valid_time_window_args = add_args_time_window(parser) valid_csv_args = add_args(parser) args = parser.parse_args(argv) out_cfg_args = process_args_output_config(args, valid_out_config_args) - generic_out_cfg_args = process_generic_args(args, valid_generic_args) + filter_args = process_args_filter(args, valid_filter_args) window_args = process_args_time_window(args, valid_time_window_args) csv_args = process_args(args, valid_csv_args) all_args = { **out_cfg_args, - **generic_out_cfg_args, **csv_args, } - execute(args.input, window_args=window_args, **all_args) + execute(args.input, window_args=window_args, filter_args=filter_args, **all_args) if __name__ == "__main__": diff --git a/source/lib/python/rocpd/filter.py b/source/lib/python/rocpd/filter.py new file mode 100644 index 000000000..b83c9b9d4 --- /dev/null +++ b/source/lib/python/rocpd/filter.py @@ -0,0 +1,214 @@ 
+#!/usr/bin/env python3 +############################################################################### +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+############################################################################### + +import sys +import sqlite3 +import argparse + +from .importer import RocpdImportData, execute_statement + + +def get_distinct_in_column(connection, table, column, conditions=""): + """Get distinct values from a specific column.""" + return [ + itr[0] + for itr in connection.execute( + f"SELECT DISTINCT({column}) FROM {table} {conditions}" + ).fetchall() + ] + + +def create_view(connection: sqlite3.Connection, view_name: str, query: str) -> None: + """Create or replace a database view.""" + execute_statement(connection, f"DROP VIEW IF EXISTS {view_name}") + execute_statement(connection, query) + connection.commit() + + +def get_column_names(conn: RocpdImportData, table_name: str): + """ + Use SELECT on zero rows and read cursor.description. + """ + cursor = conn.execute(f"SELECT * FROM '{table_name}' LIMIT 0") + return [desc[0] for desc in cursor.description] + + +def apply_filter(connection: RocpdImportData, **kwargs) -> None: + """Apply filtering to create filtered views.""" + + include_category = kwargs.get("include_category", None) + exclude_category = kwargs.get("exclude_category", None) + if include_category is not None or exclude_category is not None: + categories = ( + connection.filters["category"] + if "category" in connection.filters and connection.filters["category"] + else get_distinct_in_column(connection, "rocpd_info_category", "name") + ) + remaining_categories = categories[:] + if include_category is not None: + for itr in include_category: + if itr in categories and itr not in remaining_categories: + remaining_categories.append(itr) + if exclude_category is not None: + for itr in exclude_category: + if itr in remaining_categories: + remaining_categories.remove(itr) + + connection.filters["category"] = remaining_categories + + # Create views for tables with category_id filtered + if "category" in connection.filters and connection.filters["category"]: + 
category_tables = [] # dedicated table for categories + + # Get all tables that have a category_id column + for itr in connection.table_info.keys(): + if itr.find("rocpd_info_") == 0: + continue + column_names = get_column_names(connection, itr) + if "category_id" in column_names: + category_tables += [itr] + + # Get the distinct category IDs for the specified categories + category_ids = get_distinct_in_column( + connection, + "rocpd_info_category", + "id", + "WHERE name IN ({})".format( + ",".join(f"'{cat}'" for cat in connection.filters.get("category", [])) + ), + ) + + # Create views for each table that has a category_id values in the specified categories + filtered_category_ids = ", ".join([f"{itr}" for itr in category_ids]) + for table_name in category_tables: + dbs = [ + f"{itr} WHERE category_id IN ({filtered_category_ids})" + for itr in connection.table_info[table_name] + ] + table_union = " UNION ALL ".join(dbs) + create_view_schema = f""" + CREATE TEMPORARY VIEW {table_name} AS + {table_union} + """ + create_view(connection, table_name, create_view_schema) + + return connection + + +# +# Command-line interface functions +# +def add_args(parser: argparse.ArgumentParser): + """Add filtering options to existing argument parser.""" + + filter_options = parser.add_argument_group("Filter options") + + # Start time mutually exclusive group + filter_options.add_argument( + "--list-categories", + action="store_true", + help="List all available categories", + ) + filter_options.add_argument( + "--include-category", + type=str, + help="Explicit list of categories to include", + nargs="+", + default=None, + ) + filter_options.add_argument( + "--exclude-category", + type=str, + help="Named marker event to use as window start point", + nargs="+", + default=None, + ) + + return ["list_categories", "include_category", "exclude_category"] + + +def process_args(args, valid_args): + + ret = {} + for itr in valid_args: + if hasattr(args, itr): + val = getattr(args, itr) + if 
val is not None: + ret[itr] = val + return ret + + +def check_args(connection: RocpdImportData, **kwargs): + """Check if the provided arguments are valid for filtering.""" + + categories = sorted(get_distinct_in_column(connection, "rocpd_info_category", "name")) + for option in ["include_category", "exclude_category"]: + option_args = kwargs.get(option, None) + if option_args is not None: + option_name = "--{}".format(option.replace("_", "-")) + invalid_categories = [itr for itr in option_args if itr not in categories] + if invalid_categories: + raise argparse.ArgumentError( + f"{option_name} must be one of {categories}. Invalid categories: {invalid_categories}" + ) + + if kwargs.get("list_categories", False): + print("Available categories:") + for category in categories: + print(f" - {category}") + sys.exit(0) + + +def execute(input_rpd: str, **kwargs) -> RocpdImportData: + """Execute time window filtering on database file.""" + + importData = RocpdImportData(input_rpd) + + apply_filter(importData, **kwargs) + + return importData + + +def main(argv=None) -> int: + """Main entry point for command line execution.""" + parser = argparse.ArgumentParser( + description="Apply time window filtering to ROCpd database views" + ) + parser.add_argument( + "-i", + "--input", + type=str, + required=True, + help="Path to the input ROCpd database file", + ) + + arg_names = add_args(parser) + args = parser.parse_args(argv) + + execute(args.input, **{arg: getattr(args, arg) for arg in arg_names}) + + +if __name__ == "__main__": + main() diff --git a/source/lib/python/rocpd/importer.py b/source/lib/python/rocpd/importer.py index 5ad4aef1a..7f0b69bdc 100644 --- a/source/lib/python/rocpd/importer.py +++ b/source/lib/python/rocpd/importer.py @@ -38,29 +38,34 @@ class RocpdImportData(libpyrocpd.RocpdImportData): - def __init__(self, input): + def __init__(self, input, dbname=":memory:"): if isinstance(input, RocpdImportData): super(RocpdImportData, self).__init__(input) self.table_info 
= input.table_info + self.filters = input.filters else: + def internal_init(_input, _output): + _connection = libpyrocpd.connect(_output) + _connection.execute("PRAGMA foreign_keys = ON") + _table_info = _create_temp_views(_connection, _input) + _create_meta_views(_connection) + return (_connection, _input, _table_info) + if isinstance(input, sqlite3.Connection): raise ValueError( "RocpdImportData does not accept existing sqlite3 connections" ) elif isinstance(input, str): - _connection = libpyrocpd.connect(input) - _filenames = [input] + _connection, _filenames, _table_info = internal_init([input], dbname) elif isinstance(input, list) and len(input) > 0 and isinstance(input[0], str): - _connection = libpyrocpd.connect(":memory:") - _filenames = input[:] - _connection.execute("PRAGMA foreign_keys = ON") - self.table_info = _create_temp_views(_connection, input) - _create_meta_views(_connection) + _connection, _filenames, _table_info = internal_init(input, dbname) else: raise ValueError( f"input is unsupported type. Expected sqlite3.Connection, string, or (non-empty) list of strings. 
type={type(input).__name__}" ) + self.filters = {} + self.table_info = _table_info super(RocpdImportData, self).__init__(_connection, _filenames) def __getattr__(self, name): @@ -163,4 +168,6 @@ def _create_temp_views(connection, input): def _create_meta_views(connection): schema = RocpdSchema() sql_script = schema.views.replace("CREATE VIEW", "CREATE TEMPORARY VIEW") - execute_statement(connection, sql_script, is_script=True) + # for easier debugging + for itr in sql_script.split(";"): + execute_statement(connection, f"{itr};") diff --git a/source/lib/python/rocpd/libpyrocpd.cpp b/source/lib/python/rocpd/libpyrocpd.cpp index 0f6232572..12b90759e 100644 --- a/source/lib/python/rocpd/libpyrocpd.cpp +++ b/source/lib/python/rocpd/libpyrocpd.cpp @@ -75,31 +75,24 @@ template auto read_impl(sqlite3* conn, std::string_view conditions) { - auto query = std::string_view{}; + auto table = std::string_view{}; if constexpr(std::is_same::value) - query = "rocpd_info_node"; + table = "rocpd_info_node"; else if constexpr(std::is_same::value) - query = "processes"; + table = "processes"; else if constexpr(std::is_same::value) - query = "threads"; + table = "threads"; else if constexpr(std::is_same::value) - query = "regions"; + table = "regions"; else if constexpr(std::is_same::value) - query = "kernels"; + table = "kernels"; else if constexpr(std::is_same::value) - query = "rocpd_info_agent"; + table = "rocpd_info_agent"; else static_assert(rocprofiler::sdk::mpl::assert_false::value, "Unsupported read type"); - auto data = std::vector{}; - if(conn) - { - auto ar = cereal::SQLite3InputArchive{ - conn, fmt::format("SELECT * FROM {} {}", query, conditions)}; - cereal::load(ar, data); - } - return data; + return read_sql_query(conn, fmt::format("SELECT * FROM {} {}", table, conditions)); } template @@ -211,9 +204,9 @@ PYBIND11_MODULE(libpyrocpd, pyrocpd) .value("rocpd_tables", ROCPD_SQL_SCHEMA_ROCPD_TABLES) .value("rocpd_indexes", ROCPD_SQL_SCHEMA_ROCPD_INDEXES) .value("rocpd_views", 
ROCPD_SQL_SCHEMA_ROCPD_VIEWS) + .value("rocpd_metadata", ROCPD_SQL_SCHEMA_ROCPD_METADATA) .value("data_views", ROCPD_SQL_SCHEMA_ROCPD_DATA_VIEWS) - .value("summary_views", ROCPD_SQL_SCHEMA_ROCPD_SUMMARY_VIEWS) - .value("marker_views", ROCPD_SQL_SCHEMA_ROCPD_MARKER_VIEWS); + .value("summary_views", ROCPD_SQL_SCHEMA_ROCPD_SUMMARY_VIEWS); py::enum_(pyrocpd, "sql_option", "Load schema options") .value("none", ROCPD_SQL_OPTIONS_NONE) @@ -226,12 +219,19 @@ PYBIND11_MODULE(libpyrocpd, pyrocpd) // demo for creating python bindings to a class py::class_(pyrocpd, "agent") + .def_readonly("id", &rocpd::types::agent::id) + .def_readonly("nid", &rocpd::types::agent::nid) + .def_readonly("pid", &rocpd::types::agent::pid) .def_readonly("node_id", &rocpd::types::agent::node_id) - .def_readonly("logical_node_id", &rocpd::types::agent::logical_node_id) + .def_readonly("absolute_index", &rocpd::types::agent::absolute_index) + .def_readonly("logical_index", &rocpd::types::agent::logical_index) + .def_readonly("type_index", &rocpd::types::agent::type_index) .def_readonly("gpu_index", &rocpd::types::agent::gpu_index) .def_readonly("name", &rocpd::types::agent::name) - .def_readonly("user_name", &rocpd::types::agent::user_name) - .def_readonly("product_name", &rocpd::types::agent::product_name); + .def_readonly("generic_name", &rocpd::types::agent::generic_name) + .def_readonly("model_name", &rocpd::types::agent::model_name) + .def_readonly("product_name", &rocpd::types::agent::product_name) + .def_readonly("vendor_name", &rocpd::types::agent::vendor_name); py::class_(pyrocpd, "node") .def(py::init<>()) @@ -285,6 +285,8 @@ PYBIND11_MODULE(libpyrocpd, pyrocpd) .def_readwrite("kernel_rename", &tool::output_config::kernel_rename) .def_readwrite("agent_index_value", &tool::output_config::agent_index_value) .def_readwrite("group_by_queue", &tool::output_config::group_by_queue) + .def_readwrite("annotate_args", &tool::output_config::annotate_args) + .def_readwrite("annotate_pmc", 
&tool::output_config::annotate_pmc) .def_readwrite("perfetto_shmem_size_hint", &tool::output_config::perfetto_shmem_size_hint) .def_readwrite("perfetto_buffer_size", &tool::output_config::perfetto_buffer_size) .def_readwrite("perfetto_backend", &tool::output_config::perfetto_backend) @@ -398,107 +400,77 @@ PYBIND11_MODULE(libpyrocpd, pyrocpd) pyrocpd.def( "write_perfetto", [](rocpd::RocpdImportData& data, const tool::output_config& output_cfg) -> bool { - auto _create_agent_index = - [&output_cfg](const rocpd::types::agent& _agent) -> tool::agent_index { - auto ret_index = tool::create_agent_index( - output_cfg.agent_index_value, - _agent.node_id, // absolute index - static_cast(_agent.logical_node_id), // relative index - static_cast(_agent.logical_node_type_id), // type-relative index - std::string_view(_agent.type)); - return ret_index; - }; // ORDER BY expression for kernel dispatches - constexpr auto kernels_order_by = - "agent_abs_index ASC, stream_id ASC, queue_id ASC, start ASC, end DESC"; - - constexpr auto region_order_by = "start ASC, end DESC"; - constexpr auto sample_order_by = "timestamp ASC"; + constexpr auto region_order_by = "start ASC, end ASC"; + constexpr auto sample_order_by = "timestamp ASC"; + constexpr auto kernels_order_by = "stream_id ASC, queue_id ASC, start ASC, end ASC"; + constexpr auto memcpy_order_by = "stream_id ASC, queue_id ASC, start ASC, end ASC"; + constexpr auto memalloc_order_by = "stream_id ASC, queue_id ASC, start ASC, end ASC"; - auto perfetto_session = rocpd::output::PerfettoSession{output_cfg}; - auto sqlgen_perf = common::simple_timer{ + auto sqlgen_perf = common::simple_timer{ fmt::format("Perfetto generation from {} SQL database(s)", data.size())}; - for(auto obj : {data.connection}) + + auto* conn = rocpd::interop::get_connection(std::move(data.connection)); + auto perfetto_session = rocpd::output::PerfettoSession{output_cfg, conn}; + auto nodes = rocpd::read(conn); + for(const auto& nitr : nodes) { - auto* conn = 
rocpd::interop::get_connection(std::move(obj)); - auto nodes = rocpd::read(conn); + auto agents = rocpd::read( + conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); + auto processes = rocpd::read( + conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); - for(const auto& nitr : nodes) + for(const auto& pitr : processes) { - auto agents = rocpd::read( - conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); - auto processes = rocpd::read( - conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); - - for(const auto& pitr : processes) - { - ROCP_FATAL_IF(pitr.nid != nitr.id || pitr.guid != nitr.guid) - << fmt::format("Found process with a mismatched nid/guid. process: " - "{}/{} vs. node: {}/{}", - pitr.nid, + ROCP_FATAL_IF(pitr.nid != nitr.id || pitr.guid != nitr.guid) + << fmt::format("Found process with a mismatched nid/guid. process: " + "{}/{} vs. node: {}/{}", + pitr.nid, + pitr.guid, + nitr.id, + nitr.guid); + auto select_guid_nid_pid = [&nitr, &pitr](std::string_view tbl) { + return fmt::format("SELECT * FROM {} WHERE guid = '{}' AND nid " + "= {} AND pid = {}", + tbl, pitr.guid, nitr.id, - nitr.guid); - auto select_guid_nid_pid = [&nitr, &pitr](std::string_view tbl) { - return fmt::format("SELECT * FROM {} WHERE guid = '{}' AND nid " - "= {} AND pid = {}", - tbl, - pitr.guid, - nitr.id, - pitr.pid); - }; - - auto _sqlgen_perft = common::simple_timer{fmt::format( - "Perfetto generation from SQL for process {} (total)", pitr.pid)}; - - auto kernels = rocpd::sql_generator{ - conn, select_guid_nid_pid("kernels"), kernels_order_by}; - - auto memory_allocations = - rocpd::sql_generator{ - conn, select_guid_nid_pid("memory_allocations")}; + pitr.pid); + }; - auto memory_copies = rocpd::sql_generator{ - conn, select_guid_nid_pid("memory_copies")}; + auto _sqlgen_perft = common::simple_timer{fmt::format( + "Perfetto generation from SQL for process {} (total)", pitr.pid)}; - auto counters = 
rocpd::sql_generator{ - conn, select_guid_nid_pid("counters_collection")}; + auto kernels = rocpd::sql_generator{ + conn, select_guid_nid_pid("kernels"), kernels_order_by}; - auto regions = rocpd::sql_generator{ - conn, select_guid_nid_pid("regions"), region_order_by}; + auto memory_allocations = rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_allocations"), memalloc_order_by}; - auto samples = rocpd::sql_generator{ - conn, select_guid_nid_pid("samples"), sample_order_by}; + auto memory_copies = rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_copies"), memcpy_order_by}; - auto threads = rocpd::sql_generator{ - conn, select_guid_nid_pid("threads")}; + auto regions = rocpd::sql_generator{ + conn, select_guid_nid_pid("regions"), region_order_by}; - // absolute_index |-> (agent, agent_index) - auto agents_map = - std::unordered_map>{}; + auto samples = rocpd::sql_generator{ + conn, select_guid_nid_pid("samples"), sample_order_by}; - for(const auto& itr : agents) - { - auto new_index = _create_agent_index(itr); - agents_map.emplace(itr.absolute_index, std::make_pair(itr, new_index)); - } + auto threads = rocpd::sql_generator{ + conn, select_guid_nid_pid("threads")}; - ROCP_TRACE << "Starting Perfetto generation from SQL for process " - << pitr.pid; - auto _sqlgen_perfw = common::simple_timer{fmt::format( - "Perfetto generation from SQL for process {} (write)", pitr.pid)}; - rocpd::output::write_perfetto(perfetto_session, - pitr, - agents_map, - threads, - regions, - samples, - kernels, - memory_copies, - memory_allocations, - counters); - } + ROCP_TRACE << "Starting Perfetto generation from SQL for process " << pitr.pid; + auto _sqlgen_perfw = common::simple_timer{fmt::format( + "Perfetto generation from SQL for process {} (write)", pitr.pid)}; + rocpd::output::write_perfetto(perfetto_session, + pitr, + agents, + threads, + regions, + samples, + kernels, + memory_copies, + memory_allocations); } } return true; @@ -513,101 +485,40 @@ 
PYBIND11_MODULE(libpyrocpd, pyrocpd) if(data.empty()) return; - auto csv_manager = rocpd::output::CsvManager{output_cfg}; - - for(auto obj : {data.connection}) - { - auto* conn = rocpd::interop::get_connection(std::move(obj)); - auto nodes = rocpd::read(conn); - - for(const auto& nitr : nodes) - { - auto agents = rocpd::read( - conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); - auto processes = rocpd::read( - conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); - - for(const auto& pitr : processes) - { - ROCP_FATAL_IF(pitr.nid != nitr.id || pitr.guid != nitr.guid) - << fmt::format("Found process with a mismatched nid/guid. process: " - "{}/{} vs. node: {}/{}", - pitr.nid, - pitr.guid, - nitr.id, - nitr.guid); - auto _sqlgen_csv = common::simple_timer{fmt::format( - "CSV generation from SQL for process {} (total)", pitr.pid)}; - - auto select_guid_nid_pid = [&nitr, &pitr](std::string_view tbl, - std::string_view - where_extra_condition = {}) { - return fmt::format( - "SELECT * FROM {} WHERE guid = '{}' AND nid = {} AND pid = {} {}", - tbl, - pitr.guid, - nitr.id, - pitr.pid, - where_extra_condition); - }; + auto csv_manager = rocpd::output::CsvManager{output_cfg}; + auto* conn = rocpd::interop::get_connection(std::move(data.connection)); - rocpd::output::write_agent_info_csv(csv_manager, agents); + constexpr auto region_order_by = "start ASC, end ASC"; - constexpr auto region_order_by = "start ASC, end DESC"; + auto select_guid_nid_pid = [](std::string_view tbl, + std::string_view where_extra_condition = {}) { + return fmt::format("SELECT * FROM {} {}", tbl, where_extra_condition); + }; - auto kernels = rocpd::sql_generator{ - conn, select_guid_nid_pid("kernels"), region_order_by}; - auto memory_copies = rocpd::sql_generator{ - conn, select_guid_nid_pid("memory_copies"), region_order_by}; - auto memory_allocations = - rocpd::sql_generator{ - conn, select_guid_nid_pid("memory_allocations"), region_order_by}; - auto 
hip_api_calls = rocpd::sql_generator{ - conn, - select_guid_nid_pid("regions", "AND category LIKE 'HIP_%'"), - region_order_by}; - auto hsa_api_calls = rocpd::sql_generator{ - conn, - select_guid_nid_pid("regions", "AND category LIKE 'HSA_%'"), - region_order_by}; - auto marker_api_calls = rocpd::sql_generator{ - conn, - select_guid_nid_pid("regions_and_samples", - "AND category LIKE 'MARKER_%'"), - region_order_by}; - auto counters_calls = rocpd::sql_generator{ - conn, select_guid_nid_pid("counters_collection"), region_order_by}; - auto scratch_memory_calls = - rocpd::sql_generator{ - conn, select_guid_nid_pid("scratch_memory"), region_order_by}; - auto rccl_calls = rocpd::sql_generator{ - conn, - select_guid_nid_pid("regions", "AND category LIKE 'RCCL_%'"), - region_order_by}; - auto rocdecode_calls = rocpd::sql_generator{ - conn, - select_guid_nid_pid("regions", "AND category LIKE 'ROCDECODE_%'"), - region_order_by}; - auto rocjpeg_calls = rocpd::sql_generator{ - conn, - select_guid_nid_pid("regions", "AND category LIKE 'ROCJPEG_%'"), - region_order_by}; - - rocpd::output::write_csvs(csv_manager, - kernels, - memory_copies, - memory_allocations, - hip_api_calls, - hsa_api_calls, - marker_api_calls, - counters_calls, - scratch_memory_calls, - rccl_calls, - rocdecode_calls, - rocjpeg_calls); - } - } - } + auto agents = rocpd::read(conn); + auto kernels = rocpd::sql_generator{ + conn, select_guid_nid_pid("kernels"), region_order_by}; + auto memory_copies = rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_copies"), region_order_by}; + auto memory_allocations = rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_allocations"), region_order_by}; + auto region_api_calls = rocpd::sql_generator{ + conn, select_guid_nid_pid("regions"), region_order_by}; + auto counters_calls = rocpd::sql_generator{ + conn, select_guid_nid_pid("kernel_pmc_events"), region_order_by}; + auto scratch_memory_calls = rocpd::sql_generator{ + conn, + 
select_guid_nid_pid("memory_allocations", "WHERE level = 'SCRATCH'"), + region_order_by}; + + rocpd::output::write_csv(csv_manager, + agents, + kernels, + memory_copies, + memory_allocations, + region_api_calls, + counters_calls, + scratch_memory_calls); }, "Write trace data to CSV files"); @@ -621,12 +532,12 @@ PYBIND11_MODULE(libpyrocpd, pyrocpd) _agent.node_id, // absolute index static_cast(_agent.logical_node_id), // relative index static_cast(_agent.logical_node_type_id), // type-relative index - std::string_view(_agent.type)); + std::string_view(_agent.type_name)); return ret_index; }; constexpr auto kernels_order_by = - "agent_abs_index ASC, stream_id ASC, queue_id ASC, start ASC, end DESC"; + "agent_absolute_index ASC, stream_id ASC, queue_id ASC, start ASC, end ASC"; // to initialise the OTF@ session properly we need to know: // (1) the process with the earliest start time @@ -722,7 +633,7 @@ PYBIND11_MODULE(libpyrocpd, pyrocpd) where_extra_condition); }; - constexpr auto region_order_by = "start ASC, end DESC"; + constexpr auto region_order_by = "start ASC, end ASC"; auto _sqlgen_otf2 = common::simple_timer{fmt::format( "OTF2 generation from SQL for process {} (total)", pitr.pid)}; diff --git a/source/lib/python/rocpd/otf2.py b/source/lib/python/rocpd/otf2.py index 33714e8a0..3c73d9c53 100644 --- a/source/lib/python/rocpd/otf2.py +++ b/source/lib/python/rocpd/otf2.py @@ -25,6 +25,7 @@ from .importer import RocpdImportData from .time_window import apply_time_window +from .filter import apply_filter from . import output_config from . 
import libpyrocpd @@ -33,11 +34,12 @@ def write_otf2(importData, config): return libpyrocpd.write_otf2(importData, config) -def execute(input, config=None, window_args=None, **kwargs): +def execute(input, config=None, window_args=None, filter_args=None, **kwargs): importData = RocpdImportData(input) apply_time_window(importData, **window_args) + apply_filter(importData, **filter_args) config = ( output_config.output_config(**kwargs) @@ -82,7 +84,8 @@ def main(argv=None): from .time_window import process_args as process_args_time_window from .output_config import add_args as add_args_output_config from .output_config import process_args as process_args_output_config - from .output_config import add_generic_args, process_generic_args + from .filter import add_args as add_args_filter + from .filter import process_args as process_args_filter parser = argparse.ArgumentParser( description="Convert rocPD to OTF2 format", allow_abbrev=False @@ -101,19 +104,22 @@ def main(argv=None): valid_out_config_args = add_args_output_config(parser) valid_otf2_args = add_args(parser) - valid_generic_args = add_generic_args(parser) + valid_filter_args = add_args_filter(parser) valid_time_window_args = add_args_time_window(parser) args = parser.parse_args(argv) out_cfg_args = process_args_output_config(args, valid_out_config_args) - generic_out_cfg_args = process_generic_args(args, valid_generic_args) + filter_args = process_args_filter(args, valid_filter_args) window_args = process_args_time_window(args, valid_time_window_args) otf2_args = process_args(args, valid_otf2_args) - all_args = {**out_cfg_args, **otf2_args, **generic_out_cfg_args} + all_args = { + **out_cfg_args, + **otf2_args, + } - execute(args.input, window_args=window_args, **all_args) + execute(args.input, window_args=window_args, filter_args=filter_args, **all_args) if __name__ == "__main__": diff --git a/source/lib/python/rocpd/output_config.py b/source/lib/python/rocpd/output_config.py index d0877384c..f89404502 100644 
--- a/source/lib/python/rocpd/output_config.py +++ b/source/lib/python/rocpd/output_config.py @@ -125,6 +125,18 @@ def add_args(parser): required=False, ) + agent_index_options = parser.add_argument_group("Agent index options") + + agent_index_options.add_argument( + "--agent-index-value", + choices=("absolute", "relative", "type-relative"), + help="""Device identification format in CSV/Perfetto/OTF2 output (default: relative): + absolute: uses node_id (Agent-0, Agent-2, Agent-4) ignoring cgroups restrictions. + relative: uses logical_node_id (Agent-0, Agent-1, Agent-2) considering cgroups restrictions. + type-relative: uses logical_node_type_id (CPU-0, GPU-0, GPU-1) with numbering that resets for each device type.""", + default="relative", + ) + kernel_naming_options = parser.add_argument_group("Kernel naming options") kernel_naming_options.add_argument( @@ -134,7 +146,7 @@ def add_args(parser): default=False, ) - return ["output_file", "output_path", "kernel_rename"] + return ["output_file", "output_path", "agent_index_value", "kernel_rename"] def process_args(args, valid_args): @@ -146,37 +158,7 @@ def process_args(args, valid_args): if itr == "output_format": ret[itr] = val elif itr == "output_path" and val is not None: - ret[itr] = format_path(val) + ret[itr] = os.path.abspath(format_path(val)) elif val is not None: ret[itr] = val return ret - - -def add_generic_args(parser): - """Add generic arguments that apply to multiple output formats.""" - - generic_options = parser.add_argument_group("Generic options") - - generic_options.add_argument( - "--agent-index-value", - choices=("absolute", "relative", "type-relative"), - help="""Device identification format in CSV/Perfetto/OTF2 output (default: relative): - absolute: uses node_id (Agent-0, Agent-2, Agent-4) ignoring cgroups restrictions. - relative: uses logical_node_id (Agent-0, Agent-1, Agent-2) considering cgroups restrictions. 
- type-relative: uses logical_node_type_id (CPU-0, GPU-0, GPU-1) with numbering that resets for each device type.""", - default="relative", - ) - - return [ - "agent_index_value", - ] - - -def process_generic_args(args, valid_args): - ret = {} - for itr in valid_args: - if hasattr(args, itr): - val = getattr(args, itr) - if val is not None: - ret[itr] = val - return ret diff --git a/source/lib/python/rocpd/pftrace.py b/source/lib/python/rocpd/pftrace.py index a2650062a..2ff8a9c1d 100644 --- a/source/lib/python/rocpd/pftrace.py +++ b/source/lib/python/rocpd/pftrace.py @@ -25,6 +25,7 @@ from .importer import RocpdImportData from .time_window import apply_time_window +from .filter import apply_filter from . import output_config from . import libpyrocpd @@ -33,11 +34,12 @@ def write_pftrace(importData, config): return libpyrocpd.write_perfetto(importData, config) -def execute(input, config=None, window_args=None, **kwargs): +def execute(input, config=None, window_args=None, filter_args=None, **kwargs): importData = RocpdImportData(input) apply_time_window(importData, **window_args) + apply_filter(importData, **filter_args) config = ( output_config.output_config(**kwargs) @@ -90,12 +92,28 @@ def add_args(parser): default=False, ) + pftrace_options.add_argument( + "--annotate-args", + help="Add the function arguments (when available) to the Perfetto debug annotations", + action="store_true", + default=False, + ) + + pftrace_options.add_argument( + "--annotate-pmc", + help="Add the function PMC values (when available) to the Perfetto debug annotations", + action="store_true", + default=False, + ) + return [ "perfetto_backend", "perfetto_buffer_fill_policy", "perfetto_buffer_size", "perfetto_shmem_size_hint", "group_by_queue", + "annotate_args", + "annotate_pmc", ] @@ -116,7 +134,8 @@ def main(argv=None): from .time_window import process_args as process_args_time_window from .output_config import add_args as add_args_output_config from .output_config import process_args as 
process_args_output_config - from .output_config import add_generic_args, process_generic_args + from .filter import add_args as add_args_filter + from .filter import process_args as process_args_filter parser = argparse.ArgumentParser( description="Convert rocPD to Perfetto file", allow_abbrev=False @@ -135,25 +154,25 @@ def main(argv=None): valid_out_config_args = add_args_output_config(parser) valid_pftrace_args = add_args(parser) - valid_generic_args = add_generic_args(parser) + valid_filter_args = add_args_filter(parser) valid_time_window_args = add_args_time_window(parser) args = parser.parse_args(argv) out_cfg_args = process_args_output_config(args, valid_out_config_args) pftrace_args = process_args(args, valid_pftrace_args) - generic_out_cfg_args = process_generic_args(args, valid_generic_args) + filter_args = process_args_filter(args, valid_filter_args) window_args = process_args_time_window(args, valid_time_window_args) all_args = { **pftrace_args, **out_cfg_args, - **generic_out_cfg_args, } execute( args.input, window_args=window_args, + filter_args=filter_args, **all_args, ) diff --git a/source/lib/python/rocpd/query.py b/source/lib/python/rocpd/query.py index ab369326d..10fd5360e 100644 --- a/source/lib/python/rocpd/query.py +++ b/source/lib/python/rocpd/query.py @@ -33,6 +33,7 @@ from . import libpyrocpd from .importer import RocpdImportData from .time_window import apply_time_window +from .filter import apply_filter def export_sqlite_query( @@ -42,6 +43,7 @@ def export_sqlite_query( export_format: Optional[str] = None, export_path: Optional[str] = None, dashboard_template_path: Optional[str] = None, + **kwargs: Optional[dict], ) -> Optional[str]: """ Execute a SQLite query and print it to console. 
@@ -99,7 +101,21 @@ def write_export(content): # 3) Export based on format if export_format == "csv": - df.to_csv(export_path, index=False) + import csv + + cols = [f"{itr}" for itr in df.columns.tolist()] + col_names = ( + [f"{itr}".title() for itr in cols] + if kwargs.get("title_columns", True) + else cols[:] + ) + df.to_csv( + export_path, + index=False, + columns=cols, + header=col_names, + quoting=csv.QUOTE_NONNUMERIC, + ) elif export_format == "html": write_export(df.to_html(index=False)) @@ -447,11 +463,12 @@ def process_args(args, valid_args): return ret -def execute(input, args, config=None, window_args=None, **kwargs): +def execute(input, args, config=None, window_args=None, filter_args=None, **kwargs): importData = RocpdImportData(input) apply_time_window(importData, **window_args) + apply_filter(importData, **filter_args) config = ( output_config.output_config(**kwargs) @@ -513,7 +530,8 @@ def main(argv=None): from .time_window import process_args as process_args_time_window from .output_config import add_args as add_args_output_config from .output_config import process_args as process_args_output_config - from .output_config import add_generic_args, process_generic_args + from .filter import add_args as add_args_filter + from .filter import process_args as process_args_filter parser = argparse.ArgumentParser( description="Generate report for rocpd query", allow_abbrev=False @@ -531,27 +549,27 @@ def main(argv=None): ) valid_out_config_args = add_args_output_config(parser) - valid_generic_args = add_generic_args(parser) + valid_filter_args = add_args_filter(parser) valid_time_window_args = add_args_time_window(parser) valid_query_args = add_args(parser) args = parser.parse_args(argv) out_cfg_args = process_args_output_config(args, valid_out_config_args) - generic_out_cfg_args = process_generic_args(args, valid_generic_args) + filter_args = process_args_filter(args, valid_filter_args) window_args = process_args_time_window(args, valid_time_window_args) 
query_args = process_args(args, valid_query_args) all_args = { **query_args, **out_cfg_args, - **generic_out_cfg_args, } execute( args.input, args, window_args=window_args, + filter_args=filter_args, **all_args, ) diff --git a/source/lib/python/rocpd/schema.py b/source/lib/python/rocpd/schema.py index fe13e7836..76154e38b 100644 --- a/source/lib/python/rocpd/schema.py +++ b/source/lib/python/rocpd/schema.py @@ -56,8 +56,15 @@ def __init__(self, uuid="", guid=""): variables, ) + self.metadata = RocpdSchema.load_schema( + libpyrocpd.sql_engine.sqlite3, + libpyrocpd.sql_schema.rocpd_metadata, + libpyrocpd.sql_option.none, + variables, + ) + _views = [] - for itr in ["rocpd", "data", "summary", "marker"]: + for itr in ["rocpd", "data", "summary"]: _views += [ RocpdSchema.load_schema( libpyrocpd.sql_engine.sqlite3, diff --git a/source/lib/python/rocpd/source/CMakeLists.txt b/source/lib/python/rocpd/source/CMakeLists.txt index 99a6927b4..df52cb349 100644 --- a/source/lib/python/rocpd/source/CMakeLists.txt +++ b/source/lib/python/rocpd/source/CMakeLists.txt @@ -2,24 +2,23 @@ # libpyrocpd python binding sources # -set(libpyrocpd_source_headers - common.hpp - functions.hpp - interop.hpp - perfetto.hpp - csv.hpp - otf2.hpp - sql_generator.hpp - pysqlite_Connection.h - types.hpp) +set(libpyrocpd_source_object_headers common.hpp functions.hpp perfetto.hpp csv.hpp + otf2.hpp sql_generator.hpp types.hpp) -set(libpyrocpd_source_sources csv.cpp functions.cpp interop.cpp otf2.cpp perfetto.cpp - types.cpp) +set(libpyrocpd_source_object_sources csv.cpp functions.cpp otf2.cpp perfetto.cpp + types.cpp) + +rocprofiler_rocpd_python_bindings_object_sources( + PRIVATE ${libpyrocpd_source_object_headers} ${libpyrocpd_source_object_sources}) + +set(libpyrocpd_source_target_headers interop.hpp pysqlite_Connection.h) + +set(libpyrocpd_source_target_sources interop.cpp) foreach(_PYTHON_VERSION ${ROCPROFILER_PYTHON_VERSIONS}) rocprofiler_rocpd_python_bindings_target_sources( - ${_PYTHON_VERSION} 
PRIVATE ${libpyrocpd_source_sources} - ${libpyrocpd_source_headers}) + ${_PYTHON_VERSION} PRIVATE ${libpyrocpd_source_target_sources} + ${libpyrocpd_source_target_headers}) endforeach() add_subdirectory(serialization) diff --git a/source/lib/python/rocpd/source/csv.cpp b/source/lib/python/rocpd/source/csv.cpp index 722edcbc2..c473a1029 100644 --- a/source/lib/python/rocpd/source/csv.cpp +++ b/source/lib/python/rocpd/source/csv.cpp @@ -24,7 +24,9 @@ #include "lib/common/defines.hpp" #include "lib/common/hasher.hpp" +#include "lib/common/logging.hpp" #include "lib/common/mpl.hpp" +#include "lib/common/simple_timer.hpp" #include "lib/output/csv.hpp" #include "lib/output/csv_output_file.hpp" #include "lib/output/generator.hpp" @@ -48,8 +50,6 @@ #include #include -namespace fs = std::filesystem; - namespace { const std::string STATS_HEADER = "\"Name\",\"Calls\",\"TotalDurationNs\"," @@ -66,12 +66,6 @@ namespace output CsvManager::CsvManager(rocprofiler::tool::output_config output_cfg) : config{std::move(output_cfg)} { - if(!ensure_output_directory()) - { - ROCP_ERROR << "Failed to create csv output directory: " << config.output_path; - return; - } - this->csv_configs = { {CsvType::KERNEL_DISPATCH, {"kernel_trace.csv", @@ -81,31 +75,32 @@ CsvManager::CsvManager(rocprofiler::tool::output_config output_cfg) "\"Workgroup_Size_X\",\"Workgroup_Size_Y\",\"Workgroup_Size_Z\"," "\"Grid_Size_X\",\"Grid_Size_Y\",\"Grid_Size_Z\""}}, {CsvType::MEMORY_COPY, - {"memory_copy_trace.csv", + {"memory_copy_trace", "\"Guid\",\"Kind\",\"Direction\",\"Stream_Id\",\"Source_Agent_Id\"," "\"Destination_Agent_" "Id\"," "\"Correlation_Id\",\"Start_Timestamp\",\"End_Timestamp\""}}, {CsvType::MEMORY_ALLOCATION, - {"memory_allocation_trace.csv", + {"memory_allocation_trace", "\"Guid\",\"Kind\",\"Operation\",\"Agent_Id\",\"Allocation_Size\"," "\"Address\"," "\"Correlation_Id\",\"Start_Timestamp\",\"End_Timestamp\""}}, {CsvType::SCRATCH_MEMORY, - {"scratch_memory_trace.csv", + {"scratch_memory_trace", 
"\"Kind\",\"Operation\",\"Agent_Id\",\"Queue_Id\",\"Thread_Id\"," "\"Alloc_Flags\",\"Start_" "Timestamp\",\"End_Timestamp\""}}, - {CsvType::HIP_API, {"hip_api_trace.csv", API_TRACE_HEADER}}, - {CsvType::HSA_CSV_API, {"hsa_api_trace.csv", API_TRACE_HEADER}}, - {CsvType::MARKER, {"marker_api_trace.csv", API_TRACE_HEADER}}, - {CsvType::RCCL_API, {"rccl_api_trace.csv", API_TRACE_HEADER}}, - {CsvType::ROCDECODE_API, {"rocdecode_api_trace.csv", API_TRACE_HEADER}}, - {CsvType::ROCJPEG_API, {"rocjpeg_api_trace.csv", API_TRACE_HEADER}}, + {CsvType::REGION_API, {"api_trace", API_TRACE_HEADER}}, + {CsvType::HIP_API, {"hip_api_trace", API_TRACE_HEADER}}, + {CsvType::HSA_CSV_API, {"hsa_api_trace", API_TRACE_HEADER}}, + {CsvType::MARKER, {"marker_api_trace", API_TRACE_HEADER}}, + {CsvType::RCCL_API, {"rccl_api_trace", API_TRACE_HEADER}}, + {CsvType::ROCDECODE_API, {"rocdecode_api_trace", API_TRACE_HEADER}}, + {CsvType::ROCJPEG_API, {"rocjpeg_api_trace", API_TRACE_HEADER}}, {CsvType::COUNTER, - {"counter_collection.csv", + {"counter_collection", "\"Pid\",\"Correlation_Id\",\"Dispatch_Id\",\"Agent_Id\",\"Queue_Id\"," "\"Process_Id\"," "\"Thread_Id\"," @@ -117,20 +112,6 @@ CsvManager::CsvManager(rocprofiler::tool::output_config output_cfg) }; } -bool -CsvManager::ensure_output_directory() const -{ - try - { - fs::create_directories(config.output_path); - return true; - } catch(const std::exception& e) - { - ROCP_ERROR << "Failed to create directory: " << e.what(); - return false; - } -} - CsvManager::~CsvManager() { for(auto& [type, stream] : streams) @@ -143,7 +124,7 @@ CsvManager::~CsvManager() } } -std::ofstream& +CsvManager::output_stream_t& CsvManager::get_stream(CsvType type) { return streams[type]; @@ -168,22 +149,20 @@ CsvManager::initialize_csv_file(CsvType type) const auto& cfg = csv_configs[type]; - fs::path output_dir = config.output_path; - fs::path filename = - config.output_file.empty() ? 
cfg.filename : config.output_file + "_" + cfg.filename; - - file_paths[type] = (output_dir / filename).string(); + auto output_file = rocprofiler::tool::get_output_filename(config, cfg.filename, ".csv"); + auto& stream = streams.emplace(type, std::move(output_file)).first->second; - auto& path = file_paths[type]; - auto& stream = streams[type]; - - stream.open(path, std::ios::out); if(!stream.is_open()) { - ROCP_ERROR << "Failed to open CSV output file: " << path; + ROCP_ERROR << fmt::format("Failed to open CSV output file: '{}'", output_file); return false; } + // populate file paths + file_paths[type] = output_file; + + ROCP_ERROR << "Opened result file: " << output_file; + stream << cfg.header << '\n'; return true; } @@ -192,15 +171,7 @@ template bool has_any_data(const rocprofiler::tool::generator& data_gen) { - for(auto ditr : data_gen) - { - auto gen = data_gen.get(ditr); - if(begin(gen) != end(gen)) - { - return true; - } - } - return false; + return (data_gen.empty() == false); } template @@ -210,16 +181,29 @@ process_data_to_csv(CsvManager& csv_manager, const rocprofiler::tool::generator& data_gen, Processor process_func) { - if(!has_any_data(data_gen)) return; + if(!has_any_data(data_gen)) + { + ROCP_INFO << fmt::format("No data found for CSV type: {}, skipping CSV generation", + csv_manager.csv_configs.at(csv_type).filename); + return; + } + + if(!csv_manager.initialize_csv_file(csv_type)) + { + ROCP_INFO << fmt::format( + "CSV manager failed to initialize for CSV type: {}, skipping CSV generation", + csv_manager.csv_configs.at(csv_type).filename); + return; + } - if(!csv_manager.initialize_csv_file(csv_type)) return; + auto csvgen_perf = rocprofiler::common::simple_timer{ + fmt::format("CSV generation :: {}", csv_manager.csv_configs.at(csv_type).filename)}; for(auto ditr : data_gen) { - auto gen = data_gen.get(ditr); - for(auto it = begin(gen); it != end(gen); ++it) + for(const auto& itr : data_gen.get(ditr)) { - process_func(csv_manager, csv_type, 
*it); + process_func(csv_manager, csv_type, itr); } } } @@ -237,8 +221,8 @@ write_kernel_csv( std::string kernel_identifier = cm.config.kernel_rename ? kernel.region : kernel.name; std::string agent_identifier = create_agent_index(cm.config.agent_index_value, - kernel.agent_abs_index, - kernel.agent_log_index, + kernel.agent_absolute_index, + kernel.agent_logical_index, kernel.agent_type_index, std::string_view(kernel.agent_type)) .as_string(); @@ -258,7 +242,7 @@ write_kernel_csv( kernel.end, kernel.lds_size, kernel.scratch_size, - kernel.vgpr_count, + kernel.arch_vgpr_count, kernel.accum_vgpr_count, kernel.sgpr_count, kernel.workgroup_size.x, @@ -281,16 +265,16 @@ write_memory_copy_csv( [](CsvManager& cm, CsvType type, const rocpd::types::memory_copies& mcopy) { std::string src_agent_identifier = create_agent_index(cm.config.agent_index_value, - mcopy.src_agent_abs_index, - mcopy.src_agent_log_index, + mcopy.src_agent_absolute_index, + mcopy.src_agent_logical_index, mcopy.src_agent_type_index, std::string_view(mcopy.src_agent_type)) .as_string(); std::string dst_agent_identifier = create_agent_index(cm.config.agent_index_value, - mcopy.dst_agent_abs_index, - mcopy.dst_agent_log_index, + mcopy.dst_agent_absolute_index, + mcopy.dst_agent_logical_index, mcopy.dst_agent_type_index, std::string_view(mcopy.dst_agent_type)) .as_string(); @@ -313,22 +297,27 @@ write_memory_allocation_csv( CsvManager& csv_manager, const rocprofiler::tool::generator& memory_alloc_gen) { + static auto operation_name_mapping = std::unordered_map{ + {"ALLOC", "ALLOCATE"}, + }; + process_data_to_csv( csv_manager, CsvType::MEMORY_ALLOCATION, memory_alloc_gen, [](CsvManager& cm, CsvType type, const rocpd::types::memory_allocation& malloc) { - std::string normalized_type = malloc.type; - if(normalized_type == "ALLOC") + auto _optype = std::string_view{malloc.type}; + if(auto mitr = operation_name_mapping.find(_optype); + mitr != operation_name_mapping.end()) { - normalized_type = "ALLOCATE"; + 
_optype = mitr->second; } - std::string operation = fmt::format("MEMORY_ALLOCATION_{}", normalized_type); + std::string operation = fmt::format("MEMORY_ALLOCATION_{}", _optype); std::string agent_identifier = create_agent_index(cm.config.agent_index_value, - malloc.agent_abs_index, - malloc.agent_log_index, + malloc.agent_absolute_index, + malloc.agent_logical_index, malloc.agent_type_index, std::string_view(malloc.agent_type)) .as_string(); @@ -362,8 +351,8 @@ write_scratch_memory_csv( [](CsvManager& cm, CsvType type, const rocpd::types::scratch_memory& scratch_mem) { std::string agent_identifier = create_agent_index(cm.config.agent_index_value, - scratch_mem.agent_abs_index, - scratch_mem.agent_log_index, + scratch_mem.agent_absolute_index, + scratch_mem.agent_logical_index, scratch_mem.agent_type_index, std::string_view(scratch_mem.agent_type)) .as_string(); @@ -380,6 +369,26 @@ write_scratch_memory_csv( }); } +void +write_region_api_csv(CsvManager& csv_manager, + const rocprofiler::tool::generator& region_api_gen) +{ + process_data_to_csv(csv_manager, + CsvType::REGION_API, + region_api_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::region& api) { + cm.write_line(type, + fmt::format("\"{}\"", api.guid), + fmt::format("\"{}\"", api.category), + fmt::format("\"{}\"", api.name), + api.pid, + api.tid, + api.stack_id, + api.start, + api.end); + }); +} + void write_hip_api_csv(CsvManager& csv_manager, const rocprofiler::tool::generator& hip_api_gen) @@ -454,18 +463,11 @@ write_marker_api_csv(CsvManager& c for(const auto& record : marker_api_gen.get(ditr)) { auto row_ss = std::stringstream{}; - auto _name = record.name; - - if(record.has_extdata()) - { - if(auto _extdata = record.get_extdata(); !_extdata.message.empty()) - _name = _extdata.message; - } marker_csv_encoder::write_row(row_ss, record.guid, record.category, - _name, + record.name, record.pid, record.tid, record.stack_id, @@ -681,8 +683,8 @@ write_counters_csv(CsvManager& cs [](CsvManager& cm, 
CsvType type, const rocpd::types::counter& counter) { std::string agent_identifier = create_agent_index(cm.config.agent_index_value, - counter.agent_abs_index, - counter.agent_log_index, + counter.agent_absolute_index, + counter.agent_logical_index, counter.agent_type_index, std::string_view(counter.agent_type)) .as_string(); @@ -697,47 +699,37 @@ write_counters_csv(CsvManager& cs counter.tid, counter.grid_size, counter.kernel_id, - fmt::format("\"{}\"", counter.kernel_name), + fmt::format("\"{}\"", counter.name), counter.workgroup_size, - counter.lds_block_size, + counter.lds_size, counter.scratch_size, - counter.vgpr_count, + counter.arch_vgpr_count, counter.accum_vgpr_count, counter.sgpr_count, - fmt::format("\"{}\"", counter.counter_name), - counter.value, + fmt::format("\"{}\"", counter.pmc_name), + counter.pmc_value, counter.start, counter.end); }); } void -write_csvs(CsvManager& csv_manager, - const rocprofiler::tool::generator& kernel_dispatch, - const rocprofiler::tool::generator& memory_copies, - const rocprofiler::tool::generator& memory_allocations, - const rocprofiler::tool::generator& hip_api_calls, - const rocprofiler::tool::generator& hsa_api_calls, - const rocprofiler::tool::generator& marker_api_calls, - const rocprofiler::tool::generator& counters_calls, - const rocprofiler::tool::generator& scratch_memory_calls, - const rocprofiler::tool::generator& rccl_calls, - const rocprofiler::tool::generator& rocdecode_calls, - const rocprofiler::tool::generator& rocjpeg_calls) +write_csv(CsvManager& csv_manager, + const std::vector& agents, + const rocprofiler::tool::generator& kernel_dispatch, + const rocprofiler::tool::generator& memory_copies, + const rocprofiler::tool::generator& memory_allocations, + const rocprofiler::tool::generator& region_api_calls, + const rocprofiler::tool::generator& counters_calls, + const rocprofiler::tool::generator& scratch_memory_calls) { + rocpd::output::write_agent_info_csv(csv_manager, agents); 
rocpd::output::write_kernel_csv(csv_manager, kernel_dispatch); rocpd::output::write_memory_copy_csv(csv_manager, memory_copies); rocpd::output::write_memory_allocation_csv(csv_manager, memory_allocations); - rocpd::output::write_hip_api_csv(csv_manager, hip_api_calls); - rocpd::output::write_hsa_api_csv(csv_manager, hsa_api_calls); - rocpd::output::write_marker_api_csv(csv_manager, marker_api_calls); - - rocpd::output::write_counters_csv(csv_manager, counters_calls); + rocpd::output::write_region_api_csv(csv_manager, region_api_calls); rocpd::output::write_scratch_memory_csv(csv_manager, scratch_memory_calls); - rocpd::output::write_rccl_api_csv(csv_manager, rccl_calls); - - rocpd::output::write_rocdecode_api_csv(csv_manager, rocdecode_calls); - rocpd::output::write_rocjpeg_api_csv(csv_manager, rocjpeg_calls); + rocpd::output::write_counters_csv(csv_manager, counters_calls); } } // namespace output } // namespace rocpd diff --git a/source/lib/python/rocpd/source/csv.hpp b/source/lib/python/rocpd/source/csv.hpp index 7cabdcaf1..838c18e51 100644 --- a/source/lib/python/rocpd/source/csv.hpp +++ b/source/lib/python/rocpd/source/csv.hpp @@ -59,6 +59,7 @@ enum class CsvType MEMORY_COPY, MEMORY_ALLOCATION, SCRATCH_MEMORY, + REGION_API, HIP_API, HSA_CSV_API, MARKER, @@ -71,13 +72,15 @@ enum class CsvType class CsvManager { public: + using output_stream_t = std::ofstream; + CsvManager(rocprofiler::tool::output_config output_cfg); ~CsvManager(); rocprofiler::tool::output_config config; std::map csv_configs; - std::ofstream& get_stream(CsvType type); + output_stream_t& get_stream(CsvType type); bool has_stream(CsvType type) const; bool initialize_csv_file(CsvType type); @@ -94,27 +97,18 @@ class CsvManager } private: - std::map streams; - std::map file_paths; - - bool ensure_output_directory() const; + std::map streams = {}; + std::map file_paths = {}; }; void -write_agent_info_csv(CsvManager& csv_manager, const std::vector& agents); - -void -write_csvs(CsvManager& 
csv_manager, - const rocprofiler::tool::generator& kernel_dispatch, - const rocprofiler::tool::generator& memory_copies, - const rocprofiler::tool::generator& memory_allocations, - const rocprofiler::tool::generator& hip_api_calls, - const rocprofiler::tool::generator& hsa_api_calls, - const rocprofiler::tool::generator& marker_api_calls, - const rocprofiler::tool::generator& counters_calls, - const rocprofiler::tool::generator& scratch_memory_calls, - const rocprofiler::tool::generator& rccl_calls, - const rocprofiler::tool::generator& rocdecode_calls, - const rocprofiler::tool::generator& rocjpeg_calls); +write_csv(CsvManager& csv_manager, + const std::vector& agents, + const rocprofiler::tool::generator& kernel_dispatch, + const rocprofiler::tool::generator& memory_copies, + const rocprofiler::tool::generator& memory_allocations, + const rocprofiler::tool::generator& region_api_calls, + const rocprofiler::tool::generator& counters_calls, + const rocprofiler::tool::generator& scratch_memory_calls); } // namespace output } // namespace rocpd diff --git a/source/lib/python/rocpd/source/functions.cpp b/source/lib/python/rocpd/source/functions.cpp index 0d7c29316..601e28364 100644 --- a/source/lib/python/rocpd/source/functions.cpp +++ b/source/lib/python/rocpd/source/functions.cpp @@ -33,6 +33,7 @@ #include #include +#include namespace rocpd { @@ -40,14 +41,13 @@ namespace functions { namespace { -// Custom SQL function: rocpd_get_string(common_string_id, unique_string_id, nid, pid) +// Custom SQL function: rocpd_get_string(string_id, guid) void rocpd_get_string(sqlite3_context* context, int argc, sqlite3_value** argv) { if(argc != 4) { - ROCP_WARNING << "rocpd_get_string requires exactly 4 arguments (common_string_id, " - "unique_string_id, nid, pid)"; + ROCP_WARNING << "rocpd_get_string requires exactly 2 arguments (string_id, guid)"; sqlite3_result_null(context); return; } @@ -55,13 +55,10 @@ rocpd_get_string(sqlite3_context* context, int argc, sqlite3_value** 
argv) auto* db = static_cast(sqlite3_user_data(context)); // common and unique name ids passed in - auto c_name_id = sqlite3_value_int64(argv[0]); - auto u_name_id = sqlite3_value_int64(argv[1]); - - auto execute_query = [&](std::string_view _query, std::initializer_list&& _args) { - // char query[256]; - // snprintf(query, sizeof(query), "SELECT value FROM %s WHERE id = ?", table); + auto _name_id = sqlite3_value_int64(argv[0]); + const auto* _guid = reinterpret_cast(sqlite3_value_text(argv[1])); + auto execute_query = [&](std::string_view _query) { sqlite3_stmt* stmt = nullptr; if(int rc = sqlite3_prepare_v2(db, _query.data(), -1, &stmt, nullptr); rc != SQLITE_OK) @@ -71,9 +68,8 @@ rocpd_get_string(sqlite3_context* context, int argc, sqlite3_value** argv) return; } - int64_t idx = 1; - for(auto itr : _args) - sqlite3_bind_int64(stmt, idx++, itr); + sqlite3_bind_int64(stmt, 1, _name_id); + sqlite3_bind_text(stmt, 1, _guid, std::string_view{_guid}.length(), nullptr); if(auto rc = sqlite3_step(stmt); rc == SQLITE_ROW) { @@ -95,23 +91,69 @@ rocpd_get_string(sqlite3_context* context, int argc, sqlite3_value** argv) sqlite3_finalize(stmt); }; - if(c_name_id != 0) + if(_name_id != 0) { - execute_query("SELECT string FROM rocpd_common_string WHERE id == ?", - std::initializer_list{c_name_id}); + execute_query("SELECT string FROM rocpd_string WHERE id == ? 
AND guid = '?'"); } - else if(u_name_id != 0) + else { - auto u_nid = sqlite3_value_int64(argv[2]); - auto u_pid = sqlite3_value_int64(argv[3]); + sqlite3_result_null(context); + } +} + +// --- 1) Define the aggregation context --- +struct stddev_context +{ + sqlite3_int64 nsamp = 0; // count of values + double mean = 0.0; // running mean + double diff_sqr = 0.0; // running sum of squares of differences +}; + +// --- 2) step function: called once per row --- +void +stddev_step(sqlite3_context* ctx, int argc, sqlite3_value** argv) +{ + if(argc == 0) return; + + // We expect a single REAL or INT argument + if(sqlite3_value_type(argv[0]) == SQLITE_NULL) return; + + auto val = sqlite3_value_double(argv[0]); + + // Allocate or fetch our context struct + auto* p = static_cast(sqlite3_aggregate_context(ctx, sizeof(stddev_context))); + if(!p) return; // OOM - execute_query( - "SELECT string FROM rocpd_unique_string WHERE id == ? AND nid = ? AND pid = ?", - std::initializer_list{u_name_id, u_nid, u_pid}); + // Initialize on first call + if(p->nsamp == 0) + { + p->nsamp = 0; + p->mean = 0.0; + p->diff_sqr = 0.0; + } + + // Welford’s algorithm + ++p->nsamp; + auto delta = (val - p->mean); + p->mean += (delta / p->nsamp); + auto delta2 = (val - p->mean); + p->diff_sqr += (delta * delta2); +} + +// --- 3) finalize function: called after all rows are processed --- +void +stddev_finalize(sqlite3_context* ctx) +{ + auto* p = static_cast(sqlite3_aggregate_context(ctx, 0)); + if(!p || p->nsamp < 2) + { + // Not enough data to form a sample stddev + sqlite3_result_null(ctx); } else { - sqlite3_result_null(context); + auto variance = p->diff_sqr / (p->nsamp - 1); + sqlite3_result_double(ctx, std::sqrt(variance)); } } } // namespace @@ -119,20 +161,28 @@ rocpd_get_string(sqlite3_context* context, int argc, sqlite3_value** argv) void define_for_database(sqlite3* conn) { - if(false) - { - sqlite3_create_function_v2(conn, - "rocpd_get_string", - 4, - SQLITE_UTF8, - conn, - 
rocpd_get_string, - nullptr, - nullptr, - nullptr); - } - - rocprofiler::common::consume_args(conn); + // name = "STDDEV_SAMP", 1 arg, UTF-8, no user data, + // no scalar function, but these aggregate callbacks: + sqlite3_create_function_v2(conn, + "STDDEV_SAMP", // SQL name + 1, // number of args + SQLITE_UTF8, + nullptr, // user data pointer + nullptr, // xFunc (for scalar) — null for aggregates + stddev_step, // xStep + stddev_finalize, // xFinal + nullptr // destructor for user data + ); + + sqlite3_create_function_v2(conn, + "rocpd_get_string", + 2, + SQLITE_UTF8, + conn, + rocpd_get_string, + nullptr, + nullptr, + nullptr); } } // namespace functions } // namespace rocpd diff --git a/source/lib/python/rocpd/source/functions.hpp b/source/lib/python/rocpd/source/functions.hpp index 01b66f72f..acae33d88 100644 --- a/source/lib/python/rocpd/source/functions.hpp +++ b/source/lib/python/rocpd/source/functions.hpp @@ -25,7 +25,6 @@ #include "lib/common/defines.hpp" #include "lib/common/logging.hpp" -#include #include namespace rocpd diff --git a/source/lib/python/rocpd/source/interop.cpp b/source/lib/python/rocpd/source/interop.cpp index bdde41b0b..5cb5d8cb7 100644 --- a/source/lib/python/rocpd/source/interop.cpp +++ b/source/lib/python/rocpd/source/interop.cpp @@ -161,6 +161,22 @@ auto bindings = std::array{ void activate_gotcha_bindings() { +#if defined(GOTCHA_INIT) && GOTCHA_INIT > 0 + // initialize gotcha + gotcha_init_config_t gotcha_cfg = { + .size = sizeof(gotcha_init_config_t), .dl_open_bind = 0, .dl_sym_bind = 0}; + gotcha_init(&gotcha_cfg); +#endif + + // this ensures that the sqlite3 module is imported and the sqlite3_open_v2 and sqlite3_close_v2 + // in lib-dynload/_sqlite3.cpython-*.so are remapped to the gotcha bindings. 
+ // this is needed to ensure that the sqlite3 connections are captured by gotcha + { + auto sqlite3_mod = py::module_::import("sqlite3"); + auto ret = sqlite3_mod.attr("connect")(":memory:"); + ret.attr("close")(); + } + // activate the gotcha wrappers auto _err = gotcha_wrap(bindings.data(), bindings.size(), "rocpd.sqlite3"); ROCP_WARNING_IF(_err != GOTCHA_SUCCESS) << "gotcha error for rocpd.sqlite3"; diff --git a/source/lib/python/rocpd/source/otf2.cpp b/source/lib/python/rocpd/source/otf2.cpp index ed84dfb82..100e8bda8 100644 --- a/source/lib/python/rocpd/source/otf2.cpp +++ b/source/lib/python/rocpd/source/otf2.cpp @@ -538,7 +538,7 @@ write_otf2(const OTF2Session& otf2_session, get_hash_id(_name), region_info{_name, OTF2_REGION_ROLE_DATA_TRANSFER, OTF2_PARADIGM_HIP}); - auto _extended_agent = agent_data.at(itr.dst_agent_abs_index); + auto _extended_agent = agent_data.at(itr.dst_agent_absolute_index); auto _agent_handle = _extended_agent.types_agent.id.handle; auto _evt_info = event_info{location_base{ process.pid, itr.tid, _agent_handle, ROCPROFILER_AGENT_MEMORY_COPY_TYPE}}; @@ -587,7 +587,7 @@ write_otf2(const OTF2Session& otf2_session, get_hash_id(_alloc_operation), region_info{_alloc_operation, OTF2_REGION_ROLE_ALLOCATE, OTF2_PARADIGM_HIP}); - auto _extended_agent = agent_data.at(itr.agent_abs_index); + auto _extended_agent = agent_data.at(itr.agent_absolute_index); auto _handle = _extended_agent.types_agent.id.handle; auto _evt_info = event_info{location_base{ @@ -672,7 +672,7 @@ write_otf2(const OTF2Session& otf2_session, _attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name); auto* _attrs = create_attribute_list_for_name(_perfetto_name); - auto _extended_agent = agent_data.at(itr.agent_abs_index); + auto _extended_agent = agent_data.at(itr.agent_absolute_index); auto _handle = _extended_agent.types_agent.id.handle; auto agent_index_info = _extended_agent.agent_index; diff --git a/source/lib/python/rocpd/source/perfetto.cpp 
b/source/lib/python/rocpd/source/perfetto.cpp index 95d08b771..d148eb68e 100644 --- a/source/lib/python/rocpd/source/perfetto.cpp +++ b/source/lib/python/rocpd/source/perfetto.cpp @@ -21,6 +21,10 @@ // SOFTWARE. #include "lib/python/rocpd/source/perfetto.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/simple_timer.hpp" +#include "lib/python/rocpd/source/common.hpp" +#include "lib/python/rocpd/source/sql_generator.hpp" #include "lib/common/defines.hpp" #include "lib/common/hasher.hpp" @@ -32,13 +36,17 @@ #include "lib/output/output_stream.hpp" #include "lib/output/sql/common.hpp" #include "lib/output/stream_info.hpp" +#include "lib/python/rocpd/source/types.hpp" #include "lib/rocprofiler-sdk-tool/config.hpp" +#include + #include #include #include #include +#include #include namespace rocpd @@ -68,8 +76,9 @@ get_hash_id(Tp&& _val) } } // namespace -PerfettoSession::PerfettoSession(const tool::output_config& output_cfg) +PerfettoSession::PerfettoSession(const tool::output_config& output_cfg, sqlite3* conn) : config{output_cfg} +, connection{conn} { auto args = ::perfetto::TracingInitArgs{}; auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{}; @@ -122,7 +131,9 @@ PerfettoSession::PerfettoSession(const tool::output_config& output_cfg) PerfettoSession::~PerfettoSession() { + tracing_session->FlushBlocking(); tracing_session->StopBlocking(); + auto filename = std::string{"results"}; auto ofs = tool::get_output_stream(config, filename, ".pftrace", std::ios::binary); @@ -164,18 +175,15 @@ PerfettoSession::~PerfettoSession() } void -write_perfetto( - const PerfettoSession& perfetto_session, - const types::process& process, - const std::unordered_map>& - agent_data, - const tool::generator& thread_gen, - const tool::generator& region_gen, - const tool::generator& sample_gen, - const tool::generator& kernel_dispatch_gen, - const tool::generator& memory_copy_gen, - const tool::generator& memory_allocation_gen, - const tool::generator& 
counter_collection_gen) +write_perfetto(const PerfettoSession& perfetto_session, + const types::process& process, + const std::vector& agents, + const tool::generator& thread_gen, + const tool::generator& region_gen, + const tool::generator& sample_gen, + const tool::generator& kernel_dispatch_gen, + const tool::generator& memory_copy_gen, + const tool::generator& /*memory_allocation_gen*/) { namespace sdk = ::rocprofiler::sdk; namespace common = ::rocprofiler::common; @@ -183,6 +191,15 @@ write_perfetto( static auto orig_process_track = ::perfetto::ProcessTrack::Current(); static auto orig_process_desc = orig_process_track.Serialize(); + static auto get_simple_timer = [](std::string_view label) { + return common::simple_timer{fmt::format("Perfetto generation :: {:24}", label)}; + }; + + static auto report_simple_timer = [](common::simple_timer& _timer) { + _timer.stop().report().set_quiet(true); + }; + + auto* conn = perfetto_session.connection; const auto& tracing_session = perfetto_session.tracing_session; const auto& ocfg = perfetto_session.config; const uint64_t this_pid = process.pid; @@ -208,12 +225,91 @@ write_perfetto( ::perfetto::TrackEvent::SetTrackDescriptor(this_pid_track, desc); } - auto agent_thread_ids = std::unordered_map>{}; - auto agent_thread_ids_alloc = std::unordered_map>{}; - auto agent_queue_ids = - std::unordered_map>{}; - auto agent_stream_ids = std::unordered_set{}; - auto thread_indexes = std::unordered_map{}; + auto read_group_by_query = [&conn, &process](auto type, + std::initializer_list tables) { + using value_type = common::mpl::unqualified_type_t; + + auto get_group_by_query = [&]() { + auto _select = std::vector{}; + for(auto itr : tables) + { + auto _group_by = fmt::format("{}", value_type::get_group_by()); + + _select.emplace_back( + fmt::format("SELECT {} FROM {} WHERE {}", + _group_by, + itr, + fmt::format("guid = '{}' AND nid = {} AND pid = {}", + process.guid, + process.nid, + process.pid))); + } + + return fmt::format("{} 
GROUP BY {} ORDER BY {}", + fmt::join(_select.begin(), _select.end(), " UNION ALL "), + value_type::get_group_by(), + value_type::get_order_by()); + }; + + return rocpd::read_sql_query(conn, get_group_by_query()); + }; + + auto read_event_args = [&conn, &process, &ocfg](uint64_t event_id) { + if(!ocfg.annotate_args) return std::vector{}; + return rocpd::read_sql_query( + conn, + fmt::format( + "SELECT * FROM rocpd_arg WHERE guid='{}' AND event_id={}", process.guid, event_id)); + }; + + auto read_pmc_events = [&conn, &process, &ocfg](uint64_t event_id) { + if(!ocfg.annotate_pmc) return std::vector{}; + return rocpd::read_sql_query( + conn, + fmt::format("SELECT * FROM rocpd_pmc_event WHERE guid='{}' AND event_id={}", + process.guid, + event_id)); + }; + + auto pmc_info = std::unordered_map{}; + auto read_pmc_info = [&conn, &process, &pmc_info](uint64_t pmc_id) -> const types::pmc_info* { + if(pmc_info.count(pmc_id) > 0) return &pmc_info.at(pmc_id); + + auto _pmc_info_query = fmt::format( + "SELECT * FROM rocpd_info_pmc WHERE id={} AND guid='{}'", pmc_id, process.guid); + auto _data = rocpd::read_sql_query(conn, _pmc_info_query); + + if(_data.empty()) + { + ROCP_WARNING << fmt::format("SQL Query \"{}\" returned no results", _pmc_info_query); + return nullptr; + } + + ROCP_WARNING_IF(_data.size() > 1) + << fmt::format("SQL Query \"{}\" returned {} results (expected one result)", + _pmc_info_query, + _data.size()); + + pmc_info[pmc_id] = _data.at(0); + + return &pmc_info.at(pmc_id); + }; + + auto agent_thread_ids = + read_group_by_query(types::group_by_agent_tid{}, {"kernels", "memory_allocations"}); + + auto agent_queue_ids = read_group_by_query(types::group_by_agent_queue_id{}, {"kernels"}); + auto stream_ids = read_group_by_query(types::group_by_stream_id{}, + {"kernels", "memory_copies", "memory_allocations"}); + auto thread_indexes = std::unordered_map{}; + + { + auto thread_ids = read_group_by_query(types::group_by_tid{}, {"threads"}); + 
std::sort(thread_ids.begin(), thread_ids.end()); + + for(const auto& itr : thread_ids) + thread_indexes[itr.tid] = thread_indexes.size(); + } auto thread_tracks = std::unordered_map{}; auto agent_thread_tracks = @@ -223,41 +319,29 @@ write_perfetto( std::unordered_map>{}; auto stream_tracks = std::unordered_map{}; - { - for(auto ditr : memory_copy_gen) - for(const auto& itr : memory_copy_gen.get(ditr)) - { - auto stream_id = rocprofiler_stream_id_t{.handle = itr.stream_id}; - agent_stream_ids.emplace(stream_id); - if(ocfg.group_by_queue) - { - agent_thread_ids[itr.dst_agent_abs_index].emplace(itr.tid); - } - } - } - - for(auto ditr : memory_allocation_gen) - for(const auto& itr : memory_allocation_gen.get(ditr)) - { - agent_thread_ids_alloc[itr.agent_abs_index].emplace(itr.tid); - } + // absolute_index |-> (agent, agent_index) + auto agent_data = + std::unordered_map>{}; + for(const auto& itr : agents) { - for(auto ditr : kernel_dispatch_gen) - for(const auto& itr : kernel_dispatch_gen.get(ditr)) - { - auto stream_id = rocprofiler_stream_id_t{.handle = itr.stream_id}; - auto queue_id = rocprofiler_queue_id_t{.handle = itr.queue_id}; - agent_stream_ids.emplace(stream_id); - if(ocfg.group_by_queue) - { - agent_queue_ids[itr.agent_abs_index].emplace(queue_id); - } - } + auto _create_agent_index = [&ocfg](const rocpd::types::agent& _agent) -> tool::agent_index { + auto ret_index = tool::create_agent_index( + ocfg.agent_index_value, + _agent.node_id, // absolute index + static_cast(_agent.logical_node_id), // relative index + static_cast(_agent.logical_node_type_id), // type-relative index + std::string_view(_agent.type_name)); + return ret_index; + }; + + auto new_index = _create_agent_index(itr); + agent_data.emplace(itr.absolute_index, std::make_pair(itr, new_index)); } uint64_t nthrn = 0; for(auto ditr : thread_gen) + { for(const auto& itr : thread_gen.get(ditr)) { auto is_main_thread = (static_cast(itr.tid) == this_pid); @@ -280,84 +364,55 @@ write_perfetto( 
thread_tracks.emplace(itr.tid, _track); } + } - for(const auto& [abs_index, thread_ids] : agent_thread_ids) + for(const auto& itr : agent_thread_ids) { - const auto _agent = agent_data.at(abs_index).first; + const auto _agent = agent_data.at(itr.agent_absolute_index).first; + const auto _tid = itr.tid; - for(auto titr : thread_ids) - { - auto _namess = std::stringstream{}; - _namess << "COPY to AGENT [" << _agent.logical_node_id << "] THREAD [" - << thread_indexes.at(titr) << "] "; - - if(_agent.type == "CPU") - _namess << "(CPU)"; - else if(_agent.type == "GPU") - _namess << "(GPU)"; - else - _namess << "(UNK)"; + auto _name = fmt::format("COPY to AGENT [{}] THREAD [{}] ({})", + _agent.logical_index, + thread_indexes.at(_tid), + _agent.type_name); - auto _track = ::perfetto::Track{get_hash_id(_namess.str()), this_pid_track}; - auto _desc = _track.Serialize(); - _desc.set_name(_namess.str()); + auto _track = ::perfetto::Track{get_hash_id(_name), this_pid_track}; + auto _desc = _track.Serialize(); + _desc.set_name(_name); - perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); - agent_thread_tracks[abs_index].emplace(titr, _track); - } + agent_thread_tracks[itr.agent_absolute_index].emplace(_tid, _track); } - for(const auto& [abs_index, queue_ids] : agent_queue_ids) + for(const auto& itr : agent_queue_ids) { - uint32_t nqueue = 0; - const auto _agent = agent_data.at(abs_index).first; - auto agent_index_info = agent_data.at(abs_index).second; - - for(auto qitr : queue_ids) - { - auto _namess = std::stringstream{}; - - _namess << "COMPUTE " << agent_index_info.label << " [" << agent_index_info.index - << "] QUEUE [" << nqueue++ << "] "; - _namess << agent_index_info.type; - - auto _track = ::perfetto::Track{get_hash_id(_namess.str()), this_pid_track}; - auto _desc = _track.Serialize(); - _desc.set_name(_namess.str()); + const auto _agent = agent_data.at(itr.agent_absolute_index).first; + auto _index_info = 
agent_data.at(itr.agent_absolute_index).second; + auto _name = fmt::format( + "COMPUTE {} [{}] QUEUE [{}]", _index_info.label, _index_info.index, itr.queue_id); + auto _track = ::perfetto::Track{get_hash_id(_name), this_pid_track}; + auto _desc = _track.Serialize(); + _desc.set_name(_name); - ::perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + ::perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); - agent_queue_tracks[abs_index].emplace(qitr, _track); - } + agent_queue_tracks[itr.agent_absolute_index].emplace(rocprofiler_queue_id_t{itr.queue_id}, + _track); } - for(const auto& sitr : agent_stream_ids) + for(const auto& itr : stream_ids) { - const auto stream_id = sitr.handle; - - auto _name = fmt::format("STREAM [{}]", stream_id); - + auto _name = fmt::format("STREAM [{}]", itr.stream_id); auto _track = ::perfetto::Track{get_hash_id(_name), this_pid_track}; auto _desc = _track.Serialize(); _desc.set_name(_name); ::perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); - stream_tracks.emplace(sitr, _track); + stream_tracks.emplace(rocprofiler_stream_id_t{itr.stream_id}, _track); } - // Fetch counter values - auto counter_id_value = std::map{}; - auto counter_id_name = std::map{}; - for(auto ditr : counter_collection_gen) - for(const auto& record : counter_collection_gen.get(ditr)) - { - // Accumulate counters based on ID - counter_id_value[record.counter_id] += record.value; - counter_id_name[record.counter_id] = std::string{record.counter_name}; - } - // trace events { auto get_category_string = [](std::string_view _category) { @@ -367,25 +422,30 @@ write_perfetto( { if(_category == citr.name) _category_idx = citr.value; } - return sdk::get_perfetto_category(_category_idx); + return (_category_idx != ROCPROFILER_BUFFER_TRACING_NONE) + ? 
sdk::get_perfetto_category(_category_idx) + : _category.data(); }; + auto _regions_perf = get_simple_timer("regions"); for(auto ditr : region_gen) { for(auto itr : region_gen.get(ditr)) { auto& track = thread_tracks.at(itr.tid); - auto _name = itr.name; + auto _func = std::string{}; if(itr.has_extdata()) { if(auto _extdata = itr.get_extdata(); !_extdata.message.empty()) - _name = _extdata.message; + _func = _extdata.message; } - auto _category = ::perfetto::DynamicCategory{get_category_string(itr.category)}; + auto _args = read_event_args(itr.event_id); + auto _pmc_events = read_pmc_events(itr.event_id); + auto _category = ::perfetto::DynamicCategory{get_category_string(itr.category)}; TRACE_EVENT_BEGIN(_category, - ::perfetto::DynamicString{_name}, + ::perfetto::DynamicString{itr.name}, track, itr.start, ::perfetto::Flow::Global(itr.stack_id ^ uuid_pid), @@ -400,60 +460,90 @@ write_perfetto( "kind", itr.category, "operation", - _name, + _func.empty() ? itr.name : _func, "corr_id", itr.stack_id, "ancestor_id", itr.parent_stack_id, - [&](::perfetto::EventContext ctx) { (void) ctx; }); + [&](::perfetto::EventContext ctx) { + for(const auto& arg : _args) + sdk::add_perfetto_annotation(ctx, arg.name, arg.value); + + for(const auto& pevt : _pmc_events) + { + if(const auto* pinfo = read_pmc_info(pevt.pmc_id); pinfo) + { + sdk::add_perfetto_annotation( + ctx, pinfo->name, pevt.value); + } + } + }); TRACE_EVENT_END(_category, track, itr.end); tracing_session->FlushBlocking(); } } + report_simple_timer(_regions_perf); + auto _samples_perf = get_simple_timer("samples"); for(auto ditr : sample_gen) { for(auto itr : sample_gen.get(ditr)) { auto& track = thread_tracks.at(itr.tid); - auto _name = itr.name; + auto _func = std::string{}; if(itr.has_extdata()) { if(auto _extdata = itr.get_extdata(); !_extdata.message.empty()) - _name = _extdata.message; + _func = _extdata.message; } - auto _category = ::perfetto::DynamicCategory{get_category_string(itr.category)}; - 
TRACE_EVENT_INSTANT(_category, - ::perfetto::DynamicString{_name}, - track, - itr.timestamp, - ::perfetto::Flow::Global(itr.stack_id ^ uuid_pid), - "begin_ns", - itr.timestamp, - "end_ns", - itr.timestamp, - "delta_ns", - 0, - "tid", - itr.tid, - "kind", - itr.category, - "operation", - _name, - "corr_id", - itr.stack_id, - "ancestor_id", - itr.parent_stack_id, - [&](::perfetto::EventContext ctx) { (void) ctx; }); + auto _args = read_event_args(itr.event_id); + auto _pmc_events = read_pmc_events(itr.event_id); + auto _category = ::perfetto::DynamicCategory{get_category_string(itr.category)}; + TRACE_EVENT_INSTANT( + _category, + ::perfetto::DynamicString{itr.name}, + track, + itr.timestamp, + ::perfetto::Flow::Global(itr.stack_id ^ uuid_pid), + "begin_ns", + itr.timestamp, + "end_ns", + itr.timestamp, + "delta_ns", + 0, + "tid", + itr.tid, + "kind", + itr.category, + "operation", + _func.empty() ? itr.name : _func, + "corr_id", + itr.stack_id, + "ancestor_id", + itr.parent_stack_id, + [&](::perfetto::EventContext ctx) { + for(const auto& arg : _args) + sdk::add_perfetto_annotation(ctx, arg.name, arg.value); + + for(const auto& pevt : _pmc_events) + { + if(const auto* pinfo = read_pmc_info(pevt.pmc_id); pinfo) + { + sdk::add_perfetto_annotation(ctx, pinfo->name, pevt.value); + } + } + }); tracing_session->FlushBlocking(); } } + report_simple_timer(_samples_perf); + auto _memcpy_perf = get_simple_timer("memory copies"); for(auto ditr : memory_copy_gen) { for(auto itr : memory_copy_gen.get(ditr)) @@ -461,7 +551,7 @@ write_perfetto( ::perfetto::Track* _track = nullptr; if(ocfg.group_by_queue) { - _track = &agent_thread_tracks.at(itr.dst_agent_abs_index).at(itr.tid); + _track = &agent_thread_tracks.at(itr.dst_agent_absolute_index).at(itr.tid); } else { @@ -469,8 +559,10 @@ write_perfetto( _track = &stream_tracks.at(stream_id); } - auto src_agent_index = agent_data.at(itr.src_agent_abs_index).second; - auto dst_agent_index = agent_data.at(itr.dst_agent_abs_index).second; 
+ auto _args = read_event_args(itr.event_id); + auto _pmc_events = read_pmc_events(itr.event_id); + auto src_agent_index = agent_data.at(itr.src_agent_absolute_index).second; + auto dst_agent_index = agent_data.at(itr.dst_agent_absolute_index).second; TRACE_EVENT_BEGIN(sdk::perfetto_category::name, ::perfetto::DynamicString{itr.name}, *_track, @@ -496,14 +588,28 @@ write_perfetto( itr.stack_id, "tid", itr.tid, - "stream_id", - itr.stream_id); + [&](::perfetto::EventContext ctx) { + for(const auto& arg : _args) + sdk::add_perfetto_annotation(ctx, arg.name, arg.value); + + for(const auto& pevt : _pmc_events) + { + if(const auto* pinfo = read_pmc_info(pevt.pmc_id); pinfo) + { + sdk::add_perfetto_annotation( + ctx, pinfo->name, pevt.value); + } + } + }); TRACE_EVENT_END( sdk::perfetto_category::name, *_track, itr.end); + + tracing_session->FlushBlocking(); } - tracing_session->FlushBlocking(); } + report_simple_timer(_memcpy_perf); + auto _kernels_perf = get_simple_timer("kernel dispatches"); for(auto ditr : kernel_dispatch_gen) { auto gen = kernel_dispatch_gen.get(ditr); @@ -512,7 +618,7 @@ write_perfetto( auto& current = *it; ::perfetto::Track* _track = nullptr; - auto agent_id = current.agent_abs_index; + auto agent_id = current.agent_absolute_index; auto queue_id = rocprofiler_queue_id_t{.handle = current.queue_id}; auto stream_id = rocprofiler_stream_id_t{.handle = current.stream_id}; if(ocfg.group_by_queue) @@ -529,7 +635,7 @@ write_perfetto( // kernel dispatches. Perfetto displays slices incorrectly if overlapping // slices on the same track are not completely enveloped. 
auto next = std::next(it); - if(next != end(gen) && next->agent_abs_index == it->agent_abs_index && + if(next != end(gen) && next->agent_absolute_index == it->agent_absolute_index && ((ocfg.group_by_queue && next->queue_id == it->queue_id) || (!ocfg.group_by_queue && next->stream_id == it->stream_id)) && next->start < it->end) @@ -550,7 +656,9 @@ write_perfetto( next->start = mid; } - auto agent_index = agent_data.at(current.agent_abs_index).second; + auto _args = read_event_args(current.event_id); + auto _pmc_events = read_pmc_events(current.event_id); + auto agent_index = agent_data.at(current.agent_absolute_index).second; auto _name = (ocfg.kernel_rename && !current.region.empty()) ? current.region : current.name; TRACE_EVENT_BEGIN(sdk::perfetto_category::name, @@ -587,365 +695,387 @@ write_perfetto( "stream_id", current.stream_id, [&](::perfetto::EventContext ctx) { - for(auto& [counter_id, counter_value] : counter_id_value) + for(const auto& arg : _args) + sdk::add_perfetto_annotation(ctx, arg.name, arg.value); + + for(const auto& pevt : _pmc_events) { - rocprofiler::sdk::add_perfetto_annotation( - ctx, counter_id_name.at(counter_id), counter_value); + if(const auto* pinfo = read_pmc_info(pevt.pmc_id); pinfo) + { + sdk::add_perfetto_annotation( + ctx, pinfo->name, pevt.value); + } } }); TRACE_EVENT_END(sdk::perfetto_category::name, *_track, current.end); + + tracing_session->FlushBlocking(); } - tracing_session->FlushBlocking(); } + report_simple_timer(_kernels_perf); } // counter tracks - { - // memory copy counter track - auto mem_cpy_endpoints = std::map>{}; - auto mem_cpy_extremes = std::pair{std::numeric_limits::max(), - std::numeric_limits::min()}; - auto constexpr timestamp_buffer = 1000; - for(auto ditr : memory_copy_gen) - { - for(const auto& itr : memory_copy_gen.get(ditr)) - { - uint64_t _mean_timestamp = itr.start + (0.5 * (itr.end - itr.start)); - - mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(itr.start - timestamp_buffer, 0); - 
mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(itr.start, 0); - mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(_mean_timestamp, 0); - mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(itr.end, 0); - mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(itr.end + timestamp_buffer, 0); - - mem_cpy_extremes = std::make_pair(std::min(mem_cpy_extremes.first, itr.start), - std::max(mem_cpy_extremes.second, itr.end)); - } - } - - for(auto ditr : memory_copy_gen) - { - for(const auto& itr : memory_copy_gen.get(ditr)) - { - auto mbeg = mem_cpy_endpoints.at(itr.dst_agent_abs_index).lower_bound(itr.start); - auto mend = mem_cpy_endpoints.at(itr.dst_agent_abs_index).upper_bound(itr.end); - - LOG_IF(FATAL, mbeg == mend) - << "Missing range for timestamp [" << itr.start << ", " << itr.end << "]"; - - for(auto mitr = mbeg; mitr != mend; ++mitr) - mitr->second += itr.size; - } - } - - constexpr auto bytes_multiplier = 1024; - constexpr auto extremes_endpoint_buffer = 5000; - - auto mem_cpy_tracks = std::unordered_map{}; - auto mem_cpy_cnt_names = std::vector{}; - mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size()); - - for(auto& [abs_index, ts_map] : mem_cpy_endpoints) - { - mem_cpy_endpoints[abs_index].emplace(mem_cpy_extremes.first - extremes_endpoint_buffer, - 0); - mem_cpy_endpoints[abs_index].emplace(mem_cpy_extremes.second + extremes_endpoint_buffer, - 0); - - auto _track_name = std::stringstream{}; - const auto _agent = agent_data.at(abs_index).first; - auto agent_index_info = agent_data.at(abs_index).second; - _track_name << "COPY BYTES to " << agent_index_info.label << " [" - << agent_index_info.index << "] (" << agent_index_info.type << ")"; - - constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; - auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str()); - mem_cpy_tracks.emplace(abs_index, - ::perfetto::CounterTrack{_name.c_str(), this_pid_track} - .set_unit(_unit) - .set_unit_multiplier(bytes_multiplier) - .set_is_incremental(false)); - 
} - - for(auto& mitr : mem_cpy_endpoints) - { - for(auto itr : mitr.second) - { - TRACE_COUNTER(sdk::perfetto_category::name, - mem_cpy_tracks.at(mitr.first), - itr.first, - itr.second / bytes_multiplier); - } - tracing_session->FlushBlocking(); - } - - // memory allocation counter track - struct free_memory_information - { - rocprofiler_timestamp_t start_timestamp = 0; - rocprofiler_timestamp_t end_timestamp = 0; - rocprofiler_address_t address = {.handle = 0}; - }; - - struct memory_information - { - uint64_t alloc_size = {0}; - rocprofiler_address_t address = {.handle = 0}; - bool is_alloc_op = {false}; - }; - - struct agent_and_size - { - uint64_t agent_abs_index = {}; - uint64_t size = {0}; - }; - - auto mem_alloc_endpoints = - std::unordered_map>{}; - auto mem_alloc_extremes = std::pair{ - std::numeric_limits::max(), std::numeric_limits::min()}; - auto address_to_agent_and_size = - std::unordered_map{}; - auto free_mem_info = std::vector{}; - - // Load memory allocation endpoints - for(auto ditr : memory_allocation_gen) - { - for(const auto& itr : memory_allocation_gen.get(ditr)) - { - if(itr.type == "ALLOC") - { - LOG_IF(FATAL, itr.agent_name.empty()) - << "Missing agent id for memory allocation trace"; - mem_alloc_endpoints[itr.agent_abs_index].emplace( - itr.start, - memory_information{ - itr.size, rocprofiler_address_t{.handle = itr.address}, true}); - mem_alloc_endpoints[itr.agent_abs_index].emplace( - itr.end, - memory_information{ - itr.size, rocprofiler_address_t{.handle = itr.address}, true}); - address_to_agent_and_size.emplace( - rocprofiler_address_t{.handle = itr.address}, - agent_and_size{itr.agent_abs_index, itr.size}); - } - else if(itr.type == "FREE") - { - // Store free memory operations in seperate vector to pair with agent - // and allocation size in following loop - free_mem_info.push_back(free_memory_information{ - itr.start, itr.end, rocprofiler_address_t{.handle = itr.address}}); - } - else - { - ROCP_CI_LOG(WARNING) << "unhandled 
memory allocation type " << itr.type; - } - } - } - - // Add free memory operations to the endpoint map - for(const auto& itr : free_mem_info) - { - if(address_to_agent_and_size.count(itr.address) == 0) - { - if(itr.address.handle == 0) - { - // Freeing null pointers is expected behavior and is occurs in HSA functions - // like hipStreamDestroy - ROCP_INFO << "null pointer freed due to HSA operation"; - } - else - { - // Following should not occur - ROCP_INFO << "Unpaired free operation occurred"; - } - continue; - } - auto [agent_abs_index, size] = address_to_agent_and_size[itr.address]; - mem_alloc_endpoints[agent_abs_index].emplace( - itr.start_timestamp, memory_information{size, itr.address, false}); - mem_alloc_endpoints[agent_abs_index].emplace( - itr.end_timestamp, memory_information{size, itr.address, false}); - } - // Create running sum of allocated memory - for(auto& [_, endpoint_map] : mem_alloc_endpoints) - { - if(!endpoint_map.empty()) - { - auto earliest_agent_timestamp = endpoint_map.begin()->first; - auto latest_agent_timestamp = (--endpoint_map.end())->first; - mem_alloc_extremes = - std::make_pair(std::min(mem_alloc_extremes.first, earliest_agent_timestamp), - std::max(mem_alloc_extremes.second, latest_agent_timestamp)); - } - if(endpoint_map.size() <= 1) - { - continue; - } - - auto prev = endpoint_map.begin(); - auto itr = std::next(prev); - for(; itr != endpoint_map.end(); ++itr, ++prev) - { - // If address or allocation type are different, add or subtract from running sum - if(prev->second.address != itr->second.address || - prev->second.is_alloc_op != itr->second.is_alloc_op) - { - if(itr->second.is_alloc_op) - { - itr->second.alloc_size += prev->second.alloc_size; - } - else if(prev->second.alloc_size >= itr->second.alloc_size) - { - itr->second.alloc_size = prev->second.alloc_size - itr->second.alloc_size; - } - } - else - { - itr->second.alloc_size = prev->second.alloc_size; - } - } - } - - auto mem_alloc_tracks = std::unordered_map{}; - 
auto mem_alloc_cnt_names = std::vector{}; - mem_alloc_cnt_names.reserve(mem_alloc_endpoints.size()); - - for(auto& [abs_index, ts_map] : mem_alloc_endpoints) - { - mem_alloc_endpoints[abs_index].emplace( - mem_alloc_extremes.first - extremes_endpoint_buffer, - memory_information{0, {0}, false}); - mem_alloc_endpoints[abs_index].emplace( - mem_alloc_extremes.second + extremes_endpoint_buffer, - memory_information{0, {0}, false}); - - auto _track_name = std::stringstream{}; - - if(agent_data.find(abs_index) != agent_data.end()) - { - const auto _agent = agent_data.at(abs_index).first; - auto agent_index_info = agent_data.at(abs_index).second; - _track_name << "ALLOCATE BYTES on " << agent_index_info.label << " [" - << agent_index_info.index << "] (" << agent_index_info.type << ")"; - } - else - { - _track_name << "FREE BYTES"; - } - - constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; - auto& _name = mem_alloc_cnt_names.emplace_back(_track_name.str()); - mem_alloc_tracks.emplace(abs_index, - ::perfetto::CounterTrack{_name.c_str(), this_pid_track} - .set_unit(_unit) - .set_unit_multiplier(bytes_multiplier) - .set_is_incremental(false)); - } - - for(auto& alloc_itr : mem_alloc_endpoints) - { - for(auto itr : alloc_itr.second) - { - TRACE_COUNTER(sdk::perfetto_category::name, - mem_alloc_tracks.at(alloc_itr.first), - itr.first, - itr.second.alloc_size / bytes_multiplier); - } - } - tracing_session->FlushBlocking(); - } + // { + // // memory copy counter track + // auto mem_cpy_endpoints = std::map>{}; auto mem_cpy_extremes = std::pair{std::numeric_limits::max(), + // std::numeric_limits::min()}; + // auto constexpr timestamp_buffer = 1000; + // for(auto ditr : memory_copy_gen) + // { + // for(const auto& itr : memory_copy_gen.get(ditr)) + // { + // uint64_t _mean_timestamp = itr.start + (0.5 * (itr.end - itr.start)); + + // mem_cpy_endpoints[itr.dst_agent_absolute_index].emplace( + // itr.start - timestamp_buffer, 0); + // 
mem_cpy_endpoints[itr.dst_agent_absolute_index].emplace(itr.start, 0); + // mem_cpy_endpoints[itr.dst_agent_absolute_index].emplace(_mean_timestamp, 0); + // mem_cpy_endpoints[itr.dst_agent_absolute_index].emplace(itr.end, 0); + // mem_cpy_endpoints[itr.dst_agent_absolute_index].emplace(itr.end + + // timestamp_buffer, + // 0); + + // mem_cpy_extremes = std::make_pair(std::min(mem_cpy_extremes.first, itr.start), + // std::max(mem_cpy_extremes.second, itr.end)); + // } + // } + + // for(auto ditr : memory_copy_gen) + // { + // for(const auto& itr : memory_copy_gen.get(ditr)) + // { + // auto mbeg = + // mem_cpy_endpoints.at(itr.dst_agent_absolute_index).lower_bound(itr.start); + // auto mend = + // mem_cpy_endpoints.at(itr.dst_agent_absolute_index).upper_bound(itr.end); + + // LOG_IF(FATAL, mbeg == mend) + // << "Missing range for timestamp [" << itr.start << ", " << itr.end << "]"; + + // for(auto mitr = mbeg; mitr != mend; ++mitr) + // mitr->second += itr.size; + // } + // } + + // constexpr auto bytes_multiplier = 1024; + // constexpr auto extremes_endpoint_buffer = 5000; + + // auto mem_cpy_tracks = std::unordered_map{}; + // auto mem_cpy_cnt_names = std::vector{}; + // mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size()); + + // for(auto& [abs_index, ts_map] : mem_cpy_endpoints) + // { + // mem_cpy_endpoints[abs_index].emplace(mem_cpy_extremes.first - + // extremes_endpoint_buffer, + // 0); + // mem_cpy_endpoints[abs_index].emplace(mem_cpy_extremes.second + + // extremes_endpoint_buffer, + // 0); + + // auto _track_name = std::stringstream{}; + // const auto _agent = agent_data.at(abs_index).first; + // auto agent_index_info = agent_data.at(abs_index).second; + // _track_name << "COPY BYTES to " << agent_index_info.label << " [" + // << agent_index_info.index << "] (" << agent_index_info.type << ")"; + + // constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; + // auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str()); + // 
mem_cpy_tracks.emplace(abs_index, + // ::perfetto::CounterTrack{_name.c_str(), this_pid_track} + // .set_unit(_unit) + // .set_unit_multiplier(bytes_multiplier) + // .set_is_incremental(false)); + // } + + // for(auto& mitr : mem_cpy_endpoints) + // { + // for(auto itr : mitr.second) + // { + // TRACE_COUNTER(sdk::perfetto_category::name, + // mem_cpy_tracks.at(mitr.first), + // itr.first, + // itr.second / bytes_multiplier); + // } + // tracing_session->FlushBlocking(); + // } + + // // memory allocation counter track + // struct free_memory_information + // { + // rocprofiler_timestamp_t start_timestamp = 0; + // rocprofiler_timestamp_t end_timestamp = 0; + // rocprofiler_address_t address = {.handle = 0}; + // }; + + // struct memory_information + // { + // uint64_t alloc_size = {0}; + // rocprofiler_address_t address = {.handle = 0}; + // bool is_alloc_op = {false}; + // }; + + // struct agent_and_size + // { + // uint64_t agent_absolute_index = {}; + // uint64_t size = {0}; + // }; + + // auto mem_alloc_endpoints = + // std::unordered_map>{}; + // auto mem_alloc_extremes = std::pair{ + // std::numeric_limits::max(), std::numeric_limits::min()}; + // auto address_to_agent_and_size = + // std::unordered_map{}; + // auto free_mem_info = std::vector{}; + + // // Load memory allocation endpoints + // for(auto ditr : memory_allocation_gen) + // { + // for(const auto& itr : memory_allocation_gen.get(ditr)) + // { + // if(itr.type == "ALLOC") + // { + // LOG_IF(FATAL, itr.agent_name.empty()) + // << "Missing agent id for memory allocation trace"; + // mem_alloc_endpoints[itr.agent_absolute_index].emplace( + // itr.start, + // memory_information{ + // itr.size, rocprofiler_address_t{.handle = itr.address}, true}); + // mem_alloc_endpoints[itr.agent_absolute_index].emplace( + // itr.end, + // memory_information{ + // itr.size, rocprofiler_address_t{.handle = itr.address}, true}); + // address_to_agent_and_size.emplace( + // rocprofiler_address_t{.handle = itr.address}, 
+ // agent_and_size{itr.agent_absolute_index, itr.size}); + // } + // else if(itr.type == "FREE") + // { + // // Store free memory operations in seperate vector to pair with agent + // // and allocation size in following loop + // free_mem_info.push_back(free_memory_information{ + // itr.start, itr.end, rocprofiler_address_t{.handle = itr.address}}); + // } + // else + // { + // ROCP_CI_LOG(WARNING) << "unhandled memory allocation type " << itr.type; + // } + // } + // } + + // // Add free memory operations to the endpoint map + // for(const auto& itr : free_mem_info) + // { + // if(address_to_agent_and_size.count(itr.address) == 0) + // { + // if(itr.address.handle == 0) + // { + // // Freeing null pointers is expected behavior and is occurs in HSA functions + // // like hipStreamDestroy + // ROCP_INFO << "null pointer freed due to HSA operation"; + // } + // else + // { + // // Following should not occur + // ROCP_INFO << "Unpaired free operation occurred"; + // } + // continue; + // } + // auto [agent_absolute_index, size] = address_to_agent_and_size[itr.address]; + // mem_alloc_endpoints[agent_absolute_index].emplace( + // itr.start_timestamp, memory_information{size, itr.address, false}); + // mem_alloc_endpoints[agent_absolute_index].emplace( + // itr.end_timestamp, memory_information{size, itr.address, false}); + // } + // // Create running sum of allocated memory + // for(auto& [_, endpoint_map] : mem_alloc_endpoints) + // { + // if(!endpoint_map.empty()) + // { + // auto earliest_agent_timestamp = endpoint_map.begin()->first; + // auto latest_agent_timestamp = (--endpoint_map.end())->first; + // mem_alloc_extremes = + // std::make_pair(std::min(mem_alloc_extremes.first, earliest_agent_timestamp), + // std::max(mem_alloc_extremes.second, latest_agent_timestamp)); + // } + // if(endpoint_map.size() <= 1) + // { + // continue; + // } + + // auto prev = endpoint_map.begin(); + // auto itr = std::next(prev); + // for(; itr != endpoint_map.end(); ++itr, ++prev) 
+ // { + // // If address or allocation type are different, add or subtract from running sum + // if(prev->second.address != itr->second.address || + // prev->second.is_alloc_op != itr->second.is_alloc_op) + // { + // if(itr->second.is_alloc_op) + // { + // itr->second.alloc_size += prev->second.alloc_size; + // } + // else if(prev->second.alloc_size >= itr->second.alloc_size) + // { + // itr->second.alloc_size = prev->second.alloc_size - + // itr->second.alloc_size; + // } + // } + // else + // { + // itr->second.alloc_size = prev->second.alloc_size; + // } + // } + // } + + // auto mem_alloc_tracks = std::unordered_map{}; + // auto mem_alloc_cnt_names = std::vector{}; + // mem_alloc_cnt_names.reserve(mem_alloc_endpoints.size()); + + // for(auto& [abs_index, ts_map] : mem_alloc_endpoints) + // { + // mem_alloc_endpoints[abs_index].emplace( + // mem_alloc_extremes.first - extremes_endpoint_buffer, + // memory_information{0, {0}, false}); + // mem_alloc_endpoints[abs_index].emplace( + // mem_alloc_extremes.second + extremes_endpoint_buffer, + // memory_information{0, {0}, false}); + + // auto _track_name = std::stringstream{}; + + // if(agent_data.find(abs_index) != agent_data.end()) + // { + // const auto _agent = agent_data.at(abs_index).first; + // auto agent_index_info = agent_data.at(abs_index).second; + // _track_name << "ALLOCATE BYTES on " << agent_index_info.label << " [" + // << agent_index_info.index << "] (" << agent_index_info.type << ")"; + // } + // else + // { + // _track_name << "FREE BYTES"; + // } + + // constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; + // auto& _name = mem_alloc_cnt_names.emplace_back(_track_name.str()); + // mem_alloc_tracks.emplace(abs_index, + // ::perfetto::CounterTrack{_name.c_str(), this_pid_track} + // .set_unit(_unit) + // .set_unit_multiplier(bytes_multiplier) + // .set_is_incremental(false)); + // } + + // for(auto& alloc_itr : mem_alloc_endpoints) + // { + // for(auto itr : alloc_itr.second) + 
// { + // TRACE_COUNTER(sdk::perfetto_category::name, + // mem_alloc_tracks.at(alloc_itr.first), + // itr.first, + // itr.second.alloc_size / bytes_multiplier); + // } + // } + // tracing_session->FlushBlocking(); + // } // Create counter tracks per agent - { - auto counters_endpoints = - std::unordered_map>>{}; - - auto counters_extremes = std::pair{ - std::numeric_limits::max(), std::numeric_limits::min()}; - - auto constexpr timestamp_buffer = 1000; - - for(auto ditr : counter_collection_gen) - for(const auto& record : counter_collection_gen.get(ditr)) - { - // const auto& info = record.; - - const auto& start_timestamp = record.start; - const auto& end_timestamp = record.end; - - uint64_t _mean_timestamp = - start_timestamp + (0.5 * (end_timestamp - start_timestamp)); - - for(auto& [counter_id, counter_value] : counter_id_value) - { - counters_endpoints[record.agent_abs_index][counter_id].emplace( - start_timestamp - timestamp_buffer, 0); - counters_endpoints[record.agent_abs_index][counter_id].emplace(start_timestamp, - counter_value); - counters_endpoints[record.agent_abs_index][counter_id].emplace(_mean_timestamp, - counter_value); - counters_endpoints[record.agent_abs_index][counter_id].emplace(end_timestamp, - 0); - counters_endpoints[record.agent_abs_index][counter_id].emplace( - end_timestamp + timestamp_buffer, 0); - } - - counters_extremes = std::make_pair(std::min(counters_extremes.first, record.start), - std::max(counters_extremes.second, record.end)); - } - - auto counter_tracks = - std::unordered_map>{}; - - constexpr auto extremes_endpoint_buffer = 5000; - - for(auto ditr : counter_collection_gen) - { - for(const auto& record : counter_collection_gen.get(ditr)) - { - // const auto& info = record.dispatch_data.dispatch_info; - // const auto& sym = tool_metadata.get_kernel_symbol(info.kernel_id); - - // CHECK(sym != nullptr); - - auto name = record.kernel_name; - - for(auto& [counter_id, counter_value] : counter_id_value) - { - 
counters_endpoints[record.agent_id][counter_id].emplace( - counters_extremes.first - extremes_endpoint_buffer, 0); - counters_endpoints[record.agent_id][counter_id].emplace( - counters_extremes.second + extremes_endpoint_buffer, 0); - - const auto _agent = agent_data.at(record.agent_abs_index).first; - auto agent_index_info = agent_data.at(record.agent_abs_index).second; - auto track_name_ss = std::stringstream{}; - track_name_ss << agent_index_info.label << " [" << agent_index_info.index - << "] " - << "PMC " << record.counter_name; - - auto track_name = track_name_ss.str(); - - counter_tracks[record.agent_abs_index].emplace( - track_name, ::perfetto::CounterTrack{track_name.c_str(), this_pid_track}); - auto& endpoints = counters_endpoints[record.agent_id][counter_id]; - for(auto& counter_itr : endpoints) - { - TRACE_COUNTER( - sdk::perfetto_category::name, - counter_tracks[record.agent_abs_index].at(track_name), - counter_itr.first, - counter_itr.second); - } - } - } - tracing_session->FlushBlocking(); - } - } + // { + // auto counters_endpoints = + // std::unordered_map>>{}; + + // auto counters_extremes = std::pair{ + // std::numeric_limits::max(), std::numeric_limits::min()}; + + // auto constexpr timestamp_buffer = 1000; + + // for(auto ditr : counter_collection_gen) + // for(const auto& record : counter_collection_gen.get(ditr)) + // { + // // const auto& info = record.; + + // const auto& start_timestamp = record.start; + // const auto& end_timestamp = record.end; + + // uint64_t _mean_timestamp = + // start_timestamp + (0.5 * (end_timestamp - start_timestamp)); + + // for(auto& [counter_id, counter_value] : counter_id_value) + // { + // counters_endpoints[record.agent_absolute_index][counter_id].emplace( + // start_timestamp - timestamp_buffer, 0); + // counters_endpoints[record.agent_absolute_index][counter_id].emplace( + // start_timestamp, counter_value); + // counters_endpoints[record.agent_absolute_index][counter_id].emplace( + // _mean_timestamp, 
counter_value); + // counters_endpoints[record.agent_absolute_index][counter_id].emplace( + // end_timestamp, 0); + // counters_endpoints[record.agent_absolute_index][counter_id].emplace( + // end_timestamp + timestamp_buffer, 0); + // } + + // counters_extremes = std::make_pair(std::min(counters_extremes.first, + // record.start), + // std::max(counters_extremes.second, + // record.end)); + // } + + // auto counter_tracks = + // std::unordered_map>{}; + + // constexpr auto extremes_endpoint_buffer = 5000; + + // for(auto ditr : counter_collection_gen) + // { + // for(const auto& record : counter_collection_gen.get(ditr)) + // { + // // const auto& info = record.dispatch_data.dispatch_info; + // // const auto& sym = tool_metadata.get_kernel_symbol(info.kernel_id); + + // // CHECK(sym != nullptr); + + // auto name = record.kernel_name; + + // for(auto& [counter_id, counter_value] : counter_id_value) + // { + // counters_endpoints[record.agent_id][counter_id].emplace( + // counters_extremes.first - extremes_endpoint_buffer, 0); + // counters_endpoints[record.agent_id][counter_id].emplace( + // counters_extremes.second + extremes_endpoint_buffer, 0); + + // const auto _agent = + // agent_data.at(record.agent_absolute_index).first; auto agent_index_info + // = agent_data.at(record.agent_absolute_index).second; auto track_name_ss + // = std::stringstream{}; track_name_ss << agent_index_info.label << " [" << + // agent_index_info.index + // << "] " + // << "PMC " << record.counter_name; + + // auto track_name = track_name_ss.str(); + + // counter_tracks[record.agent_absolute_index].emplace( + // track_name, ::perfetto::CounterTrack{track_name.c_str(), + // this_pid_track}); + // auto& endpoints = counters_endpoints[record.agent_id][counter_id]; + // for(auto& counter_itr : endpoints) + // { + // TRACE_COUNTER( + // sdk::perfetto_category::name, + // counter_tracks[record.agent_absolute_index].at(track_name), + // counter_itr.first, + // counter_itr.second); + // } + // } 
+ // } + // tracing_session->FlushBlocking(); + // } + // } ::perfetto::TrackEvent::Flush(); tracing_session->FlushBlocking(); diff --git a/source/lib/python/rocpd/source/perfetto.hpp b/source/lib/python/rocpd/source/perfetto.hpp index 601a83edd..42bef0ffd 100644 --- a/source/lib/python/rocpd/source/perfetto.hpp +++ b/source/lib/python/rocpd/source/perfetto.hpp @@ -45,25 +45,23 @@ namespace tool = ::rocprofiler::tool; struct PerfettoSession { - PerfettoSession(const tool::output_config&); + PerfettoSession(const tool::output_config&, sqlite3* connection); ~PerfettoSession(); std::unique_ptr<::perfetto::TracingSession> tracing_session = {}; const tool::output_config& config; + sqlite3* connection = nullptr; }; void -write_perfetto( - const PerfettoSession& perfetto_session, - const types::process& process, - const std::unordered_map>& - agent_data, - const tool::generator& thread_gen, - const tool::generator& region_gen, - const tool::generator& sample_gen, - const tool::generator& kernel_dispatch_gen, - const tool::generator& memory_copy_gen, - const tool::generator& memory_allocation_gen, - const tool::generator& counter_collection_gen); +write_perfetto(const PerfettoSession& perfetto_session, + const types::process& process, + const std::vector& agents, + const tool::generator& thread_gen, + const tool::generator& region_gen, + const tool::generator& sample_gen, + const tool::generator& kernel_dispatch_gen, + const tool::generator& memory_copy_gen, + const tool::generator& memory_allocation_gen); } // namespace output } // namespace rocpd diff --git a/source/lib/python/rocpd/source/serialization/CMakeLists.txt b/source/lib/python/rocpd/source/serialization/CMakeLists.txt index c2303112f..e546c4eda 100644 --- a/source/lib/python/rocpd/source/serialization/CMakeLists.txt +++ b/source/lib/python/rocpd/source/serialization/CMakeLists.txt @@ -4,8 +4,6 @@ set(libpyrocpd_source_serialization_sources sql.hpp) set(libpyrocpd_source_serialization_headers sql.cpp) 
-foreach(_PYTHON_VERSION ${ROCPROFILER_PYTHON_VERSIONS}) - rocprofiler_rocpd_python_bindings_target_sources( - ${_PYTHON_VERSION} PRIVATE ${libpyrocpd_source_serialization_sources} - ${libpyrocpd_source_serialization_headers}) -endforeach() +rocprofiler_rocpd_python_bindings_object_sources( + PRIVATE ${libpyrocpd_source_serialization_sources} + ${libpyrocpd_source_serialization_headers}) diff --git a/source/lib/python/rocpd/source/sql_generator.hpp b/source/lib/python/rocpd/source/sql_generator.hpp index 6284857af..cf050ee08 100644 --- a/source/lib/python/rocpd/source/sql_generator.hpp +++ b/source/lib/python/rocpd/source/sql_generator.hpp @@ -25,7 +25,9 @@ #include "lib/python/rocpd/source/serialization/sql.hpp" #include "lib/common/container/ring_buffer.hpp" +#include "lib/common/logging.hpp" #include "lib/common/mpl.hpp" +#include "lib/common/simple_timer.hpp" #include "lib/common/units.hpp" #include "lib/output/domain_type.hpp" #include "lib/output/generator.hpp" @@ -131,17 +133,13 @@ sql_generator::get(size_t idx) const if(idx < static_cast(m_num_chunks)) { - // auto _offset = idx * m_chunk_size; - // auto _limit = m_chunk_size; - // auto _query = fmt::format("{}{} LIMIT {} OFFSET {};", m_query, m_order, _limit, - // _offset); - - // auto* conn = const_cast(m_conn); - // auto ar = cereal::SQLite3InputArchive{conn, _query}; - auto& ar = const_cast(m_archive); ar.set_chunk_index(idx); + auto _query_perf = rocprofiler::common::simple_timer{ + fmt::format("SQL Query {} of {} :: {}", idx, m_num_chunks, m_query), + ROCP_LOG_LEVEL_INFO}; + cereal::load(ar, _data); ROCP_FATAL_IF(_data.size() != m_expected.at(idx)) @@ -158,4 +156,19 @@ sql_generator::get(size_t idx) const return _data; } + +template +auto +read_sql_query(sqlite3* conn, std::string_view query) +{ + auto data = std::vector{}; + if(conn) + { + auto _query_perf = rocprofiler::common::simple_timer{fmt::format("SQL Query :: {}", query), + ROCP_LOG_LEVEL_INFO}; + auto ar = cereal::SQLite3InputArchive{conn, 
fmt::format("{}", query)}; + cereal::load(ar, data); + } + return data; +} } // namespace rocpd diff --git a/source/lib/python/rocpd/source/types.hpp b/source/lib/python/rocpd/source/types.hpp index 454ff9ec0..9bb4e789b 100644 --- a/source/lib/python/rocpd/source/types.hpp +++ b/source/lib/python/rocpd/source/types.hpp @@ -103,6 +103,7 @@ struct node : public base_class // common base class for node info struct common_node_info { + int64_t id = 0; guid_t guid = {}; uint64_t nid = 0; std::string machine_id = {}; @@ -139,9 +140,12 @@ struct agent : public base_class { guid_t guid = {}; uint64_t nid = 0; + uint64_t pid = 0; uint64_t absolute_index = 0; - std::string type = {}; - std::string user_name = {}; + uint64_t logical_index = 0; + uint64_t type_index = 0; + std::string type_name = {}; + std::string generic_name = {}; std::string extdata = {}; bool has_extdata() const { return (extdata.length() > 2); } @@ -150,25 +154,25 @@ struct agent : public base_class struct code_object { - uint64_t id = 0; - guid_t guid = {}; - uint64_t nid = 0; - uint64_t pid = 0; - uint64_t agent_abs_index = 0; - std::string uri = {}; - uint64_t load_base = 0; - uint64_t load_size = 0; - uint64_t load_delta = 0; - std::string storage_type_str = {}; - uint64_t storage_type = 0; - uint64_t memory_base = 0; - uint64_t memory_size = 0; - uint16_t code_object_size = 0; + int64_t id = 0; + guid_t guid = {}; + uint64_t nid = 0; + uint64_t pid = 0; + uint64_t agent_absolute_index = 0; + std::string uri = {}; + uint64_t load_base = 0; + uint64_t load_size = 0; + uint64_t load_delta = 0; + std::string storage_type_str = {}; + uint64_t storage_type = 0; + uint64_t memory_base = 0; + uint64_t memory_size = 0; + uint16_t code_object_size = 0; }; struct kernel_symbol { - uint64_t id = 0; + int64_t id = 0; guid_t guid = {}; uint64_t nid = 0; pid_t pid = 0; @@ -199,7 +203,7 @@ struct region std::string message = {}; }; - uint64_t id = 0; + int64_t id = 0; guid_t guid = {}; std::string category = {}; 
std::string name = {}; @@ -211,7 +215,7 @@ struct region uint64_t event_id = 0; uint64_t stack_id = 0; uint64_t parent_stack_id = 0; - uint64_t corr_id = 0; + uint64_t correlation_id = 0; std::string extdata = {}; bool has_extdata() const { return (extdata.length() > 2); } @@ -225,7 +229,7 @@ struct sample std::string message = {}; }; - uint64_t id = 0; + int64_t id = 0; guid_t guid = {}; std::string category = {}; std::string name = {}; @@ -236,140 +240,145 @@ struct sample uint64_t event_id = 0; uint64_t stack_id = 0; uint64_t parent_stack_id = 0; - uint64_t corr_id = 0; + uint64_t correlation_id = 0; std::string extdata = {}; bool has_extdata() const { return (extdata.length() > 2); } decoded_extdata get_extdata() const; }; -struct region_arg +struct argument { - uint64_t id = 0; - guid_t guid = {}; - pid_t nid = 0; - pid_t pid = 0; - std::string type = {}; - std::string name = {}; - std::string value = {}; + int64_t id = 0; + guid_t guid = {}; + uint64_t event_id = 0; + uint64_t position = 0; + std::string type = {}; + std::string name = {}; + std::string value = {}; }; struct kernel_dispatch { - uint64_t id = 0; - guid_t guid = {}; - std::string category = {}; - std::string region = {}; - std::string name = {}; - pid_t nid = 0; - pid_t pid = 0; - pid_t tid = 0; - uint64_t agent_abs_index = 0; - uint64_t agent_log_index = 0; - uint64_t agent_type_index = 0; - std::string agent_type = {}; - uint64_t code_object_id = 0; - uint64_t kernel_id = 0; - uint64_t dispatch_id = 0; - uint64_t stream_id = 0; - uint64_t queue_id = 0; - std::string queue = {}; - std::string stream = {}; - rocprofiler_timestamp_t start = 0; - rocprofiler_timestamp_t end = 0; - rocprofiler_dim3_t grid_size = {}; - rocprofiler_dim3_t workgroup_size = {}; - uint64_t lds_size = 0; - uint64_t scratch_size = 0; - uint64_t static_lds_size = 0; - uint64_t static_scratch_size = 0; - uint64_t stack_id = 0; - uint64_t parent_stack_id = 0; - uint64_t corr_id = 0; - uint64_t vgpr_count = 0; - uint64_t 
accum_vgpr_count = 0; - uint64_t sgpr_count = 0; + int64_t id = 0; + guid_t guid = {}; + std::string category = {}; + std::string region = {}; + std::string name = {}; + pid_t nid = 0; + pid_t pid = 0; + pid_t tid = 0; + uint64_t agent_absolute_index = 0; + uint64_t agent_logical_index = 0; + uint64_t agent_type_index = 0; + std::string agent_type = {}; + uint64_t code_object_id = 0; + uint64_t kernel_id = 0; + uint64_t dispatch_id = 0; + uint64_t stream_id = 0; + uint64_t queue_id = 0; + std::string queue = {}; + std::string stream = {}; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + rocprofiler_dim3_t grid_size = {}; + rocprofiler_dim3_t workgroup_size = {}; + uint64_t lds_size = 0; + uint64_t scratch_size = 0; + uint64_t static_lds_size = 0; + uint64_t static_scratch_size = 0; + uint64_t stack_id = 0; + uint64_t sgpr_count = 0; + uint64_t arch_vgpr_count = 0; + uint64_t accum_vgpr_count = 0; + uint64_t parent_stack_id = 0; + uint64_t correlation_id = 0; + uint64_t event_id = 0; }; struct memory_allocation { - uint64_t id = 0; - guid_t guid = {}; - pid_t pid = 0; - pid_t tid = 0; - rocprofiler_timestamp_t start = 0; - rocprofiler_timestamp_t end = 0; - std::string type = {}; - std::string level = {}; - std::string agent_name = {}; - std::string category = {}; - uint64_t agent_abs_index = 0; - uint64_t agent_log_index = 0; - uint64_t agent_type_index = 0; - std::string agent_type = {}; - uint64_t address = 0; - uint64_t size = 0; - uint64_t queue_id = 0; - std::string queue_name = {}; - uint64_t stream_id = 0; - std::string stream_name = {}; - uint64_t stack_id = 0; - uint64_t parent_stack_id = 0; - uint64_t corr_id = 0; -}; - -struct memory_copies -{ - uint64_t id = 0; + int64_t id = 0; guid_t guid = {}; pid_t pid = 0; pid_t tid = 0; rocprofiler_timestamp_t start = 0; rocprofiler_timestamp_t end = 0; - std::string name = {}; - std::string region_name = {}; + std::string type = {}; + std::string level = {}; + std::string agent_name = {}; 
std::string category = {}; - uint64_t stream_id = 0; + uint64_t agent_absolute_index = 0; + uint64_t agent_logical_index = 0; + uint64_t agent_type_index = 0; + std::string agent_type = {}; + uint64_t address = 0; + uint64_t size = 0; uint64_t queue_id = 0; - std::string stream_name = {}; std::string queue_name = {}; - uint64_t size = 0; - std::string dst_device = {}; - uint64_t dst_agent_abs_index = 0; - uint64_t dst_agent_log_index = 0; - uint64_t dst_agent_type_index = 0; - std::string dst_agent_type = {}; - uint64_t dst_address = 0; - std::string src_device = {}; - uint64_t src_agent_abs_index = 0; - uint64_t src_agent_log_index = 0; - uint64_t src_agent_type_index = 0; - std::string src_agent_type = {}; - uint64_t src_address = 0; + uint64_t stream_id = 0; + std::string stream_name = {}; uint64_t stack_id = 0; uint64_t parent_stack_id = 0; - uint64_t corr_id = 0; + uint64_t correlation_id = 0; + uint64_t event_id = 0; +}; + +struct memory_copies +{ + int64_t id = 0; + guid_t guid = {}; + pid_t pid = 0; + pid_t tid = 0; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + std::string name = {}; + std::string region_name = {}; + std::string category = {}; + uint64_t stream_id = 0; + uint64_t queue_id = 0; + std::string stream_name = {}; + std::string queue_name = {}; + uint64_t size = 0; + std::string dst_device = {}; + uint64_t dst_agent_absolute_index = 0; + uint64_t dst_agent_logical_index = 0; + uint64_t dst_agent_type_index = 0; + std::string dst_agent_type = {}; + uint64_t dst_address = 0; + std::string src_device = {}; + uint64_t src_agent_absolute_index = 0; + uint64_t src_agent_logical_index = 0; + uint64_t src_agent_type_index = 0; + std::string src_agent_type = {}; + uint64_t src_address = 0; + uint64_t stack_id = 0; + uint64_t parent_stack_id = 0; + uint64_t correlation_id = 0; + uint64_t event_id = 0; }; struct scratch_memory { - guid_t guid = {}; - std::string operation = {}; - std::string category = {}; - uint64_t 
agent_abs_index = 0; - uint64_t agent_log_index = 0; - uint64_t agent_type_index = 0; - std::string agent_type = {}; - uint64_t queue_id = 0; - pid_t pid = 0; - pid_t tid = 0; - std::string alloc_flags = {}; - rocprofiler_timestamp_t start = 0; - rocprofiler_timestamp_t end = 0; - uint64_t size = 0; - uint64_t stack_id = 0; - uint64_t parent_stack_id = 0; - uint64_t corr_id = 0; + int64_t id = 0; + guid_t guid = {}; + std::string operation = {}; + std::string category = {}; + uint64_t agent_absolute_index = 0; + uint64_t agent_logical_index = 0; + uint64_t agent_type_index = 0; + std::string agent_type = {}; + uint64_t queue_id = 0; + pid_t pid = 0; + pid_t tid = 0; + std::string alloc_flags = {}; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + uint64_t size = 0; + uint64_t stack_id = 0; + uint64_t parent_stack_id = 0; + uint64_t correlation_id = 0; + uint64_t event_id = 0; }; struct stats @@ -407,75 +416,158 @@ struct stats_node struct pmc_event { - uint64_t id = 0; - guid_t guid = {}; - pid_t pid = 0; - uint64_t event_id = 0; - uint64_t pmc_id = 0; - double counter_value = 0; + int64_t id = 0; + guid_t guid = {}; + pid_t pid = 0; + uint64_t event_id = 0; + uint64_t pmc_id = 0; + double value = 0; + std::string extdata = {}; }; struct counter { - uint64_t id = 0; - guid_t guid = {}; - uint64_t dispatch_id = 0; - uint64_t kernel_id = 0; - uint32_t stack_id = 0; - uint64_t correlation_id = 0; - uint64_t event_id = 0; - pid_t pid = 0; - pid_t tid = 0; - uint32_t agent_id = 0; - uint64_t agent_abs_index = 0; - uint64_t agent_log_index = 0; - uint64_t agent_type_index = 0; - std::string agent_type = {}; - uint64_t queue_id = 0; - uint32_t grid_size_x = 0; - uint32_t grid_size_y = 0; - uint32_t grid_size_z = 0; - uint64_t grid_size = 0; - std::string kernel_name = {}; - std::string kernel_region = {}; - uint32_t workgroup_size_x = 0; - uint32_t workgroup_size_y = 0; - uint32_t workgroup_size_z = 0; - uint32_t workgroup_size = 0; - uint32_t 
lds_block_size = 0; - uint32_t scratch_size = 0; - uint32_t vgpr_count = 0; - uint32_t accum_vgpr_count = 0; - uint32_t sgpr_count = 0; - std::string counter_name = {}; - std::string counter_symbol = {}; - std::string component = {}; - std::string description = {}; - std::string block = {}; - std::string expression = {}; - std::string value_type = {}; - uint32_t counter_id = 0; - double value = 0; - rocprofiler_timestamp_t start = 0; - rocprofiler_timestamp_t end = 0; - bool is_constant = false; - bool is_derived = false; + int64_t id = 0; + guid_t guid = {}; + uint64_t dispatch_id = 0; + uint64_t kernel_id = 0; + uint32_t stack_id = 0; + uint64_t correlation_id = 0; + uint64_t event_id = 0; + pid_t pid = 0; + pid_t tid = 0; + uint32_t agent_id = 0; + uint64_t agent_absolute_index = 0; + uint64_t agent_logical_index = 0; + uint64_t agent_type_index = 0; + std::string agent_type = {}; + uint64_t queue_id = 0; + uint64_t stream_id = 0; + uint32_t grid_x = 0; + uint32_t grid_y = 0; + uint32_t grid_z = 0; + std::string name = {}; + std::string region = {}; + uint32_t workgroup_x = 0; + uint32_t workgroup_y = 0; + uint32_t workgroup_z = 0; + uint64_t lds_size = 0; + uint64_t scratch_size = 0; + uint64_t static_lds_size = 0; + uint64_t static_scratch_size = 0; + uint32_t sgpr_count = 0; + uint32_t arch_vgpr_count = 0; + uint32_t accum_vgpr_count = 0; + std::string pmc_name = {}; + std::string pmc_symbol = {}; + std::string pmc_component = {}; + std::string pmc_description = {}; + std::string pmc_block = {}; + std::string pmc_expression = {}; + std::string pmc_value_type = {}; + uint32_t pmc_id = 0; + double pmc_value = 0; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + bool pmc_is_constant = false; + bool pmc_is_derived = false; + + // computed + uint64_t grid_size = 0; + uint64_t workgroup_size = 0; }; struct pmc_info { - uint64_t id = 0; - guid_t guid = {}; - uint64_t nid = 0; - uint64_t agent_abs_index = 0; - bool is_constant = false; - bool 
is_derived = false; - std::string name = {}; - std::string description = {}; - std::string block = {}; - std::string expression = {}; + int64_t id = 0; + guid_t guid = {}; + std::string name = {}; + std::string symbol = {}; + std::string description = {}; + uint64_t agent_id = 0; + std::string target_arch = {}; + uint64_t event_code = 0; + uint64_t instance_id = 0; + std::string long_description = {}; + std::string component = {}; + std::string units = {}; + std::string value_type = {}; + std::string block = {}; + std::string expression = {}; + int16_t is_constant = 0; + int16_t is_derived = 0; + std::string extdata = {}; +}; + +#define DEFINE_GROUP_BY_OPERATORS(TYPE, ...) \ + auto get_tie() const { return std::tie(__VA_ARGS__); } \ + static auto get_group_by() { return std::string_view{#__VA_ARGS__}; } \ + static auto name() { return std::string_view{#TYPE}; } \ + friend bool operator==(const TYPE& lhs, const TYPE& rhs) \ + { \ + return (lhs.get_tie() == rhs.get_tie()); \ + } \ + friend bool operator!=(const TYPE& lhs, const TYPE& rhs) { return !(lhs == rhs); } \ + friend bool operator<(const TYPE& lhs, const TYPE& rhs) \ + { \ + return (lhs.get_tie() < rhs.get_tie()); \ + } \ + friend bool operator>(const TYPE& lhs, const TYPE& rhs) { return !(lhs < rhs || lhs == rhs); } \ + friend bool operator<=(const TYPE& lhs, const TYPE& rhs) { return (lhs < rhs || lhs == rhs); } \ + friend bool operator>=(const TYPE& lhs, const TYPE& rhs) { return !(lhs < rhs); } + +struct group_by_tid +{ + guid_t guid = {}; + uint64_t nid = 0; + pid_t pid = 0; + uint64_t tid = 0; + + DEFINE_GROUP_BY_OPERATORS(group_by_tid, guid, nid, pid, tid); + static auto get_order_by() { return std::string_view{"tid"}; } +}; + +struct group_by_agent_tid +{ + guid_t guid = {}; + uint64_t nid = 0; + pid_t pid = 0; + pid_t tid = 0; + pid_t agent_absolute_index = 0; + + DEFINE_GROUP_BY_OPERATORS(group_by_agent_tid, guid, nid, pid, tid, agent_absolute_index); + static auto get_order_by() { return 
std::string_view{"agent_absolute_index, tid"}; } +}; + +struct group_by_agent_queue_id +{ + guid_t guid = {}; + uint64_t nid = 0; + pid_t pid = 0; + pid_t agent_absolute_index = 0; + uint64_t queue_id = 0; + + DEFINE_GROUP_BY_OPERATORS(group_by_agent_queue_id, + guid, + nid, + pid, + agent_absolute_index, + queue_id); + static auto get_order_by() { return std::string_view{"agent_absolute_index, queue_id"}; } +}; + +struct group_by_stream_id +{ + guid_t guid = {}; + uint64_t nid = 0; + pid_t pid = 0; + uint64_t stream_id = 0; + + DEFINE_GROUP_BY_OPERATORS(group_by_stream_id, guid, nid, pid, stream_id); + static auto get_order_by() { return std::string_view{"stream_id"}; } }; +#undef DEFINE_GROUP_BY_OPERATORS } // namespace types } // namespace rocpd @@ -505,6 +597,7 @@ template void load(ArchiveT& ar, rocpd::types::common_node_info& data) { + LOAD_DATA_FIELD(id); LOAD_DATA_FIELD(guid); LOAD_DATA_FIELD(nid); LOAD_DATA_FIELD(machine_id); @@ -553,9 +646,12 @@ load(ArchiveT& ar, rocpd::types::agent& data) { LOAD_DATA_FIELD(guid); LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); LOAD_DATA_FIELD(absolute_index); - LOAD_DATA_FIELD(type); - LOAD_DATA_FIELD(user_name); + LOAD_DATA_FIELD(logical_index); + LOAD_DATA_FIELD(type_index); + LOAD_DATA_NAMED("type", type_name); + LOAD_DATA_FIELD(generic_name); LOAD_DATA_FIELD(extdata); data.load_extdata(); @@ -569,7 +665,7 @@ load(ArchiveT& ar, rocpd::types::code_object& data) LOAD_DATA_FIELD(guid); LOAD_DATA_FIELD(nid); LOAD_DATA_FIELD(pid); - LOAD_DATA_FIELD(agent_abs_index); + LOAD_DATA_FIELD(agent_absolute_index); LOAD_DATA_FIELD(uri); LOAD_DATA_FIELD(load_base); LOAD_DATA_FIELD(load_size); @@ -627,7 +723,7 @@ load(ArchiveT& ar, rocpd::types::region& data) LOAD_DATA_FIELD(event_id); LOAD_DATA_FIELD(stack_id); LOAD_DATA_FIELD(parent_stack_id); - LOAD_DATA_FIELD(corr_id); + LOAD_DATA_FIELD(correlation_id); LOAD_DATA_FIELD(extdata); } @@ -653,7 +749,7 @@ load(ArchiveT& ar, rocpd::types::sample& data) LOAD_DATA_FIELD(event_id); 
LOAD_DATA_FIELD(stack_id); LOAD_DATA_FIELD(parent_stack_id); - LOAD_DATA_FIELD(corr_id); + LOAD_DATA_FIELD(correlation_id); LOAD_DATA_FIELD(extdata); } @@ -666,12 +762,12 @@ load(ArchiveT& ar, rocpd::types::sample::decoded_extdata& data) template void -load(ArchiveT& ar, rocpd::types::region_arg& data) +load(ArchiveT& ar, rocpd::types::argument& data) { LOAD_DATA_FIELD(id); LOAD_DATA_FIELD(guid); - LOAD_DATA_FIELD(nid); - LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(event_id); + LOAD_DATA_FIELD(position); LOAD_DATA_FIELD(type); LOAD_DATA_FIELD(name); LOAD_DATA_FIELD(value); @@ -695,8 +791,8 @@ load(ArchiveT& ar, rocpd::types::kernel_dispatch& data) LOAD_DATA_FIELD(nid); LOAD_DATA_FIELD(pid); LOAD_DATA_FIELD(tid); - LOAD_DATA_FIELD(agent_abs_index); - LOAD_DATA_FIELD(agent_log_index); + LOAD_DATA_FIELD(agent_absolute_index); + LOAD_DATA_FIELD(agent_logical_index); LOAD_DATA_FIELD(agent_type_index); LOAD_DATA_FIELD(agent_type); LOAD_DATA_FIELD(code_object_id); @@ -712,14 +808,15 @@ load(ArchiveT& ar, rocpd::types::kernel_dispatch& data) load_dim3("grid", data.grid_size); LOAD_DATA_FIELD(lds_size); LOAD_DATA_FIELD(scratch_size); - LOAD_DATA_FIELD(vgpr_count); - LOAD_DATA_FIELD(accum_vgpr_count); - LOAD_DATA_FIELD(sgpr_count); LOAD_DATA_FIELD(static_lds_size); LOAD_DATA_FIELD(static_scratch_size); + LOAD_DATA_FIELD(sgpr_count); + LOAD_DATA_FIELD(arch_vgpr_count); + LOAD_DATA_FIELD(accum_vgpr_count); LOAD_DATA_FIELD(stack_id); LOAD_DATA_FIELD(parent_stack_id); - LOAD_DATA_FIELD(corr_id); + LOAD_DATA_FIELD(correlation_id); + LOAD_DATA_FIELD(event_id); } template @@ -735,8 +832,8 @@ load(ArchiveT& ar, rocpd::types::memory_allocation& data) LOAD_DATA_FIELD(type); LOAD_DATA_FIELD(level); LOAD_DATA_FIELD(agent_name); - LOAD_DATA_FIELD(agent_abs_index); - LOAD_DATA_FIELD(agent_log_index); + LOAD_DATA_FIELD(agent_absolute_index); + LOAD_DATA_FIELD(agent_logical_index); LOAD_DATA_FIELD(agent_type_index); LOAD_DATA_FIELD(agent_type); LOAD_DATA_FIELD(address); @@ -748,7 +845,8 @@ 
load(ArchiveT& ar, rocpd::types::memory_allocation& data) LOAD_DATA_FIELD(category); LOAD_DATA_FIELD(stack_id); LOAD_DATA_FIELD(parent_stack_id); - LOAD_DATA_FIELD(corr_id); + LOAD_DATA_FIELD(correlation_id); + LOAD_DATA_FIELD(event_id); } template @@ -769,44 +867,47 @@ load(ArchiveT& ar, rocpd::types::memory_copies& data) LOAD_DATA_FIELD(queue_name); LOAD_DATA_FIELD(size); LOAD_DATA_FIELD(dst_device); - LOAD_DATA_FIELD(dst_agent_abs_index); - LOAD_DATA_FIELD(dst_agent_log_index); + LOAD_DATA_FIELD(dst_agent_absolute_index); + LOAD_DATA_FIELD(dst_agent_logical_index); LOAD_DATA_FIELD(dst_agent_type_index); LOAD_DATA_FIELD(dst_agent_type); LOAD_DATA_FIELD(dst_address); LOAD_DATA_FIELD(src_device); - LOAD_DATA_FIELD(src_agent_abs_index); - LOAD_DATA_FIELD(src_agent_log_index); + LOAD_DATA_FIELD(src_agent_absolute_index); + LOAD_DATA_FIELD(src_agent_logical_index); LOAD_DATA_FIELD(src_agent_type_index); LOAD_DATA_FIELD(src_agent_type); LOAD_DATA_FIELD(src_address); LOAD_DATA_FIELD(category); LOAD_DATA_FIELD(stack_id); LOAD_DATA_FIELD(parent_stack_id); - LOAD_DATA_FIELD(corr_id); + LOAD_DATA_FIELD(correlation_id); + LOAD_DATA_FIELD(event_id); } template void load(ArchiveT& ar, rocpd::types::scratch_memory& data) { + LOAD_DATA_FIELD(id); LOAD_DATA_FIELD(guid); LOAD_DATA_FIELD(operation); - LOAD_DATA_FIELD(agent_abs_index); - LOAD_DATA_FIELD(agent_log_index); + LOAD_DATA_FIELD(agent_absolute_index); + LOAD_DATA_FIELD(agent_logical_index); LOAD_DATA_FIELD(agent_type_index); LOAD_DATA_FIELD(agent_type); LOAD_DATA_FIELD(queue_id); LOAD_DATA_FIELD(pid); LOAD_DATA_FIELD(tid); - LOAD_DATA_FIELD(alloc_flags); + // LOAD_DATA_FIELD(alloc_flags); // INVALID FIELD LOAD_DATA_FIELD(start); LOAD_DATA_FIELD(end); LOAD_DATA_FIELD(size); LOAD_DATA_FIELD(category); LOAD_DATA_FIELD(stack_id); LOAD_DATA_FIELD(parent_stack_id); - LOAD_DATA_FIELD(corr_id); + LOAD_DATA_FIELD(correlation_id); + LOAD_DATA_FIELD(event_id); } template @@ -852,10 +953,10 @@ load(ArchiveT& ar, 
rocpd::types::pmc_event& data) { LOAD_DATA_FIELD(id); LOAD_DATA_FIELD(guid); - LOAD_DATA_FIELD(pid); LOAD_DATA_FIELD(event_id); LOAD_DATA_FIELD(pmc_id); - // LOAD_DATA_FIELD(counter_value); + LOAD_DATA_FIELD(value); + LOAD_DATA_FIELD(extdata); } template @@ -872,56 +973,114 @@ load(ArchiveT& ar, rocpd::types::counter& data) LOAD_DATA_FIELD(pid); LOAD_DATA_FIELD(tid); LOAD_DATA_FIELD(agent_id); - LOAD_DATA_FIELD(agent_abs_index); - LOAD_DATA_FIELD(agent_log_index); + LOAD_DATA_FIELD(agent_absolute_index); + LOAD_DATA_FIELD(agent_logical_index); LOAD_DATA_FIELD(agent_type_index); LOAD_DATA_FIELD(agent_type); LOAD_DATA_FIELD(queue_id); - LOAD_DATA_FIELD(grid_size_x); - LOAD_DATA_FIELD(grid_size_y); - LOAD_DATA_FIELD(grid_size_z); - LOAD_DATA_FIELD(grid_size); - LOAD_DATA_FIELD(kernel_name); - LOAD_DATA_FIELD(kernel_region); - LOAD_DATA_FIELD(workgroup_size_x); - LOAD_DATA_FIELD(workgroup_size_y); - LOAD_DATA_FIELD(workgroup_size_z); - LOAD_DATA_FIELD(workgroup_size); - LOAD_DATA_FIELD(lds_block_size); - LOAD_DATA_FIELD(scratch_size); - LOAD_DATA_FIELD(vgpr_count); - LOAD_DATA_FIELD(accum_vgpr_count); - LOAD_DATA_FIELD(sgpr_count); - LOAD_DATA_FIELD(counter_name); - LOAD_DATA_FIELD(counter_symbol); - LOAD_DATA_FIELD(component); - LOAD_DATA_FIELD(description); - LOAD_DATA_FIELD(block); - LOAD_DATA_FIELD(expression); - LOAD_DATA_FIELD(value_type); - LOAD_DATA_FIELD(counter_id); - LOAD_DATA_FIELD(value); + LOAD_DATA_FIELD(stream_id); + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(region); LOAD_DATA_FIELD(start); LOAD_DATA_FIELD(end); - LOAD_DATA_FIELD(is_constant); - LOAD_DATA_FIELD(is_derived); + LOAD_DATA_FIELD(grid_x); + LOAD_DATA_FIELD(grid_y); + LOAD_DATA_FIELD(grid_z); + LOAD_DATA_FIELD(workgroup_x); + LOAD_DATA_FIELD(workgroup_y); + LOAD_DATA_FIELD(workgroup_z); + LOAD_DATA_FIELD(lds_size); + LOAD_DATA_FIELD(scratch_size); + LOAD_DATA_FIELD(static_lds_size); + LOAD_DATA_FIELD(static_scratch_size); + LOAD_DATA_FIELD(sgpr_count); + LOAD_DATA_FIELD(arch_vgpr_count); + 
LOAD_DATA_FIELD(accum_vgpr_count); + LOAD_DATA_FIELD(pmc_name); + LOAD_DATA_FIELD(pmc_symbol); + LOAD_DATA_FIELD(pmc_component); + LOAD_DATA_FIELD(pmc_description); + LOAD_DATA_FIELD(pmc_block); + LOAD_DATA_FIELD(pmc_expression); + LOAD_DATA_FIELD(pmc_value_type); + LOAD_DATA_FIELD(pmc_id); + LOAD_DATA_FIELD(pmc_value); + LOAD_DATA_FIELD(pmc_is_constant); + LOAD_DATA_FIELD(pmc_is_derived); + + auto dotproduct = [](uint32_t x, uint32_t y, uint32_t z) -> uint64_t { + return (static_cast(x) * y * z); + }; + + data.grid_size = dotproduct(data.grid_x, data.grid_y, data.grid_z); + data.workgroup_size = dotproduct(data.workgroup_x, data.workgroup_y, data.workgroup_z); } + template void load(ArchiveT& ar, rocpd::types::pmc_info& data) { LOAD_DATA_FIELD(id); LOAD_DATA_FIELD(guid); - LOAD_DATA_FIELD(nid); - LOAD_DATA_FIELD(agent_abs_index); - LOAD_DATA_FIELD(is_constant); - LOAD_DATA_FIELD(is_derived); LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(symbol); LOAD_DATA_FIELD(description); + LOAD_DATA_FIELD(agent_id); + LOAD_DATA_FIELD(target_arch); + LOAD_DATA_FIELD(event_code); + LOAD_DATA_FIELD(instance_id); + LOAD_DATA_FIELD(long_description); + LOAD_DATA_FIELD(component); + LOAD_DATA_FIELD(units); + LOAD_DATA_FIELD(value_type); LOAD_DATA_FIELD(block); LOAD_DATA_FIELD(expression); + LOAD_DATA_FIELD(is_constant); + LOAD_DATA_FIELD(is_derived); + LOAD_DATA_FIELD(extdata); +} + +template +void +load(ArchiveT& ar, rocpd::types::group_by_tid& data) +{ + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); } +template +void +load(ArchiveT& ar, rocpd::types::group_by_agent_tid& data) +{ + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(agent_absolute_index); +} + +template +void +load(ArchiveT& ar, rocpd::types::group_by_agent_queue_id& data) +{ + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(agent_absolute_index); + 
LOAD_DATA_FIELD(queue_id); +} + +template +void +load(ArchiveT& ar, rocpd::types::group_by_stream_id& data) +{ + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(stream_id); +} } // namespace cereal #undef LOAD_DATA_FIELD diff --git a/source/lib/python/rocpd/summary.py b/source/lib/python/rocpd/summary.py index 816b50b26..76c55d162 100644 --- a/source/lib/python/rocpd/summary.py +++ b/source/lib/python/rocpd/summary.py @@ -67,9 +67,9 @@ def get_temp_view_names(connection: RocpdImportData) -> List[str]: def get_temp_view_columns(connection: RocpdImportData, view_name: str) -> List[str]: """Return the column names of a given temporary view.""" - cursor = connection.cursor() - cursor.execute(f"PRAGMA table_xinfo('{view_name}')") - return [row[1] for row in cursor.fetchall()] + print(f"Retrieving columns for view: {view_name}") + cursor = connection.execute(f"SELECT * FROM '{view_name}' LIMIT 0") + return [desc[0] for desc in cursor.description] def make_temp_view_query(view_name, query) -> str: diff --git a/source/lib/python/rocpd/time_window.py b/source/lib/python/rocpd/time_window.py index 79aec8c09..4afd86320 100644 --- a/source/lib/python/rocpd/time_window.py +++ b/source/lib/python/rocpd/time_window.py @@ -62,17 +62,11 @@ def markers2timestamp( def get_min_max_time(connection): min_max_query = """ SELECT - MIN(min_time) as min_time, - MAX(max_time) as max_time - FROM ( - SELECT start as min_time, end as max_time FROM regions_and_samples - UNION ALL - SELECT start as min_time, end as max_time FROM rocpd_kernel_dispatch - UNION ALL - SELECT start as min_time, end as max_time FROM rocpd_memory_allocate - UNION ALL - SELECT start as min_time, end as max_time FROM rocpd_memory_copy - )""" + MIN(value) as min_time, + MAX(value) as max_time + FROM + rocpd_timestamp + """ min_time, max_time = execute_statement(connection, min_max_query).fetchone() return (min_time, max_time) @@ -112,24 +106,14 @@ def convert_time(time_str: 
Optional[str], is_start: bool = False) -> float: return (convert_time(start_time, True), convert_time(end_time, False)) -def get_time_filter(inclusive: bool, start_time, end_time) -> str: - """Create SQL filter for start/end time ranges.""" - _beg = int(start_time) - _end = int(end_time) - if inclusive: - return f"start >= {_beg} AND end <= {_end}" - else: - return f"start <= {_end} AND end >= {_beg}" - - -def get_timestamp_filter(inclusive: bool, start_time, end_time) -> str: +def get_time_filter(inclusive: bool, start_field, end_field, start_time, end_time) -> str: """Create SQL filter for timestamp columns.""" _beg = int(start_time) _end = int(end_time) if inclusive: - return f"timestamp >= {_beg} AND timestamp <= {_end}" + return f"{start_field} >= {_beg} AND {end_field} <= {_end}" else: - return f"timestamp <= {_end} AND timestamp >= {_beg}" + return f"{start_field} <= {_end} AND {end_field} >= {_beg}" def create_view(connection: sqlite3.Connection, view_name: str, query: str) -> None: @@ -199,8 +183,9 @@ def dump_min_max(label): ) # Create views for tables with start and end times - start_end_timed_tables = [] - timestamp_timed_tables = [] + start_end_timed_tables = [] # legacy + timestamp_timed_tables = [] # legacy + timestamp_tables = ["rocpd_timestamp"] # dedicated table for timestamps for itr in connection.table_info.keys(): if itr.find("rocpd_info_") == 0: @@ -211,31 +196,23 @@ def dump_min_max(label): elif "timestamp" in column_names: timestamp_timed_tables += [itr] - # Restrict the scope of the tables with start/end columns - for table_name in start_end_timed_tables: - dbs = [ - f"{itr} WHERE {get_time_filter(inclusive, start_time, end_time)}" - for itr in connection.table_info[table_name] - ] - table_union = " UNION ALL ".join(dbs) - create_view_query = f""" - CREATE TEMPORARY VIEW {table_name} AS - {table_union} - """ - create_view(connection, table_name, create_view_query) - - # Restrict the scope of the tables with timestamp columns - for 
table_name in timestamp_timed_tables: - dbs = [ - f"{itr} WHERE {get_timestamp_filter(inclusive, start_time, end_time)}" - for itr in connection.table_info[table_name] - ] - table_union = " UNION ALL ".join(dbs) - create_view_query = f""" - CREATE TEMPORARY VIEW {table_name} AS - {table_union} - """ - create_view(connection, table_name, create_view_query) + for fields, tables in [ + [["start", "end"], start_end_timed_tables], + [["timestamp", "timestamp"], timestamp_timed_tables], + [["value", "value"], timestamp_tables], + ]: + # Restrict the scope of the tables with start/end columns + for table_name in tables: + dbs = [ + f"{itr} WHERE {get_time_filter(inclusive, fields[0], fields[1], start_time, end_time)}" + for itr in connection.table_info[table_name] + ] + table_union = " UNION ALL ".join(dbs) + create_view_query = f""" + CREATE TEMPORARY VIEW {table_name} AS + {table_union} + """ + create_view(connection, table_name, create_view_query) # # Create node view # create_view_query = """CREATE VIEW rocpd_node AS """ diff --git a/source/lib/python/utilities.cmake b/source/lib/python/utilities.cmake index 9c8a987d7..8232bfd68 100644 --- a/source/lib/python/utilities.cmake +++ b/source/lib/python/utilities.cmake @@ -4,84 +4,6 @@ include_guard(DIRECTORY) -macro(rocprofiler_reset_python3_cache) - foreach( - _VAR - _Python3_Compiler_REASON_FAILURE - _Python3_Development_REASON_FAILURE - _Python3_EXECUTABLE - _Python3_INCLUDE_DIR - _Python3_INTERPRETER_PROPERTIES - _Python3_INTERPRETER_SIGNATURE - _Python3_LIBRARY_RELEASE - _Python3_NumPy_REASON_FAILURE - Python3_EXECUTABLE - Python3_INCLUDE_DIR - Python3_INTERPRETER_ID - Python3_STDLIB - Python3_STDARCH - Python3_SITELIB - Python3_SOABI - ${ARGN}) - unset(${_VAR} CACHE) - unset(${_VAR}) - endforeach() -endmacro() - -macro(rocprofiler_find_python3 _VERSION) - rocprofiler_reset_python3_cache() - - if("${_VERSION}" MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$") - find_package(Python3 ${_VERSION} EXACT ${ARGN} REQUIRED MODULE 
- COMPONENTS Interpreter Development) - elseif("${_VERSION}" MATCHES "^([0-9]+)\\.([0-9]+)$") - find_package(Python3 ${_VERSION}.0...${_VERSION}.999 ${ARGN} REQUIRED MODULE - COMPONENTS Interpreter Development) - else() - message( - FATAL_ERROR - "Invalid Python3 version (${_VERSION}). Specify . or .." - ) - endif() -endmacro() - -# make sure we have all python version candidates -set(ROCPROFILER_PYTHON_VERSION_CANDIDATES - "3.20;3.19;3.18;3.17;3.16;3.15;3.14;3.13;3.12;3.11;3.10;3.9;3.8;3.7;3.6" - CACHE STRING "Python versions to search for, newest first") - -function(get_default_python_versions _VAR) - rocprofiler_reset_python3_cache() - - set(_PYTHON_FOUND_VERSIONS) - - foreach(_VER IN LISTS ROCPROFILER_PYTHON_VERSION_CANDIDATES) - find_package(Python3 ${_VER} EXACT QUIET COMPONENTS Interpreter Development) - if(Python3_FOUND) - list(APPEND _PYTHON_FOUND_VERSIONS - "${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}") - endif() - endforeach() - - # If none found, do one last check for 3.6 (no EXACT) - if(NOT _PYTHON_FOUND_VERSIONS) - find_package(Python3 3.6 COMPONENTS Interpreter Development) - if(Python3_FOUND) - list(APPEND _PYTHON_FOUND_VERSIONS - "${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}") - endif() - endif() - - # Set the output variable to the first found version, if any - if(_PYTHON_FOUND_VERSIONS) - set(${_VAR} - "${_PYTHON_FOUND_VERSIONS}" - PARENT_SCOPE) - endif() - - rocprofiler_reset_python3_cache() -endfunction() - function(rocprofiler_roctx_python_bindings _VERSION) message( STATUS "Building rocprofiler-sdk roctx python bindings for python ${_VERSION}") @@ -132,6 +54,12 @@ function(rocprofiler_roctx_python_bindings _VERSION) COMPONENT roctx) endfunction() +function(rocprofiler_rocpd_python_bindings_object_sources) + if(TARGET rocprofiler-sdk-rocpd-python-bindings-object-library) + target_sources(rocprofiler-sdk-rocpd-python-bindings-object-library ${ARGN}) + endif() +endfunction() + function(rocprofiler_rocpd_python_bindings_target_sources 
_VERSION) target_sources(rocprofiler-sdk-rocpd-python-bindings-${_VERSION} ${ARGN}) endfunction() @@ -148,6 +76,7 @@ function(rocprofiler_rocpd_python_bindings _VERSION) ${PROJECT_BINARY_DIR}/${rocpd_PYTHON_INSTALL_DIRECTORY}) set(rocpd_PYTHON_SOURCES csv.py + filter.py importer.py __init__.py __main__.py @@ -168,11 +97,36 @@ function(rocprofiler_rocpd_python_bindings _VERSION) COMPONENT rocpd) endforeach() + if(NOT TARGET rocprofiler-sdk-rocpd-python-bindings-object-library) + add_library(rocprofiler-sdk-rocpd-python-bindings-object-library OBJECT) + add_library(rocprofiler-sdk::rocpd-python-bindings-object-library ALIAS + rocprofiler-sdk-rocpd-python-bindings-object-library) + target_link_libraries( + rocprofiler-sdk-rocpd-python-bindings-object-library + PRIVATE rocprofiler-sdk::rocprofiler-sdk-headers + rocprofiler-sdk::rocprofiler-sdk-build-flags + rocprofiler-sdk::rocprofiler-sdk-memcheck + rocprofiler-sdk::rocprofiler-sdk-common-library + rocprofiler-sdk::rocprofiler-sdk-output-library + rocprofiler-sdk::rocprofiler-sdk-cereal + rocprofiler-sdk::rocprofiler-sdk-perfetto + rocprofiler-sdk::rocprofiler-sdk-otf2 + rocprofiler-sdk::rocprofiler-sdk-sqlite3 + rocprofiler-sdk::rocprofiler-sdk-pybind11 + rocprofiler-sdk::rocprofiler-sdk-gotcha + rocprofiler-sdk::rocprofiler-sdk-dw + rocprofiler-sdk::rocprofiler-sdk-static-library + rocprofiler-sdk::rocprofiler-sdk-rocpd-library) + set_target_properties(rocprofiler-sdk-rocpd-python-bindings-object-library + PROPERTIES POSITION_INDEPENDENT_CODE ON) + endif() + add_library(rocprofiler-sdk-rocpd-python-bindings-${_VERSION} MODULE) target_sources( rocprofiler-sdk-rocpd-python-bindings-${_VERSION} PRIVATE libpyrocpd.cpp libpyrocpd.hpp - $) + $ + $) target_include_directories(rocprofiler-sdk-rocpd-python-bindings-${_VERSION} SYSTEM PRIVATE ${Python3_INCLUDE_DIRS}) target_link_libraries( diff --git a/source/lib/rocprofiler-sdk-rocpd/sql.cpp b/source/lib/rocprofiler-sdk-rocpd/sql.cpp index b052ec51c..e2072c742 100644 --- 
a/source/lib/rocprofiler-sdk-rocpd/sql.cpp +++ b/source/lib/rocprofiler-sdk-rocpd/sql.cpp @@ -137,9 +137,9 @@ rocpd_sql_load_schema(rocpd_sql_engine_t engine, {ROCPD_SQL_SCHEMA_ROCPD_TABLES, "rocpd_tables.sql"}, {ROCPD_SQL_SCHEMA_ROCPD_INDEXES, "rocpd_indexes.sql"}, {ROCPD_SQL_SCHEMA_ROCPD_VIEWS, "rocpd_views.sql"}, + {ROCPD_SQL_SCHEMA_ROCPD_METADATA, "rocpd_metadata.sql"}, {ROCPD_SQL_SCHEMA_ROCPD_DATA_VIEWS, "data_views.sql"}, {ROCPD_SQL_SCHEMA_ROCPD_SUMMARY_VIEWS, "summary_views.sql"}, - {ROCPD_SQL_SCHEMA_ROCPD_MARKER_VIEWS, "marker_views.sql"}, }; const auto _lib_schema_path = rocpd::sql::get_install_path(); diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 7afcbd9f7..596b6ef85 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -1463,8 +1463,8 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da auto _counter_id = rocprofiler_counter_id_t{}; ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id), "query record counter id"); - serialized_records.emplace_back( - tool::tool_counter_value_t{_counter_id, record_data[count].counter_value}); + serialized_records.emplace_back(tool::tool_counter_value_t{ + _counter_id, record_data[count].id, record_data[count].counter_value}); } if(!serialized_records.empty()) diff --git a/source/lib/rocprofiler-sdk/aql/packet_construct.cpp b/source/lib/rocprofiler-sdk/aql/packet_construct.cpp index 51d04542c..3417bd090 100644 --- a/source/lib/rocprofiler-sdk/aql/packet_construct.cpp +++ b/source/lib/rocprofiler-sdk/aql/packet_construct.cpp @@ -164,7 +164,7 @@ ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache& {buffer_size_hi}}); } - if(perf_exclude_mask) + if(perf_exclude_mask != 0u) { // Bitwise NOT because aqlprofile receives the mask, not the exclude mask aql_params.push_back( diff --git 
a/source/share/rocprofiler-sdk-rocpd/CMakeLists.txt b/source/share/rocprofiler-sdk-rocpd/CMakeLists.txt index c96093bad..2adf3ed79 100644 --- a/source/share/rocprofiler-sdk-rocpd/CMakeLists.txt +++ b/source/share/rocprofiler-sdk-rocpd/CMakeLists.txt @@ -2,7 +2,7 @@ # # -set(rocpd_schemas data_views.sql marker_views.sql rocpd_indexes.sql rocpd_tables.sql +set(rocpd_schemas data_views.sql rocpd_indexes.sql rocpd_metadata.sql rocpd_tables.sql rocpd_views.sql summary_views.sql) foreach(_FILE ${rocpd_schemas}) diff --git a/source/share/rocprofiler-sdk-rocpd/data_views.sql b/source/share/rocprofiler-sdk-rocpd/data_views.sql index 0cb976688..ce542b99e 100644 --- a/source/share/rocprofiler-sdk-rocpd/data_views.sql +++ b/source/share/rocprofiler-sdk-rocpd/data_views.sql @@ -1,5 +1,91 @@ -- -- Useful views +-- +-- This is a view of all the joined track information +CREATE VIEW IF NOT EXISTS + `tracks` AS +SELECT + T.id, + T.guid, + T.name_id AS track_name_id, + ST.string AS track_name, + T.nid, + N.name AS node_name, + N.hash AS node_hash, + N.machine_id AS node_machine_id, + N.system_name AS node_system_name, + N.hostname AS node_hostname, + N.release AS node_release, + N.version AS node_version, + N.hardware_name AS node_hardware_version, + T.pid, + P.name AS process_name, + P.ppid, + P.init AS process_init, + P.fini AS process_fini, + P.start AS process_start, + P.end AS process_end, + P.command AS process_command, + T.tid, + TH.name AS thread_name, + TH.start AS thread_start, + TH.end AS thread_end, + T.agent_id, + A.name AS agent_name, + A.type AS agent_type, + A.absolute_index AS agent_absolute_index, + A.logical_index AS agent_logical_index, + A.type_index AS agent_type_index, + A.uuid AS agent_uuid, + A.generic_name AS agent_generic_name, + A.model_name AS agent_model_name, + A.vendor_name AS agent_vendor_name, + A.product_name AS agent_product_name, + T.queue_id, + Q.name AS queue_name, + T.stream_id, + S.name AS stream_name +FROM + `rocpd_track` T + LEFT JOIN 
`rocpd_info_node` N ON N.id = T.nid + AND N.guid = T.guid + LEFT JOIN `rocpd_info_process` P ON P.pid = T.pid + AND P.guid = T.guid + LEFT JOIN `rocpd_info_thread` TH ON TH.tid = T.tid + AND TH.guid = T.guid + LEFT JOIN `rocpd_info_agent` A ON A.id = T.agent_id + AND A.guid = T.guid + LEFT JOIN `rocpd_info_queue` Q ON Q.id = T.queue_id + AND Q.guid = T.guid + LEFT JOIN `rocpd_info_stream` S ON S.id = T.stream_id + AND S.guid = T.guid + LEFT JOIN `rocpd_string` ST ON ST.id = T.name_id + AND ST.guid = T.guid; + +-- +-- This is a view of all the joined event information +CREATE VIEW IF NOT EXISTS + `events` AS +SELECT + E.id, + E.guid, + E.category_id, + ( + SELECT + name + FROM + `rocpd_info_category` C + WHERE + C.id = E.category_id + AND C.guid = E.guid + ) AS category, + E.stack_id, + E.parent_stack_id, + E.correlation_id, + E.extdata +FROM + `rocpd_event` E; + -- -- Code objects CREATE VIEW IF NOT EXISTS @@ -9,7 +95,7 @@ SELECT CO.guid, CO.nid, P.pid, - A.absolute_index AS agent_abs_index, + A.absolute_index AS agent_absolute_index, CO.uri, CO.load_base, CO.load_size, @@ -23,7 +109,7 @@ FROM `rocpd_info_code_object` CO INNER JOIN `rocpd_info_agent` A ON CO.agent_id = A.id AND CO.guid = A.guid - INNER JOIN `rocpd_info_process` P ON CO.pid = P.id + INNER JOIN `rocpd_info_process` P ON CO.pid = P.pid AND CO.guid = P.guid; CREATE VIEW IF NOT EXISTS @@ -53,13 +139,14 @@ SELECT JSON_EXTRACT(KS.extdata, '$.kernel_address.handle') AS kernel_address FROM `rocpd_info_kernel_symbol` KS - INNER JOIN `rocpd_info_process` P ON KS.pid = P.id + INNER JOIN `rocpd_info_process` P ON KS.pid = P.pid AND KS.guid = P.guid; -- Processes CREATE VIEW IF NOT EXISTS `processes` AS SELECT + P.id, N.id AS nid, N.machine_id, N.system_name, @@ -83,6 +170,7 @@ FROM CREATE VIEW IF NOT EXISTS `threads` AS SELECT + T.id, N.id AS nid, N.machine_id, N.system_name, @@ -98,7 +186,7 @@ SELECT T.name FROM `rocpd_info_thread` T - INNER JOIN `rocpd_info_process` P ON P.id = T.pid + INNER JOIN 
`rocpd_info_process` P ON P.pid = T.pid AND N.guid = T.guid INNER JOIN `rocpd_info_node` N ON N.id = T.nid AND N.guid = T.guid; @@ -109,154 +197,86 @@ CREATE VIEW IF NOT EXISTS SELECT R.id, R.guid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - S.string AS name, - R.nid, - P.pid, + E.category, + NS.string AS name, + T.nid, + T.pid, T.tid, - R.start, - R.end, - (R.end - R.start) AS duration, + DS.value AS `start`, + DE.value AS `end`, + (DE.value - DS.value) AS `duration`, R.event_id, + R.track_id, E.stack_id, E.parent_stack_id, - E.correlation_id AS corr_id, - E.extdata, - E.call_stack, - E.line_info -FROM - `rocpd_region` R - INNER JOIN `rocpd_event` E ON E.id = R.event_id - AND E.guid = R.guid - INNER JOIN `rocpd_string` S ON S.id = R.name_id - AND S.guid = R.guid - INNER JOIN `rocpd_info_process` P ON P.id = R.pid - AND P.guid = R.guid - INNER JOIN `rocpd_info_thread` T ON T.id = R.tid - AND T.guid = R.guid; - -CREATE VIEW IF NOT EXISTS - `region_args` AS -SELECT - R.id, - R.guid, - R.nid, - P.pid, - A.type, - A.name, - A.value + E.correlation_id, + E.extdata FROM `rocpd_region` R - INNER JOIN `rocpd_event` E ON E.id = R.event_id + INNER JOIN `events` E ON E.id = R.event_id AND E.guid = R.guid - INNER JOIN `rocpd_arg` A ON A.event_id = E.id - AND A.guid = R.guid - INNER JOIN `rocpd_info_process` P ON P.id = R.pid - AND P.guid = R.guid; + INNER JOIN `tracks` T ON T.id = R.track_id + AND T.guid = R.guid + INNER JOIN `rocpd_string` NS ON NS.id = R.name_id + AND NS.guid = R.guid + INNER JOIN `rocpd_timestamp` DS ON DS.id = R.start_id + AND DS.guid = R.guid + INNER JOIN `rocpd_timestamp` DE ON DE.id = R.end_id + AND DE.guid = R.guid; -- -- Samples CREATE VIEW IF NOT EXISTS `samples` AS SELECT - R.id, - R.guid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id 
= T.name_id - AND RS.guid = T.guid - ) AS name, + S.id, + S.guid, + E.category, + NS.string AS `name`, T.nid, - P.pid, - TH.tid, - R.timestamp, - R.event_id, + T.pid, + T.tid, + DI.value AS `timestamp`, + S.event_id, + S.track_id, E.stack_id AS stack_id, E.parent_stack_id AS parent_stack_id, - E.correlation_id AS corr_id, - E.extdata AS extdata, - E.call_stack AS call_stack, - E.line_info AS line_info + E.correlation_id, + E.extdata AS extdata FROM - `rocpd_sample` R - INNER JOIN `rocpd_track` T ON T.id = R.track_id - AND T.guid = R.guid - INNER JOIN `rocpd_event` E ON E.id = R.event_id - AND E.guid = R.guid - INNER JOIN `rocpd_info_process` P ON P.id = T.pid - AND P.guid = T.guid - INNER JOIN `rocpd_info_thread` TH ON TH.id = T.tid - AND TH.guid = T.guid; + `rocpd_sample` S + INNER JOIN `tracks` T ON T.id = S.track_id + AND T.guid = S.guid + INNER JOIN `events` E ON E.id = S.event_id + AND E.guid = S.guid + INNER JOIN `rocpd_string` NS ON NS.id = S.name_id + AND NS.guid = S.guid + INNER JOIN `rocpd_timestamp` DI ON DI.id = S.timestamp_id + AND DI.guid = S.guid; -- -- Provides samples view with the same columns as regions view CREATE VIEW IF NOT EXISTS `sample_regions` AS SELECT - R.id, - R.guid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = T.name_id - AND RS.guid = T.guid - ) AS name, - T.nid, - P.pid, - TH.tid, - R.timestamp AS start, - R.timestamp AS END, - (R.timestamp - R.timestamp) AS duration, - R.event_id, - E.stack_id AS stack_id, - E.parent_stack_id AS parent_stack_id, - E.correlation_id AS corr_id, - E.extdata AS extdata, - E.call_stack AS call_stack, - E.line_info AS line_info + S.id, + S.guid, + S.category, + S.name, + S.nid, + S.pid, + S.tid, + S.timestamp AS `start`, + S.timestamp AS `end`, + (S.timestamp - S.timestamp) AS `duration`, + S.event_id, + S.track_id, + S.stack_id, + S.parent_stack_id, + 
S.correlation_id, + S.extdata FROM - `rocpd_sample` R - INNER JOIN `rocpd_track` T ON T.id = R.track_id - AND T.guid = R.guid - INNER JOIN `rocpd_event` E ON E.id = R.event_id - AND E.guid = R.guid - INNER JOIN `rocpd_info_process` P ON P.id = T.pid - AND P.guid = T.guid - INNER JOIN `rocpd_info_thread` TH ON TH.id = T.tid - AND TH.guid = T.guid; + `samples` S; -- -- Provides a unified view of the regions and samples @@ -279,68 +299,70 @@ CREATE VIEW SELECT K.id, K.guid, + T.nid, + T.pid, T.tid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, + E.category, R.string AS region, S.display_name AS name, - K.nid, - P.pid, - A.absolute_index AS agent_abs_index, - A.logical_index AS agent_log_index, - A.type_index AS agent_type_index, - A.type AS agent_type, - S.code_object_id AS code_object_id, + T.agent_id, + T.agent_absolute_index, + T.agent_logical_index, + T.agent_type_index, + T.agent_type, + S.code_object_id, K.kernel_id, K.dispatch_id, - K.stream_id, - K.queue_id, - Q.name AS queue, - ST.name AS stream, - K.start, - K.end, - (K.end - K.start) AS duration, + T.queue_id, + T.queue_name AS `queue`, + T.stream_id, + T.stream_name AS `stream`, + DS.value AS `start`, + DE.value AS `end`, + (DE.value - DS.value) AS `duration`, + K.event_id, + K.track_id, K.grid_size_x AS grid_x, K.grid_size_y AS grid_y, K.grid_size_z AS grid_z, + -- (K.grid_size_x * K.grid_size_y * K.grid_size_z) AS grid_size, K.workgroup_size_x AS workgroup_x, K.workgroup_size_y AS workgroup_y, K.workgroup_size_z AS workgroup_z, + -- (K.workgroup_size_x * K.workgroup_size_y * K.workgroup_size_z) AS workgroup_size, + K.workgroup_size_x AS block_size_x, + K.workgroup_size_y AS block_size_y, + K.workgroup_size_z AS block_size_z, + -- (K.workgroup_size_x * K.workgroup_size_y * K.workgroup_size_z) AS block_size, + (K.grid_size_x / K.workgroup_size_x) AS grid_size_x, + (K.grid_size_y / K.workgroup_size_y) AS grid_size_y, + (K.grid_size_z 
/ K.workgroup_size_z) AS grid_size_z, + -- (K.grid_size_x / K.workgroup_size_x) * (K.grid_size_y / K.workgroup_size_y) * (K.grid_size_z / K.workgroup_size_z) AS grid_size, K.group_segment_size AS lds_size, K.private_segment_size AS scratch_size, - S.arch_vgpr_count AS vgpr_count, - S.accum_vgpr_count, - S.sgpr_count, S.group_segment_size AS static_lds_size, S.private_segment_size AS static_scratch_size, + S.sgpr_count, + S.arch_vgpr_count, + S.accum_vgpr_count, E.stack_id, E.parent_stack_id, - E.correlation_id AS corr_id + E.correlation_id, + E.extdata FROM `rocpd_kernel_dispatch` K - INNER JOIN `rocpd_info_agent` A ON A.id = K.agent_id - AND A.guid = K.guid - INNER JOIN `rocpd_event` E ON E.id = K.event_id + INNER JOIN `tracks` T ON T.id = K.track_id + AND T.guid = K.guid + INNER JOIN `events` E ON E.id = K.event_id AND E.guid = K.guid INNER JOIN `rocpd_string` R ON R.id = K.region_name_id AND R.guid = K.guid INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id AND S.guid = K.guid - LEFT JOIN `rocpd_info_stream` ST ON ST.id = K.stream_id - AND ST.guid = K.guid - LEFT JOIN `rocpd_info_queue` Q ON Q.id = K.queue_id - AND Q.guid = K.guid - INNER JOIN `rocpd_info_process` P ON P.id = Q.pid - AND P.guid = Q.guid - INNER JOIN `rocpd_info_thread` T ON T.id = K.tid - AND T.guid = K.guid; + INNER JOIN `rocpd_timestamp` DS ON DS.id = K.start_id + AND DS.guid = K.guid + INNER JOIN `rocpd_timestamp` DE ON DE.id = K.end_id + AND DE.guid = K.guid; -- -- Performance Monitoring Counters (PMC) @@ -351,7 +373,7 @@ SELECT PMC_I.guid, PMC_I.nid, P.pid, - A.absolute_index AS agent_abs_index, + A.absolute_index AS agent_absolute_index, PMC_I.is_constant, PMC_I.is_derived, PMC_I.name, @@ -362,104 +384,44 @@ FROM `rocpd_info_pmc` PMC_I INNER JOIN `rocpd_info_agent` A ON PMC_I.agent_id = A.id AND PMC_I.guid = A.guid - INNER JOIN `rocpd_info_process` P ON P.id = PMC_I.pid + INNER JOIN `rocpd_info_process` P ON P.pid = PMC_I.pid AND PMC_I.guid = P.guid; +-- +-- Join PMC records 
with PMC info and event info CREATE VIEW IF NOT EXISTS `pmc_events` AS SELECT PMC_E.id, PMC_E.guid, PMC_E.pmc_id, - E.id AS event_id, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - ( - SELECT - display_name - FROM - `rocpd_info_kernel_symbol` KS - WHERE - KS.id = K.kernel_id - AND KS.guid = K.guid - ) AS name, - K.nid, - P.pid, - K.dispatch_id, - K.start, - K.end, - (K.end - K.start) AS duration, - PMC_I.name AS counter_name, - PMC_E.value AS counter_value + PMC_E.event_id, + E.category_id, + E.category, + PMC_I.name, + PMC_I.symbol, + PMC_E.value, + PMC_I.agent_id, + PMC_I.target_arch, + PMC_I.event_code, + PMC_I.instance_id, + PMC_I.component, + PMC_I.units, + PMC_I.value_type, + PMC_I.block, + PMC_I.expression, + PMC_I.is_constant, + PMC_I.is_derived, + PMC_I.description, + PMC_I.long_description, + PMC_I.extdata AS pmc_info_extdata, + PMC_E.extdata AS pmc_event_extdata FROM `rocpd_pmc_event` PMC_E INNER JOIN `rocpd_info_pmc` PMC_I ON PMC_I.id = PMC_E.pmc_id AND PMC_I.guid = PMC_E.guid - INNER JOIN `rocpd_event` E ON E.id = PMC_E.event_id - AND E.guid = PMC_E.guid - INNER JOIN `rocpd_kernel_dispatch` K ON K.event_id = PMC_E.event_id - AND K.guid = PMC_E.guid - INNER JOIN `rocpd_info_process` P ON P.id = K.pid - AND P.guid = K.guid; - --- events with arguments --- -CREATE VIEW IF NOT EXISTS - `events_args` AS -SELECT - E.id AS event_id, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - E.stack_id, - E.parent_stack_id, - E.correlation_id, - A.position AS arg_position, - A.type AS arg_type, - A.name AS arg_name, - A.value AS arg_value, - E.call_stack, - E.line_info, - A.extdata -FROM - `rocpd_event` E - INNER JOIN `rocpd_arg` A ON A.event_id = E.id - AND A.guid = E.guid; - --- list of astream arguments enriched by the corresponding stream descriptions -CREATE VIEW IF NOT EXISTS - `stream_args` AS -SELECT - A.id AS 
argument_id, - A.event_id AS event_id, - A.position AS arg_position, - A.type AS arg_type, - A.value AS arg_value, - JSON_EXTRACT(A.extdata, '$.stream_id') AS stream_id, - S.nid, - P.pid, - S.name AS stream_name, - S.extdata AS extdata -FROM - `rocpd_arg` A - INNER JOIN `rocpd_info_stream` S ON JSON_EXTRACT(A.extdata, '$.stream_id') = S.id - AND A.guid = S.guid - INNER JOIN `rocpd_info_process` P ON P.id = S.pid - AND P.guid = S.guid -WHERE - A.name = 'stream'; + INNER JOIN `events` E ON E.id = PMC_E.event_id + AND E.guid = PMC_E.guid; -- -- @@ -468,63 +430,57 @@ CREATE VIEW IF NOT EXISTS SELECT M.id, M.guid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - M.nid, - P.pid, + T.nid, + T.pid, T.tid, - M.start, - M.end, - (M.end - M.start) AS duration, - S.string AS name, + E.id AS event_id, + E.category, + NS.string AS name, R.string AS region_name, - M.stream_id, - M.queue_id, - ST.name AS stream_name, - Q.name AS queue_name, + DS.value AS `start`, + DE.value AS `end`, + (DE.value - DS.value) AS `duration`, + T.queue_id, + T.queue_name, + T.stream_id, + T.stream_name, M.size, dst_agent.name AS dst_device, - dst_agent.absolute_index AS dst_agent_abs_index, - dst_agent.logical_index AS dst_agent_log_index, + dst_agent.id AS dst_agent_id, + dst_agent.absolute_index AS dst_agent_absolute_index, + dst_agent.logical_index AS dst_agent_logical_index, dst_agent.type_index AS dst_agent_type_index, dst_agent.type AS dst_agent_type, M.dst_address, src_agent.name AS src_device, - src_agent.absolute_index AS src_agent_abs_index, - src_agent.logical_index AS src_agent_log_index, + src_agent.id AS src_agent_id, + src_agent.absolute_index AS src_agent_absolute_index, + src_agent.logical_index AS src_agent_logical_index, src_agent.type_index AS src_agent_type_index, src_agent.type AS src_agent_type, M.src_address, E.stack_id, E.parent_stack_id, - E.correlation_id AS corr_id + E.correlation_id, + E.extdata FROM 
`rocpd_memory_copy` M - INNER JOIN `rocpd_string` S ON S.id = M.name_id - AND S.guid = M.guid + INNER JOIN `events` E ON E.id = M.event_id + AND E.guid = M.guid + INNER JOIN `tracks` T ON T.id = M.track_id + AND T.guid = M.guid + INNER JOIN `rocpd_string` NS ON NS.id = M.name_id + AND NS.guid = M.guid LEFT JOIN `rocpd_string` R ON R.id = M.region_name_id AND R.guid = M.guid INNER JOIN `rocpd_info_agent` dst_agent ON dst_agent.id = M.dst_agent_id AND dst_agent.guid = M.guid INNER JOIN `rocpd_info_agent` src_agent ON src_agent.id = M.src_agent_id AND src_agent.guid = M.guid - LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id - AND Q.guid = M.guid - LEFT JOIN `rocpd_info_stream` ST ON ST.id = M.stream_id - AND ST.guid = M.guid - INNER JOIN `rocpd_event` E ON E.id = M.event_id - AND E.guid = M.guid - INNER JOIN `rocpd_info_process` P ON P.id = M.pid - AND P.guid = M.guid - INNER JOIN `rocpd_info_thread` T ON T.id = M.tid - AND T.guid = M.guid; + INNER JOIN `rocpd_timestamp` DS ON DS.id = M.start_id + AND DS.guid = M.guid + INNER JOIN `rocpd_timestamp` DE ON DE.id = M.end_id + AND DE.guid = M.guid; -- -- @@ -533,194 +489,111 @@ CREATE VIEW IF NOT EXISTS SELECT M.id, M.guid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - M.nid, - P.pid, + T.nid, + T.pid, T.tid, - M.start, - M.end, - (M.end - M.start) AS duration, + E.id AS event_id, + E.category, + NS.string AS name, + R.string AS region_name, + DS.value AS `start`, + DE.value AS `end`, + (DE.value - DS.value) AS `duration`, + T.queue_id, + T.queue_name, + T.stream_id, + T.stream_name, + M.size, M.type, M.level, - A.name AS agent_name, - A.absolute_index AS agent_abs_index, - A.logical_index AS agent_log_index, - A.type_index AS agent_type_index, - A.type AS agent_type, + T.agent_name, + T.agent_absolute_index, + T.agent_logical_index, + T.agent_type_index, + T.agent_type, M.address, - M.size, - M.queue_id, - Q.name AS queue_name, - 
M.stream_id, - ST.name AS stream_name, E.stack_id, E.parent_stack_id, - E.correlation_id AS corr_id -FROM - `rocpd_memory_allocate` M - LEFT JOIN `rocpd_info_agent` A ON M.agent_id = A.id - AND M.guid = A.guid - LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id - AND Q.guid = M.guid - LEFT JOIN `rocpd_info_stream` ST ON ST.id = M.stream_id - AND ST.guid = M.guid - INNER JOIN `rocpd_event` E ON E.id = M.event_id - AND E.guid = M.guid - INNER JOIN `rocpd_info_process` P ON P.id = M.pid - AND P.guid = M.guid - INNER JOIN `rocpd_info_thread` T ON T.id = M.tid - AND P.guid = M.guid; - --- --- -CREATE VIEW IF NOT EXISTS - `scratch_memory` AS -SELECT - M.id, - M.guid, - M.nid, - P.pid, - M.type AS operation, - A.name AS agent_name, - A.absolute_index AS agent_abs_index, - A.logical_index AS agent_log_index, - A.type_index AS agent_type_index, - A.type AS agent_type, - M.queue_id, - T.tid, - JSON_EXTRACT(M.extdata, '$.flags') AS alloc_flags, - M.start, - M.end, - (M.end - M.start) AS duration, - M.size, - M.address, E.correlation_id, - E.stack_id, - E.parent_stack_id, - E.correlation_id AS corr_id, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - E.extdata AS event_extdata + E.extdata FROM `rocpd_memory_allocate` M - LEFT JOIN `rocpd_info_agent` A ON M.agent_id = A.id - AND M.guid = A.guid - LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id - AND Q.guid = M.guid - INNER JOIN `rocpd_event` E ON E.id = M.event_id + INNER JOIN `events` E ON E.id = M.event_id AND E.guid = M.guid - INNER JOIN `rocpd_info_process` P ON P.id = M.pid - AND P.guid = M.guid - INNER JOIN `rocpd_info_thread` T ON T.id = M.tid - AND T.guid = M.guid -WHERE - M.level = 'SCRATCH' -ORDER BY - M.start ASC; + INNER JOIN `tracks` T ON T.id = M.track_id + AND E.guid = M.guid + INNER JOIN `rocpd_string` NS ON NS.id = M.name_id + AND NS.guid = M.guid + LEFT JOIN `rocpd_string` R ON R.id = M.region_name_id + AND R.guid = M.guid + INNER 
JOIN `rocpd_timestamp` DS ON DS.id = M.start_id + AND DS.guid = M.guid + INNER JOIN `rocpd_timestamp` DE ON DE.id = M.end_id + AND DE.guid = M.guid; -- --- +-- PMC events specific to kernels CREATE VIEW IF NOT EXISTS - `counters_collection` AS + `kernel_pmc_events` AS SELECT - MIN(PMC_E.id) AS id, - PMC_E.guid, - K.dispatch_id, - K.kernel_id, - E.id AS event_id, - E.correlation_id, - E.stack_id, - E.parent_stack_id, - P.pid, - T.tid, + K.id, + K.guid, + K.nid, + K.pid, + K.tid, + K.category, + K.region, + K.name, K.agent_id, - A.absolute_index AS agent_abs_index, - A.logical_index AS agent_log_index, - A.type_index AS agent_type_index, - A.type AS agent_type, + K.agent_absolute_index, + K.agent_logical_index, + K.agent_type_index, + K.agent_type, + K.code_object_id, + K.kernel_id, + K.dispatch_id, K.queue_id, - k.grid_size_x AS grid_size_x, - k.grid_size_y AS grid_size_y, - k.grid_size_z AS grid_size_z, - (K.grid_size_x * K.grid_size_y * K.grid_size_z) AS grid_size, - S.display_name AS kernel_name, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = K.region_name_id - AND RS.guid = K.guid - ) AS kernel_region, - K.workgroup_size_x AS workgroup_size_x, - K.workgroup_size_y AS workgroup_size_y, - K.workgroup_size_z AS workgroup_size_z, - (K.workgroup_size_x * K.workgroup_size_y * K.workgroup_size_z) AS workgroup_size, - K.group_segment_size AS lds_block_size, - K.private_segment_size AS scratch_size, - S.arch_vgpr_count AS vgpr_count, - S.accum_vgpr_count, - S.sgpr_count, - PMC_I.name AS counter_name, - PMC_I.symbol AS counter_symbol, - PMC_I.component, - PMC_I.description, - PMC_I.block, - PMC_I.expression, - PMC_I.value_type, - PMC_I.id AS counter_id, - SUM(PMC_E.value) AS value, + K.queue, + K.stream_id, + K.stream, K.start, K.end, - PMC_I.is_constant, - PMC_I.is_derived, - (K.end - K.start) AS duration, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - K.nid, - E.extdata, - 
S.code_object_id + K.duration, + K.event_id, + K.track_id, + K.stack_id, + K.parent_stack_id, + K.correlation_id, + K.grid_x, + K.grid_y, + K.grid_z, + K.workgroup_x, + K.workgroup_y, + K.workgroup_z, + K.lds_size, + K.scratch_size, + K.static_lds_size, + K.static_scratch_size, + K.sgpr_count, + K.arch_vgpr_count, + K.accum_vgpr_count, + E.pmc_id, + E.name AS `pmc_name`, + E.symbol AS `pmc_symbol`, + E.value AS `pmc_value`, + E.agent_id AS `pmc_agent_id`, + E.target_arch AS `pmc_target_arch`, + E.event_code AS `pmc_event_code`, + E.instance_id AS `pmc_instance_id`, + E.component AS `pmc_component`, + E.units AS `pmc_units`, + E.value_type AS `pmc_value_type`, + E.block AS `pmc_block`, + E.expression AS `pmc_expression`, + E.is_constant AS `pmc_is_constant`, + E.is_derived AS `pmc_is_derived`, + E.description AS `pmc_description`, + E.long_description AS `pmc_long_description` FROM - `rocpd_pmc_event` PMC_E - INNER JOIN `rocpd_info_pmc` PMC_I ON PMC_I.id = PMC_E.pmc_id - AND PMC_I.guid = PMC_E.guid - INNER JOIN `rocpd_event` E ON E.id = PMC_E.event_id - AND E.guid = PMC_E.guid - INNER JOIN `rocpd_kernel_dispatch` K ON K.event_id = PMC_E.event_id - AND K.guid = PMC_E.guid - INNER JOIN `rocpd_info_agent` A ON A.id = K.agent_id - AND A.guid = K.guid - INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id - AND S.guid = K.guid - INNER JOIN `rocpd_info_process` P ON P.id = K.pid - AND P.guid = K.guid - INNER JOIN `rocpd_info_thread` T ON T.id = K.tid - AND T.guid = K.guid -GROUP BY - PMC_E.guid, - K.dispatch_id, - PMC_I.name, - K.agent_id; + `kernels` K + INNER JOIN `pmc_events` E ON E.event_id = K.event_id; diff --git a/source/share/rocprofiler-sdk-rocpd/marker_views.sql b/source/share/rocprofiler-sdk-rocpd/marker_views.sql deleted file mode 100644 index 9cd821a41..000000000 --- a/source/share/rocprofiler-sdk-rocpd/marker_views.sql +++ /dev/null @@ -1,3 +0,0 @@ --- --- Views related to markers --- diff --git a/source/share/rocprofiler-sdk-rocpd/rocpd_indexes.sql 
b/source/share/rocprofiler-sdk-rocpd/rocpd_indexes.sql index 43d722ea1..ae8ee9c13 100644 --- a/source/share/rocprofiler-sdk-rocpd/rocpd_indexes.sql +++ b/source/share/rocprofiler-sdk-rocpd/rocpd_indexes.sql @@ -2,44 +2,21 @@ -- Indexes for the various fields -- --- string field --- CREATE INDEX `rocpd_string{{uuid}}_string_idx` ON `rocpd_string{{uuid}}` ("string"); +-- these have been verified to improve performance in perfetto +CREATE INDEX `rocpd_arg{{uuid}}_event_id_idx` ON `rocpd_arg{{uuid}}` ("event_id"); +CREATE INDEX `rocpd_pmc_event{{uuid}}_event_id_idx` ON `rocpd_pmc_event{{uuid}}` ("event_id"); +CREATE INDEX `rocpd_arg{{uuid}}_guid_event_id_idx` ON `rocpd_arg{{uuid}}` ("guid", "event_id"); +CREATE INDEX `rocpd_pmc_event{{uuid}}_guid_event_id_idx` ON `rocpd_pmc_event{{uuid}}` ("guid", "event_id"); +CREATE INDEX `rocpd_event{{uuid}}_category_id_idx` ON `rocpd_event{{uuid}}` ("category_id"); --- guid field --- CREATE INDEX `rocpd_string{{uuid}}_guid_idx` ON `rocpd_string{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_node{{uuid}}_guid_idx` ON `rocpd_info_node{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_process{{uuid}}_guid_idx` ON `rocpd_info_process{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_thread{{uuid}}_guid_idx` ON `rocpd_info_thread{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_agent{{uuid}}_guid_idx` ON `rocpd_info_agent{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_queue{{uuid}}_guid_idx` ON `rocpd_info_queue{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_stream{{uuid}}_guid_idx` ON `rocpd_info_stream{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_pmc{{uuid}}_guid_idx` ON `rocpd_info_pmc{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_code_object{{uuid}}_guid_idx` ON `rocpd_info_code_object{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_info_kernel_symbol{{uuid}}_guid_idx` ON `rocpd_info_kernel_symbol{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_track{{uuid}}_guid_idx` ON 
`rocpd_track{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_event{{uuid}}_guid_idx` ON `rocpd_event{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_arg{{uuid}}_guid_idx` ON `rocpd_arg{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_pmc_event{{uuid}}_guid_idx` ON `rocpd_pmc_event{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_sample{{uuid}}_guid_idx` ON `rocpd_sample{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_memory_copy{{uuid}}_guid_idx` ON `rocpd_memory_copy{{uuid}}` ("id", "guid"); --- CREATE INDEX `rocpd_memory_allocate{{uuid}}_guid_idx` ON `rocpd_memory_allocate{{uuid}}` ("id", "guid"); +-- these are speculative +CREATE INDEX `rocpd_info_process{{uuid}}_pid_idx` ON `rocpd_info_process{{uuid}}` ("pid"); +CREATE INDEX `rocpd_info_thread{{uuid}}_tid_idx` ON `rocpd_info_thread{{uuid}}` ("tid"); +CREATE INDEX `rocpd_info_process{{uuid}}_guid_pid_idx` ON `rocpd_info_process{{uuid}}` ("guid", "pid"); +CREATE INDEX `rocpd_info_thread{{uuid}}_guid_tid_idx` ON `rocpd_info_thread{{uuid}}` ("guid", "tid"); +CREATE INDEX `rocpd_timestamp{{uuid}}_value_idx` ON `rocpd_timestamp{{uuid}}` ("value"); +CREATE INDEX `rocpd_timestamp{{uuid}}_track_id_idx` ON `rocpd_timestamp{{uuid}}` ("track_id"); --- CREATE INDEX `rocpd_event{{uuid}}_category_idx` ON `rocpd_event{{uuid}}` ("id", "guid", "category_id"); --- CREATE INDEX `rocpd_region{{uuid}}_event_idx` ON `rocpd_region{{uuid}}` ("id", "guid", "event_id"); --- CREATE INDEX `rocpd_region{{uuid}}_name_idx` ON `rocpd_region{{uuid}}` ("id", "guid", "name_id"); --- CREATE INDEX `rocpd_sample{{uuid}}_event_idx` ON `rocpd_sample{{uuid}}` ("id", "guid", "event_id"); --- CREATE INDEX `rocpd_sample{{uuid}}_track_idx` ON `rocpd_sample{{uuid}}` ("id", "guid", "track_id"); --- CREATE INDEX `rocpd_track{{uuid}}_name_idx` ON 
`rocpd_track{{uuid}}` ("id", "guid", "name_id"); - --- CREATE INDEX `rocpd_memory_copy{{uuid}}_guid_nid_pid_idx` ON `rocpd_memory_copy{{uuid}}` ("guid", "nid", "pid"); --- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_nid_pid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("guid", "nid", "pid"); --- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("guid", "nid", "pid"); --- CREATE INDEX `rocpd_sample{{uuid}}_guid_nid_pid_idx` ON `rocpd_sample{{uuid}}` ("guid", "nid", "pid"); - --- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("guid"); --- CREATE INDEX `rocpd_region{{uuid}}_nid_idx` ON `rocpd_region{{uuid}}` ("nid"); --- CREATE INDEX `rocpd_region{{uuid}}_pid_idx` ON `rocpd_region{{uuid}}` ("pid"); --- CREATE INDEX `rocpd_region{{uuid}}_start_idx` ON `rocpd_region{{uuid}}` ("start"); --- CREATE INDEX `rocpd_region{{uuid}}_end_idx` ON `rocpd_region{{uuid}}` ("end"); +-- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_pid_tid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("guid", "pid", "tid"); +CREATE INDEX `rocpd_memory_copy{{uuid}}_guid_pid_tid_idx` ON `rocpd_memory_copy{{uuid}}` ("guid", "pid", "tid"); +-- CREATE INDEX `rocpd_memory_allocate{{uuid}}_guid_pid_tid_idx` ON `rocpd_memory_allocate{{uuid}}` ("guid", "pid", "tid"); diff --git a/source/share/rocprofiler-sdk-rocpd/rocpd_metadata.sql b/source/share/rocprofiler-sdk-rocpd/rocpd_metadata.sql new file mode 100644 index 000000000..4a8a2d9f1 --- /dev/null +++ b/source/share/rocprofiler-sdk-rocpd/rocpd_metadata.sql @@ -0,0 +1,10 @@ +-- +-- Standard metadata insertion +-- +-- +INSERT INTO + `rocpd_metadata{{uuid}}` ("tag", "value") +VALUES + ("schema_version", "3"), + ("uuid", "{{uuid}}"), + ("guid", "{{guid}}"); diff --git a/source/share/rocprofiler-sdk-rocpd/rocpd_tables.sql b/source/share/rocprofiler-sdk-rocpd/rocpd_tables.sql index 65f3c6308..6d58a76bc 100644 --- a/source/share/rocprofiler-sdk-rocpd/rocpd_tables.sql +++ 
b/source/share/rocprofiler-sdk-rocpd/rocpd_tables.sql @@ -21,6 +21,7 @@ CREATE TABLE IF NOT EXISTS "guid" TEXT DEFAULT "{{guid}}" NOT NULL, "hash" BIGINT NOT NULL UNIQUE, "machine_id" TEXT NOT NULL UNIQUE, + "name" TEXT, -- optional user provided name "system_name" TEXT, "hostname" TEXT, "release" TEXT, @@ -36,6 +37,7 @@ CREATE TABLE IF NOT EXISTS "nid" INTEGER NOT NULL, "ppid" INTEGER, "pid" INTEGER NOT NULL, + "name" TEXT, -- optional user provided name "init" BIGINT, "fini" BIGINT, "start" BIGINT, @@ -54,7 +56,7 @@ CREATE TABLE IF NOT EXISTS "ppid" INTEGER, "pid" INTEGER NOT NULL, "tid" INTEGER NOT NULL, - "name" TEXT, + "name" TEXT, -- optional user provided name "start" BIGINT, "end" BIGINT, "extdata" JSONB DEFAULT "{}" NOT NULL, @@ -62,6 +64,15 @@ CREATE TABLE IF NOT EXISTS FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE ); +-- Stores all the categories for filtering +CREATE TABLE IF NOT EXISTS + `rocpd_info_category{{uuid}}` ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "guid" TEXT DEFAULT "{{guid}}" NOT NULL, + "name" TEXT NOT NULL, + "extdata" JSONB DEFAULT "{}" NOT NULL + ); + CREATE TABLE IF NOT EXISTS `rocpd_info_agent{{uuid}}` ( "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, @@ -73,11 +84,11 @@ CREATE TABLE IF NOT EXISTS "logical_index" INTEGER, "type_index" INTEGER, "uuid" INTEGER, - "name" TEXT, + "name" TEXT, -- optional user provided name + "generic_name" TEXT, "model_name" TEXT, "vendor_name" TEXT, "product_name" TEXT, - "user_name" TEXT, "extdata" JSONB DEFAULT "{}" NOT NULL, FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE @@ -89,7 +100,7 @@ CREATE TABLE IF NOT EXISTS "guid" TEXT DEFAULT "{{guid}}" NOT NULL, "nid" INTEGER NOT NULL, "pid" INTEGER NOT NULL, - "name" TEXT, + "name" TEXT, -- optional user provided name "extdata" JSONB DEFAULT "{}" NOT NULL, FOREIGN KEY (nid) REFERENCES 
`rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE @@ -101,14 +112,12 @@ CREATE TABLE IF NOT EXISTS "guid" TEXT DEFAULT "{{guid}}" NOT NULL, "nid" INTEGER NOT NULL, "pid" INTEGER NOT NULL, - "name" TEXT, + "name" TEXT, -- optional user provided name "extdata" JSONB DEFAULT "{}" NOT NULL, FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE ); --- 2993533, 2269219937, 2993533 --- 2993533, 2269219937, 2993533 -- Performance monitoring counters (PMC) descriptions CREATE TABLE IF NOT EXISTS `rocpd_info_pmc{{uuid}}` ( @@ -122,6 +131,7 @@ CREATE TABLE IF NOT EXISTS "instance_id" INTEGER, "name" TEXT NOT NULL, "symbol" TEXT NOT NULL, + "qualifier" TEXT, "description" TEXT, "long_description" TEXT DEFAULT "", "component" TEXT, @@ -178,22 +188,103 @@ CREATE TABLE IF NOT EXISTS FOREIGN KEY (code_object_id) REFERENCES `rocpd_info_code_object{{uuid}}` (id) ON UPDATE CASCADE ); +-- Info related to address ranges +-- This is used to store the base address, low address, and high address +-- for a given address range. Base address is the runtime load offset of the binary. +-- The address low and high are the range within the binary. 
If base address is non-zero, +-- then the low and high addresses are base + offset within binary +CREATE TABLE IF NOT EXISTS + `rocpd_info_address_range{{uuid}}` ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "guid" TEXT DEFAULT "{{guid}}" NOT NULL, + "nid" INTEGER NOT NULL, + "pid" INTEGER NOT NULL, + "address_base" BIGINT, + "address_low" BIGINT CHECK ("address_low" >= "address_base"), + "address_high" BIGINT CHECK ("address_high" >= "address_low"), + "extdata" JSONB DEFAULT "{}" NOT NULL, + FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE + ); + +-- Info related to source code information +CREATE TABLE IF NOT EXISTS + `rocpd_info_source_code{{uuid}}` ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "guid" TEXT DEFAULT "{{guid}}" NOT NULL, + "nid" INTEGER NOT NULL, + "pid" INTEGER NOT NULL, + "address_id" INTEGER, + "file" TEXT, + "line_number" INTEGER, -- starting line number + "lines" JSONB DEFAULT "[]" NOT NULL, -- put the source code lines here + "instructions" JSONB DEFAULT "[]" NOT NULL, -- put the instructions/assembly code here + "extdata" JSONB DEFAULT "{}" NOT NULL, + FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (address_id) REFERENCES `rocpd_info_address_range{{uuid}}` (id) ON UPDATE CASCADE + ); + +-- Info related to program counter (PC) addresses +-- This is used to store the function name, file, and line number +-- for a given PC address +CREATE TABLE IF NOT EXISTS + `rocpd_info_pc{{uuid}}` ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "guid" TEXT DEFAULT "{{guid}}" NOT NULL, + "nid" INTEGER NOT NULL, + "pid" INTEGER NOT NULL, + "function" TEXT NOT NULL, + "address_id" INTEGER, + "file" TEXT, + "line" INTEGER, + "extdata" JSONB DEFAULT "{}" NOT NULL, + FOREIGN KEY (nid) REFERENCES 
`rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (address_id) REFERENCES `rocpd_info_address_range{{uuid}}` (id) ON UPDATE CASCADE + ); + +-- +-- We need to find a place for storing the assembly / instructions in the above + -- Stores repetitive info for samples CREATE TABLE IF NOT EXISTS `rocpd_track{{uuid}}` ( "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "guid" TEXT DEFAULT "{{guid}}" NOT NULL, "nid" INTEGER NOT NULL, + "ppid" INTEGER, "pid" INTEGER, "tid" INTEGER, + "agent_id" INTEGER, + "queue_id" INTEGER, + "stream_id" INTEGER, "name_id" INTEGER, "extdata" JSONB DEFAULT "{}" NOT NULL, FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE ); +-- Stores all the timestamps +CREATE TABLE IF NOT EXISTS + `rocpd_timestamp{{uuid}}` ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "guid" TEXT DEFAULT "{{guid}}" NOT NULL, + "value" BIGINT NOT NULL, + "phase" INTEGER CHECK ("phase" IN (0, 1, 2)), + -- Phases: + -- 0 = none/instantaneous + -- 1 = start/enter/load + -- 2 = end/exit/unload + "track_id" INTEGER, -- set to NULL if this timestamp is associated with more than one track (not recommended) + FOREIGN KEY (track_id) REFERENCES `rocpd_track{{uuid}}` (id) ON UPDATE CASCADE + ); + -- Storage for a region, instant, and counter CREATE TABLE IF NOT EXISTS `rocpd_event{{uuid}}` ( @@ -203,10 +294,8 @@ CREATE TABLE IF NOT EXISTS "stack_id" 
INTEGER, "parent_stack_id" INTEGER, "correlation_id" INTEGER, - "call_stack" JSONB DEFAULT "{}" NOT NULL, - "line_info" JSONB DEFAULT "{}" NOT NULL, "extdata" JSONB DEFAULT "{}" NOT NULL, - FOREIGN KEY (category_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE + FOREIGN KEY (category_id) REFERENCES `rocpd_info_category{{uuid}}` (id) ON UPDATE CASCADE ); -- stores arguments for events @@ -223,6 +312,33 @@ CREATE TABLE IF NOT EXISTS FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE ); +-- stores line information for events +CREATE TABLE IF NOT EXISTS + `rocpd_line_info{{uuid}}` ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "guid" TEXT DEFAULT "{{guid}}" NOT NULL, + "event_id" INTEGER NOT NULL, + "source_code_id" INTEGER, + "pc_id" INTEGER, + "extdata" JSONB DEFAULT "{}" NOT NULL, + FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (source_code_id) REFERENCES `rocpd_info_source_code{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (pc_id) REFERENCES `rocpd_info_pc{{uuid}}` (id) ON UPDATE CASCADE + ); + +-- stores call stack information for events +CREATE TABLE IF NOT EXISTS + `rocpd_call_stack{{uuid}}` ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "guid" TEXT DEFAULT "{{guid}}" NOT NULL, + "event_id" INTEGER NOT NULL, + "pc_id" INTEGER, + "depth" INTEGER NOT NULL, -- depth of the call stack entry, zero is the top of the stack + "extdata" JSONB DEFAULT "{}" NOT NULL, + FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (pc_id) REFERENCES `rocpd_info_pc{{uuid}}` (id) ON UPDATE CASCADE + ); + -- Region with a start/stop on the same thread (CPU) CREATE TABLE IF NOT EXISTS `rocpd_pmc_event{{uuid}}` ( @@ -241,18 +357,16 @@ CREATE TABLE IF NOT EXISTS `rocpd_region{{uuid}}` ( "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "guid" TEXT DEFAULT "{{guid}}" NOT NULL, - "nid" INTEGER NOT NULL, - "pid" INTEGER NOT NULL, - "tid" INTEGER NOT 
NULL, - "start" BIGINT NOT NULL, - "end" BIGINT NOT NULL, + "track_id" INTEGER NOT NULL, "name_id" INTEGER NOT NULL, + "start_id" INTEGER NOT NULL, + "end_id" INTEGER NOT NULL, "event_id" INTEGER, "extdata" JSONB DEFAULT "{}" NOT NULL, - FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (track_id) REFERENCES `rocpd_track{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (start_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (end_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE ); @@ -262,10 +376,13 @@ CREATE TABLE IF NOT EXISTS "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "guid" TEXT DEFAULT "{{guid}}" NOT NULL, "track_id" INTEGER NOT NULL, - "timestamp" BIGINT NOT NULL, + "name_id" INTEGER NOT NULL, + "timestamp_id" INTEGER NOT NULL, "event_id" INTEGER, "extdata" JSONB DEFAULT "{}" NOT NULL, FOREIGN KEY (track_id) REFERENCES `rocpd_track{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (timestamp_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE ); @@ -273,16 +390,11 @@ CREATE TABLE IF NOT EXISTS `rocpd_kernel_dispatch{{uuid}}` ( "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "guid" TEXT DEFAULT "{{guid}}" NOT NULL, - "nid" INTEGER NOT NULL, - "pid" INTEGER NOT NULL, - "tid" INTEGER, - "agent_id" INTEGER NOT NULL, + "track_id" INTEGER NOT NULL, "kernel_id" INTEGER NOT NULL, "dispatch_id" INTEGER NOT NULL, - "queue_id" INTEGER NOT NULL, - "stream_id" INTEGER NOT NULL, - "start" 
BIGINT NOT NULL, - "end" BIGINT NOT NULL, + "start_id" INTEGER NOT NULL, + "end_id" INTEGER NOT NULL, "private_segment_size" INTEGER, "group_segment_size" INTEGER, "workgroup_size_x" INTEGER NOT NULL, @@ -294,13 +406,10 @@ CREATE TABLE IF NOT EXISTS "region_name_id" INTEGER, "event_id" INTEGER, "extdata" JSONB DEFAULT "{}" NOT NULL, - FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (track_id) REFERENCES `rocpd_track{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (kernel_id) REFERENCES `rocpd_info_kernel_symbol{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (start_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (end_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (region_name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE ); @@ -309,30 +418,24 @@ CREATE TABLE IF NOT EXISTS `rocpd_memory_copy{{uuid}}` ( "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "guid" TEXT DEFAULT "{{guid}}" NOT NULL, - "nid" INTEGER NOT NULL, - "pid" INTEGER NOT NULL, - "tid" INTEGER, - "start" BIGINT NOT NULL, - "end" BIGINT NOT NULL, + "track_id" INTEGER NOT NULL, + "start_id" INTEGER NOT NULL, + "end_id" INTEGER NOT NULL, "name_id" INTEGER NOT NULL, "dst_agent_id" INTEGER, "dst_address" INTEGER, "src_agent_id" INTEGER, "src_address" INTEGER, "size" INTEGER NOT NULL, - "queue_id" INTEGER, - "stream_id" INTEGER, "region_name_id" INTEGER, "event_id" INTEGER, "extdata" JSONB 
DEFAULT "{}" NOT NULL, - FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (track_id) REFERENCES `rocpd_track{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (start_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (end_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (dst_agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (src_agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (region_name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE ); @@ -342,32 +445,21 @@ CREATE TABLE IF NOT EXISTS `rocpd_memory_allocate{{uuid}}` ( "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "guid" TEXT DEFAULT "{{guid}}" NOT NULL, - "nid" INTEGER NOT NULL, - "pid" INTEGER NOT NULL, - "tid" INTEGER, - "agent_id" INTEGER, + "track_id" INTEGER NOT NULL, "type" TEXT CHECK ("type" IN ('ALLOC', 'FREE', 'REALLOC', 'RECLAIM')), "level" TEXT CHECK ("level" IN ('REAL', 'VIRTUAL', 'SCRATCH')), - "start" BIGINT NOT NULL, - "end" BIGINT NOT NULL, + "start_id" INTEGER NOT NULL, + "end_id" INTEGER NOT NULL, + "name_id" INTEGER NOT NULL, "address" INTEGER, "size" INTEGER NOT NULL, - "queue_id" INTEGER, - "stream_id" INTEGER, + "region_name_id" INTEGER, "event_id" INTEGER, "extdata" JSONB DEFAULT "{}" NOT NULL, - FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (pid) REFERENCES 
`rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE, - FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (track_id) REFERENCES `rocpd_track{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (start_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (end_id) REFERENCES `rocpd_timestamp{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE, + FOREIGN KEY (region_name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE, FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE ); - -INSERT INTO - `rocpd_metadata{{uuid}}` ("tag", "value") -VALUES - ("schema_version", "3"), - ("uuid", "{{uuid}}"), - ("guid", "{{guid}}"); diff --git a/source/share/rocprofiler-sdk-rocpd/rocpd_views.sql b/source/share/rocprofiler-sdk-rocpd/rocpd_views.sql index 8eaed1125..527b2f02a 100644 --- a/source/share/rocprofiler-sdk-rocpd/rocpd_views.sql +++ b/source/share/rocprofiler-sdk-rocpd/rocpd_views.sql @@ -33,6 +33,13 @@ SELECT FROM `rocpd_info_thread{{uuid}}`; +CREATE VIEW IF NOT EXISTS + `rocpd_info_category` AS +SELECT + * +FROM + `rocpd_info_category{{uuid}}`; + CREATE VIEW IF NOT EXISTS `rocpd_info_agent` AS SELECT @@ -75,6 +82,34 @@ SELECT FROM `rocpd_info_kernel_symbol{{uuid}}`; +CREATE VIEW IF NOT EXISTS + `rocpd_info_address_range` AS +SELECT + * +FROM + `rocpd_info_address_range{{uuid}}`; + +CREATE VIEW IF NOT EXISTS + `rocpd_info_source_code` AS +SELECT + * +FROM + `rocpd_info_source_code{{uuid}}`; + +CREATE VIEW IF NOT EXISTS + `rocpd_info_pc` AS +SELECT + * +FROM + `rocpd_info_pc{{uuid}}`; + +CREATE VIEW IF NOT EXISTS + `rocpd_timestamp` AS +SELECT + * +FROM + 
`rocpd_timestamp{{uuid}}`; + CREATE VIEW IF NOT EXISTS `rocpd_track` AS SELECT @@ -96,6 +131,20 @@ SELECT FROM `rocpd_arg{{uuid}}`; +CREATE VIEW IF NOT EXISTS + `rocpd_line_info` AS +SELECT + * +FROM + `rocpd_line_info{{uuid}}`; + +CREATE VIEW IF NOT EXISTS + `rocpd_call_stack` AS +SELECT + * +FROM + `rocpd_call_stack{{uuid}}`; + CREATE VIEW IF NOT EXISTS `rocpd_pmc_event` AS SELECT diff --git a/source/share/rocprofiler-sdk-rocpd/summary_views.sql b/source/share/rocprofiler-sdk-rocpd/summary_views.sql index 357eecec0..d2ee8046b 100644 --- a/source/share/rocprofiler-sdk-rocpd/summary_views.sql +++ b/source/share/rocprofiler-sdk-rocpd/summary_views.sql @@ -6,7 +6,7 @@ CREATE VIEW IF NOT EXISTS `top_kernels` AS SELECT - S.display_name AS name, + K.name, COUNT(K.kernel_id) AS total_calls, SUM(K.end - K.start) / 1000.0 AS total_duration, (SUM(K.end - K.start) / COUNT(K.kernel_id)) / 1000.0 AS average, @@ -14,12 +14,10 @@ SELECT SELECT SUM(A.end - A.start) FROM - `rocpd_kernel_dispatch` A + `kernels` A ) AS percentage FROM - `rocpd_kernel_dispatch` K - INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id - AND S.guid = K.guid + `kernels` K GROUP BY name ORDER BY @@ -39,46 +37,46 @@ FROM ( SELECT agent_id, - guid, - SUM(END - start) AS GpuTime + `guid`, + SUM(`end` - `start`) AS GpuTime FROM ( SELECT agent_id, - guid, - END, - start + `guid`, + `end`, + `start` FROM - `rocpd_kernel_dispatch` + `kernels` UNION ALL SELECT dst_agent_id AS agent_id, - guid, - END, - start + `guid`, + `end`, + `start` FROM - `rocpd_memory_copy` + `memory_copies` ) GROUP BY agent_id, - guid + `guid` ) A INNER JOIN ( SELECT - MAX(END) - MIN(start) AS WallTime + MAX(`end`) - MIN(`start`) AS WallTime FROM ( SELECT - END, - start + `end`, + `start` FROM - `rocpd_kernel_dispatch` + `kernels` UNION ALL SELECT - END, - start + `end`, + `start` FROM - `rocpd_memory_copy` + `memory_copies` ) ) W ON 1 = 1 INNER JOIN `rocpd_info_agent` AG ON AG.id = A.agent_id @@ -98,53 +96,47 @@ FROM ( -- Kernel 
operations SELECT - ks.display_name AS name, - (kd.end - kd.start) AS duration + K.name, + K.duration FROM - `rocpd_kernel_dispatch` kd - INNER JOIN `rocpd_info_kernel_symbol` ks ON kd.kernel_id = ks.id - AND kd.guid = ks.guid + `kernels` K UNION ALL -- Memory operations SELECT - rs.string AS name, - (END - start) AS duration + MC.name, + MC.duration FROM - `rocpd_memory_copy` mc - INNER JOIN `rocpd_string` rs ON rs.id = mc.name_id - AND rs.guid = mc.guid + `memory_copies` MC UNION ALL -- Regions SELECT - rs.string AS name, - (END - start) AS duration + R.name, + R.duration FROM - `rocpd_region` rr - INNER JOIN `rocpd_string` rs ON rs.id = rr.name_id - AND rs.guid = rr.guid + `regions` R ) operations CROSS JOIN ( SELECT - SUM(END - start) AS total_time + SUM(`end` - `start`) AS total_time FROM ( SELECT - END, - start + `end`, + `start` FROM - `rocpd_kernel_dispatch` + `kernels` UNION ALL SELECT - END, - start + `end`, + `start` FROM - `rocpd_memory_copy` + `memory_copies` UNION ALL SELECT - END, - start + `end`, + `start` FROM - `rocpd_region` + `regions` ) ) TOTAL GROUP BY diff --git a/tests/pytest-packages/tests/rocprofv3.py b/tests/pytest-packages/tests/rocprofv3.py index 48f018eb0..9f28a6117 100644 --- a/tests/pytest-packages/tests/rocprofv3.py +++ b/tests/pytest-packages/tests/rocprofv3.py @@ -169,36 +169,27 @@ def test_rocpd_data( mapping = { "hip": ( "hip_api", - ( - "HIP_COMPILER_API", - "HIP_COMPILER_API_EXT", - "HIP_RUNTIME_API", - "HIP_RUNTIME_API_EXT", - ), + ("hip_api",), ), "hsa": ( "hsa_api", - ( - "HSA_CORE_API", - "HSA_AMD_EXT_API", - "HSA_IMAGE_EXT_API", - "HSA_FINALIZE_EXT_API", - ), + ("hsa_api",), ), "marker": ( "marker_api", + ("marker_api",), + ), + "kernel": ("kernel_dispatch", ("kernel_dispatch",)), + "memory_copy": ("memory_copy", ("memory_copy",)), + "memory_allocation": ( + "memory_allocation", ( - "MARKER_CORE_API", - "MARKER_CONTROL_API", - "MARKER_NAME_API", - "MARKER_CORE_RANGE_API", + "memory_allocation", + "scratch_memory", ), ), -
"kernel": ("kernel_dispatch", ("KERNEL_DISPATCH")), - "memory_copy": ("memory_copy", ("MEMORY_COPY")), - "memory_allocation": ("memory_allocation", ("MEMORY_ALLOCATION")), - "rocdecode_api": ("rocdecode_api", ("ROCDECODE_API")), - "rocjpeg_api": ("rocjpeg_api", ("ROCJPEG_API")), + "rocdecode_api": ("rocdecode_api", ("rocdecode_api",)), + "rocjpeg_api": ("rocjpeg_api", ("rocjpeg_api",)), } view_mapping = {