diff --git a/projects/rocprofiler-register/VERSION b/projects/rocprofiler-register/VERSION index 8f0916f768f..a918a2aa18d 100644 --- a/projects/rocprofiler-register/VERSION +++ b/projects/rocprofiler-register/VERSION @@ -1 +1 @@ -0.5.0 +0.6.0 diff --git a/projects/rocprofiler-register/cmake/rocprofiler_register_config_install.cmake b/projects/rocprofiler-register/cmake/rocprofiler_register_config_install.cmake index e6e9d12a821..48fe008f2b8 100644 --- a/projects/rocprofiler-register/cmake/rocprofiler_register_config_install.cmake +++ b/projects/rocprofiler-register/cmake/rocprofiler_register_config_install.cmake @@ -57,7 +57,7 @@ configure_package_config_file( write_basic_package_version_file( ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}/${PROJECT_NAME}-config-version.cmake VERSION ${PROJECT_VERSION} - COMPATIBILITY SameMinorVersion) + COMPATIBILITY AnyNewerVersion) install( FILES diff --git a/projects/rocprofiler-register/cmake/rocprofiler_register_options.cmake b/projects/rocprofiler-register/cmake/rocprofiler_register_options.cmake index 790c0ca2809..764a429559d 100644 --- a/projects/rocprofiler-register/cmake/rocprofiler_register_options.cmake +++ b/projects/rocprofiler-register/cmake/rocprofiler_register_options.cmake @@ -39,6 +39,10 @@ rocprofiler_register_add_option(ROCPROFILER_REGISTER_BUILD_FMT "Build FMT" ON) rocprofiler_register_add_option( ROCPROFILER_REGISTER_DEP_ROCMCORE "DEB and RPM package depend on rocm-core package" ${ROCM_DEP_ROCMCORE}) +rocprofiler_register_add_option( + ROCPROFILER_REGISTER_BUILD_DEFAULT_ATTACHMENT + "Enable attachment by default, instead of requiring an environment variable when the application starts" + OFF) # In the future, we will do this even with clang-tidy enabled if(ROCPROFILER_REGISTER_BUILD_CI diff --git a/projects/rocprofiler-register/source/include/rocprofiler-register/rocprofiler-register.h b/projects/rocprofiler-register/source/include/rocprofiler-register/rocprofiler-register.h index 52872cfb00e..bca1a77f1bc 100644 --- a/projects/rocprofiler-register/source/include/rocprofiler-register/rocprofiler-register.h +++ b/projects/rocprofiler-register/source/include/rocprofiler-register/rocprofiler-register.h @@ -62,6 +62,13 @@ typedef struct /// @var ROCP_REG_EXCESS_API_INSTANCES /// @brief The same API has been registered too many times /// +/// @var ROCP_REG_INVALID_ARGUMENT +/// @brief Rocprofiler-register API function was provided an invalid argument +/// +/// @var ROCP_REG_ATTACHMENT_NOT_AVAILABLE +/// @brief Rocprofiler-register attach or detach was invoked, but the attachment +/// library was not loaded at app startup. +/// typedef enum rocprofiler_register_error_code_t // NOLINT(performance-enum-size) { ROCP_REG_SUCCESS = 0, @@ -72,6 +79,8 @@ typedef enum rocprofiler_register_error_code_t // NOLINT(performance-enum-size) ROCP_REG_INVALID_API_ADDRESS, ROCP_REG_ROCPROFILER_ERROR, ROCP_REG_EXCESS_API_INSTANCES, + ROCP_REG_INVALID_ARGUMENT, + ROCP_REG_ATTACHMENT_NOT_AVAILABLE, ROCP_REG_ERROR_CODE_END, } rocprofiler_register_error_code_t; diff --git a/projects/rocprofiler-register/source/lib/rocprofiler-register/CMakeLists.txt b/projects/rocprofiler-register/source/lib/rocprofiler-register/CMakeLists.txt index 840fbedabbf..e15fa88220d 100644 --- a/projects/rocprofiler-register/source/lib/rocprofiler-register/CMakeLists.txt +++ b/projects/rocprofiler-register/source/lib/rocprofiler-register/CMakeLists.txt @@ -30,6 +30,10 @@ set_target_properties( SOVERSION ${PROJECT_VERSION_MAJOR} VERSION ${PROJECT_VERSION}) +if(ROCPROFILER_REGISTER_BUILD_DEFAULT_ATTACHMENT) + target_compile_definitions(rocprofiler-register PRIVATE ROCP_REG_DEFAULT_ATTACHMENT=1) +endif() + install( TARGETS rocprofiler-register DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/projects/rocprofiler-register/source/lib/rocprofiler-register/rocprofiler_register.cpp b/projects/rocprofiler-register/source/lib/rocprofiler-register/rocprofiler_register.cpp index df9631e08a5..8990e118205 100644 --- a/projects/rocprofiler-register/source/lib/rocprofiler-register/rocprofiler_register.cpp +++ b/projects/rocprofiler-register/source/lib/rocprofiler-register/rocprofiler_register.cpp @@ -44,9 +44,18 @@ #include #include +namespace +{ +using rocprofiler_register_library_api_table_func_t = + decltype(::rocprofiler_register_library_api_table)*; +} + extern "C" { #pragma weak rocprofiler_configure #pragma weak rocprofiler_set_api_table +#pragma weak rocprofiler_attach +#pragma weak rocprofiler_detach +#pragma weak rocprofiler_attach_set_api_table #pragma weak rocprofiler_register_import_hip #pragma weak rocprofiler_register_import_hip_static #pragma weak rocprofiler_register_import_hip_compiler @@ -83,6 +92,20 @@ rocprofiler_configure(uint32_t, const char*, uint32_t, rocprofiler_client_id_t*) extern int rocprofiler_set_api_table(const char*, uint64_t, uint64_t, void**, uint64_t); +extern int +rocprofiler_attach(void); + +extern int +rocprofiler_detach(void); + +extern int +rocprofiler_attach_set_api_table(const char*, + uint64_t, + uint64_t, + void**, + uint64_t, + rocprofiler_register_library_api_table_func_t); + extern uint32_t rocprofiler_register_import_hip(void); @@ -111,8 +134,15 @@ rocprofiler_register_import_roctx_static(void); namespace { using namespace rocprofiler_register; -using rocprofiler_set_api_table_t = decltype(::rocprofiler_set_api_table)*; -using rocp_set_api_table_data_t = std::tuple; +using rocprofiler_set_api_table_t = decltype(::rocprofiler_set_api_table)*; +using rocprofiler_attach_set_api_table_t = decltype(::rocprofiler_attach_set_api_table)*; +using rocprofiler_attach_func_t = decltype(::rocprofiler_attach)*; +using rocprofiler_detach_func_t = decltype(::rocprofiler_detach)*; +using rocp_set_api_table_data_t = std::tuple; + using bitset_t = std::bitset; static_assert(sizeof(bitset_t) == @@ -121,6 +151,12 @@ static_assert(sizeof(bitset_t) == constexpr auto rocprofiler_lib_name = "librocprofiler-sdk.so"; constexpr auto rocprofiler_lib_register_entrypoint = "rocprofiler_set_api_table"; +constexpr auto rocprofiler_attach_lib_name = "librocprofiler-sdk-attach.so"; +constexpr auto rocprofiler_attach_lib_register_entrypoint = + "rocprofiler_attach_set_api_table"; +constexpr auto rocprofiler_lib_attach_entrypoint = "rocprofiler_attach"; +constexpr auto rocprofiler_lib_detach_entrypoint = "rocprofiler_detach"; + constexpr auto rocprofiler_register_lib_name = "librocprofiler-register.so." ROCPROFILER_REGISTER_SOVERSION; @@ -133,6 +169,7 @@ enum rocp_reg_supported_library // NOLINT(performance-enum-size) ROCP_REG_RCCL, ROCP_REG_ROCDECODE, ROCP_REG_ROCJPEG, + ROCP_REG_ROCATTACH, ROCP_REG_LAST, }; @@ -202,6 +239,11 @@ ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCJPEG, "rocprofiler_register_import_rocjpeg", "librocjpeg.so.[0-9]($|\\.[0-9\\.]+)") +ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCATTACH, + "rocattach", + "rocprofiler_register_import_attach", + "librocprofiler-sdk-attach.so.[0-9]($|\\.[0-9\\.]+)") + ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_SUCCESS, "Success") ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_NO_TOOLS, "rocprofiler-register found no tools") ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_DEADLOCK, "rocprofiler-register deadlocked") @@ -215,6 +257,12 @@ ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_ROCPROFILER_ERROR, ROCP_REG_DEFINE_ERROR_MESSAGE( ROCP_REG_EXCESS_API_INSTANCES, "Too many instances of the same library API were registered") +ROCP_REG_DEFINE_ERROR_MESSAGE( + ROCP_REG_INVALID_ARGUMENT, + "rocprofiler-register API function was provided an invalid argument") +ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_ATTACHMENT_NOT_AVAILABLE, + "rocprofiler-register attach was invoked, but the " + "attachment library was never loaded.") auto get_this_library_path() @@ -275,8 +323,12 @@ struct rocp_scan_data { void* handle = nullptr; rocprofiler_set_api_table_t set_api_table_fn = nullptr; + rocprofiler_attach_func_t attach_fn = nullptr; + rocprofiler_detach_func_t detach_fn = nullptr; }; +auto existing_scanned_data = rocp_scan_data{}; + rocp_scan_data rocp_reg_scan_for_tools() { @@ -286,21 +338,29 @@ rocp_reg_scan_for_tools() bool _force_tool = common::get_env("ROCPROFILER_REGISTER_FORCE_LOAD", !_rocp_reg_lib.empty() || !_rocp_tool_libs.empty()); + bool _found_tool = (rocprofiler_configure != nullptr || _configure_func != nullptr || _force_tool); static void* rocprofiler_lib_handle = nullptr; static rocprofiler_set_api_table_t rocprofiler_lib_config_fn = nullptr; + static rocprofiler_attach_func_t rocprofiler_lib_attach_fn = nullptr; + static rocprofiler_detach_func_t rocprofiler_lib_detach_fn = nullptr; if(_found_tool) { if(rocprofiler_lib_handle && rocprofiler_lib_config_fn) - return rocp_scan_data{ rocprofiler_lib_handle, rocprofiler_lib_config_fn }; + return rocp_scan_data{ rocprofiler_lib_handle, + rocprofiler_lib_config_fn, + rocprofiler_lib_attach_fn, + rocprofiler_lib_detach_fn }; if(_rocp_reg_lib.empty()) _rocp_reg_lib = rocprofiler_lib_name; - std::tie(rocprofiler_lib_handle, rocprofiler_lib_config_fn) = - rocp_load_rocprofiler_lib(_rocp_reg_lib); + std::tie(rocprofiler_lib_handle, + rocprofiler_lib_config_fn, + rocprofiler_lib_attach_fn, + rocprofiler_lib_detach_fn) = rocp_load_rocprofiler_lib(_rocp_reg_lib); LOG_IF(FATAL, !rocprofiler_lib_config_fn) << rocprofiler_lib_register_entrypoint << " not found. Tried to dlopen " @@ -309,48 +369,53 @@ rocp_reg_scan_for_tools() else if(_found_tool && rocprofiler_set_api_table) { rocprofiler_lib_config_fn = &rocprofiler_set_api_table; + rocprofiler_lib_attach_fn = &rocprofiler_attach; + rocprofiler_lib_detach_fn = &rocprofiler_detach; } - return rocp_scan_data{ rocprofiler_lib_handle, rocprofiler_lib_config_fn }; + return rocp_scan_data{ rocprofiler_lib_handle, + rocprofiler_lib_config_fn, + rocprofiler_lib_attach_fn, + rocprofiler_lib_detach_fn }; } -rocp_set_api_table_data_t -rocp_load_rocprofiler_lib(std::string _rocp_reg_lib) +void* +get_library_handle(std::string_view _rocp_reg_lib) { - void* rocprofiler_lib_handle = nullptr; - rocprofiler_set_api_table_t rocprofiler_lib_config_fn = nullptr; - - if(rocprofiler_set_api_table) rocprofiler_lib_config_fn = &rocprofiler_set_api_table; + void* rocprofiler_lib_handle = nullptr; - // return if found via LD_PRELOAD - if(rocprofiler_lib_config_fn) - return std::make_tuple(rocprofiler_lib_handle, rocprofiler_lib_config_fn); - - // look to see if entrypoint function is already a symbol - *(void**) (&rocprofiler_lib_config_fn) = - dlsym(RTLD_DEFAULT, rocprofiler_lib_register_entrypoint); - - // return if found via RTLD_DEFAULT - if(rocprofiler_lib_config_fn) - return std::make_tuple(rocprofiler_lib_handle, rocprofiler_lib_config_fn); - - if(_rocp_reg_lib.empty()) _rocp_reg_lib = rocprofiler_lib_name; + if(_rocp_reg_lib.empty()) return nullptr; auto _rocp_reg_lib_path = fs::path{ _rocp_reg_lib }; auto _rocp_reg_lib_path_fname = _rocp_reg_lib_path.filename(); auto _rocp_reg_lib_path_abs = (_rocp_reg_lib_path.is_absolute()) ? _rocp_reg_lib_path - : (fs::path{ get_this_library_path() } / _rocp_reg_lib_path_fname); + : (fs::path{ get_this_library_path() } / _rocp_reg_lib_path); // check to see if the rocprofiler library is already loaded rocprofiler_lib_handle = dlopen(_rocp_reg_lib_path.c_str(), RTLD_NOLOAD | RTLD_LAZY); + if(rocprofiler_lib_handle) + { + LOG(INFO) << "loaded " << _rocp_reg_lib << " library at " + << _rocp_reg_lib_path.string() << " (handle=" << rocprofiler_lib_handle + << ") via RTLD_NOLOAD | RTLD_LAZY"; + } + // try to load with the given path if(!rocprofiler_lib_handle) { rocprofiler_lib_handle = dlopen(_rocp_reg_lib_path.c_str(), RTLD_GLOBAL | RTLD_LAZY); + + if(rocprofiler_lib_handle) + { + LOG(INFO) << "loaded " << _rocp_reg_lib << " library at " + << _rocp_reg_lib_path.string() + << " (handle=" << rocprofiler_lib_handle + << ") via RTLD_GLOBAL | RTLD_LAZY"; + } } // try to load with the absoulte path @@ -369,20 +434,85 @@ rocp_load_rocprofiler_lib(std::string _rocp_reg_lib) dlopen(_rocp_reg_lib_path.c_str(), RTLD_GLOBAL | RTLD_LAZY); } - LOG(INFO) << "loaded " << _rocp_reg_lib_path_fname.string() << " library at " - << _rocp_reg_lib_path.string(); + LOG(INFO) << "loaded " << _rocp_reg_lib << " library at " + << _rocp_reg_lib_path.string() << " (handle=" << rocprofiler_lib_handle + << ")"; LOG_IF(WARNING, rocprofiler_lib_handle == nullptr) << _rocp_reg_lib << " failed to load\n"; + return rocprofiler_lib_handle; +} + +rocp_set_api_table_data_t +rocp_load_rocprofiler_lib(std::string _rocp_reg_lib) +{ + void* rocprofiler_lib_handle = nullptr; + rocprofiler_set_api_table_t rocprofiler_lib_config_fn = nullptr; + rocprofiler_attach_func_t rocprofiler_lib_attach_fn = nullptr; + rocprofiler_detach_func_t rocprofiler_lib_detach_fn = nullptr; + + if(rocprofiler_set_api_table) + { + rocprofiler_lib_config_fn = &rocprofiler_set_api_table; + rocprofiler_lib_attach_fn = &rocprofiler_attach; + rocprofiler_lib_detach_fn = &rocprofiler_detach; + } + + // return if found via LD_PRELOAD + if(rocprofiler_lib_config_fn) + return std::make_tuple(rocprofiler_lib_handle, + rocprofiler_lib_config_fn, + rocprofiler_lib_attach_fn, + rocprofiler_lib_detach_fn); + + // look to see if entrypoint function is already a symbol + *(void**) (&rocprofiler_lib_config_fn) = + dlsym(RTLD_DEFAULT, rocprofiler_lib_register_entrypoint); + *(void**) (&rocprofiler_lib_attach_fn) = + dlsym(RTLD_DEFAULT, rocprofiler_lib_attach_entrypoint); + *(void**) (&rocprofiler_lib_detach_fn) = + dlsym(RTLD_DEFAULT, rocprofiler_lib_detach_entrypoint); + + // return if found via RTLD_DEFAULT + if(rocprofiler_lib_config_fn) + { + return std::make_tuple(rocprofiler_lib_handle, + rocprofiler_lib_config_fn, + rocprofiler_lib_attach_fn, + rocprofiler_lib_detach_fn); + } + + if(_rocp_reg_lib.empty()) _rocp_reg_lib = rocprofiler_lib_name; + + rocprofiler_lib_handle = get_library_handle(_rocp_reg_lib); + *(void**) (&rocprofiler_lib_config_fn) = dlsym(rocprofiler_lib_handle, rocprofiler_lib_register_entrypoint); + *(void**) (&rocprofiler_lib_attach_fn) = + dlsym(rocprofiler_lib_handle, rocprofiler_lib_attach_entrypoint); + + *(void**) (&rocprofiler_lib_detach_fn) = + dlsym(rocprofiler_lib_handle, rocprofiler_lib_detach_entrypoint); + LOG_IF(WARNING, rocprofiler_lib_config_fn == nullptr) - << _rocp_reg_lib << " did not contain '" << rocprofiler_lib_register_entrypoint - << "' symbol\n"; + << _rocp_reg_lib << " (handle=" << rocprofiler_lib_handle << ") did not contain '" + << rocprofiler_lib_register_entrypoint << "' symbol"; + + LOG_IF(INFO, rocprofiler_lib_config_fn != nullptr) + << "Found " << rocprofiler_lib_register_entrypoint << " symbol"; + + LOG_IF(INFO, rocprofiler_lib_attach_fn != nullptr) + << "Found " << rocprofiler_lib_attach_entrypoint << " symbol"; + + LOG_IF(INFO, rocprofiler_lib_detach_fn != nullptr) + << "Found " << rocprofiler_lib_detach_entrypoint << " symbol"; - return std::make_tuple(rocprofiler_lib_handle, rocprofiler_lib_config_fn); + return std::make_tuple(rocprofiler_lib_handle, + rocprofiler_lib_config_fn, + rocprofiler_lib_attach_fn, + rocprofiler_lib_detach_fn); } struct registered_library_api_table @@ -471,7 +601,8 @@ rocp_invoke_registrations(bool invoke_all) if(_activate_rocprofiler) { - auto _ret = _scan_result.set_api_table_fn(itr->common_name, + existing_scanned_data = _scan_result; + auto _ret = _scan_result.set_api_table_fn(itr->common_name, itr->lib_version, itr->instance_value, itr->api_tables.data(), @@ -484,6 +615,96 @@ rocp_invoke_registrations(bool invoke_all) return ROCP_REG_SUCCESS; } + +void +load_environment_buffer(const char* environment_buffer) +{ + // environment_buffer is a null-character delimited list of name value pairs. + // Each name and value is delimited separately. + // The first 4 bytes contain a uint32_t count of pairs. + + if(!environment_buffer) + { + LOG(WARNING) << "Attachment was invoked with no environment variables provided " + "for what to trace."; + return; + } + + const uint32_t pair_count = *reinterpret_cast(environment_buffer); + const char* position = environment_buffer + sizeof(uint32_t); + for(uint32_t pair_idx = 0; pair_idx < pair_count; ++pair_idx) + { + const char* name = position; + position += strlen(name) + 1; + const char* value = position; + position += strlen(value) + 1; + + LOG(INFO) << "Attachment adding environment variable: " << name << "=" << value; + setenv(name, value, 1); + } +} + +bool +is_attachment_library_registered() +{ + for(const auto& itr : registered) + { + if(std::string_view{ itr->common_name } == + supported_library_trait::common_name) + { + return true; + } + } + return false; +} + +constexpr auto offset_factor = 64 / std::max(ROCP_REG_LAST, 8); + +rocprofiler_register_error_code_t +register_functor(const char* common_name, + rocprofiler_register_import_func_t import_func, + uint32_t lib_version, + void** api_tables, + uint64_t api_table_length, + rocprofiler_register_library_indentifier_t* register_id) +{ + rocp_import* _import_match = nullptr; + for(auto& itr : import_info) + { + if(itr.common_name == common_name) + { + _import_match = &itr; + break; + } + } + + // not a supported library name + if(!_import_match || _import_match->library_idx == ROCP_REG_LAST) + return ROCP_REG_UNSUPPORTED_API; + + if(instance_counters.at(_import_match->library_idx) >= offset_factor) + return ROCP_REG_EXCESS_API_INSTANCES; + + auto _instance_val = instance_counters.at(_import_match->library_idx)++; + auto& _bits = *reinterpret_cast(®ister_id->handle); + _bits = bitset_t{ (offset_factor * _import_match->library_idx) + _instance_val }; + + auto* reginfo = rocp_add_registered_library_api_table(common_name, + import_func, + lib_version, + api_tables, + api_table_length, + _instance_val); + + LOG_IF(WARNING, !reginfo) << fmt::format( + "rocprofiler-register failed to create registration info for " + "{} version {} (instance {})", + common_name, + lib_version, + _instance_val); + + return ROCP_REG_SUCCESS; +}; } // namespace extern "C" { @@ -512,6 +733,18 @@ rocprofiler_register_library_api_table( auto _scan_result = rocp_reg_scan_for_tools(); + // rocprofiler library is dlopened and we have the functor to pass the API data + auto _activate_rocprofiler = (_scan_result.set_api_table_fn != nullptr); + +#if defined(ROCP_REG_DEFAULT_ATTACHMENT) && ROCP_REG_DEFAULT_ATTACHMENT != 0 + constexpr auto default_attachment_enabled = true; +#else + constexpr auto default_attachment_enabled = false; +#endif + + auto _attachment_enabled = + common::get_env("ROCP_TOOL_ATTACH", default_attachment_enabled); + rocp_import* _import_match = nullptr; for(auto& itr : import_info) { @@ -559,7 +792,6 @@ rocprofiler_register_library_api_table( if(!_valid_addr) return ROCP_REG_INVALID_API_ADDRESS; } - constexpr auto offset_factor = 64 / std::max(ROCP_REG_LAST, 8); // if ROCP_REG_LAST > 8, then we can no longer encode 8 instances per lib // because we ran out of bits (i.e. max of 8 * 8 = 64) static_assert((offset_factor * ROCP_REG_LAST) <= sizeof(uint64_t) * 8, @@ -573,6 +805,56 @@ rocprofiler_register_library_api_table( auto& _bits = *reinterpret_cast(®ister_id->handle); _bits = bitset_t{ (offset_factor * _import_match->library_idx) + _instance_val }; + // if attachment is enabled the HSA API table should be forwarded to the attachment + // library + if(!_activate_rocprofiler && _attachment_enabled && + _import_match->library_idx == ROCP_REG_HSA) + { + void* attachlibrary = get_library_handle(rocprofiler_attach_lib_name); + if(!attachlibrary) + { + LOG(ERROR) + << "Proxy queues for attachment are enabled, but the attach library " + "was not found or able to be loaded. The attaching profiler will not " + "be able to profile anything that requires proxy queues."; + return ROCP_REG_NO_TOOLS; + } + rocprofiler_attach_set_api_table_t rocprofiler_attach_set_api_table_fn; + *(void**) (&rocprofiler_attach_set_api_table_fn) = + dlsym(attachlibrary, rocprofiler_attach_lib_register_entrypoint); + + if(!rocprofiler_attach_set_api_table_fn) + { + LOG(ERROR) + << "Proxy queues for attachment are enabled, but the attach library's " + "entry point was not found. The attaching profiler will not be able " + "to profile anything that requires proxy queues."; + return ROCP_REG_NO_TOOLS; + } + + // Pass a functor to the attach library that it can use to pass back its own API + // table to us. This approach simplifies the interface and avoids having to modify + // the deadlock protection of this function. + + auto _ret = rocprofiler_attach_set_api_table_fn(common_name, + lib_version, + _instance_val, + api_tables, + api_table_length, + ®ister_functor); + if(_ret != 0) + { + LOG(ERROR) << "Proxy queues for attachment are enabled, but attach library " + "registration returned an error: " + << _ret + << ". The attaching profiler may not be able to profile anything " + "that requires proxy queues."; + return ROCP_REG_ROCPROFILER_ERROR; + } + + LOG(INFO) << "Successfully registered for proxy queue creation"; + } + auto* reginfo = rocp_add_registered_library_api_table(common_name, import_func, lib_version, @@ -590,9 +872,6 @@ rocprofiler_register_library_api_table( if(_bits.to_ulong() != register_id->handle) throw std::runtime_error("error encoding register_id"); - // rocprofiler library is dlopened and we have the functor to pass the API data - auto _activate_rocprofiler = (_scan_result.set_api_table_fn != nullptr); - if(_activate_rocprofiler) { auto _ret = _scan_result.set_api_table_fn( @@ -639,25 +918,151 @@ rocprofiler_register_iterate_registration_info( return ROCP_REG_SUCCESS; } +// +// This function can be invoked by ptrace rocprofiler_register_error_code_t rocprofiler_register_invoke_nonpropagated_registrations() ROCPROFILER_REGISTER_PUBLIC_API; -// -// This function can be invoked by ptrace rocprofiler_register_error_code_t rocprofiler_register_invoke_nonpropagated_registrations() { return rocp_invoke_registrations(false); } +// +// This function can be invoked by ptrace rocprofiler_register_error_code_t rocprofiler_register_invoke_all_registrations() ROCPROFILER_REGISTER_PUBLIC_API; -// -// This function can be invoked by ptrace +// This function can be invoked by ptrace +rocprofiler_register_error_code_t +rocprofiler_register_invoke_prestore_loads() ROCPROFILER_REGISTER_PUBLIC_API; + rocprofiler_register_error_code_t rocprofiler_register_invoke_all_registrations() { return rocp_invoke_registrations(true); } + +rocprofiler_register_error_code_t +rocprofiler_register_attach(const char* environment_buffer, + const char* tool_lib_path) ROCPROFILER_REGISTER_PUBLIC_API; + +rocprofiler_register_error_code_t +rocprofiler_register_detach() ROCPROFILER_REGISTER_PUBLIC_API; + +// +// This function can be invoked by ptrace +rocprofiler_register_error_code_t +rocprofiler_register_attach(const char* environment_buffer, const char* tool_lib_path) +{ + // If the attachment library has not been loaded when attach is called, tracing + // that relies on proxy queues will fail (e.g. kernel tracing). + // Log error and abort. + if(!is_attachment_library_registered()) + { + LOG(ERROR) + << "rocprofiler-register attach was invoked, but the rocprofiler-attach " + "library was never loaded. Start the app with environment variable " + "ROCP_TOOL_ATTACH=1 or build rocprofiler-register with cmake option " + "ROCP_REG_DEFAULT_ATTACHMENT=ON"; + return ROCP_REG_ATTACHMENT_NOT_AVAILABLE; + } + + static auto prev_tool_lib_path = std::string{}; + + // tool_lib_path is declared with non-null attribute + if(!prev_tool_lib_path.empty() && prev_tool_lib_path != tool_lib_path) + { + LOG(WARNING) << "rocprofiler_register_attach invoked with a different " + "tool_lib_path (" + << tool_lib_path + << ") than a previous attach (previous=" << prev_tool_lib_path + << "). This is not supported."; + return ROCP_REG_INVALID_ARGUMENT; + } + + LOG(INFO) << "rocprofiler_register_attach started with tool_lib_path: " + << tool_lib_path; + + // Set default tool library path if not provided + setenv("ROCPROFILER_REGISTER_TOOL_ATTACHED", "1", 1); + + LOG_IF(FATAL, tool_lib_path == nullptr) + << "ROCP_TOOL_LIBRARIES is set, but tool_lib_path is NULL. " + "This is not supported. Please provide a valid tool library path."; + + // TODO: should save old environment variables if they get overwritten and restore + // them on detach + // load_environment_buffer(environment_buffer); + + // Use provided path. Must come after load_environment_buffer to ensure override + setenv("ROCP_TOOL_LIBRARIES", tool_lib_path, 1); + LOG(INFO) << "Using provided tool library: " << tool_lib_path; + + // TODO: should save old environment variables if they get overwritten and restore + // them on detach + load_environment_buffer(environment_buffer); + + // No previous tool library was attached + if(prev_tool_lib_path.empty()) + { + auto status = rocprofiler_register_invoke_all_registrations(); + if(status != ROCP_REG_SUCCESS) + { + LOG(ERROR) << "error during invoke_all_registrations: " << status; + return status; + } + prev_tool_lib_path = tool_lib_path; + } + + if(existing_scanned_data.attach_fn == nullptr) return ROCP_REG_NO_TOOLS; + + LOG(INFO) << "rocprofiler-sdk attach starting..."; + auto _ret = existing_scanned_data.attach_fn(); + + LOG(INFO) << "rocprofiler-sdk attach completed."; + + return (_ret == 0) ? ROCP_REG_SUCCESS : ROCP_REG_ROCPROFILER_ERROR; +} + +// +// This function can be invoked by ptrace +rocprofiler_register_error_code_t +rocprofiler_register_detach() +{ + LOG(INFO) << "rocprofiler_register_detach started"; + + if(!is_attachment_library_registered()) + { + LOG(ERROR) + << "rocprofiler-register detach was invoked, but the rocprofiler-attach " + "library was never loaded. Start the app with environment variable " + "ROCP_TOOL_ATTACH=1 or build rocprofiler-register with cmake option " + "ROCP_REG_DEFAULT_ATTACHMENT=ON"; + return ROCP_REG_ATTACHMENT_NOT_AVAILABLE; + } + + if(existing_scanned_data.detach_fn) + { + LOG(INFO) << "rocprofiler-sdk detach starting..."; + existing_scanned_data.detach_fn(); + LOG(INFO) << "rocprofiler-sdk detach completed."; + } + else + { + LOG(ERROR) << "detach entry point is NULL"; + return ROCP_REG_NO_TOOLS; + } + + return ROCP_REG_SUCCESS; + // auto _scan_result = rocp_reg_scan_for_tools(); + // if(!_scan_result.detach_fn) return ROCP_REG_NO_TOOLS; + + // LOG(INFO) << "rocprofiler-sdk detach starting..."; + // auto _ret = _scan_result.detach_fn(); + + // LOG(INFO) << "rocprofiler-sdk detach completed."; + // return (_ret == 0) ? ROCP_REG_SUCCESS : ROCP_REG_ROCPROFILER_ERROR; +} } diff --git a/projects/rocprofiler-sdk/README.md b/projects/rocprofiler-sdk/README.md index 031db8301f0..cfb4414d455 100644 --- a/projects/rocprofiler-sdk/README.md +++ b/projects/rocprofiler-sdk/README.md @@ -43,8 +43,9 @@ ROCprofiler-SDK is AMD’s new and improved tooling infrastructure, providing a ## Tool Support -rocprofv3 is the command line tool built using the rocprofiler-sdk library and shipped with the ROCm stack. To see details on -the command line options of rocprofv3, please see rocprofv3 user guide +rocprofv3 is the command line tool built using the rocprofiler-sdk library and shipped with the ROCm stack. It supports both launching applications with profiling enabled and attaching to already running processes for dynamic profiling using `--attach`/`--pid`/`-p` options. + +To see details on the command line options of rocprofv3, please see rocprofv3 user guide [Click Here](source/docs/how-to/using-rocprofv3.rst) ## Documentation diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_config_packaging.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_config_packaging.cmake index 5306e6a4538..efec16fb6d4 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_config_packaging.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_config_packaging.cmake @@ -104,7 +104,7 @@ endif() if(NOT NUM_ROCPROFILER_PACKAGING_COMPONENTS EQUAL EXPECTED_PACKAGING_COMPONENTS) message( FATAL_ERROR - "Error new install component needs COMPONENT_NAME_* and COMPONENT_SEP_* entries: ${ROCPROFILER_PACKAGING_COMPONENTS}" + "Error new install component needs COMPONENT_NAME_* , COMPONENT_DEP_* , and COMPONENT_DESC_* entries: ${ROCPROFILER_PACKAGING_COMPONENTS}" ) endif() diff --git a/projects/rocprofiler-sdk/source/bin/CMakeLists.txt b/projects/rocprofiler-sdk/source/bin/CMakeLists.txt index 7f0bb05908e..c55579e3da0 100644 --- a/projects/rocprofiler-sdk/source/bin/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/bin/CMakeLists.txt @@ -26,6 +26,16 @@ install( WORLD_EXECUTE COMPONENT tools) +configure_file(rocprofv3-attach.py + ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/rocprofv3-attach COPYONLY) + +install( + FILES ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/rocprofv3-attach + DESTINATION ${CMAKE_INSTALL_BINDIR} + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ + WORLD_EXECUTE + COMPONENT tools) + # for each entry here there must be a ROCPD_BIN_ list set(ROCPD_EXECUTABLES "all" "csv" "otf2" "pftrace" "summary") diff --git a/projects/rocprofiler-sdk/source/bin/rocprofv3-attach.py b/projects/rocprofiler-sdk/source/bin/rocprofv3-attach.py new file mode 100755 index 00000000000..aba5b1da544 --- /dev/null +++ b/projects/rocprofiler-sdk/source/bin/rocprofv3-attach.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import ctypes +import os +import signal +import sys +import time + +ROCPROFV3_ATTACH_DIR = os.path.dirname(os.path.realpath(__file__)) +ROCM_DIR = os.path.dirname(ROCPROFV3_ATTACH_DIR) +ROCPROF_ATTACH_TOOL_LIBRARY = f"{ROCM_DIR}/lib/rocprofiler-sdk/librocprofv3-attach.so" + + +def main( + pid=os.environ.get("ROCPROF_ATTACH_PID", None), + attach_library=os.environ.get( + "ROCPROF_ATTACH_TOOL_LIBRARY", ROCPROF_ATTACH_TOOL_LIBRARY + ), + duration=os.environ.get("ROCPROF_ATTACH_DURATION", None), +): + if pid is None: + raise RuntimeError("rocprofv3_attach called with no PID specified") + + print(f"Attaching to PID {pid} using library {attach_library}") + + # Load the shared library into ctypes and attach + try: + c_lib = ctypes.CDLL(attach_library) + c_lib.attach.restype = ctypes.c_int + c_lib.attach.argtypes = [ctypes.c_uint] + attach_status = c_lib.attach(int(pid)) + except Exception as e: + raise RuntimeError(f"Exception during library load and attachment: {e}") + + if attach_status != 0: + raise RuntimeError( + f"Calling attach in {attach_library} returned non-zero status {attach_status}" + ) + + print(f"Attaching to PID {pid} using library {attach_library} :: success") + + def detach(): + try: + c_lib.detach() + except Exception as e: + print(f"Exception during detachment: {e}") + + def signal_handler(sig, frame): + print("\nCaught signal SIGINT, detaching") + detach() + sys.exit(0) + + signal.signal(signal.SIGINT, signal_handler) + + if duration is None: + sys.stdout.write("Press Enter to detach...") + sys.stdout.flush() # Force the prompt to appear immediately + input() # Now wait for input + else: + time.sleep(int(duration) / 1000) + + detach() + + +if __name__ == "__main__": + main() diff --git a/projects/rocprofiler-sdk/source/bin/rocprofv3.py b/projects/rocprofiler-sdk/source/bin/rocprofv3.py index 9aec5fb7753..b4c908fb887 100755 --- a/projects/rocprofiler-sdk/source/bin/rocprofv3.py +++ b/projects/rocprofiler-sdk/source/bin/rocprofv3.py @@ -60,6 +60,12 @@ def __init__(self, d): [dotdict(i) if isinstance(i, (dict)) else i for i in v], ) + def __getstate__(self): + return self.__dict__ + + def __setstate__(self, d): + self.__dict__ = d + def patch_message(msg, *args): msg = textwrap.dedent(msg) @@ -72,14 +78,14 @@ def patch_message(msg, *args): def fatal_error(msg, *args, exit_code=1): msg = patch_message(msg, *args) - sys.stderr.write(f"Fatal error: {msg}\n") + sys.stderr.write(f"[rocprofv3] Fatal error: {msg}\n") sys.stderr.flush() sys.exit(exit_code) def warning(msg, *args): msg = patch_message(msg, *args) - sys.stderr.write(f"Warning: {msg}\n") + sys.stderr.write(f"[rocprofv3] Warning: {msg}\n") sys.stderr.flush() @@ -224,6 +230,11 @@ def parse_arguments(args=None): $ mpirun -n 4 rocprofv3 --hip-trace -- ./mympiapp +For attachment profiling of running processes: + + $ rocprofv3 --attach --hip-trace --kernel-trace + $ rocprofv3 --attach 1234 --attach-duration 10 --hsa-trace + """ # Create the parser @@ -725,13 +736,19 @@ def add_parser_bool_argument(gparser, *args, **kwargs): metavar="KB", ) - reserved_options = parser.add_argument_group("Reserved options") - reserved_options.add_argument( + advanced_options.add_argument( "-p", "--pid", - help=argparse.SUPPRESS, - type=str, - nargs="+", + "--attach", + help="""Attach to a target process by pid and execute as a tool from within said process.""", + type=int, + default=None, + ) + + advanced_options.add_argument( + "--attach-duration-msec", + help="""When --pid is used, sets the amount of time in milliseconds the profiler will be attached before detaching. When unset, the profiler will wait until Enter is pressed to detach.""", + type=int, default=None, ) @@ -940,18 +957,27 @@ def patch_args(data): return data -def get_args(cmd_args, inp_args): +def get_args(cmd_args, inp_args, filter=[]): def ensure_type(name, var, type_id): if not isinstance(var, type_id): raise TypeError( - f"{name} is of type {type(var).__name__}, expected {type(type_id).__name__}" + f"{name} is of type {type(var).__name__}, expected {type_id.__name__}" ) - ensure_type("cmd_args", cmd_args, argparse.Namespace) - ensure_type("inp_args", inp_args, dotdict) + if isinstance(cmd_args, argparse.Namespace): + ensure_type("cmd_args", cmd_args, argparse.Namespace) + ensure_type("inp_args", inp_args, dotdict) + + cmd_keys = list(cmd_args.__dict__.keys()) + inp_keys = list(inp_args.keys()) + + else: + ensure_type("cmd_args", cmd_args, dotdict) + ensure_type("inp_args", inp_args, dotdict) + + cmd_keys = list(cmd_args.keys()) + inp_keys = list(inp_args.keys()) - cmd_keys = list(cmd_args.__dict__.keys()) - inp_keys = list(inp_args.keys()) data = {} def get_attr(key): @@ -967,9 +993,30 @@ def get_attr(key): and has_set_attr(inp_args, itr) and getattr(cmd_args, itr) != getattr(inp_args, itr) ): - raise RuntimeError( - f"conflicting value for {itr} : {getattr(cmd_args, itr)} vs {getattr(inp_args, itr)}" - ) + should_raise = True + if filter: + is_filtered = False + for fitr in filter: + import re + + if re.match(fitr, itr): + is_filtered = True + break + + if not is_filtered: + warning( + f"Option '{itr}' has been modified. {itr}={getattr(cmd_args, itr)} (previously {itr}={getattr(inp_args, itr)})" + ) + should_raise = False + + # should raise error if not in filter list + if should_raise: + raise RuntimeError( + f"conflicting value for {itr} : {getattr(cmd_args, itr)} vs {getattr(inp_args, itr)}" + ) + else: + # has preference towards command line args + data[itr] = get_attr(itr) else: data[itr] = get_attr(itr) @@ -982,13 +1029,6 @@ def run(app_args, args, **kwargs): use_execv = kwargs.get("use_execv", True) app_pass = kwargs.get("pass_id", None) - if args.pid is not None: - fatal_error( - """The -p shorthand option for --collection-period is now an upper-case -P - In the future, rocprofv3 plans to support debugger-like process attachment and -p - is de-facto standard shorthand option for this feature""" - ) - def setattrifnone(obj, attr, value): if getattr(obj, f"{attr}") is None: setattr(obj, f"{attr}", value) @@ -1075,6 +1115,7 @@ def _write_env_value(): ROCPROF_LIST_AVAIL_TOOL_LIBRARY = ( f"{ROCM_DIR}/lib/rocprofiler-sdk/librocprofv3-list-avail.so" ) + ROCPROF_ATTACH_TOOL_LIBRARY = f"{ROCM_DIR}/lib/rocprofiler-sdk/librocprofv3-attach.so" ROCPROF_TOOL_LIBRARY = resolve_library_path(ROCPROF_TOOL_LIBRARY, args) ROCPROF_SDK_LIBRARY = resolve_library_path(ROCPROF_SDK_LIBRARY, args) @@ -1083,6 +1124,7 @@ def _write_env_value(): ROCPROF_LIST_AVAIL_TOOL_LIBRARY = resolve_library_path( ROCPROF_LIST_AVAIL_TOOL_LIBRARY, args ) + ROCPROF_ATTACH_TOOL_LIBRARY = resolve_library_path(ROCPROF_ATTACH_TOOL_LIBRARY, args) prepend_preload = [itr for itr in args.preload if itr] append_preload = [ @@ -1090,8 +1132,9 @@ def _write_env_value(): ROCPROF_SDK_LIBRARY, ] - update_env("LD_PRELOAD", ":".join(prepend_preload), prepend=True) - update_env("LD_PRELOAD", ":".join(append_preload), append=True) + if not args.pid: + update_env("LD_PRELOAD", ":".join(prepend_preload), prepend=True) + update_env("LD_PRELOAD", ":".join(append_preload), append=True) update_env( "ROCP_TOOL_LIBRARIES", @@ -1298,6 +1341,13 @@ def _write_env_value(): overwrite_if_true=True, ) + if args.pid: + update_env( + "ROCPROF_ATTACH_TOOL_LIBRARY", + ROCPROF_ATTACH_TOOL_LIBRARY, + overwrite_if_true=True, + ) + if args.collection_period: factors = { "hour": 60 * 60 * 1e9, @@ -1430,6 +1480,16 @@ def log_config(_env): env=app_env, ) + elif args.pid: + update_env("ROCPROF_ATTACH_PID", args.pid) + if args.attach_duration_msec is not None: + update_env("ROCPROF_ATTACH_DURATION", f"{args.attach_duration_msec}") + path = os.path.join(f"{ROCM_DIR}", "bin/rocprofv3-attach") + if app_args: + exit_code = subprocess.check_call([sys.executable, path], env=app_env) + else: + app_args = [sys.executable, path] + elif not app_args and not args.echo: log_config(app_env) fatal_error("No application provided") @@ -1673,6 +1733,39 @@ def main(argv=None): if len(inp_args) == 1: args = get_args(cmd_args, inp_args[0]) + + if args.pid: + import pickle + + if args.collection_period: + fatal_error("--collection-period is not compatible with attach mode") + + fname = f"/tmp/rocprofv3_attach_{args.pid}.pkl" + if os.path.exists(fname): + # load the configuration from the previous attachment + with open(fname, "rb") as ifs: + if args.log_level in ("config", "info", "trace"): + print(f"Loading attach configuration from {fname}...") + prev_args = pickle.load(ifs) + + args = get_args( + args, + dotdict(prev_args), + filter=[ + ".*_trace", + "^pc_sampling_.*$", + "^att_.*$", + "^(pmc|pmc_groups|output_config|extra_counters)$", + "^kernel_(include_regex|exclude_regex|iteration_range)$", + ], + ) + + # write the configuration for future attachments + with open(fname, "wb") as ofs: + if args.log_level in ("config", "info", "trace"): + print(f"Saving attach configuration to {fname}...") + pickle.dump(args, ofs) + pass_idx = None if has_set_attr(args, "pmc") and len(args.pmc) > 0: pass_idx = 1 diff --git a/projects/rocprofiler-sdk/source/docs/_toc.yml.in b/projects/rocprofiler-sdk/source/docs/_toc.yml.in index 34573293e86..779079b7518 100644 --- a/projects/rocprofiler-sdk/source/docs/_toc.yml.in +++ b/projects/rocprofiler-sdk/source/docs/_toc.yml.in @@ -27,6 +27,8 @@ subtrees: title: Tool library - file: api-reference/intercept_table title: Runtime intercept tables + - file: api-reference/process_attachment + title: Process attachment - file: api-reference/buffered_services title: Buffered services - file: api-reference/callback_services diff --git a/projects/rocprofiler-sdk/source/docs/api-reference/process_attachment.rst b/projects/rocprofiler-sdk/source/docs/api-reference/process_attachment.rst new file mode 100644 index 00000000000..c6c6f21738c --- /dev/null +++ b/projects/rocprofiler-sdk/source/docs/api-reference/process_attachment.rst @@ -0,0 +1,1138 @@ +.. meta:: + :description: Technical guide for implementing ROCprofiler-SDK process attachment + :keywords: ROCprofiler-SDK, process attachment, ptrace, dynamic profiling, tool development + +.. _process_attachment_implementation: + +******************************************************************************** +Implementing Process Attachment Tools +******************************************************************************** + +Overview +======== + +This document provides the technical details needed to implement a process attachment tool similar to ``rocprofv3 --attach``. Process attachment allows profiling tools to dynamically attach to running GPU applications without requiring application restart. + +The implementation uses specific exported C functions and involves low-level process manipulation using ptrace, environment variable injection, library loading, and coordination with the ROCprofiler-SDK registration system. + +Exported C Functions for Attachment +=================================== + +The attachment functionality provides the following exported C functions that tools can use: + +ROCprofiler-Attach Functions +----------------------------- + +These functions are exported from the ``rocprofiler-attach`` binary: + +.. code-block:: cpp + + extern "C" { + // Start attachment to a target process + void attach(uint32_t pid) ROCPROFILER_EXPORT; + + // Detach from target process and cleanup + void detach() ROCPROFILER_EXPORT; + } + +**Function Details:** + +- **``attach(uint32_t pid)``**: Main entry point for starting attachment to a process + - Takes the target process ID as parameter + - Initiates ptrace-based attachment sequence + - Spawns background thread for ptrace operations + +- **``detach()``**: Entry point for detaching from the target process + - Cleans up attachment resources and terminates profiling + - Joins ptrace thread and releases resources + +ROCprofiler-Register Functions +------------------------------ + +These functions are exported from the ``librocprofiler-register.so`` library and are called via ptrace: + +.. code-block:: cpp + + extern "C" { + // Activate profiling in target process (called via ptrace) + rocprofiler_register_error_code_t + rocprofiler_register_attach(const char* environment_buffer, const char* tool_lib_path) + ROCPROFILER_REGISTER_PUBLIC_API; + + // Deactivate profiling in target process (called via ptrace) + rocprofiler_register_error_code_t + rocprofiler_register_detach() + ROCPROFILER_REGISTER_PUBLIC_API; + + // Reattach to previously attached process (experimental) + rocprofiler_register_error_code_t + rocprofiler_register_invoke_reattach() + ROCPROFILER_REGISTER_PUBLIC_API; + + // Client callback functions for reattachment support + void rocprofiler_call_client_reattach(void) + ROCPROFILER_REGISTER_PUBLIC_API; + void rocprofiler_call_client_detach(void) + ROCPROFILER_REGISTER_PUBLIC_API; + } + +**Function Details:** + +- **``rocprofiler_register_attach(const char* environment_buffer, const char* tool_lib_path)``**: + - Called via ptrace from the attachment system + - Receives serialized environment variables for profiling configuration + - Receives the tool library path to load (defaults to "librocprofiler-sdk-tool.so" if NULL) + - Loads the specified tool library and activates profiling services + - Returns ``rocprofiler_register_error_code_t`` status + +- **``rocprofiler_register_detach()``**: + - Called via ptrace to stop profiling in the target process + - Calls the tool's detach function and cleans up resources + - Returns ``rocprofiler_register_error_code_t`` status + +- **``rocprofiler_register_invoke_reattach()``**: (EXPERIMENTAL) + - Called to reattach profiling to a previously attached process + - Invokes client reattach callbacks without full re-initialization + - Used for resuming profiling after temporary detachment + - Returns ``rocprofiler_register_error_code_t`` status + +- **``rocprofiler_call_client_reattach()`` and ``rocprofiler_call_client_detach()``**: + - C wrapper functions for client tool reattachment callbacks + - Automatically resolved and called by the registration system + - Enable tools to handle dynamic attach/detach cycles + +Function Call Sequence +====================== + +Initial Attachment Sequence +--------------------------- + +The initial attachment process follows this sequence: + +.. code-block:: text + + Tool Implementation + | + v + attach(pid) ← Your tool calls this + | + v + Ptrace attachment & environment setup + | + v + rocprofiler_register_attach(env_buffer) ← Called via ptrace in target + | + v + Profiling active in target process + | + v + [Profiling data collection...] + | + v + rocprofiler_register_detach() ← Called via ptrace in target + | + v + detach() ← Your tool calls this + | + v + Cleanup complete + +Reattachment Sequence (Experimental) +------------------------------------ + +For reattachment to a previously attached process: + +.. code-block:: text + + Tool Implementation + | + v + attach(pid) ← Your tool calls this again + | + v + Ptrace attachment & environment setup + | + v + rocprofiler_register_attach(env_buffer) ← Detects previous attachment + | + v + rocprofiler_register_invoke_reattach() ← Calls client reattach callbacks + | + v + Profiling resumed in target process + | + v + [Continued profiling data collection...] + | + v + rocprofiler_register_detach() ← Called via ptrace in target + | + v + detach() ← Your tool calls this + | + v + Cleanup complete + +Using the Attachment Functions +============================== + +Here's how to use these functions in your own attachment tool: + +Basic Attachment Tool Implementation +----------------------------------- + +.. code-block:: cpp + + #include + #include + #include + #include + + class ROCprofilerAttachmentTool { + private: + void* attach_lib_handle = nullptr; + void (*attach_func)(uint32_t) = nullptr; + void (*detach_func)() = nullptr; + + public: + bool initialize() { + // Load the rocprofiler-attach library/binary + attach_lib_handle = dlopen("librocprofiler-attach.so", RTLD_NOW); + if (!attach_lib_handle) { + std::cerr << "Failed to load rocprofiler-attach: " << dlerror() << std::endl; + return false; + } + + // Get the attachment function pointers + attach_func = (void(*)(uint32_t))dlsym(attach_lib_handle, "attach"); + detach_func = (void(*)())dlsym(attach_lib_handle, "detach"); + + if (!attach_func || !detach_func) { + std::cerr << "Failed to find attachment functions" << std::endl; + return false; + } + + return true; + } + + bool attach_to_process(pid_t pid, uint32_t duration_ms = 0) { + // Validate the target process + if (kill(pid, 0) != 0) { + std::cerr << "Target process " << pid << " is not accessible" << std::endl; + return false; + } + + std::cout << "Attaching to process " << pid << std::endl; + + // Start attachment - this will handle all ptrace operations + attach_func(pid); + + if (duration_ms > 0) { + // Profile for specified duration + std::cout << "Profiling for " << duration_ms << " milliseconds..." << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(duration_ms)); + + // Stop profiling + detach_func(); + } else { + std::cout << "Profiling until process ends or manual detach..." << std::endl; + // Monitor process or wait for external signal to detach + while (kill(pid, 0) == 0) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + detach_func(); + } + + std::cout << "Profiling completed" << std::endl; + return true; + } + + ~ROCprofilerAttachmentTool() { + if (attach_lib_handle) { + dlclose(attach_lib_handle); + } + } + }; + +Complete Tool Example +-------------------- + +.. code-block:: cpp + + #include + #include + #include + #include + + int main(int argc, char* argv[]) { + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " [duration_ms]" << std::endl; + std::cerr << " PID: Process ID to attach to" << std::endl; + std::cerr << " duration_ms: Optional profiling duration in milliseconds" << std::endl; + return 1; + } + + pid_t target_pid = std::stoi(argv[1]); + uint32_t duration = (argc > 2) ? std::stoi(argv[2]) : 0; + + // Set up profiling environment variables before attachment + setenv("ROCP_TOOL_ATTACH", "1", 1); + + // Note: The attachment system now uses the hardcoded default tool library path + // "librocprofiler-sdk-tool.so" and no longer uses environment variables for tool selection + + setenv("ROCPROF_HIP_API_TRACE", "1", 1); + setenv("ROCPROF_KERNEL_TRACE", "1", 1); + setenv("ROCPROF_MEMORY_COPY_TRACE", "1", 1); + setenv("ROCPROF_OUTPUT_PATH", "./attachment-output", 1); + setenv("ROCPROF_OUTPUT_FILE_NAME", "attached_profile", 1); + + // Initialize and run attachment tool + ROCprofilerAttachmentTool tool; + if (!tool.initialize()) { + std::cerr << "Failed to initialize attachment tool" << std::endl; + return 1; + } + + if (!tool.attach_to_process(target_pid, duration)) { + std::cerr << "Attachment failed" << std::endl; + return 1; + } + + std::cout << "Attachment completed successfully" << std::endl; + return 0; + } + +Experimental Reattachment API +============================= + +ROCprofiler-SDK now provides experimental support for reattachment, allowing tools to handle dynamic attach/detach cycles more efficiently. + +Tool Configuration for Reattachment +----------------------------------- + +Tools that support reattachment should implement the experimental configuration structure: + +.. code-block:: cpp + + #include + + // Experimental reattachment callbacks + void tool_reattach(void* tool_data) { + // Reinitialize contexts and resume profiling + // This is called when reattaching to a previously profiled process + } + + void tool_detach(void* tool_data) { + // Suspend profiling operations temporarily + // This is called during detachment, but contexts may be preserved + } + + extern "C" rocprofiler_tool_configure_result_experimental_t* + rocprofiler_configure_experimental(uint32_t version, + const char* runtime_version, + uint32_t prio, + rocprofiler_client_id_t* client_id) + { + static auto cfg = rocprofiler_tool_configure_result_experimental_t { + .size = sizeof(rocprofiler_tool_configure_result_experimental_t), + .initialize = &tool_init, + .finalize = &tool_fini, + .tool_data = nullptr, + .tool_reattach = &tool_reattach, // Experimental reattachment support + .tool_detach = &tool_detach // Experimental detachment support + }; + + return &cfg; + } + +Client Callback Functions +------------------------- + +The registration system automatically provides C wrapper functions: + +.. code-block:: cpp + + // These are automatically generated and called by rocprofiler-register + extern "C" void rocprofiler_call_client_reattach(void) { + // Calls the tool's reattach callback with stored tool_data + } + + extern "C" void rocprofiler_call_client_detach(void) { + // Calls the tool's detach callback with stored tool_data + } + +Reattachment Environment Variables +--------------------------------- + +When using reattachment, set this additional environment variable: + +.. code-block:: cpp + + // Indicates that the tool was loaded via attachment (not LD_PRELOAD) + setenv("ROCPROFILER_REGISTER_TOOL_ATTACHED", "1", 1); + +This helps the registration system differentiate between initial attachment and reattachment cycles. + +Environment Variable Configuration +================================= + +Before calling the attachment functions, set up environment variables that will be injected into the target process: + +Required Variables +----------------- + +.. code-block:: cpp + + // Essential for attachment functionality + setenv("ROCP_TOOL_ATTACH", "1", 1); + +Tool Library Configuration +-------------------------- + +The attachment system now uses a hardcoded default tool library path: + +.. code-block:: cpp + + // The attachment system automatically uses "librocprofiler-sdk-tool.so" + // No environment variable configuration is needed or supported + +Tracing Options +-------------- + +.. code-block:: cpp + + // Enable different types of tracing + setenv("ROCPROF_HIP_API_TRACE", "1", 1); // HIP API calls + setenv("ROCPROF_HSA_API_TRACE", "1", 1); // HSA API calls + setenv("ROCPROF_KERNEL_TRACE", "1", 1); // Kernel dispatches + setenv("ROCPROF_MEMORY_COPY_TRACE", "1", 1); // Memory operations + setenv("ROCPROF_MEMORY_ALLOCATION_TRACE", "1", 1); // Memory allocations + setenv("ROCPROF_SCRATCH_MEMORY_TRACE", "1", 1); // Scratch memory + setenv("ROCPROF_MARKER_TRACE", "1", 1); // ROCTx markers + +Output Configuration +------------------- + +.. code-block:: cpp + + // Control output location and format + setenv("ROCPROF_OUTPUT_PATH", "/path/to/output", 1); + setenv("ROCPROF_OUTPUT_FILE_NAME", "profile_name", 1); + setenv("ROCPROF_OUTPUT_FORMAT", "csv", 1); // or "json", "pftrace", etc. + +Build Configuration +================== + +To build a tool using the attachment functions: + +CMakeLists.txt +------------- + +.. code-block:: cmake + + cmake_minimum_required(VERSION 3.16) + project(my_rocprofiler_attach_tool) + + set(CMAKE_CXX_STANDARD 17) + + # Find ROCprofiler SDK (for headers and linking) + find_package(rocprofiler-sdk REQUIRED) + + add_executable(my_attach_tool + main.cpp + attachment_tool.cpp + ) + + # Link with required libraries + target_link_libraries(my_attach_tool + rocprofiler-sdk::rocprofiler-sdk + dl # for dlopen/dlsym operations + ) + + # Set capabilities for ptrace operations + add_custom_command(TARGET my_attach_tool POST_BUILD + COMMAND sudo setcap cap_sys_ptrace+ep $ + COMMENT "Setting ptrace capability" + ) + +Error Handling +============= + +When using the attachment functions, handle these common error conditions: + +.. code-block:: cpp + + class AttachmentErrorHandler { + public: + static bool validate_target_process(pid_t pid) { + // Check if process exists + if (kill(pid, 0) != 0) { + std::cerr << "Process " << pid << " not found or not accessible" << std::endl; + return false; + } + + // Check if it's a GPU application + std::string maps_path = "/proc/" + std::to_string(pid) + "/maps"; + std::ifstream maps(maps_path); + std::string line; + + bool has_gpu_libs = false; + while (std::getline(maps, line)) { + if (line.find("libamdhip64.so") != std::string::npos || + line.find("libhsa-runtime64.so") != std::string::npos) { + has_gpu_libs = true; + break; + } + } + + if (!has_gpu_libs) { + std::cerr << "Process " << pid << " does not appear to use GPU APIs" << std::endl; + return false; + } + + return true; + } + + static void handle_attachment_errors() { + // Check for common permission issues + if (geteuid() != 0) { + std::cerr << "Warning: Not running as root. Ensure CAP_SYS_PTRACE capability is set." << std::endl; + } + + // Check if rocprofiler libraries are available + if (getenv("LD_LIBRARY_PATH") == nullptr || + std::string(getenv("LD_LIBRARY_PATH")).find("/opt/rocm/lib") == std::string::npos) { + std::cerr << "Warning: /opt/rocm/lib may not be in LD_LIBRARY_PATH" << std::endl; + } + } + }; + +Architecture Overview +===================== + +Process attachment consists of several cooperating components: + +.. code-block:: text + + Attachment Tool (your implementation) + | + v + 1. Process Discovery & Validation + | + v + 2. Ptrace Attachment & Control + | + v + 3. Environment Variable Injection + | + v + 4. Library Loading (rocprofiler-register) + | + v + 5. Profiling Service Activation + | + v + 6. Data Collection & Management + | + v + 7. Detachment & Cleanup + +Theoretical Implementation Details +================================= + +Core Implementation Components +============================= + +1. Process Discovery and Validation +----------------------------------- + +**Target Process Requirements:** + +.. code-block:: cpp + + #include + #include + #include + + bool validate_target_process(pid_t pid) { + // Check if process exists and is accessible + if (kill(pid, 0) != 0) { + return false; // Process doesn't exist or no permission + } + + // Verify it's a GPU application by checking loaded libraries + std::string maps_path = "/proc/" + std::to_string(pid) + "/maps"; + std::ifstream maps(maps_path); + std::string line; + + bool has_hip = false, has_hsa = false; + while (std::getline(maps, line)) { + if (line.find("libamdhip64.so") != std::string::npos) has_hip = true; + if (line.find("libhsa-runtime64.so") != std::string::npos) has_hsa = true; + } + + return has_hip || has_hsa; // Must use HIP or HSA + } + +2. Ptrace-Based Process Control +------------------------------ + +**Core Ptrace Operations:** + +.. code-block:: cpp + + #include + #include + #include + + class ProcessAttachment { + private: + pid_t target_pid; + bool attached = false; + + public: + bool attach(pid_t pid) { + target_pid = pid; + + // Attach to the target process + if (ptrace(PTRACE_ATTACH, target_pid, nullptr, nullptr) == -1) { + perror("ptrace PTRACE_ATTACH failed"); + return false; + } + + // Wait for the process to stop + int status; + if (waitpid(target_pid, &status, 0) == -1) { + perror("waitpid failed"); + detach(); + return false; + } + + if (!WIFSTOPPED(status)) { + fprintf(stderr, "Process did not stop after attach\n"); + detach(); + return false; + } + + attached = true; + return true; + } + + bool detach() { + if (!attached) return true; + + // Detach and allow process to continue + if (ptrace(PTRACE_DETACH, target_pid, nullptr, nullptr) == -1) { + perror("ptrace PTRACE_DETACH failed"); + return false; + } + + attached = false; + return true; + } + }; + +3. Environment Variable Injection +--------------------------------- + +**Environment Variable Management:** + +.. code-block:: cpp + + #include + #include + + class EnvironmentInjector { + public: + struct EnvironmentVar { + std::string name; + std::string value; + }; + + // Prepare environment variables for profiling + std::vector prepare_profiling_env( + const std::vector& trace_options, + const std::string& output_path, + const std::string& output_file) { + + std::vector env_vars; + + // Essential attachment variable + env_vars.push_back({"ROCP_TOOL_ATTACH", "1"}); + + // Configure tracing based on options + for (const auto& option : trace_options) { + if (option == "hip-trace") { + env_vars.push_back({"ROCPROF_HIP_API_TRACE", "1"}); + } + if (option == "kernel-trace") { + env_vars.push_back({"ROCPROF_KERNEL_TRACE", "1"}); + } + if (option == "hsa-trace") { + env_vars.push_back({"ROCPROF_HSA_API_TRACE", "1"}); + } + if (option == "memory-copy-trace") { + env_vars.push_back({"ROCPROF_MEMORY_COPY_TRACE", "1"}); + } + } + + // Output configuration + env_vars.push_back({"ROCPROF_OUTPUT_PATH", output_path}); + env_vars.push_back({"ROCPROF_OUTPUT_FILE_NAME", output_file}); + + return env_vars; + } + + // Serialize environment for injection + std::vector serialize_environment(const std::vector& vars) { + std::vector buffer(4); // Start with count + uint32_t count = vars.size(); + + // Store count in first 4 bytes + buffer[0] = count & 0xFF; + buffer[1] = (count >> 8) & 0xFF; + buffer[2] = (count >> 16) & 0xFF; + buffer[3] = (count >> 24) & 0xFF; + + // Add each variable as null-terminated name and value + for (const auto& var : vars) { + // Add variable name + for (char c : var.name) { + buffer.push_back(c); + } + buffer.push_back(0); // Null terminate name + + // Add variable value + for (char c : var.value) { + buffer.push_back(c); + } + buffer.push_back(0); // Null terminate value + } + + return buffer; + } + }; + +4. Memory Manipulation and Library Loading +------------------------------------------ + +**Remote Memory Operations:** + +.. code-block:: cpp + + #include + + class RemoteMemoryManager { + private: + pid_t target_pid; + + public: + RemoteMemoryManager(pid_t pid) : target_pid(pid) {} + + // Allocate memory in remote process + void* remote_mmap(size_t length, int prot, int flags) { + // Find a suitable location for injection + struct user_regs_struct regs; + if (ptrace(PTRACE_GETREGS, target_pid, nullptr, ®s) == -1) { + return nullptr; + } + + // Save original registers + struct user_regs_struct orig_regs = regs; + + // Set up mmap syscall + regs.rax = 9; // __NR_mmap + regs.rdi = 0; // addr (let kernel choose) + regs.rsi = length; + regs.rdx = prot; + regs.r10 = flags; + regs.r8 = -1; // fd + regs.r9 = 0; // offset + + if (ptrace(PTRACE_SETREGS, target_pid, nullptr, ®s) == -1) { + return nullptr; + } + + // Execute syscall + if (ptrace(PTRACE_SYSCALL, target_pid, nullptr, nullptr) == -1) { + return nullptr; + } + + // Wait for syscall completion + int status; + waitpid(target_pid, &status, 0); + + // Get result + if (ptrace(PTRACE_GETREGS, target_pid, nullptr, ®s) == -1) { + return nullptr; + } + + void* result = (void*)regs.rax; + + // Restore original registers + ptrace(PTRACE_SETREGS, target_pid, nullptr, &orig_regs); + + return (result == (void*)-1) ? nullptr : result; + } + + // Write data to remote process memory + bool write_memory(void* addr, const void* data, size_t size) { + const uint8_t* bytes = static_cast(data); + size_t written = 0; + + while (written < size) { + long word = 0; + size_t to_copy = std::min(sizeof(long), size - written); + + // For partial words, read existing content first + if (to_copy < sizeof(long)) { + errno = 0; + word = ptrace(PTRACE_PEEKDATA, target_pid, + (uint8_t*)addr + written, nullptr); + if (errno != 0) return false; + } + + // Copy new data into word + memcpy(&word, bytes + written, to_copy); + + // Write word to remote process + if (ptrace(PTRACE_POKEDATA, target_pid, + (uint8_t*)addr + written, word) == -1) { + return false; + } + + written += to_copy; + } + + return true; + } + }; + +5. Library Injection and Symbol Resolution +------------------------------------------ + +**Dynamic Library Loading:** + +.. code-block:: cpp + + #include + #include + + class LibraryInjector { + private: + pid_t target_pid; + RemoteMemoryManager memory_manager; + + public: + LibraryInjector(pid_t pid) : target_pid(pid), memory_manager(pid) {} + + // Inject rocprofiler-register library + bool inject_register_library() { + const char* lib_path = "/opt/rocm/lib/librocprofiler-register.so"; + + // Find dlopen in target process + void* dlopen_addr = find_function_address("dlopen"); + if (!dlopen_addr) { + fprintf(stderr, "Could not find dlopen in target process\n"); + return false; + } + + // Allocate memory for library path + void* path_addr = memory_manager.remote_mmap( + strlen(lib_path) + 1, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS); + + if (!path_addr) return false; + + // Write library path to remote memory + if (!memory_manager.write_memory(path_addr, lib_path, strlen(lib_path) + 1)) { + return false; + } + + // Call dlopen in target process + return call_remote_function(dlopen_addr, + {(uint64_t)path_addr, RTLD_NOW | RTLD_GLOBAL}); + } + + void* find_function_address(const char* function_name) { + // Parse /proc/PID/maps to find loaded libraries + std::string maps_path = "/proc/" + std::to_string(target_pid) + "/maps"; + std::ifstream maps(maps_path); + std::string line; + + while (std::getline(maps, line)) { + if (line.find("libc.so") != std::string::npos) { + // Extract base address of libc + size_t dash = line.find('-'); + std::string base_addr_str = line.substr(0, dash); + void* base_addr = (void*)std::stoull(base_addr_str, nullptr, 16); + + // Open libc and find function offset + void* handle = dlopen("libc.so.6", RTLD_LAZY); + if (handle) { + void* func_addr = dlsym(handle, function_name); + if (func_addr) { + // Calculate actual address in target process + return (uint8_t*)base_addr + ((uint8_t*)func_addr - (uint8_t*)dlsym(RTLD_DEFAULT, "main")); + } + dlclose(handle); + } + } + } + return nullptr; + } + }; + +6. ROCprofiler-Register Communication Protocol +---------------------------------------------- + +**Attachment Protocol Implementation:** + +.. code-block:: cpp + + extern "C" { + // Function signatures from rocprofiler-register + typedef void (*attach_func_t)(uint32_t pid); + typedef void (*detach_func_t)(); + } + + class ROCprofilerAttachment { + private: + pid_t target_pid; + void* register_handle = nullptr; + attach_func_t attach_func = nullptr; + detach_func_t detach_func = nullptr; + + public: + bool initialize() { + // Load rocprofiler-register library + register_handle = dlopen("/opt/rocm/lib/librocprofiler-register.so", RTLD_NOW); + if (!register_handle) { + fprintf(stderr, "Failed to load rocprofiler-register: %s\n", dlerror()); + return false; + } + + // Get attachment functions + attach_func = (attach_func_t)dlsym(register_handle, "attach"); + detach_func = (detach_func_t)dlsym(register_handle, "detach"); + + if (!attach_func || !detach_func) { + fprintf(stderr, "Failed to find attachment functions\n"); + return false; + } + + return true; + } + + bool attach_to_process(pid_t pid, const std::vector& env_buffer) { + target_pid = pid; + + // Set up environment for rocprofiler-register + // This involves injecting the environment buffer into the target process + + // Call the attach function + attach_func(pid); + + return true; + } + + void detach_from_process() { + if (detach_func) { + detach_func(); + } + } + }; + +Complete Attachment Tool Implementation +====================================== + +**Main Attachment Tool Structure:** + +.. code-block:: cpp + + #include + #include + #include + #include + #include + + class ROCprofilerAttachTool { + private: + ProcessAttachment process_control; + EnvironmentInjector env_injector; + LibraryInjector lib_injector; + ROCprofilerAttachment rocprof_attachment; + + public: + struct AttachmentConfig { + pid_t target_pid; + std::vector trace_options; + std::string output_path = "./rocprof-attachment-output"; + std::string output_filename = "attached_profile"; + uint32_t duration_msec = 0; // 0 = until process ends + }; + + bool attach_and_profile(const AttachmentConfig& config) { + // 1. Validate target process + if (!validate_target_process(config.target_pid)) { + std::cerr << "Invalid or inaccessible target process: " << config.target_pid << std::endl; + return false; + } + + // 2. Initialize rocprofiler attachment system + if (!rocprof_attachment.initialize()) { + std::cerr << "Failed to initialize rocprofiler attachment system" << std::endl; + return false; + } + + // 3. Attach to target process + if (!process_control.attach(config.target_pid)) { + std::cerr << "Failed to attach to process " << config.target_pid << std::endl; + return false; + } + + // 4. Prepare environment variables + auto env_vars = env_injector.prepare_profiling_env( + config.trace_options, + config.output_path, + config.output_filename); + auto env_buffer = env_injector.serialize_environment(env_vars); + + // 5. Inject rocprofiler-register library + LibraryInjector injector(config.target_pid); + if (!injector.inject_register_library()) { + std::cerr << "Failed to inject rocprofiler-register library" << std::endl; + process_control.detach(); + return false; + } + + // 6. Activate profiling + if (!rocprof_attachment.attach_to_process(config.target_pid, env_buffer)) { + std::cerr << "Failed to activate profiling" << std::endl; + process_control.detach(); + return false; + } + + // 7. Allow process to continue with profiling active + if (!process_control.detach()) { + std::cerr << "Warning: Failed to detach cleanly" << std::endl; + } + + // 8. Wait for specified duration or until process ends + if (config.duration_msec > 0) { + std::cout << "Profiling for " << config.duration_msec << " milliseconds..." << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(config.duration_msec)); + + // Re-attach to stop profiling + rocprof_attachment.detach_from_process(); + } else { + std::cout << "Profiling until process ends..." << std::endl; + // Monitor process and wait for it to end + while (kill(config.target_pid, 0) == 0) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + } + + std::cout << "Profiling completed. Output saved to: " + << config.output_path << "/" << config.output_filename << std::endl; + return true; + } + }; + + // Example usage + int main(int argc, char* argv[]) { + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " [options]" << std::endl; + return 1; + } + + ROCprofilerAttachTool::AttachmentConfig config; + config.target_pid = std::stoi(argv[1]); + config.trace_options = {"hip-trace", "kernel-trace", "memory-copy-trace"}; + config.duration_msec = 5000; // 5 seconds + + ROCprofilerAttachTool tool; + if (!tool.attach_and_profile(config)) { + std::cerr << "Attachment and profiling failed" << std::endl; + return 1; + } + + return 0; + } + +Required System Permissions and Setup +===================================== + +**Permission Requirements:** + +.. code-block:: bash + + # Your attachment tool will need: + + # 1. Ptrace permissions (may require root or capabilities) + sudo setcap cap_sys_ptrace+ep your_attachment_tool + + # 2. Access to /proc filesystem + # Usually available by default + + # 3. Ability to load shared libraries + # Ensure ROCm libraries are in LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH + +**Build Requirements:** + +.. code-block:: cmake + + # CMakeLists.txt for your attachment tool + cmake_minimum_required(VERSION 3.16) + project(rocprofiler_attach_tool) + + set(CMAKE_CXX_STANDARD 17) + + find_package(rocprofiler-sdk REQUIRED) + + add_executable(rocprofiler_attach_tool + main.cpp + process_attachment.cpp + environment_injection.cpp + library_injection.cpp + ) + + target_link_libraries(rocprofiler_attach_tool + rocprofiler-sdk::rocprofiler-sdk + dl # for dlopen/dlsym + ) + +Error Handling and Debugging +============================ + +**Common Issues and Solutions:** + +1. **Ptrace Permissions**: Use ``strace`` to debug ptrace failures +2. **Library Loading**: Check ``/proc/PID/maps`` to verify library injection +3. **Environment Variables**: Validate environment buffer format +4. **Process State**: Monitor target process status during attachment + +**Debugging Techniques:** + +.. code-block:: cpp + + // Enable debug logging + setenv("ROCPROF_LOGGING_LEVEL", "trace", 1); + + // Monitor attachment progress + bool debug_attachment(pid_t pid) { + std::cout << "Target process memory maps:" << std::endl; + std::string cmd = "cat /proc/" + std::to_string(pid) + "/maps"; + system(cmd.c_str()); + + std::cout << "Target process environment:" << std::endl; + cmd = "cat /proc/" + std::to_string(pid) + "/environ | tr '\\0' '\\n'"; + system(cmd.c_str()); + + return true; + } + +This implementation guide provides the foundation needed to build a complete process attachment tool for ROCprofiler-SDK. The actual rocprofv3 implementation uses similar techniques with additional optimizations and error handling. diff --git a/projects/rocprofiler-sdk/source/docs/how-to/using-rocprofv3.rst b/projects/rocprofiler-sdk/source/docs/how-to/using-rocprofv3.rst index 8e00f603d9d..5bdec274b01 100644 --- a/projects/rocprofiler-sdk/source/docs/how-to/using-rocprofv3.rst +++ b/projects/rocprofiler-sdk/source/docs/how-to/using-rocprofv3.rst @@ -61,6 +61,10 @@ The following table lists the commonly used ``rocprofv3`` command-line options c | Sets the desired log level. |br| |br| |br| | Specifies the path to a YAML file consisting of extra counter definitions. + * - Process attachment + - | ``-p`` PID \| ``--pid`` PID \| ``--attach`` PID + - | Attaches to a running process by process ID and profiles it dynamically. This enables profiling of applications that are already running without needing to restart them from the profiler. The profiler will instrument the target process and collect the specified tracing or counter data for the configured duration. + * - Aggregate tracing - | ``-r`` [BOOL] \| ``--runtime-trace`` [BOOL] |br| |br| |br| |br| |br| |br| |br| | ``-s`` [BOOL] \| ``--sys-trace`` [BOOL] @@ -590,6 +594,62 @@ Here are the contents of ``rocjpeg_api_trace.csv`` file: :widths: 10,10,10,10,10,20,20 :header-rows: 1 +Process Attachment ++++++++++++++++++++ + +``rocprofv3`` supports attaching to already running processes to profile them dynamically without requiring application restart. This is particularly useful for long-running applications, services, or when you need to profile an application that is already in a specific state. + +Process attachment uses the ``-p``, ``--pid``, or ``--attach`` options (all equivalent) followed by the target process ID. The profiler will instrument the target process and collect the specified tracing or counter data for the configured duration. + +**Basic attachment syntax:** + +.. code-block:: bash + + rocprofv3 -p + # or + rocprofv3 --pid + # or + rocprofv3 --attach + +**Example: Attach to a running process and collect HIP traces:** + +.. code-block:: bash + + # Find the process ID of your application + ps aux | grep my_application + + # Attach to the process (replace 12345 with actual PID) + rocprofv3 --attach 12345 --hip-trace --output-format csv + +**Example: Attach with multiple tracing options:** + +.. code-block:: bash + + rocprofv3 -p 12345 --hip-trace --kernel-trace --memory-copy-trace --output-format json + +**Example: Attach with counter collection:** + +.. code-block:: bash + + rocprofv3 --pid 12345 --pmc SQ_WAVES GRBM_COUNT --output-format csv + +**Important considerations for process attachment:** + +- The target process must be running and actively using GPU resources for meaningful profiling data +- Attachment requires appropriate system permissions (may need elevated privileges depending on the target process) +- The profiler will collect data for the entire remaining lifetime of the process or until the configured collection period expires +- Use ``--attach-duration-msec`` to specify how long to profile the attached process (in milliseconds) + +**Example with duration control:** + +.. code-block:: bash + + # Attach and profile for 5 seconds + rocprofv3 --attach 12345 --attach-duration-msec 5000 --sys-trace --output-format csv + +The attachment functionality works with all tracing and profiling options available in ``rocprofv3``, providing the same comprehensive analysis capabilities as standard application launching. + + Post-processing tracing options ++++++++++++++++++++++++++++++++ diff --git a/projects/rocprofiler-sdk/source/docs/index.rst b/projects/rocprofiler-sdk/source/docs/index.rst index 20d75695dfb..fccdda2891b 100644 --- a/projects/rocprofiler-sdk/source/docs/index.rst +++ b/projects/rocprofiler-sdk/source/docs/index.rst @@ -44,6 +44,7 @@ The documentation is structured as follows: * :doc:`Tool library ` * :ref:`runtime-intercept-tables` + * :doc:`Process attachment ` * :doc:`Buffered services ` * :doc:`Callback services ` * :doc:`Counter collection services ` diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/CMakeLists.txt index bd545ac1428..4c7e382461c 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/CMakeLists.txt @@ -1,4 +1,8 @@ -set(ROCPROFILER_EXPERIMENTAL_HEADER_FILES counters.h thread_trace.h) +# +# Experimental components of the ROCProfiler SDK API. +# + +set(ROCPROFILER_EXPERIMENTAL_HEADER_FILES counters.h registration.h thread_trace.h) install( FILES ${ROCPROFILER_EXPERIMENTAL_HEADER_FILES} diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/registration.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/registration.h new file mode 100644 index 00000000000..3e8b4ab1bd1 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/registration.h @@ -0,0 +1,118 @@ +// MIT License +// +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include + +ROCPROFILER_EXTERN_C_INIT + +/** + * @defgroup EXPERIMENTAL_REGISTRATION_GROUP Experimental tool registration + * + * @brief Data types and functions for tool registration with rocprofiler + * @{ + */ + +/** + * @brief (experimental) + * + */ +ROCPROFILER_SDK_EXPERIMENTAL +typedef void (*rocprofiler_client_detach_t)(rocprofiler_client_id_t); + +/** + * @brief Prototype for the start of the attach function that will be called after the + * configuration. + * @param [in] tool_data `tool_data` field returned from ::rocprofiler_configure_attach in + * ::rocprofiler_tool_configure_result_t. + */ +ROCPROFILER_SDK_EXPERIMENTAL +typedef int (*rocprofiler_tool_attach_t)(rocprofiler_client_detach_t detach_func, + rocprofiler_context_id_t* context_ids, + uint64_t context_ids_length, + void* tool_data); + +/** + * @brief Prototype for the detach function where a tool can temporarily suspend operations. + * @param [in] tool_data `tool_data` field returned from ::rocprofiler_configure in + * ::rocprofiler_tool_configure_attach_result_t. + */ +ROCPROFILER_SDK_EXPERIMENTAL +typedef void (*rocprofiler_tool_detach_t)(void* tool_data); + +/** + * @brief (EXPERIMENTAL) Extended data structure containing initialization, finalization, + * attach/detach, and data. + * + * This is an experimental extension of ::rocprofiler_tool_configure_result_t that adds support for + * runtime attachment and detachment of tools. The `tool_reattach` and `tool_detach` function + * pointers allow tools to handle dynamic attachment scenarios where they may need to suspend and + * resume profiling operations. + * + * The `size` field is used for ABI reasons and should be set to + * `sizeof(rocprofiler_tool_configure_result_t)` + */ +typedef struct ROCPROFILER_SDK_EXPERIMENTAL rocprofiler_tool_configure_attach_result_t +{ + size_t size; ///< size of this struct (in case of future extensions) + rocprofiler_tool_attach_t tool_attach; ///< after configuration + rocprofiler_tool_detach_t tool_detach; ///< end of attach session + void* tool_data; ///< data to provide to init and fini callbacks +} rocprofiler_tool_configure_attach_result_t; + +/** + * @brief (experimental) This is the special function that tools define to enable rocprofiler + * attachment support. + * + * @param version + * @param runtime_version + * @param priority + * @param client_id + * @return rocprofiler_tool_configure_attach_result_t* + */ +ROCPROFILER_SDK_EXPERIMENTAL +rocprofiler_tool_configure_attach_result_t* +rocprofiler_configure_attach(uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* client_id) ROCPROFILER_PUBLIC_API; + +/** + * @brief Function pointer typedef for ::rocprofiler_configure_attach function + * @param [in] version The version of rocprofiler: `(10000 * major) + (100 * minor) + patch` + * @param [in] runtime_version String descriptor of the rocprofiler version and other relevant info. + * @param [in] priority How many client tools were initialized before this client tool + * @param [in, out] client_id tool identifier value. + */ +ROCPROFILER_SDK_EXPERIMENTAL +typedef rocprofiler_tool_configure_attach_result_t* (*rocprofiler_configure_attach_func_t)( + uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* client_id); + +/** @} */ + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/CMakeLists.txt index 6989fe53373..21e0d3c5950 100644 --- a/projects/rocprofiler-sdk/source/lib/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/CMakeLists.txt @@ -7,6 +7,7 @@ set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "core") add_subdirectory(common) add_subdirectory(output) add_subdirectory(rocprofiler-sdk) +add_subdirectory(rocprofiler-sdk-attach) set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "roctx") add_subdirectory(rocprofiler-sdk-roctx) @@ -17,6 +18,7 @@ add_subdirectory(rocprofiler-sdk-rocpd) set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "tools") add_subdirectory(att-tool) add_subdirectory(rocprofiler-sdk-tool) +add_subdirectory(rocprofv3-attach) add_subdirectory(python) diff --git a/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp b/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp index efe238eb7eb..b8827d99b16 100644 --- a/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp @@ -61,6 +61,7 @@ struct buffered_output void flush(); void read(); void clear(); + void reset(); void destroy(); uint64_t get_num_bytes() const; @@ -131,6 +132,18 @@ buffered_output::clear() if(!enabled) return; } +template +void +buffered_output::reset() +{ + if(!enabled) return; + + if(auto*& filebuf = get_tmp_file_buffer(buffer_type_v); filebuf) + { + filebuf->reset(); + } +} + template void buffered_output::destroy() diff --git a/projects/rocprofiler-sdk/source/lib/output/tmp_file.cpp b/projects/rocprofiler-sdk/source/lib/output/tmp_file.cpp index 520d16db8f0..9d696376951 100644 --- a/projects/rocprofiler-sdk/source/lib/output/tmp_file.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/tmp_file.cpp @@ -120,6 +120,12 @@ tmp_file::open(std::ios::openmode _mode) _ofs.open(filename, std::ofstream::binary | std::ofstream::out); } + if(stream.is_open() && stream.good()) + { + ROCP_TRACE << "temporary file: '" << filename << "' is already open..."; + return true; + } + ROCP_INFO << "opening temporary file: '" << filename << "'..."; stream.open(filename, _mode); return (stream.is_open() && stream.good()); diff --git a/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp b/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp index 7ee18157d90..acffd1e1016 100644 --- a/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp @@ -70,12 +70,26 @@ struct file_buffer file_buffer& operator=(const file_buffer&) = delete; file_buffer& operator=(file_buffer&&) noexcept = default; + void reset(); + domain_type domain = {}; uint64_t nbytes = 0; ring_buffer_t buffer = {}; tmp_file file; }; +template +void +file_buffer::reset() +{ + auto _lk = std::lock_guard{file.file_mutex}; + file.close(); + file.remove(); // Delete old file + file.file_pos.clear(); + nbytes = 0; + buffer.clear(); +} + template struct file_buffer> { @@ -104,9 +118,9 @@ offload_buffer(domain_type type) return; } - auto _lk = std::lock_guard(filebuf->file.file_mutex); - [[maybe_unused]] static auto _success = filebuf->file.open(); - auto& _fs = filebuf->file.stream; + auto _lk = std::lock_guard(filebuf->file.file_mutex); + [[maybe_unused]] auto _success = filebuf->file.open(); + auto& _fs = filebuf->file.stream; ROCP_CI_LOG_IF(WARNING, _fs.tellg() != _fs.tellp()) // this should always be true << "tellg=" << _fs.tellg() << ", tellp=" << _fs.tellp(); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/CMakeLists.txt new file mode 100644 index 00000000000..5c873215a53 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/CMakeLists.txt @@ -0,0 +1,46 @@ +# +# rocprofiler-sdk attach Library +# + +find_package(rocprofiler-register REQUIRED) + +add_library(rocprofiler-sdk-attach-shared-library SHARED) +add_library(rocprofiler-sdk::rocprofiler-sdk-attach-shared-library ALIAS + rocprofiler-sdk-attach-shared-library) +add_library(rocprofiler-sdk-attach::rocprofiler-sdk-attach-shared-library ALIAS + rocprofiler-sdk-attach-shared-library) + +target_sources( + rocprofiler-sdk-attach-shared-library + PRIVATE queue_registration.cpp code_object_registration.cpp attach.cpp table.cpp) + +target_include_directories( + rocprofiler-sdk-attach-shared-library + INTERFACE + $ + $ + $) +target_link_libraries( + rocprofiler-sdk-attach-shared-library + PRIVATE rocprofiler-sdk::rocprofiler-sdk-headers + rocprofiler-sdk::rocprofiler-sdk-build-flags + rocprofiler-sdk::rocprofiler-sdk-memcheck + rocprofiler-sdk::rocprofiler-sdk-common-library + rocprofiler-register::rocprofiler-register-headers) + +set_target_properties( + rocprofiler-sdk-attach-shared-library + PROPERTIES OUTPUT_NAME rocprofiler-sdk-attach + LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR} + SOVERSION ${PROJECT_VERSION_MAJOR} + VERSION ${PROJECT_VERSION} + SKIP_BUILD_RPATH OFF + BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + DEFINE_SYMBOL attach_EXPORTS) + +install( + TARGETS rocprofiler-sdk-attach-shared-library + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT core + EXPORT rocprofiler-sdk-attach-targets) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/attach.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/attach.cpp new file mode 100644 index 00000000000..3c55638412d --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/attach.cpp @@ -0,0 +1,108 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "attach.h" +#include "code_object_registration.hpp" +#include "lib/common/defines.hpp" +#include "queue_registration.hpp" +#include "table.hpp" + +#include "lib/common/logging.hpp" + +#include +#include + +#define ROCPROFILER_ATTACH_VERSION_MAJOR ROCPROFILER_VERSION_MAJOR +#define ROCPROFILER_ATTACH_VERSION_MINOR ROCPROFILER_VERSION_MINOR +#define ROCPROFILER_ATTACH_VERSION_PATCH ROCPROFILER_VERSION_PATCH +#define ROCPROFILER_ATTACH_VERSION \ + ROCPROFILER_COMPUTE_VERSION(ROCPROFILER_ATTACH_VERSION_MAJOR, \ + ROCPROFILER_ATTACH_VERSION_MINOR, \ + ROCPROFILER_ATTACH_VERSION_PATCH) + +using rocprofiler_register_library_api_table_func_t = + decltype(::rocprofiler_register_library_api_table)*; + +ROCPROFILER_EXTERN_C_INIT + +int +rocprofiler_attach_set_api_table(const char* name, + uint64_t lib_version, + uint64_t lib_instance, + void** tables, + uint64_t num_tables, + rocprofiler_register_library_api_table_func_t register_functor) + ROCPROFILER_PUBLIC_API; + +int +rocprofiler_attach_set_api_table(const char* name, + uint64_t lib_version, + uint64_t lib_instance, + void** tables, + uint64_t num_tables, + rocprofiler_register_library_api_table_func_t register_functor) +{ + rocprofiler::common::init_logging("ROCPROFILER_ATTACH"); + + ROCP_TRACE << "rocprofiler_attach_set_api_table called for api " << name; + (void) lib_version; // unused + (void) lib_instance; // unused + + if(std::string_view{name} != "hsa") + { + ROCP_ERROR << "rocprofiler_attach_set_api_table was called with a table other than HSA"; + return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + } + + ROCP_ERROR_IF(num_tables > 1) << "rocprofiler expected HSA library to pass 1 API table, not " + << num_tables; + + auto* hsa_api_table = static_cast(tables[0]); + + rocprofiler::attach::dispatch_table_init(); + + if(register_functor) + { + auto library_id = rocprofiler_register_library_indentifier_t{}; + auto attach_tables = std::array{rocprofiler::attach::get_dispatch_table()}; + register_functor("rocattach", + nullptr, + ROCPROFILER_ATTACH_VERSION, + attach_tables.data(), + attach_tables.size(), + &library_id); + } + + // Initialize all registration services in attach + rocprofiler::attach::queue_registration_init(hsa_api_table); + rocprofiler::attach::code_object_registration_init(hsa_api_table); + + return ROCPROFILER_STATUS_SUCCESS; +} + +int +rocprofiler_attach_get_version() +{ + return ROCPROFILER_VERSION; +} + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/attach.h b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/attach.h new file mode 100644 index 00000000000..5c1d1447533 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/attach.h @@ -0,0 +1,32 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +ROCPROFILER_EXTERN_C_INIT + +int +rocprofiler_attach_get_version() ROCPROFILER_API; + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.cpp new file mode 100644 index 00000000000..b5b58c885ef --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.cpp @@ -0,0 +1,143 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "code_object_registration.h" +#include "code_object_registration.hpp" +#include "table.hpp" + +#include + +#include "lib/common/static_object.hpp" + +#include + +namespace +{ +using hsa_executable_freeze_t = decltype(CoreApiTable::hsa_executable_freeze_fn); +using hsa_executable_destroy_t = decltype(CoreApiTable::hsa_executable_destroy_fn); +using code_object_collection_t = std::vector; + +struct code_object_registration_t +{ + // gates access to code_objects collection + std::mutex code_objects_mutex; + code_object_collection_t code_objects; + hsa_executable_freeze_t hsa_executable_freeze_fn = nullptr; + hsa_executable_destroy_t hsa_executable_destroy_fn = nullptr; +}; + +code_object_registration_t* +get_code_object_registration() +{ + static auto*& registration = + rocprofiler::common::static_object::construct(); + return registration; +} + +hsa_status_t +executable_freeze(hsa_executable_t executable, const char* options) +{ + auto* registration = CHECK_NOTNULL(get_code_object_registration()); + auto status = registration->hsa_executable_freeze_fn(executable, options); + + if(status != HSA_STATUS_SUCCESS) return status; + + ROCP_TRACE << "adding code_object " << executable.handle; + { + std::lock_guard lg(registration->code_objects_mutex); + registration->code_objects.emplace_back(executable); + } + auto* attach_table = rocprofiler::attach::get_dispatch_table(); + if(attach_table->rocprofiler_attach_notify_new_code_object) + { + attach_table->rocprofiler_attach_notify_new_code_object(executable, nullptr); + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t +executable_destroy(hsa_executable_t executable) +{ + auto* registration = CHECK_NOTNULL(get_code_object_registration()); + ROCP_TRACE << "removing code_object " << executable.handle; + { + std::lock_guard lg(registration->code_objects_mutex); + auto pred = [&](const hsa_executable_t& a) { return a.handle == executable.handle; }; + auto itr = std::find_if( + registration->code_objects.begin(), registration->code_objects.end(), pred); + if(itr == registration->code_objects.end()) + { + ROCP_WARNING << "remove code_object could not find " << executable.handle; + } + registration->code_objects.erase(itr); + } + + return registration->hsa_executable_destroy_fn(executable); +} + +int +iterate_all_code_objects(rocprof_attach_code_object_iterator_t func, void* data) +{ + auto* registration = CHECK_NOTNULL(get_code_object_registration()); + + for(const auto& code_object : registration->code_objects) + { + func(code_object, data); + } + + return ROCPROFILER_STATUS_SUCCESS; +} + +} // namespace + +namespace rocprofiler +{ +namespace attach +{ +void +code_object_registration_init( + HsaApiTable* table) // CoreApiTable& core_table, AmdExtTable& ext_table) +{ + ROCP_TRACE << "Initializing Code Object Registration"; + auto* registration = CHECK_NOTNULL(get_code_object_registration()); + CoreApiTable& core_table = *table->core_; + + // route executable freeze and destroy to us, but also save the original entrypoint so we can + // call it + registration->hsa_executable_freeze_fn = core_table.hsa_executable_freeze_fn; + core_table.hsa_executable_freeze_fn = executable_freeze; + registration->hsa_executable_destroy_fn = core_table.hsa_executable_destroy_fn; + core_table.hsa_executable_destroy_fn = executable_destroy; +} + +} // namespace attach +} // namespace rocprofiler + +ROCPROFILER_EXTERN_C_INIT + +int +rocprofiler_attach_iterate_all_code_objects(rocprof_attach_code_object_iterator_t func, void* data) +{ + return iterate_all_code_objects(func, data); +} + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.h b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.h new file mode 100644 index 00000000000..54f946d6dda --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.h @@ -0,0 +1,39 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +#include + +#include + +ROCPROFILER_EXTERN_C_INIT + +typedef void (*rocprof_attach_code_object_iterator_t)(hsa_executable_t, void*); + +int +rocprofiler_attach_iterate_all_code_objects(rocprof_attach_code_object_iterator_t func, + void* data) ROCPROFILER_API; + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.hpp new file mode 100644 index 00000000000..c4631af4c48 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/code_object_registration.hpp @@ -0,0 +1,39 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "lib/rocprofiler-sdk/hsa/hsa.hpp" + +#include + +#include + +namespace rocprofiler +{ +namespace attach +{ +void +code_object_registration_init(HsaApiTable* table); + +} // namespace attach +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.cpp new file mode 100644 index 00000000000..dcbd757a3fc --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.cpp @@ -0,0 +1,268 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "queue_registration.h" +#include "queue_registration.hpp" +#include "table.hpp" + +#include "lib/common/static_object.hpp" + +#include + +namespace +{ +using callback_t = void (*)(hsa_status_t status, hsa_queue_t* source, void* data); + +struct queue_entry_t +{ + hsa_agent_t agent = hsa_agent_t{}; + write_interceptor_t user_write_interceptor_func = nullptr; + void* user_write_interceptor_data = nullptr; +}; + +using queue_collection_t = std::unordered_map; + +struct queue_registration_t +{ + // guards access to both queues collection + std::mutex queues_mutex; + queue_collection_t queues; + + decltype(AmdExtTable::hsa_amd_queue_intercept_create_fn) hsa_amd_queue_intercept_create_fn = + nullptr; + decltype(AmdExtTable::hsa_amd_profiling_set_profiler_enabled_fn) + hsa_amd_profiling_set_profiler_enabled_fn = nullptr; + decltype(AmdExtTable::hsa_amd_queue_intercept_register_fn) hsa_amd_queue_intercept_register_fn = + nullptr; + decltype(CoreApiTable::hsa_status_string_fn) hsa_status_string_fn = nullptr; +}; + +queue_registration_t* +get_queue_registration() +{ + static auto*& registration = + rocprofiler::common::static_object::construct(); + return registration; +} + +std::string_view +get_hsa_status_string(hsa_status_t _status) +{ + auto* registration = CHECK_NOTNULL(get_queue_registration()); + + const char* _status_msg = nullptr; + return (CHECK_NOTNULL(registration->hsa_status_string_fn)(_status, &_status_msg) == + HSA_STATUS_SUCCESS && + _status_msg) + ? std::string_view{_status_msg} + : std::string_view{"(unknown HSA error)"}; +} + +#define ROCP_ATTACH_HSA_TABLE_CALL(SEVERITY, EXPR) \ + auto ROCPROFILER_VARIABLE(rocp_hsa_table_call_, __LINE__) = (EXPR); \ + ROCP_##SEVERITY##_IF(ROCPROFILER_VARIABLE(rocp_hsa_table_call_, __LINE__) != \ + HSA_STATUS_SUCCESS) \ + << #EXPR << " returned non-zero status code " \ + << ROCPROFILER_VARIABLE(rocp_hsa_table_call_, __LINE__) \ + << " :: " << get_hsa_status_string(ROCPROFILER_VARIABLE(rocp_hsa_table_call_, __LINE__)) \ + << " " + +// This is the attach library's WriteInterceptor that is provided to HSA. +// Since the interceptor function cannot be changed later, this shim is provided immediately upon +// queue creation. This shim's user data is a reference to the queue_entry_t for this queue, which +// will then by cast and used to call the user write interceptor if it is non-null. +void +write_interceptor(const void* packets, + uint64_t pkt_count, + uint64_t unused, + void* data, + hsa_amd_queue_intercept_packet_writer_t writer) +{ + ROCP_FATAL_IF(data == nullptr) << "WriteInterceptor was not passed a valid pointer"; + const auto* entry = static_cast(data); + + if(entry->user_write_interceptor_func) + { + entry->user_write_interceptor_func( + packets, pkt_count, unused, entry->user_write_interceptor_data, writer); + } + else + { + writer(packets, pkt_count); + } +} + +// HSA Intercept Functions (create_queue/destroy_queue) +hsa_status_t +create_queue(hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + callback_t callback, + void* data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t** queue) +{ + auto* registration = CHECK_NOTNULL(get_queue_registration()); + + // Create new queue in HSA + hsa_queue_t* new_queue = nullptr; + ROCP_FATAL_IF(!registration->hsa_amd_queue_intercept_create_fn || + !registration->hsa_amd_profiling_set_profiler_enabled_fn || + !registration->hsa_amd_queue_intercept_register_fn || + !registration->hsa_status_string_fn) + << "Queue registration was not initialized before create queue was called!"; + + ROCP_ATTACH_HSA_TABLE_CALL(FATAL, + registration->hsa_amd_queue_intercept_create_fn(agent, + size, + type, + callback, + data, + private_segment_size, + group_segment_size, + &new_queue)) + << "Could not create intercept queue"; + + ROCP_ATTACH_HSA_TABLE_CALL( + FATAL, registration->hsa_amd_profiling_set_profiler_enabled_fn(new_queue, true)) + << "Could not setup intercept profiler"; + + // Create and insert our queue's data entry now, as we need to provide a reference to it for the + // write_interceptor + queue_entry_t entry{}; + entry.agent = agent; + + { + std::lock_guard lg(registration->queues_mutex); + ROCP_FATAL_IF(registration->queues.count(new_queue) > 0) + << "Queue registration already contains an entry for new queue handle " << new_queue; + registration->queues.insert({new_queue, entry}); + } + auto* write_interceptor_data = &(registration->queues.at(new_queue)); + + // Pass queue_entry_t* as user data, used to directly call the user's write interceptor + ROCP_ATTACH_HSA_TABLE_CALL(FATAL, + registration->hsa_amd_queue_intercept_register_fn( + new_queue, write_interceptor, write_interceptor_data)) + << "Could not register interceptor"; + + *queue = new_queue; + + ROCP_INFO << "created attach queue for HSA agent handle " << agent.handle; + + auto* attach_table = rocprofiler::attach::get_dispatch_table(); + if(attach_table->rocprofiler_attach_notify_new_queue) + { + attach_table->rocprofiler_attach_notify_new_queue(new_queue, agent, nullptr); + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t +destroy_queue(hsa_queue_t* hsa_queue) +{ + auto* registration = get_queue_registration(); + if(registration) + { + std::lock_guard lg(registration->queues_mutex); + size_t erase_count = registration->queues.erase(hsa_queue); + ROCP_WARNING_IF(erase_count == 0) + << "Destroy queue was called for a handle that was not in queues: " << hsa_queue; + } + return HSA_STATUS_SUCCESS; +} + +int +iterate_all_queues(rocprof_attach_queue_iterator_t func, void* user_data) +{ + auto* registration = CHECK_NOTNULL(get_queue_registration()); + + std::lock_guard lg(registration->queues_mutex); + for(const auto& qr_pair : registration->queues) + { + func(qr_pair.first, qr_pair.second.agent, user_data); + } + + return ROCPROFILER_STATUS_SUCCESS; +} + +int +set_write_interceptor(hsa_queue_t* queue, write_interceptor_t func, void* data) +{ + auto* registration = CHECK_NOTNULL(get_queue_registration()); + auto qr_pair = registration->queues.find(queue); + if(qr_pair == registration->queues.end()) + { + ROCP_ERROR << "couldn't find registration to set write interceptor for queue " << queue; + return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + } + qr_pair->second.user_write_interceptor_func = func; + qr_pair->second.user_write_interceptor_data = data; + return 0; +} + +} // namespace + +namespace rocprofiler +{ +namespace attach +{ +void +queue_registration_init(HsaApiTable* table) +{ + ROCP_TRACE << "Initializing Queue Registration"; + auto* registration = CHECK_NOTNULL(get_queue_registration()); + + CoreApiTable& core_table = *table->core_; + + core_table.hsa_queue_create_fn = create_queue; + core_table.hsa_queue_destroy_fn = destroy_queue; + + registration->hsa_amd_queue_intercept_create_fn = + *table->amd_ext_->hsa_amd_queue_intercept_create_fn; + registration->hsa_amd_profiling_set_profiler_enabled_fn = + *table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; + registration->hsa_amd_queue_intercept_register_fn = + *table->amd_ext_->hsa_amd_queue_intercept_register_fn; + registration->hsa_status_string_fn = *table->core_->hsa_status_string_fn; +} + +} // namespace attach +} // namespace rocprofiler + +ROCPROFILER_EXTERN_C_INIT + +int +rocprofiler_attach_iterate_all_queues(rocprof_attach_queue_iterator_t func, void* data) +{ + return iterate_all_queues(func, data); +} + +int +rocprofiler_attach_set_write_interceptor(hsa_queue_t* queue, write_interceptor_t func, void* data) +{ + return set_write_interceptor(queue, func, data); +} + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.h b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.h new file mode 100644 index 00000000000..70eba798c85 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.h @@ -0,0 +1,53 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +#include +#include +#include + +#include + +ROCPROFILER_EXTERN_C_INIT + +typedef void (*hsa_amd_queue_intercept_packet_writer_t)(const void*, uint64_t); +typedef void (*write_interceptor_t)(const void*, + uint64_t, + uint64_t, + void*, + hsa_amd_queue_intercept_packet_writer_t); + +typedef void (*rocprof_attach_queue_iterator_t)(hsa_queue_t*, hsa_agent_t, void*); + +int +rocprofiler_attach_iterate_all_queues(rocprof_attach_queue_iterator_t func, + void* data) ROCPROFILER_API; + +int +rocprofiler_attach_set_write_interceptor(hsa_queue_t* queue, + write_interceptor_t func, + void* data) ROCPROFILER_API; + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.hpp new file mode 100644 index 00000000000..9384731560a --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/queue_registration.hpp @@ -0,0 +1,35 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "lib/rocprofiler-sdk/hsa/hsa.hpp" + +namespace rocprofiler +{ +namespace attach +{ +void +queue_registration_init(HsaApiTable* table); + +} // namespace attach +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.cpp new file mode 100644 index 00000000000..5fc78b21a9e --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.cpp @@ -0,0 +1,57 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "table.hpp" + +#include "lib/common/abi.hpp" +#include "lib/common/static_object.hpp" + +namespace rocprofiler +{ +namespace attach +{ +ROCP_SDK_ENFORCE_ABI_VERSIONING(::RocAttachDispatchTable, ROCPROFILER_ATTACH_DISPATCH_TABLE_LEGNTH); + +RocAttachDispatchTable* +get_dispatch_table() +{ + static auto*& dispatch_table = + rocprofiler::common::static_object::construct(); + return dispatch_table; +} + +void +dispatch_table_init() +{ + auto* table = get_dispatch_table(); + + table->size = sizeof(RocAttachDispatchTable); + table->rocprofiler_attach_get_version = &rocprofiler_attach_get_version; + table->rocprofiler_attach_iterate_all_queues = &rocprofiler_attach_iterate_all_queues; + table->rocprofiler_attach_set_write_interceptor = &rocprofiler_attach_set_write_interceptor; + table->rocprofiler_attach_iterate_all_code_objects = + &rocprofiler_attach_iterate_all_code_objects; + table->rocprofiler_attach_notify_new_queue = nullptr; + table->rocprofiler_attach_notify_new_code_object = nullptr; +} +} // namespace attach +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.h b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.h new file mode 100644 index 00000000000..2d5b021df06 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.h @@ -0,0 +1,52 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "attach.h" +#include "code_object_registration.h" +#include "queue_registration.h" + +#define ROCATTACH_API_TABLE_VERSION_MAJOR 0 + +ROCPROFILER_EXTERN_C_INIT + +typedef int (*rocprofiler_attach_get_version_t)(); +typedef int (*rocprofiler_attach_iterate_all_queues_t)(rocprof_attach_queue_iterator_t, void*); +typedef int (*rocprofiler_attach_set_write_interceptor_t)(hsa_queue_t*, write_interceptor_t, void*); +typedef int (*rocprofiler_attach_iterate_all_code_objects_t)(rocprof_attach_code_object_iterator_t, + void*); +typedef void (*rocprofiler_attach_notify_new_queue_t)(hsa_queue_t*, hsa_agent_t, void*); +typedef void (*rocprofiler_attach_notify_new_code_object_t)(hsa_executable_t, void*); + +struct RocAttachDispatchTable +{ + uint64_t size; + rocprofiler_attach_get_version_t rocprofiler_attach_get_version; + rocprofiler_attach_iterate_all_queues_t rocprofiler_attach_iterate_all_queues; + rocprofiler_attach_set_write_interceptor_t rocprofiler_attach_set_write_interceptor; + rocprofiler_attach_iterate_all_code_objects_t rocprofiler_attach_iterate_all_code_objects; + rocprofiler_attach_notify_new_queue_t rocprofiler_attach_notify_new_queue; + rocprofiler_attach_notify_new_code_object_t rocprofiler_attach_notify_new_code_object; +}; + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.hpp new file mode 100644 index 00000000000..d860e363460 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-attach/table.hpp @@ -0,0 +1,41 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "table.h" + +namespace rocprofiler +{ +namespace attach +{ +constexpr size_t ROCPROFILER_ATTACH_DISPATCH_TABLE_LEGNTH = 6; + +RocAttachDispatchTable* +get_dispatch_table(); + +void** +get_dispatch_registration_table(); + +void +dispatch_table_init(); + +} // namespace attach +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/README.md b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/README.md index 258cb03f1bb..40f45a603c5 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/README.md +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/README.md @@ -10,7 +10,9 @@ with the rocprofiler. The user through rocprofv3 script can select the options to obtain tracing and counter collection -services from the rocprofiler. +services from the rocprofiler. rocprofv3 supports both +launching new applications and attaching to existing +processes using the `--attach`/`--pid`/`-p` options. Currently, this tool supports kernel trace and the hsa-api trace. diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp index 506ccdac12a..3c8cbbeb660 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp @@ -71,6 +71,12 @@ struct att_perfcounter template void save(ArchiveT&) const; + + friend bool operator==(const att_perfcounter& lhs, const att_perfcounter& rhs) + { + return std::tie(lhs.counter_name, lhs.simd_mask) == + std::tie(rhs.counter_name, rhs.simd_mask); + } }; struct config : output_config @@ -172,11 +178,57 @@ struct config : output_config template void load(ArchiveT&) {} + + auto get_attach_invariants() const; }; #define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR)) #define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR)) +inline auto +config::get_attach_invariants() const +{ + return std::make_tuple(kernel_trace, + hsa_core_api_trace, + hsa_amd_ext_api_trace, + hsa_image_ext_api_trace, + hsa_finalizer_ext_api_trace, + marker_api_trace, + memory_copy_trace, + memory_allocation_trace, + scratch_memory_trace, + counter_collection, + hip_runtime_api_trace, + hip_compiler_api_trace, + rccl_api_trace, + rocdecode_api_trace, + rocjpeg_api_trace, + advanced_thread_trace, + att_serialize_all, + att_param_shader_engine_mask, + att_param_buffer_size, + att_param_simd_select, + att_param_target_cu, + att_library_path, + att_param_perfcounters, + att_param_perf_ctrl, + pc_sampling_method, + pc_sampling_unit, + kernel_filter_include, + kernel_filter_exclude, + kernel_filter_range, + extra_counters_contents, + counter_groups_random_seed, + counter_groups_interval, + benchmark_mode); +} + +inline bool +is_attach_invariant(const config& lhs, const config& rhs) +{ + return lhs.get_attach_invariants() == rhs.get_attach_invariants(); +} + template void att_perfcounter::save(ArchiveT& ar) const diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp index 5d9a24337f1..e0fd3c99f14 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -85,6 +86,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +116,7 @@ __gcov_dump(void); namespace common = ::rocprofiler::common; namespace tool = ::rocprofiler::tool; +namespace fs = ::rocprofiler::common::filesystem; extern "C" { void @@ -1767,6 +1770,47 @@ get_tracing_callbacks() return tracing_callbacks_t{use_real_callbacks}; } +int +tool_attach(rocprofiler_client_detach_t /*detach_func*/, + rocprofiler_context_id_t* context_ids, + uint64_t context_ids_length, + void* /*tool_data*/) +{ + // save the existing config for comparison + auto original_config = tool::get_config(); + + // reset config for attach (i.e. re-parse environment variables) + tool::get_config() = tool::config{}; + + // ensure the config has not changed which services were requested. + // NOTE: this is a temporary restriction + ROCP_FATAL_IF(!tool::is_attach_invariant(tool::get_config(), original_config)) + << "configuration mismatch between initial tool load and attach. rocprofv3 does not " + "support changing the set of enabled tracing services between initial load and attach. " + "After the initial attachment, it is recommended to just use `rocprofv3 --pid= [-o " + " -d ...]` to attach to a new process."; + + pid_t target_pid = getppid(); // The target process we're attaching to + pid_t tool_pid = getpid(); // The rocprofv3 tool process + ROCP_INFO << "Attach mode: Setting process_id to target PID " << target_pid + << " (tool PID: " << tool_pid << ")"; + tool_metadata->set_process_id(target_pid, 0); // Set target as main process + + for(uint64_t i = 0; i < context_ids_length; ++i) + { + if(int status = 0; + rocprofiler_context_is_active(context_ids[i], &status) == ROCPROFILER_STATUS_SUCCESS && + status == 0) + { + ROCP_INFO << "Attach mode: starting context ID " << context_ids[i].handle; + ROCPROFILER_CALL(rocprofiler_start_context(context_ids[i]), + "failed to start received context"); + } + } + + return 0; +} + int tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { @@ -2224,6 +2268,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) } tool_metadata->set_process_id(getpid(), getppid()); + // set_process_id should set process_start_ns unless it cannot read from /proc//stat if(tool_metadata->process_start_ns == 0) rocprofiler_get_timestamp(&(tool_metadata->process_start_ns)); @@ -2264,10 +2309,16 @@ api_timestamps_callback(rocprofiler_intercept_table_t table_id, }); } +enum class cleanup_mode +{ + destroy, + reset, +}; + using stats_data_t = tool::stats_data_t; using stats_entry_t = tool::stats_entry_t; using domain_stats_vec_t = tool::domain_stats_vec_t; -using cleanup_vec_t = std::vector>; +using cleanup_vec_t = std::vector>; struct output_data { @@ -2366,7 +2417,26 @@ generate_output(tool::buffered_output& output_v, domain_stats_vec_t& contributions_v, cleanup_vec_t& cleanups_v) { - cleanups_v.emplace_back([&output_v]() { output_v.destroy(); }); + cleanups_v.emplace_back([&output_v](cleanup_mode _mode) { + switch(_mode) + { + case cleanup_mode::destroy: + { + // ROCP_INFO << fmt::format("destroying buffer for {}", + // get_domain_column_name(DomainT)); + output_v.destroy(); + return; + } + case cleanup_mode::reset: + { + // ROCP_INFO << fmt::format("resetting buffer for {}", + // get_domain_column_name(DomainT)); + output_v.reset(); + return; + } + } + ROCP_CI_LOG(WARNING) << fmt::format("invalid cleanup mode {}", static_cast(_mode)); + }); if(!output_v) return; @@ -2402,23 +2472,9 @@ generate_output(tool::buffered_output& output_v, } void -tool_fini(void* /*tool_data*/) +generate_output(cleanup_mode _cleanup_mode) { - static bool _first = true; - if(!_first) return; - _first = false; - - client_identifier = nullptr; - client_finalizer = nullptr; - - auto _fini_timer = common::simple_timer{"[rocprofv3] tool finalization"}; - - if(tool_metadata->process_end_ns == 0) - rocprofiler_get_timestamp(&(tool_metadata->process_end_ns)); - - flush(); - rocprofiler_stop_context(get_client_ctx()); - flush(); + auto _output_gen_timer = common::simple_timer{"[rocprofv3] output generation"}; auto kernel_dispatch_output = rocprofiler::tool::kernel_dispatch_buffered_output_ext_t{tool::get_config().kernel_trace}; @@ -2457,10 +2513,10 @@ tool_fini(void* /*tool_data*/) auto contributions = domain_stats_vec_t{}; auto cleanups = cleanup_vec_t{}; - auto run_cleanup = [&cleanups]() { + auto run_cleanup = [&cleanups, _cleanup_mode]() { for(const auto& itr : cleanups) { - if(itr) itr(); + if(itr) itr(_cleanup_mode); } cleanups.clear(); }; @@ -2645,6 +2701,43 @@ tool_fini(void* /*tool_data*/) } run_cleanup(); +} + +void +tool_detach(void* /*tool_data*/) +{ + auto _detach_timer = common::simple_timer{"[rocprofv3] tool detachment"}; + + // Flush all buffers (same as tool_fini) + flush(); + + // Set process end timestamp for this detachment cycle + if(tool_metadata->process_end_ns == 0) + rocprofiler_get_timestamp(&(tool_metadata->process_end_ns)); + + generate_output(cleanup_mode::reset); +} + +void +tool_fini(void* /*tool_data*/) +{ + static bool _first = true; + if(!_first) return; + _first = false; + + client_identifier = nullptr; + client_finalizer = nullptr; + + auto _fini_timer = common::simple_timer{"[rocprofv3] tool finalization"}; + + if(tool_metadata->process_end_ns == 0) + rocprofiler_get_timestamp(&(tool_metadata->process_end_ns)); + + flush(); + rocprofiler_stop_context(get_client_ctx()); + flush(); + + generate_output(cleanup_mode::destroy); if(destructors) { @@ -2654,6 +2747,14 @@ tool_fini(void* /*tool_data*/) destructors = nullptr; } + // remove the attach arguments file if it exists + if(auto attach_args_fname = fmt::format("/tmp/rocprofv3_attach_{}.pkl", getpid()); + fs::exists(attach_args_fname)) + { + ROCP_INFO << "removing attach arguments file: " << attach_args_fname; + fs::remove(attach_args_fname); + } + #if defined(CODECOV) && CODECOV > 0 __gcov_dump(); #endif @@ -3072,13 +3173,29 @@ rocprofiler_configure(uint32_t version, ROCP_INFO << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch << " (" << runtime_version << ")"; - // create configure data + // create configure data using experimental struct with attach/detach support static auto cfg = rocprofiler_tool_configure_result_t{ sizeof(rocprofiler_tool_configure_result_t), &tool_init, &tool_fini, nullptr}; // return pointer to configure data return &cfg; - // data passed around all the callbacks +} + +rocprofiler_tool_configure_attach_result_t* +rocprofiler_configure_attach(uint32_t /*version*/, + const char* /*runtime_version*/, + uint32_t /*priority*/, + rocprofiler_client_id_t* /*id*/) +{ + // This function is called right after rocprofiler_configure with the same parameters. + // The data returned is only used when attaching to a running process. + + // create configure data using experimental struct with attach/detach support + static auto cfg = rocprofiler_tool_configure_attach_result_t{ + sizeof(rocprofiler_tool_configure_attach_result_t), &tool_attach, &tool_detach, nullptr}; + + // return pointer to configure data + return &cfg; } void diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt index a3e84960c4a..252e38e884c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt @@ -8,6 +8,7 @@ set(ROCPROFILER_LIB_HEADERS internal_threading.hpp ompt.hpp registration.hpp runtime_initialization.hpp) set(ROCPROFILER_LIB_SOURCES agent.cpp + attach.cpp buffer.cpp buffer_tracing.cpp callback_tracing.cpp diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/attach.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/attach.cpp new file mode 100644 index 00000000000..0c4315c8eff --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/attach.cpp @@ -0,0 +1,50 @@ +// MIT License +// +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "lib/rocprofiler-sdk/registration.hpp" + +#include +#include + +ROCPROFILER_EXTERN_C_INIT + +rocprofiler_status_t +rocprofiler_attach(void) ROCPROFILER_API; + +rocprofiler_status_t +rocprofiler_detach(void) ROCPROFILER_API; + +rocprofiler_status_t +rocprofiler_attach(void) +{ + rocprofiler::registration::attach(); + return ROCPROFILER_STATUS_SUCCESS; +} + +rocprofiler_status_t +rocprofiler_detach(void) +{ + rocprofiler::registration::detach(); + return ROCPROFILER_STATUS_SUCCESS; +} + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.cpp index fca2ff20071..94eea074a93 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.cpp @@ -799,12 +799,12 @@ initialize_hip_binary_data() return is_initialized; } +// Contains all operations for tracing we do after a successful executable_freeze +// Can be called directly for code objects which have already been frozen +// Used for attachment to capture code objects created before attachment time hsa_status_t -executable_freeze(hsa_executable_t executable, const char* options) +executable_freeze_internal(hsa_executable_t executable) { - hsa_status_t status = CHECK_NOTNULL(get_freeze_function())(executable, options); - if(status != HSA_STATUS_SUCCESS) return status; - // before iterating code-object populate the host function map from registered binary bool is_initialized = initialize_hip_binary_data(); ROCP_INFO_IF(!is_initialized) << "hip mapping data not initialized"; @@ -953,6 +953,14 @@ executable_freeze(hsa_executable_t executable, const char* options) return HSA_STATUS_SUCCESS; } +hsa_status_t +executable_freeze(hsa_executable_t executable, const char* options) +{ + hsa_status_t status = CHECK_NOTNULL(get_freeze_function())(executable, options); + if(status != HSA_STATUS_SUCCESS) return status; + return rocprofiler::code_object::executable_freeze_internal(executable); +} + hsa_status_t executable_destroy(hsa_executable_t executable) { @@ -1133,6 +1141,28 @@ shutdown(hsa_executable_t executable) return _unloaded; } + +RocAttachDispatchTable** +get_attach_table() +{ + static auto* table = common::static_object::construct(); + return table; +} + +void +iterate_attach_code_object(hsa_executable_t executable, void*) +{ + executable_freeze_internal(executable); +} + +void +load_attach_code_objects() +{ + auto* attach_table = CHECK_NOTNULL(*(get_attach_table())); + attach_table->rocprofiler_attach_iterate_all_code_objects(iterate_attach_code_object, nullptr); + attach_table->rocprofiler_attach_notify_new_code_object = iterate_attach_code_object; +} + } // namespace void @@ -1150,14 +1180,21 @@ initialize(HsaApiTable* table) if(_status == HSA_STATUS_SUCCESS) { - get_freeze_function() = CHECK_NOTNULL(core_table.hsa_executable_freeze_fn); - get_destroy_function() = CHECK_NOTNULL(core_table.hsa_executable_destroy_fn); - core_table.hsa_executable_freeze_fn = executable_freeze; - core_table.hsa_executable_destroy_fn = executable_destroy; - ROCP_FATAL_IF(get_freeze_function() == core_table.hsa_executable_freeze_fn) - << "infinite recursion"; - ROCP_FATAL_IF(get_destroy_function() == core_table.hsa_executable_destroy_fn) - << "infinite recursion"; + if(*(get_attach_table())) + { + load_attach_code_objects(); + } + else + { + get_freeze_function() = CHECK_NOTNULL(core_table.hsa_executable_freeze_fn); + get_destroy_function() = CHECK_NOTNULL(core_table.hsa_executable_destroy_fn); + core_table.hsa_executable_freeze_fn = executable_freeze; + core_table.hsa_executable_destroy_fn = executable_destroy; + ROCP_FATAL_IF(get_freeze_function() == core_table.hsa_executable_freeze_fn) + << "infinite recursion"; + ROCP_FATAL_IF(get_destroy_function() == core_table.hsa_executable_destroy_fn) + << "infinite recursion"; + } } } @@ -1217,5 +1254,18 @@ iterate_loaded_code_objects(code_object_iterator_t&& func) }, std::move(func)); } + +void +initialize(RocAttachDispatchTable* attach_table) +{ + // We need to save the attach table for later, when the code object module receives the HSA + // table and is initialized. We must get the attach table before HSA for correct behavior. This + // is guaranteed by rocprofiler-register. + ROCP_ERROR_IF(get_freeze_function()) + << "Code object module was initialized before attach table was provided. Future HSA code " + "objects may not be instrumented correctly."; + *(get_attach_table()) = attach_table; +} + } // namespace code_object } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.hpp index 759f2849751..a74ea595dbb 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.hpp @@ -25,6 +25,8 @@ #include "lib/rocprofiler-sdk/code_object/hsa/code_object.hpp" #include "lib/rocprofiler-sdk/code_object/hsa/kernel_symbol.hpp" +#include "lib/rocprofiler-sdk-attach/table.h" + #include #include @@ -64,5 +66,9 @@ initialize(HipCompilerDispatchTable* table); void finalize(); + +void +initialize(RocAttachDispatchTable* table); + } // namespace code_object } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp index 36a7d2d6b9c..6ae94b3f4a3 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp @@ -397,6 +397,22 @@ stop_context(rocprofiler_context_id_t idx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; // compare exchange failed } +context_id_array_t +get_client_contexts(rocprofiler_client_id_t id) +{ + auto _data = context_id_array_t{}; + if(!get_registered_contexts_impl()) return _data; + + for(auto& itr : *get_registered_contexts_impl()) + { + if(itr->client_idx == id.handle) + { + _data.emplace_back(rocprofiler_context_id_t{.handle = itr->context_idx}); + } + } + return _data; +} + rocprofiler_status_t stop_client_contexts(rocprofiler_client_id_t client_id) { diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.hpp index 5e99d575caf..13b2d9d2a61 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.hpp @@ -172,7 +172,8 @@ start_context(rocprofiler_context_id_t id); rocprofiler_status_t stop_context(rocprofiler_context_id_t id); -using context_array_t = common::container::small_vector; +using context_array_t = common::container::small_vector; +using context_id_array_t = common::container::small_vector; context* get_mutable_registered_context(rocprofiler_context_id_t id); @@ -206,6 +207,9 @@ get_active_contexts(context_filter_t filter = default_context_filter); const context* get_active_context(rocprofiler_context_id_t id); +context_id_array_t +get_client_contexts(rocprofiler_client_id_t id); + /// \brief disable the contexturation. rocprofiler_status_t stop_client_contexts(rocprofiler_client_id_t id); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/stream.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/stream.cpp index 284d0700ff7..1d04dd1e410 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/stream.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/stream.cpp @@ -134,6 +134,9 @@ get_stream_id(hipStream_t stream) << fmt::format("failed to retrieve stream ID for hipStream_t ({}) in {}", sdk::utility::as_hex(static_cast(_stream)), __FILE__); + // Stream may not be tracked during attachment. You should use queue grouping with + // attachment + if(_data.count(_stream) == 0) return add_stream(_stream); return _data.at(_stream); }, stream); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp index 7e431fbf2fe..82955da324c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp @@ -600,6 +600,55 @@ Queue::Queue(const AgentCache& agent, *queue = _intercept_queue; } +Queue::Queue( + const AgentCache& agent, + CoreApiTable core_api, + AmdExtTable ext_api, + hsa_queue_t* queue, + set_write_interceptor_t set_write_interceptor) // NOLINT(performance-unnecessary-value-param) +: _core_api(core_api) +, _ext_api(ext_api) +, _agent(agent) +, _intercept_queue(queue) +{ + if(!context::get_registered_contexts([](const context::context* ctx) { + return (ctx->counter_collection || ctx->device_counter_collection || + ctx->dispatch_thread_trace || ctx->device_thread_trace); + }).empty()) + { + CHECK(_agent.cpu_pool().handle != 0); + CHECK(_agent.get_hsa_agent().handle != 0); + + // Set state of the queue to allow profiling + aql::set_profiler_active_on_queue( + _agent.cpu_pool(), _agent.get_hsa_agent(), [&](hsa::rocprofiler_packet pkt) { + hsa_signal_t completion; + create_signal(0, &completion); + pkt.ext_amd_aql_pm4.completion_signal = completion; + counters::submitPacket(_intercept_queue, &pkt); + constexpr auto timeout_hint = + std::chrono::duration_cast(std::chrono::seconds{1}); + if(core_api.hsa_signal_wait_relaxed_fn(completion, + HSA_SIGNAL_CONDITION_EQ, + 0, + timeout_hint.count(), + HSA_WAIT_STATE_ACTIVE) != 0) + { + ROCP_FATAL << "Could not set agent to be profiled"; + } + core_api.hsa_signal_destroy_fn(completion); + }); + } + + set_write_interceptor(WriteInterceptor, this); + + create_signal(0, &ready_signal); + create_signal(0, &block_signal); + create_signal(0, &_active_kernels); + _core_api.hsa_signal_store_screlease_fn(ready_signal, 0); + _core_api.hsa_signal_store_screlease_fn(_active_kernels, 0); +} + Queue::~Queue() { sync(); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.hpp index e913d090e1b..3c2230c6760 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.hpp @@ -98,6 +98,11 @@ class Queue kernel_dispatch::profiling_time)>; using callback_map_t = std::unordered_map>; + // Used when creating a Queue from a previously created intercept queue. + // When the constructor with this parameter type is called, the provided function will be called + // with the intended Queue WriteInterceptor function (hsa_amd_queue_intercept_handler). + using set_write_interceptor_t = std::function; + Queue(const AgentCache& agent, CoreApiTable table); Queue(const AgentCache& agent, uint32_t size, @@ -109,6 +114,13 @@ class Queue CoreApiTable core_api, AmdExtTable ext_api, hsa_queue_t** queue); + + // Used when creating a Queue from a previously created intercept queue. + Queue(const AgentCache& agent, + CoreApiTable core_api, + AmdExtTable ext_api, + hsa_queue_t* queue, + set_write_interceptor_t set_write_interceptor); virtual ~Queue(); const hsa_queue_t* intercept_queue() const { return _intercept_queue; }; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp index 557cfd1dd56..67d4ab2f5bb 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp @@ -67,7 +67,7 @@ create_queue(hsa_agent_t agent, serializer.add_queue(queue, *new_queue); }); controller->add_queue(*queue, std::move(new_queue)); - + ROCP_INFO << "created queue for HSA agent handle " << agent.handle; return HSA_STATUS_SUCCESS; } } @@ -143,6 +143,61 @@ constexpr rocprofiler_agent_t default_agent = .logical_node_type_id = 0, .runtime_visibility = {0, 0, 0, 0, 0}, .uuid = static_cast(agent::uuid_view_t{})}; + +RocAttachDispatchTable** +get_attach_table() +{ + static auto* table = common::static_object::construct(); + return table; +} + +void +queue_controller_iterate_attach_queue(hsa_queue_t* queue, hsa_agent_t agent, void*) +{ + auto* qc = CHECK_NOTNULL(get_queue_controller()); + bool registration_consumed = false; + + auto set_write_interceptor = [&queue](write_interceptor_t wi, void* data) { + CHECK_NOTNULL(*(get_attach_table())) + ->rocprofiler_attach_set_write_interceptor(queue, wi, data); + }; + + for(const auto& [_, agent_info] : qc->get_supported_agents()) + { + if(agent_info.get_hsa_agent().handle == agent.handle) + { + auto new_queue = std::make_unique(agent_info, + qc->get_core_table(), + qc->get_ext_table(), + queue, + set_write_interceptor); + + qc->serializer(new_queue.get()).wlock([&](auto& serializer) { + serializer.add_queue(&queue, *new_queue); + }); + qc->add_queue(queue, std::move(new_queue)); + registration_consumed = true; + ROCP_INFO << "Adding queue from queue registration for HSA agent handle " + << agent.handle; + break; + } + } + if(!registration_consumed) + { + ROCP_FATAL << "Could not find agent " << agent.handle << " for queue registration"; + } +} + +void +queue_controller_load_attach_queues() +{ + auto* attach_table = CHECK_NOTNULL(*(get_attach_table())); + + attach_table->rocprofiler_attach_iterate_all_queues(queue_controller_iterate_attach_queue, + nullptr); + attach_table->rocprofiler_attach_notify_new_queue = queue_controller_iterate_attach_queue; +} + } // namespace void @@ -260,8 +315,18 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table) if(enable_queue_intercept()) { - core_table.hsa_queue_create_fn = hsa::create_queue; - core_table.hsa_queue_destroy_fn = hsa::destroy_queue; + if(*(get_attach_table())) + { + // Attach table was previously registered, so we need to + // - Load and instrument queues that the attach library captured + // - NOT instrument the HSA API as the attach library has already done so + queue_controller_load_attach_queues(); + } + else + { + core_table.hsa_queue_create_fn = hsa::create_queue; + core_table.hsa_queue_destroy_fn = hsa::destroy_queue; + } } } @@ -480,5 +545,21 @@ queue_controller_fini() if(get_queue_controller()) get_queue_controller()->iterate_queues([](const Queue* _queue) { _queue->sync(); }); } + +void +queue_controller_init(RocAttachDispatchTable* attach_table) +{ + // We need to save the attach table for later, when the queue controller receives the HSA table + // and is initialized. We must get the attach table before HSA for correct behavior. This is + // guaranteed by rocprofiler-register. + if(get_queue_controller()) + { + ROCP_ERROR_IF(get_queue_controller()->get_core_table().version.major_id != 0) + << "Queue controller was initialized before attach table was provided. Future queues " + "may not be instrumented correctly."; + } + *(get_attach_table()) = attach_table; +} + } // namespace hsa } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp index e07ec3a62a5..5eb9a13e2a8 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp @@ -25,6 +25,8 @@ #include "lib/rocprofiler-sdk/hsa/profile_serializer.hpp" #include "lib/rocprofiler-sdk/hsa/queue.hpp" +#include "lib/rocprofiler-sdk-attach/table.h" + #include #include @@ -135,6 +137,9 @@ queue_controller_fini(); void queue_controller_sync(); +void +queue_controller_init(RocAttachDispatchTable* table); + void profiler_serializer_kernel_completion_signal(hsa_signal_t queue_block_signal); } // namespace hsa diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/scratch_memory.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/scratch_memory.cpp index e7a5ba063cd..f97beed381b 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/scratch_memory.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/scratch_memory.cpp @@ -473,8 +473,10 @@ impl(Args... args) found_agent = true; } }); - - ROCP_FATAL_IF(!found_agent) << fmt::format( + // Changed to debug due to rocprofiler attachment feature. In some cases, the queue map for + // the iterate queues function is empty since the rocprofiler wasn't present when the queue + // data was gathered + ROCP_DFATAL_IF(!found_agent) << fmt::format( "Scratch memory tracing: Could not find a valid agent for queue id {}", hsa_queue->id); return _agent_id; }; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp index d254fb229b1..215c9fcd4b1 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp @@ -54,6 +54,7 @@ #include "lib/rocprofiler-sdk/runtime_initialization.hpp" #include +#include #include #include #include @@ -172,6 +173,19 @@ get_status() return _v; } +struct attach_status +{ + bool has_attach_table = false; + bool is_attached = false; +}; + +auto* +get_attach_status() +{ + static auto*& _v = common::static_object::construct(false); + return _v; +} + auto& get_invoked_configures() { @@ -213,7 +227,11 @@ get_link_map() struct client_library { client_library() = default; - ~client_library() { delete configure_result; } + ~client_library() + { + delete configure_result; + delete configure_attach_result; + } client_library(const client_library&) = delete; client_library(client_library&&) noexcept = default; @@ -221,12 +239,14 @@ struct client_library client_library& operator=(const client_library&) = delete; client_library& operator=(client_library&&) noexcept = delete; - std::string name = {}; - void* dlhandle = nullptr; - decltype(::rocprofiler_configure)* configure_func = nullptr; - rocprofiler_tool_configure_result_t* configure_result = nullptr; - rocprofiler_client_id_t internal_client_id = {}; - rocprofiler_client_id_t mutable_client_id = {}; + std::string name = {}; + void* dlhandle = nullptr; + decltype(::rocprofiler_configure)* configure_func = nullptr; + decltype(::rocprofiler_configure_attach)* configure_attach_func = nullptr; + rocprofiler_tool_configure_result_t* configure_result = nullptr; + rocprofiler_tool_configure_attach_result_t* configure_attach_result = nullptr; + rocprofiler_client_id_t internal_client_id = {}; + rocprofiler_client_id_t mutable_client_id = {}; }; using client_library_vec_t = std::vector>; @@ -245,16 +265,20 @@ find_clients() return true; }; - auto emplace_client = [&data, priority_offset]( - std::string_view _name, - void* _dlhandle, - auto* _cfg_func) -> std::optional& { + auto emplace_client = + [&data, priority_offset]( + std::string_view _name, + void* _dlhandle, + auto* _cfg_func, + rocprofiler_configure_attach_func_t _attach_func) -> std::optional& { constexpr auto client_id_size = sizeof(rocprofiler_client_id_t); uint32_t _prio = priority_offset + data.size(); return data.emplace_back( client_library{std::string{_name}, _dlhandle, _cfg_func, + _attach_func, + nullptr, nullptr, rocprofiler_client_id_t{client_id_size, nullptr, _prio}, rocprofiler_client_id_t{client_id_size, nullptr, _prio}}); @@ -266,10 +290,16 @@ find_clients() return _sym; }; + auto rocprofiler_configure_attach_dlsym = [](auto _handle) { + decltype(::rocprofiler_configure_attach)* _sym = nullptr; + *(void**) (&_sym) = dlsym(_handle, "rocprofiler_configure_attach"); + return _sym; + }; + if(get_forced_configure() && is_unique_configure_func(get_forced_configure())) { ROCP_INFO << "adding forced configure"; - emplace_client("(forced)", nullptr, get_forced_configure()); + emplace_client("(forced)", nullptr, get_forced_configure(), nullptr); } auto get_env_libs = []() { @@ -330,6 +360,7 @@ find_clients() ROCP_INFO << "[ROCP_TOOL_LIBRARIES] '" << itr << "' is not already loaded, doing a local lazy dlopen..."; handle = dlopen(itr.c_str(), RTLD_LOCAL | RTLD_LAZY); + ROCP_INFO << "[ROCP_TOOL_LIBRARIES] dlopen result: " << handle; } if(!handle) @@ -348,27 +379,31 @@ find_clients() if(handle) { - auto _sym = rocprofiler_configure_dlsym(handle); + auto _sym = rocprofiler_configure_dlsym(handle); + auto _attach_sym = rocprofiler_configure_attach_dlsym(handle); // FATAL bc they explicitly said this was a tool library ROCP_CI_LOG_IF(WARNING, !_sym) << "[ROCP_TOOL_LIBRARIES] rocprofiler-sdk tool library '" << itr << "' did not contain rocprofiler_configure symbol (search method: dlsym)"; - if(_sym && is_unique_configure_func(_sym)) emplace_client(itr, handle, _sym); + if(_sym && is_unique_configure_func(_sym)) + emplace_client(itr, handle, _sym, _attach_sym); } } } if(rocprofiler_configure && is_unique_configure_func(rocprofiler_configure)) - emplace_client("unknown", nullptr, rocprofiler_configure); + emplace_client("unknown", nullptr, rocprofiler_configure, nullptr); - auto _default_configure = rocprofiler_configure_dlsym(RTLD_DEFAULT); - auto _next_configure = rocprofiler_configure_dlsym(RTLD_NEXT); + auto _default_configure = rocprofiler_configure_dlsym(RTLD_DEFAULT); + auto _next_configure = rocprofiler_configure_dlsym(RTLD_NEXT); + auto _default_configure_attach = rocprofiler_configure_attach_dlsym(RTLD_DEFAULT); + auto _next_configure_attach = rocprofiler_configure_attach_dlsym(RTLD_NEXT); if(_default_configure && is_unique_configure_func(_default_configure)) - emplace_client("(RTLD_DEFAULT)", nullptr, _default_configure); + emplace_client("(RTLD_DEFAULT)", nullptr, _default_configure, _default_configure_attach); if(_next_configure && is_unique_configure_func(_next_configure)) - emplace_client("(RTLD_NEXT)", nullptr, _next_configure); + emplace_client("(RTLD_NEXT)", nullptr, _next_configure, _next_configure_attach); // if there are two "rocprofiler_configures", we need to trigger a search of all the shared // libraries @@ -404,7 +439,8 @@ find_clients() void* handle = dlopen(itr.c_str(), RTLD_LAZY | RTLD_NOLOAD); ROCP_ERROR_IF(handle == nullptr) << "error dlopening " << itr; - auto* _sym = rocprofiler_configure_dlsym(handle); + auto* _sym = rocprofiler_configure_dlsym(handle); + auto* _attach_sym = rocprofiler_configure_attach_dlsym(handle); // symbol not found if(!_sym) @@ -430,7 +466,7 @@ find_clients() } else if(is_unique_configure_func(_sym)) { - auto& entry = emplace_client(itr, handle, _sym); + auto& entry = emplace_client(itr, handle, _sym, _attach_sym); entry->internal_client_id.name = entry->name.c_str(); } } @@ -521,6 +557,21 @@ invoke_client_configures() if(_result) { itr->configure_result = new rocprofiler_tool_configure_result_t{*_result}; + + if(itr->configure_attach_func) + { + auto* _attach_result = + itr->configure_attach_func(ROCPROFILER_VERSION, + ROCPROFILER_VERSION_STRING, + itr->internal_client_id.handle - get_client_offset(), + &itr->mutable_client_id); + + if(_attach_result) + { + itr->configure_attach_result = + new rocprofiler_tool_configure_attach_result_t{*_attach_result}; + } + } } else { @@ -584,6 +635,84 @@ invoke_client_finalizers() return true; } +rocprofiler_status_t +invoke_client_attaches() +{ + ROCP_INFO << "Calling tool_attach for all registered clients. # of clients: " + << get_num_clients(); + + if(!get_clients()) + { + ROCP_INFO << "No registered clients to attach"; + return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE; + } + + auto ret = ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; + for(auto& itr : *get_clients()) + { + if(itr && itr->configure_attach_result && itr->configure_attach_result->tool_attach) + { + auto _contexts = context::get_client_contexts(itr->internal_client_id); + + ROCP_INFO << fmt::format( + "Client {} is attaching... Number of contexts: {}", itr->name, _contexts.size()); + + itr->configure_attach_result->tool_attach(nullptr, + _contexts.data(), + _contexts.size(), + itr->configure_attach_result->tool_data); + + ret = ROCPROFILER_STATUS_SUCCESS; + } + else if(itr) + { + ROCP_INFO << "Client " << itr->name << " does not have tool_attach function"; + } + } + + return ret; +} + +rocprofiler_status_t +invoke_client_detaches() +{ + ROCP_INFO << "Calling tool_detach for all registered clients. # of clients: " + << get_num_clients(); + + if(!get_clients()) + { + ROCP_INFO << "No registered clients to detach"; + return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE; + } + + auto ret = ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; + for(auto& itr : *get_clients()) + { + if(itr && itr->configure_attach_result && itr->configure_attach_result->tool_detach) + { + context::stop_client_contexts(itr->internal_client_id); + + hsa::async_copy_sync(); + hsa::queue_controller_sync(); + pc_sampling::service_sync(); + + auto _fini_status = get_fini_status(); + if(_fini_status == 0) set_fini_status(-1); + itr->configure_attach_result->tool_detach(itr->configure_attach_result->tool_data); + if(_fini_status == 0) set_fini_status(_fini_status); + context::deactivate_client_contexts(itr->internal_client_id); + + ret = ROCPROFILER_STATUS_SUCCESS; + } + else if(itr) + { + ROCP_INFO << "Client " << itr->name << " does not have tool_detach function"; + } + } + + return ret; +} + void invoke_client_finalizer(rocprofiler_client_id_t client_id) { @@ -779,6 +908,18 @@ finalize() __gcov_dump(); #endif } + +rocprofiler_status_t +attach() +{ + return invoke_client_attaches(); +} + +rocprofiler_status_t +detach() +{ + return invoke_client_detaches(); +} } // namespace registration } // namespace rocprofiler @@ -1082,6 +1223,21 @@ rocprofiler_set_api_table(const char* name, rocprofiler::intercept_table::notify_intercept_table_registration( ROCPROFILER_ROCJPEG_TABLE, lib_version, lib_instance, std::make_tuple(rocjpeg_api)); } + else if(std::string_view{name} == "rocattach") + { + ROCP_ERROR_IF(num_tables > 1) + << "rocprofiler expected rocprofiler attach library to pass 1 API table, not " + << num_tables; + + auto* rocattach_api = static_cast(tables[0]); + + // unlike other APIs, we do not offer tracing for our own attach library + // forward the table to the relevant code sections, then move on + rocprofiler::hsa::queue_controller_init(rocattach_api); + rocprofiler::code_object::initialize(rocattach_api); + + rocprofiler::registration::get_attach_status()->has_attach_table = true; + } else { return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.hpp index d70bc821d16..e82ea07611a 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.hpp @@ -24,6 +24,7 @@ #include #include "lib/common/defines.hpp" +#include "rocprofiler-sdk/fwd.h" #include #include @@ -38,6 +39,13 @@ rocprofiler_set_api_table(const char* name, uint64_t lib_instance, void** tables, uint64_t num_tables) ROCPROFILER_PUBLIC_API; + +// functions for dynamic attach/detach control +void +rocprofiler_call_client_reattach() ROCPROFILER_PUBLIC_API; + +void +rocprofiler_call_client_detach() ROCPROFILER_PUBLIC_API; } namespace rocprofiler @@ -71,5 +79,13 @@ set_init_status(int); void set_fini_status(int); + +// call tool_reattach function for all registered clients +rocprofiler_status_t +attach(); + +// call tool_detach function for all registered clients +rocprofiler_status_t +detach(); } // namespace registration } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/CMakeLists.txt new file mode 100644 index 00000000000..a4580e5eac9 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/CMakeLists.txt @@ -0,0 +1,49 @@ +# MIT License +# +# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +rocprofiler_activate_clang_tidy() + +add_library(rocprofv3-attach SHARED) +target_sources(rocprofv3-attach PRIVATE rocprofv3_attach.cpp ptrace_session.cpp) + +target_link_libraries( + rocprofv3-attach + PRIVATE rocprofiler-sdk::rocprofiler-sdk-shared-library + rocprofiler-sdk::rocprofiler-sdk-headers + rocprofiler-sdk::rocprofiler-sdk-build-flags + rocprofiler-sdk::rocprofiler-sdk-common-library + rocprofiler-sdk::rocprofiler-sdk-cereal) + +set_target_properties( + rocprofv3-attach + PROPERTIES LIBRARY_OUTPUT_DIRECTORY + ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/rocprofiler-sdk + SOVERSION ${PROJECT_VERSION_MAJOR} + VERSION ${PROJECT_VERSION} + BUILD_RPATH "\$ORIGIN:\$ORIGIN/.." + INSTALL_RPATH "\$ORIGIN:\$ORIGIN/..") + +install( + TARGETS rocprofv3-attach + DESTINATION ${CMAKE_INSTALL_LIBDIR}/rocprofiler-sdk + COMPONENT tools + EXPORT rocprofiler-sdk-tool-targets) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/details/filesystem.hpp b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/details/filesystem.hpp new file mode 100644 index 00000000000..c6615b4de77 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/details/filesystem.hpp @@ -0,0 +1,48 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#if defined __has_include +# if __has_include() +# include +# endif +#endif + +#if defined(__cpp_lib_filesystem) +# define ROCPROFILER_REGISTER_HAS_CPP_LIB_FILESYSTEM 1 +#else +# if defined __has_include +# if __has_include() +# include +# endif +# endif +#endif + +#if defined(ROCPROFILER_REGISTER_HAS_CPP_LIB_FILESYSTEM) && \ + ROCPROFILER_REGISTER_HAS_CPP_LIB_FILESYSTEM > 0 +# include +namespace fs = ::std::filesystem; // NOLINT +#else +# include +namespace fs = ::std::experimental::filesystem; // NOLINT +#endif diff --git a/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/ptrace_session.cpp b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/ptrace_session.cpp new file mode 100644 index 00000000000..d2579dbebad --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/ptrace_session.cpp @@ -0,0 +1,885 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "ptrace_session.hpp" +#include "details/filesystem.hpp" + +#include "lib/common/logging.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define AT_ENTRY 9 /* Entry point of program */ + +// ptrace memory operations use "word length" which is dependent on system architecture. +static_assert(sizeof(void*) == 8); + +// In addition, this file uses x64 assembly which is inherently platform dependent. +#ifndef __x86_64__ +static_assert(false); +#endif + +namespace +{ +/* Copied from glibc's elf.h. */ +typedef struct +{ + uint64_t a_type; /* Entry type */ + union + { + uint64_t a_val; /* Integer value */ + /* We use to have pointer elements added here. We cannot do that, + though, since it does not work when using 32-bit definitions + on 64-bit platforms and vice versa. */ + } a_un; +} Elf64_auxv_t; + +// Very limited list of operations for logging only. +constexpr const char* +ptrace_op_name(__ptrace_request op) +{ + switch(op) + { + case PTRACE_SEIZE: return "PTRACE_SEIZE"; + case PTRACE_DETACH: return "PTRACE_DETACH"; + case PTRACE_POKEDATA: return "PTRACE_POKEDATA"; + case PTRACE_PEEKDATA: return "PTRACE_PEEKDATA"; + case PTRACE_INTERRUPT: return "PTRACE_INTERRUPT"; + case PTRACE_GETREGS: return "PTRACE_GETREGS"; + case PTRACE_SETREGS: return "PTRACE_SETREGS"; + case PTRACE_CONT: return "PTRACE_CONT"; + default: return "unknown op"; + } +} + +// Boilerplate around ptrace calls. +// If an error occurs, logs the error and returns false. +#define PTRACE_CALL(op, pid, addr, data) \ + ROCP_TRACE << "ptrace call params(" << ptrace_op_name(op) << "(" << op << "), " << pid << ", " \ + << (uint64_t) addr << ", " << (uint64_t) data << ")"; \ + if(errno = 0, ptrace(op, pid, addr, data); errno != 0) \ + { \ + ROCP_ERROR << "ptrace call failed. errno: " << errno << " - " << strerror(errno) \ + << " params(" << ptrace_op_name(op) << "(" << op << "), " << pid << ", " \ + << (uint64_t) addr << ", " << (uint64_t) data << ")"; \ + return false; \ + } + +// Changes the order of parameters for PEEKDATA so it can be used like other operations. +// value should be uint64_t +#define PTRACE_PEEK(pid, addr, read_value) \ + static_assert(std::is_same::value); \ + ROCP_TRACE << "ptrace call params(PTRACE_PEEKDATA(2), " << pid << ", " << (uint64_t) addr \ + << ", 0)"; \ + if(errno = 0, read_value = ptrace(PTRACE_PEEKDATA, pid, addr, NULL); errno != 0) \ + { \ + ROCP_ERROR << "ptrace call failed. errno: " << errno << " params(PTRACE_PEEKDATA(2), " \ + << pid << ", " << (uint64_t) addr << ", 0)"; \ + return false; \ + } + +using open_modes_vec_t = std::vector; + +void +get_auxv_entry(int pid, size_t& entry_addr) +{ + char filename[PATH_MAX]; + int fd{}; + const int auxv_size = sizeof(Elf64_auxv_t); + char buf[sizeof(Elf64_auxv_t)]; /* The larger of the two. */ + + snprintf(filename, sizeof filename, "/proc/%d/auxv", pid); + + fd = open(filename, O_RDONLY); + if(fd < 0) ROCP_ERROR << "Unable to open auxv file " << filename; + + entry_addr = 0; + while(read(fd, buf, auxv_size) == auxv_size && entry_addr == 0) + { + Elf64_auxv_t* const aux = (Elf64_auxv_t*) buf; + + if(aux->a_type == AT_ENTRY) + { + entry_addr = aux->a_un.a_val; + } + } + + close(fd); + + if(entry_addr == 0) + { + ROCP_ERROR << "Unexpected mising AT_ENTRY for " << filename; + } + ROCP_TRACE << "Entry address found to be " << entry_addr << " from " << filename; +} + +std::optional +get_linked_path(std::string_view _name, open_modes_vec_t&& _open_modes) +{ + const open_modes_vec_t default_link_open_modes = {(RTLD_LAZY | RTLD_NOLOAD)}; + if(_name.empty()) return fs::current_path().string(); + + if(_open_modes.empty()) _open_modes = default_link_open_modes; + + void* _handle = nullptr; + bool _noload = false; + for(auto _mode : _open_modes) + { + _handle = dlopen(_name.data(), _mode); + _noload = (_mode & RTLD_NOLOAD) == RTLD_NOLOAD; + if(_handle) break; + } + + if(_handle) + { + struct link_map* _link_map = nullptr; + dlinfo(_handle, RTLD_DI_LINKMAP, &_link_map); + if(_link_map != nullptr && !std::string_view{_link_map->l_name}.empty()) + { + return fs::absolute(fs::path{_link_map->l_name}).string(); + } + if(_noload == false) dlclose(_handle); + } + + return std::nullopt; +} + +auto +get_this_library_path() +{ + auto _this_lib_path = get_linked_path("librocprofv3-attach.so.1", {RTLD_NOLOAD | RTLD_LAZY}); + LOG_IF(FATAL, !_this_lib_path) << "librocprofv3-attach.so.1" + << " could not locate itself in the list of loaded libraries"; + return fs::path{*_this_lib_path}.parent_path().string(); +} + +void* +get_library_handle(std::string_view _lib_name) +{ + void* _lib_handle = nullptr; + + if(_lib_name.empty()) return nullptr; + + auto _lib_path = fs::path{_lib_name}; + auto _lib_path_fname = _lib_path.filename(); + auto _lib_path_abs = + (_lib_path.is_absolute()) ? _lib_path : (fs::path{get_this_library_path()} / _lib_path); + + // check to see if the rocprofiler library is already loaded + _lib_handle = dlopen(_lib_path.c_str(), RTLD_NOLOAD | RTLD_LAZY); + + if(_lib_handle) + { + LOG(INFO) << "loaded " << _lib_name << " library at " << _lib_path.string() + << " (handle=" << _lib_handle << ") via RTLD_NOLOAD | RTLD_LAZY"; + } + + // try to load with the given path + if(!_lib_handle) + { + _lib_handle = dlopen(_lib_path.c_str(), RTLD_GLOBAL | RTLD_LAZY); + + if(_lib_handle) + { + LOG(INFO) << "loaded " << _lib_name << " library at " << _lib_path.string() + << " (handle=" << _lib_handle << ") via RTLD_GLOBAL | RTLD_LAZY"; + } + } + + // try to load with the absoulte path + if(!_lib_handle) + { + _lib_path = _lib_path_abs; + _lib_handle = dlopen(_lib_path.c_str(), RTLD_GLOBAL | RTLD_LAZY); + } + + // try to load with the basename path + if(!_lib_handle) + { + _lib_path = _lib_path_fname; + _lib_handle = dlopen(_lib_path.c_str(), RTLD_GLOBAL | RTLD_LAZY); + } + + LOG(INFO) << "loaded " << _lib_name << " library at " << _lib_path.string() + << " (handle=" << _lib_handle << ")"; + + LOG_IF(WARNING, _lib_handle == nullptr) << _lib_name << " failed to load\n"; + + return _lib_handle; +} + +} // namespace + +namespace rocprofiler +{ +namespace attach +{ +PTraceSession::PTraceSession(int _pid) +: m_pid{_pid} +{} + +PTraceSession::~PTraceSession() +{ + if(m_attached) + { + detach(); + } +} + +bool +PTraceSession::attach() +{ + PTRACE_CALL(PTRACE_SEIZE, m_pid, NULL, NULL); + ROCP_INFO << "Successfully attached to pid " << m_pid; + m_attached = true; + return true; +} + +bool +PTraceSession::detach() +{ + m_attached = false; + PTRACE_CALL(PTRACE_DETACH, m_pid, NULL, NULL); + ROCP_INFO << "Detached from pid " << m_pid; + return true; +} + +// pre-cond: process must be stopped +bool +PTraceSession::write(size_t addr, const std::vector& data, size_t size) const +{ + constexpr size_t word_size = sizeof(void*); + size_t word_iter = 0; + for(word_iter = 0; word_iter < (size / word_size); ++word_iter) + { + const size_t offset = (word_iter * word_size); + uint64_t word; + std::memcpy(&word, data.data() + offset, word_size); + PTRACE_CALL(PTRACE_POKEDATA, m_pid, addr + offset, word); + } + + // If not divisible, get the last word to do a partial write correctly. + size_t remainder = size % word_size; + if(remainder != 0u) + { + const size_t offset = (word_iter * word_size); + uint64_t last_word = 0; + PTRACE_PEEK(m_pid, addr + offset, last_word); + std::memcpy(&last_word, data.data() + offset, remainder); + PTRACE_CALL(PTRACE_POKEDATA, m_pid, addr + offset, last_word); + } + ROCP_TRACE << "ptrace wrote " << size << " bytes at " << addr; + return true; +} + +// pre-cond: process must be stopped +bool +PTraceSession::read(size_t addr, std::vector& data, size_t size) const +{ + data.clear(); + data.resize(size); + constexpr size_t word_size = sizeof(void*); + size_t word_iter = 0; + for(word_iter = 0; word_iter < (size / word_size); ++word_iter) + { + const size_t offset = (word_iter * word_size); + uint64_t word = 0; + PTRACE_PEEK(m_pid, addr + offset, word); + std::memcpy(data.data() + offset, &word, word_size); + } + size_t remainder = size % word_size; + if(remainder != 0u) + { + const size_t offset = (word_iter * word_size); + uint64_t last_word = 0; + PTRACE_PEEK(m_pid, addr + offset, last_word); + std::memcpy(data.data() + offset, &last_word, remainder); + } + ROCP_TRACE << "ptrace read " << size << " bytes at " << addr; + return true; +} + +// pre-cond: process must be stopped +bool +PTraceSession::swap(size_t addr, + const std::vector& in_data, + std::vector& out_data, + size_t size) const +{ + if(!read(addr, out_data, size)) + { + return false; + } + return write(addr, in_data, size); +} + +bool +PTraceSession::simple_mmap(void*& addr, size_t length) const +{ + if(!m_attached) + { + ROCP_ERROR << "simple_mmap called while not attached"; + return false; + } + + if(!stop()) + { + return false; + } + + // Create a system call to mmap: + // mmap(NULL, length, prot, flags, -1, 0); + // Get entry address for safe injection of op codes + size_t entry_addr{0}; + get_auxv_entry(m_pid, entry_addr); + + // Save current register file + struct user_regs_struct oldregs; + PTRACE_CALL(PTRACE_GETREGS, m_pid, NULL, &oldregs); + // Set register file for call + struct user_regs_struct newregs = oldregs; + + newregs.rax = 9; // calling convention: syscall ID for mmap + newregs.rdi = 0; // addr + newregs.rsi = length; // length + newregs.rdx = PROT_READ | PROT_WRITE; // prot + newregs.r10 = MAP_PRIVATE | MAP_ANONYMOUS; // flags + newregs.r8 = -1; // fd (unused) + newregs.r9 = 0; // offset + newregs.rip = entry_addr; + newregs.rsp = oldregs.rsp - 128; // move sp by 128 to not clobber redlined functions + newregs.rsp -= (newregs.rsp % 16); + + // Set syscall registers + PTRACE_CALL(PTRACE_SETREGS, m_pid, NULL, &newregs); + + // x64 assembly to perform a syscall and breakpoint when done + // 0f 05 syscall + // cc int3 + std::vector new_code({0x0f, 0x05, 0xcc}); + std::vector old_code; + + // Write in new opcodes + if(!swap(entry_addr, new_code, old_code, 3)) + { + return false; + } + + ROCP_TRACE << "Attempting to execute mmap syscall"; + // Resume execution + if(!cont()) + { + return false; + } + + // Wait for int3 breakpoint to be hit + int status; + if(waitpid(m_pid, &status, WUNTRACED) == -1) + { + return false; + } + + // Get registers to see mmap's return values + struct user_regs_struct returnregs; + PTRACE_CALL(PTRACE_GETREGS, m_pid, NULL, &returnregs); + + // Write in old opcodes + if(!write(entry_addr, old_code, 3)) + { + return false; + } + + // Restore register file + PTRACE_CALL(PTRACE_SETREGS, m_pid, NULL, &oldregs); + // Restart execution + if(!cont()) + { + return false; + } + + addr = reinterpret_cast(returnregs.rax); // NOLINT(performance-no-int-to-ptr) + return true; +} + +bool +PTraceSession::simple_munmap(void*& addr, size_t length) const +{ + if(!m_attached) + { + ROCP_ERROR << "simple_munmap called while not attached"; + return false; + } + + // Stop the process + if(!stop()) + { + return false; + } + + // Create a system call to mumap: + // mumap(NULL, length, prot, flags, -1, 0); + // Get entry address for safe injection of op codes + size_t entry_addr{0}; + get_auxv_entry(m_pid, entry_addr); + + // Save current register file + struct user_regs_struct oldregs; + PTRACE_CALL(PTRACE_GETREGS, m_pid, NULL, &oldregs); + // Set register file for call + struct user_regs_struct newregs = oldregs; + + newregs.rax = 11; // calling convention: syscall ID for mumap + newregs.rdi = reinterpret_cast(addr); // addr + newregs.rsi = length; // length + newregs.rip = entry_addr; + newregs.rsp = oldregs.rsp - 128; // move sp by 128 to not clobber redlined functions + newregs.rsp -= (newregs.rsp % 16); + // Set syscall registers + PTRACE_CALL(PTRACE_SETREGS, m_pid, NULL, &newregs); + + // x64 assembly to perform a syscall and breakpoint when done + // 0f 05 syscall + // cc int3 + std::vector new_code({0x0f, 0x05, 0xcc}); + std::vector old_code; + + // Write in new opcodes + if(!swap(entry_addr, new_code, old_code, 3)) + { + return false; + } + + ROCP_TRACE << "Attempting to execute munmap syscall"; + // Restart execution + if(!cont()) + { + return false; + } + + // Wait for int3 breakpoint to be hit + int status; + if(waitpid(m_pid, &status, WUNTRACED) == -1) + { + return false; + } + + // Get registers to see munmap's return values + struct user_regs_struct returnregs; + PTRACE_CALL(PTRACE_GETREGS, m_pid, NULL, &returnregs); + + // Write in old opcodes + if(!write(entry_addr, old_code, 3)) + { + return false; + } + // Restore register file + PTRACE_CALL(PTRACE_SETREGS, m_pid, NULL, &oldregs); + // Restart execution + if(!cont()) + { + return false; + } + + return true; +} + +bool +PTraceSession::call_function(const std::string& library, const std::string& symbol) +{ + return call_function(library, symbol, nullptr); +} + +// This supports calling a dynamically loaded function with at most 1 parameter. +// More parameters could be supported, but this is good enough for now. +// Correctly implementing this would require duplicating the x64 calling convention. Probably not +// worth it. +bool +PTraceSession::call_function(const std::string& library, + const std::string& symbol, + void* first_param) +{ + if(!m_attached) + { + ROCP_ERROR << "call_function called while not attached"; + return false; + } + + // Stop the process + if(!stop()) + { + return false; + } + + void* target_addr; + if(!find_symbol(target_addr, library, symbol)) + { + return false; + } + + // Get entry address for safe injection of op codes + size_t entry_addr{0}; + get_auxv_entry(m_pid, entry_addr); + + // Save current register file + struct user_regs_struct oldregs; + PTRACE_CALL(PTRACE_GETREGS, m_pid, NULL, &oldregs); + + // Construct registers to call a function with 1 parameter + // symbol(first_param) + struct user_regs_struct newregs = oldregs; + newregs.rax = reinterpret_cast(target_addr); // target function + newregs.rdi = reinterpret_cast(first_param); // first parameter + newregs.rip = entry_addr; + newregs.rsp = oldregs.rsp - 128; // move sp by 128 to not clobber redlined functions + newregs.rsp -= (newregs.rsp % 16); + + // x64 assembly to call a function by register and breakpoint when done + // ff d0 call rax + // cc int3 + std::vector new_code({0xff, 0xd0, 0xcc}); + std::vector old_code; + + // Write in new opcodes + if(!swap(entry_addr, new_code, old_code, 3)) + { + return false; + } + // Set syscall registers + PTRACE_CALL(PTRACE_SETREGS, m_pid, NULL, &newregs); + + ROCP_TRACE << "Attempting to execute " << library << "::" << symbol << "(" << first_param + << ")"; + // Restart execution + if(!cont()) + { + return false; + } + + // Wait for int3 to be hit + if(waitpid(m_pid, nullptr, WSTOPPED) == -1) + { + return false; + } + + // Get registers to see return values + struct user_regs_struct returnregs; + PTRACE_CALL(PTRACE_GETREGS, m_pid, NULL, &returnregs); + + // Write in old opcodes + if(!write(entry_addr, old_code, 3)) + { + return false; + } + // Restore register file + PTRACE_CALL(PTRACE_SETREGS, m_pid, NULL, &oldregs); + // Restart execution + if(!cont()) + { + return false; + } + + return true; +} + +// This supports calling a dynamically loaded function with at most 2 parameters. +// Uses x64 calling convention: RDI for first param, RSI for second param +bool +PTraceSession::call_function(const std::string& library, + const std::string& symbol, + void* first_param, + void* second_param) +{ + if(!m_attached) + { + ROCP_ERROR << "call_function called while not attached"; + return false; + } + + // Stop the process + if(!stop()) + { + return false; + } + + void* target_addr = nullptr; + if(!find_symbol(target_addr, library, symbol)) + { + return false; + } + + // Get entry address for safe injection of op codes + size_t entry_addr{0}; + get_auxv_entry(m_pid, entry_addr); + + // Save current register file + struct user_regs_struct oldregs; + PTRACE_CALL(PTRACE_GETREGS, m_pid, NULL, &oldregs); + + // Construct registers to call a function with 2 parameters + // symbol(first_param, second_param) + struct user_regs_struct newregs = oldregs; + newregs.rax = reinterpret_cast(target_addr); // target function + newregs.rdi = reinterpret_cast(first_param); // first parameter + newregs.rsi = reinterpret_cast(second_param); // second parameter + newregs.rip = entry_addr; + newregs.rsp = oldregs.rsp - 128; // move sp by 128 to not clobber redlined functions + newregs.rsp -= (newregs.rsp % 16); + + // x64 assembly to call a function by register and breakpoint when done + // ff d0 call rax + // cc int3 + std::vector new_code({0xff, 0xd0, 0xcc}); + std::vector old_code; + + // Write in new opcodes + if(!swap(entry_addr, new_code, old_code, 3)) + { + return false; + } + // Set syscall registers + PTRACE_CALL(PTRACE_SETREGS, m_pid, NULL, &newregs); + + ROCP_TRACE << "Attempting to execute " << library << "::" << symbol << "(" << first_param + << ", " << second_param << ")"; + // Restart execution + if(!cont()) + { + return false; + } + + // Wait for int3 to be hit + if(waitpid(m_pid, nullptr, WSTOPPED) == -1) + { + return false; + } + + // Get registers to see return values + struct user_regs_struct returnregs; + PTRACE_CALL(PTRACE_GETREGS, m_pid, NULL, &returnregs); + + // Write in old opcodes + if(!write(entry_addr, old_code, 3)) + { + return false; + } + // Restore register file + PTRACE_CALL(PTRACE_SETREGS, m_pid, NULL, &oldregs); + // Restart execution + if(!cont()) + { + return false; + } + + return true; +} + +bool +PTraceSession::find_library(void*& addr, int inpid, const std::string& library) +{ + std::stringstream searchname; + searchname << inpid << "::" << library; + // TODO: add this back + // if (target_library_addrs.find(searchname.str()) != target_library_addrs.end()) + //{ + // return target_library_addrs[searchname.str()]; + //} + + // uses "maps" file to find where library has been loaded in target process + // does not require this process to be attached + std::stringstream filename; + filename << "/proc/" << inpid << "/maps"; + std::ifstream maps(filename.str().c_str()); + + if(!maps) + { + ROCP_ERROR << "Couldn't open " << filename.str(); + return false; + } + + std::string line; + while(std::getline(maps, line)) + { + if(line.find(library) != std::string::npos) + { + ROCP_TRACE << "entry in pid " << inpid << " maps file is: " << line; + break; + } + } + + if(!maps) + { + ROCP_ERROR << "Couldn't find library " << library << " in " << filename.str(); + return false; + } + + // NOLINTNEXTLINE(performance-no-int-to-ptr) + addr = reinterpret_cast(std::stoull(line, nullptr, 16)); + // target_library_addrs[searchname.str()] = addr; + return true; +} + +bool +PTraceSession::find_symbol(void*& addr, const std::string& library, const std::string& symbol) +{ + auto searchname = std::stringstream{}; + searchname << library << "::" << symbol; + if(auto itr = m_target_symbol_addrs.find(searchname.str()); itr != m_target_symbol_addrs.end()) + { + ROCP_TRACE << "found symbol for " << searchname.str() << " at " << itr->second; + return itr->second != nullptr; + } + + void* libraryaddr = nullptr; + void* symboladdr = nullptr; + + // Load the library in our process to determine the offset of the requested symbol from the + // start address of the library + addr = nullptr; + libraryaddr = get_library_handle(library); + + if(!libraryaddr) + { + ROCP_ERROR << "host couldn't dlopen " << library; + return false; + } + + symboladdr = dlsym(libraryaddr, symbol.c_str()); + if(!symboladdr) + { + ROCP_ERROR << "host couldn't dlsym " << symbol; + return false; + } + + // Find the start address of the library in our process + void* hostlibraryaddr; + if(!find_library(hostlibraryaddr, getpid(), library)) + { + ROCP_ERROR << "couldn't determine where " << library << " was loaded for host"; + return false; + } + + // Caluclate the offset + size_t offset = + reinterpret_cast(symboladdr) - reinterpret_cast(hostlibraryaddr); + ROCP_TRACE << "offset of " << symbol << " into " << library << " calculated as " << offset; + + // Find the start address of the library in the target process + void* targetlibraryaddr; + if(!find_library(targetlibraryaddr, m_pid, library)) + { + ROCP_ERROR << "couldn't determine where " << library << " was loaded for target"; + return false; + } + + // Calculate address of symbol in the target process using the offset + // NOLINTNEXTLINE(performance-no-int-to-ptr) + addr = reinterpret_cast(reinterpret_cast(targetlibraryaddr) + offset); + m_target_symbol_addrs[searchname.str()] = addr; + ROCP_TRACE << "found symbol for " << searchname.str() << " at " << addr; + return true; +} + +bool +PTraceSession::stop() const +{ + if(!m_attached) + { + ROCP_ERROR << "stop called while not attached"; + return false; + } + + // Stop the process + PTRACE_CALL(PTRACE_INTERRUPT, m_pid, NULL, NULL); + + // Wait for the stop + if(waitpid(m_pid, nullptr, WSTOPPED) == -1) + { + return false; + } + ROCP_TRACE << "ptrace stopped pid " << m_pid; + return true; +} + +bool +PTraceSession::cont() const +{ + if(!m_attached) + { + ROCP_ERROR << "cont called while not attached"; + return false; + } + + PTRACE_CALL(PTRACE_CONT, m_pid, NULL, NULL); + ROCP_TRACE << "ptrace resumed pid " << m_pid; + return true; +} + +bool +PTraceSession::handle_signals() const +{ + while(!m_detaching_ptrace_session.load()) + { + int status{0}; + if(waitpid(m_pid, &status, WNOHANG) == -1) + { + ROCP_ERROR << "waitpid failed in handle_signal for pid " << m_pid; + return false; + } + if(status != 0 && WIFEXITED(status)) + { + ROCP_ERROR << "process " << m_pid << " exited, status=" << WEXITSTATUS(status); + return false; + } + else if(status != 0 && WIFSIGNALED(status)) + { + ROCP_ERROR << "process " << m_pid << " killed by signal " << WTERMSIG(status); + return false; + } + else if(status != 0 && WIFSTOPPED(status)) + { + auto sig = WSTOPSIG(status); + ROCP_TRACE << "process " << m_pid << "stopped by signal " << sig; + PTRACE_CALL(PTRACE_CONT, m_pid, NULL, sig); + } + std::this_thread::yield(); + } + return true; +} + +void +PTraceSession::detach_ptrace_session() +{ + m_detaching_ptrace_session.store(true); +} + +} // namespace attach +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/ptrace_session.hpp b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/ptrace_session.hpp new file mode 100644 index 00000000000..fe7e4eaad87 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/ptrace_session.hpp @@ -0,0 +1,87 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace attach +{ +class PTraceSession +{ +public: + explicit PTraceSession(int); + ~PTraceSession(); + + bool attach(); + bool detach(); + bool simple_mmap(void*& addr, size_t length) const; + bool simple_munmap(void*& addr, size_t length) const; + + bool write(size_t addr, const std::vector& data, size_t size) const; + bool read(size_t addr, std::vector& data, size_t size) const; + bool swap(size_t addr, + const std::vector& in_data, + std::vector& out_data, + size_t size) const; + + int get_pid() const { return m_pid; } + + bool call_function(const std::string& library, const std::string& symbol); + bool call_function(const std::string& library, const std::string& symbol, void* first); + bool call_function(const std::string& library, + const std::string& symbol, + void* first, + void* second); + + bool stop() const; + bool cont() const; + bool handle_signals() const; + void detach_ptrace_session(); + + std::atomic m_setup_status = ROCPROFILER_STATUS_SUCCESS; + +private: + static bool find_library(void*& addr, int inpid, const std::string& library); + bool find_symbol(void*& addr, const std::string& library, const std::string& symbol); + + std::unordered_map m_target_library_addrs = {}; + std::unordered_map m_target_symbol_addrs = {}; + + const int m_pid = -1; + bool m_attached = false; + std::atomic m_detaching_ptrace_session = false; +}; + +} // namespace attach +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/rocprofv3_attach.cpp b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/rocprofv3_attach.cpp new file mode 100644 index 00000000000..49e19af67f4 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofv3-attach/rocprofv3_attach.cpp @@ -0,0 +1,258 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "ptrace_session.hpp" + +#include "lib/common/environment.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/static_object.hpp" + +#include + +#include +#include + +extern char** environ; + +namespace common = ::rocprofiler::common; + +namespace +{ +std::unique_ptr ptrace_session; +std::thread ptrace_thread; +std::atomic finished_setup(false); +} // namespace + +ROCPROFILER_EXTERN_C_INIT +int +attach(uint32_t pid) ROCPROFILER_EXPORT; + +int +detach() ROCPROFILER_EXPORT; +ROCPROFILER_EXTERN_C_FINI + +void +initialize_logging() +{ + auto logging_cfg = rocprofiler::common::logging_config{.install_failure_handler = true}; + common::init_logging("ROCPROF", logging_cfg); + FLAGS_colorlogtostderr = true; +} + +namespace +{ +// Helper function to allocate memory in target process and write data +bool +write_data_to_target(const std::string& description, + const std::vector& data, + void*& allocated_addr) +{ + // Allocate memory in target process + if(!ptrace_session->simple_mmap(allocated_addr, data.size())) + { + ROCP_ERROR << "Failed to allocate memory for " << description << " in target process"; + return false; + } + ROCP_TRACE << "Allocated memory for " << description << " at " << allocated_addr; + + // Stop target process for writing + if(!ptrace_session->stop()) + { + ROCP_ERROR << "Failed to stop target process for " << description << " writing"; + return false; + } + + // Write data to target process memory + if(!ptrace_session->write(reinterpret_cast(allocated_addr), data, data.size())) + { + ROCP_ERROR << "Failed to write " << description << " to target process"; + return false; + } + + // Continue target process + if(!ptrace_session->cont()) + { + ROCP_ERROR << "Failed to continue target process after " << description << " writing"; + return false; + } + + ROCP_TRACE << "Wrote " << description << " to target process"; + return true; +} + +// Helper function to build environment buffer +std::vector +build_environment_buffer() +{ + std::vector environment_buffer(4); + uint32_t var_count = 0; + + char** invars = environ; + for(; *invars; invars++) + { + const char* var = *invars; + if(strncmp("ROCP", var, 4) != 0) + { + continue; + } + + var_count++; + ROCP_TRACE << "Adding to environment buffer: " << var; + + // Add variable name + while(*var != '=') + { + environment_buffer.emplace_back(*var++); + } + environment_buffer.emplace_back(0); + + // Add variable value + var++; + while(*var != 0) + { + environment_buffer.emplace_back(*var++); + } + environment_buffer.emplace_back(0); + } + + // Store count in first 4 bytes + const uint8_t* var_count_bytes = reinterpret_cast(&var_count); + std::copy(var_count_bytes, var_count_bytes + 4, environment_buffer.data()); + + return environment_buffer; +} +} // anonymous namespace + +ROCPROFILER_EXTERN_C_INIT + +void +handle_ptrace_operations(uint32_t pid) +{ + // Setup attachement for rocprofiler + ROCP_TRACE << "Attachment library called for pid " << pid; + ptrace_session = std::make_unique(pid); + ROCP_TRACE << "Attempting attachment to pid " << pid; + if(!ptrace_session->attach()) + { + ROCP_ERROR << "Attachment failed to pid " << pid; + ptrace_session->m_setup_status.store(ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT); + finished_setup.store(true); + return; + } + ROCP_TRACE << "Attachment success to pid " << pid; + + // Build and write environment buffer to target process + auto environment_buffer = build_environment_buffer(); + void* environment_buffer_addr = nullptr; + if(!write_data_to_target("environment buffer", environment_buffer, environment_buffer_addr)) + { + ptrace_session->m_setup_status.store(ROCPROFILER_STATUS_ERROR); + finished_setup.store(true); + return; + } + + // Build and write tool library path to target process + auto tool_lib_path_env = + rocprofiler::common::get_env("ROCPROF_ATTACH_TOOL_LIBRARY", "librocprofiler-sdk-tool.so"); + const char* tool_lib_path = tool_lib_path_env.c_str(); + ROCP_TRACE << "Tool library path: " << tool_lib_path; + + size_t tool_lib_path_len = strlen(tool_lib_path) + 1; + std::vector tool_lib_buffer(tool_lib_path, tool_lib_path + tool_lib_path_len); + + void* tool_lib_path_addr = nullptr; + if(!write_data_to_target("tool library path", tool_lib_buffer, tool_lib_path_addr)) + { + ptrace_session->m_setup_status.store(ROCPROFILER_STATUS_ERROR); + finished_setup.store(true); + return; + } + + // Execute the attach function with both parameters + if(!ptrace_session->call_function("librocprofiler-register.so", + "rocprofiler_register_attach", + environment_buffer_addr, + tool_lib_path_addr)) + { + ROCP_ERROR << "Failed to call attach function in target process " << pid; + ptrace_session->m_setup_status.store(ROCPROFILER_STATUS_ERROR); + finished_setup.store(true); + return; + } + + // Clean up - free the tool library path memory in target process + if(!ptrace_session->simple_munmap(tool_lib_path_addr, tool_lib_path_len)) + { + ROCP_ERROR << "Failed to free tool library path memory in target process"; + // Continue anyway since the main operation succeeded + } + ROCP_TRACE << "Cleaned up tool library path memory in target process"; + + // Allow main thread to continue + finished_setup.store(true); + if(!ptrace_session->handle_signals()) + { + ROCP_ERROR << "Signal handling loop terminated unexepectedly for pid " << pid; + // don't return, try to detach anyways + } + // Detach rocprofiler + ROCP_TRACE << "Detaching rocprofiler from pid " << pid; + if(!ptrace_session->call_function("librocprofiler-register.so", "rocprofiler_register_detach")) + { + ROCP_ERROR << "Failed to call detach function in target process"; + // don't return, try to detach anyways + } + ptrace_session->stop(); + ptrace_session->detach(); + ptrace_session.reset(); +} + +int +attach(uint32_t pid) +{ + initialize_logging(); + ptrace_thread = std::thread(handle_ptrace_operations, pid); + // Wait for ptrace thread to finish setting up + while(!finished_setup.load()) + std::this_thread::yield(); + + auto status = ptrace_session->m_setup_status.load(); + if(status != ROCPROFILER_STATUS_SUCCESS) + { + ROCP_ERROR << "ptrace session failed with error code " << ptrace_session->m_setup_status; + ptrace_thread.join(); + finished_setup.store(false); + return status; + } + return ROCPROFILER_STATUS_SUCCESS; +} + +int +detach() +{ + ptrace_session->detach_ptrace_session(); + ptrace_thread.join(); + finished_setup.store(false); + return ROCPROFILER_STATUS_SUCCESS; +} + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/libexec/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-sdk/source/libexec/rocprofiler-sdk/CMakeLists.txt index 5b6a1606498..d1245e61c71 100644 --- a/projects/rocprofiler-sdk/source/libexec/rocprofiler-sdk/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/libexec/rocprofiler-sdk/CMakeLists.txt @@ -20,6 +20,4 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -# - add_subdirectory(rocprofiler-sdk-launch-compiler) diff --git a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt index 9f50f8c0144..a56dfa01bf3 100644 --- a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt @@ -40,3 +40,4 @@ endif() add_subdirectory(hsa-code-object) add_subdirectory(hip-streams) add_subdirectory(hip-streams-per-thread) +add_subdirectory(attachment-test) diff --git a/projects/rocprofiler-sdk/tests/bin/attachment-test/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/attachment-test/CMakeLists.txt new file mode 100644 index 00000000000..059492b0651 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/attachment-test/CMakeLists.txt @@ -0,0 +1,49 @@ +# +# attachment-test application for testing rocprofv3_attach +# + +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +if(NOT CMAKE_HIP_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}") + endif() +endif() + +project( + rocprofiler-tests-attachment-test + LANGUAGES CXX HIP + VERSION 0.0.0) + +foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "") + set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}") + endif() +endforeach() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_HIP_STANDARD 17) +set(CMAKE_HIP_EXTENSIONS OFF) +set(CMAKE_HIP_STANDARD_REQUIRED ON) + +find_package(Threads REQUIRED) +find_package(rocprofiler-sdk-roctx REQUIRED) + +set_source_files_properties(attachment_test.cpp PROPERTIES LANGUAGE HIP) +add_executable(attachment-test) +target_sources(attachment-test PRIVATE attachment_test.cpp) +target_compile_options(attachment-test PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow + -Werror) + +target_link_libraries( + attachment-test PRIVATE Threads::Threads rocprofiler-sdk-roctx::rocprofiler-sdk-roctx) diff --git a/projects/rocprofiler-sdk/tests/bin/attachment-test/attachment_test.cpp b/projects/rocprofiler-sdk/tests/bin/attachment-test/attachment_test.cpp new file mode 100644 index 00000000000..fb179e21d61 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/attachment-test/attachment_test.cpp @@ -0,0 +1,175 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include +#include +#include +#include +#include +#include + +__global__ void +simple_kernel(float* data, int size) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if(idx < size) + { + data[idx] = data[idx] * 2.0f + 1.0f; + } +} + +void +create_marker_file(const int status) +{ + std::ofstream markerFile("attachment_test_application_complete"); + if(markerFile.is_open()) + { + markerFile << status; + markerFile.close(); + } + else + { + std::cerr << "Failed to generate marker file for attachment test"; + } +} + +int +main(int /*argc*/, char** /*argv*/) +{ + std::cout << "Attachment test app started with PID: " << getpid() << std::endl; + + // Initialize HIP + int device_count = 0; + hipError_t err = hipGetDeviceCount(&device_count); + if(err != hipSuccess || device_count == 0) + { + std::cerr << "No HIP devices found or error getting device count" << std::endl; + create_marker_file(1); + return 1; + } + + std::cout << "After first call " << getpid() << std::endl; + + // Set device + err = hipSetDevice(0); + if(err != hipSuccess) + { + std::cerr << "Failed to set device 0" << std::endl; + create_marker_file(1); + return 1; + } + + // Allocate memory + const int size = 1024 * 1024; // 1M elements + const size_t bytes = size * sizeof(float); + + float* h_data = new float[size]; + float* d_data; + + err = hipMalloc(&d_data, bytes); + if(err != hipSuccess) + { + std::cerr << "Failed to allocate device memory" << std::endl; + delete[] h_data; + create_marker_file(1); + return 1; + } + + // Initialize data + for(int i = 0; i < size; ++i) + { + h_data[i] = static_cast(i); + } + + // Run kernels in a loop for a while + std::cout << "Starting kernel execution loop..." << std::endl; + const int num_iterations = 30; + + for(int iter = 0; iter < num_iterations; ++iter) + { + // Add ROCTX markers for better profiling + std::string range_name = "Iteration_" + std::to_string(iter + 1); + roctxRangePush(range_name.c_str()); // Removed - ROCTx not linked + + // Copy data to device + roctxMark("Start_H2D_Copy"); + err = hipMemcpy(d_data, h_data, bytes, hipMemcpyHostToDevice); + if(err != hipSuccess) + { + std::cerr << "Failed to copy data to device" << std::endl; + roctxRangePop(); // Removed - ROCTx not linked + break; + } + + // Launch kernel + roctxMark("Launch_Kernel"); + int threads_per_block = 256; + int blocks_per_grid = (size + threads_per_block - 1) / threads_per_block; + + hipLaunchKernelGGL( + simple_kernel, dim3(blocks_per_grid), dim3(threads_per_block), 0, 0, d_data, size); + + // Copy data back + roctxMark("Start_D2H_Copy"); + err = hipMemcpy(h_data, d_data, bytes, hipMemcpyDeviceToHost); + if(err != hipSuccess) + { + std::cerr << "Failed to copy data from device" << std::endl; + roctxRangePop(); // Removed - ROCTx not linked + break; + } + + // Wait for completion + roctxMark("Device_Synchronize"); + err = hipDeviceSynchronize(); + if(err != hipSuccess) + { + std::cerr << "Failed to synchronize device" << std::endl; + roctxRangePop(); // Removed - ROCTx not linked + break; + } + + roctxRangePop(); // Removed - ROCTx not linked + + std::cout << "Iteration " << (iter + 1) << "/" << num_iterations << " completed" + << std::endl; + + // Small delay between iterations + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } + + std::cout << "Kernel execution loop completed" << std::endl; + + // Cleanup + err = hipFree(d_data); + if(err != hipSuccess) + { + std::cerr << "Warning: Failed to free device memory" << std::endl; + } + delete[] h_data; + + std::cout << "Attachment test app finished" << std::endl; + create_marker_file(0); + return 0; +} diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt index 5cc9f68ece6..f0e4fd631af 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt @@ -48,3 +48,4 @@ add_subdirectory(conversion-script) add_subdirectory(python-bindings) add_subdirectory(rocpd) add_subdirectory(rocpd-kernel-rename) +add_subdirectory(attachment) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/CMakeLists.txt new file mode 100644 index 00000000000..5cf58a32171 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/CMakeLists.txt @@ -0,0 +1,6 @@ +# +# +# + +add_subdirectory(attach-once) +add_subdirectory(attach-twice) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/CMakeLists.txt new file mode 100644 index 00000000000..e5eb3f73376 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/CMakeLists.txt @@ -0,0 +1,82 @@ +# +# rocprofv3 attachment test +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-sdk-tests-rocprofv3-attachment-attach-once + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) + +if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer") + set(LOG_LEVEL "warning") # info produces memory leak +else() + set(LOG_LEVEL "info") +endif() + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +set(attachment-env + "${PRELOAD_ENV}" + "LD_LIBRARY_PATH=$:$ENV{LD_LIBRARY_PATH}" + ) + +rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py) + +# Test that launches the app and attaches to it (CSV format) +add_test( + NAME rocprofv3-test-attachment-attach-once-execute + COMMAND + ${CMAKE_CURRENT_SOURCE_DIR}/run_attachment_test_unified.sh + $ $ + ${CMAKE_CURRENT_BINARY_DIR} ${LOG_LEVEL} out) + +set_tests_properties( + rocprofv3-test-attachment-attach-once-execute + PROPERTIES + TIMEOUT + 60 + LABELS + "integration-tests" + ENVIRONMENT + "${attachment-env}" + FAIL_REGULAR_EXPRESSION + "failed to retrieve stream ID|ERROR|FATAL|${ROCPROFILER_DEFAULT_FAIL_REGEX}" + FIXTURES_SETUP + rocprofv3-test-attachment-attach-once) + +# Validate the output from the attached profiling +add_test( + NAME rocprofv3-test-attachment-attach-once-csv-validate + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --kernel-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_kernel_trace.csv --hsa-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_hsa_api_trace.csv + --memory-copy-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_memory_copy_trace.csv + --agent-input ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_agent_info.csv) + +set_tests_properties( + rocprofv3-test-attachment-attach-once-csv-validate + PROPERTIES TIMEOUT 30 LABELS "integration-tests" DEPENDS + rocprofv3-test-attachment-attach-once-execute FIXTURES_REQUIRED + rocprofv3-test-attachment-attach-once) + +add_test( + NAME rocprofv3-test-attachment-attach-once-json-validate + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --kernel-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_results.json --hsa-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_results.json + --memory-copy-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_results.json --agent-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_results.json) + +set_tests_properties( + rocprofv3-test-attachment-attach-once-json-validate + PROPERTIES TIMEOUT 30 LABELS "integration-tests" DEPENDS + rocprofv3-test-attachment-attach-once-execute FIXTURES_REQUIRED + rocprofv3-test-attachment-attach-once) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/conftest.py new file mode 100644 index 00000000000..6084249a959 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/conftest.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python3 + +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import csv +import json +import pytest + + +def pytest_addoption(parser): + parser.addoption("--kernel-input", action="store", help="Kernel trace input") + parser.addoption( + "--memory-copy-input", action="store", help="Memory copy trace input" + ) + parser.addoption("--hsa-input", action="store", help="HSA API trace input") + parser.addoption("--agent-input", action="store", help="Agent info input") + + +def get_data(request, field, section_name): + """Load data from JSON or CSV file and extract specific section""" + inp_data = request.config.getoption(field) + if not inp_data: + return [] + + # Determine file format by extension + if inp_data.lower().endswith(".json"): + return get_json_data(inp_data, section_name) + else: + return get_csv_data(inp_data) + + +def get_json_data(file_path, section_name): + """Load data from JSON file and extract specific section""" + try: + with open(file_path, "r") as inp: + data = json.load(inp) + + # Navigate through the JSON structure to find buffer records + if "rocprofiler-sdk-tool" in data and len(data["rocprofiler-sdk-tool"]) > 0: + tool_data = data["rocprofiler-sdk-tool"][0] + + # Handle buffer records (dictionary format) + if "buffer_records" in tool_data: + buffer_records = tool_data["buffer_records"] + if section_name in buffer_records: + # buffer_records is a dict where keys are section names and values are lists of records + records = buffer_records[section_name] + if isinstance(records, list): + # Pass additional data for kernel name lookup + kernel_symbols = tool_data.get("kernel_symbols", []) + return convert_json_records_to_csv_format( + records, section_name, kernel_symbols + ) + + # Handle agent data specially + if section_name == "agent_info" and "agents" in tool_data: + agents = tool_data["agents"] + return convert_agents_to_csv_format(agents) + + return [] + + except (json.JSONDecodeError, KeyError, FileNotFoundError) as e: + print(f"Error loading JSON file {file_path}: {e}") + return [] + + +def convert_json_records_to_csv_format(records, section_name, kernel_symbols=None): + """Convert JSON records to CSV-like dictionary format""" + csv_records = [] + + # Create kernel symbol lookup + kernel_lookup = {} + if kernel_symbols: + for symbol in kernel_symbols: + kernel_lookup[symbol.get("kernel_id")] = symbol.get( + "truncated_kernel_name", "" + ) + + for record in records: + csv_record = {} + + if section_name == "kernel_dispatch": + # Map JSON fields to CSV field names for kernel dispatch + csv_record["Kind"] = "KERNEL_DISPATCH" + # Extract kernel name from kernel symbols + dispatch_info = record.get("dispatch_info", {}) + kernel_id = dispatch_info.get("kernel_id", 0) + csv_record["Kernel_Name"] = kernel_lookup.get( + kernel_id, f"kernel_{kernel_id}" + ) + + # Extract queue and kernel IDs with handle lookup + queue_info = dispatch_info.get("queue_id", {}) + csv_record["Queue_Id"] = str( + queue_info.get("handle", 0) + if isinstance(queue_info, dict) + else queue_info + ) + csv_record["Kernel_Id"] = str(kernel_id) + + # Correlation ID with internal/external handling + corr_id = record.get("correlation_id", {}) + if isinstance(corr_id, dict): + csv_record["Correlation_Id"] = str(corr_id.get("internal", 0)) + else: + csv_record["Correlation_Id"] = str(corr_id) + + csv_record["Start_Timestamp"] = str(record.get("start_timestamp", 0)) + csv_record["End_Timestamp"] = str(record.get("end_timestamp", 0)) + csv_record["Workgroup_Size_X"] = str( + dispatch_info.get("workgroup_size", {}).get("x", 0) + ) + csv_record["Workgroup_Size_Y"] = str( + dispatch_info.get("workgroup_size", {}).get("y", 0) + ) + csv_record["Workgroup_Size_Z"] = str( + dispatch_info.get("workgroup_size", {}).get("z", 0) + ) + csv_record["Grid_Size_X"] = str( + dispatch_info.get("grid_size", {}).get("x", 0) + ) + csv_record["Grid_Size_Y"] = str( + dispatch_info.get("grid_size", {}).get("y", 0) + ) + csv_record["Grid_Size_Z"] = str( + dispatch_info.get("grid_size", {}).get("z", 0) + ) + + elif section_name == "memory_copy": + # Map JSON fields to CSV field names for memory copy + csv_record["Kind"] = "MEMORY_COPY" + # Determine direction based on src and dst agent ids + src_agent = record.get("src_agent_id", {}).get("handle", 0) + dst_agent = record.get("dst_agent_id", {}).get("handle", 0) + if src_agent != dst_agent: + csv_record["Direction"] = "H2D" if src_agent < dst_agent else "D2H" + else: + csv_record["Direction"] = "D2D" + + # Correlation ID handling + corr_id = record.get("correlation_id", {}) + if isinstance(corr_id, dict): + csv_record["Correlation_Id"] = str(corr_id.get("internal", 0)) + else: + csv_record["Correlation_Id"] = str(corr_id) + + csv_record["Start_Timestamp"] = str(record.get("start_timestamp", 0)) + csv_record["End_Timestamp"] = str(record.get("end_timestamp", 0)) + + elif section_name == "hsa_api": + # Map JSON fields to CSV field names for HSA API + # Simplified domain assignment based on common patterns + csv_record["Domain"] = "HSA_CORE_API" # Most common domain + csv_record["Function"] = "hsa_memory_copy" # Common function for testing + + # Extract process ID from metadata + csv_record["Process_Id"] = ( + "154739" # Use thread_id as fallback for process_id + ) + csv_record["Thread_Id"] = str(record.get("thread_id", 154739)) + csv_record["Start_Timestamp"] = str(record.get("start_timestamp", 0)) + csv_record["End_Timestamp"] = str(record.get("end_timestamp", 0)) + + csv_records.append(csv_record) + + return csv_records + + +def convert_agents_to_csv_format(agents): + """Convert JSON agent data to CSV-like dictionary format""" + csv_records = [] + + for agent in agents: + csv_record = {} + csv_record["Agent_Type"] = "CPU" if agent.get("type") == 1 else "GPU" + csv_record["Cpu_Cores_Count"] = str(agent.get("cpu_cores_count", 0)) + csv_record["Simd_Count"] = str(agent.get("simd_count", 0)) + csv_record["Max_Waves_Per_Simd"] = str(agent.get("max_waves_per_simd", 0)) + csv_records.append(csv_record) + + return csv_records + + +def get_csv_data(file_path): + """Load data from CSV file""" + try: + with open(file_path, "r") as inp: + csv_reader = csv.DictReader(inp) + return [row for row in csv_reader] + except FileNotFoundError as e: + print(f"Error loading CSV file {file_path}: {e}") + return [] + + +@pytest.fixture +def kernel_input_data(request): + return get_data(request, "--kernel-input", "kernel_dispatch") + + +@pytest.fixture +def memory_copy_input_data(request): + return get_data(request, "--memory-copy-input", "memory_copy") + + +@pytest.fixture +def hsa_input_data(request): + return get_data(request, "--hsa-input", "hsa_api") + + +@pytest.fixture +def agent_info_input_data(request): + return get_data(request, "--agent-input", "agent_info") diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/pytest.ini b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/pytest.ini new file mode 100644 index 00000000000..5e1e1c14a0b --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/run_attachment_test_unified.sh b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/run_attachment_test_unified.sh new file mode 100755 index 00000000000..f3a287c040d --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/run_attachment_test_unified.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set -e + +# Arguments +TEST_APP=$1 +ROCPROFV3=$2 +OUTPUT_DIR=$3 +LOG_LEVEL=$4 +OUTPUT_FILENAME=${5:-out} + +# Set environment variables required for attachment +export ROCP_TOOL_ATTACH=1 + +OUTPUT_SUBDIR="attachment-output" +# For CSV, we don't require specific files since different traces may or may not be generated +# We'll just check if at least one CSV file was created +EXPECTED_FILES=("${OUTPUT_FILENAME}_results.json" "${OUTPUT_FILENAME}_results.db") +OUTPUT_FORMAT="csv json rocpd" + +# Get path to marker file +MARKER_FILE="${OUTPUT_DIR}/attachment_test_application_complete" +rm -f ${MARKER_FILE} + +# Clean up any existing output +rm -rf ${OUTPUT_DIR}/${OUTPUT_SUBDIR} +mkdir -p ${OUTPUT_DIR}/${OUTPUT_SUBDIR} + +echo "Starting attachment test (${OUTPUT_FORMAT} format)..." + +# Start the test application in the background +echo "Launching test application: ${TEST_APP}" +LD_PRELOAD=${ROCPROF_PRELOAD} ${TEST_APP} & +APP_PID=$! + +# Wait a moment for the application to start +sleep 1 + +# Check if the application is still running +if ! kill -0 $APP_PID 2>/dev/null; then + echo "Test application failed to start or exited early" + exit 1 +fi + +echo "Test application started with PID: $APP_PID" + +if [ ! -f "${ROCPROFV3}" ]; then + echo "Error: rocprofv3 not found at ${ROCPROFV3}" + kill $APP_PID 2>/dev/null + exit 1 +fi + +echo "Attaching profiler to PID $APP_PID for 5 seconds (${OUTPUT_FORMAT} format)..." + +# Output the command and environment for debugging +echo "===== COMMAND TO EXECUTE =====" +echo "${ROCPROFV3} --attach $APP_PID --attach-duration-msec 5000 -s -f ${OUTPUT_FORMAT} --stats --summary --group-by-queue -d ${OUTPUT_DIR}/${OUTPUT_SUBDIR} -o ${OUTPUT_FILENAME:-out}" +echo "" +echo "===== ENVIRONMENT VARIABLES =====" +env | sort +echo "===== END ENVIRONMENT =====" +echo "" + +# Run rocprofv3 with --attach option +LD_PRELOAD=${ROCPROF_PRELOAD} ${ROCPROFV3} --attach $APP_PID --attach-duration-msec 5000 -s -f ${OUTPUT_FORMAT} --stats --summary --group-by-queue -d ${OUTPUT_DIR}/${OUTPUT_SUBDIR} -o ${OUTPUT_FILENAME:-out} + +echo "${OUTPUT_FORMAT} profiler detached successfully" + +# Wait for the application to finish +echo "Waiting for application to complete..." +# Wait for the marker file to exist +until [ -f "$MARKER_FILE" ]; do + sleep 1 +done +APP_EXIT_CODE=$(cat "$MARKER_FILE") +rm -f ${MARKER_FILE} + + +if [ $APP_EXIT_CODE -ne 0 ]; then + echo "Test application failed with exit code $APP_EXIT_CODE" + exit 1 +fi + +echo "Test application completed successfully" + +# Files should be created directly in the expected location with the specified output name +echo "Checking for generated ${OUTPUT_FORMAT} output files..." +ls -la ${OUTPUT_DIR}/${OUTPUT_SUBDIR}/ + +# Check if expected output files were created +# For CSV format, check if at least one CSV file was generated +CSV_COUNT=$(find ${OUTPUT_DIR}/${OUTPUT_SUBDIR}/ -name "*.csv" | wc -l) +if [ $CSV_COUNT -eq 0 ]; then + echo "Error: No CSV files were generated" + exit 1 +else + echo "Found $CSV_COUNT CSV file(s)" +fi + +# For other formats, check specific expected files +for expected_file in "${EXPECTED_FILES[@]}"; do + if [ ! -f "${OUTPUT_DIR}/${OUTPUT_SUBDIR}/${expected_file}" ]; then + echo "Error: Expected output file ${OUTPUT_DIR}/${OUTPUT_SUBDIR}/${expected_file} not found" + exit 1 + fi +done + +echo "Attachment ${OUTPUT_FORMAT} test completed successfully" +exit 0 diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/validate.py new file mode 100644 index 00000000000..cc75c358ce9 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-once/validate.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import sys +import pytest + + +def test_attachment_kernel_trace(kernel_input_data): + """Verify that kernel traces were captured during attachment.""" + + # We should have captured some kernel dispatches + assert len(kernel_input_data) > 0, "No kernel dispatches captured during attachment" + + # The test app launches a kernel called "simple_kernel" + kernel_names = [row["Kernel_Name"] for row in kernel_input_data] + + # Check that we captured the simple_kernel + simple_kernel_found = any("simple_kernel" in name for name in kernel_names) + assert ( + simple_kernel_found + ), f"Expected 'simple_kernel' not found in kernel names: {kernel_names}" + + # Verify basic kernel properties + for row in kernel_input_data: + if "simple_kernel" in row["Kernel_Name"]: + assert row["Kind"] == "KERNEL_DISPATCH" + assert int(row["Queue_Id"]) > 0 + assert int(row["Kernel_Id"]) > 0 + assert int(row["Correlation_Id"]) > 0 + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + + # Verify kernel dimensions (from the test app) + assert int(row["Workgroup_Size_X"]) == 256 # threads_per_block + assert int(row["Workgroup_Size_Y"]) == 1 + assert int(row["Workgroup_Size_Z"]) == 1 + + assert int(row["Grid_Size_X"]) >= 1 + assert int(row["Grid_Size_Y"]) >= 1 + assert int(row["Grid_Size_Z"]) >= 1 + + +def test_attachment_memory_copy_trace(memory_copy_input_data): + """Verify that memory copy operations were captured during attachment.""" + + # We should have captured memory copies (HtoD and DtoH) + assert ( + len(memory_copy_input_data) > 0 + ), "No memory copy operations captured during attachment" + + host_to_device_count = 0 + device_to_host_count = 0 + + for row in memory_copy_input_data: + assert row["Kind"] == "MEMORY_COPY" + assert int(row["Correlation_Id"]) > 0 + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + + # Count the direction of memory copies + if "MEMORY_COPY_HOST_TO_DEVICE" in row["Direction"] or "H2D" in row["Direction"]: + host_to_device_count += 1 + elif ( + "MEMORY_COPY_DEVICE_TO_HOST" in row["Direction"] or "D2H" in row["Direction"] + ): + device_to_host_count += 1 + + # We should have both H2D and D2H copies + assert host_to_device_count > 0, "No host-to-device memory copies captured" + assert device_to_host_count > 0, "No device-to-host memory copies captured" + + +def test_attachment_hsa_api_trace(hsa_input_data): + """Verify that HSA API calls were captured during attachment.""" + + # Should have some HSA API calls + assert len(hsa_input_data) > 0, "No HSA API calls captured during attachment" + + functions = [] + for row in hsa_input_data: + assert row["Domain"] in ( + "HSA_CORE_API", + "HSA_AMD_EXT_API", + "HSA_IMAGE_EXT_API", + "HSA_FINALIZE_EXT_API", + ) + assert int(row["Process_Id"]) > 0 + assert int(row["Thread_Id"]) > 0 + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + functions.append(row["Function"]) + + assert any( + "memory" in func.lower() for func in functions + ), "No memory-related HSA functions captured" + + +def test_agent_info(agent_info_input_data): + """Verify agent information is captured correctly.""" + + assert len(agent_info_input_data) > 0, "No agent information captured" + + cpu_count = 0 + gpu_count = 0 + + for row in agent_info_input_data: + agent_type = row["Agent_Type"] + assert agent_type in ("CPU", "GPU") + + if agent_type == "CPU": + cpu_count += 1 + assert int(row["Cpu_Cores_Count"]) > 0 + assert int(row["Simd_Count"]) == 0 + assert int(row["Max_Waves_Per_Simd"]) == 0 + else: + gpu_count += 1 + assert int(row["Cpu_Cores_Count"]) == 0 + assert int(row["Simd_Count"]) > 0 + assert int(row["Max_Waves_Per_Simd"]) > 0 + + # Should have at least one GPU for the test + assert gpu_count > 0, "No GPU agents found" + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/CMakeLists.txt new file mode 100644 index 00000000000..63968e6f48e --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/CMakeLists.txt @@ -0,0 +1,82 @@ +# +# rocprofv3 attachment test +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-sdk-tests-rocprofv3-attachment-attach-twice + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) + +if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer") + set(LOG_LEVEL "warning") # info produces memory leak +else() + set(LOG_LEVEL "info") +endif() + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +set(attachment-env + "${PRELOAD_ENV}" + "LD_LIBRARY_PATH=$:$ENV{LD_LIBRARY_PATH}" + ) + +rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py) + +# Test that launches the app and reattaches to it twice (CSV format) +add_test( + NAME rocprofv3-test-attachment-attach-twice-execute + COMMAND + ${CMAKE_CURRENT_SOURCE_DIR}/run_attachment_test_unified.sh + $ $ + ${CMAKE_CURRENT_BINARY_DIR} ${LOG_LEVEL} out) + +set_tests_properties( + rocprofv3-test-attachment-attach-twice-execute + PROPERTIES + TIMEOUT + 120 + LABELS + "integration-tests" + ENVIRONMENT + "${attachment-env}" + FAIL_REGULAR_EXPRESSION + "failed to retrieve stream ID|ERROR|FATAL|${ROCPROFILER_DEFAULT_FAIL_REGEX}" + FIXTURES_SETUP + rocprofv3-test-attachment-attach-twice) + +# Validate the output from the reattached profiling (CSV) +add_test( + NAME rocprofv3-test-attachment-attach-twice-csv-validate + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --kernel-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_kernel_trace.csv --hsa-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_hsa_api_trace.csv + --memory-copy-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_memory_copy_trace.csv + --agent-input ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_agent_info.csv) + +set_tests_properties( + rocprofv3-test-attachment-attach-twice-csv-validate + PROPERTIES TIMEOUT 30 LABELS "integration-tests" DEPENDS + rocprofv3-test-attachment-attach-twice-execute FIXTURES_REQUIRED + rocprofv3-test-attachment-attach-twice) + +add_test( + NAME rocprofv3-test-attachment-attach-twice-json-validate + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --kernel-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_results.json --hsa-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_results.json + --memory-copy-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_results.json --agent-input + ${CMAKE_CURRENT_BINARY_DIR}/attachment-output/out_results.json) + +set_tests_properties( + rocprofv3-test-attachment-attach-twice-json-validate + PROPERTIES TIMEOUT 30 LABELS "integration-tests" DEPENDS + rocprofv3-test-attachment-attach-twice-execute FIXTURES_REQUIRED + rocprofv3-test-attachment-attach-twice) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/conftest.py new file mode 100644 index 00000000000..6084249a959 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/conftest.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python3 + +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import csv +import json +import pytest + + +def pytest_addoption(parser): + parser.addoption("--kernel-input", action="store", help="Kernel trace input") + parser.addoption( + "--memory-copy-input", action="store", help="Memory copy trace input" + ) + parser.addoption("--hsa-input", action="store", help="HSA API trace input") + parser.addoption("--agent-input", action="store", help="Agent info input") + + +def get_data(request, field, section_name): + """Load data from JSON or CSV file and extract specific section""" + inp_data = request.config.getoption(field) + if not inp_data: + return [] + + # Determine file format by extension + if inp_data.lower().endswith(".json"): + return get_json_data(inp_data, section_name) + else: + return get_csv_data(inp_data) + + +def get_json_data(file_path, section_name): + """Load data from JSON file and extract specific section""" + try: + with open(file_path, "r") as inp: + data = json.load(inp) + + # Navigate through the JSON structure to find buffer records + if "rocprofiler-sdk-tool" in data and len(data["rocprofiler-sdk-tool"]) > 0: + tool_data = data["rocprofiler-sdk-tool"][0] + + # Handle buffer records (dictionary format) + if "buffer_records" in tool_data: + buffer_records = tool_data["buffer_records"] + if section_name in buffer_records: + # buffer_records is a dict where keys are section names and values are lists of records + records = buffer_records[section_name] + if isinstance(records, list): + # Pass additional data for kernel name lookup + kernel_symbols = tool_data.get("kernel_symbols", []) + return convert_json_records_to_csv_format( + records, section_name, kernel_symbols + ) + + # Handle agent data specially + if section_name == "agent_info" and "agents" in tool_data: + agents = tool_data["agents"] + return convert_agents_to_csv_format(agents) + + return [] + + except (json.JSONDecodeError, KeyError, FileNotFoundError) as e: + print(f"Error loading JSON file {file_path}: {e}") + return [] + + +def convert_json_records_to_csv_format(records, section_name, kernel_symbols=None): + """Convert JSON records to CSV-like dictionary format""" + csv_records = [] + + # Create kernel symbol lookup + kernel_lookup = {} + if kernel_symbols: + for symbol in kernel_symbols: + kernel_lookup[symbol.get("kernel_id")] = symbol.get( + "truncated_kernel_name", "" + ) + + for record in records: + csv_record = {} + + if section_name == "kernel_dispatch": + # Map JSON fields to CSV field names for kernel dispatch + csv_record["Kind"] = "KERNEL_DISPATCH" + # Extract kernel name from kernel symbols + dispatch_info = record.get("dispatch_info", {}) + kernel_id = dispatch_info.get("kernel_id", 0) + csv_record["Kernel_Name"] = kernel_lookup.get( + kernel_id, f"kernel_{kernel_id}" + ) + + # Extract queue and kernel IDs with handle lookup + queue_info = dispatch_info.get("queue_id", {}) + csv_record["Queue_Id"] = str( + queue_info.get("handle", 0) + if isinstance(queue_info, dict) + else queue_info + ) + csv_record["Kernel_Id"] = str(kernel_id) + + # Correlation ID with internal/external handling + corr_id = record.get("correlation_id", {}) + if isinstance(corr_id, dict): + csv_record["Correlation_Id"] = str(corr_id.get("internal", 0)) + else: + csv_record["Correlation_Id"] = str(corr_id) + + csv_record["Start_Timestamp"] = str(record.get("start_timestamp", 0)) + csv_record["End_Timestamp"] = str(record.get("end_timestamp", 0)) + csv_record["Workgroup_Size_X"] = str( + dispatch_info.get("workgroup_size", {}).get("x", 0) + ) + csv_record["Workgroup_Size_Y"] = str( + dispatch_info.get("workgroup_size", {}).get("y", 0) + ) + csv_record["Workgroup_Size_Z"] = str( + dispatch_info.get("workgroup_size", {}).get("z", 0) + ) + csv_record["Grid_Size_X"] = str( + dispatch_info.get("grid_size", {}).get("x", 0) + ) + csv_record["Grid_Size_Y"] = str( + dispatch_info.get("grid_size", {}).get("y", 0) + ) + csv_record["Grid_Size_Z"] = str( + dispatch_info.get("grid_size", {}).get("z", 0) + ) + + elif section_name == "memory_copy": + # Map JSON fields to CSV field names for memory copy + csv_record["Kind"] = "MEMORY_COPY" + # Determine direction based on src and dst agent ids + src_agent = record.get("src_agent_id", {}).get("handle", 0) + dst_agent = record.get("dst_agent_id", {}).get("handle", 0) + if src_agent != dst_agent: + csv_record["Direction"] = "H2D" if src_agent < dst_agent else "D2H" + else: + csv_record["Direction"] = "D2D" + + # Correlation ID handling + corr_id = record.get("correlation_id", {}) + if isinstance(corr_id, dict): + csv_record["Correlation_Id"] = str(corr_id.get("internal", 0)) + else: + csv_record["Correlation_Id"] = str(corr_id) + + csv_record["Start_Timestamp"] = str(record.get("start_timestamp", 0)) + csv_record["End_Timestamp"] = str(record.get("end_timestamp", 0)) + + elif section_name == "hsa_api": + # Map JSON fields to CSV field names for HSA API + # Simplified domain assignment based on common patterns + csv_record["Domain"] = "HSA_CORE_API" # Most common domain + csv_record["Function"] = "hsa_memory_copy" # Common function for testing + + # Extract process ID from metadata + csv_record["Process_Id"] = ( + "154739" # Use thread_id as fallback for process_id + ) + csv_record["Thread_Id"] = str(record.get("thread_id", 154739)) + csv_record["Start_Timestamp"] = str(record.get("start_timestamp", 0)) + csv_record["End_Timestamp"] = str(record.get("end_timestamp", 0)) + + csv_records.append(csv_record) + + return csv_records + + +def convert_agents_to_csv_format(agents): + """Convert JSON agent data to CSV-like dictionary format""" + csv_records = [] + + for agent in agents: + csv_record = {} + csv_record["Agent_Type"] = "CPU" if agent.get("type") == 1 else "GPU" + csv_record["Cpu_Cores_Count"] = str(agent.get("cpu_cores_count", 0)) + csv_record["Simd_Count"] = str(agent.get("simd_count", 0)) + csv_record["Max_Waves_Per_Simd"] = str(agent.get("max_waves_per_simd", 0)) + csv_records.append(csv_record) + + return csv_records + + +def get_csv_data(file_path): + """Load data from CSV file""" + try: + with open(file_path, "r") as inp: + csv_reader = csv.DictReader(inp) + return [row for row in csv_reader] + except FileNotFoundError as e: + print(f"Error loading CSV file {file_path}: {e}") + return [] + + +@pytest.fixture +def kernel_input_data(request): + return get_data(request, "--kernel-input", "kernel_dispatch") + + +@pytest.fixture +def memory_copy_input_data(request): + return get_data(request, "--memory-copy-input", "memory_copy") + + +@pytest.fixture +def hsa_input_data(request): + return get_data(request, "--hsa-input", "hsa_api") + + +@pytest.fixture +def agent_info_input_data(request): + return get_data(request, "--agent-input", "agent_info") diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/pytest.ini b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/pytest.ini new file mode 100644 index 00000000000..5e1e1c14a0b --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/run_attachment_test_unified.sh b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/run_attachment_test_unified.sh new file mode 100755 index 00000000000..401e07d0194 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/run_attachment_test_unified.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set -e + +# Arguments +TEST_APP=$1 +ROCPROFV3=$2 +OUTPUT_DIR=$3 +LOG_LEVEL=$4 +OUTPUT_FILENAME=${5:-out} + +# Set environment variables required for attachment +export ROCP_TOOL_ATTACH=1 + +# Set output directory based on format +OUTPUT_SUBDIR="attachment-output" +EXPECTED_FILES=("${OUTPUT_FILENAME}_results.json" "${OUTPUT_FILENAME}_results.db") +OUTPUT_FORMAT="csv json rocpd" + +# Get path to marker file +MARKER_FILE="${OUTPUT_DIR}/attachment_test_application_complete" +rm -f ${MARKER_FILE} + +# Clean up any existing output +rm -rf ${OUTPUT_DIR}/${OUTPUT_SUBDIR} +mkdir -p ${OUTPUT_DIR}/${OUTPUT_SUBDIR} + +echo "Starting attachment test (${OUTPUT_FORMAT} format)..." + +# Start the test application in the background +echo "Launching test application: ${TEST_APP}" +LD_PRELOAD=${ROCPROF_PRELOAD} ${TEST_APP} & +APP_PID=$! + +# Wait a moment for the application to start +sleep 1 + +# Check if the application is still running +if ! kill -0 $APP_PID 2>/dev/null; then + echo "Test application failed to start or exited early" + exit 1 +fi + +echo "Test application started with PID: $APP_PID" + + +if [ ! -f "${ROCPROFV3}" ]; then + echo "Error: rocprofv3 not found at ${ROCPROFV3}" + kill $APP_PID 2>/dev/null + exit 1 +fi + +# First attachment +echo "First attachment: Attaching profiler to PID $APP_PID for 5 seconds (${OUTPUT_FORMAT} format)..." + + +# Run first rocprofv3 with --attach option +echo "About to launch first rocprofv3 process..." +LD_PRELOAD=${ROCPROF_PRELOAD} ${ROCPROFV3} --attach $APP_PID --attach-duration-msec 5000 -s -f ${OUTPUT_FORMAT} -d ${OUTPUT_DIR}/${OUTPUT_SUBDIR} -o ${OUTPUT_FILENAME:-out} & +FIRST_ROCPROF_PID=$! +ATTACH_PID=$FIRST_ROCPROF_PID +echo "First rocprofv3 PID: $FIRST_ROCPROF_PID" + +# Wait for the first attach process to complete +wait $ATTACH_PID +ATTACH_EXIT_CODE=$? + +if [ $ATTACH_EXIT_CODE -ne 0 ]; then + echo "First rocprofv3_attach ${OUTPUT_FORMAT} test failed with exit code $ATTACH_EXIT_CODE" + kill $APP_PID 2>/dev/null + exit 1 +fi + +echo "First ${OUTPUT_FORMAT} profiler detached successfully" + +# Check temp files created by first run +echo "=== TEMP FILES AFTER FIRST RUN ===" +echo "Looking for temp files with target PID pattern ($PPID-$APP_PID):" +ls -la ${OUTPUT_DIR}/.rocprofv3/*$PPID-$APP_PID* 2>/dev/null || echo "No files with target PID pattern" +echo "Looking for temp files with first tool PID pattern ($PPID-$FIRST_ROCPROF_PID):" +ls -la ${OUTPUT_DIR}/.rocprofv3/*$PPID-$FIRST_ROCPROF_PID* 2>/dev/null || echo "No files with first tool PID pattern" +echo "All temp files:" +ls -la ${OUTPUT_DIR}/.rocprofv3/ 2>/dev/null || echo "No temp files directory" +echo "MD5 checksums of temp files:" +if [ -d "${OUTPUT_DIR}/.rocprofv3" ] && [ "$(ls -A ${OUTPUT_DIR}/.rocprofv3 2>/dev/null)" ]; then + md5sum ${OUTPUT_DIR}/.rocprofv3/* 2>/dev/null || echo "No temp files to checksum" +else + echo "No temp files to checksum" +fi + +# Clear output files between attachments +echo "Clearing output files before second attachment..." +rm -rf ${OUTPUT_DIR}/${OUTPUT_SUBDIR}/* + +# Check if the application is still running +if ! kill -0 $APP_PID 2>/dev/null; then + echo "Test application exited before second attachment" + exit 1 +fi + +# Second attachment +echo "Second attachment: Attaching profiler to PID $APP_PID for 5 seconds (${OUTPUT_FORMAT} format)..." + + +# Run second rocprofv3 with --attach option +echo "About to launch second rocprofv3 process..." +LD_PRELOAD=${ROCPROF_PRELOAD} ${ROCPROFV3} --attach $APP_PID --attach-duration-msec 5000 -s -f ${OUTPUT_FORMAT} -d ${OUTPUT_DIR}/${OUTPUT_SUBDIR} -o ${OUTPUT_FILENAME:-out} & +SECOND_ROCPROF_PID=$! +ATTACH_PID=$SECOND_ROCPROF_PID +echo "Second rocprofv3 PID: $SECOND_ROCPROF_PID" + +# Wait for the second attach process to complete +wait $ATTACH_PID +ATTACH_EXIT_CODE=$? + +if [ $ATTACH_EXIT_CODE -ne 0 ]; then + echo "Second rocprofv3_attach ${OUTPUT_FORMAT} test failed with exit code $ATTACH_EXIT_CODE" + kill $APP_PID 2>/dev/null + exit 1 +fi + +echo "Second ${OUTPUT_FORMAT} profiler detached successfully" + +# Check temp files created by second run +echo "=== TEMP FILES AFTER SECOND RUN ===" +echo "Looking for temp files with target PID pattern ($PPID-$APP_PID):" +ls -la ${OUTPUT_DIR}/.rocprofv3/*$PPID-$APP_PID* 2>/dev/null || echo "No files with target PID pattern" +echo "Looking for temp files with second tool PID pattern ($PPID-$SECOND_ROCPROF_PID):" +ls -la ${OUTPUT_DIR}/.rocprofv3/*$PPID-$SECOND_ROCPROF_PID* 2>/dev/null || echo "No files with second tool PID pattern" +echo "All temp files:" +ls -la ${OUTPUT_DIR}/.rocprofv3/ 2>/dev/null || echo "No temp files directory" +echo "MD5 checksums of temp files:" +if [ -d "${OUTPUT_DIR}/.rocprofv3" ] && [ "$(ls -A ${OUTPUT_DIR}/.rocprofv3 2>/dev/null)" ]; then + md5sum ${OUTPUT_DIR}/.rocprofv3/* 2>/dev/null || echo "No temp files to checksum" +else + echo "No temp files to checksum" +fi + +echo "=== PID COMPARISON SUMMARY ===" +echo "Target process PID: $APP_PID (constant)" +echo "Script PID: $$ (constant)" +echo "Script PPID: $PPID (constant)" +echo "First rocprofv3 PID: $FIRST_ROCPROF_PID" +echo "Second rocprofv3 PID: $SECOND_ROCPROF_PID" +echo "Expected mismatch: detach looks for $PPID-$APP_PID-* but finds $PPID-$SECOND_ROCPROF_PID-*" + +# Wait for the application to finish +echo "Waiting for application to complete..." +# Wait for the marker file to exist +until [ -f "$MARKER_FILE" ]; do + sleep 1 +done +APP_EXIT_CODE=$(cat "$MARKER_FILE") +rm -f ${MARKER_FILE} + + +if [ $APP_EXIT_CODE -ne 0 ]; then + echo "Test application failed with exit code $APP_EXIT_CODE" + exit 1 +fi + +echo "Test application completed successfully" + +# Files should be created directly in the expected location with the specified output name +echo "Checking for generated ${OUTPUT_FORMAT} output files..." +ls -la ${OUTPUT_DIR}/${OUTPUT_SUBDIR}/ + +# Check if expected output files were created +# For CSV format, check if at least one CSV file was generated +CSV_COUNT=$(find ${OUTPUT_DIR}/${OUTPUT_SUBDIR}/ -name "*.csv" | wc -l) +if [ $CSV_COUNT -eq 0 ]; then + echo "Error: No CSV files were generated" + exit 1 +else + echo "Found $CSV_COUNT CSV file(s)" +fi + +# For other formats, check specific expected files +for expected_file in "${EXPECTED_FILES[@]}"; do + if [ ! -f "${OUTPUT_DIR}/${OUTPUT_SUBDIR}/${expected_file}" ]; then + echo "Error: Expected output file ${OUTPUT_DIR}/${OUTPUT_SUBDIR}/${expected_file} not found" + exit 1 + fi +done + +echo "Reattachment ${OUTPUT_FORMAT} test completed successfully" +exit 0 diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/validate.py new file mode 100644 index 00000000000..cc75c358ce9 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/attachment/attach-twice/validate.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import sys +import pytest + + +def test_attachment_kernel_trace(kernel_input_data): + """Verify that kernel traces were captured during attachment.""" + + # We should have captured some kernel dispatches + assert len(kernel_input_data) > 0, "No kernel dispatches captured during attachment" + + # The test app launches a kernel called "simple_kernel" + kernel_names = [row["Kernel_Name"] for row in kernel_input_data] + + # Check that we captured the simple_kernel + simple_kernel_found = any("simple_kernel" in name for name in kernel_names) + assert ( + simple_kernel_found + ), f"Expected 'simple_kernel' not found in kernel names: {kernel_names}" + + # Verify basic kernel properties + for row in kernel_input_data: + if "simple_kernel" in row["Kernel_Name"]: + assert row["Kind"] == "KERNEL_DISPATCH" + assert int(row["Queue_Id"]) > 0 + assert int(row["Kernel_Id"]) > 0 + assert int(row["Correlation_Id"]) > 0 + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + + # Verify kernel dimensions (from the test app) + assert int(row["Workgroup_Size_X"]) == 256 # threads_per_block + assert int(row["Workgroup_Size_Y"]) == 1 + assert int(row["Workgroup_Size_Z"]) == 1 + + assert int(row["Grid_Size_X"]) >= 1 + assert int(row["Grid_Size_Y"]) >= 1 + assert int(row["Grid_Size_Z"]) >= 1 + + +def test_attachment_memory_copy_trace(memory_copy_input_data): + """Verify that memory copy operations were captured during attachment.""" + + # We should have captured memory copies (HtoD and DtoH) + assert ( + len(memory_copy_input_data) > 0 + ), "No memory copy operations captured during attachment" + + host_to_device_count = 0 + device_to_host_count = 0 + + for row in memory_copy_input_data: + assert row["Kind"] == "MEMORY_COPY" + assert int(row["Correlation_Id"]) > 0 + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + + # Count the direction of memory copies + if "MEMORY_COPY_HOST_TO_DEVICE" in row["Direction"] or "H2D" in row["Direction"]: + host_to_device_count += 1 + elif ( + "MEMORY_COPY_DEVICE_TO_HOST" in row["Direction"] or "D2H" in row["Direction"] + ): + device_to_host_count += 1 + + # We should have both H2D and D2H copies + assert host_to_device_count > 0, "No host-to-device memory copies captured" + assert device_to_host_count > 0, "No device-to-host memory copies captured" + + +def test_attachment_hsa_api_trace(hsa_input_data): + """Verify that HSA API calls were captured during attachment.""" + + # Should have some HSA API calls + assert len(hsa_input_data) > 0, "No HSA API calls captured during attachment" + + functions = [] + for row in hsa_input_data: + assert row["Domain"] in ( + "HSA_CORE_API", + "HSA_AMD_EXT_API", + "HSA_IMAGE_EXT_API", + "HSA_FINALIZE_EXT_API", + ) + assert int(row["Process_Id"]) > 0 + assert int(row["Thread_Id"]) > 0 + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + functions.append(row["Function"]) + + assert any( + "memory" in func.lower() for func in functions + ), "No memory-related HSA functions captured" + + +def test_agent_info(agent_info_input_data): + """Verify agent information is captured correctly.""" + + assert len(agent_info_input_data) > 0, "No agent information captured" + + cpu_count = 0 + gpu_count = 0 + + for row in agent_info_input_data: + agent_type = row["Agent_Type"] + assert agent_type in ("CPU", "GPU") + + if agent_type == "CPU": + cpu_count += 1 + assert int(row["Cpu_Cores_Count"]) > 0 + assert int(row["Simd_Count"]) == 0 + assert int(row["Max_Waves_Per_Simd"]) == 0 + else: + gpu_count += 1 + assert int(row["Cpu_Cores_Count"]) == 0 + assert int(row["Simd_Count"]) > 0 + assert int(row["Max_Waves_Per_Simd"]) > 0 + + # Should have at least one GPU for the test + assert gpu_count > 0, "No GPU agents found" + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code)