|
| 1 | +// MIT License |
| 2 | +// |
| 3 | +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. |
| 4 | +// |
| 5 | +// Permission is hereby granted, free of charge, to any person obtaining a copy |
| 6 | +// of this software and associated documentation files (the "Software"), to deal |
| 7 | +// in the Software without restriction, including without limitation the rights |
| 8 | +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 9 | +// copies of the Software, and to permit persons to whom the Software is |
| 10 | +// furnished to do so, subject to the following conditions: |
| 11 | +// |
| 12 | +// The above copyright notice and this permission notice shall be included in all |
| 13 | +// copies or substantial portions of the Software. |
| 14 | +// |
| 15 | +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 18 | +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 20 | +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 21 | +// SOFTWARE. |
| 22 | + |
| 23 | +#include "lib/rocprofiler-sdk/spm/core.hpp" |
| 24 | +#include "lib/common/container/stable_vector.hpp" |
| 25 | +#include "lib/common/utility.hpp" |
| 26 | +#include "lib/rocprofiler-sdk/buffer.hpp" |
| 27 | +#include "lib/rocprofiler-sdk/context/context.hpp" |
| 28 | +#include "lib/rocprofiler-sdk/counters/metrics.hpp" |
| 29 | +#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" |
| 30 | +#include "lib/rocprofiler-sdk/internal_threading.hpp" |
| 31 | +#include "lib/rocprofiler-sdk/registration.hpp" |
| 32 | +#include "lib/rocprofiler-sdk/spm/dispatch_handlers.hpp" |
| 33 | + |
| 34 | +#include <hsa/hsa_api_trace.h> |
| 35 | +#include <rocprofiler-sdk/fwd.h> |
| 36 | +#include <rocprofiler-sdk/intercept_table.h> |
| 37 | +#include <rocprofiler-sdk/rocprofiler.h> |
| 38 | + |
| 39 | +#include <atomic> |
| 40 | +#include <cstdint> |
| 41 | +#include <mutex> |
| 42 | +#include <stdexcept> |
| 43 | +#include <string> |
| 44 | +#include <vector> |
| 45 | + |
// Evaluates the HSA call `fn` exactly once. If the result is not
// HSA_STATUS_SUCCESS, the status is logged through ROCP_ERROR and a
// std::runtime_error carrying `message` is thrown.
//
// The body is wrapped in do { } while(0) so that the macro expands to a single
// statement and stays safe in unbraced if/else chains:
//     if(cond) CHECK_HSA(call(), "msg"); else other();
#define CHECK_HSA(fn, message)                                                                     \
    do                                                                                             \
    {                                                                                              \
        const auto _status = (fn);                                                                 \
        if(_status != HSA_STATUS_SUCCESS)                                                          \
        {                                                                                          \
            ROCP_ERROR << "HSA Err: " << _status << '\n';                                          \
            throw std::runtime_error(message);                                                     \
        }                                                                                          \
    } while(0)
| 55 | + |
| 56 | +namespace rocprofiler |
| 57 | +{ |
| 58 | +namespace spm |
| 59 | +{ |
| 60 | +/** |
| 61 | + *This is a singleton class with lazy initialization |
| 62 | + */ |
| 63 | +class SpmCounterController |
| 64 | +{ |
| 65 | +public: |
| 66 | + SpmCounterController() = default; |
| 67 | + // Adds a counter collection profile to our global cache. |
| 68 | + // Note: these profiles can be used across multiple contexts |
| 69 | + // and are independent of the context. |
| 70 | + void spm_add_profile(std::shared_ptr<spm_counter_config>&& config); |
| 71 | + |
| 72 | + rocprofiler_status_t spm_destroy_profile(uint64_t id); |
| 73 | + // Setup the SPM counter collection service. spm_counter_callback_info is created here |
| 74 | + |
| 75 | + std::shared_ptr<spm_counter_config> get_profile_cfg(rocprofiler_spm_counter_config_id_t id); |
| 76 | + |
| 77 | +private: |
| 78 | + // Cache to contain the map of config id handle to spm counter config |
| 79 | + common::Synchronized<std::unordered_map<uint64_t, std::shared_ptr<spm_counter_config>>> |
| 80 | + _configs; |
| 81 | +}; |
| 82 | + |
| 83 | +SpmCounterController& |
| 84 | +spm_get_controller(); |
| 85 | + |
| 86 | +/** |
| 87 | + * @brief The functions checks if the `ROCPROFILER_SPM_BETA_ENABLED` is set. |
| 88 | + * If so, it will enable SPM service. Otherwise, the API is reported |
| 89 | + * as not implemented. |
| 90 | + * |
| 91 | + * The SPM is in experimental phase . |
| 92 | + By enabling the `ROCPROFILER_SPM_BETA_ENABLED`, |
| 93 | + * user accepts all consequences of using early implementation of SPM API. |
| 94 | + */ |
| 95 | +bool |
| 96 | +is_spm_explicitly_enabled() |
| 97 | +{ |
| 98 | + auto spm_sampling_enabled = rocprofiler::common::get_env("ROCPROFILER_SPM_BETA_ENABLED", false); |
| 99 | + |
| 100 | + if(!spm_sampling_enabled) |
| 101 | + ROCP_INFO << " SPM unavailable. The feature is implicitly disabled. " |
| 102 | + << "To use it on a supported architecture, " |
| 103 | + << "set ROCPROFILER_SPM_BETA_ENABLED=ON in the environment"; |
| 104 | + |
| 105 | + return spm_sampling_enabled; |
| 106 | +} |
| 107 | + |
| 108 | +/** |
| 109 | + * Adds a counter collection profile to our global cache. |
| 110 | + * Note: these profiles can be used across multiple contexts and are independent of the context. |
| 111 | + * Note: these profiles are per agent |
| 112 | + * Assigns the config id and increments the monotonic counter. |
| 113 | + */ |
| 114 | +void |
| 115 | +SpmCounterController::spm_add_profile(std::shared_ptr<spm_counter_config>&& config) |
| 116 | +{ |
| 117 | + static std::atomic<uint64_t> profile_val = 1; |
| 118 | + _configs.wlock([&](auto& data) { |
| 119 | + config->id = rocprofiler_spm_counter_config_id_t{.handle = profile_val}; |
| 120 | + data.emplace(profile_val, std::move(config)); |
| 121 | + profile_val++; |
| 122 | + }); |
| 123 | +} |
| 124 | + |
| 125 | +/** |
| 126 | + * @brief Removes the profile entry from the global cache |
| 127 | + */ |
| 128 | +rocprofiler_status_t |
| 129 | +SpmCounterController::spm_destroy_profile(uint64_t id) |
| 130 | +{ |
| 131 | + return _configs.wlock([&](auto& data) { |
| 132 | + if(data.erase(id) != 1) return ROCPROFILER_STATUS_ERROR; |
| 133 | + return ROCPROFILER_STATUS_SUCCESS; |
| 134 | + }); |
| 135 | +} |
| 136 | + |
| 137 | +/** |
| 138 | + * @brief Queries the global cache for the config using config id |
| 139 | + */ |
| 140 | +std::shared_ptr<spm_counter_config> |
| 141 | +SpmCounterController::get_profile_cfg(rocprofiler_spm_counter_config_id_t id) |
| 142 | +{ |
| 143 | + std::shared_ptr<spm_counter_config> cfg; |
| 144 | + _configs.rlock([&](const auto& map) { cfg = map.at(id.handle); }); |
| 145 | + return cfg; |
| 146 | +} |
| 147 | + |
| 148 | +rocprofiler_status_t |
| 149 | +destroy_spm_counter_profile(uint64_t id) |
| 150 | +{ |
| 151 | + return spm_get_controller().spm_destroy_profile(id); |
| 152 | +} |
| 153 | + |
| 154 | +SpmCounterController& |
| 155 | +spm_get_controller() |
| 156 | +{ |
| 157 | + static auto* controller = rocprofiler::common::static_object<SpmCounterController>::construct(); |
| 158 | + return *CHECK_NOTNULL(controller); |
| 159 | +} |
| 160 | + |
| 161 | +/** |
| 162 | + * @brief looks into the config's packet cache to re-use the packet |
| 163 | + * If not, constructs the packet using packet generator |
| 164 | + * updates packet_return map |
| 165 | + */ |
| 166 | +rocprofiler_status_t |
| 167 | +get_spm_packet(const std::shared_ptr<spm_counter_callback_info>& info, |
| 168 | + std::unique_ptr<rocprofiler::hsa::AQLPacket>& ret_pkt, |
| 169 | + std::shared_ptr<spm_counter_config>& profile) |
| 170 | +{ |
| 171 | + profile->packets.wlock([&](auto& pkt_vector) { |
| 172 | + if(!pkt_vector.empty()) |
| 173 | + { |
| 174 | + ret_pkt = std::move(pkt_vector.back()); |
| 175 | + pkt_vector.pop_back(); |
| 176 | + } |
| 177 | + }); |
| 178 | + |
| 179 | + if(!ret_pkt) |
| 180 | + { |
| 181 | + // If we do not have a packet in the cache, create one. |
| 182 | + ret_pkt = rocprofiler::aql::spm_construct_packet( |
| 183 | + profile->agent->id, |
| 184 | + std::vector<counters::Metric>{profile->metrics.begin(), profile->metrics.end()}, |
| 185 | + profile->sample_freq, |
| 186 | + profile->buffer_size, |
| 187 | + profile->timeout); |
| 188 | + }; |
| 189 | + |
| 190 | + ret_pkt->clear(); |
| 191 | + info->packet_return_map.wlock([&](auto& data) { data.emplace(ret_pkt.get(), profile); }); |
| 192 | + |
| 193 | + return ROCPROFILER_STATUS_SUCCESS; |
| 194 | +} |
| 195 | + |
| 196 | +/** @brief Creates spm the counter config |
| 197 | + * Checks if the input counters does not exceed hardware limit |
| 198 | + * Adds the config to configs cache |
| 199 | + */ |
| 200 | +rocprofiler_status_t |
| 201 | +create_spm_counter_profile(std::shared_ptr<spm_counter_config> config) |
| 202 | +{ |
| 203 | + auto status = ROCPROFILER_STATUS_SUCCESS; |
| 204 | + if(status = rocprofiler::aql::spm_can_collect(config->agent->id, config->metrics); |
| 205 | + status != ROCPROFILER_STATUS_SUCCESS) |
| 206 | + { |
| 207 | + return status; |
| 208 | + } |
| 209 | + |
| 210 | + spm_get_controller().spm_add_profile(std::move(config)); |
| 211 | + |
| 212 | + return status; |
| 213 | +} |
| 214 | + |
| 215 | +std::shared_ptr<spm_counter_config> |
| 216 | +get_spm_counter_config(rocprofiler_spm_counter_config_id_t id) |
| 217 | +{ |
| 218 | + try |
| 219 | + { |
| 220 | + return spm_get_controller().get_profile_cfg(id); |
| 221 | + } catch(std::out_of_range&) |
| 222 | + { |
| 223 | + return nullptr; |
| 224 | + } |
| 225 | +} |
| 226 | + |
| 227 | +/** @brief Configures SPM dispatch for the context |
| 228 | + * Checks for conflicting services |
| 229 | + * Instantiates spm_dispatch_counter_collection_service |
| 230 | + */ |
| 231 | + |
| 232 | +rocprofiler_status_t |
| 233 | +configure_callback_spm_dispatch(rocprofiler_context_id_t context_id, |
| 234 | + rocprofiler_spm_dispatch_counting_service_cb_t callback, |
| 235 | + void* callback_args, |
| 236 | + rocprofiler_spm_dispatch_counting_record_cb_t record_callback, |
| 237 | + void* record_callback_args) |
| 238 | +{ |
| 239 | + auto* ctx_p = rocprofiler::context::get_mutable_registered_context(context_id); |
| 240 | + if(!ctx_p) return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID; |
| 241 | + |
| 242 | + auto& ctx = *ctx_p; |
| 243 | + |
| 244 | + // FIXME: Due to the clock gating issue, counter collection and PC sampling service |
| 245 | + // cannot coexist in the same context for now. |
| 246 | + if(ctx.pc_sampler) return ROCPROFILER_STATUS_ERROR_CONTEXT_CONFLICT; |
| 247 | + if(ctx.counter_collection) return ROCPROFILER_STATUS_ERROR_CONTEXT_CONFLICT; |
| 248 | + if(ctx.device_counter_collection) return ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT; |
| 249 | + if(!ctx.dispatch_spm) |
| 250 | + ctx.dispatch_spm = |
| 251 | + std::make_unique<rocprofiler::context::spm_dispatch_counter_collection_service>(); |
| 252 | + auto& cb = *ctx.dispatch_spm->callbacks.emplace_back( |
| 253 | + std::make_shared<rocprofiler::spm::spm_counter_callback_info>()); |
| 254 | + |
| 255 | + cb.user_cb = callback; |
| 256 | + cb.callback_args = callback_args; |
| 257 | + cb.context = context_id; |
| 258 | + cb.record_callback = record_callback; |
| 259 | + cb.record_callback_args = record_callback_args; |
| 260 | + cb.internal_context = ctx_p; |
| 261 | + |
| 262 | + return ROCPROFILER_STATUS_SUCCESS; |
| 263 | +} |
| 264 | + |
| 265 | +/** @brief start SPM dispatch context |
| 266 | + * Enables serialization |
| 267 | + * Returns if callback has already been added by checking the queue id |
| 268 | + * Adds a pre kernel and a post kernel callback |
| 269 | + * Enabled flag is used to check if context has already been enabled |
| 270 | + */ |
| 271 | + |
| 272 | +rocprofiler_status_t |
| 273 | +start_context(const context::context* ctx) |
| 274 | +{ |
| 275 | + if(!ctx || !ctx->dispatch_spm) return ROCPROFILER_STATUS_ERROR; |
| 276 | + |
| 277 | + auto* controller = hsa::get_queue_controller(); |
| 278 | + |
| 279 | + bool already_enabled = true; |
| 280 | + CHECK_NOTNULL(controller)->enable_serialization(); |
| 281 | + ctx->dispatch_spm->enabled.wlock([&](auto& enabled) { |
| 282 | + if(enabled) return; |
| 283 | + already_enabled = false; |
| 284 | + enabled = true; |
| 285 | + }); |
| 286 | + |
| 287 | + if(!already_enabled) |
| 288 | + { |
| 289 | + // Insert our callbacks into HSA Interceptor. This |
| 290 | + // turns on counter instrumentation. |
| 291 | + for(auto& cb : ctx->dispatch_spm->callbacks) |
| 292 | + { |
| 293 | + if(cb->queue_id != rocprofiler::hsa::ClientID{-1}) continue; |
| 294 | + cb->queue_id = controller->add_callback( |
| 295 | + std::nullopt, |
| 296 | + [=](const hsa::Queue& q, |
| 297 | + const hsa::rocprofiler_packet& kern_pkt, |
| 298 | + rocprofiler_kernel_id_t kernel_id, |
| 299 | + rocprofiler_dispatch_id_t dispatch_id, |
| 300 | + rocprofiler_user_data_t* user_data, |
| 301 | + const hsa::Queue::queue_info_session_t::external_corr_id_map_t& extern_corr_ids, |
| 302 | + const context::correlation_id* correlation_id) { |
| 303 | + return pre_kernel_call(ctx, |
| 304 | + cb, |
| 305 | + q, |
| 306 | + kern_pkt, |
| 307 | + kernel_id, |
| 308 | + dispatch_id, |
| 309 | + user_data, |
| 310 | + extern_corr_ids, |
| 311 | + correlation_id); |
| 312 | + }, |
| 313 | + // Completion CB |
| 314 | + [=](const hsa::Queue& /* q */, |
| 315 | + hsa::rocprofiler_packet /* kern_pkt */, |
| 316 | + std::shared_ptr<hsa::Queue::queue_info_session_t>& session, |
| 317 | + inst_pkt_t& aql, |
| 318 | + kernel_dispatch::profiling_time dispatch_time) { |
| 319 | + post_kernel_call(ctx, cb, session, aql, dispatch_time); |
| 320 | + }); |
| 321 | + } |
| 322 | + } |
| 323 | + |
| 324 | + return ROCPROFILER_STATUS_SUCCESS; |
| 325 | +} |
| 326 | + |
| 327 | +/** @brief stop SPM dispatch context |
| 328 | + * Disables serialization |
| 329 | + * Sets Enabled flag to false |
| 330 | + */ |
| 331 | + |
| 332 | +void |
| 333 | +stop_context(const context::context* ctx) |
| 334 | +{ |
| 335 | + if(!ctx || !ctx->dispatch_spm) return; |
| 336 | + |
| 337 | + auto* controller = hsa::get_queue_controller(); |
| 338 | + |
| 339 | + ctx->dispatch_spm->enabled.wlock([&](auto& enabled) { |
| 340 | + if(!enabled) return; |
| 341 | + enabled = false; |
| 342 | + }); |
| 343 | + |
| 344 | + if(controller) controller->disable_serialization(); |
| 345 | +} |
| 346 | + |
| 347 | +} // namespace spm |
| 348 | + |
| 349 | +} // namespace rocprofiler |
0 commit comments