Skip to content

Commit 63509bc

Browse files
committed
Add SPM core library implementation
1 parent 19a2714 commit 63509bc

File tree

9 files changed

+1367
-0
lines changed

9 files changed

+1367
-0
lines changed

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ add_subdirectory(aql)
4848
add_subdirectory(pc_sampling)
4949
add_subdirectory(marker)
5050
add_subdirectory(thread_trace)
51+
add_subdirectory(spm)
5152
add_subdirectory(tracing)
5253
add_subdirectory(kernel_dispatch)
5354
add_subdirectory(kfd)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Translation units and headers that make up the SPM component. Both lists are
# attached privately to the shared rocprofiler-sdk object library target.
set(ROCPROFILER_LIB_SPM_SOURCES
    core.cpp
    service.cpp
    decode.cpp
    dlsym.cpp
    dispatch_handlers.cpp)
set(ROCPROFILER_LIB_SPM_HEADERS
    core.hpp
    dlsym.hpp
    dispatch_handlers.hpp)

target_sources(
    rocprofiler-sdk-object-library
    PRIVATE ${ROCPROFILER_LIB_SPM_SOURCES} ${ROCPROFILER_LIB_SPM_HEADERS})

# The tests subdirectory is only configured when test builds are requested.
if(ROCPROFILER_BUILD_TESTS)
    add_subdirectory(tests)
endif()
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
// MIT License
2+
//
3+
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
4+
//
5+
// Permission is hereby granted, free of charge, to any person obtaining a copy
6+
// of this software and associated documentation files (the "Software"), to deal
7+
// in the Software without restriction, including without limitation the rights
8+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
// copies of the Software, and to permit persons to whom the Software is
10+
// furnished to do so, subject to the following conditions:
11+
//
12+
// The above copyright notice and this permission notice shall be included in all
13+
// copies or substantial portions of the Software.
14+
//
15+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
// SOFTWARE.
22+
23+
#include "lib/rocprofiler-sdk/spm/core.hpp"
24+
#include "lib/common/container/stable_vector.hpp"
25+
#include "lib/common/utility.hpp"
26+
#include "lib/rocprofiler-sdk/buffer.hpp"
27+
#include "lib/rocprofiler-sdk/context/context.hpp"
28+
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
29+
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
30+
#include "lib/rocprofiler-sdk/internal_threading.hpp"
31+
#include "lib/rocprofiler-sdk/registration.hpp"
32+
#include "lib/rocprofiler-sdk/spm/dispatch_handlers.hpp"
33+
34+
#include <hsa/hsa_api_trace.h>
35+
#include <rocprofiler-sdk/fwd.h>
36+
#include <rocprofiler-sdk/intercept_table.h>
37+
#include <rocprofiler-sdk/rocprofiler.h>
38+
39+
#include <atomic>
40+
#include <cstdint>
41+
#include <mutex>
42+
#include <stdexcept>
43+
#include <string>
44+
#include <vector>
45+
46+
// Evaluates the HSA call `fn` exactly once. If the resulting status differs from
// HSA_STATUS_SUCCESS, the status is logged through ROCP_ERROR and a
// std::runtime_error carrying `message` is thrown.
//
// The body is wrapped in do { ... } while(0) so that the expansion is a single
// statement: it must be followed by a semicolon and can be used safely as the
// body of an unbraced if/else (a bare `{ ... };` expansion would orphan the else).
#define CHECK_HSA(fn, message)                                                                     \
    do                                                                                             \
    {                                                                                              \
        auto _status = (fn);                                                                       \
        if(_status != HSA_STATUS_SUCCESS)                                                          \
        {                                                                                          \
            ROCP_ERROR << "HSA Err: " << _status << '\n';                                          \
            throw std::runtime_error(message);                                                     \
        }                                                                                          \
    } while(0)
55+
56+
namespace rocprofiler
57+
{
58+
namespace spm
59+
{
60+
/**
61+
*This is a singleton class with lazy initialization
62+
*/
63+
class SpmCounterController
64+
{
65+
public:
66+
SpmCounterController() = default;
67+
// Adds a counter collection profile to our global cache.
68+
// Note: these profiles can be used across multiple contexts
69+
// and are independent of the context.
70+
void spm_add_profile(std::shared_ptr<spm_counter_config>&& config);
71+
72+
rocprofiler_status_t spm_destroy_profile(uint64_t id);
73+
// Setup the SPM counter collection service. spm_counter_callback_info is created here
74+
75+
std::shared_ptr<spm_counter_config> get_profile_cfg(rocprofiler_spm_counter_config_id_t id);
76+
77+
private:
78+
// Cache to contain the map of config id handle to spm counter config
79+
common::Synchronized<std::unordered_map<uint64_t, std::shared_ptr<spm_counter_config>>>
80+
_configs;
81+
};
82+
83+
SpmCounterController&
84+
spm_get_controller();
85+
86+
/**
87+
* @brief The functions checks if the `ROCPROFILER_SPM_BETA_ENABLED` is set.
88+
* If so, it will enable SPM service. Otherwise, the API is reported
89+
* as not implemented.
90+
*
91+
* The SPM is in experimental phase .
92+
By enabling the `ROCPROFILER_SPM_BETA_ENABLED`,
93+
* user accepts all consequences of using early implementation of SPM API.
94+
*/
95+
bool
96+
is_spm_explicitly_enabled()
97+
{
98+
auto spm_sampling_enabled = rocprofiler::common::get_env("ROCPROFILER_SPM_BETA_ENABLED", false);
99+
100+
if(!spm_sampling_enabled)
101+
ROCP_INFO << " SPM unavailable. The feature is implicitly disabled. "
102+
<< "To use it on a supported architecture, "
103+
<< "set ROCPROFILER_SPM_BETA_ENABLED=ON in the environment";
104+
105+
return spm_sampling_enabled;
106+
}
107+
108+
/**
109+
* Adds a counter collection profile to our global cache.
110+
* Note: these profiles can be used across multiple contexts and are independent of the context.
111+
* Note: these profiles are per agent
112+
* Assigns the config id and increments the monotonic counter.
113+
*/
114+
void
115+
SpmCounterController::spm_add_profile(std::shared_ptr<spm_counter_config>&& config)
116+
{
117+
static std::atomic<uint64_t> profile_val = 1;
118+
_configs.wlock([&](auto& data) {
119+
config->id = rocprofiler_spm_counter_config_id_t{.handle = profile_val};
120+
data.emplace(profile_val, std::move(config));
121+
profile_val++;
122+
});
123+
}
124+
125+
/**
126+
* @brief Removes the profile entry from the global cache
127+
*/
128+
rocprofiler_status_t
129+
SpmCounterController::spm_destroy_profile(uint64_t id)
130+
{
131+
return _configs.wlock([&](auto& data) {
132+
if(data.erase(id) != 1) return ROCPROFILER_STATUS_ERROR;
133+
return ROCPROFILER_STATUS_SUCCESS;
134+
});
135+
}
136+
137+
/**
138+
* @brief Queries the global cache for the config using config id
139+
*/
140+
std::shared_ptr<spm_counter_config>
141+
SpmCounterController::get_profile_cfg(rocprofiler_spm_counter_config_id_t id)
142+
{
143+
std::shared_ptr<spm_counter_config> cfg;
144+
_configs.rlock([&](const auto& map) { cfg = map.at(id.handle); });
145+
return cfg;
146+
}
147+
148+
/**
 * @brief Removes the SPM counter profile identified by `id` from the
 *        controller's cache.
 *
 * @return the status reported by SpmCounterController::spm_destroy_profile.
 */
rocprofiler_status_t
destroy_spm_counter_profile(uint64_t id)
{
    auto& controller = spm_get_controller();
    return controller.spm_destroy_profile(id);
}
153+
154+
/**
 * @brief Accessor for the process-wide SpmCounterController.
 *
 * The instance is obtained from common::static_object on the first call and
 * the same pointer is reused afterwards; CHECK_NOTNULL guards against a null
 * instance before it is dereferenced.
 */
SpmCounterController&
spm_get_controller()
{
    using controller_storage_t = rocprofiler::common::static_object<SpmCounterController>;
    static auto* instance      = controller_storage_t::construct();
    return *CHECK_NOTNULL(instance);
}
160+
161+
/**
162+
* @brief looks into the config's packet cache to re-use the packet
163+
* If not, constructs the packet using packet generator
164+
* updates packet_return map
165+
*/
166+
rocprofiler_status_t
167+
get_spm_packet(const std::shared_ptr<spm_counter_callback_info>& info,
168+
std::unique_ptr<rocprofiler::hsa::AQLPacket>& ret_pkt,
169+
std::shared_ptr<spm_counter_config>& profile)
170+
{
171+
profile->packets.wlock([&](auto& pkt_vector) {
172+
if(!pkt_vector.empty())
173+
{
174+
ret_pkt = std::move(pkt_vector.back());
175+
pkt_vector.pop_back();
176+
}
177+
});
178+
179+
if(!ret_pkt)
180+
{
181+
// If we do not have a packet in the cache, create one.
182+
ret_pkt = rocprofiler::aql::spm_construct_packet(
183+
profile->agent->id,
184+
std::vector<counters::Metric>{profile->metrics.begin(), profile->metrics.end()},
185+
profile->sample_freq,
186+
profile->buffer_size,
187+
profile->timeout);
188+
};
189+
190+
ret_pkt->clear();
191+
info->packet_return_map.wlock([&](auto& data) { data.emplace(ret_pkt.get(), profile); });
192+
193+
return ROCPROFILER_STATUS_SUCCESS;
194+
}
195+
196+
/** @brief Creates spm the counter config
197+
* Checks if the input counters does not exceed hardware limit
198+
* Adds the config to configs cache
199+
*/
200+
rocprofiler_status_t
201+
create_spm_counter_profile(std::shared_ptr<spm_counter_config> config)
202+
{
203+
auto status = ROCPROFILER_STATUS_SUCCESS;
204+
if(status = rocprofiler::aql::spm_can_collect(config->agent->id, config->metrics);
205+
status != ROCPROFILER_STATUS_SUCCESS)
206+
{
207+
return status;
208+
}
209+
210+
spm_get_controller().spm_add_profile(std::move(config));
211+
212+
return status;
213+
}
214+
215+
std::shared_ptr<spm_counter_config>
216+
get_spm_counter_config(rocprofiler_spm_counter_config_id_t id)
217+
{
218+
try
219+
{
220+
return spm_get_controller().get_profile_cfg(id);
221+
} catch(std::out_of_range&)
222+
{
223+
return nullptr;
224+
}
225+
}
226+
227+
/** @brief Configures SPM dispatch for the context
228+
* Checks for conflicting services
229+
* Instantiates spm_dispatch_counter_collection_service
230+
*/
231+
232+
rocprofiler_status_t
233+
configure_callback_spm_dispatch(rocprofiler_context_id_t context_id,
234+
rocprofiler_spm_dispatch_counting_service_cb_t callback,
235+
void* callback_args,
236+
rocprofiler_spm_dispatch_counting_record_cb_t record_callback,
237+
void* record_callback_args)
238+
{
239+
auto* ctx_p = rocprofiler::context::get_mutable_registered_context(context_id);
240+
if(!ctx_p) return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
241+
242+
auto& ctx = *ctx_p;
243+
244+
// FIXME: Due to the clock gating issue, counter collection and PC sampling service
245+
// cannot coexist in the same context for now.
246+
if(ctx.pc_sampler) return ROCPROFILER_STATUS_ERROR_CONTEXT_CONFLICT;
247+
if(ctx.counter_collection) return ROCPROFILER_STATUS_ERROR_CONTEXT_CONFLICT;
248+
if(ctx.device_counter_collection) return ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT;
249+
if(!ctx.dispatch_spm)
250+
ctx.dispatch_spm =
251+
std::make_unique<rocprofiler::context::spm_dispatch_counter_collection_service>();
252+
auto& cb = *ctx.dispatch_spm->callbacks.emplace_back(
253+
std::make_shared<rocprofiler::spm::spm_counter_callback_info>());
254+
255+
cb.user_cb = callback;
256+
cb.callback_args = callback_args;
257+
cb.context = context_id;
258+
cb.record_callback = record_callback;
259+
cb.record_callback_args = record_callback_args;
260+
cb.internal_context = ctx_p;
261+
262+
return ROCPROFILER_STATUS_SUCCESS;
263+
}
264+
265+
/** @brief start SPM dispatch context
266+
* Enables serialization
267+
* Returns if callback has already been added by checking the queue id
268+
* Adds a pre kernel and a post kernel callback
269+
* Enabled flag is used to check if context has already been enabled
270+
*/
271+
272+
rocprofiler_status_t
273+
start_context(const context::context* ctx)
274+
{
275+
if(!ctx || !ctx->dispatch_spm) return ROCPROFILER_STATUS_ERROR;
276+
277+
auto* controller = hsa::get_queue_controller();
278+
279+
bool already_enabled = true;
280+
CHECK_NOTNULL(controller)->enable_serialization();
281+
ctx->dispatch_spm->enabled.wlock([&](auto& enabled) {
282+
if(enabled) return;
283+
already_enabled = false;
284+
enabled = true;
285+
});
286+
287+
if(!already_enabled)
288+
{
289+
// Insert our callbacks into HSA Interceptor. This
290+
// turns on counter instrumentation.
291+
for(auto& cb : ctx->dispatch_spm->callbacks)
292+
{
293+
if(cb->queue_id != rocprofiler::hsa::ClientID{-1}) continue;
294+
cb->queue_id = controller->add_callback(
295+
std::nullopt,
296+
[=](const hsa::Queue& q,
297+
const hsa::rocprofiler_packet& kern_pkt,
298+
rocprofiler_kernel_id_t kernel_id,
299+
rocprofiler_dispatch_id_t dispatch_id,
300+
rocprofiler_user_data_t* user_data,
301+
const hsa::Queue::queue_info_session_t::external_corr_id_map_t& extern_corr_ids,
302+
const context::correlation_id* correlation_id) {
303+
return pre_kernel_call(ctx,
304+
cb,
305+
q,
306+
kern_pkt,
307+
kernel_id,
308+
dispatch_id,
309+
user_data,
310+
extern_corr_ids,
311+
correlation_id);
312+
},
313+
// Completion CB
314+
[=](const hsa::Queue& /* q */,
315+
hsa::rocprofiler_packet /* kern_pkt */,
316+
std::shared_ptr<hsa::Queue::queue_info_session_t>& session,
317+
inst_pkt_t& aql,
318+
kernel_dispatch::profiling_time dispatch_time) {
319+
post_kernel_call(ctx, cb, session, aql, dispatch_time);
320+
});
321+
}
322+
}
323+
324+
return ROCPROFILER_STATUS_SUCCESS;
325+
}
326+
327+
/** @brief stop SPM dispatch context
328+
* Disables serialization
329+
* Sets Enabled flag to false
330+
*/
331+
332+
void
333+
stop_context(const context::context* ctx)
334+
{
335+
if(!ctx || !ctx->dispatch_spm) return;
336+
337+
auto* controller = hsa::get_queue_controller();
338+
339+
ctx->dispatch_spm->enabled.wlock([&](auto& enabled) {
340+
if(!enabled) return;
341+
enabled = false;
342+
});
343+
344+
if(controller) controller->disable_serialization();
345+
}
346+
347+
} // namespace spm
348+
349+
} // namespace rocprofiler

0 commit comments

Comments
 (0)