Skip to content

Commit 5e28f59

Browse files
authored
[rocprofiler-systems] AMD SMI collector using policy-based design (#3703)
1 parent c42abea commit 5e28f59

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+9146
-3392
lines changed

projects/rocprofiler-systems/cmake/Packages.cmake

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,55 @@ endif()
266266

267267
target_link_libraries(rocprofiler-systems-rocm INTERFACE amd_smi)
268268

269+
# Detect the AMD SMI library version by scanning amdsmi.h for its
# AMDSMI_LIB_VERSION_MAJOR / AMDSMI_LIB_VERSION_MINOR defines.
#
# When the header exists, ROCPROFSYS_AMDSMI_VERSION_MAJOR and
# ROCPROFSYS_AMDSMI_VERSION_MINOR are set to the parsed numbers; a component
# whose define cannot be found is set to the empty string. When the header
# does not exist, neither variable is touched.
set(_AMDSMI_HEADER "${ROCM_PATH}/include/amd_smi/amdsmi.h")
if(EXISTS "${_AMDSMI_HEADER}")
    file(READ "${_AMDSMI_HEADER}" _AMDSMI_HEADER_CONTENTS)

    foreach(_AMDSMI_COMPONENT MAJOR MINOR)
        # Accept any run of spaces/tabs between the tokens so that
        # column-aligned defines are still recognized.
        string(
            REGEX MATCH
            "#define[ \t]+AMDSMI_LIB_VERSION_${_AMDSMI_COMPONENT}[ \t]+([0-9]+)"
            _AMDSMI_VERSION_MATCH
            "${_AMDSMI_HEADER_CONTENTS}"
        )
        # Only trust CMAKE_MATCH_1 when this particular match succeeded.
        if(NOT "${_AMDSMI_VERSION_MATCH}" STREQUAL "")
            set(ROCPROFSYS_AMDSMI_VERSION_${_AMDSMI_COMPONENT} "${CMAKE_MATCH_1}")
        else()
            set(ROCPROFSYS_AMDSMI_VERSION_${_AMDSMI_COMPONENT} "")
        endif()
    endforeach()

    if(
        "${ROCPROFSYS_AMDSMI_VERSION_MAJOR}" STREQUAL ""
        OR "${ROCPROFSYS_AMDSMI_VERSION_MINOR}" STREQUAL ""
    )
        # Avoid printing a misleading "detected: ." line with empty components.
        message(
            STATUS
            "AMD SMI version could not be determined from ${_AMDSMI_HEADER}"
        )
    else()
        message(
            STATUS
            "AMD SMI version detected: ${ROCPROFSYS_AMDSMI_VERSION_MAJOR}.${ROCPROFSYS_AMDSMI_VERSION_MINOR}"
        )
    endif()
endif()
295+
296+
# Decide whether AINIC support is built. ROCPROFSYS_BUILD_AINIC starts OFF and
# is switched ON only when ROCPROFSYS_USE_AINIC is enabled and the detected
# AMD SMI version is 26.3 or newer.
set(ROCPROFSYS_BUILD_AINIC OFF CACHE INTERNAL "Build AINIC support")
if(NOT ROCPROFSYS_USE_AINIC)
    message(STATUS "AINIC disabled: ROCPROFSYS_USE_AINIC is OFF")
elseif(
    (
        ROCPROFSYS_AMDSMI_VERSION_MAJOR EQUAL 26
        AND ROCPROFSYS_AMDSMI_VERSION_MINOR GREATER 2
    )
    OR ROCPROFSYS_AMDSMI_VERSION_MAJOR GREATER 26
)
    # Version requirement satisfied: turn the internal switch on.
    set(ROCPROFSYS_BUILD_AINIC ON CACHE INTERNAL "Build AINIC support" FORCE)
    message(STATUS "AINIC support enabled (AMD SMI >= 26.3)")
else()
    # Requested, but the AMD SMI version is too old (or was not detected).
    message(
        STATUS
        "AINIC disabled: AMD SMI ${ROCPROFSYS_AMDSMI_VERSION_MAJOR}.${ROCPROFSYS_AMDSMI_VERSION_MINOR} < 26.3"
    )
endif()
317+
269318
# ----------------------------------------------------------------------------------------#
270319
#
271320
# ROCpd

projects/rocprofiler-systems/source/lib/common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ target_sources(
2626
${CMAKE_CURRENT_SOURCE_DIR}/invoke.hpp
2727
${CMAKE_CURRENT_SOURCE_DIR}/join.hpp
2828
${CMAKE_CURRENT_SOURCE_DIR}/setup.hpp
29+
${CMAKE_CURRENT_SOURCE_DIR}/span.hpp
2930
${CMAKE_CURRENT_SOURCE_DIR}/traits.hpp
3031
${CMAKE_CURRENT_SOURCE_DIR}/md5sum.hpp
3132
${CMAKE_CURRENT_SOURCE_DIR}/static_object.hpp

projects/rocprofiler-systems/source/lib/core/perfetto.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,10 @@ std::unique_ptr<::perfetto::TracingSession>& get_perfetto_session(
5353
template <typename Tp>
5454
struct perfetto_counter_track
5555
{
56-
using track_map_t = std::map<uint32_t, std::vector<::perfetto::CounterTrack>>;
57-
using name_map_t = std::map<uint32_t, std::vector<std::unique_ptr<std::string>>>;
58-
using data_t = std::pair<name_map_t, track_map_t>;
56+
using category_type = Tp;
57+
using track_map_t = std::map<uint32_t, std::vector<::perfetto::CounterTrack>>;
58+
using name_map_t = std::map<uint32_t, std::vector<std::unique_ptr<std::string>>>;
59+
using data_t = std::pair<name_map_t, track_map_t>;
5960

6061
static auto init() { (void) get_data(); }
6162
static auto exists(size_t _idx, int64_t _n = -1);

projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
#include "core/trace_cache/metadata_registry.hpp"
2727
#include "core/trace_cache/sample_type.hpp"
2828
#include "core/trace_cache/storage_parser.hpp"
29-
29+
#include "library/pmc/collectors/gpu/sample.hpp"
30+
#include "library/pmc/collectors/nic/sample.hpp"
3031
#include "library/runtime.hpp"
31-
3232
#include <memory>
3333
#include <unistd.h>
3434

@@ -40,8 +40,9 @@ namespace trace_cache
4040
using storage_parser_t =
4141
storage_parser<type_identifier_t, kernel_dispatch_sample, memory_copy_sample,
4242
memory_allocate_sample, region_sample, in_time_sample,
43-
pmc_event_with_sample, amd_smi_sample, cpu_freq_sample,
44-
backtrace_region_sample, scratch_memory_sample, ainic_sample>;
43+
pmc_event_with_sample, pmc::collectors::gpu::sample,
44+
pmc::collectors::nic::sample, cpu_freq_sample, backtrace_region_sample,
45+
scratch_memory_sample>;
4546

4647
using buffer_storage_t = buffer_storage<flush_worker_factory_t, type_identifier_t>;
4748

projects/rocprofiler-systems/source/lib/core/trace_cache/cache_type_traits.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#pragma once
2424
#include "common/span.hpp"
25+
#include <array>
2526
#include <cstdint>
2627
#include <optional>
2728
#include <string_view>
@@ -104,6 +105,17 @@ struct is_vector<std::vector<T>> : std::true_type
104105
template <typename T>
105106
inline constexpr bool is_vector_v = is_vector<T>::value;
106107

108+
/// Type trait reporting whether a type is a std::array specialization.
/// The primary template covers every other type and yields false.
template <typename Tp>
struct is_array : std::bool_constant<false>
{};

/// Partial specialization matching std::array<Tp, Extent>; yields true.
template <typename Tp, size_t Extent>
struct is_array<std::array<Tp, Extent>> : std::bool_constant<true>
{};

/// Convenience variable template for is_array<Tp>::value.
template <typename Tp>
inline constexpr bool is_array_v = is_array<Tp>::value;
118+
107119
template <typename T>
108120
static constexpr bool is_string_view_v =
109121
std::is_same_v<std::decay_t<T>, std::string_view>;
@@ -122,7 +134,7 @@ inline constexpr bool is_optional_v = is_optional<T>::value;
122134
template <typename T>
123135
inline constexpr bool is_supported_type_v =
124136
is_span_v<T> || std::is_integral_v<T> || std::is_floating_point_v<T> ||
125-
is_string_view_v<T> || is_vector_v<T> || is_optional_v<T>;
137+
is_string_view_v<T> || is_vector_v<T> || is_optional_v<T> || is_array_v<T>;
126138

127139
template <typename T>
128140
struct is_enum_class

projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
#include <rocprofiler-sdk/callback_tracing.h>
3838
#include <rocprofiler-sdk/cxx/name_info.hpp>
3939
#include <set>
40-
#include <sstream>
40+
#include <spdlog/fmt/ranges.h>
4141
#include <stdint.h>
4242
#include <string.h>
4343
#include <string>
@@ -62,18 +62,6 @@ struct process
6262
uint32_t end;
6363
};
6464

65-
template <typename Category>
66-
inline std::string
67-
annotate_category(std::optional<int> first_section = std::nullopt,
68-
std::optional<int> second_section = std::nullopt)
69-
{
70-
std::stringstream ss;
71-
ss << std::string(tim::trait::name<Category>::value);
72-
if(first_section) ss << "_" << std::to_string(*first_section);
73-
if(second_section) ss << "_" << std::to_string(*second_section);
74-
return ss.str();
75-
}
76-
7765
struct pmc
7866
{
7967
agent_type type;
@@ -131,16 +119,12 @@ struct thread
131119

132120
template <typename Category>
133121
inline std::string
134-
annotate_with_device_id(uint32_t device_id,
135-
std::optional<int> first_section = std::nullopt,
136-
std::optional<int> second_section = std::nullopt)
122+
format_track_name(std::optional<int> first_section = std::nullopt,
123+
std::optional<int> second_section = std::nullopt)
137124
{
138-
std::stringstream ss;
139-
ss << std::string(tim::trait::name<Category>::value) + " [" +
140-
std::to_string(device_id) + "]";
141-
if(first_section) ss << "_" << std::to_string(*first_section);
142-
if(second_section) ss << "_" << std::to_string(*second_section);
143-
return ss.str();
125+
return fmt::format("{}{}{}", tim::trait::name<Category>::value,
126+
first_section ? fmt::format("_{}", *first_section) : "",
127+
second_section ? fmt::format("_{}", *second_section) : "");
144128
}
145129

146130
template <typename Category>

0 commit comments

Comments
 (0)