Skip to content

Commit 3710324

Browse files
committed
fix(pmc): improve performance and fix CMake for GPU/NIC collectors
- Replace std::stringstream with fmt::format in types.hpp - Use std::vector instead of std::unordered_map for SDMA states - Template sample() callback to avoid std::function overhead - Fix CMake include dirs and link GPU/NIC tests to unit test target
1 parent 7e8546c commit 3710324

File tree

8 files changed

+76
-83
lines changed

8 files changed

+76
-83
lines changed

projects/rocprofiler-systems/source/lib/rocprof-sys/library/pmc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@ target_link_libraries(
3030
PRIVATE
3131
rocprofiler-systems-core-library
3232
rocprofiler-systems::rocprofiler-systems-interface-library
33+
rocprofiler-systems::rocprofiler-systems-rocm
3334
)

projects/rocprofiler-systems/source/lib/rocprof-sys/library/pmc/collectors/base/collector.hpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
#include <algorithm>
1010
#include <cassert>
11-
#include <functional>
1211
#include <limits>
1312
#include <memory>
1413
#include <stdexcept>
@@ -23,10 +22,6 @@ namespace collectors
2322
namespace base
2423
{
2524

26-
#if ROCPROFSYS_USE_ROCM > 0
27-
28-
using get_timestamp_t = std::function<unsigned long()>;
29-
3025
/**
3126
* @brief Generic collector template for device performance monitoring.
3227
*
@@ -137,9 +132,11 @@ struct collector
137132
* via the cache API and optionally Perfetto. Devices that fail to read metrics
138133
* are automatically disabled and removed from the device list.
139134
*
135+
* @tparam GetTimestamp Callable returning uint64_t timestamp (avoids std::function overhead)
140136
* @param get_timestamp Function to retrieve the current timestamp for the sample.
141137
*/
142-
void sample(const get_timestamp_t& get_timestamp)
138+
template <typename GetTimestamp>
139+
void sample(GetTimestamp&& get_timestamp)
143140
{
144141
auto new_end = std::remove_if(
145142
m_device_entries.begin(), m_device_entries.end(),
@@ -231,7 +228,6 @@ struct collector
231228
enabled_metrics_t m_enabled_metrics; ///< Enabled metrics
232229
};
233230

234-
#endif // ROCPROFSYS_USE_ROCM > 0
235231

236232
} // namespace base
237233
} // namespace collectors

projects/rocprofiler-systems/source/lib/rocprof-sys/library/pmc/collectors/gpu/collector.hpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,9 @@
99

1010
#include <algorithm>
1111
#include <cassert>
12-
#include <functional>
1312
#include <limits>
1413
#include <memory>
15-
#include <unordered_map>
14+
#include <vector>
1615

1716
namespace rocprofsys
1817
{
@@ -23,15 +22,12 @@ namespace collectors
2322
namespace gpu
2423
{
2524

26-
#if ROCPROFSYS_USE_ROCM > 0
2725

2826
using ::rocprofsys::pmc::device_filter;
2927
using ::rocprofsys::pmc::device_selection_mode;
3028
using ::rocprofsys::pmc::collectors::gpu::check_status;
3129
using ::rocprofsys::pmc::collectors::gpu::enabled_metrics;
3230

33-
using gettimestamp_t = std::function<unsigned long()>;
34-
3531
/**
3632
* @brief GPU metrics collector for performance monitoring.
3733
*
@@ -158,9 +154,10 @@ struct collector
158154
return false; // Keep device
159155
} catch(const std::runtime_error& e)
160156
{
161-
LOG_ERROR("Reading metrics failed for device with ID %zu. Error: %s. "
162-
"Disabling device!",
163-
device->get_index(), e.what());
157+
LOG_ERROR(
158+
"Reading metrics failed for GPU device {}. Error: {}. "
159+
"Disabling device!",
160+
device->get_index(), e.what());
164161
return true; // Remove device
165162
}
166163
});
@@ -292,6 +289,12 @@ struct collector
292289
auto _device_id = device->get_index();
293290
uint64_t current_cumulative = device->get_raw_sdma_usage();
294291

292+
// Ensure vector is sized to accommodate device index
293+
if(_device_id >= m_sdma_states.size())
294+
{
295+
m_sdma_states.resize(_device_id + 1);
296+
}
297+
295298
auto& state = m_sdma_states[_device_id];
296299
if(state.has_prev && timestamp > static_cast<int64_t>(state.prev_timestamp))
297300
{
@@ -320,14 +323,12 @@ struct collector
320323
bool has_prev = false;
321324
};
322325

323-
device_vector_t m_gpu_devices; ///< List of enabled GPU devices for sampling
326+
device_vector_t m_gpu_devices; ///< Enabled GPU devices for sampling
324327
std::shared_ptr<device_provider> m_device_provider; ///< Device provider instance
325-
enabled_metrics m_enabled_metrics; ///< Set of metrics enabled for collection
326-
std::unordered_map<size_t, sdma_state>
327-
m_sdma_states; ///< Per-device SDMA delta tracking
328+
enabled_metrics m_enabled_metrics; ///< Metrics enabled for collection
329+
std::vector<sdma_state> m_sdma_states; ///< Per-device SDMA delta tracking
328330
};
329331

330-
#endif // ROCPROFSYS_USE_ROCM > 0
331332

332333
} // namespace gpu
333334
} // namespace collectors

projects/rocprofiler-systems/source/lib/rocprof-sys/library/pmc/collectors/gpu/device.hpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,15 @@
66
#include "library/pmc/collectors/gpu/types.hpp"
77
#include "logger/debug.hpp"
88

9+
#include <amd_smi/amdsmi.h>
10+
911
#include <algorithm>
1012
#include <cstdint>
1113
#include <limits>
1214
#include <memory>
1315
#include <string>
1416
#include <vector>
1517

16-
#if ROCPROFSYS_USE_ROCM > 0
17-
# include "core/amd_smi.hpp"
18-
#endif
19-
20-
#if ROCPROFSYS_USE_ROCM > 0
21-
# include <amd_smi/amdsmi.h>
22-
#endif
23-
2418
namespace rocprofsys
2519
{
2620
namespace pmc
@@ -30,8 +24,6 @@ namespace collectors
3024
namespace gpu
3125
{
3226

33-
#if ROCPROFSYS_USE_ROCM > 0
34-
3527
using ::rocprofsys::pmc::collectors::gpu::enabled_metrics;
3628
using ::rocprofsys::pmc::collectors::gpu::metrics;
3729

@@ -417,7 +409,6 @@ class device
417409
bool m_is_supported = false;
418410
};
419411

420-
#endif // ROCPROFSYS_USE_ROCM > 0
421412

422413
} // namespace gpu
423414
} // namespace collectors

projects/rocprofiler-systems/source/lib/rocprof-sys/library/pmc/collectors/gpu/tests/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,13 @@ target_link_libraries(
3131
rocprofiler-systems-pmc-library
3232
rocprofiler-systems-googletest-library
3333
rocprofiler-systems-logger
34+
rocprofiler-systems::rocprofiler-systems-rocm
3435
)
3536

3637
target_include_directories(
3738
pmc-gpu-collector-tests
38-
PRIVATE ${PROJECT_SOURCE_DIR}/source/lib ${PROJECT_SOURCE_DIR}/source/lib/rocprof-sys
39+
PRIVATE
40+
${PROJECT_SOURCE_DIR}/source/lib
41+
${PROJECT_SOURCE_DIR}/source/lib/rocprof-sys
42+
${PROJECT_BINARY_DIR}/source/lib
3943
)

projects/rocprofiler-systems/source/lib/rocprof-sys/library/pmc/collectors/gpu/types.hpp

Lines changed: 43 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,15 @@
55

66
#include "library/pmc/common/types.hpp"
77

8+
#include <spdlog/fmt/fmt.h>
9+
10+
#include <array>
811
#include <cstdint>
9-
#include <sstream>
12+
#include <set>
13+
#include <stdexcept>
1014
#include <string>
1115

12-
#if ROCPROFSYS_USE_ROCM > 0
13-
# include <array>
14-
# include <set>
15-
# include <stdexcept>
16-
17-
# include <amd_smi/amdsmi.h>
18-
#endif
16+
#include <amd_smi/amdsmi.h>
1917

2018
namespace rocprofsys
2119
{
@@ -77,46 +75,47 @@ union enabled_metrics
7775
inline std::string
7876
to_string(const enabled_metrics& metrics)
7977
{
80-
std::stringstream ss;
81-
ss << "[SMI enabled metrics] ";
82-
ss << "Current socket power: " << metrics.bits.current_socket_power
83-
<< ", Average socket power: " << metrics.bits.average_socket_power
84-
<< ", Memory usage: " << static_cast<bool>(metrics.bits.memory_usage)
85-
<< ", Hotspot temperature: " << static_cast<bool>(metrics.bits.hotspot_temperature)
86-
<< ", Edge temperature: " << static_cast<bool>(metrics.bits.edge_temperature)
87-
<< ", GFX activity: " << static_cast<bool>(metrics.bits.gfx_activity)
88-
<< ", UMC activity: " << static_cast<bool>(metrics.bits.umc_activity)
89-
<< ", MM activity: " << static_cast<bool>(metrics.bits.mm_activity)
90-
<< ", VCN activity: " << static_cast<bool>(metrics.bits.vcn_activity)
91-
<< ", JPEG activity: " << static_cast<bool>(metrics.bits.jpeg_activity)
92-
<< ", VCN busy: " << static_cast<bool>(metrics.bits.vcn_busy)
93-
<< ", JPEG busy: " << static_cast<bool>(metrics.bits.jpeg_busy)
94-
<< ", XGMI: " << static_cast<bool>(metrics.bits.xgmi)
95-
<< ", PCIE: " << static_cast<bool>(metrics.bits.pcie)
96-
<< ", SDMA: " << static_cast<bool>(metrics.bits.sdma_usage) << "\n";
97-
return ss.str();
78+
return fmt::format(
79+
"[SMI enabled metrics] Current socket power: {}, Average socket power: {}, "
80+
"Memory usage: {}, Hotspot temperature: {}, Edge temperature: {}, "
81+
"GFX activity: {}, UMC activity: {}, MM activity: {}, "
82+
"VCN activity: {}, JPEG activity: {}, VCN busy: {}, JPEG busy: {}, "
83+
"XGMI: {}, PCIE: {}, SDMA: {}\n",
84+
static_cast<bool>(metrics.bits.current_socket_power),
85+
static_cast<bool>(metrics.bits.average_socket_power),
86+
static_cast<bool>(metrics.bits.memory_usage),
87+
static_cast<bool>(metrics.bits.hotspot_temperature),
88+
static_cast<bool>(metrics.bits.edge_temperature),
89+
static_cast<bool>(metrics.bits.gfx_activity),
90+
static_cast<bool>(metrics.bits.umc_activity),
91+
static_cast<bool>(metrics.bits.mm_activity),
92+
static_cast<bool>(metrics.bits.vcn_activity),
93+
static_cast<bool>(metrics.bits.jpeg_activity),
94+
static_cast<bool>(metrics.bits.vcn_busy),
95+
static_cast<bool>(metrics.bits.jpeg_busy),
96+
static_cast<bool>(metrics.bits.xgmi),
97+
static_cast<bool>(metrics.bits.pcie),
98+
static_cast<bool>(metrics.bits.sdma_usage));
9899
}
99100

100-
#if ROCPROFSYS_USE_ROCM > 0
101-
102101
// Ensure we have the correct max values defined
103-
# ifdef AMDSMI_MAX_NUM_JPEG_ENG_V1
104-
# define ROCPROFSYS_MAX_NUM_JPEG_ENGINES AMDSMI_MAX_NUM_JPEG_ENG_V1
105-
# else
106-
# define ROCPROFSYS_MAX_NUM_JPEG_ENGINES 40
107-
# endif
102+
#ifdef AMDSMI_MAX_NUM_JPEG_ENG_V1
103+
# define ROCPROFSYS_MAX_NUM_JPEG_ENGINES AMDSMI_MAX_NUM_JPEG_ENG_V1
104+
#else
105+
# define ROCPROFSYS_MAX_NUM_JPEG_ENGINES 40
106+
#endif
108107

109-
# ifndef AMDSMI_MAX_NUM_VCN
110-
# define AMDSMI_MAX_NUM_VCN 4
111-
# endif
108+
#ifndef AMDSMI_MAX_NUM_VCN
109+
# define AMDSMI_MAX_NUM_VCN 4
110+
#endif
112111

113-
# ifndef AMDSMI_MAX_NUM_JPEG
114-
# define AMDSMI_MAX_NUM_JPEG 32
115-
# endif
112+
#ifndef AMDSMI_MAX_NUM_JPEG
113+
# define AMDSMI_MAX_NUM_JPEG 32
114+
#endif
116115

117-
# ifndef AMDSMI_MAX_NUM_XCP
118-
# define AMDSMI_MAX_NUM_XCP 8
119-
# endif
116+
#ifndef AMDSMI_MAX_NUM_XCP
117+
# define AMDSMI_MAX_NUM_XCP 8
118+
#endif
120119

121120
struct metrics
122121
{
@@ -178,14 +177,11 @@ check_status(amdsmi_status_t status, const char* error_message)
178177
{
179178
if(status != AMDSMI_STATUS_SUCCESS)
180179
{
181-
std::stringstream ss;
182-
ss << error_message << " AMD SMI Error code: " << status;
183-
throw std::runtime_error(ss.str());
180+
throw std::runtime_error(fmt::format("{} AMD SMI Error code: {}", error_message,
181+
static_cast<int>(status)));
184182
}
185183
}
186184

187-
#endif // ROCPROFSYS_USE_ROCM > 0
188-
189185
} // namespace gpu
190186
} // namespace collectors
191187
} // namespace pmc

projects/rocprofiler-systems/source/lib/rocprof-sys/library/pmc/collectors/nic/tests/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,13 @@ target_link_libraries(
3131
rocprofiler-systems-pmc-library
3232
rocprofiler-systems-googletest-library
3333
rocprofiler-systems-logger
34+
rocprofiler-systems::rocprofiler-systems-rocm
3435
)
3536

3637
target_include_directories(
3738
pmc-nic-collector-tests
38-
PRIVATE ${PROJECT_SOURCE_DIR}/source/lib ${PROJECT_SOURCE_DIR}/source/lib/rocprof-sys
39+
PRIVATE
40+
${PROJECT_SOURCE_DIR}/source/lib
41+
${PROJECT_SOURCE_DIR}/source/lib/rocprof-sys
42+
${PROJECT_BINARY_DIR}/source/lib
3943
)

projects/rocprofiler-systems/source/tests/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ set(UNIT_TEST_OBJECTS
1010
$<$<TARGET_EXISTS:rccl-unit-tests>:$<TARGET_OBJECTS:rccl-unit-tests>>
1111
)
1212

13-
if(ROCPROFSYS_USE_ROCM)
14-
list(APPEND UNIT_TEST_OBJECTS $<TARGET_OBJECTS:pmc-common-tests>)
15-
endif()
13+
list(APPEND UNIT_TEST_OBJECTS $<TARGET_OBJECTS:pmc-common-tests>)
14+
list(APPEND UNIT_TEST_OBJECTS $<TARGET_OBJECTS:pmc-gpu-collector-tests>)
15+
list(APPEND UNIT_TEST_OBJECTS $<TARGET_OBJECTS:pmc-nic-collector-tests>)
1616

1717
add_executable(rocprof-sys-unit-tests ${UNIT_TEST_OBJECTS})
1818

0 commit comments

Comments
 (0)