|
29 | 29 | #include <stdexcept> |
30 | 30 | #include <type_traits> |
31 | 31 | #include <unordered_set> |
| 32 | +#include <utility> |
32 | 33 |
|
33 | 34 | namespace nvbench::detail |
34 | 35 | { |
@@ -60,6 +61,35 @@ void cupti_host_call(const CUptiResult status) |
60 | 61 | cupti_call_impl(status, "CUPTI Host API call returned error: "); |
61 | 62 | } |
62 | 63 |
|
/// Sets `params.bAllowDeviceLevelCounters` to `true` when `T` has such a
/// member.
///
/// Callers pass the literal `0` as the second argument. Because `0` is an
/// `int`, this overload is an exact match and is preferred whenever the
/// trailing return type is well-formed; otherwise it is removed from overload
/// resolution (SFINAE) and the `long` overload is selected instead.
///
/// @param params Parameter struct to modify in place.
template <typename T>
auto set_allow_device_level_counters(T &params, int)
  -> decltype(std::declval<T &>().bAllowDeviceLevelCounters, void())
{
  params.bAllowDeviceLevelCounters = true;
}

/// Fallback used when `T` has no `bAllowDeviceLevelCounters` member: does
/// nothing and leaves the argument untouched.
template <typename T>
void set_allow_device_level_counters(T &, long)
{}
| 74 | + |
/// Reports whether `params` describes a context-scope metric, for types that
/// expose a `metricCollectionScope` member.
///
/// Callers pass the literal `0` as the second argument. Because `0` is an
/// `int`, this overload is an exact match and is preferred whenever the
/// trailing return type is well-formed; otherwise it is removed from overload
/// resolution (SFINAE) and the `long` overload is selected instead.
///
/// @param params Parameter struct to inspect.
/// @return Result of comparing `params.metricCollectionScope` against
///         `CUPTI_METRIC_COLLECTION_SCOPE_CONTEXT` when that name is a
///         defined preprocessor macro; `true` otherwise.
template <typename T>
auto is_context_scope(const T &params, int)
  -> decltype(std::declval<const T &>().metricCollectionScope, bool())
{
  // NOTE(review): `defined(...)` only detects preprocessor macros. If
  // CUPTI_METRIC_COLLECTION_SCOPE_CONTEXT is an enumerator rather than a
  // macro, the comparison below is never compiled and this function always
  // returns true -- confirm against the CUPTI headers in use.
#if defined(CUPTI_METRIC_COLLECTION_SCOPE_CONTEXT)
  return params.metricCollectionScope == CUPTI_METRIC_COLLECTION_SCOPE_CONTEXT;
#else
  static_cast<void>(params); // silence the unused-parameter warning
  return true;
#endif
}

/// Fallback used when `T` has no `metricCollectionScope` member: every metric
/// is reported as context-scope.
template <typename T>
bool is_context_scope(const T &, long)
{
  return true;
}
| 92 | + |
63 | 93 | } // namespace |
64 | 94 |
|
65 | 95 | struct cupti_profiler::host_impl |
@@ -198,9 +228,9 @@ void cupti_profiler::initialize_availability_image() |
198 | 228 | { |
199 | 229 | CUpti_Profiler_GetCounterAvailability_Params params{}; |
200 | 230 |
|
201 | | - params.structSize = CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE; |
202 | | - params.ctx = m_device.get_context(); |
203 | | - params.bAllowDeviceLevelCounters = true; |
| 231 | + params.structSize = CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE; |
| 232 | + params.ctx = m_device.get_context(); |
| 233 | + set_allow_device_level_counters(params, 0); |
204 | 234 |
|
205 | 235 | cupti_call(cuptiProfilerGetCounterAvailability(&params)); |
206 | 236 |
|
@@ -330,7 +360,7 @@ void append_metric_names(CUpti_Profiler_Host_Object *host_object, |
330 | 360 | continue; |
331 | 361 | } |
332 | 362 |
|
333 | | - if (props_params.metricCollectionScope != CUPTI_METRIC_COLLECTION_SCOPE_CONTEXT) |
| 363 | + if (!is_context_scope(props_params, 0)) |
334 | 364 | { |
335 | 365 | continue; |
336 | 366 | } |
|
0 commit comments