Skip to content

Commit e90423a

Browse files
authored
Update rocm options (#708)
* Use rocprofiler service for rocm.activities option
* Add rocm.activity.count metric
* Add rocm.activity.stats option
* Add rocm_counters.json example config file
* Fix group by in runtime-report
1 parent 4f996ad commit e90423a

File tree

4 files changed

+71
-11
lines changed

4 files changed

+71
-11
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"options":
3+
[
4+
{
5+
"name": "sq_waves",
6+
"description": "ROCm SQ_WAVES counter",
7+
"category": "metric",
8+
"services": [ "rocprofiler" ],
9+
"config": { "CALI_ROCPROFILER_COUNTERS": "SQ_WAVES" },
10+
"query":
11+
{
12+
"local": "select sum(sum#rocm.SQ_WAVES) as sq_waves",
13+
"cross": "select min(sum#sum#rocm.SQ_WAVES) as \"sq_waves (min)\",avg(sum#sum#rocm.SQ_WAVES) as \"sq_waves (avg)\",max(sum#sum#rocm.SQ_WAVES) as \"sq_waves (max)\""
14+
}
15+
}
16+
]
17+
}

src/caliper/controllers/RuntimeReportController.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,11 @@ class RuntimeReportController : public cali::ChannelController
5757
}
5858

5959
// Config for second aggregation step in MPI mode (cross-process aggregation)
60-
std::string q_cross = std::string(" select min(") + tmetric + ") as \"Min time/rank\"" + std::string(",max(")
61-
+ tmetric + ") as \"Max time/rank\"" + std::string(",avg(") + tmetric
62-
+ ") as \"Avg time/rank\"" + std::string(",") + pmetric + " as \"Time %\" ";
60+
std::string q_cross = std::string(" group by path select ");
61+
q_cross.append(" min(").append(tmetric).append(") as \"Min time/rank\"");
62+
q_cross.append(",max(").append(tmetric).append(") as \"Max time/rank\"");
63+
q_cross.append(",avg(").append(tmetric).append(") as \"Avg time/rank\"");
64+
q_cross.append(",").append(pmetric).append(" as \"Time %\" ");
6365

6466
std::string format = std::string(" format ") + util::build_tree_format_spec(config(), opts);
6567

src/caliper/controllers/controllers.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ const char* event_trace_spec = R"json(
8282
"name": "rocm.activities",
8383
"description": "Trace ROCm activities",
8484
"type": "bool",
85-
"services": [ "roctracer" ],
86-
"config": { "CALI_ROCTRACER_SNAPSHOT_TIMESTAMPS": "true" }
85+
"services": [ "rocprofiler" ],
86+
"config": { "CALI_ROCPROFILER_ENABLE_ACTIVITY_TRACING": "true", "CALI_ROCPROFILER_ENABLE_SNAPSHOT_TIMESTAMPS": "true" }
8787
},{
8888
"name": "rocm.counters",
8989
"description": "Record ROCm counters through rocprofiler-sdk",
@@ -739,6 +739,38 @@ const char* builtin_rocm_option_specs = R"json(
739739
avg(avg#alloc.size) as \"Avg Bytes/alloc\",
740740
max(max#alloc.size) as \"Max Bytes/alloc\""
741741
}
742+
},{
743+
"name": "rocm.activity.stats",
744+
"description": "ROCm activity statistics",
745+
"type": "bool",
746+
"category": "metric",
747+
"services": [ "rocprofiler" ],
748+
"config": { "CALI_ROCPROFILER_ENABLE_ACTIVITY_TRACING": "true" },
749+
"query":
750+
{
751+
"local":
752+
"
753+
select
754+
rocm.kernel.name as Kernel,
755+
sum(sum#rocm.activity.count) as \"GPU invoc.\",
756+
min(min#rocm.activity.duration) as \"Nsec/invoc (min)\",
757+
avg(avg#rocm.activity.duration) as \"Nsec/invoc (avg)\",
758+
max(max#rocm.activity.duration) as \"Nsec/invoc (max)\"
759+
group by
760+
rocm.kernel.name
761+
",
762+
"cross":
763+
"
764+
select
765+
rocm.kernel.name as Kernel,
766+
sum(sum#sum#rocm.activity.count) as \"GPU invoc.\",
767+
min(min#min#rocm.activity.duration) as \"Nsec/invoc (min)\",
768+
avg(avg#avg#rocm.activity.duration) as \"Nsec/invoc (avg)\",
769+
max(max#max#rocm.activity.duration) as \"Nsec/invoc (max)\"
770+
group by
771+
rocm.kernel.name
772+
"
773+
}
742774
}
743775
]
744776
)json";

src/services/rocprofiler/RocProfiler.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class RocProfilerService
8282
Attribute m_activity_device_id_attr;
8383
Attribute m_activity_queue_id_attr;
8484
Attribute m_activity_duration_attr;
85+
Attribute m_activity_count_attr;
8586
Attribute m_src_agent_attr;
8687
Attribute m_dst_agent_attr;
8788
Attribute m_agent_attr;
@@ -149,6 +150,11 @@ class RocProfilerService
149150
CALI_TYPE_UINT,
150151
CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS | CALI_ATTR_AGGREGATABLE
151152
);
153+
m_activity_count_attr = c->create_attribute(
154+
"rocm.activity.count",
155+
CALI_TYPE_UINT,
156+
CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS | CALI_ATTR_AGGREGATABLE
157+
);
152158

153159
m_host_timestamp_attr = c->create_attribute(
154160
"rocm.host.timestamp",
@@ -244,6 +250,7 @@ class RocProfilerService
244250
s_instance->m_activity_start_attr,
245251
s_instance->m_activity_end_attr,
246252
s_instance->m_activity_duration_attr,
253+
s_instance->m_activity_count_attr,
247254
s_instance->m_dispatch_id_attr
248255
};
249256

@@ -276,13 +283,14 @@ class RocProfilerService
276283
Variant(cali_make_variant_from_uint(record->start_timestamp)),
277284
Variant(cali_make_variant_from_uint(record->end_timestamp)),
278285
Variant(cali_make_variant_from_uint(record->end_timestamp - record->start_timestamp)),
286+
Variant(cali_make_variant_from_uint(1)),
279287
Variant(cali_make_variant_from_uint(record->dispatch_info.dispatch_id))
280288
};
281289

282290
cali::Node* correlation = static_cast<cali::Node*>(record->correlation_id.external.ptr);
283291

284-
FixedSizeSnapshotRecord<8> snapshot;
285-
c.make_record(7, attr, data, snapshot.builder(), correlation);
292+
FixedSizeSnapshotRecord<10> snapshot;
293+
c.make_record(8, attr, data, snapshot.builder(), correlation);
286294
if (!mpi_rank_entry.empty())
287295
snapshot.builder().append(mpi_rank_entry);
288296

@@ -296,7 +304,7 @@ class RocProfilerService
296304
const Attribute attr[] = { s_instance->m_activity_name_attr, s_instance->m_activity_start_attr,
297305
s_instance->m_activity_end_attr, s_instance->m_activity_duration_attr,
298306
s_instance->m_src_agent_attr, s_instance->m_dst_agent_attr,
299-
s_instance->m_bytes_attr
307+
s_instance->m_bytes_attr, s_instance->m_activity_count_attr
300308
};
301309

302310
const char* activity_name = nullptr;
@@ -318,13 +326,14 @@ class RocProfilerService
318326
Variant(cali_make_variant_from_uint(record->end_timestamp - record->start_timestamp)),
319327
Variant(cali_make_variant_from_uint(src_agent)),
320328
Variant(cali_make_variant_from_uint(dst_agent)),
321-
Variant(cali_make_variant_from_uint(record->bytes))
329+
Variant(cali_make_variant_from_uint(record->bytes)),
330+
Variant(cali_make_variant_from_uint(1))
322331
};
323332

324333
cali::Node* correlation = static_cast<cali::Node*>(record->correlation_id.external.ptr);
325334

326-
FixedSizeSnapshotRecord<8> snapshot;
327-
c.make_record(7, attr, data, snapshot.builder(), correlation);
335+
FixedSizeSnapshotRecord<10> snapshot;
336+
c.make_record(8, attr, data, snapshot.builder(), correlation);
328337
if (!mpi_rank_entry.empty())
329338
snapshot.builder().append(mpi_rank_entry);
330339

0 commit comments

Comments
 (0)