Skip to content

Commit 2503cde

Browse files
authored
Merge pull request #318 from djwoun/rocm_smi_init
rocm_smi: Fix event count upper-bound mismatch between the initial event count and event table initialization & handle unsupported events
2 parents a1f0c41 + 772477a commit 2503cde

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

src/components/rocm_smi/rocs.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,7 +1022,12 @@ init_device_table(void)
10221022

10231023
for (i = 0; i < device_count; ++i) {
10241024
status = rsmi_dev_pci_bandwidth_get_p(i, &pcie_table[i]);
1025-
if (status != RSMI_STATUS_SUCCESS && status != RSMI_STATUS_NOT_YET_IMPLEMENTED) {
1025+
/*
1026+
Retrieve available PCIe bandwidths. This function is not supported on newer hardware (e.g., MI250 and MI300), but
1027+
is supported on some hardware. Ignore statuses indicating lack of support or
1028+
unimplemented functionality.
1029+
*/
1030+
if (status != RSMI_STATUS_SUCCESS && status != RSMI_STATUS_NOT_YET_IMPLEMENTED && status != RSMI_STATUS_NOT_SUPPORTED) {
10261031
papi_errno = PAPI_EMISC;
10271032
goto fn_fail;
10281033
}
@@ -1377,7 +1382,7 @@ handle_derived_events_count(const char *v_name, int32_t dev, int64_t v_variant,
13771382
(*events_count) += ROCS_PCI_BW_VARIANT__CURRENT + 1;
13781383
}
13791384
int i;
1380-
for (i = 0; i < ROCS_PCI_BW_VARIANT__LANE_IDX - ROCS_PCI_BW_VARIANT__CURRENT + 1; ++i) {
1385+
for (i = 0; i < ROCS_PCI_BW_VARIANT__LANE_IDX - ROCS_PCI_BW_VARIANT__CURRENT; ++i) {
13811386
(*events_count) += pcie_table[dev].transfer_rate.num_supported;
13821387
}
13831388

@@ -1584,7 +1589,6 @@ handle_derived_events(const char *v_name, int32_t dev, int64_t v_variant, int64_
15841589
if (pcie_table[dev].transfer_rate.num_supported == 0) {
15851590
return ROCS_EVENT_TYPE__DERIVED;
15861591
}
1587-
15881592
int64_t i;
15891593
for (i = 0; i <= ROCS_PCI_BW_VARIANT__CURRENT; ++i) {
15901594
events[*events_count].id = *events_count;
@@ -1604,7 +1608,7 @@ handle_derived_events(const char *v_name, int32_t dev, int64_t v_variant, int64_
16041608
}
16051609

16061610
int64_t j;
1607-
for (; i <= ROCS_PCI_BW_VARIANT__LANE_IDX; ++i) {
1611+
for (i = ROCS_PCI_BW_VARIANT__CURRENT + 1; i <= ROCS_PCI_BW_VARIANT__LANE_IDX; ++i) {
16081612
for (j = 0; j < pcie_table[dev].transfer_rate.num_supported; ++j) {
16091613
events[*events_count].id = *events_count;
16101614
events[*events_count].name = get_event_name(v_name, dev, i, j);
@@ -1774,7 +1778,7 @@ handle_xgmi_events(int32_t dev, int *events_count, ntv_event_t *events)
17741778

17751779
status = rsmi_dev_counter_group_supported_p(dev, RSMI_EVNT_GRP_XGMI);
17761780
if (status == RSMI_STATUS_SUCCESS) {
1777-
for (i = RSMI_EVNT_XGMI_FIRST; i <= RSMI_EVNT_XGMI_LAST; ++i) {
1781+
for (i = RSMI_EVNT_XGMI_FIRST; i < RSMI_EVNT_XGMI_LAST; ++i) {
17781782
events[*events_count].id = *events_count;
17791783
events[*events_count].name = get_event_name("rsmi_dev_xgmi_evt_get", dev, i, -1);
17801784
events[*events_count].descr = get_event_descr("rsmi_dev_xgmi_evt_get", i, -1);
@@ -1794,7 +1798,7 @@ handle_xgmi_events(int32_t dev, int *events_count, ntv_event_t *events)
17941798

17951799
status = rsmi_dev_counter_group_supported_p(dev, RSMI_EVNT_GRP_XGMI_DATA_OUT);
17961800
if (status == RSMI_STATUS_SUCCESS) {
1797-
for (i = RSMI_EVNT_XGMI_DATA_OUT_FIRST; i <= RSMI_EVNT_XGMI_DATA_OUT_LAST; ++i) {
1801+
for (i = RSMI_EVNT_XGMI_DATA_OUT_FIRST; i < RSMI_EVNT_XGMI_DATA_OUT_LAST; ++i) {
17981802
events[*events_count].id = *events_count;
17991803
events[*events_count].name = get_event_name("rsmi_dev_xgmi_evt_get", dev, i, -1);
18001804
events[*events_count].descr = get_event_descr("rsmi_dev_xgmi_evt_get", i, -1);

0 commit comments

Comments
 (0)