Skip to content

Commit 6f236ff

Browse files
authored
[SPM] Clean up obsolete SPM logic and refine some SPM-related definitions (#157)
* Clean up obsolete SPM logic
* Add PERFCOUNTER_SELECT1 to CounterRegInfo
* Add RLC_SPM_PERFMON_SAMPLE_DELAY_MAX
1 parent fdedcfc commit 6f236ff

20 files changed

+544
-1222
lines changed

gfxip/gfx10/gfx10_block_table.h

Lines changed: 109 additions & 91 deletions
Large diffs are not rendered by default.

gfxip/gfx10/gfx10_primitives.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,7 @@ class gfx10_cntx_prim {
128128
REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_ADDR);
129129
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
130130
REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_DATA);
131+
static constexpr Register RLC_SPM_PERFMON_SAMPLE_DELAY_MAX__ADDR = REG_32B_NULL;
131132
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
132133
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
133134

@@ -165,15 +166,15 @@ class gfx10_cntx_prim {
165166
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
166167
const uint32_t& instance_index) {
167168
const auto* block_info = counter_des.block_info;
168-
return block_info->delay_info[instance_index].val - 1;
169+
return block_info->delay_info.val[instance_index];
169170
}
170171

171172
// SPM delay functions for se instance
172173
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
173174
const uint32_t& instance_index) {
174175
const auto* block_info = counter_des.block_info;
175176
int delay_index = se_index * block_info->instance_count + instance_index;
176-
return block_info->delay_info[delay_index].val - 1;
177+
return block_info->delay_info.val[delay_index];
177178
}
178179

179180
// GRBM broadcasting mode

gfxip/gfx11/gfx11_block_table.h

Lines changed: 90 additions & 87 deletions
Large diffs are not rendered by default.

gfxip/gfx11/gfx11_primitives.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,7 @@ class gfx11_cntx_prim {
135135
REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_ADDR);
136136
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
137137
REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_DATA);
138+
static constexpr Register RLC_SPM_PERFMON_SAMPLE_DELAY_MAX__ADDR = REG_32B_NULL;
138139
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
139140
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
140141

@@ -172,15 +173,15 @@ class gfx11_cntx_prim {
172173
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
173174
const uint32_t& instance_index) {
174175
const auto* block_info = counter_des.block_info;
175-
return block_info->delay_info[instance_index].val - 1;
176+
return block_info->delay_info.val[instance_index];
176177
}
177178

178179
// SPM delay functions for se instance
179180
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
180181
const uint32_t& instance_index) {
181182
const auto* block_info = counter_des.block_info;
182183
int delay_index = se_index * block_info->instance_count + instance_index;
183-
return block_info->delay_info[delay_index].val - 1;
184+
return block_info->delay_info.val[delay_index];
184185
}
185186

186187
// GRBM broadcasting mode

gfxip/gfx12/gfx12_block_table.h

Lines changed: 43 additions & 45 deletions
Large diffs are not rendered by default.

gfxip/gfx12/gfx12_primitives.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,7 @@ class gfx12_cntx_prim {
113113
REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_ADDR);
114114
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
115115
REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_DATA);
116+
static constexpr Register RLC_SPM_PERFMON_SAMPLE_DELAY_MAX__ADDR = REG_32B_NULL;
116117
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
117118
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
118119

@@ -153,15 +154,15 @@ class gfx12_cntx_prim {
153154
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
154155
const uint32_t& instance_index) {
155156
const auto* block_info = counter_des.block_info;
156-
return block_info->delay_info[instance_index].val - 1;
157+
return block_info->delay_info.val[instance_index];
157158
}
158159

159160
// SPM delay functions for se instance
160161
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
161162
const uint32_t& instance_index) {
162163
const auto* block_info = counter_des.block_info;
163164
int delay_index = se_index * block_info->instance_count + instance_index;
164-
return block_info->delay_info[delay_index].val - 1;
165+
return block_info->delay_info.val[delay_index];
165166
}
166167

167168
// GRBM broadcasting mode

gfxip/gfx9/gfx9_block_table.h

Lines changed: 197 additions & 441 deletions
Large diffs are not rendered by default.

gfxip/gfx9/gfx9_primitives.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,8 @@ class gfx9_cntx_prim {
133133
REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_ADDR);
134134
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
135135
REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_DATA);
136+
static constexpr Register RLC_SPM_PERFMON_SAMPLE_DELAY_MAX__ADDR =
137+
REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_SAMPLE_DELAY_MAX);
136138
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
137139
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
138140

@@ -189,15 +191,15 @@ class gfx9_cntx_prim {
189191
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
190192
const uint32_t& instance_index) {
191193
const auto* block_info = counter_des.block_info;
192-
return block_info->delay_info[instance_index].val - 1;
194+
return block_info->delay_info.val[instance_index];
193195
}
194196

195197
// SPM delay functions for se instance
196198
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
197199
const uint32_t& instance_index) {
198200
const auto* block_info = counter_des.block_info;
199201
int delay_index = se_index * block_info->instance_count + instance_index;
200-
return block_info->delay_info[delay_index].val - 1;
202+
return block_info->delay_info.val[delay_index];
201203
}
202204

203205
// GRBM broadcasting mode

src/core/aql_profile.cpp

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,8 @@
5252
}
5353

5454
// Getting SPM data using driver API
55-
namespace spm_kfd_namespace {
5655
hsa_status_t spm_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
5756
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data);
58-
}
5957

6058
// PC sampling callback data
6159
struct pcsmp_callback_data_t {
@@ -255,6 +253,16 @@ PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_start(hsa_ven_amd_aqlprofile_prof
255253
pm4_builder::TraceConfig trace_config{};
256254
const uint64_t se_number_total = pm4_factory->GetShaderEnginesNumber();
257255

256+
trace_config.spm_sq_32bit_mode = true;
257+
trace_config.spm_has_core1 = (pm4_factory->GetGpuId() == aql_profile::MI100_GPU_ID) ||
258+
(pm4_factory->GetGpuId() == aql_profile::MI200_GPU_ID);
259+
trace_config.spm_sample_delay_max = pm4_factory->GetSpmSampleDelayMax();
260+
trace_config.sampleRate = 1600;
261+
262+
trace_config.xcc_number = pm4_factory->GetXccNumber();
263+
trace_config.se_number = se_number_total / trace_config.xcc_number;
264+
trace_config.sa_number = pm4_factory->GetGpuId() >= aql_profile::GFX10_GPU_ID ? 2 : 0;
265+
258266
if (profile->parameters) {
259267
for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters;
260268
p < (profile->parameters + profile->parameter_count); ++p) {
@@ -752,7 +760,7 @@ hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* prof
752760
sample_ptr = reinterpret_cast<char*>(sample_ptr) + sample_capacity;
753761
}
754762
} else {
755-
status = spm_kfd_namespace::spm_iterate_data(profile, callback, data);
763+
status = spm_iterate_data(profile, callback, data);
756764
}
757765
}
758766
} else {

src/core/gfx9_factory.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,8 @@ class Mi100Factory : public Gfx9Factory {
5252
public:
5353
explicit Mi100Factory(const AgentInfo* agent_info);
5454

55+
virtual uint32_t GetSpmSampleDelayMax() { return 0x34; }
56+
5557
protected:
5658
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
5759
};

0 commit comments

Comments
 (0)