Skip to content

Commit e45b011

Browse files
mukjoshi authored and alexdeucher committed
drm/amdkfd: Fix CU occupancy for GFX 9.4.3
Make CU occupancy calculations work on GFX 9.4.3 by updating the logic to handle multiple XCCs correctly.

Signed-off-by: Mukul Joshi <[email protected]>
Reviewed-by: Harish Kasiviswanathan <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 6ae9e1a commit e45b011

File tree

4 files changed

+29
-11
lines changed

4 files changed

+29
-11
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -963,14 +963,14 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
963963
*/
964964
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
965965
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
966-
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
967-
reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst,
966+
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
967+
reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
968968
mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
969969
wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
970970
if (wave_cnt != 0) {
971971
queue_cnt->wave_cnt += wave_cnt;
972972
queue_cnt->doorbell_off =
973-
(RREG32_SOC15(GC, inst, mmCP_HQD_PQ_DOORBELL_CONTROL) &
973+
(RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
974974
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
975975
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
976976
}
@@ -1033,7 +1033,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
10331033
DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
10341034

10351035
lock_spi_csq_mutexes(adev);
1036-
soc15_grbm_select(adev, 1, 0, 0, 0, inst);
1036+
soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
10371037

10381038
/*
10391039
* Iterate through the shader engines and arrays of the device
@@ -1046,7 +1046,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
10461046
se_cnt = adev->gfx.config.max_shader_engines;
10471047
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
10481048
amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
1049-
queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
1049+
queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
10501050

10511051
/*
10521052
* Assumption: queue map encodes following schema: four
@@ -1071,7 +1071,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
10711071
}
10721072

10731073
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
1074-
soc15_grbm_select(adev, 0, 0, 0, 0, inst);
1074+
soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
10751075
unlock_spi_csq_mutexes(adev);
10761076

10771077
/* Update the output parameters and return */

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3542,15 +3542,19 @@ int debug_refresh_runlist(struct device_queue_manager *dqm)
35423542

35433543
bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
35443544
struct qcm_process_device *qpd,
3545-
int doorbell_off)
3545+
int doorbell_off, u32 *queue_format)
35463546
{
35473547
struct queue *q;
35483548
bool r = false;
35493549

3550+
if (!queue_format)
3551+
return r;
3552+
35503553
dqm_lock(dqm);
35513554

35523555
list_for_each_entry(q, &qpd->queues_list, list) {
35533556
if (q->properties.doorbell_off == doorbell_off) {
3557+
*queue_format = q->properties.format;
35543558
r = true;
35553559
goto out;
35563560
}

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -326,7 +326,7 @@ int debug_map_and_unlock(struct device_queue_manager *dqm);
326326
int debug_refresh_runlist(struct device_queue_manager *dqm);
327327
bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
328328
struct qcm_process_device *qpd,
329-
int doorbell_off);
329+
int doorbell_off, u32 *queue_format);
330330

331331
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
332332
{

drivers/gpu/drm/amd/amdkfd/kfd_process.c

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -272,6 +272,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
272272
struct kfd_process_device *pdd = NULL;
273273
int i;
274274
struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES];
275+
u32 queue_format;
275276

276277
memset(cu_occupancy, 0x0, sizeof(cu_occupancy));
277278

@@ -292,14 +293,27 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
292293
wave_cnt = 0;
293294
max_waves_per_cu = 0;
294295

296+
/*
297+
* For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
298+
* For AQL queues, because of cooperative dispatch we multiply the wave count
299+
* by number of XCCs in the partition to get the total wave counts across all
300+
* XCCs in the partition.
301+
* For PM4 queues, there is no cooperative dispatch so wave_cnt stays as it is.
302+
*/
295303
dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
296-
&max_waves_per_cu, 0);
304+
&max_waves_per_cu, ffs(dev->xcc_mask) - 1);
297305

298306
for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
299307
if (cu_occupancy[i].wave_cnt != 0 &&
300308
kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd,
301-
cu_occupancy[i].doorbell_off))
302-
wave_cnt += cu_occupancy[i].wave_cnt;
309+
cu_occupancy[i].doorbell_off,
310+
&queue_format)) {
311+
if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4))
312+
wave_cnt += cu_occupancy[i].wave_cnt;
313+
else
314+
wave_cnt += (NUM_XCC(dev->xcc_mask) *
315+
cu_occupancy[i].wave_cnt);
316+
}
303317
}
304318

305319
/* Translate wave count to number of compute units */

0 commit comments

Comments
 (0)