Skip to content

Commit 8a7820c

Browse files
hkasivis authored and alexdeucher committed
drm/amdgpu: Reduce dequeue retry timeout for gfx9 family
Dequeue retry timeout controls the interval between checks for unmet conditions. On MI series, reduce this from 0x40 to 0x1 (~1 us). The cost of the additional bandwidth consumed by the CP when polling memory shouldn't be substantial. Signed-off-by: Harish Kasiviswanathan <[email protected]> Reviewed-by: Jonathan Kim <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 02fc2f3 commit 8a7820c

10 files changed

+72
-52
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
189189
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
190190
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
191191
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
192-
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
192+
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
193193
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
194194
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
195195
.hqd_reset = kgd_gfx_v9_hqd_reset,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
415415
.set_address_watch = kgd_gfx_v9_set_address_watch,
416416
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
417417
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
418-
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
418+
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
419419
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
420420
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
421421
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -541,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
541541
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
542542
.program_trap_handler_settings =
543543
kgd_gfx_v9_program_trap_handler_settings,
544-
.build_grace_period_packet_info =
545-
kgd_gfx_v9_build_grace_period_packet_info,
544+
.build_dequeue_wait_counts_packet_info =
545+
kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
546546
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
547547
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
548548
.disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
10211021
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
10221022
}
10231023

1024-
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
1024+
void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
10251025
uint32_t wait_times,
1026-
uint32_t grace_period,
1026+
uint32_t sch_wave,
1027+
uint32_t que_sleep,
10271028
uint32_t *reg_offset,
10281029
uint32_t *reg_data)
10291030
{
10301031
*reg_data = wait_times;
10311032

1032-
/*
1033-
* The CP cannont handle a 0 grace period input and will result in
1034-
* an infinite grace period being set so set to 1 to prevent this.
1035-
*/
1036-
if (grace_period == 0)
1037-
grace_period = 1;
1038-
1039-
*reg_data = REG_SET_FIELD(*reg_data,
1040-
CP_IQ_WAIT_TIME2,
1041-
SCH_WAVE,
1042-
grace_period);
1033+
if (sch_wave)
1034+
*reg_data = REG_SET_FIELD(*reg_data,
1035+
CP_IQ_WAIT_TIME2,
1036+
SCH_WAVE,
1037+
sch_wave);
1038+
if (que_sleep)
1039+
*reg_data = REG_SET_FIELD(*reg_data,
1040+
CP_IQ_WAIT_TIME2,
1041+
QUE_SLEEP,
1042+
que_sleep);
10431043

10441044
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
10451045
}
@@ -1115,7 +1115,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
11151115
.set_address_watch = kgd_gfx_v10_set_address_watch,
11161116
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
11171117
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
1118-
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
1118+
.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
11191119
.program_trap_handler_settings = program_trap_handler_settings,
11201120
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
11211121
.hqd_reset = kgd_gfx_v10_hqd_reset,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
5151
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
5252
uint32_t *wait_times,
5353
uint32_t inst);
54-
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
54+
void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
5555
uint32_t wait_times,
56-
uint32_t grace_period,
56+
uint32_t sch_wave,
57+
uint32_t que_sleep,
5758
uint32_t *reg_offset,
5859
uint32_t *reg_data);
5960
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
673673
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
674674
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
675675
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
676-
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
676+
.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
677677
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
678678
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
679679
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
10771077
adev->gfx.cu_info.max_waves_per_simd;
10781078
}
10791079

1080-
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
1080+
void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
10811081
uint32_t wait_times,
1082-
uint32_t grace_period,
1082+
uint32_t sch_wave,
1083+
uint32_t que_sleep,
10831084
uint32_t *reg_offset,
10841085
uint32_t *reg_data)
10851086
{
10861087
*reg_data = wait_times;
10871088

1088-
/*
1089-
* The CP cannot handle a 0 grace period input and will result in
1090-
* an infinite grace period being set so set to 1 to prevent this.
1091-
*/
1092-
if (grace_period == 0)
1093-
grace_period = 1;
1094-
1095-
*reg_data = REG_SET_FIELD(*reg_data,
1096-
CP_IQ_WAIT_TIME2,
1097-
SCH_WAVE,
1098-
grace_period);
1089+
if (sch_wave)
1090+
*reg_data = REG_SET_FIELD(*reg_data,
1091+
CP_IQ_WAIT_TIME2,
1092+
SCH_WAVE,
1093+
sch_wave);
1094+
if (que_sleep)
1095+
*reg_data = REG_SET_FIELD(*reg_data,
1096+
CP_IQ_WAIT_TIME2,
1097+
QUE_SLEEP,
1098+
que_sleep);
10991099

11001100
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
11011101
}
@@ -1255,7 +1255,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
12551255
.set_address_watch = kgd_gfx_v9_set_address_watch,
12561256
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
12571257
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
1258-
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
1258+
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
12591259
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
12601260
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
12611261
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
9797
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
9898
uint32_t *wait_times,
9999
uint32_t inst);
100-
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
100+
void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
101101
uint32_t wait_times,
102-
uint32_t grace_period,
102+
uint32_t sch_wave,
103+
uint32_t que_sleep,
103104
uint32_t *reg_offset,
104105
uint32_t *reg_data);
105106
uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -298,13 +298,14 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
298298
}
299299

300300
static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
301-
uint32_t sch_value, uint32_t *reg_offset,
301+
uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
302302
uint32_t *reg_data)
303303
{
304-
pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
304+
pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
305305
pm->dqm->dev->adev,
306306
pm->dqm->wait_times,
307307
sch_value,
308+
que_sleep,
308309
reg_offset,
309310
reg_data);
310311
}
@@ -319,27 +320,43 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
319320
uint32_t reg_data = 0;
320321

321322
switch (cmd) {
322-
case KFD_DEQUEUE_WAIT_INIT:
323-
/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
324-
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
325-
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
326-
pm_build_dequeue_wait_counts_packet_info(pm, 1, &reg_offset, &reg_data);
327-
else
323+
case KFD_DEQUEUE_WAIT_INIT: {
324+
uint32_t sch_wave = 0, que_sleep = 0;
325+
/* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
326+
* On a 1GHz machine this is roughly 1 microsecond, which is
327+
* about how long it takes to load data out of memory during
328+
* queue connect
329+
* QUE_SLEEP: Wait Count for Dequeue Retry.
330+
*/
331+
if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
332+
KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) {
333+
que_sleep = 1;
334+
335+
/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
336+
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
337+
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
338+
sch_wave = 1;
339+
} else {
328340
return 0;
341+
}
342+
pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
343+
&reg_offset, &reg_data);
344+
329345
break;
346+
}
330347
case KFD_DEQUEUE_WAIT_RESET:
331-
/* function called only to get reg_offset */
332-
pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset, &reg_data);
333-
reg_data = pm->dqm->wait_times;
348+
/* reg_data would be set to dqm->wait_times */
349+
pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data);
334350
break;
335351

336352
case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
337353
/* The CP cannot handle value 0 and it will result in
338-
* an infinite grace period being set so set to 1 to prevent this.
354+
* an infinite grace period being set so set to 1 to prevent this. Also
355+
* avoid debugger API breakage as it sets 0 and expects a low value.
339356
*/
340357
if (!value)
341358
value = 1;
342-
pm_build_dequeue_wait_counts_packet_info(pm, value, &reg_offset, &reg_data);
359+
pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data);
343360
break;
344361
default:
345362
pr_err("Invalid dequeue wait cmd\n");

drivers/gpu/drm/amd/include/kgd_kfd_interface.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,9 +313,10 @@ struct kfd2kgd_calls {
313313
void (*get_iq_wait_times)(struct amdgpu_device *adev,
314314
uint32_t *wait_times,
315315
uint32_t inst);
316-
void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
316+
void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device *adev,
317317
uint32_t wait_times,
318-
uint32_t grace_period,
318+
uint32_t sch_wave,
319+
uint32_t que_sleep,
319320
uint32_t *reg_offset,
320321
uint32_t *reg_data);
321322
void (*get_cu_occupancy)(struct amdgpu_device *adev,

0 commit comments

Comments
 (0)