Skip to content

Commit fc7f1d9

Browse files
jokim-amdalexdeucher
authored and committed
drm/amdkfd: fix and enable ttmp setup for gfx11
The MES cached process context must be cleared on adding any queue for the first time. For proper debug support, the MES will clear its cached process context on the first call to SET_SHADER_DEBUGGER. This allows TTMPs to be persistently enabled in a safe manner. Signed-off-by: Jonathan Kim <[email protected]> Reviewed-by: Eric Huang <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent f9acfaf commit fc7f1d9

File tree

6 files changed

+39
-20
lines changed

6 files changed

+39
-20
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
637637
{
638638
uint32_t data = 0;
639639

640-
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
640+
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
641641
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
642642
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
643643

drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2759,6 +2759,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
27592759

27602760
if (pdd->qpd.queue_count)
27612761
return -EEXIST;
2762+
2763+
/*
2764+
* Setup TTMPs by default.
2765+
* Note that this call must remain here for MES ADD QUEUE to
2766+
* skip_process_ctx_clear unconditionally as the first call to
2767+
* SET_SHADER_DEBUGGER clears any stale process context data
2768+
* saved in MES.
2769+
*/
2770+
if (pdd->dev->kfd->shared_resources.enable_mes)
2771+
kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
27622772
}
27632773

27642774
p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
@@ -2852,7 +2862,8 @@ static int runtime_disable(struct kfd_process *p)
28522862
if (!pdd->dev->kfd->shared_resources.enable_mes)
28532863
debug_refresh_runlist(pdd->dev->dqm);
28542864
else
2855-
kfd_dbg_set_mes_debug_mode(pdd);
2865+
kfd_dbg_set_mes_debug_mode(pdd,
2866+
!kfd_dbg_has_cwsr_workaround(pdd->dev));
28562867
}
28572868
}
28582869

drivers/gpu/drm/amd/amdkfd/kfd_debug.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -344,11 +344,10 @@ static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable)
344344
return r;
345345
}
346346

347-
int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
347+
int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
348348
{
349349
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
350350
uint32_t flags = pdd->process->dbg_flags;
351-
bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
352351

353352
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
354353
return 0;
@@ -432,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
432431
if (!pdd->dev->kfd->shared_resources.enable_mes)
433432
r = debug_map_and_unlock(pdd->dev->dqm);
434433
else
435-
r = kfd_dbg_set_mes_debug_mode(pdd);
434+
r = kfd_dbg_set_mes_debug_mode(pdd, true);
436435

437436
kfd_dbg_clear_dev_watch_id(pdd, watch_id);
438437

@@ -474,7 +473,7 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
474473
if (!pdd->dev->kfd->shared_resources.enable_mes)
475474
r = debug_map_and_unlock(pdd->dev->dqm);
476475
else
477-
r = kfd_dbg_set_mes_debug_mode(pdd);
476+
r = kfd_dbg_set_mes_debug_mode(pdd, true);
478477

479478
/* HWS is broken so no point in HW rollback but release the watchpoint anyways */
480479
if (r)
@@ -516,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
516515
if (!pdd->dev->kfd->shared_resources.enable_mes)
517516
r = debug_refresh_runlist(pdd->dev->dqm);
518517
else
519-
r = kfd_dbg_set_mes_debug_mode(pdd);
518+
r = kfd_dbg_set_mes_debug_mode(pdd, true);
520519

521520
if (r) {
522521
target->dbg_flags = prev_flags;
@@ -539,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
539538
if (!pdd->dev->kfd->shared_resources.enable_mes)
540539
debug_refresh_runlist(pdd->dev->dqm);
541540
else
542-
kfd_dbg_set_mes_debug_mode(pdd);
541+
kfd_dbg_set_mes_debug_mode(pdd, true);
543542
}
544543
}
545544

@@ -601,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
601600
if (!pdd->dev->kfd->shared_resources.enable_mes)
602601
debug_refresh_runlist(pdd->dev->dqm);
603602
else
604-
kfd_dbg_set_mes_debug_mode(pdd);
603+
kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
605604
}
606605

607606
kfd_dbg_set_workaround(target, false);
@@ -717,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
717716
if (!pdd->dev->kfd->shared_resources.enable_mes)
718717
r = debug_refresh_runlist(pdd->dev->dqm);
719718
else
720-
r = kfd_dbg_set_mes_debug_mode(pdd);
719+
r = kfd_dbg_set_mes_debug_mode(pdd, true);
721720

722721
if (r) {
723722
target->runtime_info.runtime_state =
@@ -851,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
851850
if (!pdd->dev->kfd->shared_resources.enable_mes)
852851
r = debug_refresh_runlist(pdd->dev->dqm);
853852
else
854-
r = kfd_dbg_set_mes_debug_mode(pdd);
853+
r = kfd_dbg_set_mes_debug_mode(pdd, true);
855854

856855
if (r)
857856
break;
@@ -883,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
883882
if (!pdd->dev->kfd->shared_resources.enable_mes)
884883
r = debug_refresh_runlist(pdd->dev->dqm);
885884
else
886-
r = kfd_dbg_set_mes_debug_mode(pdd);
885+
r = kfd_dbg_set_mes_debug_mode(pdd, true);
887886

888887
if (r)
889888
break;

drivers/gpu/drm/amd/amdkfd/kfd_debug.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
126126
return true;
127127
}
128128

129-
int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd);
129+
int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
130+
131+
static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
132+
{
133+
return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) &&
134+
KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
135+
(KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
136+
KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
137+
(dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
138+
}
130139
#endif

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
228228
queue_input.tma_addr = qpd->tma_addr;
229229
queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
230230
queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
231+
queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
232+
kfd_dbg_has_ttmps_always_setup(q->device);
231233

232234
queue_type = convert_to_mes_queue_type(q->properties.type);
233235
if (queue_type < 0) {

drivers/gpu/drm/amd/amdkfd/kfd_topology.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "kfd_device_queue_manager.h"
3939
#include "kfd_iommu.h"
4040
#include "kfd_svm.h"
41+
#include "kfd_debug.h"
4142
#include "amdgpu_amdkfd.h"
4243
#include "amdgpu_ras.h"
4344
#include "amdgpu.h"
@@ -1931,6 +1932,9 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
19311932
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
19321933
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
19331934

1935+
if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
1936+
dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
1937+
19341938
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
19351939
if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
19361940
dev->node_props.debug_prop |=
@@ -1941,20 +1945,14 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
19411945
HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
19421946
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
19431947

1944-
if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 4, 2))
1945-
dev->node_props.debug_prop |=
1946-
HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
1947-
19481948
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
19491949
dev->node_props.capability |=
19501950
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
19511951
} else {
19521952
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
19531953
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
19541954

1955-
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
1956-
dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
1957-
else
1955+
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
19581956
dev->node_props.capability |=
19591957
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
19601958
}

0 commit comments

Comments
 (0)