Skip to content

Commit fdf8364

Browse files
Marek Olšák authored and alexdeucher committed
drm/amdgpu: invalidate L2 before SDMA IBs (v2)
This fixes GPU hangs due to cache coherency issues.

v2: Split the version bump to a separate patch

Signed-off-by: Marek Olšák <[email protected]>
Reviewed-by: Christian König <[email protected]>
Tested-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
Cc: [email protected]
1 parent c938628 commit fdf8364

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,22 @@
7373
#define SDMA_OP_AQL_COPY 0
7474
#define SDMA_OP_AQL_BARRIER_OR 0
7575

76+
#define SDMA_GCR_RANGE_IS_PA (1 << 18)
77+
#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16)
78+
#define SDMA_GCR_GL2_WB (1 << 15)
79+
#define SDMA_GCR_GL2_INV (1 << 14)
80+
#define SDMA_GCR_GL2_DISCARD (1 << 13)
81+
#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11)
82+
#define SDMA_GCR_GL2_US (1 << 10)
83+
#define SDMA_GCR_GL1_INV (1 << 9)
84+
#define SDMA_GCR_GLV_INV (1 << 8)
85+
#define SDMA_GCR_GLK_INV (1 << 7)
86+
#define SDMA_GCR_GLK_WB (1 << 6)
87+
#define SDMA_GCR_GLM_INV (1 << 5)
88+
#define SDMA_GCR_GLM_WB (1 << 4)
89+
#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2)
90+
#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0)
91+
7692
/*define for op field*/
7793
#define SDMA_PKT_HEADER_op_offset 0
7894
#define SDMA_PKT_HEADER_op_mask 0x000000FF

drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,18 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
382382
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
383383
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
384384

385+
/* Invalidate L2, because if we don't do it, we might get stale cache
386+
* lines from previous IBs.
387+
*/
388+
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
389+
amdgpu_ring_write(ring, 0);
390+
amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
391+
SDMA_GCR_GL2_WB |
392+
SDMA_GCR_GLM_INV |
393+
SDMA_GCR_GLM_WB) << 16);
394+
amdgpu_ring_write(ring, 0xffffff80);
395+
amdgpu_ring_write(ring, 0xffff);
396+
385397
/* An IB packet must end on a 8 DW boundary--the next dword
386398
* must be on a 8-dword boundary. Our IB packet below is 6
387399
* dwords long, thus add x number of NOPs, such that, in
@@ -1595,7 +1607,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
15951607
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
15961608
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
15971609
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
1598-
.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
1610+
.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
15991611
.emit_ib = sdma_v5_0_ring_emit_ib,
16001612
.emit_fence = sdma_v5_0_ring_emit_fence,
16011613
.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,

0 commit comments

Comments
 (0)