Skip to content

Commit 6ec04e3

Browse files
Jie1zhang authored and alexdeucher committed
drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA
This commit updates the VM flush implementation for the SDMA engine. - Added a new function `sdma_v4_4_2_get_invalidate_req` to construct the VM_INVALIDATE_ENG0_REQ register value for the specified VMID and flush type. This function ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and L2 PDEs) are invalidated. - Modified the `sdma_v4_4_2_ring_emit_vm_flush` function to use the new `sdma_v4_4_2_get_invalidate_req` function. The updated function emits the necessary register writes and waits to perform a VM flush for the specified VMID. It updates the PTB address registers and issues a VM invalidation request using the specified VM invalidation engine. - Included the necessary header file `gc/gc_9_0_sh_mask.h` to provide access to the required register definitions. v2: vm flush by the vm invalidation packet (Lijo) v3: code style and define the macro for the vm invalidation packet (Christian) v4: Format definition sdma vm invalidate packet (Lijo) Suggested-by: Lijo Lazar <[email protected]> Signed-off-by: Jesse Zhang <[email protected]> Reviewed-by: Lijo Lazar <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent b09cdeb commit 6ec04e3

File tree

2 files changed

+133
-14
lines changed

2 files changed

+133
-14
lines changed

drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "amdgpu_ucode.h"
3232
#include "amdgpu_trace.h"
3333
#include "amdgpu_reset.h"
34+
#include "gc/gc_9_0_sh_mask.h"
3435

3536
#include "sdma/sdma_4_4_2_offset.h"
3637
#include "sdma/sdma_4_4_2_sh_mask.h"
@@ -1290,21 +1291,71 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
12901291
seq, 0xffffffff, 4);
12911292
}
12921293

1293-
1294-
/**
1295-
* sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
1294+
/*
1295+
* sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value
1296+
* @vmid: The VMID to invalidate
1297+
* @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight)
12961298
*
1297-
* @ring: amdgpu_ring pointer
1298-
* @vmid: vmid number to use
1299-
* @pd_addr: address
1299+
* This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID
1300+
* and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and
1301+
* L2 PDEs) are invalidated.
1302+
*/
1303+
static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid,
1304+
uint32_t flush_type)
1305+
{
1306+
u32 req = 0;
1307+
1308+
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
1309+
PER_VMID_INVALIDATE_REQ, 1 << vmid);
1310+
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
1311+
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
1312+
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
1313+
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
1314+
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
1315+
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
1316+
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
1317+
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
1318+
1319+
return req;
1320+
}
1321+
1322+
/*
1323+
* sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA
1324+
* @ring: The SDMA ring
1325+
* @vmid: The VMID to flush
1326+
* @pd_addr: The page directory address
13001327
*
1301-
* Update the page table base and flush the VM TLB
1302-
* using sDMA.
1328+
* This function emits the necessary register writes and waits to perform a VM flush for the
1329+
* specified VMID. It updates the PTB address registers and issues a VM invalidation request
1330+
* using the specified VM invalidation engine.
13031331
*/
13041332
static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
1305-
unsigned vmid, uint64_t pd_addr)
1333+
unsigned int vmid, uint64_t pd_addr)
13061334
{
1307-
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1335+
struct amdgpu_device *adev = ring->adev;
1336+
uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0);
1337+
unsigned int eng = ring->vm_inv_eng;
1338+
struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
1339+
1340+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
1341+
(hub->ctx_addr_distance * vmid),
1342+
lower_32_bits(pd_addr));
1343+
1344+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
1345+
(hub->ctx_addr_distance * vmid),
1346+
upper_32_bits(pd_addr));
1347+
/*
1348+
* Construct and emit the VM invalidation packet
1349+
*/
1350+
amdgpu_ring_write(ring,
1351+
SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) |
1352+
SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) |
1353+
SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) |
1354+
SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) |
1355+
SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng));
1356+
amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req));
1357+
amdgpu_ring_write(ring, 0);
1358+
amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid)));
13081359
}
13091360

13101361
static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -2126,8 +2177,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
21262177
3 + /* hdp invalidate */
21272178
6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
21282179
/* sdma_v4_4_2_ring_emit_vm_flush */
2129-
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2130-
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2180+
4 + 2 * 3 +
21312181
10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
21322182
.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
21332183
.emit_ib = sdma_v4_4_2_ring_emit_ib,
@@ -2159,8 +2209,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
21592209
3 + /* hdp invalidate */
21602210
6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
21612211
/* sdma_v4_4_2_ring_emit_vm_flush */
2162-
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2163-
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2212+
4 + 2 * 3 +
21642213
10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
21652214
.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
21662215
.emit_ib = sdma_v4_4_2_ring_emit_ib,

drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@
6464
#define HEADER_BARRIER 5
6565
#define SDMA_OP_AQL_COPY 0
6666
#define SDMA_OP_AQL_BARRIER_OR 0
/* VM invalidation packet op/sub-op codes.
 * vm invalidation is only available for GC9.4.3/GC9.4.4/GC9.5.0.
 */
#define SDMA_OP_VM_INVALIDATE 8
#define SDMA_SUBOP_VM_INVALIDATE 4
6770

6871
/*define for op field*/
6972
#define SDMA_PKT_HEADER_op_offset 0
@@ -3331,5 +3334,72 @@
33313334
#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
33323335
#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
33333336

/*
** Definitions for SDMA_PKT_VM_INVALIDATION packet
**
** NOTE: unlike the older helpers in this file, the field-encode macros
** below parenthesize their argument so that a compound expression such
** as MACRO(a | b) is masked as a whole ('&' binds tighter than '|').
*/

/*define for HEADER word*/
/*define for op field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)

/*define for sub_op field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)

/*define for xcc0_eng_id field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask 0x0000001F
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift 16
#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift)

/*define for xcc1_eng_id field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask 0x0000001F
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift 21
#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift)

/*define for mmhub_eng_id field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask 0x0000001F
#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift 26
#define SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift)

/*define for INVALIDATEREQ word*/
/*define for invalidatereq field*/
#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)

/*define for ADDRESSRANGELO word*/
/*define for addressrangelo field*/
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)

/*define for ADDRESSRANGEHI word*/
/*define for invalidateack field*/
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)

/*define for addressrangehi field*/
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)

/*define for reserved field*/
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
33343404

33353405
#endif /* __SDMA_PKT_OPEN_H_ */

0 commit comments

Comments
 (0)