@@ -31,6 +31,7 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_reset.h"
+#include "gc/gc_9_0_sh_mask.h"
 
 #include "sdma/sdma_4_4_2_offset.h"
 #include "sdma/sdma_4_4_2_sh_mask.h"
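
(Note: gc/gc_9_0_sh_mask.h is pulled in for the VM_INVALIDATE_ENG0_REQ field shift/mask definitions, e.g. VM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT, that the REG_SET_FIELD() calls in the next hunk rely on; the sdma_4_4_2 headers only describe the SDMA block's own registers.)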
@@ -1290,21 +1291,71 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 			       seq, 0xffffffff, 4);
 }
 
-
-/**
- * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
+/*
+ * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value
+ * @vmid: The VMID to invalidate
+ * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight)
  *
- * @ring: amdgpu_ring pointer
- * @vmid: vmid number to use
- * @pd_addr: address
+ * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID
+ * and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and
+ * L2 PDEs) are invalidated.
+ */
+static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid,
+					       uint32_t flush_type)
+{
+	u32 req = 0;
+
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return req;
+}
+
+/*
+ * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA
+ * @ring: The SDMA ring
+ * @vmid: The VMID to flush
+ * @pd_addr: The page directory address
  *
- * Update the page table base and flush the VM TLB
- * using sDMA.
+ * This function emits the necessary register writes and waits to perform a VM flush for the
+ * specified VMID. It updates the PTB address registers and issues a VM invalidation request
+ * using the specified VM invalidation engine.
  */
 static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					   unsigned vmid, uint64_t pd_addr)
+					   unsigned int vmid, uint64_t pd_addr)
 {
-	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0);
+	unsigned int eng = ring->vm_inv_eng;
+	struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
+
+	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+			      (hub->ctx_addr_distance * vmid),
+			      lower_32_bits(pd_addr));
+
+	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+			      (hub->ctx_addr_distance * vmid),
+			      upper_32_bits(pd_addr));
+	/*
+	 * Construct and emit the VM invalidation packet
+	 */
+	amdgpu_ring_write(ring,
+			  SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) |
+			  SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) |
+			  SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) |
+			  SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) |
+			  SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng));
+	amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid)));
 }
 
 static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
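
Two notes on the new flush path. It no longer routes through amdgpu_gmc_emit_flush_gpu_tlb(): the two emit_wreg calls reprogram the per-VMID page-table-base registers on the ring's hub, and the four-dword VM_INVALIDATION packet (HEADER, INVALIDATEREQ, ADDRESSRANGELO, ADDRESSRANGEHI) issues the request, with the MMHUB engine field carrying the ring's allocated vm_inv_eng and the ack field naming the bit (BIT(vmid)) the engine waits on, so no separate reg-wait packets are emitted. As for REG_SET_FIELD(), it is the driver's generic register-field helper; a minimal sketch of what one call in sdma_v4_4_2_get_invalidate_req() works out to, assuming the usual <REG>__<FIELD>__SHIFT / <REG>__<FIELD>_MASK naming from gc_9_0_sh_mask.h (the helper name below is hypothetical, for illustration only):

    /* Hypothetical standalone equivalent of
     * REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type):
     * clear the field, then OR in the shifted, masked value.
     */
    static inline u32 req_set_flush_type(u32 req, u32 flush_type)
    {
    	req &= ~VM_INVALIDATE_ENG0_REQ__FLUSH_TYPE_MASK;
    	req |= (flush_type << VM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) &
    	       VM_INVALIDATE_ENG0_REQ__FLUSH_TYPE_MASK;
    	return req;
    }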
@@ -2126,8 +2177,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
 		3 + /* hdp invalidate */
 		6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
 		/* sdma_v4_4_2_ring_emit_vm_flush */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		4 + 2 * 3 +
 		10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
 	.emit_ib = sdma_v4_4_2_ring_emit_ib,
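
The emit_frame_size accounting follows: the generic gmc path reserved SOC15_FLUSH_GPU_TLB_NUM_WREG register writes at 3 dwords each plus SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT waits at 6 dwords each, while the new sdma_v4_4_2_ring_emit_vm_flush() emits a fixed 10 dwords; the same substitution is made for the page ring funcs in the next hunk. Tallying against the function above (assuming each amdgpu_ring_emit_wreg() lands as the driver's 3-dword SRBM write, per sdma_v4_4_2_ring_emit_wreg()):

      4       /* VM_INVALIDATION packet: header + invalidate req + range lo + range hi */
    + 2 * 3   /* two amdgpu_ring_emit_wreg() calls for ctx0_ptb_addr lo32/hi32 */
    = 10 dwords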
@@ -2159,8 +2209,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
 		3 + /* hdp invalidate */
 		6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
 		/* sdma_v4_4_2_ring_emit_vm_flush */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		4 + 2 * 3 +
 		10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
 	.emit_ib = sdma_v4_4_2_ring_emit_ib,