Skip to content

Commit 6ef2971

Browse files
xiaogang-chen-amd authored and alexdeucher committed
drm/amdkfd: Change kfd/svm page fault drain handling
When an application unmaps VM ranges (munmap), kfd/svm starts draining pending page faults and does not handle any incoming page faults for that process until a deferred work item has been executed by the default system workqueue. The length of this "page faults not handled" period can be long and is unpredictable, which is adverse to kfd page-fault recovery performance. This patch instead uses the timestamp of each incoming page fault to decide whether to drop or recover it. When an application unmaps VM ranges, kfd records the current IH ring timestamp of each GPU device. These timestamps are then used in the kfd page-fault recovery routine. Any page fault that happens on an unmapped range after the unmap event is an application bug (the application accessed a VM range after unmapping it), and it is not the driver's job to cover that. By using page-fault timestamps, there is no need to drain page faults in the deferred work. As a result, the period during which kfd does not handle page faults is reduced and can be controlled. Signed-off-by: Xiaogang.Chen <[email protected]> Reviewed-by: Philip Yang <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 010cc73 commit 6ef2971

File tree

7 files changed

+73
-41
lines changed

7 files changed

+73
-41
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2776,7 +2776,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
27762776
* shouldn't be reported any more.
27772777
*/
27782778
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
2779-
u32 vmid, u32 node_id, uint64_t addr,
2779+
u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
27802780
bool write_fault)
27812781
{
27822782
bool is_compute_context = false;
@@ -2802,7 +2802,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
28022802
addr /= AMDGPU_GPU_PAGE_SIZE;
28032803

28042804
if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
2805-
node_id, addr, write_fault)) {
2805+
node_id, addr, ts, write_fault)) {
28062806
amdgpu_bo_unref(&root);
28072807
return true;
28082808
}

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
558558
void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info);
559559

560560
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
561-
u32 vmid, u32 node_id, uint64_t addr,
561+
u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
562562
bool write_fault);
563563

564564
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);

drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
132132
/* Try to handle the recoverable page faults by filling page
133133
* tables
134134
*/
135-
if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault))
135+
if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
136+
entry->timestamp, write_fault))
136137
return 1;
137138
}
138139

drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
595595
cam_index = entry->src_data[2] & 0x3ff;
596596

597597
ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
598-
addr, write_fault);
598+
addr, entry->timestamp, write_fault);
599599
WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
600600
if (ret)
601601
return 1;
@@ -618,7 +618,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
618618
* tables
619619
*/
620620
if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
621-
addr, write_fault))
621+
addr, entry->timestamp, write_fault))
622622
return 1;
623623
}
624624
}

drivers/gpu/drm/amd/amdkfd/kfd_priv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,8 @@ struct svm_range_list {
866866
struct delayed_work restore_work;
867867
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
868868
struct task_struct *faulting_task;
869+
/* check point ts decides if page fault recovery need be dropped */
870+
uint64_t checkpoint_ts[MAX_GPU_INSTANCE];
869871
};
870872

871873
/* Process data */

drivers/gpu/drm/amd/amdkfd/kfd_svm.c

Lines changed: 62 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2262,16 +2262,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
22622262
{
22632263
struct kfd_process_device *pdd;
22642264
struct kfd_process *p;
2265-
int drain;
22662265
uint32_t i;
22672266

22682267
p = container_of(svms, struct kfd_process, svms);
22692268

2270-
restart:
2271-
drain = atomic_read(&svms->drain_pagefaults);
2272-
if (!drain)
2273-
return;
2274-
22752269
for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
22762270
pdd = p->pdds[i];
22772271
if (!pdd)
@@ -2291,8 +2285,6 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
22912285

22922286
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
22932287
}
2294-
if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
2295-
goto restart;
22962288
}
22972289

22982290
static void svm_range_deferred_list_work(struct work_struct *work)
@@ -2314,17 +2306,8 @@ static void svm_range_deferred_list_work(struct work_struct *work)
23142306
prange->start, prange->last, prange->work_item.op);
23152307

23162308
mm = prange->work_item.mm;
2317-
retry:
2318-
mmap_write_lock(mm);
23192309

2320-
/* Checking for the need to drain retry faults must be inside
2321-
* mmap write lock to serialize with munmap notifiers.
2322-
*/
2323-
if (unlikely(atomic_read(&svms->drain_pagefaults))) {
2324-
mmap_write_unlock(mm);
2325-
svm_range_drain_retry_fault(svms);
2326-
goto retry;
2327-
}
2310+
mmap_write_lock(mm);
23282311

23292312
/* Remove from deferred_list must be inside mmap write lock, for
23302313
* two race cases:
@@ -2445,6 +2428,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
24452428
struct kfd_process *p;
24462429
unsigned long s, l;
24472430
bool unmap_parent;
2431+
uint32_t i;
24482432

24492433
if (atomic_read(&prange->queue_refcount)) {
24502434
int r;
@@ -2464,11 +2448,35 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
24642448
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
24652449
prange, prange->start, prange->last, start, last);
24662450

2467-
/* Make sure pending page faults are drained in the deferred worker
2468-
* before the range is freed to avoid straggler interrupts on
2469-
* unmapped memory causing "phantom faults".
2451+
/* calculate time stamps that are used to decide which page faults need be
2452+
* dropped or handled before unmap pages from gpu vm
24702453
*/
2471-
atomic_inc(&svms->drain_pagefaults);
2454+
for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
2455+
struct kfd_process_device *pdd;
2456+
struct amdgpu_device *adev;
2457+
struct amdgpu_ih_ring *ih;
2458+
uint32_t checkpoint_wptr;
2459+
2460+
pdd = p->pdds[i];
2461+
if (!pdd)
2462+
continue;
2463+
2464+
adev = pdd->dev->adev;
2465+
2466+
/* Check and drain ih1 ring if cam not available */
2467+
ih = &adev->irq.ih1;
2468+
checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
2469+
if (ih->rptr != checkpoint_wptr) {
2470+
svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
2471+
continue;
2472+
}
2473+
2474+
/* check if dev->irq.ih_soft is not empty */
2475+
ih = &adev->irq.ih_soft;
2476+
checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
2477+
if (ih->rptr != checkpoint_wptr)
2478+
svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
2479+
}
24722480

24732481
unmap_parent = start <= prange->start && last >= prange->last;
24742482

@@ -2909,7 +2917,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
29092917
int
29102918
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
29112919
uint32_t vmid, uint32_t node_id,
2912-
uint64_t addr, bool write_fault)
2920+
uint64_t addr, uint64_t ts, bool write_fault)
29132921
{
29142922
unsigned long start, last, size;
29152923
struct mm_struct *mm = NULL;
@@ -2919,7 +2927,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
29192927
ktime_t timestamp = ktime_get_boottime();
29202928
struct kfd_node *node;
29212929
int32_t best_loc;
2922-
int32_t gpuidx = MAX_GPU_INSTANCE;
2930+
int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
29232931
bool write_locked = false;
29242932
struct vm_area_struct *vma;
29252933
bool migration = false;
@@ -2940,11 +2948,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
29402948
pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
29412949

29422950
if (atomic_read(&svms->drain_pagefaults)) {
2943-
pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
2951+
pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
29442952
r = 0;
29452953
goto out;
29462954
}
29472955

2956+
node = kfd_node_by_irq_ids(adev, node_id, vmid);
2957+
if (!node) {
2958+
pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
2959+
vmid);
2960+
r = -EFAULT;
2961+
goto out;
2962+
}
2963+
2964+
if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
2965+
pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id);
2966+
r = -EFAULT;
2967+
goto out;
2968+
}
2969+
2970+
/* check if this page fault time stamp is before svms->checkpoint_ts */
2971+
if (svms->checkpoint_ts[gpuidx] != 0) {
2972+
if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) {
2973+
pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
2974+
r = 0;
2975+
goto out;
2976+
} else
2977+
/* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts
2978+
* to zero to avoid following ts wrap around give wrong comparing
2979+
*/
2980+
svms->checkpoint_ts[gpuidx] = 0;
2981+
}
2982+
29482983
if (!p->xnack_enabled) {
29492984
pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
29502985
r = -EFAULT;
@@ -2961,13 +2996,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
29612996
goto out;
29622997
}
29632998

2964-
node = kfd_node_by_irq_ids(adev, node_id, vmid);
2965-
if (!node) {
2966-
pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
2967-
vmid);
2968-
r = -EFAULT;
2969-
goto out;
2970-
}
29712999
mmap_read_lock(mm);
29723000
retry_write_locked:
29733001
mutex_lock(&svms->lock);
@@ -3182,8 +3210,9 @@ void svm_range_list_fini(struct kfd_process *p)
31823210
/*
31833211
* Ensure no retry fault comes in afterwards, as page fault handler will
31843212
* not find kfd process and take mm lock to recover fault.
3213+
* stop kfd page fault handing, then wait pending page faults got drained
31853214
*/
3186-
atomic_inc(&p->svms.drain_pagefaults);
3215+
atomic_set(&p->svms.drain_pagefaults, 1);
31873216
svm_range_drain_retry_fault(&p->svms);
31883217

31893218
list_for_each_entry_safe(prange, next, &p->svms.list, list) {

drivers/gpu/drm/amd/amdkfd/kfd_svm.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
174174
bool clear);
175175
void svm_range_vram_node_free(struct svm_range *prange);
176176
int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
177-
uint32_t vmid, uint32_t node_id, uint64_t addr,
177+
uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts,
178178
bool write_fault);
179179
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
180180
void svm_range_add_list_work(struct svm_range_list *svms,
@@ -225,7 +225,7 @@ static inline void svm_range_list_fini(struct kfd_process *p)
225225
static inline int svm_range_restore_pages(struct amdgpu_device *adev,
226226
unsigned int pasid,
227227
uint32_t client_id, uint32_t node_id,
228-
uint64_t addr, bool write_fault)
228+
uint64_t addr, uint64_t ts, bool write_fault)
229229
{
230230
return -EFAULT;
231231
}

0 commit comments

Comments
 (0)