Skip to content

Commit 2e44772

Browse files
PhilipYangA and alexdeucher
authored and committed
drm/amdkfd: simplify drain retry fault
Unmap range always increases the atomic svms->drain_pagefaults, which simplifies the unmap path for both parent ranges and child ranges. The page fault handler ignores a retry fault if svms->drain_pagefaults is set, to speed up interrupt handling. svm_range_drain_retry_fault restarts draining if another range is unmapped from the CPU in the meantime. Signed-off-by: Philip Yang <[email protected]> Reviewed-by: Felix Kuehling <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 7ad153d commit 2e44772

File tree

2 files changed

+23
-9
lines changed

2 files changed

+23
-9
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_priv.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,7 @@ struct svm_range_list {
767767
struct list_head deferred_range_list;
768768
spinlock_t deferred_list_lock;
769769
atomic_t evicted_ranges;
770-
bool drain_pagefaults;
770+
atomic_t drain_pagefaults;
771771
struct delayed_work restore_work;
772772
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
773773
struct task_struct *faulting_task;

drivers/gpu/drm/amd/amdkfd/kfd_svm.c

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1957,10 +1957,16 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
19571957
{
19581958
struct kfd_process_device *pdd;
19591959
struct kfd_process *p;
1960+
int drain;
19601961
uint32_t i;
19611962

19621963
p = container_of(svms, struct kfd_process, svms);
19631964

1965+
restart:
1966+
drain = atomic_read(&svms->drain_pagefaults);
1967+
if (!drain)
1968+
return;
1969+
19641970
for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
19651971
pdd = p->pdds[i];
19661972
if (!pdd)
@@ -1972,6 +1978,8 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
19721978
&pdd->dev->adev->irq.ih1);
19731979
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
19741980
}
1981+
if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
1982+
goto restart;
19751983
}
19761984

19771985
static void svm_range_deferred_list_work(struct work_struct *work)
@@ -1997,8 +2005,7 @@ static void svm_range_deferred_list_work(struct work_struct *work)
19972005
/* Checking for the need to drain retry faults must be inside
19982006
* mmap write lock to serialize with munmap notifiers.
19992007
*/
2000-
if (unlikely(READ_ONCE(svms->drain_pagefaults))) {
2001-
WRITE_ONCE(svms->drain_pagefaults, false);
2008+
if (unlikely(atomic_read(&svms->drain_pagefaults))) {
20022009
mmap_write_unlock(mm);
20032010
svm_range_drain_retry_fault(svms);
20042011
goto retry;
@@ -2045,12 +2052,6 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
20452052
struct mm_struct *mm, enum svm_work_list_ops op)
20462053
{
20472054
spin_lock(&svms->deferred_list_lock);
2048-
/* Make sure pending page faults are drained in the deferred worker
2049-
* before the range is freed to avoid straggler interrupts on
2050-
* unmapped memory causing "phantom faults".
2051-
*/
2052-
if (op == SVM_OP_UNMAP_RANGE)
2053-
svms->drain_pagefaults = true;
20542055
/* if prange is on the deferred list */
20552056
if (!list_empty(&prange->deferred_list)) {
20562057
pr_debug("update exist prange 0x%p work op %d\n", prange, op);
@@ -2129,6 +2130,12 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
21292130
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
21302131
prange, prange->start, prange->last, start, last);
21312132

2133+
/* Make sure pending page faults are drained in the deferred worker
2134+
* before the range is freed to avoid straggler interrupts on
2135+
* unmapped memory causing "phantom faults".
2136+
*/
2137+
atomic_inc(&svms->drain_pagefaults);
2138+
21322139
unmap_parent = start <= prange->start && last >= prange->last;
21332140

21342141
list_for_each_entry(pchild, &prange->child_list, child_list) {
@@ -2594,6 +2601,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
25942601

25952602
pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
25962603

2604+
if (atomic_read(&svms->drain_pagefaults)) {
2605+
pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
2606+
goto out;
2607+
}
2608+
25972609
/* p->lead_thread is available as kfd_process_wq_release flush the work
25982610
* before releasing task ref.
25992611
*/
@@ -2740,6 +2752,7 @@ void svm_range_list_fini(struct kfd_process *p)
27402752
* Ensure no retry fault comes in afterwards, as page fault handler will
27412753
* not find kfd process and take mm lock to recover fault.
27422754
*/
2755+
atomic_inc(&p->svms.drain_pagefaults);
27432756
svm_range_drain_retry_fault(&p->svms);
27442757

27452758

@@ -2763,6 +2776,7 @@ int svm_range_list_init(struct kfd_process *p)
27632776
mutex_init(&svms->lock);
27642777
INIT_LIST_HEAD(&svms->list);
27652778
atomic_set(&svms->evicted_ranges, 0);
2779+
atomic_set(&svms->drain_pagefaults, 0);
27662780
INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
27672781
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
27682782
INIT_LIST_HEAD(&svms->deferred_range_list);

0 commit comments

Comments (0)