@@ -2262,16 +2262,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
 {
 	struct kfd_process_device *pdd;
 	struct kfd_process *p;
-	int drain;
 	uint32_t i;
 
 	p = container_of(svms, struct kfd_process, svms);
 
-restart:
-	drain = atomic_read(&svms->drain_pagefaults);
-	if (!drain)
-		return;
-
 	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
 		pdd = p->pdds[i];
 		if (!pdd)
@@ -2291,8 +2285,6 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
 
 		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
 	}
-	if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
-		goto restart;
 }
 
 static void svm_range_deferred_list_work(struct work_struct *work)
@@ -2314,17 +2306,8 @@ static void svm_range_deferred_list_work(struct work_struct *work)
 			 prange->start, prange->last, prange->work_item.op);
 
 		mm = prange->work_item.mm;
-retry:
-		mmap_write_lock(mm);
 
-		/* Checking for the need to drain retry faults must be inside
-		 * mmap write lock to serialize with munmap notifiers.
-		 */
-		if (unlikely(atomic_read(&svms->drain_pagefaults))) {
-			mmap_write_unlock(mm);
-			svm_range_drain_retry_fault(svms);
-			goto retry;
-		}
+		mmap_write_lock(mm);
 
 		/* Remove from deferred_list must be inside mmap write lock, for
 		 * two race cases:
@@ -2445,6 +2428,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 	struct kfd_process *p;
 	unsigned long s, l;
 	bool unmap_parent;
+	uint32_t i;
 
 	if (atomic_read(&prange->queue_refcount)) {
 		int r;
@@ -2464,11 +2448,35 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
 		 prange, prange->start, prange->last, start, last);
 
-	/* Make sure pending page faults are drained in the deferred worker
-	 * before the range is freed to avoid straggler interrupts on
-	 * unmapped memory causing "phantom faults".
+	/* Calculate the time stamps used to decide which page faults need to
+	 * be dropped or handled before unmapping pages from the GPU VM
 	 */
-	atomic_inc(&svms->drain_pagefaults);
+	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+		struct kfd_process_device *pdd;
+		struct amdgpu_device *adev;
+		struct amdgpu_ih_ring *ih;
+		uint32_t checkpoint_wptr;
+
+		pdd = p->pdds[i];
+		if (!pdd)
+			continue;
+
+		adev = pdd->dev->adev;
+
+		/* Check and drain the ih1 ring if the CAM is not available */
+		ih = &adev->irq.ih1;
+		checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+		if (ih->rptr != checkpoint_wptr) {
+			svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+			continue;
+		}
+
+		/* Check if adev->irq.ih_soft is not empty */
+		ih = &adev->irq.ih_soft;
+		checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+		if (ih->rptr != checkpoint_wptr)
+			svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+	}
 
 	unmap_parent = start <= prange->start && last >= prange->last;
 
@@ -2909,7 +2917,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint32_t vmid, uint32_t node_id,
-			uint64_t addr, bool write_fault)
+			uint64_t addr, uint64_t ts, bool write_fault)
 {
 	unsigned long start, last, size;
 	struct mm_struct *mm = NULL;
@@ -2919,7 +2927,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	ktime_t timestamp = ktime_get_boottime();
 	struct kfd_node *node;
 	int32_t best_loc;
-	int32_t gpuidx = MAX_GPU_INSTANCE;
+	int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
 	bool write_locked = false;
 	struct vm_area_struct *vma;
 	bool migration = false;
@@ -2940,11 +2948,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
 
 	if (atomic_read(&svms->drain_pagefaults)) {
-		pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+		pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
 		r = 0;
 		goto out;
 	}
 
+	node = kfd_node_by_irq_ids(adev, node_id, vmid);
+	if (!node) {
+		pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+			 vmid);
+		r = -EFAULT;
+		goto out;
+	}
+
+	if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
+		pr_debug("failed to get gpuid/gpuidx for node_id: %d\n", node_id);
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* Check if this page fault's time stamp is before svms->checkpoint_ts */
+	if (svms->checkpoint_ts[gpuidx] != 0) {
+		if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) {
+			pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+			r = 0;
+			goto out;
+		} else
+			/* ts is after svms->checkpoint_ts now; reset svms->checkpoint_ts
+			 * to zero so a later ts wraparound cannot give a wrong comparison
+			 */
+			svms->checkpoint_ts[gpuidx] = 0;
+	}
+
 	if (!p->xnack_enabled) {
 		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
 		r = -EFAULT;
@@ -2961,13 +2996,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		goto out;
 	}
 
-	node = kfd_node_by_irq_ids(adev, node_id, vmid);
-	if (!node) {
-		pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
-			 vmid);
-		r = -EFAULT;
-		goto out;
-	}
 	mmap_read_lock(mm);
 retry_write_locked:
 	mutex_lock(&svms->lock);
@@ -3182,8 +3210,9 @@ void svm_range_list_fini(struct kfd_process *p)
 	/*
 	 * Ensure no retry fault comes in afterwards, as page fault handler will
 	 * not find kfd process and take mm lock to recover fault.
+	 * Stop kfd page fault handling, then wait for pending page faults to be drained.
 	 */
-	atomic_inc(&p->svms.drain_pagefaults);
+	atomic_set(&p->svms.drain_pagefaults, 1);
 	svm_range_drain_retry_fault(&p->svms);
 
 	list_for_each_entry_safe(prange, next, &p->svms.list, list) {
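
For reference, a minimal standalone sketch (not part of the patch) of the wraparound-safe 48-bit time-stamp comparison the hunks above rely on: the patch captures a per-GPU checkpoint time stamp from the IH rings at unmap time, drops retry faults whose time stamp precedes that checkpoint, and clears the checkpoint on the first newer fault. The names ts_after() and drop_stale_fault() below are hypothetical; ts_after() is modeled on the amdgpu_ih_ts_after() check used in the patch.

#include <stdbool.h>
#include <stdint.h>

/* IH time stamps are 48 bits wide. Compare them with wraparound handling by
 * shifting both values into the top of a signed 64-bit integer; returns true
 * if t2 is after t1 (mirrors the amdgpu_ih_ts_after() idea).
 */
bool ts_after(uint64_t t1, uint64_t t2)
{
	return (int64_t)(t2 << 16) - (int64_t)(t1 << 16) > 0;
}

/* Hypothetical helper: decide whether a retry fault with time stamp fault_ts
 * should be dropped. *checkpoint_ts is the per-GPU time stamp captured when
 * the range was unmapped; zero means no drain is pending.
 */
bool drop_stale_fault(uint64_t *checkpoint_ts, uint64_t fault_ts)
{
	if (*checkpoint_ts == 0)
		return false;		/* nothing to drain */
	if (ts_after(fault_ts, *checkpoint_ts))
		return true;		/* fault predates the checkpoint: drop it */
	*checkpoint_ts = 0;		/* newer fault: stop draining, handle it */
	return false;
}

Clearing the checkpoint as soon as a newer fault arrives is what keeps a later 48-bit time-stamp wraparound from misclassifying fresh faults as stale, which is the same reason the patch resets svms->checkpoint_ts[gpuidx] to zero.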