@@ -2262,16 +2262,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
 {
 	struct kfd_process_device *pdd;
 	struct kfd_process *p;
-	int drain;
 	uint32_t i;
 
 	p = container_of(svms, struct kfd_process, svms);
 
-restart:
-	drain = atomic_read(&svms->drain_pagefaults);
-	if (!drain)
-		return;
-
 	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
 		pdd = p->pdds[i];
 		if (!pdd)
@@ -2291,8 +2285,6 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
 
 		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
 	}
-	if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
-		goto restart;
 }
 
 static void svm_range_deferred_list_work(struct work_struct *work)
@@ -2314,17 +2306,8 @@ static void svm_range_deferred_list_work(struct work_struct *work)
 			 prange->start, prange->last, prange->work_item.op);
 
 		mm = prange->work_item.mm;
-retry:
-		mmap_write_lock(mm);
-
-		/* Checking for the need to drain retry faults must be inside
-		 * mmap write lock to serialize with munmap notifiers.
-		 */
-		if (unlikely(atomic_read(&svms->drain_pagefaults))) {
-			mmap_write_unlock(mm);
-			svm_range_drain_retry_fault(svms);
-			goto retry;
-		}
+		mmap_write_lock(mm);
 
 		/* Remove from deferred_list must be inside mmap write lock, for
 		 * two race cases:
@@ -2445,6 +2428,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 	struct kfd_process *p;
 	unsigned long s, l;
 	bool unmap_parent;
+	uint32_t i;
 
 	if (atomic_read(&prange->queue_refcount)) {
 		int r;
@@ -2464,11 +2448,35 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
 		 prange, prange->start, prange->last, start, last);
 
-	/* Make sure pending page faults are drained in the deferred worker
-	 * before the range is freed to avoid straggler interrupts on
-	 * unmapped memory causing "phantom faults".
+	/* Calculate time stamps that are used to decide which page faults need
+	 * to be dropped or handled before unmapping pages from the GPU vm.
 	 */
-	atomic_inc(&svms->drain_pagefaults);
+	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+		struct kfd_process_device *pdd;
+		struct amdgpu_device *adev;
+		struct amdgpu_ih_ring *ih;
+		uint32_t checkpoint_wptr;
+
+		pdd = p->pdds[i];
+		if (!pdd)
+			continue;
+
+		adev = pdd->dev->adev;
+
+		/* Check and drain the ih1 ring if CAM is not available */
+		ih = &adev->irq.ih1;
+		checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+		if (ih->rptr != checkpoint_wptr) {
+			svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+			continue;
+		}
+
+		/* Check if adev->irq.ih_soft is not empty */
+		ih = &adev->irq.ih_soft;
+		checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+		if (ih->rptr != checkpoint_wptr)
+			svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+	}
 
 	unmap_parent = start <= prange->start && last >= prange->last;
 
@@ -2909,7 +2917,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint32_t vmid, uint32_t node_id,
-			uint64_t addr, bool write_fault)
+			uint64_t addr, uint64_t ts, bool write_fault)
 {
 	unsigned long start, last, size;
 	struct mm_struct *mm = NULL;
@@ -2919,7 +2927,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	ktime_t timestamp = ktime_get_boottime();
 	struct kfd_node *node;
 	int32_t best_loc;
-	int32_t gpuidx = MAX_GPU_INSTANCE;
+	int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
 	bool write_locked = false;
 	struct vm_area_struct *vma;
 	bool migration = false;
@@ -2940,11 +2948,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
 
 	if (atomic_read(&svms->drain_pagefaults)) {
-		pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+		pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
 		r = 0;
 		goto out;
 	}
 
+	node = kfd_node_by_irq_ids(adev, node_id, vmid);
+	if (!node) {
+		pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+			 vmid);
+		r = -EFAULT;
+		goto out;
+	}
+
+	if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
+		pr_debug("failed to get gpuid/gpuidx for node_id: %d\n", node_id);
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* Check if this page fault's time stamp is before svms->checkpoint_ts */
+	if (svms->checkpoint_ts[gpuidx] != 0) {
+		if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) {
+			pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+			r = 0;
+			goto out;
+		} else
+			/* ts is now after svms->checkpoint_ts, so reset
+			 * svms->checkpoint_ts to zero to keep a later ts
+			 * wrap-around from giving a wrong comparison.
+			 */
+			svms->checkpoint_ts[gpuidx] = 0;
+	}
+
 	if (!p->xnack_enabled) {
 		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
 		r = -EFAULT;
@@ -2961,13 +2996,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		goto out;
 	}
 
-	node = kfd_node_by_irq_ids(adev, node_id, vmid);
-	if (!node) {
-		pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
-			 vmid);
-		r = -EFAULT;
-		goto out;
-	}
 	mmap_read_lock(mm);
retry_write_locked:
 	mutex_lock(&svms->lock);
@@ -3182,8 +3210,9 @@ void svm_range_list_fini(struct kfd_process *p)
 	/*
 	 * Ensure no retry fault comes in afterwards, as page fault handler will
 	 * not find kfd process and take mm lock to recover fault.
+	 * Stop kfd page fault handling, then wait for pending page faults to
+	 * drain.
 	 */
-	atomic_inc(&p->svms.drain_pagefaults);
+	atomic_set(&p->svms.drain_pagefaults, 1);
 	svm_range_drain_retry_fault(&p->svms);
 
 	list_for_each_entry_safe(prange, next, &p->svms.list, list) {
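
A note on the time-stamp comparison introduced above: IH IV timestamps are 48 bits wide, so a plain less-than test goes wrong once the counter wraps. The amdgpu_ih_ts_after() helper used in svm_range_restore_pages() avoids that by shifting both timestamps into the top 48 bits of a signed 64-bit value and checking the sign of the difference. Below is a minimal standalone sketch of that comparison (the in-tree helper is a macro in drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h):

#include <stdbool.h>
#include <stdint.h>

/* Return true if t2 is after t1, tolerating 48-bit wrap around. */
static bool ts_after_48bit(uint64_t t1, uint64_t t2)
{
	/* Left-shifting by 16 places the 48-bit timestamps in the top of
	 * an int64_t; the signed difference then encodes the direction of
	 * the shorter modular distance between the two values.
	 */
	return ((int64_t)(t2 << 16) - (int64_t)(t1 << 16)) > 0;
}

This wrap behaviour is also why the handler resets svms->checkpoint_ts[gpuidx] to zero once a fault newer than the checkpoint arrives: a checkpoint left in place long enough would eventually compare as being in the future again.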
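
The added ts parameter also implies a matching change in the interrupt handlers, which must decode the IV timestamp and forward it. Those call sites are outside this diff; the sketch below is only a hypothetical illustration of that plumbing, assuming the existing pasid, vmid, node_id and timestamp fields of struct amdgpu_iv_entry:

/* Hypothetical wrapper, not part of this patch: forward the decoded
 * IV timestamp so svm_range_restore_pages() can compare it against
 * svms->checkpoint_ts[] for the faulting GPU.
 */
static int forward_retry_fault(struct amdgpu_device *adev,
			       struct amdgpu_iv_entry *entry,
			       uint64_t addr, bool write_fault)
{
	return svm_range_restore_pages(adev, entry->pasid, entry->vmid,
				       entry->node_id, addr,
				       entry->timestamp, write_fault);
}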