@@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
 {
 	struct kvm_async_pf *apf =
 		container_of(work, struct kvm_async_pf, work);
-	struct mm_struct *mm = apf->mm;
 	struct kvm_vcpu *vcpu = apf->vcpu;
+	struct mm_struct *mm = vcpu->kvm->mm;
 	unsigned long addr = apf->addr;
 	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
 	int locked = 1;
@@ -56,15 +56,24 @@ static void async_pf_execute(struct work_struct *work)
 	might_sleep();
 
 	/*
-	 * This work is run asynchronously to the task which owns
-	 * mm and might be done in another context, so we must
-	 * access remotely.
+	 * Attempt to pin the VM's host address space, and simply skip gup() if
+	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
+	 * holds a reference to its associated mm_struct until the very end of
+	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
+	 * work item is fully processed.
 	 */
-	mmap_read_lock(mm);
-	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
-	if (locked)
-		mmap_read_unlock(mm);
+	if (mmget_not_zero(mm)) {
+		mmap_read_lock(mm);
+		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
+		if (locked)
+			mmap_read_unlock(mm);
+		mmput(mm);
+	}
 
+	/*
+	 * Notify and kick the vCPU even if faulting in the page failed, e.g.
+	 * so that the vCPU can retry the fault synchronously.
+	 */
 	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
 		kvm_arch_async_page_present(vcpu, apf);
 
@@ -74,20 +83,39 @@ static void async_pf_execute(struct work_struct *work)
 	apf->vcpu = NULL;
 	spin_unlock(&vcpu->async_pf.lock);
 
-	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
-		kvm_arch_async_page_present_queued(vcpu);
-
 	/*
-	 * apf may be freed by kvm_check_async_pf_completion() after
-	 * this point
+	 * The apf struct may be freed by kvm_check_async_pf_completion() as
+	 * soon as the lock is dropped.  Nullify it to prevent improper usage.
 	 */
+	apf = NULL;
+
+	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+		kvm_arch_async_page_present_queued(vcpu);
 
 	trace_kvm_async_pf_completed(addr, cr2_or_gpa);
 
 	__kvm_vcpu_wake_up(vcpu);
+}
 
-	mmput(mm);
-	kvm_put_kvm(vcpu->kvm);
+static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
+{
+	/*
+	 * The async #PF is "done", but KVM must wait for the work item itself,
+	 * i.e. async_pf_execute(), to run to completion.  If KVM is a module,
+	 * KVM must ensure *no* code owned by KVM (the module) can be run
+	 * after the last call to module_put().  Note, flushing the work item
+	 * is always required when the item is taken off the completion queue.
+	 * E.g. even if the vCPU handles the item in the "normal" path, the VM
+	 * could be terminated before async_pf_execute() completes.
+	 *
+	 * Wake-all events skip the queue and go straight to done, i.e. don't
+	 * need to be flushed (but sanity check that the work wasn't queued).
+	 */
+	if (work->wakeup_all)
+		WARN_ON_ONCE(work->work.func);
+	else
+		flush_work(&work->work);
+	kmem_cache_free(async_pf_cache, work);
 }
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
@@ -112,11 +140,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_ASYNC_PF_SYNC
 		flush_work(&work->work);
 #else
-		if (cancel_work_sync(&work->work)) {
-			mmput(work->mm);
-			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
+		if (cancel_work_sync(&work->work))
 			kmem_cache_free(async_pf_cache, work);
-		}
 #endif
 		spin_lock(&vcpu->async_pf.lock);
 	}
@@ -126,7 +151,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_first_entry(&vcpu->async_pf.done,
 					 typeof(*work), link);
 		list_del(&work->link);
-		kmem_cache_free(async_pf_cache, work);
+
+		spin_unlock(&vcpu->async_pf.lock);
+		kvm_flush_and_free_async_pf_work(work);
+		spin_lock(&vcpu->async_pf.lock);
 	}
 	spin_unlock(&vcpu->async_pf.lock);
 
@@ -151,7 +179,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
-		kmem_cache_free(async_pf_cache, work);
+		kvm_flush_and_free_async_pf_work(work);
 	}
 }
 
@@ -184,9 +212,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	work->cr2_or_gpa = cr2_or_gpa;
 	work->addr = hva;
 	work->arch = *arch;
-	work->mm = current->mm;
-	mmget(work->mm);
-	kvm_get_kvm(work->vcpu->kvm);
 
 	INIT_WORK(&work->work, async_pf_execute);
 
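
For context, a minimal sketch (not part of the patch) of the pinning pattern the reworked async_pf_execute() adopts: take a temporary mm_users reference with mmget_not_zero() before doing gup on an address space the current context does not own, and drop it with mmput() afterwards. The helper name fault_in_remote_page() is hypothetical, and the sketch assumes a kernel where get_user_pages_remote() takes the six-argument form used in the hunks above.

#include <linux/mm.h>
#include <linux/sched/mm.h>

/*
 * Fault in one writable page of a foreign address space, e.g. from a
 * workqueue.  The caller must separately guarantee that the mm_struct
 * itself stays allocated (for KVM, the reference held on kvm->mm until
 * kvm_destroy_vm()).
 */
static void fault_in_remote_page(struct mm_struct *mm, unsigned long addr)
{
	int locked = 1;

	/* Pinning fails once the owning process is exiting; skip the fault. */
	if (!mmget_not_zero(mm))
		return;

	mmap_read_lock(mm);
	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
	if (locked)	/* gup may drop mmap_lock; only unlock if still held */
		mmap_read_unlock(mm);

	mmput(mm);
}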
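
Similarly, an illustrative sketch of the flush-before-free ordering that the new kvm_flush_and_free_async_pf_work() enforces; struct deferred_item and free_deferred_item() are made-up names for this sketch, not KVM APIs.

#include <linux/workqueue.h>
#include <linux/slab.h>

struct deferred_item {
	struct work_struct work;
	bool queued;		/* set once the item has been queued */
};

static void free_deferred_item(struct deferred_item *item)
{
	/*
	 * Wait for the handler to finish before freeing the item; otherwise
	 * the worker could still be running (and, for modular code, still
	 * executing module text after module_put()) when the item is freed.
	 */
	if (item->queued)
		flush_work(&item->work);
	kfree(item);
}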