@@ -3262,9 +3262,19 @@ static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
3262
3262
{
3263
3263
gva_t gva = fault -> is_tdp ? 0 : fault -> addr ;
3264
3264
3265
+ if (fault -> is_private ) {
3266
+ kvm_mmu_prepare_memory_fault_exit (vcpu , fault );
3267
+ return - EFAULT ;
3268
+ }
3269
+
3265
3270
vcpu_cache_mmio_info (vcpu , gva , fault -> gfn ,
3266
3271
access & shadow_mmio_access_mask );
3267
3272
3273
+ fault -> slot = NULL ;
3274
+ fault -> pfn = KVM_PFN_NOSLOT ;
3275
+ fault -> map_writable = false;
3276
+ fault -> hva = KVM_HVA_ERR_BAD ;
3277
+
3268
3278
/*
3269
3279
* If MMIO caching is disabled, emulate immediately without
3270
3280
* touching the shadow page tables as attempting to install an
@@ -4207,24 +4217,28 @@ static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
4207
4217
return (vcpu -> arch .apf .id ++ << 12 ) | vcpu -> vcpu_id ;
4208
4218
}
4209
4219
4210
- static bool kvm_arch_setup_async_pf (struct kvm_vcpu * vcpu , gpa_t cr2_or_gpa ,
4211
- gfn_t gfn )
4220
+ static bool kvm_arch_setup_async_pf (struct kvm_vcpu * vcpu ,
4221
+ struct kvm_page_fault * fault )
4212
4222
{
4213
4223
struct kvm_arch_async_pf arch ;
4214
4224
4215
4225
arch .token = alloc_apf_token (vcpu );
4216
- arch .gfn = gfn ;
4226
+ arch .gfn = fault -> gfn ;
4227
+ arch .error_code = fault -> error_code ;
4217
4228
arch .direct_map = vcpu -> arch .mmu -> root_role .direct ;
4218
4229
arch .cr3 = kvm_mmu_get_guest_pgd (vcpu , vcpu -> arch .mmu );
4219
4230
4220
- return kvm_setup_async_pf (vcpu , cr2_or_gpa ,
4221
- kvm_vcpu_gfn_to_hva (vcpu , gfn ), & arch );
4231
+ return kvm_setup_async_pf (vcpu , fault -> addr ,
4232
+ kvm_vcpu_gfn_to_hva (vcpu , fault -> gfn ), & arch );
4222
4233
}
4223
4234
4224
4235
void kvm_arch_async_page_ready (struct kvm_vcpu * vcpu , struct kvm_async_pf * work )
4225
4236
{
4226
4237
int r ;
4227
4238
4239
+ if (WARN_ON_ONCE (work -> arch .error_code & PFERR_PRIVATE_ACCESS ))
4240
+ return ;
4241
+
4228
4242
if ((vcpu -> arch .mmu -> root_role .direct != work -> arch .direct_map ) ||
4229
4243
work -> wakeup_all )
4230
4244
return ;
@@ -4237,7 +4251,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
4237
4251
work -> arch .cr3 != kvm_mmu_get_guest_pgd (vcpu , vcpu -> arch .mmu ))
4238
4252
return ;
4239
4253
4240
- kvm_mmu_do_page_fault (vcpu , work -> cr2_or_gpa , 0 , true, NULL );
4254
+ kvm_mmu_do_page_fault (vcpu , work -> cr2_or_gpa , work -> arch . error_code , true, NULL );
4241
4255
}
4242
4256
4243
4257
static inline u8 kvm_max_level_for_order (int order )
@@ -4257,14 +4271,6 @@ static inline u8 kvm_max_level_for_order(int order)
4257
4271
return PG_LEVEL_4K ;
4258
4272
}
4259
4273
4260
- static void kvm_mmu_prepare_memory_fault_exit (struct kvm_vcpu * vcpu ,
4261
- struct kvm_page_fault * fault )
4262
- {
4263
- kvm_prepare_memory_fault_exit (vcpu , fault -> gfn << PAGE_SHIFT ,
4264
- PAGE_SIZE , fault -> write , fault -> exec ,
4265
- fault -> is_private );
4266
- }
4267
-
4268
4274
static int kvm_faultin_pfn_private (struct kvm_vcpu * vcpu ,
4269
4275
struct kvm_page_fault * fault )
4270
4276
{
@@ -4291,48 +4297,15 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
4291
4297
4292
4298
static int __kvm_faultin_pfn (struct kvm_vcpu * vcpu , struct kvm_page_fault * fault )
4293
4299
{
4294
- struct kvm_memory_slot * slot = fault -> slot ;
4295
4300
bool async ;
4296
4301
4297
- /*
4298
- * Retry the page fault if the gfn hit a memslot that is being deleted
4299
- * or moved. This ensures any existing SPTEs for the old memslot will
4300
- * be zapped before KVM inserts a new MMIO SPTE for the gfn.
4301
- */
4302
- if (slot && (slot -> flags & KVM_MEMSLOT_INVALID ))
4303
- return RET_PF_RETRY ;
4304
-
4305
- if (!kvm_is_visible_memslot (slot )) {
4306
- /* Don't expose private memslots to L2. */
4307
- if (is_guest_mode (vcpu )) {
4308
- fault -> slot = NULL ;
4309
- fault -> pfn = KVM_PFN_NOSLOT ;
4310
- fault -> map_writable = false;
4311
- return RET_PF_CONTINUE ;
4312
- }
4313
- /*
4314
- * If the APIC access page exists but is disabled, go directly
4315
- * to emulation without caching the MMIO access or creating a
4316
- * MMIO SPTE. That way the cache doesn't need to be purged
4317
- * when the AVIC is re-enabled.
4318
- */
4319
- if (slot && slot -> id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
4320
- !kvm_apicv_activated (vcpu -> kvm ))
4321
- return RET_PF_EMULATE ;
4322
- }
4323
-
4324
- if (fault -> is_private != kvm_mem_is_private (vcpu -> kvm , fault -> gfn )) {
4325
- kvm_mmu_prepare_memory_fault_exit (vcpu , fault );
4326
- return - EFAULT ;
4327
- }
4328
-
4329
4302
if (fault -> is_private )
4330
4303
return kvm_faultin_pfn_private (vcpu , fault );
4331
4304
4332
4305
async = false;
4333
- fault -> pfn = __gfn_to_pfn_memslot (slot , fault -> gfn , false, false, & async ,
4334
- fault -> write , & fault -> map_writable ,
4335
- & fault -> hva );
4306
+ fault -> pfn = __gfn_to_pfn_memslot (fault -> slot , fault -> gfn , false, false,
4307
+ & async , fault -> write ,
4308
+ & fault -> map_writable , & fault -> hva );
4336
4309
if (!async )
4337
4310
return RET_PF_CONTINUE ; /* *pfn has correct page already */
4338
4311
@@ -4342,7 +4315,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
4342
4315
trace_kvm_async_pf_repeated_fault (fault -> addr , fault -> gfn );
4343
4316
kvm_make_request (KVM_REQ_APF_HALT , vcpu );
4344
4317
return RET_PF_RETRY ;
4345
- } else if (kvm_arch_setup_async_pf (vcpu , fault -> addr , fault -> gfn )) {
4318
+ } else if (kvm_arch_setup_async_pf (vcpu , fault )) {
4346
4319
return RET_PF_RETRY ;
4347
4320
}
4348
4321
}
@@ -4352,17 +4325,72 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
4352
4325
* to wait for IO. Note, gup always bails if it is unable to quickly
4353
4326
* get a page and a fatal signal, i.e. SIGKILL, is pending.
4354
4327
*/
4355
- fault -> pfn = __gfn_to_pfn_memslot (slot , fault -> gfn , false, true, NULL ,
4356
- fault -> write , & fault -> map_writable ,
4357
- & fault -> hva );
4328
+ fault -> pfn = __gfn_to_pfn_memslot (fault -> slot , fault -> gfn , false, true,
4329
+ NULL , fault -> write ,
4330
+ & fault -> map_writable , & fault -> hva );
4358
4331
return RET_PF_CONTINUE ;
4359
4332
}
4360
4333
4361
4334
static int kvm_faultin_pfn (struct kvm_vcpu * vcpu , struct kvm_page_fault * fault ,
4362
4335
unsigned int access )
4363
4336
{
4337
+ struct kvm_memory_slot * slot = fault -> slot ;
4364
4338
int ret ;
4365
4339
4340
+ /*
4341
+ * Note that the mmu_invalidate_seq also serves to detect a concurrent
4342
+ * change in attributes. is_page_fault_stale() will detect an
4343
+ * invalidation related to fault -> gfn and resume the guest without
4344
+ * installing a mapping in the page tables.
4345
+ */
4346
+ fault -> mmu_seq = vcpu -> kvm -> mmu_invalidate_seq ;
4347
+ smp_rmb ();
4348
+
4349
+ /*
4350
+ * Now that we have a snapshot of mmu_invalidate_seq we can check for a
4351
+ * private vs. shared mismatch.
4352
+ */
4353
+ if (fault -> is_private != kvm_mem_is_private (vcpu -> kvm , fault -> gfn )) {
4354
+ kvm_mmu_prepare_memory_fault_exit (vcpu , fault );
4355
+ return - EFAULT ;
4356
+ }
4357
+
4358
+ if (unlikely (!slot ))
4359
+ return kvm_handle_noslot_fault (vcpu , fault , access );
4360
+
4361
+ /*
4362
+ * Retry the page fault if the gfn hit a memslot that is being deleted
4363
+ * or moved. This ensures any existing SPTEs for the old memslot will
4364
+ * be zapped before KVM inserts a new MMIO SPTE for the gfn.
4365
+ */
4366
+ if (slot -> flags & KVM_MEMSLOT_INVALID )
4367
+ return RET_PF_RETRY ;
4368
+
4369
+ if (slot -> id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT ) {
4370
+ /*
4371
+ * Don't map L1's APIC access page into L2, KVM doesn't support
4372
+ * using APICv/AVIC to accelerate L2 accesses to L1's APIC,
4373
+ * i.e. the access needs to be emulated. Emulating access to
4374
+ * L1's APIC is also correct if L1 is accelerating L2's own
4375
+ * virtual APIC, but for some reason L1 also maps _L1's_ APIC
4376
+ * into L2. Note, vcpu_is_mmio_gpa() always treats access to
4377
+ * the APIC as MMIO. Allow an MMIO SPTE to be created, as KVM
4378
+ * uses different roots for L1 vs. L2, i.e. there is no danger
4379
+ * of breaking APICv/AVIC for L1.
4380
+ */
4381
+ if (is_guest_mode (vcpu ))
4382
+ return kvm_handle_noslot_fault (vcpu , fault , access );
4383
+
4384
+ /*
4385
+ * If the APIC access page exists but is disabled, go directly
4386
+ * to emulation without caching the MMIO access or creating a
4387
+ * MMIO SPTE. That way the cache doesn't need to be purged
4388
+ * when the AVIC is re-enabled.
4389
+ */
4390
+ if (!kvm_apicv_activated (vcpu -> kvm ))
4391
+ return RET_PF_EMULATE ;
4392
+ }
4393
+
4366
4394
fault -> mmu_seq = vcpu -> kvm -> mmu_invalidate_seq ;
4367
4395
smp_rmb ();
4368
4396
@@ -4387,8 +4415,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
4387
4415
* *guaranteed* to need to retry, i.e. waiting until mmu_lock is held
4388
4416
* to detect retry guarantees the worst case latency for the vCPU.
4389
4417
*/
4390
- if (fault -> slot &&
4391
- mmu_invalidate_retry_gfn_unsafe (vcpu -> kvm , fault -> mmu_seq , fault -> gfn ))
4418
+ if (mmu_invalidate_retry_gfn_unsafe (vcpu -> kvm , fault -> mmu_seq , fault -> gfn ))
4392
4419
return RET_PF_RETRY ;
4393
4420
4394
4421
ret = __kvm_faultin_pfn (vcpu , fault );
@@ -4398,7 +4425,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
4398
4425
if (unlikely (is_error_pfn (fault -> pfn )))
4399
4426
return kvm_handle_error_pfn (vcpu , fault );
4400
4427
4401
- if (unlikely (!fault -> slot ))
4428
+ if (WARN_ON_ONCE (!fault -> slot || is_noslot_pfn ( fault -> pfn ) ))
4402
4429
return kvm_handle_noslot_fault (vcpu , fault , access );
4403
4430
4404
4431
/*
@@ -4509,6 +4536,16 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
4509
4536
if (WARN_ON_ONCE (fault_address >> 32 ))
4510
4537
return - EFAULT ;
4511
4538
#endif
4539
+ /*
4540
+ * Legacy #PF exceptions only have a 32-bit error code. Simply drop the
4541
+ * upper bits as KVM doesn't use them for #PF (because they are never
4542
+ * set), and to ensure there are no collisions with KVM-defined bits.
4543
+ */
4544
+ if (WARN_ON_ONCE (error_code >> 32 ))
4545
+ error_code = lower_32_bits (error_code );
4546
+
4547
+ /* Ensure the above sanity check also covers KVM-defined flags. */
4548
+ BUILD_BUG_ON (lower_32_bits (PFERR_SYNTHETIC_MASK ));
4512
4549
4513
4550
vcpu -> arch .l1tf_flush_l1d = true;
4514
4551
if (!flags ) {
@@ -5794,30 +5831,35 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
5794
5831
int r , emulation_type = EMULTYPE_PF ;
5795
5832
bool direct = vcpu -> arch .mmu -> root_role .direct ;
5796
5833
5797
- /*
5798
- * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
5799
- * checks when emulating instructions that triggers implicit access.
5800
- * WARN if hardware generates a fault with an error code that collides
5801
- * with the KVM-defined value. Clear the flag and continue on, i.e.
5802
- * don't terminate the VM, as KVM can't possibly be relying on a flag
5803
- * that KVM doesn't know about.
5804
- */
5805
- if (WARN_ON_ONCE (error_code & PFERR_IMPLICIT_ACCESS ))
5806
- error_code &= ~PFERR_IMPLICIT_ACCESS ;
5807
-
5808
5834
if (WARN_ON_ONCE (!VALID_PAGE (vcpu -> arch .mmu -> root .hpa )))
5809
5835
return RET_PF_RETRY ;
5810
5836
5837
+ /*
5838
+ * Except for reserved faults (emulated MMIO is shared-only), set the
5839
+ * PFERR_PRIVATE_ACCESS flag for software-protected VMs based on the gfn's
5840
+ * current attributes, which are the source of truth for such VMs. Note,
5841
+ * this wrong for nested MMUs as the GPA is an L2 GPA, but KVM doesn't
5842
+ * currently supported nested virtualization (among many other things)
5843
+ * for software-protected VMs.
5844
+ */
5845
+ if (IS_ENABLED (CONFIG_KVM_SW_PROTECTED_VM ) &&
5846
+ !(error_code & PFERR_RSVD_MASK ) &&
5847
+ vcpu -> kvm -> arch .vm_type == KVM_X86_SW_PROTECTED_VM &&
5848
+ kvm_mem_is_private (vcpu -> kvm , gpa_to_gfn (cr2_or_gpa )))
5849
+ error_code |= PFERR_PRIVATE_ACCESS ;
5850
+
5811
5851
r = RET_PF_INVALID ;
5812
5852
if (unlikely (error_code & PFERR_RSVD_MASK )) {
5853
+ if (WARN_ON_ONCE (error_code & PFERR_PRIVATE_ACCESS ))
5854
+ return - EFAULT ;
5855
+
5813
5856
r = handle_mmio_page_fault (vcpu , cr2_or_gpa , direct );
5814
5857
if (r == RET_PF_EMULATE )
5815
5858
goto emulate ;
5816
5859
}
5817
5860
5818
5861
if (r == RET_PF_INVALID ) {
5819
- r = kvm_mmu_do_page_fault (vcpu , cr2_or_gpa ,
5820
- lower_32_bits (error_code ), false,
5862
+ r = kvm_mmu_do_page_fault (vcpu , cr2_or_gpa , error_code , false,
5821
5863
& emulation_type );
5822
5864
if (KVM_BUG_ON (r == RET_PF_INVALID , vcpu -> kvm ))
5823
5865
return - EIO ;
0 commit comments