Skip to content

Commit fb491d5

Browse files
Claudio Imbrenda authored and frankjaa committed
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous operation that could potentially take a very long time, depending on the size of the guest, due to the time needed to clean up the address space from protected pages.

This patch implements an asynchronous destroy mechanism that allows a protected guest to reboot significantly faster than previously.

This is achieved by clearing the pages of the old guest in the background. In case of reboot, the new guest will be able to run in the same address space almost immediately.

The old protected guest is then only destroyed when all of its memory has been destroyed or otherwise made non-protected.

Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:

KVM_PV_ASYNC_CLEANUP_PREPARE: sets aside the current protected VM for later asynchronous teardown. The current KVM VM will then continue immediately as non-protected. If a protected VM had already been set aside for asynchronous teardown, but without starting the teardown process, this call will fail. There can be at most one VM set aside at any time. Once it is set aside, the protected VM only exists in the context of the Ultravisor; it is not associated with the KVM VM anymore. Its protected CPUs have already been destroyed, but not its memory. This command can be issued again immediately after starting KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.

KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace from a separate thread. If a fatal signal is received (or if the process terminates naturally), the command will terminate immediately without completing. All protected VMs whose teardown was interrupted will be put in the need_cleanup list.
The rest of the normal KVM teardown process will take care of properly cleaning up all remaining protected VMs, including the ones on the need_cleanup list.

Signed-off-by: Claudio Imbrenda <[email protected]>
Reviewed-by: Nico Boehr <[email protected]>
Reviewed-by: Janosch Frank <[email protected]>
Reviewed-by: Steffen Eiden <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Message-Id: <[email protected]>
Signed-off-by: Janosch Frank <[email protected]>
1 parent 58635d6 commit fb491d5

File tree

5 files changed

+333
-18
lines changed

5 files changed

+333
-18
lines changed

arch/s390/include/asm/kvm_host.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,8 @@ struct kvm_s390_pv {
942942
unsigned long stor_base;
943943
void *stor_var;
944944
bool dumping;
945+
void *set_aside;
946+
struct list_head need_cleanup;
945947
struct mmu_notifier mmu_notifier;
946948
};
947949

arch/s390/kvm/kvm-s390.c

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,8 @@ unsigned int diag9c_forwarding_hz;
209209
module_param(diag9c_forwarding_hz, uint, 0644);
210210
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
211211

212+
static int async_destroy;
213+
212214
/*
213215
* For now we handle at most 16 double words as this is what the s390 base
214216
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -2504,9 +2506,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
25042506

25052507
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
25062508
{
2509+
const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2510+
void __user *argp = (void __user *)cmd->data;
25072511
int r = 0;
25082512
u16 dummy;
2509-
void __user *argp = (void __user *)cmd->data;
2513+
2514+
if (need_lock)
2515+
mutex_lock(&kvm->lock);
25102516

25112517
switch (cmd->cmd) {
25122518
case KVM_PV_ENABLE: {
@@ -2540,6 +2546,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
25402546
set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
25412547
break;
25422548
}
2549+
case KVM_PV_ASYNC_CLEANUP_PREPARE:
2550+
r = -EINVAL;
2551+
if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2552+
break;
2553+
2554+
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2555+
/*
2556+
* If a CPU could not be destroyed, destroy VM will also fail.
2557+
* There is no point in trying to destroy it. Instead return
2558+
* the rc and rrc from the first CPU that failed destroying.
2559+
*/
2560+
if (r)
2561+
break;
2562+
r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2563+
2564+
/* no need to block service interrupts any more */
2565+
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2566+
break;
2567+
case KVM_PV_ASYNC_CLEANUP_PERFORM:
2568+
r = -EINVAL;
2569+
if (!async_destroy)
2570+
break;
2571+
/* kvm->lock must not be held; this is asserted inside the function. */
2572+
r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2573+
break;
25432574
case KVM_PV_DISABLE: {
25442575
r = -EINVAL;
25452576
if (!kvm_s390_pv_is_protected(kvm))
@@ -2553,7 +2584,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
25532584
*/
25542585
if (r)
25552586
break;
2556-
r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2587+
r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
25572588

25582589
/* no need to block service interrupts any more */
25592590
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
@@ -2703,6 +2734,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
27032734
default:
27042735
r = -ENOTTY;
27052736
}
2737+
if (need_lock)
2738+
mutex_unlock(&kvm->lock);
2739+
27062740
return r;
27072741
}
27082742

@@ -2907,9 +2941,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
29072941
r = -EINVAL;
29082942
break;
29092943
}
2910-
mutex_lock(&kvm->lock);
2944+
/* must be called without kvm->lock */
29112945
r = kvm_s390_handle_pv(kvm, &args);
2912-
mutex_unlock(&kvm->lock);
29132946
if (copy_to_user(argp, &args, sizeof(args))) {
29142947
r = -EFAULT;
29152948
break;
@@ -3228,6 +3261,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
32283261
kvm_s390_vsie_init(kvm);
32293262
if (use_gisa)
32303263
kvm_s390_gisa_init(kvm);
3264+
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3265+
kvm->arch.pv.set_aside = NULL;
32313266
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
32323267

32333268
return 0;
@@ -3272,11 +3307,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
32723307
/*
32733308
* We are already at the end of life and kvm->lock is not taken.
32743309
* This is ok as the file descriptor is closed by now and nobody
3275-
* can mess with the pv state. To avoid lockdep_assert_held from
3276-
* complaining we do not use kvm_s390_pv_is_protected.
3310+
* can mess with the pv state.
32773311
*/
3278-
if (kvm_s390_pv_get_handle(kvm))
3279-
kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
3312+
kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
32803313
/*
32813314
* Remove the mmu notifier only when the whole KVM VM is torn down,
32823315
* and only if one was registered to begin with. If the VM is

arch/s390/kvm/kvm-s390.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
244244
/* implemented in pv.c */
245245
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
246246
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
247+
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc);
248+
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
249+
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc);
247250
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
248251
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
249252
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,

0 commit comments

Comments
 (0)