2626/* Initial target buffer size */
2727#define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M
2828
/*
 * Forward declaration: virtiovf_precopy_ioctl() needs to read a fresh
 * device-context chunk, but the function is defined later in this file.
 */
static int
virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
				   u32 ctx_size);
2933static struct page *
3034virtiovf_get_migration_page (struct virtiovf_data_buffer * buf ,
3135 unsigned long offset )
@@ -159,6 +163,41 @@ virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
159163 VIRTIO_RESOURCE_OBJ_DEV_PARTS , obj_id );
160164}
161165
166+ static struct virtiovf_data_buffer *
167+ virtiovf_get_data_buffer (struct virtiovf_migration_file * migf , size_t length )
168+ {
169+ struct virtiovf_data_buffer * buf , * temp_buf ;
170+ struct list_head free_list ;
171+
172+ INIT_LIST_HEAD (& free_list );
173+
174+ spin_lock_irq (& migf -> list_lock );
175+ list_for_each_entry_safe (buf , temp_buf , & migf -> avail_list , buf_elm ) {
176+ list_del_init (& buf -> buf_elm );
177+ if (buf -> allocated_length >= length ) {
178+ spin_unlock_irq (& migf -> list_lock );
179+ goto found ;
180+ }
181+ /*
182+ * Prevent holding redundant buffers. Put in a free
183+ * list and call at the end not under the spin lock
184+ * (&migf->list_lock) to minimize its scope usage.
185+ */
186+ list_add (& buf -> buf_elm , & free_list );
187+ }
188+ spin_unlock_irq (& migf -> list_lock );
189+ buf = virtiovf_alloc_data_buffer (migf , length );
190+
191+ found :
192+ while ((temp_buf = list_first_entry_or_null (& free_list ,
193+ struct virtiovf_data_buffer , buf_elm ))) {
194+ list_del (& temp_buf -> buf_elm );
195+ virtiovf_free_data_buffer (temp_buf );
196+ }
197+
198+ return buf ;
199+ }
200+
162201static void virtiovf_clean_migf_resources (struct virtiovf_migration_file * migf )
163202{
164203 struct virtiovf_data_buffer * entry ;
@@ -345,6 +384,7 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t le
345384{
346385 struct virtiovf_migration_file * migf = filp -> private_data ;
347386 struct virtiovf_data_buffer * vhca_buf ;
387+ bool first_loop_call = true;
348388 bool end_of_data ;
349389 ssize_t done = 0 ;
350390
@@ -362,6 +402,19 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t le
362402 ssize_t count ;
363403
364404 vhca_buf = virtiovf_get_data_buff_from_pos (migf , * pos , & end_of_data );
405+ if (first_loop_call ) {
406+ first_loop_call = false;
407+ /* Temporary end of file as part of PRE_COPY */
408+ if (end_of_data && migf -> state == VIRTIOVF_MIGF_STATE_PRECOPY ) {
409+ done = - ENOMSG ;
410+ goto out_unlock ;
411+ }
412+ if (end_of_data && migf -> state != VIRTIOVF_MIGF_STATE_COMPLETE ) {
413+ done = - EINVAL ;
414+ goto out_unlock ;
415+ }
416+ }
417+
365418 if (end_of_data )
366419 goto out_unlock ;
367420
@@ -383,9 +436,101 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t le
383436 return done ;
384437}
385438
/*
 * Handle VFIO_MIG_GET_PRECOPY_INFO on the saving migration file.
 *
 * Fills struct vfio_precopy_info with how much pre-copy data remains:
 *  - initial_bytes: what is left of the first full device context.
 *  - dirty_bytes:   data produced after that, plus (rate-limited) whatever
 *                   the device currently reports as its context size.
 *
 * If the already-read data is exhausted while the device still reports a
 * non-zero context size, a new context chunk is read so a subsequent read()
 * can make progress.
 *
 * Returns 0 on success, -ENOTTY for any other cmd, or a negative errno.
 */
static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
				   unsigned long arg)
{
	struct virtiovf_migration_file *migf = filp->private_data;
	struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
	struct vfio_precopy_info info = {};
	loff_t *pos = &filp->f_pos;
	bool end_of_data = false;
	unsigned long minsz;
	u32 ctx_size = 0;
	int ret;

	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
		return -ENOTTY;

	/* Only the fields up to and including dirty_bytes are copied */
	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
	if (copy_from_user(&info, (void __user *)arg, minsz))
		return -EFAULT;

	if (info.argsz < minsz)
		return -EINVAL;

	mutex_lock(&virtvdev->state_mutex);
	/* The ioctl is only meaningful while in a PRE_COPY state */
	if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
	    virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
		ret = -EINVAL;
		goto err_state_unlock;
	}

	/*
	 * The virtio specification does not include a PRE_COPY concept.
	 * Since we can expect the data to remain the same for a certain period,
	 * we use a rate limiter mechanism before making a call to the device.
	 */
	if (__ratelimit(&migf->pre_copy_rl_state)) {

		/* Query the device for the current size of its parts data */
		ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
					VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
					VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
					&ctx_size);
		if (ret)
			goto err_state_unlock;
	}
	/* NOTE(review): when rate-limited, ctx_size stays 0 and dirty_bytes
	 * reflects only data already buffered — presumably intentional. */

	mutex_lock(&migf->lock);
	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
		ret = -ENODEV;
		goto err_migf_unlock;
	}

	if (migf->pre_copy_initial_bytes > *pos) {
		/* Still inside the initial context captured at PRE_COPY entry */
		info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
	} else {
		/* Past the initial context: report buffered-but-unread data */
		info.dirty_bytes = migf->max_pos - *pos;
		if (!info.dirty_bytes)
			end_of_data = true;
		info.dirty_bytes += ctx_size;
	}

	if (!end_of_data || !ctx_size) {
		mutex_unlock(&migf->lock);
		goto done;
	}

	mutex_unlock(&migf->lock);
	/*
	 * We finished transferring the current state and the device has a
	 * dirty state, read a new state.
	 */
	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
	if (ret)
		/*
		 * The machine is running, and context size could be grow, so no reason to mark
		 * the device state as VIRTIOVF_MIGF_STATE_ERROR.
		 */
		goto err_state_unlock;

done:
	virtiovf_state_mutex_unlock(virtvdev);
	if (copy_to_user((void __user *)arg, &info, minsz))
		return -EFAULT;
	return 0;

err_migf_unlock:
	mutex_unlock(&migf->lock);
err_state_unlock:
	virtiovf_state_mutex_unlock(virtvdev);
	return ret;
}
528+
/*
 * File operations for the saving (source-side) migration file.
 * read() streams the captured device state; the ioctl implements
 * VFIO_MIG_GET_PRECOPY_INFO for PRE_COPY progress reporting.
 */
static const struct file_operations virtiovf_save_fops = {
	.owner = THIS_MODULE,
	.read = virtiovf_save_read,
	.unlocked_ioctl = virtiovf_precopy_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.release = virtiovf_release_file,
};
391536
@@ -429,7 +574,7 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
429574 int nent ;
430575 int ret ;
431576
432- buf = virtiovf_alloc_data_buffer (migf , ctx_size );
577+ buf = virtiovf_get_data_buffer (migf , ctx_size );
433578 if (IS_ERR (buf ))
434579 return PTR_ERR (buf );
435580
@@ -464,7 +609,7 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
464609 goto out ;
465610
466611 buf -> length = res_size ;
467- header_buf = virtiovf_alloc_data_buffer (migf ,
612+ header_buf = virtiovf_get_data_buffer (migf ,
468613 sizeof (struct virtiovf_migration_header ));
469614 if (IS_ERR (header_buf )) {
470615 ret = PTR_ERR (header_buf );
@@ -489,8 +634,43 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
489634 return ret ;
490635}
491636
637+ static int
638+ virtiovf_pci_save_device_final_data (struct virtiovf_pci_core_device * virtvdev )
639+ {
640+ struct virtiovf_migration_file * migf = virtvdev -> saving_migf ;
641+ u32 ctx_size ;
642+ int ret ;
643+
644+ if (migf -> state == VIRTIOVF_MIGF_STATE_ERROR )
645+ return - ENODEV ;
646+
647+ ret = virtio_pci_admin_dev_parts_metadata_get (virtvdev -> core_device .pdev ,
648+ VIRTIO_RESOURCE_OBJ_DEV_PARTS , migf -> obj_id ,
649+ VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE ,
650+ & ctx_size );
651+ if (ret )
652+ goto err ;
653+
654+ if (!ctx_size ) {
655+ ret = - EINVAL ;
656+ goto err ;
657+ }
658+
659+ ret = virtiovf_read_device_context_chunk (migf , ctx_size );
660+ if (ret )
661+ goto err ;
662+
663+ migf -> state = VIRTIOVF_MIGF_STATE_COMPLETE ;
664+ return 0 ;
665+
666+ err :
667+ migf -> state = VIRTIOVF_MIGF_STATE_ERROR ;
668+ return ret ;
669+ }
670+
492671static struct virtiovf_migration_file *
493- virtiovf_pci_save_device_data (struct virtiovf_pci_core_device * virtvdev )
672+ virtiovf_pci_save_device_data (struct virtiovf_pci_core_device * virtvdev ,
673+ bool pre_copy )
494674{
495675 struct virtiovf_migration_file * migf ;
496676 u32 ctx_size ;
@@ -541,6 +721,18 @@ virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev)
541721 if (ret )
542722 goto out_clean ;
543723
724+ if (pre_copy ) {
725+ migf -> pre_copy_initial_bytes = migf -> max_pos ;
726+ /* Arbitrarily set the pre-copy rate limit to 1-second intervals */
727+ ratelimit_state_init (& migf -> pre_copy_rl_state , 1 * HZ , 1 );
728+ /* Prevent any rate messages upon its usage */
729+ ratelimit_set_flags (& migf -> pre_copy_rl_state ,
730+ RATELIMIT_MSG_ON_RELEASE );
731+ migf -> state = VIRTIOVF_MIGF_STATE_PRECOPY ;
732+ } else {
733+ migf -> state = VIRTIOVF_MIGF_STATE_COMPLETE ;
734+ }
735+
544736 return migf ;
545737
546738out_clean :
@@ -950,15 +1142,17 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
9501142 return NULL ;
9511143 }
9521144
953- if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P ) {
1145+ if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P ) ||
1146+ (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P )) {
9541147 ret = virtio_pci_admin_mode_set (virtvdev -> core_device .pdev ,
9551148 BIT (VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED ));
9561149 if (ret )
9571150 return ERR_PTR (ret );
9581151 return NULL ;
9591152 }
9601153
961- if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING ) {
1154+ if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING ) ||
1155+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY )) {
9621156 ret = virtio_pci_admin_mode_set (virtvdev -> core_device .pdev , 0 );
9631157 if (ret )
9641158 return ERR_PTR (ret );
@@ -968,15 +1162,17 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
9681162 if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY ) {
9691163 struct virtiovf_migration_file * migf ;
9701164
971- migf = virtiovf_pci_save_device_data (virtvdev );
1165+ migf = virtiovf_pci_save_device_data (virtvdev , false );
9721166 if (IS_ERR (migf ))
9731167 return ERR_CAST (migf );
9741168 get_file (migf -> filp );
9751169 virtvdev -> saving_migf = migf ;
9761170 return migf -> filp ;
9771171 }
9781172
979- if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP ) {
1173+ if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP ) ||
1174+ (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING ) ||
1175+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P )) {
9801176 virtiovf_disable_fds (virtvdev );
9811177 return NULL ;
9821178 }
@@ -997,6 +1193,24 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
9971193 return NULL ;
9981194 }
9991195
1196+ if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY ) ||
1197+ (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
1198+ new == VFIO_DEVICE_STATE_PRE_COPY_P2P )) {
1199+ struct virtiovf_migration_file * migf ;
1200+
1201+ migf = virtiovf_pci_save_device_data (virtvdev , true);
1202+ if (IS_ERR (migf ))
1203+ return ERR_CAST (migf );
1204+ get_file (migf -> filp );
1205+ virtvdev -> saving_migf = migf ;
1206+ return migf -> filp ;
1207+ }
1208+
1209+ if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY ) {
1210+ ret = virtiovf_pci_save_device_final_data (virtvdev );
1211+ return ret ? ERR_PTR (ret ) : NULL ;
1212+ }
1213+
10001214 /*
10011215 * vfio_mig_get_next_state() does not use arcs other than the above
10021216 */
@@ -1101,7 +1315,8 @@ void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
11011315 spin_lock_init (& virtvdev -> reset_lock );
11021316 virtvdev -> core_device .vdev .migration_flags =
11031317 VFIO_MIGRATION_STOP_COPY |
1104- VFIO_MIGRATION_P2P ;
1318+ VFIO_MIGRATION_P2P |
1319+ VFIO_MIGRATION_PRE_COPY ;
11051320 virtvdev -> core_device .vdev .mig_ops = & virtvdev_pci_mig_ops ;
11061321}
11071322
0 commit comments