Skip to content

Commit 34e2f27

Browse files
yishaih authored and awilliam committed
vfio/mlx5: Introduce multiple loads
In order to support PRE_COPY, mlx5 driver transfers multiple states (images) of the device. e.g.: the source VF can save and transfer multiple states, and the target VF will load them by that order. This patch implements the changes for the target VF to decompose the header for each state and to write and load multiple states.

Reviewed-by: Jason Gunthorpe <[email protected]>
Signed-off-by: Yishai Hadas <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alex Williamson <[email protected]>
1 parent 81156c2 commit 34e2f27

File tree

3 files changed

+257
-45
lines changed

3 files changed

+257
-45
lines changed

drivers/vfio/pci/mlx5/cmd.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -598,9 +598,11 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
598598
if (mvdev->mdev_detach)
599599
return -ENOTCONN;
600600

601-
err = mlx5vf_dma_data_buffer(buf);
602-
if (err)
603-
return err;
601+
if (!buf->dmaed) {
602+
err = mlx5vf_dma_data_buffer(buf);
603+
if (err)
604+
return err;
605+
}
604606

605607
MLX5_SET(load_vhca_state_in, in, opcode,
606608
MLX5_CMD_OP_LOAD_VHCA_STATE);
@@ -644,6 +646,11 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
644646
migf->buf = NULL;
645647
}
646648

649+
if (migf->buf_header) {
650+
mlx5vf_free_data_buffer(migf->buf_header);
651+
migf->buf_header = NULL;
652+
}
653+
647654
list_splice(&migf->avail_list, &migf->buf_list);
648655

649656
while ((entry = list_first_entry_or_null(&migf->buf_list,

drivers/vfio/pci/mlx5/cmd.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@ enum mlx5_vf_migf_state {
2222
MLX5_MIGF_STATE_COMPLETE,
2323
};
2424

25+
enum mlx5_vf_load_state {
26+
MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
27+
MLX5_VF_LOAD_STATE_READ_HEADER,
28+
MLX5_VF_LOAD_STATE_PREP_IMAGE,
29+
MLX5_VF_LOAD_STATE_READ_IMAGE,
30+
MLX5_VF_LOAD_STATE_LOAD_IMAGE,
31+
};
32+
2533
struct mlx5_vf_migration_header {
2634
__le64 image_size;
2735
/* For future use in case we may need to change the kernel protocol */
@@ -60,9 +68,11 @@ struct mlx5_vf_migration_file {
6068
struct mutex lock;
6169
enum mlx5_vf_migf_state state;
6270

71+
enum mlx5_vf_load_state load_state;
6372
u32 pdn;
6473
loff_t max_pos;
6574
struct mlx5_vhca_data_buffer *buf;
75+
struct mlx5_vhca_data_buffer *buf_header;
6676
spinlock_t list_lock;
6777
struct list_head buf_list;
6878
struct list_head avail_list;

drivers/vfio/pci/mlx5/main.c

Lines changed: 237 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -518,13 +518,162 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
518518
return ERR_PTR(ret);
519519
}
520520

521+
static int
522+
mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
523+
const char __user **buf, size_t *len,
524+
loff_t *pos, ssize_t *done)
525+
{
526+
unsigned long offset;
527+
size_t page_offset;
528+
struct page *page;
529+
size_t page_len;
530+
u8 *to_buff;
531+
int ret;
532+
533+
offset = *pos - vhca_buf->start_pos;
534+
page_offset = offset % PAGE_SIZE;
535+
536+
page = mlx5vf_get_migration_page(vhca_buf, offset - page_offset);
537+
if (!page)
538+
return -EINVAL;
539+
page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
540+
to_buff = kmap_local_page(page);
541+
ret = copy_from_user(to_buff + page_offset, *buf, page_len);
542+
kunmap_local(to_buff);
543+
if (ret)
544+
return -EFAULT;
545+
546+
*pos += page_len;
547+
*done += page_len;
548+
*buf += page_len;
549+
*len -= page_len;
550+
vhca_buf->length += page_len;
551+
return 0;
552+
}
553+
554+
static int
555+
mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
556+
loff_t requested_length,
557+
const char __user **buf, size_t *len,
558+
loff_t *pos, ssize_t *done)
559+
{
560+
int ret;
561+
562+
if (requested_length > MAX_MIGRATION_SIZE)
563+
return -ENOMEM;
564+
565+
if (vhca_buf->allocated_length < requested_length) {
566+
ret = mlx5vf_add_migration_pages(
567+
vhca_buf,
568+
DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
569+
PAGE_SIZE));
570+
if (ret)
571+
return ret;
572+
}
573+
574+
while (*len) {
575+
ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
576+
done);
577+
if (ret)
578+
return ret;
579+
}
580+
581+
return 0;
582+
}
583+
584+
static ssize_t
585+
mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
586+
struct mlx5_vhca_data_buffer *vhca_buf,
587+
size_t image_size, const char __user **buf,
588+
size_t *len, loff_t *pos, ssize_t *done,
589+
bool *has_work)
590+
{
591+
size_t copy_len, to_copy;
592+
int ret;
593+
594+
to_copy = min_t(size_t, *len, image_size - vhca_buf->length);
595+
copy_len = to_copy;
596+
while (to_copy) {
597+
ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos,
598+
done);
599+
if (ret)
600+
return ret;
601+
}
602+
603+
*len -= copy_len;
604+
if (vhca_buf->length == image_size) {
605+
migf->load_state = MLX5_VF_LOAD_STATE_LOAD_IMAGE;
606+
migf->max_pos += image_size;
607+
*has_work = true;
608+
}
609+
610+
return 0;
611+
}
612+
613+
static int
614+
mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
615+
struct mlx5_vhca_data_buffer *vhca_buf,
616+
const char __user **buf,
617+
size_t *len, loff_t *pos,
618+
ssize_t *done, bool *has_work)
619+
{
620+
struct page *page;
621+
size_t copy_len;
622+
u8 *to_buff;
623+
int ret;
624+
625+
copy_len = min_t(size_t, *len,
626+
sizeof(struct mlx5_vf_migration_header) - vhca_buf->length);
627+
page = mlx5vf_get_migration_page(vhca_buf, 0);
628+
if (!page)
629+
return -EINVAL;
630+
to_buff = kmap_local_page(page);
631+
ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
632+
if (ret) {
633+
ret = -EFAULT;
634+
goto end;
635+
}
636+
637+
*buf += copy_len;
638+
*pos += copy_len;
639+
*done += copy_len;
640+
*len -= copy_len;
641+
vhca_buf->length += copy_len;
642+
if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) {
643+
u64 flags;
644+
645+
vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff);
646+
if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) {
647+
ret = -ENOMEM;
648+
goto end;
649+
}
650+
651+
flags = le64_to_cpup((__le64 *)(to_buff +
652+
offsetof(struct mlx5_vf_migration_header, flags)));
653+
if (flags) {
654+
ret = -EOPNOTSUPP;
655+
goto end;
656+
}
657+
658+
migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
659+
migf->max_pos += vhca_buf->length;
660+
*has_work = true;
661+
}
662+
end:
663+
kunmap_local(to_buff);
664+
return ret;
665+
}
666+
521667
static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
522668
size_t len, loff_t *pos)
523669
{
524670
struct mlx5_vf_migration_file *migf = filp->private_data;
525671
struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
672+
struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header;
526673
loff_t requested_length;
674+
bool has_work = false;
527675
ssize_t done = 0;
676+
int ret = 0;
528677

529678
if (pos)
530679
return -ESPIPE;
@@ -534,56 +683,83 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
534683
check_add_overflow((loff_t)len, *pos, &requested_length))
535684
return -EINVAL;
536685

537-
if (requested_length > MAX_MIGRATION_SIZE)
538-
return -ENOMEM;
539-
686+
mutex_lock(&migf->mvdev->state_mutex);
540687
mutex_lock(&migf->lock);
541688
if (migf->state == MLX5_MIGF_STATE_ERROR) {
542-
done = -ENODEV;
689+
ret = -ENODEV;
543690
goto out_unlock;
544691
}
545692

546-
if (vhca_buf->allocated_length < requested_length) {
547-
done = mlx5vf_add_migration_pages(
548-
vhca_buf,
549-
DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
550-
PAGE_SIZE));
551-
if (done)
552-
goto out_unlock;
553-
}
693+
while (len || has_work) {
694+
has_work = false;
695+
switch (migf->load_state) {
696+
case MLX5_VF_LOAD_STATE_READ_HEADER:
697+
ret = mlx5vf_resume_read_header(migf, vhca_buf_header,
698+
&buf, &len, pos,
699+
&done, &has_work);
700+
if (ret)
701+
goto out_unlock;
702+
break;
703+
case MLX5_VF_LOAD_STATE_PREP_IMAGE:
704+
{
705+
u64 size = vhca_buf_header->header_image_size;
706+
707+
if (vhca_buf->allocated_length < size) {
708+
mlx5vf_free_data_buffer(vhca_buf);
709+
710+
migf->buf = mlx5vf_alloc_data_buffer(migf,
711+
size, DMA_TO_DEVICE);
712+
if (IS_ERR(migf->buf)) {
713+
ret = PTR_ERR(migf->buf);
714+
migf->buf = NULL;
715+
goto out_unlock;
716+
}
554717

555-
while (len) {
556-
size_t page_offset;
557-
struct page *page;
558-
size_t page_len;
559-
u8 *to_buff;
560-
int ret;
718+
vhca_buf = migf->buf;
719+
}
561720

562-
page_offset = (*pos) % PAGE_SIZE;
563-
page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset);
564-
if (!page) {
565-
if (done == 0)
566-
done = -EINVAL;
567-
goto out_unlock;
721+
vhca_buf->start_pos = migf->max_pos;
722+
migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
723+
break;
568724
}
725+
case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
726+
ret = mlx5vf_resume_read_image_no_header(vhca_buf,
727+
requested_length,
728+
&buf, &len, pos, &done);
729+
if (ret)
730+
goto out_unlock;
731+
break;
732+
case MLX5_VF_LOAD_STATE_READ_IMAGE:
733+
ret = mlx5vf_resume_read_image(migf, vhca_buf,
734+
vhca_buf_header->header_image_size,
735+
&buf, &len, pos, &done, &has_work);
736+
if (ret)
737+
goto out_unlock;
738+
break;
739+
case MLX5_VF_LOAD_STATE_LOAD_IMAGE:
740+
ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf, vhca_buf);
741+
if (ret)
742+
goto out_unlock;
743+
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
569744

570-
page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
571-
to_buff = kmap_local_page(page);
572-
ret = copy_from_user(to_buff + page_offset, buf, page_len);
573-
kunmap_local(to_buff);
574-
if (ret) {
575-
done = -EFAULT;
576-
goto out_unlock;
745+
/* prep header buf for next image */
746+
vhca_buf_header->length = 0;
747+
vhca_buf_header->header_image_size = 0;
748+
/* prep data buf for next image */
749+
vhca_buf->length = 0;
750+
751+
break;
752+
default:
753+
break;
577754
}
578-
*pos += page_len;
579-
len -= page_len;
580-
done += page_len;
581-
buf += page_len;
582-
vhca_buf->length += page_len;
583755
}
756+
584757
out_unlock:
758+
if (ret)
759+
migf->state = MLX5_MIGF_STATE_ERROR;
585760
mutex_unlock(&migf->lock);
586-
return done;
761+
mlx5vf_state_mutex_unlock(migf->mvdev);
762+
return ret ? ret : done;
587763
}
588764

589765
static const struct file_operations mlx5vf_resume_fops = {
@@ -623,12 +799,29 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
623799
}
624800

625801
migf->buf = buf;
802+
if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
803+
buf = mlx5vf_alloc_data_buffer(migf,
804+
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
805+
if (IS_ERR(buf)) {
806+
ret = PTR_ERR(buf);
807+
goto out_buf;
808+
}
809+
810+
migf->buf_header = buf;
811+
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
812+
} else {
813+
/* Initial state will be to read the image */
814+
migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
815+
}
816+
626817
stream_open(migf->filp->f_inode, migf->filp);
627818
mutex_init(&migf->lock);
628819
INIT_LIST_HEAD(&migf->buf_list);
629820
INIT_LIST_HEAD(&migf->avail_list);
630821
spin_lock_init(&migf->list_lock);
631822
return migf;
823+
out_buf:
824+
mlx5vf_free_data_buffer(buf);
632825
out_pd:
633826
mlx5vf_cmd_dealloc_pd(migf);
634827
out_free:
@@ -728,11 +921,13 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
728921
}
729922

730923
if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
731-
ret = mlx5vf_cmd_load_vhca_state(mvdev,
732-
mvdev->resuming_migf,
733-
mvdev->resuming_migf->buf);
734-
if (ret)
735-
return ERR_PTR(ret);
924+
if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
925+
ret = mlx5vf_cmd_load_vhca_state(mvdev,
926+
mvdev->resuming_migf,
927+
mvdev->resuming_migf->buf);
928+
if (ret)
929+
return ERR_PTR(ret);
930+
}
736931
mlx5vf_disable_fds(mvdev);
737932
return NULL;
738933
}

0 commit comments

Comments (0)