Hello,
This patchset builds upon the code at https://lore.kernel.org/lkml/20230718234512.1690985-1-seanjc@google.com/T/.
This code is available at https://github.com/googleprodkernel/linux-cc/tree/kvm-gmem-link-migrate-rfcv....
In guest_mem v11, a split file/inode model was proposed, where memslot bindings belong to the file and pages belong to the inode. This model lends itself well to having different VMs use separate files pointing to the same inode.
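Sketching the resulting object graph (using the structures from the patches below, where file->private_data is a struct kvm_gmem holding the per-VM state): each VM gets its own file, while the pages live in the shared inode:

  src VM's gmem file                    dst VM's gmem file
  (struct kvm_gmem: kvm = src,          (struct kvm_gmem: kvm = dst,
   bindings)                             bindings)
              \                             /
               +-----> shared inode <------+
                 (i_mapping holds the guest pages;
                  i_mapping->private_list links the kvm_gmem instances)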
This RFC proposes an ioctl, KVM_LINK_GUEST_MEMFD, that takes a VM and a gmem fd and returns another gmem fd referencing a different file that is associated with the given VM. This RFC also includes an update to KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM to migrate memory context (slot->arch.lpage_info and kvm->mem_attr_array) from the source to the destination VM, intra-host.
Intended usage of the two ioctls:
1. Source VM’s fd is passed to destination VM via unix sockets.
2. Destination VM uses new ioctl KVM_LINK_GUEST_MEMFD to link source VM’s fd to a new fd.
3. Destination VM will pass new fds to KVM_SET_USER_MEMORY_REGION, which will bind the new file, pointing to the same inode that the source VM’s file points to, to memslots.
4. Use KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM to move kvm->mem_attr_array and slot->arch.lpage_info to the destination VM.
5. Run the destination VM as normal.
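To illustrate, a minimal userspace sketch of steps 2 and 4 (variable names are hypothetical; src_gmem_fd is assumed to have already been received over the unix socket, and error handling is omitted):

/* Step 2: link the source gmem fd to a new fd on the destination VM */
struct kvm_link_guest_memfd link_args = {
	.fd = src_gmem_fd,
	.flags = 0,
};
int dst_gmem_fd = ioctl(dst_vm_fd, KVM_LINK_GUEST_MEMFD, &link_args);

/* Step 3 binds dst_gmem_fd to the destination VM's memslots via
 * KVM_SET_USER_MEMORY_REGION, mirroring the source VM's layout. */

/* Step 4: move the memory (and, for SEV, encryption) context */
struct kvm_enable_cap cap = {
	.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
	.args[0] = src_vm_fd,
};
ioctl(dst_vm_fd, KVM_ENABLE_CAP, &cap);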
Some other approaches considered were:
+ Using the linkat() syscall, but that requires a mount/directory for a source fd to be linked to
+ Using the dup() syscall, but that only duplicates the fd; both fds refer to the same struct file, so the duplicate cannot be associated with a different VM
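As a quick illustration of the dup() limitation: the two fds share a single struct file (one open file description), so per-file state such as the gmem's VM association cannot differ between them, e.g.

	int fd2 = dup(fd1);

	fcntl(fd1, F_SETFL, fcntl(fd1, F_GETFL) | O_NONBLOCK);
	/* fd2 observes the change: both fds share one struct file */
	assert(fcntl(fd2, F_GETFL) & O_NONBLOCK);

KVM_LINK_GUEST_MEMFD instead creates a distinct file (with its own kvm_gmem and VM association) that shares only the inode.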
---
Ackerley Tng (11):
  KVM: guest_mem: Refactor out kvm_gmem_alloc_file()
  KVM: guest_mem: Add ioctl KVM_LINK_GUEST_MEMFD
  KVM: selftests: Add tests for KVM_LINK_GUEST_MEMFD ioctl
  KVM: selftests: Test transferring private memory to another VM
  KVM: x86: Refactor sev's flag migration_in_progress to kvm struct
  KVM: x86: Refactor common code out of sev.c
  KVM: x86: Refactor common migration preparation code out of
    sev_vm_move_enc_context_from
  KVM: x86: Let moving encryption context be configurable
  KVM: x86: Handle moving of memory context for intra-host migration
  KVM: selftests: Generalize migration functions from sev_migrate_tests.c
  KVM: selftests: Add tests for migration of private mem

 arch/x86/include/asm/kvm_host.h               |   4 +-
 arch/x86/kvm/svm/sev.c                        |  85 ++-----
 arch/x86/kvm/svm/svm.h                        |   3 +-
 arch/x86/kvm/x86.c                            | 221 +++++++++++++++++-
 arch/x86/kvm/x86.h                            |   6 +
 include/linux/kvm_host.h                      |  18 ++
 include/uapi/linux/kvm.h                      |   8 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../testing/selftests/kvm/guest_memfd_test.c  |  42 ++++
 .../selftests/kvm/include/kvm_util_base.h     |  31 +++
 .../kvm/x86_64/private_mem_migrate_tests.c    |  93 ++++++++
 .../selftests/kvm/x86_64/sev_migrate_tests.c  |  48 ++--
 virt/kvm/guest_mem.c                          | 151 ++++++++++--
 virt/kvm/kvm_main.c                           |  10 +
 virt/kvm/kvm_mm.h                             |   7 +
 15 files changed, 596 insertions(+), 132 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c

--
2.41.0.640.ga95def55d0-goog
kvm_gmem_alloc_file() will allocate and build a file out of an inode.
It will be reused later by __kvm_gmem_link().
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 virt/kvm/guest_mem.c | 53 ++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
index 3a3e38151b45..30d0ab8745ee 100644
--- a/virt/kvm/guest_mem.c
+++ b/virt/kvm/guest_mem.c
@@ -365,12 +365,42 @@ static const struct inode_operations kvm_gmem_iops = {
 	.setattr	= kvm_gmem_setattr,
 };

+static struct file *kvm_gmem_alloc_file(struct kvm *kvm, struct inode *inode,
+					struct vfsmount *mnt)
+{
+	struct file *file;
+	struct kvm_gmem *gmem;
+
+	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
+	if (!gmem)
+		return ERR_PTR(-ENOMEM);
+
+	file = alloc_file_pseudo(inode, mnt, "kvm-gmem", O_RDWR, &kvm_gmem_fops);
+	if (IS_ERR(file))
+		goto err;
+
+	file->f_flags |= O_LARGEFILE;
+	file->f_mapping = inode->i_mapping;
+
+	kvm_get_kvm(kvm);
+	gmem->kvm = kvm;
+	xa_init(&gmem->bindings);
+
+	file->private_data = gmem;
+
+	list_add(&gmem->entry, &inode->i_mapping->private_list);
+
+	return file;
+err:
+	kfree(gmem);
+	return file;
+}
+
 static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags,
			     struct vfsmount *mnt)
 {
 	const char *anon_name = "[kvm-gmem]";
 	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
-	struct kvm_gmem *gmem;
 	struct inode *inode;
 	struct file *file;
 	int fd, err;
@@ -399,34 +429,15 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags,
 		goto err_inode;
 	}

-	file = alloc_file_pseudo(inode, mnt, "kvm-gmem", O_RDWR, &kvm_gmem_fops);
+	file = kvm_gmem_alloc_file(kvm, inode, mnt);
 	if (IS_ERR(file)) {
 		err = PTR_ERR(file);
 		goto err_fd;
 	}

-	file->f_flags |= O_LARGEFILE;
-	file->f_mapping = inode->i_mapping;
-
-	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
-	if (!gmem) {
-		err = -ENOMEM;
-		goto err_file;
-	}
-
-	kvm_get_kvm(kvm);
-	gmem->kvm = kvm;
-	xa_init(&gmem->bindings);
-
-	file->private_data = gmem;
-
-	list_add(&gmem->entry, &inode->i_mapping->private_list);
-
 	fd_install(fd, file);
 	return fd;

-err_file:
-	fput(file);
 err_fd:
 	put_unused_fd(fd);
 err_inode:
KVM_LINK_GUEST_MEMFD will link a gmem fd's underlying inode to a new file (and fd).
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 include/uapi/linux/kvm.h |  8 +++++
 virt/kvm/guest_mem.c     | 73 ++++++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 10 ++++++
 virt/kvm/kvm_mm.h        |  7 ++++
 4 files changed, 98 insertions(+)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index eb900344a054..d0e2a2ce0df2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2299,4 +2299,12 @@ struct kvm_create_guest_memfd {
 	__u64 reserved[6];
 };

+#define KVM_LINK_GUEST_MEMFD	_IOWR(KVMIO, 0xd5, struct kvm_link_guest_memfd)
+
+struct kvm_link_guest_memfd {
+	__u64 fd;
+	__u64 flags;
+	__u64 reserved[6];
+};
+
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
index 30d0ab8745ee..1b3df273f785 100644
--- a/virt/kvm/guest_mem.c
+++ b/virt/kvm/guest_mem.c
@@ -477,6 +477,79 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
 	return __kvm_gmem_create(kvm, size, flags, kvm_gmem_mnt);
 }

+static inline void __kvm_gmem_do_link(struct inode *inode)
+{
+	/* Refer to simple_link() */
+
+	inode->i_ctime = current_time(inode);
+	inc_nlink(inode);
+
+	/*
+	 * ihold() to add additional reference to inode for reference in dentry,
+	 * created in kvm_gmem_alloc_file() -> alloc_file_pseudo(). This is not
+	 * necessary when creating a new file because alloc_inode() creates
+	 * inodes with i_count = 1, which is the refcount for the dentry in the
+	 * file.
+	 */
+	ihold(inode);
+
+	/*
+	 * dget() and d_instantiate() complete the setup of a dentry, but those
+	 * have already been done in kvm_gmem_alloc_file() ->
+	 * alloc_file_pseudo()
+	 */
+}
+
+int kvm_gmem_link(struct kvm *kvm, struct kvm_link_guest_memfd *args)
+{
+	int ret;
+	int fd;
+	struct fd f;
+	struct kvm_gmem *gmem;
+	u64 flags = args->flags;
+	u64 valid_flags = 0;
+	struct inode *inode;
+	struct file *dst_file;
+
+	if (flags & ~valid_flags)
+		return -EINVAL;
+
+	f = fdget(args->fd);
+	if (!f.file)
+		return -EINVAL;
+
+	ret = -EINVAL;
+	if (f.file->f_op != &kvm_gmem_fops)
+		goto out;
+
+	/* Cannot link a gmem file with the same vm again */
+	gmem = f.file->private_data;
+	if (gmem->kvm == kvm)
+		goto out;
+
+	ret = fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		goto out;
+
+	inode = file_inode(f.file);
+	dst_file = kvm_gmem_alloc_file(kvm, inode, kvm_gmem_mnt);
+	if (IS_ERR(dst_file)) {
+		ret = PTR_ERR(dst_file);
+		goto out_fd;
+	}
+
+	__kvm_gmem_do_link(inode);
+
+	fd_install(fd, dst_file);
+	return fd;
+
+out_fd:
+	put_unused_fd(fd);
+out:
+	fdput(f);
+	return ret;
+}
+
 int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
		  unsigned int fd, loff_t offset)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ee331cf8ba54..51cc8b80ebe0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5177,6 +5177,16 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_gmem_create(kvm, &guest_memfd);
 		break;
 	}
+	case KVM_LINK_GUEST_MEMFD: {
+		struct kvm_link_guest_memfd params;
+
+		r = -EFAULT;
+		if (copy_from_user(&params, argp, sizeof(params)))
+			goto out;
+
+		r = kvm_gmem_link(kvm, &params);
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 798f20d612bb..f85f452133b3 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -41,6 +41,7 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
 int kvm_gmem_init(void);
 void kvm_gmem_exit(void);
 int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
+int kvm_gmem_link(struct kvm *kvm, struct kvm_link_guest_memfd *args);
 int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
		  unsigned int fd, loff_t offset);
 void kvm_gmem_unbind(struct kvm_memory_slot *slot);
@@ -61,6 +62,12 @@ static inline int kvm_gmem_create(struct kvm *kvm,
 	return -EOPNOTSUPP;
 }

+static inline int kvm_gmem_link(struct kvm *kvm,
+				struct kvm_link_guest_memfd *args)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int kvm_gmem_bind(struct kvm *kvm,
				struct kvm_memory_slot *slot,
				unsigned int fd, loff_t offset)
On Mon, Aug 07, 2023, Ackerley Tng wrote:
> KVM_LINK_GUEST_MEMFD will link a gmem fd's underlying inode to a new
> file (and fd).
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> ---
>  include/uapi/linux/kvm.h |  8 +++++
>  virt/kvm/guest_mem.c     | 73 ++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/kvm_main.c      | 10 ++++++
>  virt/kvm/kvm_mm.h        |  7 ++++
>  4 files changed, 98 insertions(+)
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index eb900344a054..d0e2a2ce0df2 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -2299,4 +2299,12 @@ struct kvm_create_guest_memfd {
>  	__u64 reserved[6];
>  };
>
> +#define KVM_LINK_GUEST_MEMFD	_IOWR(KVMIO, 0xd5, struct kvm_link_guest_memfd)
> +
> +struct kvm_link_guest_memfd {
> +	__u64 fd;
> +	__u64 flags;
> +	__u64 reserved[6];
> +};
> +
>  #endif /* __LINUX_KVM_H */
> diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
> index 30d0ab8745ee..1b3df273f785 100644
> --- a/virt/kvm/guest_mem.c
> +++ b/virt/kvm/guest_mem.c
> @@ -477,6 +477,79 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
>  	return __kvm_gmem_create(kvm, size, flags, kvm_gmem_mnt);
>  }
>
> +static inline void __kvm_gmem_do_link(struct inode *inode)
> +{
> +	/* Refer to simple_link() */
> +
> +	inode->i_ctime = current_time(inode);
> +	inc_nlink(inode);
> +
> +	/*
> +	 * ihold() to add additional reference to inode for reference in dentry,
> +	 * created in kvm_gmem_alloc_file() -> alloc_file_pseudo(). This is not
> +	 * necessary when creating a new file because alloc_inode() creates
> +	 * inodes with i_count = 1, which is the refcount for the dentry in the
> +	 * file.
> +	 */
> +	ihold(inode);
> +
> +	/*
> +	 * dget() and d_instantiate() complete the setup of a dentry, but those
> +	 * have already been done in kvm_gmem_alloc_file() ->
> +	 * alloc_file_pseudo()
> +	 */
> +}
Does this have to be done before the fd is exposed to userspace, or can it be done after? If it can be done after, I'd prefer to have the allocation helper also install the fd, and also rename it to something that better conveys that it's allocating more than just the file, e.g. that it allocates and initializes kvm_gmem too.
Completely untested, but this is what I'm thinking/hoping.
static int kvm_gmem_alloc_view(struct kvm *kvm, struct inode *inode,
			       struct vfsmount *mnt)
{
	struct file *file;
	struct kvm_gmem *gmem;
	int fd, r;

	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
	if (!gmem)
		return -ENOMEM;

	fd = get_unused_fd_flags(0);
	if (fd < 0) {
		r = fd;
		goto err_fd;
	}

	file = alloc_file_pseudo(inode, mnt, "kvm-gmem", O_RDWR,
				 &kvm_gmem_fops);
	if (IS_ERR(file)) {
		r = PTR_ERR(file);
		goto err_file;
	}

	file->f_flags |= O_LARGEFILE;
	file->f_mapping = inode->i_mapping;

	kvm_get_kvm(kvm);
	gmem->kvm = kvm;
	xa_init(&gmem->bindings);

	file->private_data = gmem;

	list_add(&gmem->entry, &inode->i_mapping->private_list);

	fd_install(fd, file);
	return fd;

err_file:
	put_unused_fd(fd);
err_fd:
	kfree(gmem);
	return r;
}

static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags,
			     struct vfsmount *mnt)
{
	const char *anon_name = "[kvm-gmem]";
	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
	struct inode *inode;
	int fd, err;

	inode = alloc_anon_inode(mnt->mnt_sb);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	err = security_inode_init_security_anon(inode, &qname, NULL);
	if (err)
		goto err;

	inode->i_private = (void *)(unsigned long)flags;
	inode->i_op = &kvm_gmem_iops;
	inode->i_mapping->a_ops = &kvm_gmem_aops;
	inode->i_mode |= S_IFREG;
	inode->i_size = size;
	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
	mapping_set_large_folios(inode->i_mapping);
	mapping_set_unevictable(inode->i_mapping);
	mapping_set_unmovable(inode->i_mapping);

	fd = kvm_gmem_alloc_view(kvm, inode, mnt);
	if (fd < 0) {
		err = fd;
		goto err;
	}

	return fd;

err:
	iput(inode);
	return err;
}
Test that:

+ Invalid inputs are rejected with EINVAL
+ Successful inputs return a new (destination) fd
+ Destination and source fds have the same inode number
+ There is no crash on program exit
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 .../testing/selftests/kvm/guest_memfd_test.c  | 42 +++++++++++++++++++
 .../selftests/kvm/include/kvm_util_base.h     | 18 ++++++++
 2 files changed, 60 insertions(+)

diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index ad20f11b2d2c..38fe96ea60f9 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -105,6 +105,47 @@ static void test_create_guest_memfd_invalid(struct kvm_vm *vm, size_t page_size)
 	ASSERT_EQ(errno, EINVAL);
 }
+static void test_link(struct kvm_vm *src_vm, int src_fd, size_t total_size)
+{
+	int ret;
+	int dst_fd;
+	struct kvm_vm *dst_vm;
+	struct stat src_stat;
+	struct stat dst_stat;
+
+	dst_vm = vm_create_barebones();
+
+	/* Linking with a nonexistent fd */
+	dst_fd = __vm_link_guest_memfd(dst_vm, 99, 0);
+	ASSERT_EQ(dst_fd, -1);
+	ASSERT_EQ(errno, EINVAL);
+
+	/* Linking with a non-gmem fd */
+	dst_fd = __vm_link_guest_memfd(dst_vm, 0, 0);
+	ASSERT_EQ(dst_fd, -1);
+	ASSERT_EQ(errno, EINVAL);
+
+	/* Linking with invalid flags */
+	dst_fd = __vm_link_guest_memfd(dst_vm, src_fd, 1);
+	ASSERT_EQ(dst_fd, -1);
+	ASSERT_EQ(errno, EINVAL);
+
+	/* Linking with an already-associated vm */
+	dst_fd = __vm_link_guest_memfd(src_vm, src_fd, 0);
+	ASSERT_EQ(dst_fd, -1);
+	ASSERT_EQ(errno, EINVAL);
+
+	dst_fd = __vm_link_guest_memfd(dst_vm, src_fd, 0);
+	TEST_ASSERT(dst_fd > 0, "linking should succeed with valid inputs");
+	TEST_ASSERT(src_fd != dst_fd, "linking should return a different fd");
+
+	ret = fstat(src_fd, &src_stat);
+	ASSERT_EQ(ret, 0);
+	ret = fstat(dst_fd, &dst_stat);
+	ASSERT_EQ(ret, 0);
+	TEST_ASSERT(src_stat.st_ino == dst_stat.st_ino,
+		    "src and dst files should have the same inode number");
+}
+
 int main(int argc, char *argv[])
 {
@@ -126,6 +167,7 @@ int main(int argc, char *argv[])
 	test_mmap(fd, page_size);
 	test_file_size(fd, page_size, total_size);
 	test_fallocate(fd, page_size, total_size);
+	test_link(vm, fd, total_size);

 	close(fd);
 }
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 8bdfadd72349..868925b26a7b 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -495,6 +495,24 @@ static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
 	return fd;
 }

+static inline int __vm_link_guest_memfd(struct kvm_vm *vm, int fd, uint64_t flags)
+{
+	struct kvm_link_guest_memfd params = {
+		.fd = fd,
+		.flags = flags,
+	};
+
+	return __vm_ioctl(vm, KVM_LINK_GUEST_MEMFD, &params);
+}
+
+static inline int vm_link_guest_memfd(struct kvm_vm *vm, int fd, uint64_t flags)
+{
+	int new_fd = __vm_link_guest_memfd(vm, fd, flags);
+
+	TEST_ASSERT(new_fd >= 0, KVM_IOCTL_ERROR(KVM_LINK_GUEST_MEMFD, new_fd));
+	return new_fd;
+}
+
 void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva);
 int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 tools/testing/selftests/kvm/Makefile          |  1 +
 .../kvm/x86_64/private_mem_migrate_tests.c    | 87 +++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index cb9450022302..d348ff56c92b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -82,6 +82,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
 TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
 TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86_64 += x86_64/private_mem_migrate_tests
 TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
 TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c
new file mode 100644
index 000000000000..4226de3ebd41
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "kvm_util_base.h"
+#include "test_util.h"
+#include "ucall_common.h"
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+
+#define TRANSFER_PRIVATE_MEM_TEST_SLOT	10
+#define TRANSFER_PRIVATE_MEM_GPA	((uint64_t)(1ull << 32))
+#define TRANSFER_PRIVATE_MEM_GVA	TRANSFER_PRIVATE_MEM_GPA
+#define TRANSFER_PRIVATE_MEM_VALUE	0xdeadbeef
+
+static void transfer_private_mem_guest_code_src(void)
+{
+	uint64_t volatile *const ptr = (uint64_t *)TRANSFER_PRIVATE_MEM_GVA;
+
+	*ptr = TRANSFER_PRIVATE_MEM_VALUE;
+
+	GUEST_SYNC1(*ptr);
+}
+
+static void transfer_private_mem_guest_code_dst(void)
+{
+	uint64_t volatile *const ptr = (uint64_t *)TRANSFER_PRIVATE_MEM_GVA;
+
+	GUEST_SYNC1(*ptr);
+}
+
+static void test_transfer_private_mem(void)
+{
+	struct kvm_vm *src_vm, *dst_vm;
+	struct kvm_vcpu *src_vcpu, *dst_vcpu;
+	int src_memfd, dst_memfd;
+	struct ucall uc;
+
+	const struct vm_shape shape = {
+		.mode = VM_MODE_DEFAULT,
+		.type = KVM_X86_SW_PROTECTED_VM,
+	};
+
+	/* Build the source VM, use it to write to private memory */
+	src_vm = __vm_create_shape_with_one_vcpu(
+		shape, &src_vcpu, 0, transfer_private_mem_guest_code_src);
+	src_memfd = vm_create_guest_memfd(src_vm, SZ_4K, 0);
+
+	vm_mem_add(src_vm, DEFAULT_VM_MEM_SRC, TRANSFER_PRIVATE_MEM_GPA,
+		   TRANSFER_PRIVATE_MEM_TEST_SLOT, 1, KVM_MEM_PRIVATE,
+		   src_memfd, 0);
+
+	virt_map(src_vm, TRANSFER_PRIVATE_MEM_GVA, TRANSFER_PRIVATE_MEM_GPA, 1);
+	vm_set_memory_attributes(src_vm, TRANSFER_PRIVATE_MEM_GPA, SZ_4K,
+				 KVM_MEMORY_ATTRIBUTE_PRIVATE);
+
+	vcpu_run(src_vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(src_vcpu, KVM_EXIT_IO);
+	get_ucall(src_vcpu, &uc);
+	TEST_ASSERT(uc.args[0] == TRANSFER_PRIVATE_MEM_VALUE,
+		    "Source VM should be able to write to private memory");
+
+	/* Build the destination VM with linked fd */
+	dst_vm = __vm_create_shape_with_one_vcpu(
+		shape, &dst_vcpu, 0, transfer_private_mem_guest_code_dst);
+	dst_memfd = vm_link_guest_memfd(dst_vm, src_memfd, 0);
+
+	vm_mem_add(dst_vm, DEFAULT_VM_MEM_SRC, TRANSFER_PRIVATE_MEM_GPA,
+		   TRANSFER_PRIVATE_MEM_TEST_SLOT, 1, KVM_MEM_PRIVATE,
+		   dst_memfd, 0);
+
+	virt_map(dst_vm, TRANSFER_PRIVATE_MEM_GVA, TRANSFER_PRIVATE_MEM_GPA, 1);
+	vm_set_memory_attributes(dst_vm, TRANSFER_PRIVATE_MEM_GPA, SZ_4K,
+				 KVM_MEMORY_ATTRIBUTE_PRIVATE);
+
+	vcpu_run(dst_vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(dst_vcpu, KVM_EXIT_IO);
+	get_ucall(dst_vcpu, &uc);
+	TEST_ASSERT(uc.args[0] == TRANSFER_PRIVATE_MEM_VALUE,
+		    "Destination VM should be able to read value transferred");
+}
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+	test_transfer_private_mem();
+
+	return 0;
+}
The migration_in_progress flag will also be needed for migration of non-SEV VMs.
Co-developed-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Co-developed-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 arch/x86/kvm/svm/sev.c   | 17 ++++++-----------
 arch/x86/kvm/svm/svm.h   |  1 -
 include/linux/kvm_host.h |  1 +
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 07756b7348ae..725289b523c7 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1556,8 +1556,6 @@ static bool is_cmd_allowed_from_mirror(u32 cmd_id)

 static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 {
-	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
-	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
 	int r = -EBUSY;

 	if (dst_kvm == src_kvm)
@@ -1567,10 +1565,10 @@ static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 	 * Bail if these VMs are already involved in a migration to avoid
 	 * deadlock between two VMs trying to migrate to/from each other.
 	 */
-	if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
+	if (atomic_cmpxchg_acquire(&dst_kvm->migration_in_progress, 0, 1))
 		return -EBUSY;

-	if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
+	if (atomic_cmpxchg_acquire(&src_kvm->migration_in_progress, 0, 1))
 		goto release_dst;

 	r = -EINTR;
@@ -1583,21 +1581,18 @@ static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 unlock_dst:
 	mutex_unlock(&dst_kvm->lock);
 release_src:
-	atomic_set_release(&src_sev->migration_in_progress, 0);
+	atomic_set_release(&src_kvm->migration_in_progress, 0);
 release_dst:
-	atomic_set_release(&dst_sev->migration_in_progress, 0);
+	atomic_set_release(&dst_kvm->migration_in_progress, 0);
 	return r;
 }

 static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 {
-	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
-	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
-
 	mutex_unlock(&dst_kvm->lock);
 	mutex_unlock(&src_kvm->lock);
-	atomic_set_release(&dst_sev->migration_in_progress, 0);
-	atomic_set_release(&src_sev->migration_in_progress, 0);
+	atomic_set_release(&dst_kvm->migration_in_progress, 0);
+	atomic_set_release(&src_kvm->migration_in_progress, 0);
 }

 /* vCPU mutex subclasses. */
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 18af7e712a5a..d306e2312b53 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -87,7 +87,6 @@ struct kvm_sev_info {
 	struct list_head mirror_vms; /* List of VMs mirroring */
 	struct list_head mirror_entry; /* Use as a list entry of mirrors */
 	struct misc_cg *misc_cg; /* For misc cgroup accounting */
-	atomic_t migration_in_progress;
 };

 struct kvm_svm {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 091bc89ae805..3e03eeca279f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -842,6 +842,7 @@ struct kvm {
 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
 	struct xarray mem_attr_array;
 #endif
+	atomic_t migration_in_progress;
 	char stats_id[KVM_STATS_NAME_SIZE];
 };
Split sev_lock_two_vms() into kvm_mark_migration_in_progress() and kvm_lock_two_vms() and refactor sev.c to use these two new functions.
Co-developed-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Co-developed-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 arch/x86/kvm/svm/sev.c | 59 ++++++++++------------------------
 arch/x86/kvm/x86.c     | 62 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.h     |  6 ++++
 3 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 725289b523c7..3c4313417966 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1554,47 +1554,6 @@ static bool is_cmd_allowed_from_mirror(u32 cmd_id)
 	return false;
 }

-static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
-{
-	int r = -EBUSY;
-
-	if (dst_kvm == src_kvm)
-		return -EINVAL;
-
-	/*
-	 * Bail if these VMs are already involved in a migration to avoid
-	 * deadlock between two VMs trying to migrate to/from each other.
-	 */
-	if (atomic_cmpxchg_acquire(&dst_kvm->migration_in_progress, 0, 1))
-		return -EBUSY;
-
-	if (atomic_cmpxchg_acquire(&src_kvm->migration_in_progress, 0, 1))
-		goto release_dst;
-
-	r = -EINTR;
-	if (mutex_lock_killable(&dst_kvm->lock))
-		goto release_src;
-	if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING))
-		goto unlock_dst;
-	return 0;
-
-unlock_dst:
-	mutex_unlock(&dst_kvm->lock);
-release_src:
-	atomic_set_release(&src_kvm->migration_in_progress, 0);
-release_dst:
-	atomic_set_release(&dst_kvm->migration_in_progress, 0);
-	return r;
-}
-
-static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
-{
-	mutex_unlock(&dst_kvm->lock);
-	mutex_unlock(&src_kvm->lock);
-	atomic_set_release(&dst_kvm->migration_in_progress, 0);
-	atomic_set_release(&src_kvm->migration_in_progress, 0);
-}
-
 /* vCPU mutex subclasses. */
 enum sev_migration_role {
 	SEV_MIGRATION_SOURCE = 0,
@@ -1777,9 +1736,12 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	}

 	source_kvm = f.file->private_data;
-	ret = sev_lock_two_vms(kvm, source_kvm);
+	ret = kvm_mark_migration_in_progress(kvm, source_kvm);
 	if (ret)
 		goto out_fput;
+	ret = kvm_lock_two_vms(kvm, source_kvm);
+	if (ret)
+		goto out_mark_migration_done;
 	if (sev_guest(kvm) || !sev_guest(source_kvm)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -1823,8 +1785,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	sev_misc_cg_uncharge(cg_cleanup_sev);
 	put_misc_cg(cg_cleanup_sev->misc_cg);
 	cg_cleanup_sev->misc_cg = NULL;
 out_unlock:
-	sev_unlock_two_vms(kvm, source_kvm);
+	kvm_unlock_two_vms(kvm, source_kvm);
+out_mark_migration_done:
+	kvm_mark_migration_done(kvm, source_kvm);
 out_fput:
 	fdput(f);
 	return ret;
@@ -2057,9 +2021,12 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	}
 	source_kvm = f.file->private_data;
-	ret = sev_lock_two_vms(kvm, source_kvm);
+	ret = kvm_mark_migration_in_progress(kvm, source_kvm);
 	if (ret)
 		goto e_source_fput;
+	ret = kvm_lock_two_vms(kvm, source_kvm);
+	if (ret)
+		goto e_mark_migration_done;

 	/*
 	 * Mirrors of mirrors should work, but let's not get silly.  Also
@@ -2100,7 +2067,9 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	 */

 e_unlock:
-	sev_unlock_two_vms(kvm, source_kvm);
+	kvm_unlock_two_vms(kvm, source_kvm);
+e_mark_migration_done:
+	kvm_mark_migration_done(kvm, source_kvm);
 e_source_fput:
 	fdput(f);
 	return ret;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index de195ad83ec0..494b75ef7197 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4340,6 +4340,68 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 }
 EXPORT_SYMBOL_GPL(kvm_get_msr_common);

+int kvm_mark_migration_in_progress(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+	int r;
+
+	if (dst_kvm == src_kvm)
+		return -EINVAL;
+
+	/*
+	 * Bail if these VMs are already involved in a migration to avoid
+	 * deadlock between two VMs trying to migrate to/from each other.
+	 */
+	r = -EBUSY;
+	if (atomic_cmpxchg_acquire(&dst_kvm->migration_in_progress, 0, 1))
+		return r;
+
+	if (atomic_cmpxchg_acquire(&src_kvm->migration_in_progress, 0, 1))
+		goto release_dst;
+
+	return 0;
+
+release_dst:
+	atomic_set_release(&dst_kvm->migration_in_progress, 0);
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_mark_migration_in_progress);
+
+void kvm_mark_migration_done(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+	atomic_set_release(&dst_kvm->migration_in_progress, 0);
+	atomic_set_release(&src_kvm->migration_in_progress, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_mark_migration_done);
+
+int kvm_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+	int r;
+
+	if (dst_kvm == src_kvm)
+		return -EINVAL;
+
+	r = -EINTR;
+	if (mutex_lock_killable(&dst_kvm->lock))
+		return r;
+
+	if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING))
+		goto unlock_dst;
+
+	return 0;
+
+unlock_dst:
+	mutex_unlock(&dst_kvm->lock);
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_lock_two_vms);
+
+void kvm_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+	mutex_unlock(&dst_kvm->lock);
+	mutex_unlock(&src_kvm->lock);
+}
+EXPORT_SYMBOL_GPL(kvm_unlock_two_vms);
+
 /*
  * Read or write a bunch of msrs.  All parameters are kernel addresses.
  *
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 82e3dafc5453..4c6edaf5ac5b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -539,4 +539,10 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
			 unsigned int port, void *data, unsigned int count,
			 int in);

+int kvm_mark_migration_in_progress(struct kvm *dst_kvm, struct kvm *src_kvm);
+void kvm_mark_migration_done(struct kvm *dst_kvm, struct kvm *src_kvm);
+
+int kvm_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm);
+void kvm_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm);
+
 #endif
Co-developed-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Co-developed-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm/sev.c          | 33 ++++-----------------
 arch/x86/kvm/svm/svm.h          |  2 +-
 arch/x86/kvm/x86.c              | 42 +++++++++++++++++++++++++----
 4 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bbefd79b7950..71c1236e4f18 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1711,7 +1711,7 @@ struct kvm_x86_ops {
 	int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 	int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 	int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
-	int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+	int (*vm_move_enc_context_from)(struct kvm *kvm, struct kvm *source_kvm);
 	void (*guest_memory_reclaimed)(struct kvm *kvm);

 	int (*get_msr_feature)(struct kvm_msr_entry *entry);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 3c4313417966..e0e206aa3e62 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1718,35 +1718,15 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
 	return 0;
 }

-int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
+int sev_vm_move_enc_context_from(struct kvm *kvm, struct kvm *source_kvm)
 {
 	struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
 	struct kvm_sev_info *src_sev, *cg_cleanup_sev;
-	struct fd f = fdget(source_fd);
-	struct kvm *source_kvm;
 	bool charged = false;
 	int ret;

-	if (!f.file)
-		return -EBADF;
-
-	if (!file_is_kvm(f.file)) {
-		ret = -EBADF;
-		goto out_fput;
-	}
-
-	source_kvm = f.file->private_data;
-	ret = kvm_mark_migration_in_progress(kvm, source_kvm);
-	if (ret)
-		goto out_fput;
-	ret = kvm_lock_two_vms(kvm, source_kvm);
-	if (ret)
-		goto out_mark_migration_done;
-
-	if (sev_guest(kvm) || !sev_guest(source_kvm)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
+	if (sev_guest(kvm) || !sev_guest(source_kvm))
+		return -EINVAL;

 	src_sev = &to_kvm_svm(source_kvm)->sev_info;
@@ -1785,12 +1765,7 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	sev_misc_cg_uncharge(cg_cleanup_sev);
 	put_misc_cg(cg_cleanup_sev->misc_cg);
 	cg_cleanup_sev->misc_cg = NULL;
-out_unlock:
-	kvm_unlock_two_vms(kvm, source_kvm);
-out_mark_migration_done:
-	kvm_mark_migration_done(kvm, source_kvm);
-out_fput:
-	fdput(f);
+
 	return ret;
 }
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index d306e2312b53..4912ac28a3d8 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -721,7 +721,7 @@ int sev_mem_enc_register_region(struct kvm *kvm,
 int sev_mem_enc_unregister_region(struct kvm *kvm,
				  struct kvm_enc_region *range);
 int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
-int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
+int sev_vm_move_enc_context_from(struct kvm *kvm, struct kvm *source_kvm);
 void sev_guest_memory_reclaimed(struct kvm *kvm);

 void pre_sev_run(struct vcpu_svm *svm, int cpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 494b75ef7197..75d48379d94d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6325,6 +6325,42 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 	return 0;
 }

+static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
+{
+	int r;
+	struct kvm *source_kvm;
+	struct fd f = fdget(source_fd);
+
+	r = -EBADF;
+	if (!f.file)
+		return r;
+
+	if (!file_is_kvm(f.file))
+		goto out_fdput;
+
+	r = -EINVAL;
+	source_kvm = f.file->private_data;
+	if (kvm->arch.vm_type != source_kvm->arch.vm_type)
+		goto out_fdput;
+
+	r = kvm_mark_migration_in_progress(kvm, source_kvm);
+	if (r)
+		goto out_fdput;
+
+	r = kvm_lock_two_vms(kvm, source_kvm);
+	if (r)
+		goto out_mark_migration_done;
+
+	r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, source_kvm);
+
+	kvm_unlock_two_vms(kvm, source_kvm);
+out_mark_migration_done:
+	kvm_mark_migration_done(kvm, source_kvm);
+out_fdput:
+	fdput(f);
+	return r;
+}
+
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
			    struct kvm_enable_cap *cap)
 {
@@ -6463,11 +6499,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
		r = static_call(kvm_x86_vm_copy_enc_context_from)(kvm, cap->args[0]);
		break;
	case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
-		r = -EINVAL;
-		if (!kvm_x86_ops.vm_move_enc_context_from)
-			break;
-
-		r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, cap->args[0]);
+		r = kvm_vm_move_enc_context_from(kvm, cap->args[0]);
		break;
	case KVM_CAP_EXIT_HYPERCALL:
		if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
SEV-capable VMs may also use the KVM_X86_SW_PROTECTED_VM type, but they will still need architecture-specific handling to move encryption context. Hence, we let moving of encryption context be configurable and store that configuration in a flag.
Co-developed-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 arch/x86/include/asm/kvm_host.h | 2 ++
 arch/x86/kvm/svm/sev.c          | 2 ++
 arch/x86/kvm/x86.c              | 9 ++++++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 71c1236e4f18..ab45a3d3c867 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1445,6 +1445,8 @@ struct kvm_arch {
	 */
 #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
 	struct kvm_mmu_memory_cache split_desc_cache;
+
+	bool vm_move_enc_ctxt_supported;
 };

 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e0e206aa3e62..b09e6477e309 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -256,6 +256,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
		goto e_no_asid;
	sev->asid = asid;

+	kvm->arch.vm_move_enc_ctxt_supported = true;
+
	ret = sev_platform_init(&argp->error);
	if (ret)
		goto e_free;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 75d48379d94d..a1a28dd77b94 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6351,7 +6351,14 @@ static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
	if (r)
		goto out_mark_migration_done;

-	r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, source_kvm);
+	/*
+	 * Different types of VMs will allow userspace to define if moving
+	 * encryption context should be supported.
+	 */
+	if (kvm->arch.vm_move_enc_ctxt_supported &&
+	    kvm_x86_ops.vm_move_enc_context_from) {
+		r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, source_kvm);
+	}

 	kvm_unlock_two_vms(kvm, source_kvm);
 out_mark_migration_done:
On 8/8/23 01:01, Ackerley Tng wrote:
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 75d48379d94d..a1a28dd77b94 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6351,7 +6351,14 @@ static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
>  	if (r)
>  		goto out_mark_migration_done;
>
> -	r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, source_kvm);
> +	/*
> +	 * Different types of VMs will allow userspace to define if moving
> +	 * encryption context should be supported.
> +	 */
> +	if (kvm->arch.vm_move_enc_ctxt_supported &&
> +	    kvm_x86_ops.vm_move_enc_context_from) {
> +		r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, source_kvm);
> +	}
Rather than "supported" this is more "required". So perhaps kvm->arch.use_vm_enc_ctxt_op?
Paolo
Paolo Bonzini <pbonzini@redhat.com> writes:
> On 8/8/23 01:01, Ackerley Tng wrote:
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 75d48379d94d..a1a28dd77b94 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6351,7 +6351,14 @@ static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
>>  	if (r)
>>  		goto out_mark_migration_done;
>>
>> -	r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, source_kvm);
>> +	/*
>> +	 * Different types of VMs will allow userspace to define if moving
>> +	 * encryption context should be supported.
>> +	 */
>> +	if (kvm->arch.vm_move_enc_ctxt_supported &&
>> +	    kvm_x86_ops.vm_move_enc_context_from) {
>> +		r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, source_kvm);
>> +	}
>
> Rather than "supported" this is more "required".  So perhaps
> kvm->arch.use_vm_enc_ctxt_op?
>
> Paolo
Thanks, that is a great suggestion, I'll incorporate this in the next revision!
Migration of memory context involves moving lpage_info and mem_attr_array from source to destination VM.
Co-developed-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Co-developed-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Vishal Annapurve <vannapurve@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 arch/x86/kvm/x86.c       | 110 +++++++++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h |  17 ++++++
 virt/kvm/guest_mem.c     |  25 +++++++++
 3 files changed, 152 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1a28dd77b94..12688754c556 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4402,6 +4402,33 @@ void kvm_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_unlock_two_vms);

+static int kvm_lock_vm_memslots(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+	int r = -EINVAL;
+
+	if (dst_kvm == src_kvm)
+		return r;
+
+	r = -EINTR;
+	if (mutex_lock_killable(&dst_kvm->slots_lock))
+		return r;
+
+	if (mutex_lock_killable_nested(&src_kvm->slots_lock, SINGLE_DEPTH_NESTING))
+		goto unlock_dst;
+
+	return 0;
+
+unlock_dst:
+	mutex_unlock(&dst_kvm->slots_lock);
+	return r;
+}
+
+static void kvm_unlock_vm_memslots(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+	mutex_unlock(&src_kvm->slots_lock);
+	mutex_unlock(&dst_kvm->slots_lock);
+}
+
 /*
  * Read or write a bunch of msrs.  All parameters are kernel addresses.
  *
@@ -6325,6 +6352,78 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 	return 0;
 }

+static bool memslot_configurations_match(struct kvm_memslots *src_slots,
+					 struct kvm_memslots *dst_slots)
+{
+	struct kvm_memslot_iter src_iter;
+	struct kvm_memslot_iter dst_iter;
+
+	kvm_for_each_memslot_pair(&src_iter, src_slots, &dst_iter, dst_slots) {
+		if (src_iter.slot->base_gfn != dst_iter.slot->base_gfn ||
+		    src_iter.slot->npages != dst_iter.slot->npages ||
+		    src_iter.slot->flags != dst_iter.slot->flags)
+			return false;
+
+		if (kvm_slot_can_be_private(dst_iter.slot) &&
+		    !kvm_gmem_params_match(src_iter.slot, dst_iter.slot))
+			return false;
+	}
+
+	/* There should be no more nodes to iterate if configurations match */
+	return !src_iter.node && !dst_iter.node;
+}
+
+static int kvm_move_memory_ctxt_from(struct kvm *dst, struct kvm *src)
+{
+	struct kvm_memslot_iter src_iter;
+	struct kvm_memslot_iter dst_iter;
+	struct kvm_memslots *src_slots, *dst_slots;
+	int i;
+
+	/* TODO: Do we also need to check consistency for as_id == SMM? */
+	src_slots = __kvm_memslots(src, 0);
+	dst_slots = __kvm_memslots(dst, 0);
+
+	if (!memslot_configurations_match(src_slots, dst_slots))
+		return -EINVAL;
+
+	/*
+	 * Transferring lpage_info is an optimization, lpage_info can be rebuilt
+	 * by the destination VM.
+	 */
+	kvm_for_each_memslot_pair(&src_iter, src_slots, &dst_iter, dst_slots) {
+		for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
+			unsigned long ugfn = dst_iter.slot->userspace_addr >> PAGE_SHIFT;
+			int level = i + 1;
+
+			/*
+			 * If the gfn and userspace address are not aligned wrt each
+			 * other, skip migrating lpage_info.
+			 */
+			if ((dst_iter.slot->base_gfn ^ ugfn) &
+			    (KVM_PAGES_PER_HPAGE(level) - 1))
+				continue;
+
+			kvfree(dst_iter.slot->arch.lpage_info[i - 1]);
+			dst_iter.slot->arch.lpage_info[i - 1] =
+				src_iter.slot->arch.lpage_info[i - 1];
+			src_iter.slot->arch.lpage_info[i - 1] = NULL;
+		}
+	}
+
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+	/*
+	 * For VMs that don't use private memory, this will just be moving an
+	 * empty xarray pointer.
+	 */
+	dst->mem_attr_array.xa_head = src->mem_attr_array.xa_head;
+	src->mem_attr_array.xa_head = NULL;
+#endif
+
+	kvm_vm_dead(src);
+	return 0;
+}
+
 static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 {
 	int r;
@@ -6351,6 +6450,14 @@ static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	if (r)
 		goto out_mark_migration_done;

+	r = kvm_lock_vm_memslots(kvm, source_kvm);
+	if (r)
+		goto out_unlock;
+
+	r = kvm_move_memory_ctxt_from(kvm, source_kvm);
+	if (r)
+		goto out_unlock_memslots;
+
 	/*
 	 * Different types of VMs will allow userspace to define if moving
 	 * encryption context should be supported.
@@ -6360,6 +6467,9 @@ static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
		r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, source_kvm);
	}

+out_unlock_memslots:
+	kvm_unlock_vm_memslots(kvm, source_kvm);
+out_unlock:
 	kvm_unlock_two_vms(kvm, source_kvm);
 out_mark_migration_done:
 	kvm_mark_migration_done(kvm, source_kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3e03eeca279f..2f44b5d294a8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1144,6 +1144,15 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_
	     kvm_memslot_iter_is_valid(iter, end);			\
	     kvm_memslot_iter_next(iter))

+/* Iterate over a pair of memslots in gfn order until one of the trees end */
+#define kvm_for_each_memslot_pair(iter1, slots1, iter2, slots2)	\
+	for (kvm_memslot_iter_start(iter1, slots1, 0),			\
+	     kvm_memslot_iter_start(iter2, slots2, 0);			\
+	     kvm_memslot_iter_is_valid(iter1, U64_MAX) &&		\
+	     kvm_memslot_iter_is_valid(iter2, U64_MAX);			\
+	     kvm_memslot_iter_next(iter1),				\
+	     kvm_memslot_iter_next(iter2))
+
 /*
  * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
  * - create a new memory slot
@@ -2359,6 +2368,8 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 #ifdef CONFIG_KVM_PRIVATE_MEM
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
+bool kvm_gmem_params_match(struct kvm_memory_slot *slot1,
+			   struct kvm_memory_slot *slot2);
 #else
 static inline int kvm_gmem_get_pfn(struct kvm *kvm,
				   struct kvm_memory_slot *slot, gfn_t gfn,
@@ -2367,6 +2378,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 	KVM_BUG_ON(1, kvm);
 	return -EIO;
 }
+
+static inline bool kvm_gmem_params_match(struct kvm_memory_slot *slot1,
+					 struct kvm_memory_slot *slot2)
+{
+	return false;
+}
 #endif /* CONFIG_KVM_PRIVATE_MEM */

 #endif
diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
index 1b3df273f785..2f84e5c67942 100644
--- a/virt/kvm/guest_mem.c
+++ b/virt/kvm/guest_mem.c
@@ -686,6 +686,31 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);

+bool kvm_gmem_params_match(struct kvm_memory_slot *slot1,
+			   struct kvm_memory_slot *slot2)
+{
+	bool ret;
+	struct file *file1;
+	struct file *file2;
+
+	if (slot1->gmem.pgoff != slot2->gmem.pgoff)
+		return false;
+
+	file1 = kvm_gmem_get_file(slot1);
+	file2 = kvm_gmem_get_file(slot2);
+
+	ret = (file1 && file2 &&
+	       file_inode(file1) == file_inode(file2));
+
+	if (file1)
+		fput(file1);
+	if (file2)
+		fput(file2);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_params_match);
+
 static int kvm_gmem_init_fs_context(struct fs_context *fc)
 {
	if (!init_pseudo(fc, GUEST_MEMORY_MAGIC))
These functions will be used in private (guest mem) migration tests.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 .../selftests/kvm/include/kvm_util_base.h     | 13 +++++
 .../selftests/kvm/x86_64/sev_migrate_tests.c  | 48 +++++++------------
 2 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 868925b26a7b..af6ebead5bc3 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -331,6 +331,19 @@ static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
 	vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
 }

+static inline int __vm_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+	return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
+}
+
+static inline void vm_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+	int ret;
+
+	ret = __vm_migrate_from(dst, src);
+	TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
+}
+
 static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
					    uint64_t size, uint64_t attributes)
 {
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
index c7ef97561038..cee8219fe8d2 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -80,20 +80,6 @@ static struct kvm_vm *aux_vm_create(bool with_vcpus)
 	return vm;
 }

-static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
-{
-	return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
-}
-
-
-static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
-{
-	int ret;
-
-	ret = __sev_migrate_from(dst, src);
-	TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
-}
-
 static void test_sev_migrate_from(bool es)
 {
 	struct kvm_vm *src_vm;
@@ -105,13 +91,13 @@ static void test_sev_migrate_from(bool es)
 		dst_vms[i] = aux_vm_create(true);

 	/* Initial migration from the src to the first dst. */
-	sev_migrate_from(dst_vms[0], src_vm);
+	vm_migrate_from(dst_vms[0], src_vm);

 	for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
-		sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
+		vm_migrate_from(dst_vms[i], dst_vms[i - 1]);

 	/* Migrate the guest back to the original VM. */
-	ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
+	ret = __vm_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
 	TEST_ASSERT(ret == -1 && errno == EIO,
		    "VM that was migrated from should be dead. ret %d, errno: %d\n",
		    ret, errno);
@@ -133,7 +119,7 @@ static void *locking_test_thread(void *arg)

 	for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
 		j = i % NR_LOCK_TESTING_THREADS;
-		__sev_migrate_from(input->vm, input->source_vms[j]);
+		__vm_migrate_from(input->vm, input->source_vms[j]);
 	}

 	return NULL;
@@ -170,7 +156,7 @@ static void test_sev_migrate_parameters(void)

 	vm_no_vcpu = vm_create_barebones();
 	vm_no_sev = aux_vm_create(true);
-	ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
+	ret = __vm_migrate_from(vm_no_vcpu, vm_no_sev);
 	TEST_ASSERT(ret == -1 && errno == EINVAL,
		    "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
		    errno);
@@ -184,25 +170,25 @@ static void test_sev_migrate_parameters(void)
 	sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
 	__vm_vcpu_add(sev_es_vm_no_vmsa, 1);

-	ret = __sev_migrate_from(sev_vm, sev_es_vm);
+	ret = __vm_migrate_from(sev_vm, sev_es_vm);
 	TEST_ASSERT(
		ret == -1 && errno == EINVAL,
		"Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
		ret, errno);

-	ret = __sev_migrate_from(sev_es_vm, sev_vm);
+	ret = __vm_migrate_from(sev_es_vm, sev_vm);
 	TEST_ASSERT(
		ret == -1 && errno == EINVAL,
		"Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
		ret, errno);

-	ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
+	ret = __vm_migrate_from(vm_no_vcpu, sev_es_vm);
 	TEST_ASSERT(
		ret == -1 && errno == EINVAL,
		"SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
		ret, errno);

-	ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
+	ret = __vm_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
 	TEST_ASSERT(
		ret == -1 && errno == EINVAL,
		"SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
		ret, errno);
@@ -355,14 +341,14 @@ static void test_sev_move_copy(void)

 	sev_mirror_create(mirror_vm, sev_vm);

-	sev_migrate_from(dst_mirror_vm, mirror_vm);
-	sev_migrate_from(dst_vm, sev_vm);
+	vm_migrate_from(dst_mirror_vm, mirror_vm);
+	vm_migrate_from(dst_vm, sev_vm);

-	sev_migrate_from(dst2_vm, dst_vm);
-	sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
+	vm_migrate_from(dst2_vm, dst_vm);
+	vm_migrate_from(dst2_mirror_vm, dst_mirror_vm);

-	sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
-	sev_migrate_from(dst3_vm, dst2_vm);
+	vm_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+	vm_migrate_from(dst3_vm, dst2_vm);

 	kvm_vm_free(dst_vm);
 	kvm_vm_free(sev_vm);
@@ -384,8 +370,8 @@ static void test_sev_move_copy(void)

 	sev_mirror_create(mirror_vm, sev_vm);

-	sev_migrate_from(dst_mirror_vm, mirror_vm);
-	sev_migrate_from(dst_vm, sev_vm);
+	vm_migrate_from(dst_mirror_vm, mirror_vm);
+	vm_migrate_from(dst_vm, sev_vm);

 	kvm_vm_free(mirror_vm);
 	kvm_vm_free(dst_mirror_vm);
Tests that private mem (in guest_mem files) can be migrated. Also demonstrates the migration flow.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 .../kvm/x86_64/private_mem_migrate_tests.c    | 54 ++++++++++---------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c
index 4226de3ebd41..2691497cf207 100644
--- a/tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_migrate_tests.c
@@ -5,28 +5,28 @@
 #include <linux/kvm.h>
 #include <linux/sizes.h>

-#define TRANSFER_PRIVATE_MEM_TEST_SLOT	10
-#define TRANSFER_PRIVATE_MEM_GPA	((uint64_t)(1ull << 32))
-#define TRANSFER_PRIVATE_MEM_GVA	TRANSFER_PRIVATE_MEM_GPA
-#define TRANSFER_PRIVATE_MEM_VALUE	0xdeadbeef
+#define MIGRATE_PRIVATE_MEM_TEST_SLOT	10
+#define MIGRATE_PRIVATE_MEM_GPA		((uint64_t)(1ull << 32))
+#define MIGRATE_PRIVATE_MEM_GVA		MIGRATE_PRIVATE_MEM_GPA
+#define MIGRATE_PRIVATE_MEM_VALUE	0xdeadbeef

-static void transfer_private_mem_guest_code_src(void)
+static void migrate_private_mem_data_guest_code_src(void)
 {
-	uint64_t volatile *const ptr = (uint64_t *)TRANSFER_PRIVATE_MEM_GVA;
+	uint64_t volatile *const ptr = (uint64_t *)MIGRATE_PRIVATE_MEM_GVA;

-	*ptr = TRANSFER_PRIVATE_MEM_VALUE;
+	*ptr = MIGRATE_PRIVATE_MEM_VALUE;

 	GUEST_SYNC1(*ptr);
 }

-static void transfer_private_mem_guest_code_dst(void)
+static void migrate_private_mem_guest_code_dst(void)
 {
-	uint64_t volatile *const ptr = (uint64_t *)TRANSFER_PRIVATE_MEM_GVA;
+	uint64_t volatile *const ptr = (uint64_t *)MIGRATE_PRIVATE_MEM_GVA;

 	GUEST_SYNC1(*ptr);
 }

-static void test_transfer_private_mem(void)
+static void test_migrate_private_mem_data(bool migrate)
 {
 	struct kvm_vm *src_vm, *dst_vm;
 	struct kvm_vcpu *src_vcpu, *dst_vcpu;
@@ -40,40 +40,43 @@ static void test_transfer_private_mem(void)

 	/* Build the source VM, use it to write to private memory */
 	src_vm = __vm_create_shape_with_one_vcpu(
-		shape, &src_vcpu, 0, transfer_private_mem_guest_code_src);
+		shape, &src_vcpu, 0, migrate_private_mem_data_guest_code_src);
 	src_memfd = vm_create_guest_memfd(src_vm, SZ_4K, 0);

-	vm_mem_add(src_vm, DEFAULT_VM_MEM_SRC, TRANSFER_PRIVATE_MEM_GPA,
-		   TRANSFER_PRIVATE_MEM_TEST_SLOT, 1, KVM_MEM_PRIVATE,
+	vm_mem_add(src_vm, DEFAULT_VM_MEM_SRC, MIGRATE_PRIVATE_MEM_GPA,
+		   MIGRATE_PRIVATE_MEM_TEST_SLOT, 1, KVM_MEM_PRIVATE,
		   src_memfd, 0);

-	virt_map(src_vm, TRANSFER_PRIVATE_MEM_GVA, TRANSFER_PRIVATE_MEM_GPA, 1);
-	vm_set_memory_attributes(src_vm, TRANSFER_PRIVATE_MEM_GPA, SZ_4K,
+	virt_map(src_vm, MIGRATE_PRIVATE_MEM_GVA, MIGRATE_PRIVATE_MEM_GPA, 1);
+	vm_set_memory_attributes(src_vm, MIGRATE_PRIVATE_MEM_GPA, SZ_4K,
				 KVM_MEMORY_ATTRIBUTE_PRIVATE);

 	vcpu_run(src_vcpu);
 	TEST_ASSERT_KVM_EXIT_REASON(src_vcpu, KVM_EXIT_IO);
 	get_ucall(src_vcpu, &uc);
-	TEST_ASSERT(uc.args[0] == TRANSFER_PRIVATE_MEM_VALUE,
+	TEST_ASSERT(uc.args[0] == MIGRATE_PRIVATE_MEM_VALUE,
		    "Source VM should be able to write to private memory");

 	/* Build the destination VM with linked fd */
 	dst_vm = __vm_create_shape_with_one_vcpu(
-		shape, &dst_vcpu, 0, transfer_private_mem_guest_code_dst);
+		shape, &dst_vcpu, 0, migrate_private_mem_guest_code_dst);
 	dst_memfd = vm_link_guest_memfd(dst_vm, src_memfd, 0);

-	vm_mem_add(dst_vm, DEFAULT_VM_MEM_SRC, TRANSFER_PRIVATE_MEM_GPA,
-		   TRANSFER_PRIVATE_MEM_TEST_SLOT, 1, KVM_MEM_PRIVATE,
+	vm_mem_add(dst_vm, DEFAULT_VM_MEM_SRC, MIGRATE_PRIVATE_MEM_GPA,
+		   MIGRATE_PRIVATE_MEM_TEST_SLOT, 1, KVM_MEM_PRIVATE,
		   dst_memfd, 0);

-	virt_map(dst_vm, TRANSFER_PRIVATE_MEM_GVA, TRANSFER_PRIVATE_MEM_GPA, 1);
-	vm_set_memory_attributes(dst_vm, TRANSFER_PRIVATE_MEM_GPA, SZ_4K,
-				 KVM_MEMORY_ATTRIBUTE_PRIVATE);
+	virt_map(dst_vm, MIGRATE_PRIVATE_MEM_GVA, MIGRATE_PRIVATE_MEM_GPA, 1);
+	if (migrate)
+		vm_migrate_from(dst_vm, src_vm);
+	else
+		vm_set_memory_attributes(dst_vm, MIGRATE_PRIVATE_MEM_GPA, SZ_4K,
+					 KVM_MEMORY_ATTRIBUTE_PRIVATE);

 	vcpu_run(dst_vcpu);
 	TEST_ASSERT_KVM_EXIT_REASON(dst_vcpu, KVM_EXIT_IO);
 	get_ucall(dst_vcpu, &uc);
-	TEST_ASSERT(uc.args[0] == TRANSFER_PRIVATE_MEM_VALUE,
+	TEST_ASSERT(uc.args[0] == MIGRATE_PRIVATE_MEM_VALUE,
		    "Destination VM should be able to read value transferred");
 }

@@ -81,7 +84,10 @@ int main(int argc, char *argv[])
 {
 	TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));

-	test_transfer_private_mem();
+	test_migrate_private_mem_data(false);
+
+	if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM))
+		test_migrate_private_mem_data(true);

 	return 0;
 }