On 8/12/2025 2:53 AM, Ackerley Tng wrote:
David Hildenbrand david@redhat.com writes:
On 11.08.25 11:06, Shivank Garg wrote:
From: Ackerley Tng ackerleytng@google.com
[...snip...]
+static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size,
+						  u64 flags)
+{
+	static const char *name = "[kvm-gmem]";
+	struct inode *inode;
+	struct file *file;
+	int err;
+	err = -ENOENT;
Maybe add a comment here explaining when the module reference will get dropped. And maybe we should just switch to fops_get() + fops_put()?
	/* __fput() will take care of fops_put(). */
	if (!fops_get(&kvm_gmem_fops))
		goto err;
Sounds good! Please see attached patch. It's exactly what you suggested except I renamed the goto target to err_fops_put:
+	inode = kvm_gmem_inode_make_secure_inode(name, size, flags);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto err_put_module;
+	}
+	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR,
+				 &kvm_gmem_fops);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto err_put_inode;
+	}
+	file->f_flags |= O_LARGEFILE;
+	file->private_data = priv;
+out:
+	return file;
+err_put_inode:
+	iput(inode);
+err_put_module:
+	module_put(kvm_gmem_fops.owner);
fops_put(&kvm_gmem_fops);
?
Acked-by: David Hildenbrand david@redhat.com
-- Cheers,
David / dhildenb
From f2bd4499bce4db69bf34be75e009579db4329b7c Mon Sep 17 00:00:00 2001
From: Ackerley Tng <ackerleytng@google.com>
Date: Sun, 13 Jul 2025 17:43:35 +0000
Subject: [PATCH] KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes
guest_memfd's inode represents memory the guest_memfd is providing. guest_memfd's file represents a struct kvm's view of that memory.
Using a custom inode allows customization of the inode teardown process via callbacks. For example, ->evict_inode() allows customization of the truncation process on file close, and ->destroy_inode() and ->free_inode() allow customization of the inode freeing process.
Customizing the truncation process allows flexibility in management of guest_memfd memory and customization of the inode freeing process allows proper cleanup of memory metadata stored on the inode.
Memory metadata is more appropriately stored on the inode (as opposed to the file), since the metadata is for the memory and is not unique to a specific binding and struct kvm.
Co-developed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 include/uapi/linux/magic.h |   1 +
 virt/kvm/guest_memfd.c     | 129 ++++++++++++++++++++++++++++++-------
 virt/kvm/kvm_main.c        |   7 +-
 virt/kvm/kvm_mm.h          |   9 +--
 4 files changed, 119 insertions(+), 27 deletions(-)
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index bb575f3ab45e5..638ca21b7a909 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -103,5 +103,6 @@
 #define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
 #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
 #define PID_FS_MAGIC		0x50494446	/* "PIDF" */
+#define GUEST_MEMFD_MAGIC	0x474d454d	/* "GMEM" */
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 08a6bc7d25b60..6c66a09740550 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -1,12 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/anon_inodes.h>
 #include <linux/backing-dev.h>
 #include <linux/falloc.h>
+#include <linux/fs.h>
 #include <linux/kvm_host.h>
+#include <linux/pseudo_fs.h>
 #include <linux/pagemap.h>
-#include <linux/anon_inodes.h>
#include "kvm_mm.h"
+static struct vfsmount *kvm_gmem_mnt;
 struct kvm_gmem {
 	struct kvm *kvm;
 	struct xarray bindings;
@@ -385,9 +389,45 @@ static struct file_operations kvm_gmem_fops = {
 	.fallocate	= kvm_gmem_fallocate,
 };
-void kvm_gmem_init(struct module *module)
+static int kvm_gmem_init_fs_context(struct fs_context *fc)
+{
+	if (!init_pseudo(fc, GUEST_MEMFD_MAGIC))
+		return -ENOMEM;
+	fc->s_iflags |= SB_I_NOEXEC;
+	fc->s_iflags |= SB_I_NODEV;
+	return 0;
+}
+static struct file_system_type kvm_gmem_fs = {
+	.name		 = "guest_memfd",
+	.init_fs_context = kvm_gmem_init_fs_context,
+	.kill_sb	 = kill_anon_super,
+};
+static int kvm_gmem_init_mount(void)
+{
+	kvm_gmem_mnt = kern_mount(&kvm_gmem_fs);
+	if (IS_ERR(kvm_gmem_mnt))
+		return PTR_ERR(kvm_gmem_mnt);
+	kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC;
+	return 0;
+}
+int kvm_gmem_init(struct module *module)
 {
 	kvm_gmem_fops.owner = module;
+	return kvm_gmem_init_mount();
+}
+void kvm_gmem_exit(void)
+{
+	kern_unmount(kvm_gmem_mnt);
+	kvm_gmem_mnt = NULL;
 }
 static int kvm_gmem_migrate_folio(struct address_space *mapping,
@@ -463,11 +503,72 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
 	return true;
 }
+static struct inode *kvm_gmem_inode_make_secure_inode(const char *name,
+						      loff_t size, u64 flags)
+{
+	struct inode *inode;
+	inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
+	if (IS_ERR(inode))
+		return inode;
+	inode->i_private = (void *)(unsigned long)flags;
+	inode->i_op = &kvm_gmem_iops;
+	inode->i_mapping->a_ops = &kvm_gmem_aops;
+	inode->i_mode |= S_IFREG;
+	inode->i_size = size;
+	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+	mapping_set_inaccessible(inode->i_mapping);
+	/* Unmovable mappings are supposed to be marked unevictable as well. */
+	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
+	return inode;
+}
+static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size,
+						  u64 flags)
+{
+	static const char *name = "[kvm-gmem]";
+	struct inode *inode;
+	struct file *file;
+	int err;
+	err = -ENOENT;
+	/* __fput() will take care of fops_put(). */
+	if (!fops_get(&kvm_gmem_fops))
+		goto err;
+	inode = kvm_gmem_inode_make_secure_inode(name, size, flags);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto err_fops_put;
+	}
+	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR,
+				 &kvm_gmem_fops);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto err_put_inode;
+	}
+	file->f_flags |= O_LARGEFILE;
+	file->private_data = priv;
+out:
+	return file;
+err_put_inode:
+	iput(inode);
+err_fops_put:
+	fops_put(&kvm_gmem_fops);
Thanks Ackerley. LGTM