'vfio_device' keeps the ->kvm pointer with an elevated reference count from the first open of the device up until the last close(). So the kvm struct and its dependencies (kvm kthreads, cgroups ...) are kept alive even for VFIO devices that don't need ->kvm.
Copy the ->kvm pointer from the vfio_device struct and store it in 'intel_vgpu'. Note that kvm_page_track_[un]register_notifier() already does get/put calls, keeping the kvm struct alive.
This will allow releasing ->kvm from the vfio_device right after the first open call, so that devices not using kvm do not keep it alive.
Devices that are using kvm (like intel_vgpu) will be expected to manage the lifetime of the kvm struct by themselves.
Fixes: 2b48f52f2bff ("vfio: fix deadlock between group lock and kvm lock") Cc: stable@vger.kernel.org Signed-off-by: Andrey Ryabinin arbn@yandex-team.com --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/kvmgt.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 2c95aeef4e41..6c62467df22c 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -232,6 +232,7 @@ struct intel_vgpu { unsigned long nr_cache_entries; struct mutex cache_lock;
+ struct kvm *kvm; struct kvm_page_track_notifier_node track_node; #define NR_BKT (1 << 18) struct hlist_head ptable[NR_BKT]; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index b27ff77bfb50..cf418e2c560d 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -36,6 +36,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/kthread.h> +#include <linux/kvm_host.h> #include <linux/sched/mm.h> #include <linux/types.h> #include <linux/list.h> @@ -649,7 +650,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status)) continue;
- if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) { + if (vgpu->kvm == itr->kvm) { ret = true; goto out; } @@ -664,13 +665,13 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); int ret;
+ vgpu->kvm = vgpu->vfio_device.kvm; if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST;
vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_remove_region = kvmgt_page_track_remove_region; - ret = kvm_page_track_register_notifier(vgpu->vfio_device.kvm, - &vgpu->track_node); + ret = kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node); if (ret) { gvt_vgpu_err("KVM is required to use Intel vGPU\n"); return ret; @@ -707,8 +708,7 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
debugfs_lookup_and_remove(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs);
- kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, - &vgpu->track_node); + kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); @@ -1560,7 +1560,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) if (kvmgt_gfn_is_write_protected(info, gfn)) return 0;
- r = kvm_write_track_add_gfn(info->vfio_device.kvm, gfn); + r = kvm_write_track_add_gfn(info->kvm, gfn); if (r) return r;
@@ -1578,7 +1578,7 @@ int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) if (!kvmgt_gfn_is_write_protected(info, gfn)) return 0;
- r = kvm_write_track_remove_gfn(info->vfio_device.kvm, gfn); + r = kvm_write_track_remove_gfn(info->kvm, gfn); if (r) return r;
Commit 2b48f52f2bff ("vfio: fix deadlock between group lock and kvm lock") made vfio_device hold the KVM struct up until the device's close() call.
This leads to an unreleased KVM struct, which holds KVM kthreads and related cgroups, after a VM with a VFIO device migrates from one KVM instance to another on the same host.
Since all drivers that require 'kvm' (vfio-ap/intel_vgpu/vfio-pci zdev) already handle the 'kvm' pointer by themselves, we can just drop the 'kvm' reference right after the first vfio_df_open() call. This will release the 'kvm' struct and dependent resources for drivers that don't require it after KVM is detached from a device (KVM_DEV_VFIO_FILE_DEL).
Fixes: 2b48f52f2bff ("vfio: fix deadlock between group lock and kvm lock") Cc: stable@vger.kernel.org Signed-off-by: Andrey Ryabinin arbn@yandex-team.com --- drivers/vfio/device_cdev.c | 11 ++++++----- drivers/vfio/group.c | 31 ++++++++++++++----------------- 2 files changed, 20 insertions(+), 22 deletions(-)
diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index bb1817bd4ff3..339b69c43300 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -103,14 +103,16 @@ long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, /* * Before the device open, get the KVM pointer currently * associated with the device file (if there is) and obtain - * a reference. This reference is held until device closed. + * a reference and release it right after vfio_df_open() bellow. + * The device driver wishes to use KVM must obtain a reference and + * release it on close. * Save the pointer in the device for use by drivers. */ vfio_df_get_kvm_safe(df); - ret = vfio_df_open(df); + vfio_device_put_kvm(device); if (ret) - goto out_put_kvm; + goto out_put_iommufd;
ret = copy_to_user(&arg->out_devid, &df->devid, sizeof(df->devid)) ? -EFAULT : 0; @@ -128,8 +130,7 @@ long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
out_close_device: vfio_df_close(df); -out_put_kvm: - vfio_device_put_kvm(device); +out_put_iommufd: iommufd_ctx_put(df->iommufd); df->iommufd = NULL; out_unlock: diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index 49559605177e..872cfd795f99 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -175,15 +175,6 @@ static int vfio_df_group_open(struct vfio_device_file *df)
mutex_lock(&device->dev_set->lock);
- /* - * Before the first device open, get the KVM pointer currently - * associated with the group (if there is one) and obtain a reference - * now that will be held until the open_count reaches 0 again. Save - * the pointer in the device for use by drivers. - */ - if (device->open_count == 0) - vfio_device_group_get_kvm_safe(device); - df->iommufd = device->group->iommufd; if (df->iommufd && vfio_device_is_noiommu(device) && device->open_count == 0) { /* @@ -196,12 +187,23 @@ static int vfio_df_group_open(struct vfio_device_file *df) ret = -EPERM; else ret = 0; - goto out_put_kvm; + goto out_iommufd; }
+ /* + * Before the first device open, get the KVM pointer currently + * associated with the group (if there is one) and obtain a reference + * now that will be released right after vfio_df_open() bellow. + * The device driver wishes to use KVM must obtain a reference and + * release it on close. + */ + if (device->open_count == 0) + vfio_device_group_get_kvm_safe(device); + ret = vfio_df_open(df); + vfio_device_put_kvm(device); if (ret) - goto out_put_kvm; + goto out_iommufd;
if (df->iommufd && device->open_count == 1) { ret = vfio_iommufd_compat_attach_ioas(device, df->iommufd); @@ -221,10 +223,8 @@ static int vfio_df_group_open(struct vfio_device_file *df)
out_close_device: vfio_df_close(df); -out_put_kvm: +out_iommufd: df->iommufd = NULL; - if (device->open_count == 0) - vfio_device_put_kvm(device); mutex_unlock(&device->dev_set->lock); out_unlock: mutex_unlock(&device->group->group_lock); @@ -241,9 +241,6 @@ void vfio_df_group_close(struct vfio_device_file *df) vfio_df_close(df); df->iommufd = NULL;
- if (device->open_count == 0) - vfio_device_put_kvm(device); - mutex_unlock(&device->dev_set->lock); mutex_unlock(&device->group->group_lock); }
linux-stable-mirror@lists.linaro.org