Commit 2b48f52f2bff ("vfio: fix deadlock between group lock and kvm lock") made vfio_device to hold KVM struct up until device's close() call.
This lead to a unrleased KVM struct which holds KVM kthreads and related cgroups after VM with VFIO device migrates to from one KVM instance to another on the same host.
Since all drivers, that require 'kvm' (vfio-ap/intel_vgp/vfio-pci zdev) already handle 'kvm' pointer by themselves we can just drop 'kvm' reference right after first vfio_df_open() call. This will release 'kvm' struct and dependent resources for drivers that don't require it after KVM detached from a device (KVM_DEV_VFIO_FILE_DEL).
Fixes: 2b48f52f2bff ("vfio: fix deadlock between group lock and kvm lock") Cc: stable@vger.kernel.org Signed-off-by: Andrey Ryabinin arbn@yandex-team.com --- drivers/vfio/device_cdev.c | 11 ++++++----- drivers/vfio/group.c | 31 ++++++++++++++----------------- 2 files changed, 20 insertions(+), 22 deletions(-)
diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index bb1817bd4ff3..339b69c43300 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -103,14 +103,16 @@ long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, /* * Before the device open, get the KVM pointer currently * associated with the device file (if there is) and obtain - * a reference. This reference is held until device closed. + * a reference and release it right after vfio_df_open() bellow. + * The device driver wishes to use KVM must obtain a reference and + * release it on close. * Save the pointer in the device for use by drivers. */ vfio_df_get_kvm_safe(df); - ret = vfio_df_open(df); + vfio_device_put_kvm(device); if (ret) - goto out_put_kvm; + goto out_put_iommufd;
ret = copy_to_user(&arg->out_devid, &df->devid, sizeof(df->devid)) ? -EFAULT : 0; @@ -128,8 +130,7 @@ long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
out_close_device: vfio_df_close(df); -out_put_kvm: - vfio_device_put_kvm(device); +out_put_iommufd: iommufd_ctx_put(df->iommufd); df->iommufd = NULL; out_unlock: diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index 49559605177e..872cfd795f99 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -175,15 +175,6 @@ static int vfio_df_group_open(struct vfio_device_file *df)
mutex_lock(&device->dev_set->lock);
- /* - * Before the first device open, get the KVM pointer currently - * associated with the group (if there is one) and obtain a reference - * now that will be held until the open_count reaches 0 again. Save - * the pointer in the device for use by drivers. - */ - if (device->open_count == 0) - vfio_device_group_get_kvm_safe(device); - df->iommufd = device->group->iommufd; if (df->iommufd && vfio_device_is_noiommu(device) && device->open_count == 0) { /* @@ -196,12 +187,23 @@ static int vfio_df_group_open(struct vfio_device_file *df) ret = -EPERM; else ret = 0; - goto out_put_kvm; + goto out_iommufd; }
+ /* + * Before the first device open, get the KVM pointer currently + * associated with the group (if there is one) and obtain a reference + * now that will be released right after vfio_df_open() bellow. + * The device driver wishes to use KVM must obtain a reference and + * release it on close. + */ + if (device->open_count == 0) + vfio_device_group_get_kvm_safe(device); + ret = vfio_df_open(df); + vfio_device_put_kvm(device); if (ret) - goto out_put_kvm; + goto out_iommufd;
if (df->iommufd && device->open_count == 1) { ret = vfio_iommufd_compat_attach_ioas(device, df->iommufd); @@ -221,10 +223,8 @@ static int vfio_df_group_open(struct vfio_device_file *df)
out_close_device: vfio_df_close(df); -out_put_kvm: +out_iommufd: df->iommufd = NULL; - if (device->open_count == 0) - vfio_device_put_kvm(device); mutex_unlock(&device->dev_set->lock); out_unlock: mutex_unlock(&device->group->group_lock); @@ -241,9 +241,6 @@ void vfio_df_group_close(struct vfio_device_file *df) vfio_df_close(df); df->iommufd = NULL;
- if (device->open_count == 0) - vfio_device_put_kvm(device); - mutex_unlock(&device->dev_set->lock); mutex_unlock(&device->group->group_lock); }