Dan Williams wrote:
Greg Kroah-Hartman wrote: [..]
This is odd.
Does the latest 6.10.y release also show this problem?
I can't duplicate this here, and it's the first I've heard of it (given that USB mice are pretty popular, I would suspect others would have hit it as well...)
Sorry for missing this earlier. One thought is that userspace has a dependency on uevent_show() flushing device probing. In other words the side effect of taking the device_lock() in uevent_show() is that udev might enjoy some occasions where the reading the uevent flushes probing before the udev rule runs. With this change, uevent_show() no longer waits for any inflight probes to complete.
One idea to fix this problem is to create a special case sysfs attribute type that takes the device_lock() before kernfs_get_active() to avoid the deadlock on attribute teardown.
I'll take a look. Thanks for forwarding the report Thorsten!
Ok, the following boots and passes the CXL unit tests, would appreciate if the reporter can give this a try:
-- >8 -- Subject: driver core: Fix userspace expectations of uevent_show() as a probe barrier
From: Dan Williams dan.j.williams@intel.com
Commit "driver core: Fix uevent_show() vs driver detach race" [1] attempted to fix a lockdep report in uevent_show() by making the lookup of driver information for a given device lockless. It turns out that userspace likely depends on the side-effect of uevent_show() flushing device probing.
Introduce a new "locked" type for sysfs attributes that arranges for the attribute to be called under the device-lock, but without setting up a reverse locking order dependency with the kernfs_get_active() lock.
This new facility holds a reference on a device while any locked-sysfs attribute of that device is open. It then takes the lock around sysfs read/write operations in the following order:
of->mutex of->op_mutex <= device_lock() kernfs_get_active() <operation>
Compare that to problematic locking order of:
of->mutex kernfs_get_active() <operation> device_lock()
...which causes potential deadlocks with kernfs_drain() that may be called while the device_lock() is held.
Fixes: 15fffc6a5624 ("driver core: Fix uevent_show() vs driver detach race") [1] Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=219244 Signed-off-by: Dan Williams dan.j.williams@intel.com --- drivers/base/core.c | 2 +- fs/kernfs/file.c | 24 +++++++++++++++++++----- fs/sysfs/file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ fs/sysfs/group.c | 4 ++-- include/linux/device.h | 3 +++ include/linux/kernfs.h | 1 + include/linux/sysfs.h | 10 ++++++++++ 7 files changed, 83 insertions(+), 8 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c index 8c0733d3aad8..1fd5a18cbb62 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2772,7 +2772,7 @@ static ssize_t uevent_store(struct device *dev, struct device_attribute *attr,
return count; } -static DEVICE_ATTR_RW(uevent); +static DEVICE_ATTR_LOCKED_RW(uevent);
static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 8502ef68459b..eb5c2167beb9 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -142,6 +142,20 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v) kernfs_put_active(of->kn); }
+static void kernfs_open_file_lock(struct kernfs_open_file *of) +{ + mutex_lock(&of->mutex); + if (of->op_mutex) + mutex_lock(of->op_mutex); +} + +static void kernfs_open_file_unlock(struct kernfs_open_file *of) +{ + if (of->op_mutex) + mutex_unlock(of->op_mutex); + mutex_unlock(&of->mutex); +} + static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) { struct kernfs_open_file *of = sf->private; @@ -151,7 +165,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) * @of->mutex nests outside active ref and is primarily to ensure that * the ops aren't called concurrently for the same open file. */ - mutex_lock(&of->mutex); + kernfs_open_file_lock(of); if (!kernfs_get_active(of->kn)) return ERR_PTR(-ENODEV);
@@ -193,7 +207,7 @@ static void kernfs_seq_stop(struct seq_file *sf, void *v)
if (v != ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, v); - mutex_unlock(&of->mutex); + kernfs_open_file_unlock(of); }
static int kernfs_seq_show(struct seq_file *sf, void *v) @@ -322,9 +336,9 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) * @of->mutex nests outside active ref and is used both to ensure that * the ops aren't called concurrently for the same open file. */ - mutex_lock(&of->mutex); + kernfs_open_file_lock(of); if (!kernfs_get_active(of->kn)) { - mutex_unlock(&of->mutex); + kernfs_open_file_unlock(of); len = -ENODEV; goto out_free; } @@ -336,7 +350,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) len = -EINVAL;
kernfs_put_active(of->kn); - mutex_unlock(&of->mutex); + kernfs_open_file_unlock(of);
if (len > 0) iocb->ki_pos += len; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index d1995e2d6c94..1bb878efcf00 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -15,6 +15,7 @@ #include <linux/list.h> #include <linux/mutex.h> #include <linux/seq_file.h> +#include <linux/device.h> #include <linux/mm.h>
#include "sysfs.h" @@ -189,6 +190,26 @@ static int sysfs_kf_bin_open(struct kernfs_open_file *of) return 0; }
+/* locked attributes are always device attributes */ +static int sysfs_kf_setup_lock(struct kernfs_open_file *of) +{ + struct kobject *kobj = of->kn->parent->priv; + struct device *dev = kobj_to_dev(kobj); + + get_device(dev); + of->op_mutex = &dev->mutex; + + return 0; +} + +static void sysfs_kf_free_lock(struct kernfs_open_file *of) +{ + struct kobject *kobj = of->kn->parent->priv; + struct device *dev = kobj_to_dev(kobj); + + put_device(dev); +} + void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { struct kernfs_node *kn = kobj->sd, *tmp; @@ -227,6 +248,25 @@ static const struct kernfs_ops sysfs_file_kfops_rw = { .write = sysfs_kf_write, };
+static const struct kernfs_ops sysfs_locked_kfops_ro = { + .seq_show = sysfs_kf_seq_show, + .open = sysfs_kf_setup_lock, + .release = sysfs_kf_free_lock, +}; + +static const struct kernfs_ops sysfs_locked_kfops_wo = { + .write = sysfs_kf_write, + .open = sysfs_kf_setup_lock, + .release = sysfs_kf_free_lock, +}; + +static const struct kernfs_ops sysfs_locked_kfops_rw = { + .seq_show = sysfs_kf_seq_show, + .write = sysfs_kf_write, + .open = sysfs_kf_setup_lock, + .release = sysfs_kf_free_lock, +}; + static const struct kernfs_ops sysfs_prealloc_kfops_ro = { .read = sysfs_kf_read, .prealloc = true, @@ -287,6 +327,13 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, ops = &sysfs_prealloc_kfops_ro; else if (sysfs_ops->store) ops = &sysfs_prealloc_kfops_wo; + } else if (mode & SYSFS_LOCKED) { + if (sysfs_ops->show && sysfs_ops->store) + ops = &sysfs_locked_kfops_rw; + else if (sysfs_ops->show) + ops = &sysfs_locked_kfops_ro; + else if (sysfs_ops->store) + ops = &sysfs_locked_kfops_wo; } else { if (sysfs_ops->show && sysfs_ops->store) ops = &sysfs_file_kfops_rw; diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index d22ad67a0f32..0158367866be 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -68,11 +68,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, continue; }
- WARN(mode & ~(SYSFS_PREALLOC | 0664), + WARN(mode & ~(SYSFS_PREALLOC | SYSFS_LOCKED | 0664), "Attribute %s: Invalid permissions 0%o\n", (*attr)->name, mode);
- mode &= SYSFS_PREALLOC | 0664; + mode &= SYSFS_PREALLOC | SYSFS_LOCKED | 0664; error = sysfs_add_file_mode_ns(parent, *attr, mode, uid, gid, NULL); if (unlikely(error)) diff --git a/include/linux/device.h b/include/linux/device.h index 34eb20f5966f..c38c33bed333 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -180,6 +180,9 @@ ssize_t device_show_string(struct device *dev, struct device_attribute *attr, #define DEVICE_ATTR_RW(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RW(_name)
+#define DEVICE_ATTR_LOCKED_RW(_name) \ + struct device_attribute dev_attr_##_name = __ATTR_LOCKED_RW(_name) + /** * DEVICE_ATTR_ADMIN_RW - Define an admin-only read-write device attribute. * @_name: Attribute name. diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 87c79d076d6d..df6828a7cd3e 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -257,6 +257,7 @@ struct kernfs_open_file {
/* private fields, do not use outside kernfs proper */ struct mutex mutex; + struct mutex *op_mutex; struct mutex prealloc_mutex; int event; struct list_head list; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c4e64dc11206..981588c9c673 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -103,6 +103,7 @@ struct attribute_group {
#define SYSFS_PREALLOC 010000 #define SYSFS_GROUP_INVISIBLE 020000 +#define SYSFS_LOCKED 040000
/* * DEFINE_SYSFS_GROUP_VISIBLE(name): @@ -230,6 +231,13 @@ struct attribute_group { .store = _store, \ }
+#define __ATTR_LOCKED(_name, _mode, _show, _store) { \ + .attr = {.name = __stringify(_name), \ + .mode = SYSFS_LOCKED | VERIFY_OCTAL_PERMISSIONS(_mode) },\ + .show = _show, \ + .store = _store, \ +} + #define __ATTR_RO(_name) { \ .attr = { .name = __stringify(_name), .mode = 0444 }, \ .show = _name##_show, \ @@ -255,6 +263,8 @@ struct attribute_group {
#define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store)
+#define __ATTR_LOCKED_RW(_name) __ATTR_LOCKED(_name, 0644, _name##_show, _name##_store) + #define __ATTR_NULL { .attr = { .name = NULL } }
#ifdef CONFIG_DEBUG_LOCK_ALLOC