From: Waiman Long longman@redhat.com
[ Upstream commit 0495e337b7039191dfce6e03f5f830454b1fae6b ]
A circular locking problem is reported by lockdep due to the following circular locking dependency.
+--> cpu_hotplug_lock --> slab_mutex --> kn->active --+ | | +-----------------------------------------------------+
The forward cpu_hotplug_lock ==> slab_mutex ==> kn->active dependency happens in
kmem_cache_destroy(): cpus_read_lock(); mutex_lock(&slab_mutex); ==> sysfs_slab_unlink() ==> kobject_del() ==> kernfs_remove() ==> __kernfs_remove() ==> kernfs_drain(): rwsem_acquire(&kn->dep_map, ...);
The backward kn->active ==> cpu_hotplug_lock dependency happens in
kernfs_fop_write_iter(): kernfs_get_active(); ==> slab_attr_store() ==> cpu_partial_store() ==> flush_all(): cpus_read_lock()
One way to break this circular locking chain is to avoid holding cpu_hotplug_lock and slab_mutex while deleting the kobject in sysfs_slab_unlink() which should be equivalent to doing a write_lock and write_unlock pair of the kn->active virtual lock.
Since the kobject structures are not protected by slab_mutex or the cpu_hotplug_lock, we can certainly release those locks before doing the delete operation.
Move sysfs_slab_unlink() and sysfs_slab_release() to the newly created kmem_cache_release() and call it outside the slab_mutex & cpu_hotplug_lock critical sections. There will be a slight delay in the deletion of sysfs files if kmem_cache_release() is called indirectly from a work function.
Fixes: 5a836bf6b09f ("mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context") Signed-off-by: Waiman Long longman@redhat.com Reviewed-by: Hyeonggon Yoo 42.hyeyoo@gmail.com Reviewed-by: Roman Gushchin roman.gushchin@linux.dev Acked-by: David Rientjes rientjes@google.com Link: https://lore.kernel.org/all/YwOImVd+nRUsSAga@hyeyoo/ Signed-off-by: Vlastimil Babka vbabka@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org --- mm/slab_common.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-)
diff --git a/mm/slab_common.c b/mm/slab_common.c index 77c3adf40e504..dbd4b6f9b0e79 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -420,6 +420,28 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align, } EXPORT_SYMBOL(kmem_cache_create);
+#ifdef SLAB_SUPPORTS_SYSFS +/* + * For a given kmem_cache, kmem_cache_destroy() should only be called + * once or there will be a use-after-free problem. The actual deletion + * and release of the kobject does not need slab_mutex or cpu_hotplug_lock + * protection. So they are now done without holding those locks. + * + * Note that there will be a slight delay in the deletion of sysfs files + * if kmem_cache_release() is called indrectly from a work function. + */ +static void kmem_cache_release(struct kmem_cache *s) +{ + sysfs_slab_unlink(s); + sysfs_slab_release(s); +} +#else +static void kmem_cache_release(struct kmem_cache *s) +{ + slab_kmem_cache_release(s); +} +#endif + static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) { LIST_HEAD(to_destroy); @@ -446,11 +468,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) list_for_each_entry_safe(s, s2, &to_destroy, list) { debugfs_slab_release(s); kfence_shutdown_cache(s); -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_release(s); -#else - slab_kmem_cache_release(s); -#endif + kmem_cache_release(s); } }
@@ -465,20 +483,11 @@ static int shutdown_cache(struct kmem_cache *s) list_del(&s->list);
if (s->flags & SLAB_TYPESAFE_BY_RCU) { -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_unlink(s); -#endif list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { kfence_shutdown_cache(s); debugfs_slab_release(s); -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_unlink(s); - sysfs_slab_release(s); -#else - slab_kmem_cache_release(s); -#endif }
return 0; @@ -493,14 +502,16 @@ void slab_kmem_cache_release(struct kmem_cache *s)
void kmem_cache_destroy(struct kmem_cache *s) { + int refcnt; + if (unlikely(!s) || !kasan_check_byte(s)) return;
cpus_read_lock(); mutex_lock(&slab_mutex);
- s->refcount--; - if (s->refcount) + refcnt = --s->refcount; + if (refcnt) goto out_unlock;
WARN(shutdown_cache(s), @@ -509,6 +520,8 @@ void kmem_cache_destroy(struct kmem_cache *s) out_unlock: mutex_unlock(&slab_mutex); cpus_read_unlock(); + if (!refcnt && !(s->flags & SLAB_TYPESAFE_BY_RCU)) + kmem_cache_release(s); } EXPORT_SYMBOL(kmem_cache_destroy);