For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs allocated per node for a kmem_cache. Thus, slabs_node() in __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_shutdown() will always return 0 for such a config. This is wrong and can cause issues for all users of these functions.
In fact, in [1] Jason reported a system crash while using SLUB without CONFIG_SLUB_DEBUG. The reason was the usage of slabs_node() by __kmem_cache_empty().
The right solution is to make slabs_node() work even for !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node slab counters if !SLUB_DEBUG") had put the per node slab counter under CONFIG_SLUB_DEBUG because it was only read through sysfs API and the sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the per node slab counter assumed that it will work in the absence of CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
Please note that commit f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()") exposed this issue, but it was present even before that commit.
[1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg...
Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()") Signed-off-by: Shakeel Butt shakeelb@google.com Suggested-by: David Rientjes rientjes@google.com Reported-by: Jason A . Donenfeld Jason@zx2c4.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: stable@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org --- mm/slab.h | 2 +- mm/slub.c | 80 +++++++++++++++++++++++++------------------------------ 2 files changed, 38 insertions(+), 44 deletions(-)
diff --git a/mm/slab.h b/mm/slab.h index 68bdf498da3b..a6545332cc86 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -473,8 +473,8 @@ struct kmem_cache_node { #ifdef CONFIG_SLUB unsigned long nr_partial; struct list_head partial; -#ifdef CONFIG_SLUB_DEBUG atomic_long_t nr_slabs; +#ifdef CONFIG_SLUB_DEBUG atomic_long_t total_objects; struct list_head full; #endif diff --git a/mm/slub.c b/mm/slub.c index a3b8467c14af..c9c190d54687 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1030,42 +1030,6 @@ static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct list_del(&page->lru); }
-/* Tracking of the number of slabs for debugging purposes */ -static inline unsigned long slabs_node(struct kmem_cache *s, int node) -{ - struct kmem_cache_node *n = get_node(s, node); - - return atomic_long_read(&n->nr_slabs); -} - -static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) -{ - return atomic_long_read(&n->nr_slabs); -} - -static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) -{ - struct kmem_cache_node *n = get_node(s, node); - - /* - * May be called early in order to allocate a slab for the - * kmem_cache_node structure. Solve the chicken-egg - * dilemma by deferring the increment of the count during - * bootstrap (see early_kmem_cache_node_alloc). - */ - if (likely(n)) { - atomic_long_inc(&n->nr_slabs); - atomic_long_add(objects, &n->total_objects); - } -} -static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) -{ - struct kmem_cache_node *n = get_node(s, node); - - atomic_long_dec(&n->nr_slabs); - atomic_long_sub(objects, &n->total_objects); -} - /* Object debug checks for alloc/free paths */ static void setup_object_debug(struct kmem_cache *s, struct page *page, void *object) @@ -1321,16 +1285,46 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
#define disable_higher_order_debug 0
+#endif /* CONFIG_SLUB_DEBUG */ + static inline unsigned long slabs_node(struct kmem_cache *s, int node) - { return 0; } +{ + struct kmem_cache_node *n = get_node(s, node); + + return atomic_long_read(&n->nr_slabs); +} + static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) - { return 0; } -static inline void inc_slabs_node(struct kmem_cache *s, int node, - int objects) {} -static inline void dec_slabs_node(struct kmem_cache *s, int node, - int objects) {} +{ + return atomic_long_read(&n->nr_slabs); +}
-#endif /* CONFIG_SLUB_DEBUG */ +static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) +{ + struct kmem_cache_node *n = get_node(s, node); + + /* + * May be called early in order to allocate a slab for the + * kmem_cache_node structure. Solve the chicken-egg + * dilemma by deferring the increment of the count during + * bootstrap (see early_kmem_cache_node_alloc). + */ + if (likely(n)) { + atomic_long_inc(&n->nr_slabs); +#ifdef CONFIG_SLUB_DEBUG + atomic_long_add(objects, &n->total_objects); +#endif + } +} +static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) +{ + struct kmem_cache_node *n = get_node(s, node); + + atomic_long_dec(&n->nr_slabs); +#ifdef CONFIG_SLUB_DEBUG + atomic_long_sub(objects, &n->total_objects); +#endif +}
/* * Hooks for other subsystems that check memory allocations. In a typical
On Wed, 20 Jun 2018, Shakeel Butt wrote:
For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs allocated per node for a kmem_cache. Thus, slabs_node() in __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy() will always return 0 for such config. This is wrong and can cause issues for all users of these functions.
CONFIG_SLUB_DEBUG is set by default on almost all builds. The only case where CONFIG_SLUB_DEBUG is switched off is when we absolutely need to use the minimum amount of memory (embedded or some such thing).
The right solution is to make slabs_node() work even for !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node slab counters if !SLUB_DEBUG") had put the per node slab counter under CONFIG_SLUB_DEBUG because it was only read through sysfs API and the sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the per node slab counter assumed that it will work in the absence of CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
Please do not do this. Find a way to avoid these checks. The objective of a !CONFIG_SLUB_DEBUG configuration is to not compile in debugging checks etc. in order to reduce the code/data footprint to the minimum necessary, while sacrificing debuggability etc.
Maybe make it impossible to disable CONFIG_SLUB_DEBUG if CGROUPs are in use?
On Wed, Jun 20, 2018 at 6:15 PM Christopher Lameter cl@linux.com wrote:
On Wed, 20 Jun 2018, Shakeel Butt wrote:
For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs allocated per node for a kmem_cache. Thus, slabs_node() in __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy() will always return 0 for such config. This is wrong and can cause issues for all users of these functions.
CONFIG_SLUB_DEBUG is set by default on almost all builds. The only case where CONFIG_SLUB_DEBUG is switched off is when we absolutely need to use the minimum amount of memory (embedded or some such thing).
The right solution is to make slabs_node() work even for !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node slab counters if !SLUB_DEBUG") had put the per node slab counter under CONFIG_SLUB_DEBUG because it was only read through sysfs API and the sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the per node slab counter assumed that it will work in the absence of CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
Please do not do this. Find a way to avoid these checks. The objective of a !CONFIG_SLUB_DEBUG configuration is to not compile in debugging checks etc. in order to reduce the code/data footprint to the minimum necessary, while sacrificing debuggability etc.
Maybe make it impossible to disable CONFIG_SLUB_DEBUG if CGROUPs are in use?
Copying from the other thread:
On Wed, Jun 20, 2018 at 6:22 PM Jason A. Donenfeld Jason@zx2c4.com wrote:
On Thu, Jun 21, 2018 at 3:20 AM Christopher Lameter cl@linux.com wrote:
NAK. It's easier to simply not allow !CONFIG_SLUB_DEBUG for cgroups-based configs, because in that case you certainly have enough memory to include the runtime debug code as well as the extended counters.
FWIW, I ran into issues with a combination of KASAN+CONFIG_SLUB without having CONFIG_SLUB_DEBUG, because KASAN was using functions that were broken without CONFIG_SLUB_DEBUG, so while you're at it with creating dependencies, you might want to also say KASAN+CONFIG_SLUB ==> CONFIG_SLUB_DEBUG.
KASAN is the only user of __kmem_cache_empty(). So, enforcing KASAN+CONFIG_SLUB => CONFIG_SLUB_DEBUG makes sense, but I am not sure about cgroups or memcg. Though, is it OK to let __kmem_cache_shrink() and __kmem_cache_shutdown() be broken for !CONFIG_SLUB_DEBUG?
For __kmem_cache_shutdown(), I can understand that shutting down a kmem_cache when there are still objects allocated from it is broken and wrong. For __kmem_cache_shrink(), maybe a wrong answer from it is tolerable.
Shakeel
On Thu 21-06-18 01:15:30, Cristopher Lameter wrote:
On Wed, 20 Jun 2018, Shakeel Butt wrote:
For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs allocated per node for a kmem_cache. Thus, slabs_node() in __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy() will always return 0 for such config. This is wrong and can cause issues for all users of these functions.
CONFIG_SLUB_DEBUG is set by default on almost all builds. The only case where CONFIG_SLUB_DEBUG is switched off is when we absolutely need to use the minimum amount of memory (embedded or some such thing).
I thought those would be using SLOB rather than SLUB.
The right solution is to make slabs_node() work even for !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node slab counters if !SLUB_DEBUG") had put the per node slab counter under CONFIG_SLUB_DEBUG because it was only read through sysfs API and the sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the per node slab counter assumed that it will work in the absence of CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
Please do not do this. Find a way to avoid these checks. The objective of a !CONFIG_SLUB_DEBUG configuration is to not compile in debugging checks etc. in order to reduce the code/data footprint to the minimum necessary, while sacrificing debuggability etc.
Maybe make it impossible to disable CONFIG_SLUB_DEBUG if CGROUPs are in use?
Why don't we simply remove the config option altogether and effectively make it always enabled?
On Thu, Jun 21, 2018 at 8:01 AM Michal Hocko mhocko@kernel.org wrote:
On Thu 21-06-18 01:15:30, Cristopher Lameter wrote:
On Wed, 20 Jun 2018, Shakeel Butt wrote:
For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs allocated per node for a kmem_cache. Thus, slabs_node() in __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy() will always return 0 for such config. This is wrong and can cause issues for all users of these functions.
CONFIG_SLUB_DEBUG is set by default on almost all builds. The only case where CONFIG_SLUB_DEBUG is switched off is when we absolutely need to use the minimum amount of memory (embedded or some such thing).
I thought those would be using SLOB rather than SLUB.
The right solution is to make slabs_node() work even for !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node slab counters if !SLUB_DEBUG") had put the per node slab counter under CONFIG_SLUB_DEBUG because it was only read through sysfs API and the sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the per node slab counter assumed that it will work in the absence of CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
Please do not do this. Find a way to avoid these checks. The objective of a !CONFIG_SLUB_DEBUG configuration is to not compile in debugging checks etc. in order to reduce the code/data footprint to the minimum necessary, while sacrificing debuggability etc.
Maybe make it impossible to disable CONFIG_SLUB_DEBUG if CGROUPs are in use?
Why don't we simply remove the config option altogether and effectively make it always enabled?
Christopher, how do you want to proceed? I don't have any strong opinion. I just don't want KASAN users kept broken for SLUB.
thanks, Shakeel
KASAN depends on having access to some of the accounting that SLUB_DEBUG does; without it, there are immediate crashes [1]. So, the natural thing to do is to make KASAN select SLUB_DEBUG.
[1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg...
Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()") Cc: Shakeel Butt shakeelb@google.com Cc: David Rientjes rientjes@google.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: stable@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com --- lib/Kconfig.kasan | 1 + 1 file changed, 1 insertion(+)
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 3d35d062970d..c253c1b46c6b 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" depends on SLUB || (SLAB && !DEBUG_SLAB) + select SLUB_DEBUG if SLUB select CONSTRUCTORS select STACKDEPOT help
On Fri 22-06-18 17:46:23, Jason A. Donenfeld wrote:
KASAN depends on having access to some of the accounting that SLUB_DEBUG does; without it, there are immediate crashes [1]. So, the natural thing to do is to make KASAN select SLUB_DEBUG.
[1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg...
Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()") Cc: Shakeel Butt shakeelb@google.com Cc: David Rientjes rientjes@google.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: stable@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com
This is the simplest way to do it, but I strongly suspect that the whole SLUB_DEBUG option is not really necessary.
Acked-by: Michal Hocko mhocko@suse.com
lib/Kconfig.kasan | 1 + 1 file changed, 1 insertion(+)
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 3d35d062970d..c253c1b46c6b 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" depends on SLUB || (SLAB && !DEBUG_SLAB)
+ select SLUB_DEBUG if SLUB select CONSTRUCTORS select STACKDEPOT help
-- 2.17.1
On Fri, Jun 22, 2018 at 8:46 AM Jason A. Donenfeld Jason@zx2c4.com wrote:
KASAN depends on having access to some of the accounting that SLUB_DEBUG does; without it, there are immediate crashes [1]. So, the natural thing to do is to make KASAN select SLUB_DEBUG.
[1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg...
Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()") Cc: Shakeel Butt shakeelb@google.com Cc: David Rientjes rientjes@google.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Andrey Ryabinin aryabinin@virtuozzo.com Cc: stable@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com
Reviewed-by: Shakeel Butt shakeelb@google.com
lib/Kconfig.kasan | 1 + 1 file changed, 1 insertion(+)
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 3d35d062970d..c253c1b46c6b 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" depends on SLUB || (SLAB && !DEBUG_SLAB)
+ select SLUB_DEBUG if SLUB select CONSTRUCTORS select STACKDEPOT help
-- 2.17.1
linux-stable-mirror@lists.linaro.org