Hi All,
This patchset backports KASAN to LSK 3.18. Any comments are appreciated.
Regards,
Alex
[PATCH 01/66] arm64: guard asm/assembler.h against multiple
[PATCH 02/66] arm64: add macros for common adrp usages
[PATCH 03/66] arm64: use ENDPIPROC() to annotate position independent
[PATCH 04/66] arm64: pgalloc: consistently use PGALLOC_GFP
[PATCH 05/66] arm64: move PGD_SIZE definition to pgalloc.h
[PATCH 06/66] arm64: Change memcpy in kernel to use the copy template
[PATCH 07/66] mm: add utility for early copy from unmapped ram
[PATCH 08/66] arm64: support initrd outside kernel linear map
[PATCH 09/66] arm64: introduce VA_START macro - the first kernel
[PATCH 10/66] arm64: Move some head.text functions to executable
[PATCH 11/66] MODULE_DEVICE_TABLE: fix some callsites
[PATCH 12/66] compiler: introduce __alias(symbol) shortcut
[PATCH 13/66] kasan: add kernel address sanitizer infrastructure
[PATCH 14/66] kasan: disable memory hotplug
[PATCH 15/66] x86_64: add KASan support
[PATCH 16/66] mm: page_alloc: add kasan hooks on alloc and free paths
[PATCH 17/66] mm: slub: introduce virt_to_obj function
[PATCH 18/66] mm: slub: share object_err function
[PATCH 19/66] mm: slub: introduce
[PATCH 20/66] mm: slub: add kernel address sanitizer support for slub
[PATCH 21/66] fs: dcache: manually unpoison dname after allocation to
[PATCH 22/66] kmemleak: disable kasan instrumentation for kmemleak
[PATCH 23/66] lib: add kasan test module
[PATCH 24/66] x86_64: kasan: add interceptors for
[PATCH 25/66] kasan: enable stack instrumentation
[PATCH 26/66] mm: vmalloc: add flag preventing guard hole allocation
[PATCH 27/66] mm: vmalloc: pass additional vm_flags to
[PATCH 28/66] kernel: add support for .init_array.* constructors
[PATCH 29/66] module: fix types of device tables aliases
[PATCH 30/66] kasan: enable instrumentation of global variables
[PATCH 31/66] kasan, module, vmalloc: rework shadow allocation for
[PATCH 32/66] kasan, module: move MODULE_ALIGN macro into
[PATCH 33/66] kasan: Makefile: shut up warnings if
[PATCH 34/66] kasan: show gcc version requirements in Kconfig and
[PATCH 35/66] kasan: remove duplicate definition of the macro
[PATCH 36/66] x86/kasan: Define KASAN_SHADOW_OFFSET per architecture
[PATCH 37/66] x86/kasan, mm: Introduce generic
[PATCH 38/66] mm, mempool: do not allow atomic resizing
[PATCH 39/66] mm, mempool: disallow mempools based on slab caches
[PATCH 40/66] mm, mempool: poison elements backed by slab allocator
[PATCH 41/66] mm/mempool.c: kasan: poison mempool elements
[PATCH 42/66] mm/mempool: allow NULL `pool' pointer in
[PATCH 43/66] mm/mempool: avoid KASAN marking mempool poison checks
[PATCH 44/66] mm/mempool.c: kasan: poison mempool elements
[PATCH 45/66] kasan: fix last shadow judgement in
[PATCH 46/66] arm64: add KASAN support
[PATCH 47/66] arm64: kasan: fix issues reported by sparse
[PATCH 48/66] arm64: KASAN depends on !(ARM64_16K_PAGES &&
[PATCH 49/66] mm/kasan: rename kasan_enabled() to
[PATCH 50/66] mm/kasan: MODULE_VADDR is not available on all archs
[PATCH 51/66] mm/kasan: don't use kasan shadow pointer in generic
[PATCH 52/66] mm/kasan: prevent deadlock in kasan reporting
[PATCH 53/66] kasan: update reported bug types for not user nor
[PATCH 54/66] kasan: update reported bug types for kernel memory
[PATCH 55/66] kasan: accurately determine the type of the bad access
[PATCH 56/66] kasan: update log messages
[PATCH 57/66] kasan: various fixes in documentation
[PATCH 58/66] kasan: move KASAN_SANITIZE in arch/x86/boot/Makefile
[PATCH 59/66] kasan: update reference to kasan prototype repo
[PATCH 60/66] lib: test_kasan: add some testcases
[PATCH 61/66] kasan: Fix a type conversion error
[PATCH 62/66] kasan: use IS_ALIGNED in memory_is_poisoned_8()
[PATCH 63/66] mm, slub, kasan: enable user tracking by default with
[PATCH 64/66] kasan: always taint kernel on report
[PATCH 65/66] kasan: fix kmemleak false-positive in
[PATCH 66/66] kasan: add functions to clear stack poison
From: Marc Zyngier marc.zyngier@arm.com
asm/assembler.h lacks the usual guard against multiple inclusion, leading to a compilation failure if it is accidentally included twice.
Using the classic #ifndef/#define/#endif construct solves the issue.
Signed-off-by: Marc Zyngier marc.zyngier@arm.com
Signed-off-by: Will Deacon will.deacon@arm.com
(cherry picked from commit f3e39273e0a9a5c9dc78cd667ec3663e97e0e989)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/include/asm/assembler.h | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 5901480..750bac4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -20,6 +20,9 @@ #error "Only include this from assembly code" #endif
+#ifndef __ASM_ASSEMBLER_H +#define __ASM_ASSEMBLER_H + #include <asm/ptrace.h> #include <asm/thread_info.h>
@@ -155,3 +158,5 @@ lr .req x30 // link register #endif orr \rd, \lbits, \hbits, lsl #32 .endm + +#endif /* __ASM_ASSEMBLER_H */
From: Ard Biesheuvel ard.biesheuvel@linaro.org
The adrp instruction is mostly used in combination with either an add, a ldr or a str instruction with the low bits of the referenced symbol in the 12-bit immediate of the followup instruction.
Introduce the macros adr_l, ldr_l and str_l that encapsulate these common patterns.
Tested-by: Mark Rutland mark.rutland@arm.com
Reviewed-by: Mark Rutland mark.rutland@arm.com
Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org
Signed-off-by: Will Deacon will.deacon@arm.com
(cherry picked from commit b784a5d97d0af4835dd0125a3e0e5d0fd48128d6)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/include/asm/assembler.h | 48 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 750bac4..144b64a 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -159,4 +159,52 @@ lr .req x30 // link register orr \rd, \lbits, \hbits, lsl #32 .endm
+/* + * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where + * <symbol> is within the range +/- 4 GB of the PC. + */ + /* + * @dst: destination register (64 bit wide) + * @sym: name of the symbol + * @tmp: optional scratch register to be used if <dst> == sp, which + * is not allowed in an adrp instruction + */ + .macro adr_l, dst, sym, tmp= + .ifb \tmp + adrp \dst, \sym + add \dst, \dst, :lo12:\sym + .else + adrp \tmp, \sym + add \dst, \tmp, :lo12:\sym + .endif + .endm + + /* + * @dst: destination register (32 or 64 bit wide) + * @sym: name of the symbol + * @tmp: optional 64-bit scratch register to be used if <dst> is a + * 32-bit wide register, in which case it cannot be used to hold + * the address + */ + .macro ldr_l, dst, sym, tmp= + .ifb \tmp + adrp \dst, \sym + ldr \dst, [\dst, :lo12:\sym] + .else + adrp \tmp, \sym + ldr \dst, [\tmp, :lo12:\sym] + .endif + .endm + + /* + * @src: source register (32 or 64 bit wide) + * @sym: name of the symbol + * @tmp: mandatory 64-bit scratch register to calculate the address + * while <src> needs to be preserved. + */ + .macro str_l, src, sym, tmp + adrp \tmp, \sym + str \src, [\tmp, :lo12:\sym] + .endm + #endif /* __ASM_ASSEMBLER_H */
From: Ard Biesheuvel ard.biesheuvel@linaro.org
For more control over which functions are called with the MMU off or with the UEFI 1:1 mapping active, annotate some assembler routines as position independent. This is done by introducing ENDPIPROC(), which replaces the ENDPROC() declaration of those routines.
Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org
Signed-off-by: Catalin Marinas catalin.marinas@arm.com
(cherry picked from commit 207918461eb0aca720fddec5da79bc71c133b9f1)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/include/asm/assembler.h | 11 +++++++++++
 arch/arm64/lib/memchr.S | 2 +-
 arch/arm64/lib/memcmp.S | 2 +-
 arch/arm64/lib/memcpy.S | 2 +-
 arch/arm64/lib/memmove.S | 2 +-
 arch/arm64/lib/memset.S | 2 +-
 arch/arm64/lib/strcmp.S | 2 +-
 arch/arm64/lib/strlen.S | 2 +-
 arch/arm64/lib/strncmp.S | 2 +-
 arch/arm64/mm/cache.S | 10 +++++-----
 10 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 144b64a..3579988 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -207,4 +207,15 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm
+/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. + */ +#define ENDPIPROC(x) \ + .globl __pi_##x; \ + .type __pi_##x, %function; \ + .set __pi_##x, x; \ + .size __pi_##x, . - x; \ + ENDPROC(x) + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 8636b75..4444c1d 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -41,4 +41,4 @@ ENTRY(memchr) ret 2: mov x0, #0 ret -ENDPROC(memchr) +ENDPIPROC(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 6ea0776..ffbdec0 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 ) .Lret0: mov result, #0 ret -ENDPROC(memcmp) +ENDPIPROC(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 8a9a96d..5d6ddb7 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -198,4 +198,4 @@ ENTRY(memcpy) tst count, #0x3f b.ne .Ltail63 ret -ENDPROC(memcpy) +ENDPIPROC(memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 57b19ea2..68e2f20 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -194,4 +194,4 @@ ENTRY(memmove) tst count, #0x3f b.ne .Ltail63 ret -ENDPROC(memmove) +ENDPIPROC(memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 7c72dfd3..29f405f 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -213,4 +213,4 @@ ENTRY(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -ENDPROC(memset) +ENDPIPROC(memset) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 42f828b..471fe61 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -231,4 +231,4 @@ CPU_BE( orr syndrome, diff, has_nul ) lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret -ENDPROC(strcmp) +ENDPIPROC(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 987b68b..55ccc8e 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned -ENDPROC(strlen) +ENDPIPROC(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index 0224cf5..e267044 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -307,4 +307,4 @@ CPU_BE( orr syndrome, diff, has_nul ) .Lret0: mov result, #0 ret -ENDPROC(strncmp) +ENDPIPROC(strncmp) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 8eaf185..7bc9e50 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -167,7 +167,7 @@ ENTRY(__flush_dcache_area) b.lo 1b dsb sy ret -ENDPROC(__flush_dcache_area) +ENDPIPROC(__flush_dcache_area)
/* * __inval_cache_range(start, end) @@ -200,7 +200,7 @@ __dma_inv_range: b.lo 2b dsb sy ret -ENDPROC(__inval_cache_range) +ENDPIPROC(__inval_cache_range) ENDPROC(__dma_inv_range)
/* @@ -235,7 +235,7 @@ ENTRY(__dma_flush_range) b.lo 1b dsb sy ret -ENDPROC(__dma_flush_range) +ENDPIPROC(__dma_flush_range)
/* * __dma_map_area(start, size, dir) @@ -248,7 +248,7 @@ ENTRY(__dma_map_area) cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_range b __dma_clean_range -ENDPROC(__dma_map_area) +ENDPIPROC(__dma_map_area)
/* * __dma_unmap_area(start, size, dir) @@ -261,4 +261,4 @@ ENTRY(__dma_unmap_area) cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_range ret -ENDPROC(__dma_unmap_area) +ENDPIPROC(__dma_unmap_area)
From: Mark Rutland mark.rutland@arm.com
We currently allocate different levels of page tables with a variety of differing flags, and the PGALLOC_GFP flags, intended for use when allocating any level of page table, are only used for ptes in pte_alloc_one. On x86, PGALLOC_GFP is used for all page table allocations.
Currently the major differences are:
* __GFP_NOTRACK -- Needed to ensure page tables are always accessible in the presence of kmemcheck to prevent recursive faults. Currently kmemcheck cannot be selected for arm64.
* __GFP_REPEAT -- Causes the allocator to try to reclaim pages and retry upon a failure to allocate.
* __GFP_ZERO -- Sometimes passed explicitly, sometimes zalloc variants are used.
While we've not encountered issues so far, it would be preferable to be consistent. This patch ensures all levels of table are allocated in the same manner, with PGALLOC_GFP.
Cc: Steve Capper steve.capper@arm.com
Acked-by: Catalin Marinas catalin.marinas@arm.com
Signed-off-by: Mark Rutland mark.rutland@arm.com
Signed-off-by: Will Deacon will.deacon@arm.com
(cherry picked from commit 15670ef1eac9817cf48da12c885aabcdd88e9add)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/include/asm/pgalloc.h | 8 ++++----
 arch/arm64/mm/pgd.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index d5bed02..e20df38 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,11 +26,13 @@
#define check_pgt_cache() do { } while (0)
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) + #if CONFIG_ARM64_PGTABLE_LEVELS > 2
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pmd_t *)__get_free_page(PGALLOC_GFP); }
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) @@ -50,7 +52,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pud_t *)__get_free_page(PGALLOC_GFP); }
static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -69,8 +71,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) - static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 6682b36..71ca104 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -35,9 +35,9 @@ static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) { if (PGD_SIZE == PAGE_SIZE) - return (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)__get_free_page(PGALLOC_GFP); else - return kmem_cache_zalloc(pgd_cache, GFP_KERNEL); + return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); }
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
From: Andrey Ryabinin ryabinin.a.a@gmail.com
This will be used by KASAN later.
Signed-off-by: Andrey Ryabinin ryabinin.a.a@gmail.com
Acked-by: Catalin Marinas catalin.marinas@arm.com
Signed-off-by: Catalin Marinas catalin.marinas@arm.com
(cherry picked from commit fd2203dd3556f6553231fa026060793e67a25ce6)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/include/asm/pgalloc.h | 1 +
 arch/arm64/mm/pgd.c | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index e20df38..e838b9ad 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,6 +27,7 @@ #define check_pgt_cache() do { } while (0)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_ARM64_PGTABLE_LEVELS > 2
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 71ca104..cb3ba1b 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -28,8 +28,6 @@
#include "mm.h"
-#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) - static struct kmem_cache *pgd_cache;
pgd_t *pgd_alloc(struct mm_struct *mm)
From: Feng Kan fkan@apm.com
This converts memcpy.S to use the copy template file. The copy template file was originally based on memcpy.S.
Signed-off-by: Feng Kan fkan@apm.com
Signed-off-by: Balamurugan Shanmugam bshanmugam@apm.com
[catalin.marinas@arm.com: removed tmp3(w) .req statements as they are not used]
Signed-off-by: Catalin Marinas catalin.marinas@arm.com
(cherry picked from commit e5c88e3f2fb35dca5f3e46d65095bf5d008595b7)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/lib/copy_template.S | 193 +++++++++++++++++++++++++++++++++++++++++
 arch/arm64/lib/memcpy.S | 179 ++++++--------------------------
 2 files changed, 219 insertions(+), 153 deletions(-)
 create mode 100644 arch/arm64/lib/copy_template.S
diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S new file mode 100644 index 0000000..410fbdb --- /dev/null +++ b/arch/arm64/lib/copy_template.S @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + + +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + + mov dst, dstin + cmp count, #16 + /*When memory length is less than 16, the accessed are not aligned.*/ + b.lo .Ltiny15 + + neg tmp2, src + ands tmp2, tmp2, #15/* Bytes to reach alignment. */ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * Copy the leading memory data from src to dst in an increasing + * address order.By this way,the risk of overwritting the source + * memory data is eliminated when the distance between src and + * dst is less than 16. The memory accesses here are alignment. + */ + tbz tmp2, #0, 1f + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 +1: + tbz tmp2, #1, 2f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +2: + tbz tmp2, #2, 3f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +3: + tbz tmp2, #3, .LSrcAligned + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. + */ + ands tmp1, count, #0x30 + b.eq .Ltiny15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +1: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +2: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +.Ltiny15: + /* + * Prefer to break one ldp/stp into several load/store to access + * memory in an increasing address order,rather than to load/store 16 + * bytes from (src-16) to (dst-16) and to backward the src to aligned + * address,which way is used in original cortex memcpy. If keeping + * the original memcpy process here, memmove need to satisfy the + * precondition that src address is at least 16 bytes bigger than dst + * address,otherwise some source data will be overwritten when memove + * call memcpy directly. To make memmove simpler and decouple the + * memcpy's dependency on memmove, withdrew the original process. 
+ */ + tbz count, #3, 1f + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 +1: + tbz count, #2, 2f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +2: + tbz count, #1, 3f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +3: + tbz count, #0, .Lexitfunc + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 + + b .Lexitfunc + +.Lcpy_over64: + subs count, count, #128 + b.ge .Lcpy_body_large + /* + * Less than 128 bytes to copy, so handle 64 here and then jump + * to the tail. + */ + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + ldp1 D_l, D_h, src, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 + b .Lexitfunc + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. + */ + .p2align L1_CACHE_SHIFT +.Lcpy_body_large: + /* pre-get 64 bytes data. */ + ldp1 A_l, A_h, src, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + ldp1 D_l, D_h, src, #16 +1: + /* + * interlace the load of next 64 bytes data block with store of the last + * loaded 64 bytes data. + */ + stp1 A_l, A_h, dst, #16 + ldp1 A_l, A_h, src, #16 + stp1 B_l, B_h, dst, #16 + ldp1 B_l, B_h, src, #16 + stp1 C_l, C_h, dst, #16 + ldp1 C_l, C_h, src, #16 + stp1 D_l, D_h, dst, #16 + ldp1 D_l, D_h, src, #16 + subs count, count, #64 + b.ge 1b + stp1 A_l, A_h, dst, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 +.Lexitfunc: diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 5d6ddb7..36a6a62 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -36,166 +36,39 @@ * Returns: * x0 - dest */ -dstin .req x0 -src .req x1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -tmp3 .req x5 -tmp3w .req w5 -dst .req x6 + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm
-A_l .req x7 -A_h .req x8 -B_l .req x9 -B_h .req x10 -C_l .req x11 -C_h .req x12 -D_l .req x13 -D_h .req x14 + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm
-ENTRY(memcpy) - mov dst, dstin - cmp count, #16 - /*When memory length is less than 16, the accessed are not aligned.*/ - b.lo .Ltiny15 + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm
- neg tmp2, src - ands tmp2, tmp2, #15/* Bytes to reach alignment. */ - b.eq .LSrcAligned - sub count, count, tmp2 - /* - * Copy the leading memory data from src to dst in an increasing - * address order.By this way,the risk of overwritting the source - * memory data is eliminated when the distance between src and - * dst is less than 16. The memory accesses here are alignment. - */ - tbz tmp2, #0, 1f - ldrb tmp1w, [src], #1 - strb tmp1w, [dst], #1 -1: - tbz tmp2, #1, 2f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -2: - tbz tmp2, #2, 3f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -3: - tbz tmp2, #3, .LSrcAligned - ldr tmp1, [src],#8 - str tmp1, [dst],#8 + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm
-.LSrcAligned: - cmp count, #64 - b.ge .Lcpy_over64 - /* - * Deal with small copies quickly by dropping straight into the - * exit block. - */ -.Ltail63: - /* - * Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. - */ - ands tmp1, count, #0x30 - b.eq .Ltiny15 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -1: - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -2: - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -.Ltiny15: - /* - * Prefer to break one ldp/stp into several load/store to access - * memory in an increasing address order,rather than to load/store 16 - * bytes from (src-16) to (dst-16) and to backward the src to aligned - * address,which way is used in original cortex memcpy. If keeping - * the original memcpy process here, memmove need to satisfy the - * precondition that src address is at least 16 bytes bigger than dst - * address,otherwise some source data will be overwritten when memove - * call memcpy directly. To make memmove simpler and decouple the - * memcpy's dependency on memmove, withdrew the original process. - */ - tbz count, #3, 1f - ldr tmp1, [src], #8 - str tmp1, [dst], #8 -1: - tbz count, #2, 2f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -2: - tbz count, #1, 3f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -3: - tbz count, #0, .Lexitfunc - ldrb tmp1w, [src] - strb tmp1w, [dst] + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm
-.Lexitfunc: - ret + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm
-.Lcpy_over64: - subs count, count, #128 - b.ge .Lcpy_body_large - /* - * Less than 128 bytes to copy, so handle 64 here and then jump - * to the tail. - */ - ldp A_l, A_h, [src],#16 - stp A_l, A_h, [dst],#16 - ldp B_l, B_h, [src],#16 - ldp C_l, C_h, [src],#16 - stp B_l, B_h, [dst],#16 - stp C_l, C_h, [dst],#16 - ldp D_l, D_h, [src],#16 - stp D_l, D_h, [dst],#16 + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm
- tst count, #0x3f - b.ne .Ltail63 - ret + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm
- /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lcpy_body_large: - /* pre-get 64 bytes data. */ - ldp A_l, A_h, [src],#16 - ldp B_l, B_h, [src],#16 - ldp C_l, C_h, [src],#16 - ldp D_l, D_h, [src],#16 -1: - /* - * interlace the load of next 64 bytes data block with store of the last - * loaded 64 bytes data. - */ - stp A_l, A_h, [dst],#16 - ldp A_l, A_h, [src],#16 - stp B_l, B_h, [dst],#16 - ldp B_l, B_h, [src],#16 - stp C_l, C_h, [dst],#16 - ldp C_l, C_h, [src],#16 - stp D_l, D_h, [dst],#16 - ldp D_l, D_h, [src],#16 - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst],#16 - stp B_l, B_h, [dst],#16 - stp C_l, C_h, [dst],#16 - stp D_l, D_h, [dst],#16 - - tst count, #0x3f - b.ne .Ltail63 +ENTRY(memcpy) +#include "copy_template.S" ret ENDPIPROC(memcpy)
From: Mark Salter msalter@redhat.com
When booting an arm64 kernel w/initrd using UEFI/grub, use of mem= will likely cut off part or all of the initrd. This leaves it outside the kernel linear map which leads to failure when unpacking. The x86 code has a similar need to relocate an initrd outside of mapped memory in some cases.
The current x86 code uses early_memremap() to copy the original initrd from unmapped to mapped RAM. This patchset creates a generic copy_from_early_mem() utility based on that x86 code and has arm64 and x86 share it in their respective initrd relocation code.
This patch (of 3):
In some early boot circumstances, it may be necessary to copy from RAM outside the kernel linear mapping to mapped RAM. The need to relocate an initrd is one example in the x86 code. This patch creates a helper function based on current x86 code.
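As a quick orientation, here is a minimal, hypothetical sketch of how an early-boot caller might use the new helper; only copy_from_early_mem() itself comes from this patch, and the caller name, blob_phys and blob_size are made up for illustration:

#include <linux/init.h>
#include <linux/types.h>

/* declared in asm-generic/early_ioremap.h by the hunk below */
extern void copy_from_early_mem(void *dest, phys_addr_t src, unsigned long size);

/*
 * Hypothetical caller: 'dest' lies inside the linear mapping, while
 * blob_phys/blob_size describe RAM that is not (yet) mapped. The helper
 * chunks the copy through early_memremap()/early_memunmap() internally.
 */
static void __init copy_blob_into_linear_map(void *dest, phys_addr_t blob_phys,
					     unsigned long blob_size)
{
	copy_from_early_mem(dest, blob_phys, blob_size);
}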
Signed-off-by: Mark Salter msalter@redhat.com
Cc: Catalin Marinas catalin.marinas@arm.com
Cc: Will Deacon will.deacon@arm.com
Cc: Arnd Bergmann arnd@arndb.de
Cc: Ard Biesheuvel ard.biesheuvel@linaro.org
Cc: Mark Rutland mark.rutland@arm.com
Cc: Russell King rmk@arm.linux.org.uk
Cc: Ingo Molnar mingo@elte.hu
Cc: Thomas Gleixner tglx@linutronix.de
Cc: "H. Peter Anvin" hpa@zytor.com
Cc: Yinghai Lu yinghai@kernel.org
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 6b0f68e32ea8749ff7d4a66cd5761e915e48e59d)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 include/asm-generic/early_ioremap.h | 6 ++++++
 mm/early_ioremap.c | 22 ++++++++++++++++++++++
 2 files changed, 28 insertions(+)
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h index a5de55c..e539f27 100644 --- a/include/asm-generic/early_ioremap.h +++ b/include/asm-generic/early_ioremap.h @@ -33,6 +33,12 @@ extern void early_ioremap_setup(void); */ extern void early_ioremap_reset(void);
+/* + * Early copy from unmapped memory to kernel mapped memory. + */ +extern void copy_from_early_mem(void *dest, phys_addr_t src, + unsigned long size); + #else static inline void early_ioremap_init(void) { } static inline void early_ioremap_setup(void) { } diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index e10ccd2..a0baeb4 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -217,6 +217,28 @@ early_memremap(resource_size_t phys_addr, unsigned long size) return (__force void *)__early_ioremap(phys_addr, size, FIXMAP_PAGE_NORMAL); } + +#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) + +void __init copy_from_early_mem(void *dest, phys_addr_t src, unsigned long size) +{ + unsigned long slop, clen; + char *p; + + while (size) { + slop = src & ~PAGE_MASK; + clen = size; + if (clen > MAX_MAP_CHUNK - slop) + clen = MAX_MAP_CHUNK - slop; + p = early_memremap(src & PAGE_MASK, clen + slop); + memcpy(dest, p + slop, clen); + early_memunmap(p, clen + slop); + dest += clen; + src += clen; + size -= clen; + } +} + #else /* CONFIG_MMU */
void __init __iomem *
From: Mark Salter msalter@redhat.com
The use of mem= could leave part or all of the initrd outside of the kernel linear map. This will lead to an error when unpacking the initrd and a probable failure to boot. This patch catches that situation and relocates the initrd to be fully within the linear map.
Signed-off-by: Mark Salter msalter@redhat.com
Acked-by: Will Deacon will.deacon@arm.com
Cc: Catalin Marinas catalin.marinas@arm.com
Cc: Arnd Bergmann arnd@arndb.de
Cc: Ard Biesheuvel ard.biesheuvel@linaro.org
Cc: Mark Rutland mark.rutland@arm.com
Cc: Russell King rmk@arm.linux.org.uk
Cc: Ingo Molnar mingo@elte.hu
Cc: Thomas Gleixner tglx@linutronix.de
Cc: "H. Peter Anvin" hpa@zytor.com
Cc: Yinghai Lu yinghai@kernel.org
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 1570f0d7ab425c1e0905715bf9cc98b2a82e723f)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/kernel/setup.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b5853ee..8669e4e 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -363,6 +363,67 @@ static void __init request_standard_resources(void) } }
+#ifdef CONFIG_BLK_DEV_INITRD +/* + * Relocate initrd if it is not completely within the linear mapping. + * This would be the case if mem= cuts out all or part of it. + */ +static void __init relocate_initrd(void) +{ + phys_addr_t orig_start = __virt_to_phys(initrd_start); + phys_addr_t orig_end = __virt_to_phys(initrd_end); + phys_addr_t ram_end = memblock_end_of_DRAM(); + phys_addr_t new_start; + unsigned long size, to_free = 0; + void *dest; + + if (orig_end <= ram_end) + return; + + /* + * Any of the original initrd which overlaps the linear map should + * be freed after relocating. + */ + if (orig_start < ram_end) + to_free = ram_end - orig_start; + + size = orig_end - orig_start; + + /* initrd needs to be relocated completely inside linear mapping */ + new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn), + size, PAGE_SIZE); + if (!new_start) + panic("Cannot relocate initrd of size %ld\n", size); + memblock_reserve(new_start, size); + + initrd_start = __phys_to_virt(new_start); + initrd_end = initrd_start + size; + + pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n", + orig_start, orig_start + size - 1, + new_start, new_start + size - 1); + + dest = (void *)initrd_start; + + if (to_free) { + memcpy(dest, (void *)__phys_to_virt(orig_start), to_free); + dest += to_free; + } + + copy_from_early_mem(dest, orig_start + to_free, size - to_free); + + if (to_free) { + pr_info("Freeing original RAMDISK from [%llx-%llx]\n", + orig_start, orig_start + to_free - 1); + memblock_free(orig_start, to_free); + } +} +#else +static inline void __init relocate_initrd(void) +{ +} +#endif + u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
void __init setup_arch(char **cmdline_p) @@ -392,6 +453,7 @@ void __init setup_arch(char **cmdline_p) arm64_memblock_init();
paging_init(); + relocate_initrd(); request_standard_resources();
efi_idmap_init();
From: Andrey Ryabinin ryabinin.a.a@gmail.com
In order to not use lengthy (UL(0xffffffffffffffff) << VA_BITS) everywhere, replace it with VA_START.
Signed-off-by: Andrey Ryabinin ryabinin.a.a@gmail.com
Reviewed-by: Catalin Marinas catalin.marinas@arm.com
Signed-off-by: Catalin Marinas catalin.marinas@arm.com
(cherry picked from commit 127db024a7baee9874014dac33628253f438b4da)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/include/asm/memory.h | 2 ++
 arch/arm64/include/asm/pgtable.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index a62cd07..e4a2ef9 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -36,12 +36,14 @@ * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. + * VA_START - the first kernel virtual address. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) +#define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 41a43bf..2c77d4a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -41,7 +41,7 @@ * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) -#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) +#define VMALLOC_START (VA_START) #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
From: Laura Abbott lauraa@codeaurora.org
The head.text section is intended to be run at early bootup, before any of the regular kernel mappings have been set up. Parts of head.text may be freed back into the buddy allocator due to TEXT_OFFSET, so for security requirements this memory must not be executable. The suspend/resume/hotplug code path, however, requires some of these head.S functions to run, which means they need to be executable. Support these conflicting requirements by moving the few head.text functions that need to be executable to the text section, which has the appropriate page table permissions.
Tested-by: Kees Cook keescook@chromium.org
Reviewed-by: Mark Rutland mark.rutland@arm.com
Tested-by: Mark Rutland mark.rutland@arm.com
Signed-off-by: Laura Abbott lauraa@codeaurora.org
Signed-off-by: Will Deacon will.deacon@arm.com
(cherry picked from commit 034edabe6cf1d0dea49d4c836ba128cec90fad04)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 arch/arm64/kernel/head.S | 407 ++++++++++++++++++++++++-----------------------
 1 file changed, 209 insertions(+), 198 deletions(-)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2877dd8..b9eb040 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -238,7 +238,13 @@ ENTRY(stext) mov x0, x22 bl lookup_processor_type mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? + /* + * __error_p may end up out of range for cbz if text areas are + * aligned up to section sizes. + */ + cbnz x23, 1f // invalid processor (x23=0)? + b __error_p +1: bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* @@ -250,13 +256,214 @@ ENTRY(stext) */ ldr x27, __switch_data // address to jump to after // MMU has been enabled - adr lr, __enable_mmu // return (PIC) address + adrp lr, __enable_mmu // return (PIC) address + add lr, lr, #:lo12:__enable_mmu ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys br x12 // initialise processor ENDPROC(stext)
/* + * Determine validity of the x21 FDT pointer. + * The dtb must be 8-byte aligned and live in the first 512M of memory. + */ +__vet_fdt: + tst x21, #0x7 + b.ne 1f + cmp x21, x24 + b.lt 1f + mov x0, #(1 << 29) + add x0, x0, x24 + cmp x21, x0 + b.ge 1f + ret +1: + mov x21, #0 + ret +ENDPROC(__vet_fdt) +/* + * Macro to create a table entry to the next page. + * + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt + * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address + */ + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. + * + * Preserves: tbl, next, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS == 3 + create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#endif + .endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). + * + * Preserves: tbl, flags + * Corrupts: phys, start, end, pstate + */ + .macro create_block_map, tbl, flags, phys, start, end + lsr \phys, \phys, #BLOCK_SHIFT + lsr \start, \start, #BLOCK_SHIFT + and \start, \start, #PTRS_PER_PTE - 1 // table index + orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry + lsr \end, \end, #BLOCK_SHIFT + and \end, \end, #PTRS_PER_PTE - 1 // table end index +9999: str \phys, [\tbl, \start, lsl #3] // store the entry + add \start, \start, #1 // next entry + add \phys, \phys, #BLOCK_SIZE // next block + cmp \start, \end + b.ls 9999b + .endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + * - identity mapping to enable the MMU (low address, TTBR0) + * - first few MB of the kernel linear mapping to jump to once the MMU has + * been enabled, including the FDT blob (TTBR1) + * - pgd entry for fixed mappings (TTBR1) + */ +__create_page_tables: + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + /* + * Clear the idmap and swapper page tables. + */ + mov x0, x25 + add x6, x26, #SWAPPER_DIR_SIZE +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x6 + b.lo 1b + + ldr x7, =MM_MMUFLAGS + + /* + * Create the identity mapping. + */ + mov x0, x25 // idmap_pg_dir + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) + create_pgd_entry x0, x3, x5, x6 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the kernel image (starting with PHYS_OFFSET). 
+ */ + mov x0, x26 // swapper_pg_dir + mov x5, #PAGE_OFFSET + create_pgd_entry x0, x5, x3, x6 + ldr x6, =KERNEL_END + mov x3, x24 // phys offset + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the FDT blob (maximum 2MB; must be within 512MB of + * PHYS_OFFSET). + */ + mov x3, x21 // FDT phys address + and x3, x3, #~((1 << 21) - 1) // 2MB aligned + mov x6, #PAGE_OFFSET + sub x5, x3, x24 // subtract PHYS_OFFSET + tst x5, #~((1 << 29) - 1) // within 512MB? + csel x21, xzr, x21, ne // zero the FDT pointer + b.ne 1f + add x5, x5, x6 // __va(FDT blob) + add x6, x5, #1 << 21 // 2MB for the FDT blob + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 +1: + /* + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 + ret +ENDPROC(__create_page_tables) + .ltorg + + .align 3 + .type __switch_data, %object +__switch_data: + .quad __mmap_switched + .quad __bss_start // x6 + .quad __bss_stop // x7 + .quad processor_id // x4 + .quad __fdt_pointer // x5 + .quad memstart_addr // x6 + .quad init_thread_union + THREAD_START_SP // sp + +/* + * The following fragment of code is executed with the MMU on in MMU mode, and + * uses absolute addresses; this is not position independent. + */ +__mmap_switched: + adr x3, __switch_data + 8 + + ldp x6, x7, [x3], #16 +1: cmp x6, x7 + b.hs 2f + str xzr, [x6], #8 // Clear BSS + b 1b +2: + ldp x4, x5, [x3], #16 + ldr x6, [x3], #8 + ldr x16, [x3] + mov sp, x16 + str x22, [x4] // Save processor ID + str x21, [x5] // Save FDT pointer + str x24, [x6] // Save PHYS_OFFSET + mov x29, #0 + b start_kernel +ENDPROC(__mmap_switched) + +/* + * end early head section, begin head code that is also used for + * hotplug and needs to have the same protections as the text region + */ + .section ".text","ax" +/* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. * @@ -497,183 +704,6 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET
/* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - lsr \tmp1, \virt, #\shift - and \tmp1, \tmp1, #\ptrs - 1 // table index - add \tmp2, \tbl, #PAGE_SIZE - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm - -/* - * Macro to populate the PGD (and possibily PUD) for the corresponding - * block entry in the next level (tbl) for the given virtual address. - * - * Preserves: tbl, next, virt - * Corrupts: tmp1, tmp2 - */ - .macro create_pgd_entry, tbl, virt, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 -#endif - .endm - -/* - * Macro to populate block entries in the page table for the start..end - * virtual range (inclusive). - * - * Preserves: tbl, flags - * Corrupts: phys, start, end, pstate - */ - .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT - and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT - and \end, \end, #PTRS_PER_PTE - 1 // table end index -9999: str \phys, [\tbl, \start, lsl #3] // store the entry - add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block - cmp \start, \end - b.ls 9999b - .endm - -/* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled, including the FDT blob (TTBR1) - * - pgd entry for fixed mappings (TTBR1) - */ -__create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses - mov x27, lr - - /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - /* - * Clear the idmap and swapper page tables. - */ - mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b - - ldr x7, =MM_MMUFLAGS - - /* - * Create the identity mapping. - */ - mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END - mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET - create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END - mov x3, x24 // phys offset - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the FDT blob (maximum 2MB; must be within 512MB of - * PHYS_OFFSET). 
- */ - mov x3, x21 // FDT phys address - and x3, x3, #~((1 << 21) - 1) // 2MB aligned - mov x6, #PAGE_OFFSET - sub x5, x3, x24 // subtract PHYS_OFFSET - tst x5, #~((1 << 29) - 1) // within 512MB? - csel x21, xzr, x21, ne // zero the FDT pointer - b.ne 1f - add x5, x5, x6 // __va(FDT blob) - add x6, x5, #1 << 21 // 2MB for the FDT blob - sub x6, x6, #1 // inclusive range - create_block_map x0, x7, x3, x5, x6 -1: - /* - * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - mov lr, x27 - ret -ENDPROC(__create_page_tables) - .ltorg - - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - -/* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. - */ -__mmap_switched: - adr x3, __switch_data + 8 - - ldp x6, x7, [x3], #16 -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET - mov x29, #0 - b start_kernel -ENDPROC(__mmap_switched) - -/* * Exception handling. Something went wrong and we can't proceed. We ought to * tell the user, but since we don't have any guarantee that we're even * running on the right architecture, we do virtually nothing. @@ -720,22 +750,3 @@ __lookup_processor_type_data: .quad . .quad cpu_table .size __lookup_processor_type_data, . - __lookup_processor_type_data - -/* - * Determine validity of the x21 FDT pointer. - * The dtb must be 8-byte aligned and live in the first 512M of memory. - */ -__vet_fdt: - tst x21, #0x7 - b.ne 1f - cmp x21, x24 - b.lt 1f - mov x0, #(1 << 29) - add x0, x0, x24 - cmp x21, x0 - b.ge 1f - ret -1: - mov x21, #0 - ret -ENDPROC(__vet_fdt)
From: Andrew Morton akpm@linux-foundation.org
The patch "module: fix types of device tables aliases" newly requires that invocations of
MODULE_DEVICE_TABLE(type, name);
come *after* the definition of `name'. That is reasonable, but some drivers weren't doing this. Fix them.
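As a minimal illustration of the required ordering (the table name and IDs below are hypothetical; the real fixes are in the hunks that follow):

#include <linux/module.h>
#include <linux/pci.h>

static const struct pci_device_id skel_pci_tbl[] = {
	{ PCI_DEVICE(0x1234, 0x5678) },	/* hypothetical vendor/device IDs */
	{ 0, }
};
/* must come after the table definition, not before it */
MODULE_DEVICE_TABLE(pci, skel_pci_tbl);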
Cc: James Bottomley James.Bottomley@HansenPartnership.com
Cc: Andrey Ryabinin a.ryabinin@samsung.com
Cc: David Miller davem@davemloft.net
Cc: Hans Verkuil hverkuil@xs4all.nl
Acked-by: Mauro Carvalho Chehab mchehab@osg.samsung.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 0f989f749b51ec1fd94bb5a42f8ad10c8b9f73cb)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 Documentation/video4linux/v4l2-pci-skeleton.c | 2 +-
 drivers/net/ethernet/emulex/benet/be_main.c | 1 -
 drivers/scsi/be2iscsi/be_main.c | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/Documentation/video4linux/v4l2-pci-skeleton.c b/Documentation/video4linux/v4l2-pci-skeleton.c index 006721e..7bd1b97 100644 --- a/Documentation/video4linux/v4l2-pci-skeleton.c +++ b/Documentation/video4linux/v4l2-pci-skeleton.c @@ -42,7 +42,6 @@ MODULE_DESCRIPTION("V4L2 PCI Skeleton Driver"); MODULE_AUTHOR("Hans Verkuil"); MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, skeleton_pci_tbl);
/** * struct skeleton - All internal data for one instance of device @@ -95,6 +94,7 @@ static const struct pci_device_id skeleton_pci_tbl[] = { /* { PCI_DEVICE(PCI_VENDOR_ID_, PCI_DEVICE_ID_) }, */ { 0, } }; +MODULE_DEVICE_TABLE(pci, skeleton_pci_tbl);
/* * HDTV: this structure has the capabilities of the HDTV receiver. diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index fdd3679..1f65817 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -26,7 +26,6 @@ #include <net/vxlan.h>
MODULE_VERSION(DRV_VER); -MODULE_DEVICE_TABLE(pci, be_dev_ids); MODULE_DESCRIPTION(DRV_DESC " " DRV_VER); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index be4586b..dd0820f 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -48,7 +48,6 @@ static unsigned int be_iopoll_budget = 10; static unsigned int be_max_phys_size = 64; static unsigned int enable_msix = 1;
-MODULE_DEVICE_TABLE(pci, beiscsi_pci_id_table); MODULE_DESCRIPTION(DRV_DESC " " BUILD_STR); MODULE_VERSION(BUILD_STR); MODULE_AUTHOR("Emulex Corporation");
From: Andrey Ryabinin a.ryabinin@samsung.com
To be consistent with other compiler attributes, introduce the __alias(symbol) macro, expanding into __attribute__((alias(#symbol))).
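For illustration, a small hedged sketch of the shortcut in use; the function names here are made up, not taken from the kernel:

#include <linux/compiler.h>

int do_compute(int x)
{
	return x * 2;
}

/* 'compute' is emitted as a second name (alias) for do_compute */
int compute(int x) __alias(do_compute);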
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com
Cc: Dmitry Vyukov dvyukov@google.com
Cc: Konstantin Serebryany kcc@google.com
Cc: Dmitry Chernenkov dmitryc@google.com
Signed-off-by: Andrey Konovalov adech.fo@gmail.com
Cc: Yuri Gribov tetra2005@gmail.com
Cc: Konstantin Khlebnikov koct9i@gmail.com
Cc: Sasha Levin sasha.levin@oracle.com
Cc: Christoph Lameter cl@linux.com
Cc: Joonsoo Kim iamjoonsoo.kim@lge.com
Cc: Dave Hansen dave.hansen@intel.com
Cc: Andi Kleen andi@firstfloor.org
Cc: Ingo Molnar mingo@elte.hu
Cc: Thomas Gleixner tglx@linutronix.de
Cc: "H. Peter Anvin" hpa@zytor.com
Cc: Christoph Lameter cl@linux.com
Cc: Pekka Enberg penberg@kernel.org
Cc: David Rientjes rientjes@google.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit cb4188ac8e5779f66b9f55888ac2c75b391cde44)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 include/linux/compiler-gcc.h | 1 +
 1 file changed, 1 insertion(+)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 02ae99e..cdf13ca 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,6 +66,7 @@ #define __deprecated __attribute__((deprecated)) #define __packed __attribute__((packed)) #define __weak __attribute__((weak)) +#define __alias(symbol) __attribute__((alias(#symbol)))
/* * it doesn't make sense on ARM (currently the only user of __naked) to trace
From: Andrey Ryabinin a.ryabinin@samsung.com
Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs.
KASAN uses compile-time instrumentation for checking every memory access, therefore GCC > v4.9.2 is required. v4.9.2 almost works, but has issues with putting symbol aliases into the wrong section, which breaks kasan instrumentation of globals.
This patch only adds the infrastructure for the kernel address sanitizer; it's not available for use yet. The idea and some code were borrowed from [1].
Basic idea:
The main idea of KASAN is to use shadow memory to record whether each byte of memory is safe to access or not, and use compiler's instrumentation to check the shadow memory on each memory access.
Address sanitizer uses 1/8 of the memory addressable in kernel for shadow memory and uses direct mapping with a scale and offset to translate a memory address to its corresponding shadow address.
Here is the function that translates an address to its corresponding shadow address:
unsigned long kasan_mem_to_shadow(unsigned long addr)
{
	return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}
where KASAN_SHADOW_SCALE_SHIFT = 3.
So for every 8 bytes there is one corresponding byte of shadow memory. The following encoding is used for each shadow byte: 0 means that all 8 bytes of the corresponding memory region are valid for access; k (1 <= k <= 7) means that the first k bytes are valid for access, and the other (8 - k) bytes are not; any negative value indicates that the entire 8-byte region is inaccessible. Different negative values are used to distinguish between different kinds of inaccessible memory (redzones, freed memory) (see mm/kasan/kasan.h).
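To make the encoding concrete, here is an illustrative sketch (not the exact mm/kasan code; KASAN_SHADOW_OFFSET is defined per architecture, the x86_64 value is used below only as a placeholder) of checking a single byte against its shadow:

#include <linux/types.h>

#define KASAN_SHADOW_SCALE_SHIFT	3
#define KASAN_SHADOW_SCALE_SIZE		(1UL << KASAN_SHADOW_SCALE_SHIFT)
#ifndef KASAN_SHADOW_OFFSET
#define KASAN_SHADOW_OFFSET	0xdffffc0000000000UL	/* x86_64 value, for illustration */
#endif

/* shadow byte describing the 8-byte granule that contains 'addr' */
static inline s8 *mem_to_shadow(unsigned long addr)
{
	return (s8 *)((addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET);
}

/* is a 1-byte access at 'addr' allowed by the encoding described above? */
static inline bool byte_access_is_valid(unsigned long addr)
{
	s8 shadow = *mem_to_shadow(addr);

	if (shadow == 0)	/* whole granule accessible */
		return true;
	if (shadow < 0)		/* redzone, freed memory, ... */
		return false;
	/* first 'shadow' bytes of the granule are accessible */
	return (addr & (KASAN_SHADOW_SCALE_SIZE - 1)) < (unsigned long)shadow;
}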
To be able to detect accesses to bad memory we need a special compiler. Such a compiler inserts specific function calls (__asan_load*(addr), __asan_store*(addr)) before each memory access of size 1, 2, 4, 8 or 16.
These functions check whether the memory region is valid to access by consulting the corresponding shadow memory. If the access is not valid, an error is printed.
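Conceptually, the compiler-inserted call boils down to a shadow check plus a report; this is only a sketch, and the helper names other than __asan_load4 are hypothetical:

#include <linux/types.h>

bool shadow_says_valid(unsigned long addr, size_t size);		/* hypothetical */
void report_bad_access(unsigned long addr, size_t size, bool is_write);	/* hypothetical */

/* what an outline check for a 4-byte load roughly does */
void __asan_load4(unsigned long addr)
{
	if (!shadow_says_valid(addr, 4))
		report_bad_access(addr, 4, false);
}

/*
 * With KASAN enabled, a plain load such as
 *	val = *(u32 *)ptr;
 * is compiled as if the source had been
 *	__asan_load4((unsigned long)ptr);
 *	val = *(u32 *)ptr;
 */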
Historical background of the address sanitizer from Dmitry Vyukov:
"We've developed the set of tools, AddressSanitizer (Asan), ThreadSanitizer and MemorySanitizer, for user space. We actively use them for testing inside of Google (continuous testing, fuzzing, running prod services). To date the tools have found more than 10'000 scary bugs in Chromium, Google internal codebase and various open-source projects (Firefox, OpenSSL, gcc, clang, ffmpeg, MySQL and lots of others): [2] [3] [4]. The tools are part of both gcc and clang compilers.
We have not yet done massive testing under the Kernel AddressSanitizer (it's kind of a chicken-and-egg problem, you need it to be upstream to start applying it extensively). To date it has found about 50 bugs. Bugs that we've found in the upstream kernel are listed in [5]. We've also found ~20 bugs in our internal version of the kernel. Also people from Samsung and Oracle have found some.
[...]
As others noted, the main feature of AddressSanitizer is its performance due to inline compiler instrumentation and simple linear shadow memory. User-space Asan has ~2x slowdown on computational programs and ~2x memory consumption increase. Taking into account that kernel usually consumes only small fraction of CPU and memory when running real user-space programs, I would expect that kernel Asan will have ~10-30% slowdown and similar memory consumption increase (when we finish all tuning).
I agree that Asan can well replace kmemcheck. We have plans to start working on Kernel MemorySanitizer that finds uses of uninitialized memory. Asan+Msan will provide feature-parity with kmemcheck. As others noted, Asan will unlikely replace debug slab and pagealloc that can be enabled at runtime. Asan uses compiler instrumentation, so even if it is disabled, it still incurs visible overheads.
Asan technology is easily portable to other architectures. Compiler instrumentation is fully portable. Runtime has some arch-dependent parts like shadow mapping and atomic operation interception. They are relatively easy to port."
Comparison with other debugging features:
========================================
KMEMCHECK:
- KASan can do almost everything that kmemcheck can. KASan uses compile-time instrumentation, which makes it significantly faster than kmemcheck. The only advantage of kmemcheck over KASan is detection of uninitialized memory reads.
Some brief performance testing showed that kasan could be x500-x600 times faster than kmemcheck:
$ netperf -l 30
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to localhost (127.0.0.1) port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

no debug:       87380  16384  16384    30.00    41624.72
kasan inline:   87380  16384  16384    30.00    12870.54
kasan outline:  87380  16384  16384    30.00    10586.39
kmemcheck:      87380  16384  16384    30.03       20.23
- Also, kmemcheck can't run on multiple CPUs: it always sets the number of CPUs to 1. KASan doesn't have such a limitation.
DEBUG_PAGEALLOC:
- KASan is slower than DEBUG_PAGEALLOC, but KASan works at sub-page granularity, so it is able to find more bugs.
SLUB_DEBUG (poisoning, redzones):
- SLUB_DEBUG has lower overhead than KASan.
- SLUB_DEBUG is in most cases unable to detect bad reads; KASan is able to detect both bad reads and bad writes.
- In some cases (e.g. a redzone overwrite) SLUB_DEBUG detects a bug only on allocation/freeing of the object. KASan catches the bug right when it happens, so we always know the exact place of the first bad read/write.
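For example, a hypothetical snippet in the spirit of the lib/test_kasan.c cases added later in this series:

	char *p = kmalloc(123, GFP_KERNEL);

	p[123] = 'x';	/* one byte past the object */
	...
	kfree(p);	/* SLUB_DEBUG notices the clobbered redzone only here;
			 * KASan reports at the p[123] write itself, with the
			 * exact faulting instruction pointer. */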
[1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel
[2] https://code.google.com/p/address-sanitizer/wiki/FoundBugs
[3] https://code.google.com/p/thread-sanitizer/wiki/FoundBugs
[4] https://code.google.com/p/memory-sanitizer/wiki/FoundBugs
[5] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel#T...
Based on work by Andrey Konovalov.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com
Acked-by: Michal Marek mmarek@suse.cz
Signed-off-by: Andrey Konovalov adech.fo@gmail.com
Cc: Dmitry Vyukov dvyukov@google.com
Cc: Konstantin Serebryany kcc@google.com
Cc: Dmitry Chernenkov dmitryc@google.com
Cc: Yuri Gribov tetra2005@gmail.com
Cc: Konstantin Khlebnikov koct9i@gmail.com
Cc: Sasha Levin sasha.levin@oracle.com
Cc: Christoph Lameter cl@linux.com
Cc: Joonsoo Kim iamjoonsoo.kim@lge.com
Cc: Dave Hansen dave.hansen@intel.com
Cc: Andi Kleen andi@firstfloor.org
Cc: Ingo Molnar mingo@elte.hu
Cc: Thomas Gleixner tglx@linutronix.de
Cc: "H. Peter Anvin" hpa@zytor.com
Cc: Christoph Lameter cl@linux.com
Cc: Pekka Enberg penberg@kernel.org
Cc: David Rientjes rientjes@google.com
Cc: Stephen Rothwell sfr@canb.auug.org.au
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 0b24becc810dc3be6e3f94103a866f214c282394)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 Documentation/kasan.txt               | 170 +++++++++++++++++++
 Makefile                              |   3 +-
 drivers/firmware/efi/libstub/Makefile |   1 +
 include/linux/kasan.h                 |  46 ++++++
 include/linux/sched.h                 |   3 +
 lib/Kconfig.debug                     |   2 +
 lib/Kconfig.kasan                     |  43 +++++
 mm/Makefile                           |   1 +
 mm/kasan/Makefile                     |   8 +
 mm/kasan/kasan.c                      | 302 ++++++++++++++++++++++++++++++++++
 mm/kasan/kasan.h                      |  34 ++++
 mm/kasan/report.c                     | 209 +++++++++++++++++++++++
 scripts/Makefile.kasan                |  24 +++
 scripts/Makefile.lib                  |  10 ++
 14 files changed, 855 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/kasan.txt
 create mode 100644 include/linux/kasan.h
 create mode 100644 lib/Kconfig.kasan
 create mode 100644 mm/kasan/Makefile
 create mode 100644 mm/kasan/kasan.c
 create mode 100644 mm/kasan/kasan.h
 create mode 100644 mm/kasan/report.c
 create mode 100644 scripts/Makefile.kasan
diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt new file mode 100644 index 0000000..f0645a8 --- /dev/null +++ b/Documentation/kasan.txt @@ -0,0 +1,170 @@ +Kernel address sanitizer +================ + +0. Overview +=========== + +Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides +a fast and comprehensive solution for finding use-after-free and out-of-bounds +bugs. + +KASan uses compile-time instrumentation for checking every memory access, +therefore you will need a certain version of GCC >= 4.9.2 + +Currently KASan is supported only for x86_64 architecture and requires that the +kernel be built with the SLUB allocator. + +1. Usage +========= + +To enable KASAN configure kernel with: + + CONFIG_KASAN = y + +and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline +is compiler instrumentation types. The former produces smaller binary the +latter is 1.1 - 2 times faster. Inline instrumentation requires GCC 5.0 or +latter. + +Currently KASAN works only with the SLUB memory allocator. +For better bug detection and nicer report, enable CONFIG_STACKTRACE and put +at least 'slub_debug=U' in the boot cmdline. + +To disable instrumentation for specific files or directories, add a line +similar to the following to the respective kernel Makefile: + + For a single file (e.g. main.o): + KASAN_SANITIZE_main.o := n + + For all files in one directory: + KASAN_SANITIZE := n + +1.1 Error reports +========== + +A typical out of bounds access report looks like this: + +================================================================== +BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3 +Write of size 1 by task modprobe/1689 +============================================================================= +BUG kmalloc-128 (Not tainted): kasan error +----------------------------------------------------------------------------- + +Disabling lock debugging due to kernel taint +INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689 + __slab_alloc+0x4b4/0x4f0 + kmem_cache_alloc_trace+0x10b/0x190 + kmalloc_oob_right+0x3d/0x75 [test_kasan] + init_module+0x9/0x47 [test_kasan] + do_one_initcall+0x99/0x200 + load_module+0x2cb3/0x3b20 + SyS_finit_module+0x76/0x80 + system_call_fastpath+0x12/0x17 +INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080 +INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720 + +Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ +Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk. +Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc ........ 
+Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ +CPU: 0 PID: 1689 Comm: modprobe Tainted: G B 3.18.0-rc1-mm1+ #98 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 + ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78 + ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8 + ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558 +Call Trace: + [<ffffffff81cc68ae>] dump_stack+0x46/0x58 + [<ffffffff811fd848>] print_trailer+0xf8/0x160 + [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan] + [<ffffffff811ff0f5>] object_err+0x35/0x40 + [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan] + [<ffffffff8120b9fa>] kasan_report_error+0x38a/0x3f0 + [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40 + [<ffffffff8120b344>] ? kasan_unpoison_shadow+0x14/0x40 + [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40 + [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan] + [<ffffffff8120a995>] __asan_store1+0x75/0xb0 + [<ffffffffa0002601>] ? kmem_cache_oob+0x1d/0xc3 [test_kasan] + [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan] + [<ffffffffa0002065>] kmalloc_oob_right+0x65/0x75 [test_kasan] + [<ffffffffa00026b0>] init_module+0x9/0x47 [test_kasan] + [<ffffffff810002d9>] do_one_initcall+0x99/0x200 + [<ffffffff811e4e5c>] ? __vunmap+0xec/0x160 + [<ffffffff81114f63>] load_module+0x2cb3/0x3b20 + [<ffffffff8110fd70>] ? m_show+0x240/0x240 + [<ffffffff81115f06>] SyS_finit_module+0x76/0x80 + [<ffffffff81cd3129>] system_call_fastpath+0x12/0x17 +Memory state around the buggy address: + ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc + ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00 +>ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc + ^ + ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb + ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +First sections describe slub object where bad access happened. +See 'SLUB Debug output' section in Documentation/vm/slub.txt for details. + +In the last section the report shows memory state around the accessed address. +Reading this part requires some more understanding of how KASAN works. + +Each 8 bytes of memory are encoded in one shadow byte as accessible, +partially accessible, freed or they can be part of a redzone. +We use the following encoding for each shadow byte: 0 means that all 8 bytes +of the corresponding memory region are accessible; number N (1 <= N <= 7) means +that the first N bytes are accessible, and other (8 - N) bytes are not; +any negative value indicates that the entire 8-byte word is inaccessible. +We use different negative values to distinguish between different kinds of +inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h). + +In the report above the arrows point to the shadow byte 03, which means that +the accessed address is partially accessible. + + +2. 
Implementation details +======================== + +From a high level, our approach to memory error detection is similar to that +of kmemcheck: use shadow memory to record whether each byte of memory is safe +to access, and use compile-time instrumentation to check shadow memory on each +memory access. + +AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory +(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and +offset to translate a memory address to its corresponding shadow address. + +Here is the function witch translate an address to its corresponding shadow +address: + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + return ((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + + KASAN_SHADOW_OFFSET; +} + +where KASAN_SHADOW_SCALE_SHIFT = 3. + +Compile-time instrumentation used for checking memory accesses. Compiler inserts +function calls (__asan_load*(addr), __asan_store*(addr)) before each memory +access of size 1, 2, 4, 8 or 16. These functions check whether memory access is +valid or not by checking corresponding shadow memory. + +GCC 5.0 has possibility to perform inline instrumentation. Instead of making +function calls GCC directly inserts the code to check the shadow memory. +This option significantly enlarges kernel but it gives x1.1-x2 performance +boost over outline instrumented kernel. diff --git a/Makefile b/Makefile index cdc9cf7..2756ef9 100644 --- a/Makefile +++ b/Makefile @@ -422,7 +422,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -775,6 +775,7 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO endif
+include $(srctree)/scripts/Makefile.kasan
 include $(srctree)/scripts/Makefile.extrawarn
# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index b14bc2b..c5533c7 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -19,6 +19,7 @@ KBUILD_CFLAGS := $(cflags-y) \ $(call cc-option,-fno-stack-protector)
 GCOV_PROFILE := n
+KASAN_SANITIZE := n
lib-y := efi-stub-helper.o lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o diff --git a/include/linux/kasan.h b/include/linux/kasan.h new file mode 100644 index 0000000..9102fda --- /dev/null +++ b/include/linux/kasan.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_KASAN_H +#define _LINUX_KASAN_H + +#include <linux/types.h> + +struct kmem_cache; +struct page; + +#ifdef CONFIG_KASAN + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + +#include <asm/kasan.h> +#include <linux/sched.h> + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + + KASAN_SHADOW_OFFSET; +} + +/* Enable reporting bugs after kasan_disable_current() */ +static inline void kasan_enable_current(void) +{ + current->kasan_depth++; +} + +/* Disable reporting bugs for current task */ +static inline void kasan_disable_current(void) +{ + current->kasan_depth--; +} + +void kasan_unpoison_shadow(const void *address, size_t size); + +#else /* CONFIG_KASAN */ + +static inline void kasan_unpoison_shadow(const void *address, size_t size) {} + +static inline void kasan_enable_current(void) {} +static inline void kasan_disable_current(void) {} + +#endif /* CONFIG_KASAN */ + +#endif /* LINUX_KASAN_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2a0bcc8..af0e7ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1625,6 +1625,9 @@ struct task_struct { unsigned long timer_slack_ns; unsigned long default_timer_slack_ns;
+#ifdef CONFIG_KASAN + unsigned int kasan_depth; +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack */ int curr_ret_stack; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4e35a5d..f9f6be1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -635,6 +635,8 @@ config DEBUG_STACKOVERFLOW
source "lib/Kconfig.kmemcheck"
+source "lib/Kconfig.kasan" + endmenu # "Memory Debugging"
config DEBUG_SHIRQ diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan new file mode 100644 index 0000000..e5b3fbe --- /dev/null +++ b/lib/Kconfig.kasan @@ -0,0 +1,43 @@ +config HAVE_ARCH_KASAN + bool + +if HAVE_ARCH_KASAN + +config KASAN + bool "KASan: runtime memory debugger" + help + Enables kernel address sanitizer - runtime memory debugger, + designed to find out-of-bounds accesses and use-after-free bugs. + This is strictly debugging feature. It consumes about 1/8 + of available memory and brings about ~x3 performance slowdown. + For better error detection enable CONFIG_STACKTRACE, + and add slub_debug=U to boot cmdline. + +config KASAN_SHADOW_OFFSET + hex + +choice + prompt "Instrumentation type" + depends on KASAN + default KASAN_OUTLINE + +config KASAN_OUTLINE + bool "Outline instrumentation" + help + Before every memory access compiler insert function call + __asan_load*/__asan_store*. These functions performs check + of shadow memory. This is slower than inline instrumentation, + however it doesn't bloat size of kernel's .text section so + much as inline does. + +config KASAN_INLINE + bool "Inline instrumentation" + help + Compiler directly inserts code checking shadow memory before + memory accesses. This is faster than outline (in some workloads + it gives about x2 boost over outline instrumentation), but + make kernel's .text size much bigger. + +endchoice + +endif diff --git a/mm/Makefile b/mm/Makefile index 8405eb0..61b1a43 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_KMEMCHECK) += kmemcheck.o +obj-$(CONFIG_KASAN) += kasan/ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile new file mode 100644 index 0000000..bd837b8 --- /dev/null +++ b/mm/kasan/Makefile @@ -0,0 +1,8 @@ +KASAN_SANITIZE := n + +CFLAGS_REMOVE_kasan.o = -pg +# Function splitter causes unnecessary splits in __asan_load1/__asan_store1 +# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 +CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) + +obj-y := kasan.o report.o diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c new file mode 100644 index 0000000..6dc1aa7 --- /dev/null +++ b/mm/kasan/kasan.c @@ -0,0 +1,302 @@ +/* + * This file contains shadow memory manipulation code. + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin a.ryabinin@samsung.com + * + * Some of code borrowed from https://github.com/xairy/linux by + * Andrey Konovalov adech.fo@gmail.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define DISABLE_BRANCH_PROFILING + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/printk.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/stacktrace.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/kasan.h> + +#include "kasan.h" + +/* + * Poisons the shadow memory for 'size' bytes starting from 'addr'. + * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. 
+ */ +static void kasan_poison_shadow(const void *address, size_t size, u8 value) +{ + void *shadow_start, *shadow_end; + + shadow_start = kasan_mem_to_shadow(address); + shadow_end = kasan_mem_to_shadow(address + size); + + memset(shadow_start, value, shadow_end - shadow_start); +} + +void kasan_unpoison_shadow(const void *address, size_t size) +{ + kasan_poison_shadow(address, size, 0); + + if (size & KASAN_SHADOW_MASK) { + u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size); + *shadow = size & KASAN_SHADOW_MASK; + } +} + + +/* + * All functions below always inlined so compiler could + * perform better optimizations in each of __asan_loadX/__assn_storeX + * depending on memory access size X. + */ + +static __always_inline bool memory_is_poisoned_1(unsigned long addr) +{ + s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(shadow_value)) { + s8 last_accessible_byte = addr & KASAN_SHADOW_MASK; + return unlikely(last_accessible_byte >= shadow_value); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_2(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 1)) + return true; + + if (likely(((addr + 1) & KASAN_SHADOW_MASK) != 0)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_4(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 3)) + return true; + + if (likely(((addr + 3) & KASAN_SHADOW_MASK) >= 3)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_8(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 7)) + return true; + + if (likely(((addr + 7) & KASAN_SHADOW_MASK) >= 7)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_16(unsigned long addr) +{ + u32 *shadow_addr = (u32 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + u16 shadow_first_bytes = *(u16 *)shadow_addr; + s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK; + + if (unlikely(shadow_first_bytes)) + return true; + + if (likely(!last_byte)) + return false; + + return memory_is_poisoned_1(addr + 15); + } + + return false; +} + +static __always_inline unsigned long bytes_is_zero(const u8 *start, + size_t size) +{ + while (size) { + if (unlikely(*start)) + return (unsigned long)start; + start++; + size--; + } + + return 0; +} + +static __always_inline unsigned long memory_is_zero(const void *start, + const void *end) +{ + unsigned int words; + unsigned long ret; + unsigned int prefix = (unsigned long)start % 8; + + if (end - start <= 16) + return bytes_is_zero(start, end - start); + + if (prefix) { + prefix = 8 - prefix; + ret = bytes_is_zero(start, prefix); + if (unlikely(ret)) + return ret; + start += prefix; + } + + words = (end - start) / 8; + while (words) { + if (unlikely(*(u64 *)start)) + return bytes_is_zero(start, 8); + start += 8; + words--; + } + + return bytes_is_zero(start, (end - start) % 8); +} + +static __always_inline bool memory_is_poisoned_n(unsigned long addr, + size_t size) +{ + unsigned long ret; + + ret = memory_is_zero(kasan_mem_to_shadow((void *)addr), + kasan_mem_to_shadow((void *)addr + size - 
1) + 1); + + if (unlikely(ret)) { + unsigned long last_byte = addr + size - 1; + s8 *last_shadow = (s8 *)kasan_mem_to_shadow((void *)last_byte); + + if (unlikely(ret != (unsigned long)last_shadow || + ((last_byte & KASAN_SHADOW_MASK) >= *last_shadow))) + return true; + } + return false; +} + +static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size) +{ + if (__builtin_constant_p(size)) { + switch (size) { + case 1: + return memory_is_poisoned_1(addr); + case 2: + return memory_is_poisoned_2(addr); + case 4: + return memory_is_poisoned_4(addr); + case 8: + return memory_is_poisoned_8(addr); + case 16: + return memory_is_poisoned_16(addr); + default: + BUILD_BUG(); + } + } + + return memory_is_poisoned_n(addr, size); +} + + +static __always_inline void check_memory_region(unsigned long addr, + size_t size, bool write) +{ + struct kasan_access_info info; + + if (unlikely(size == 0)) + return; + + if (unlikely((void *)addr < + kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) { + info.access_addr = (void *)addr; + info.access_size = size; + info.is_write = write; + info.ip = _RET_IP_; + kasan_report_user_access(&info); + return; + } + + if (likely(!memory_is_poisoned(addr, size))) + return; + + kasan_report(addr, size, write, _RET_IP_); +} + +#define DEFINE_ASAN_LOAD_STORE(size) \ + void __asan_load##size(unsigned long addr) \ + { \ + check_memory_region(addr, size, false); \ + } \ + EXPORT_SYMBOL(__asan_load##size); \ + __alias(__asan_load##size) \ + void __asan_load##size##_noabort(unsigned long); \ + EXPORT_SYMBOL(__asan_load##size##_noabort); \ + void __asan_store##size(unsigned long addr) \ + { \ + check_memory_region(addr, size, true); \ + } \ + EXPORT_SYMBOL(__asan_store##size); \ + __alias(__asan_store##size) \ + void __asan_store##size##_noabort(unsigned long); \ + EXPORT_SYMBOL(__asan_store##size##_noabort) + +DEFINE_ASAN_LOAD_STORE(1); +DEFINE_ASAN_LOAD_STORE(2); +DEFINE_ASAN_LOAD_STORE(4); +DEFINE_ASAN_LOAD_STORE(8); +DEFINE_ASAN_LOAD_STORE(16); + +void __asan_loadN(unsigned long addr, size_t size) +{ + check_memory_region(addr, size, false); +} +EXPORT_SYMBOL(__asan_loadN); + +__alias(__asan_loadN) +void __asan_loadN_noabort(unsigned long, size_t); +EXPORT_SYMBOL(__asan_loadN_noabort); + +void __asan_storeN(unsigned long addr, size_t size) +{ + check_memory_region(addr, size, true); +} +EXPORT_SYMBOL(__asan_storeN); + +__alias(__asan_storeN) +void __asan_storeN_noabort(unsigned long, size_t); +EXPORT_SYMBOL(__asan_storeN_noabort); + +/* to shut up compiler complaints */ +void __asan_handle_no_return(void) {} +EXPORT_SYMBOL(__asan_handle_no_return); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h new file mode 100644 index 0000000..648b9c0 --- /dev/null +++ b/mm/kasan/kasan.h @@ -0,0 +1,34 @@ +#ifndef __MM_KASAN_KASAN_H +#define __MM_KASAN_KASAN_H + +#include <linux/kasan.h> + +#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) +#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) + +struct kasan_access_info { + const void *access_addr; + const void *first_bad_addr; + size_t access_size; + bool is_write; + unsigned long ip; +}; + +void kasan_report_error(struct kasan_access_info *info); +void kasan_report_user_access(struct kasan_access_info *info); + +static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) + << KASAN_SHADOW_SCALE_SHIFT); +} + +static inline bool kasan_enabled(void) +{ + return !current->kasan_depth; +} + +void kasan_report(unsigned long 
addr, size_t size, + bool is_write, unsigned long ip); + +#endif diff --git a/mm/kasan/report.c b/mm/kasan/report.c new file mode 100644 index 0000000..5835d69 --- /dev/null +++ b/mm/kasan/report.c @@ -0,0 +1,209 @@ +/* + * This file contains error reporting code. + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin a.ryabinin@samsung.com + * + * Some of code borrowed from https://github.com/xairy/linux by + * Andrey Konovalov adech.fo@gmail.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/printk.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/stacktrace.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/kasan.h> + +#include "kasan.h" + +/* Shadow layout customization. */ +#define SHADOW_BYTES_PER_BLOCK 1 +#define SHADOW_BLOCKS_PER_ROW 16 +#define SHADOW_BYTES_PER_ROW (SHADOW_BLOCKS_PER_ROW * SHADOW_BYTES_PER_BLOCK) +#define SHADOW_ROWS_AROUND_ADDR 2 + +static const void *find_first_bad_addr(const void *addr, size_t size) +{ + u8 shadow_val = *(u8 *)kasan_mem_to_shadow(addr); + const void *first_bad_addr = addr; + + while (!shadow_val && first_bad_addr < addr + size) { + first_bad_addr += KASAN_SHADOW_SCALE_SIZE; + shadow_val = *(u8 *)kasan_mem_to_shadow(first_bad_addr); + } + return first_bad_addr; +} + +static void print_error_description(struct kasan_access_info *info) +{ + const char *bug_type = "unknown crash"; + u8 shadow_val; + + info->first_bad_addr = find_first_bad_addr(info->access_addr, + info->access_size); + + shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr); + + switch (shadow_val) { + case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: + bug_type = "out of bounds access"; + break; + } + + pr_err("BUG: KASan: %s in %pS at addr %p\n", + bug_type, (void *)info->ip, + info->access_addr); + pr_err("%s of size %zu by task %s/%d\n", + info->is_write ? "Write" : "Read", + info->access_size, current->comm, task_pid_nr(current)); +} + +static void print_address_description(struct kasan_access_info *info) +{ + dump_stack(); +} + +static bool row_is_guilty(const void *row, const void *guilty) +{ + return (row <= guilty) && (guilty < row + SHADOW_BYTES_PER_ROW); +} + +static int shadow_pointer_offset(const void *row, const void *shadow) +{ + /* The length of ">ff00ff00ff00ff00: " is + * 3 + (BITS_PER_LONG/8)*2 chars. + */ + return 3 + (BITS_PER_LONG/8)*2 + (shadow - row)*2 + + (shadow - row) / SHADOW_BYTES_PER_BLOCK + 1; +} + +static void print_shadow_for_address(const void *addr) +{ + int i; + const void *shadow = kasan_mem_to_shadow(addr); + const void *shadow_row; + + shadow_row = (void *)round_down((unsigned long)shadow, + SHADOW_BYTES_PER_ROW) + - SHADOW_ROWS_AROUND_ADDR * SHADOW_BYTES_PER_ROW; + + pr_err("Memory state around the buggy address:\n"); + + for (i = -SHADOW_ROWS_AROUND_ADDR; i <= SHADOW_ROWS_AROUND_ADDR; i++) { + const void *kaddr = kasan_shadow_to_mem(shadow_row); + char buffer[4 + (BITS_PER_LONG/8)*2]; + + snprintf(buffer, sizeof(buffer), + (i == 0) ? 
">%p: " : " %p: ", kaddr); + + kasan_disable_current(); + print_hex_dump(KERN_ERR, buffer, + DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1, + shadow_row, SHADOW_BYTES_PER_ROW, 0); + kasan_enable_current(); + + if (row_is_guilty(shadow_row, shadow)) + pr_err("%*c\n", + shadow_pointer_offset(shadow_row, shadow), + '^'); + + shadow_row += SHADOW_BYTES_PER_ROW; + } +} + +static DEFINE_SPINLOCK(report_lock); + +void kasan_report_error(struct kasan_access_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&report_lock, flags); + pr_err("=================================" + "=================================\n"); + print_error_description(info); + print_address_description(info); + print_shadow_for_address(info->first_bad_addr); + pr_err("=================================" + "=================================\n"); + spin_unlock_irqrestore(&report_lock, flags); +} + +void kasan_report_user_access(struct kasan_access_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&report_lock, flags); + pr_err("=================================" + "=================================\n"); + pr_err("BUG: KASan: user-memory-access on address %p\n", + info->access_addr); + pr_err("%s of size %zu by task %s/%d\n", + info->is_write ? "Write" : "Read", + info->access_size, current->comm, task_pid_nr(current)); + dump_stack(); + pr_err("=================================" + "=================================\n"); + spin_unlock_irqrestore(&report_lock, flags); +} + +void kasan_report(unsigned long addr, size_t size, + bool is_write, unsigned long ip) +{ + struct kasan_access_info info; + + if (likely(!kasan_enabled())) + return; + + info.access_addr = (void *)addr; + info.access_size = size; + info.is_write = is_write; + info.ip = ip; + kasan_report_error(&info); +} + + +#define DEFINE_ASAN_REPORT_LOAD(size) \ +void __asan_report_load##size##_noabort(unsigned long addr) \ +{ \ + kasan_report(addr, size, false, _RET_IP_); \ +} \ +EXPORT_SYMBOL(__asan_report_load##size##_noabort) + +#define DEFINE_ASAN_REPORT_STORE(size) \ +void __asan_report_store##size##_noabort(unsigned long addr) \ +{ \ + kasan_report(addr, size, true, _RET_IP_); \ +} \ +EXPORT_SYMBOL(__asan_report_store##size##_noabort) + +DEFINE_ASAN_REPORT_LOAD(1); +DEFINE_ASAN_REPORT_LOAD(2); +DEFINE_ASAN_REPORT_LOAD(4); +DEFINE_ASAN_REPORT_LOAD(8); +DEFINE_ASAN_REPORT_LOAD(16); +DEFINE_ASAN_REPORT_STORE(1); +DEFINE_ASAN_REPORT_STORE(2); +DEFINE_ASAN_REPORT_STORE(4); +DEFINE_ASAN_REPORT_STORE(8); +DEFINE_ASAN_REPORT_STORE(16); + +void __asan_report_load_n_noabort(unsigned long addr, size_t size) +{ + kasan_report(addr, size, false, _RET_IP_); +} +EXPORT_SYMBOL(__asan_report_load_n_noabort); + +void __asan_report_store_n_noabort(unsigned long addr, size_t size) +{ + kasan_report(addr, size, true, _RET_IP_); +} +EXPORT_SYMBOL(__asan_report_store_n_noabort); diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan new file mode 100644 index 0000000..7acd6fa --- /dev/null +++ b/scripts/Makefile.kasan @@ -0,0 +1,24 @@ +ifdef CONFIG_KASAN +ifdef CONFIG_KASAN_INLINE + call_threshold := 10000 +else + call_threshold := 0 +endif + +CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address + +CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ + -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ + --param asan-instrumentation-with-call-threshold=$(call_threshold)) + +ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) + $(warning Cannot use CONFIG_KASAN: \ + -fsanitize=kernel-address is not supported by compiler) +else + ifeq ($(CFLAGS_KASAN),) + 
$(warning CONFIG_KASAN: compiler does not support all options.\ + Trying minimal configuration) + CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) + endif +endif +endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 54be19a..37de20d 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -119,6 +119,16 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_GCOV)) endif
+# +# Enable address sanitizer flags for kernel except some files or directories +# we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE) +# +ifeq ($(CONFIG_KASAN),y) +_c_flags += $(if $(patsubst n%,, \ + $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \ + $(CFLAGS_KASAN)) +endif + # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').
From: Andrey Ryabinin a.ryabinin@samsung.com
Currently memory hotplug doesn't work with KASan: since there is no shadow for hotplugged memory, the kernel will crash on the first access to it. Making this work will require allocating shadow for the new memory.
At some future point proper memory hotplug support will be implemented. Until then, print a warning at startup and disable memory hot-add.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com
Cc: Dmitry Vyukov dvyukov@google.com
Cc: Konstantin Serebryany kcc@google.com
Cc: Dmitry Chernenkov dmitryc@google.com
Signed-off-by: Andrey Konovalov adech.fo@gmail.com
Cc: Yuri Gribov tetra2005@gmail.com
Cc: Konstantin Khlebnikov koct9i@gmail.com
Cc: Sasha Levin sasha.levin@oracle.com
Cc: Christoph Lameter cl@linux.com
Cc: Joonsoo Kim iamjoonsoo.kim@lge.com
Cc: Dave Hansen dave.hansen@intel.com
Cc: Andi Kleen andi@firstfloor.org
Cc: Ingo Molnar mingo@elte.hu
Cc: Thomas Gleixner tglx@linutronix.de
Cc: "H. Peter Anvin" hpa@zytor.com
Cc: Christoph Lameter cl@linux.com
Cc: Pekka Enberg penberg@kernel.org
Cc: David Rientjes rientjes@google.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 786a8959912eb94fc2381c2ae487a96ce55dabca)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 mm/kasan/kasan.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 6dc1aa7..def8110 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/memblock.h>
+#include <linux/memory.h>
 #include <linux/mm.h>
 #include <linux/printk.h>
 #include <linux/sched.h>
@@ -300,3 +301,23 @@ EXPORT_SYMBOL(__asan_storeN_noabort);
 /* to shut up compiler complaints */
 void __asan_handle_no_return(void) {}
 EXPORT_SYMBOL(__asan_handle_no_return);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static int kasan_mem_notifier(struct notifier_block *nb,
+			unsigned long action, void *data)
+{
+	return (action == MEM_GOING_ONLINE) ? NOTIFY_BAD : NOTIFY_OK;
+}
+
+static int __init kasan_memhotplug_init(void)
+{
+	pr_err("WARNING: KASan doesn't support memory hot-add\n");
+	pr_err("Memory hot-add will be disabled\n");
+
+	hotplug_memory_notifier(kasan_mem_notifier, 0);
+
+	return 0;
+}
+
+module_init(kasan_memhotplug_init);
+#endif
From: Andrey Ryabinin a.ryabinin@samsung.com
This patch adds arch specific code for kernel address sanitizer.
16TB of virtual address space is used for the shadow memory. It's located in the range [ffffec0000000000 - fffffc0000000000], between vmemmap and the %esp fixup stacks.
At an early stage we map the whole shadow region with the zero page. Later, after pages are mapped into the direct mapping address range, we unmap the zero pages from the corresponding shadow (see kasan_map_shadow()) and allocate and map real shadow memory, reusing the vmemmap_populate() function.
Also, replace __pa with __pa_nodebug before the shadow is initialized. With CONFIG_DEBUG_VIRTUAL=y, __pa makes an external function call (__phys_addr); __phys_addr is instrumented, so __asan_load could be called before the shadow area is initialized.
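For reference, plugging the constants from this patch into the shadow translation (KASAN_SHADOW_OFFSET = 0xdffffc0000000000 from the Kconfig hunk below, scale shift 3, as defined in asm/kasan.h) yields exactly the range quoted above:

	KASAN_SHADOW_START = KASAN_SHADOW_OFFSET + (0xffff800000000000 >> 3)
	                   = 0xdffffc0000000000 + 0x1ffff00000000000
	                   = 0xffffec0000000000

	KASAN_SHADOW_END   = KASAN_SHADOW_START + (1UL << (47 - 3))
	                   = 0xffffec0000000000 + 0x0000100000000000
	                   = 0xfffffc0000000000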
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com
Cc: Dmitry Vyukov dvyukov@google.com
Cc: Konstantin Serebryany kcc@google.com
Cc: Dmitry Chernenkov dmitryc@google.com
Signed-off-by: Andrey Konovalov adech.fo@gmail.com
Cc: Yuri Gribov tetra2005@gmail.com
Cc: Konstantin Khlebnikov koct9i@gmail.com
Cc: Sasha Levin sasha.levin@oracle.com
Cc: Christoph Lameter cl@linux.com
Cc: Joonsoo Kim iamjoonsoo.kim@lge.com
Cc: Dave Hansen dave.hansen@intel.com
Cc: Andi Kleen andi@firstfloor.org
Cc: Ingo Molnar mingo@elte.hu
Cc: Thomas Gleixner tglx@linutronix.de
Cc: "H. Peter Anvin" hpa@zytor.com
Cc: Christoph Lameter cl@linux.com
Cc: Pekka Enberg penberg@kernel.org
Cc: David Rientjes rientjes@google.com
Cc: Jim Davis jim.epost@gmail.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit ef7f0d6a6ca8c9e4b27d78895af86c2fbfaeedb2)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 Documentation/x86/x86_64/mm.txt   |   2 +
 arch/x86/Kconfig                  |   1 +
 arch/x86/boot/Makefile            |   2 +
 arch/x86/boot/compressed/Makefile |   2 +
 arch/x86/include/asm/kasan.h      |  31 ++++++
 arch/x86/kernel/Makefile          |   2 +
 arch/x86/kernel/dumpstack.c       |   5 +-
 arch/x86/kernel/head64.c          |   9 +-
 arch/x86/kernel/head_64.S         |  30 ++++++
 arch/x86/kernel/setup.c           |   3 +
 arch/x86/mm/Makefile              |   3 +
 arch/x86/mm/kasan_init_64.c       | 199 ++++++++++++++++++++++++++++++++++
 arch/x86/realmode/Makefile        |   2 +-
 arch/x86/realmode/rm/Makefile     |   1 +
 arch/x86/vdso/Makefile            |   1 +
 lib/Kconfig.kasan                 |   1 +
 16 files changed, 290 insertions(+), 4 deletions(-)
 create mode 100644 arch/x86/include/asm/kasan.h
 create mode 100644 arch/x86/mm/kasan_init_64.c
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 052ee64..05712ac 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... +ffffec0000000000 - fffffc0000000000 (=44 bits) kasan shadow memory (16TB) +... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c9148e2..642a358 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -83,6 +83,7 @@ config X86 select HAVE_CMPXCHG_LOCAL select HAVE_CMPXCHG_DOUBLE select HAVE_ARCH_KMEMCHECK + select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP select HAVE_USER_RETURN_NOTIFIER select ARCH_BINFMT_ELF_RANDOMIZE_PIE select HAVE_ARCH_JUMP_LABEL diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 3db07f3..57bbf2f 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -14,6 +14,8 @@ # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. # The number is the same as you would ordinarily press at bootup.
+KASAN_SANITIZE := n
+
 SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
targets := vmlinux.bin setup.bin setup.elf bzImage diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 30c0acf..2959cca 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,6 +4,8 @@ # create a compressed vmlinux image from the original vmlinux #
+KASAN_SANITIZE := n
+
 targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
	vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h new file mode 100644 index 0000000..8b22422 --- /dev/null +++ b/arch/x86/include/asm/kasan.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_KASAN_H +#define _ASM_X86_KASAN_H + +/* + * Compiler uses shadow offset assuming that addresses start + * from 0. Kernel addresses don't start from 0, so shadow + * for kernel really starts from compiler's shadow offset + + * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT + */ +#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ + (0xffff800000000000ULL >> 3)) +/* 47 bits for kernel address -> (47 - 3) bits for shadow */ +#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (47 - 3))) + +#ifndef __ASSEMBLY__ + +extern pte_t kasan_zero_pte[]; +extern pte_t kasan_zero_pmd[]; +extern pte_t kasan_zero_pud[]; + +#ifdef CONFIG_KASAN +void __init kasan_map_early_shadow(pgd_t *pgd); +void __init kasan_init(void); +#else +static inline void kasan_map_early_shadow(pgd_t *pgd) { } +static inline void kasan_init(void) { } +#endif + +#endif + +#endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8f1e774..9d46ee8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -16,6 +16,8 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif
+KASAN_SANITIZE_head$(BITS).o := n
+
 CFLAGS_irq.o := -I$(src)/../include/asm/trace
obj-y := process_$(BITS).o signal.o entry_$(BITS).o diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b74ebc7..cf3df1d 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -265,7 +265,10 @@ int __die(const char *str, struct pt_regs *regs, long err) printk("SMP "); #endif #ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_KASAN + printk("KASAN"); #endif printk("\n"); if (notify_die(DIE_OOPS, str, regs, err, diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 957fdb5..b111ab5 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -27,6 +27,7 @@ #include <asm/bios_ebda.h> #include <asm/bootparam_utils.h> #include <asm/microcode.h> +#include <asm/kasan.h>
 /*
  * Manage page tables very early on.
@@ -46,7 +47,7 @@ static void __init reset_early_page_tables(void)
next_early_pgt = 0;
-	write_cr3(__pa(early_level4_pgt));
+	write_cr3(__pa_nodebug(early_level4_pgt));
 }
 /* Create a new PMD entry */
@@ -59,7 +60,7 @@ int __init early_make_pgtable(unsigned long address)
 	pmdval_t pmd, *pmd_p;
 	/* Invalid address or early pgt is done ? */
-	if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
+	if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt))
 		return -1;
 again:
@@ -160,6 +161,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	/* Kill off the identity-map trampoline */
 	reset_early_page_tables();
+	kasan_map_early_shadow(early_level4_pgt);
+
 	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
@@ -181,6 +184,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	/* set init_level4_pgt kernel high mapping*/
 	init_level4_pgt[511] = early_level4_pgt[511];
+	kasan_map_early_shadow(init_level4_pgt);
+
 	x86_64_start_reservations(real_mode_data);
 }
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a2dc0ad..f8a8406 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -516,8 +516,38 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000
+#ifdef CONFIG_KASAN +#define FILL(VAL, COUNT) \ + .rept (COUNT) ; \ + .quad (VAL) ; \ + .endr + +NEXT_PAGE(kasan_zero_pte) + FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) +NEXT_PAGE(kasan_zero_pmd) + FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) +NEXT_PAGE(kasan_zero_pud) + FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) + +#undef FILL +#endif + + #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE + +#ifdef CONFIG_KASAN +/* + * This page used as early shadow. We don't use empty_zero_page + * at early stages, stack instrumentation could write some garbage + * to this page. + * Latter we reuse it as zero shadow for large ranges of memory + * that allowed to access, but not instrumented by kasan + * (vmalloc/vmemmap ...). + */ +NEXT_PAGE(kasan_zero_page) + .skip PAGE_SIZE +#endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 87b6496..28a8506 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -89,6 +89,7 @@ #include <asm/cacheflush.h> #include <asm/processor.h> #include <asm/bugs.h> +#include <asm/kasan.h>
#include <asm/vsyscall.h> #include <asm/cpu.h> @@ -1174,6 +1175,8 @@ void __init setup_arch(char **cmdline_p)
x86_init.paging.pagetable_init();
+ kasan_init(); + if (boot_cpu_data.cpuid_level >= 0) { /* A CPU has %cr4 if and only if it has CPUID */ mmu_cr4_features = __read_cr4(); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 6a19ad9..9648838 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -20,6 +20,9 @@ obj-$(CONFIG_HIGHMEM) += highmem_32.o
obj-$(CONFIG_KMEMCHECK) += kmemcheck/
+KASAN_SANITIZE_kasan_init_$(BITS).o := n +obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o + obj-$(CONFIG_MMIOTRACE) += mmiotrace.o mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c new file mode 100644 index 0000000..3e4d9a1 --- /dev/null +++ b/arch/x86/mm/kasan_init_64.c @@ -0,0 +1,199 @@ +#include <linux/bootmem.h> +#include <linux/kasan.h> +#include <linux/kdebug.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/vmalloc.h> + +#include <asm/tlbflush.h> +#include <asm/sections.h> + +extern pgd_t early_level4_pgt[PTRS_PER_PGD]; +extern struct range pfn_mapped[E820_X_MAX]; + +extern unsigned char kasan_zero_page[PAGE_SIZE]; + +static int __init map_range(struct range *range) +{ + unsigned long start; + unsigned long end; + + start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); + end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); + + /* + * end + 1 here is intentional. We check several shadow bytes in advance + * to slightly speed up fastpath. In some rare cases we could cross + * boundary of mapped shadow, so we just map some more here. + */ + return vmemmap_populate(start, end + 1, NUMA_NO_NODE); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + for (; start < end; start += PGDIR_SIZE) + pgd_clear(pgd_offset_k(start)); +} + +void __init kasan_map_early_shadow(pgd_t *pgd) +{ + int i; + unsigned long start = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + + for (i = pgd_index(start); start < end; i++) { + pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) + | _KERNPG_TABLE); + start += PGDIR_SIZE; + } +} + +static int __init zero_pte_populate(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte = pte_offset_kernel(pmd, addr); + + while (addr + PAGE_SIZE <= end) { + WARN_ON(!pte_none(*pte)); + set_pte(pte, __pte(__pa_nodebug(kasan_zero_page) + | __PAGE_KERNEL_RO)); + addr += PAGE_SIZE; + pte = pte_offset_kernel(pmd, addr); + } + return 0; +} + +static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, + unsigned long end) +{ + int ret = 0; + pmd_t *pmd = pmd_offset(pud, addr); + + while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { + WARN_ON(!pmd_none(*pmd)); + set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) + | __PAGE_KERNEL_RO)); + addr += PMD_SIZE; + pmd = pmd_offset(pud, addr); + } + if (addr < end) { + if (pmd_none(*pmd)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pmd(pmd, __pmd(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pte_populate(pmd, addr, end); + } + return ret; +} + + +static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + int ret = 0; + pud_t *pud = pud_offset(pgd, addr); + + while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { + WARN_ON(!pud_none(*pud)); + set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) + | __PAGE_KERNEL_RO)); + addr += PUD_SIZE; + pud = pud_offset(pgd, addr); + } + + if (addr < end) { + if (pud_none(*pud)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pud(pud, __pud(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pmd_populate(pud, addr, end); + } + return ret; +} + +static int __init zero_pgd_populate(unsigned long addr, unsigned long end) +{ + int ret = 0; + pgd_t *pgd = pgd_offset_k(addr); + + while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { + 
WARN_ON(!pgd_none(*pgd)); + set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) + | __PAGE_KERNEL_RO)); + addr += PGDIR_SIZE; + pgd = pgd_offset_k(addr); + } + + if (addr < end) { + if (pgd_none(*pgd)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pgd(pgd, __pgd(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pud_populate(pgd, addr, end); + } + return ret; +} + + +static void __init populate_zero_shadow(const void *start, const void *end) +{ + if (zero_pgd_populate((unsigned long)start, (unsigned long)end)) + panic("kasan: unable to map zero shadow!"); +} + + +#ifdef CONFIG_KASAN_INLINE +static int kasan_die_handler(struct notifier_block *self, + unsigned long val, + void *data) +{ + if (val == DIE_GPF) { + pr_emerg("CONFIG_KASAN_INLINE enabled"); + pr_emerg("GPF could be caused by NULL-ptr deref or user memory access"); + } + return NOTIFY_OK; +} + +static struct notifier_block kasan_die_notifier = { + .notifier_call = kasan_die_handler, +}; +#endif + +void __init kasan_init(void) +{ + int i; + +#ifdef CONFIG_KASAN_INLINE + register_die_notifier(&kasan_die_notifier); +#endif + + memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); + load_cr3(early_level4_pgt); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_mem_to_shadow((void *)PAGE_OFFSET)); + + for (i = 0; i < E820_X_MAX; i++) { + if (pfn_mapped[i].end == 0) + break; + + if (map_range(&pfn_mapped[i])) + panic("kasan: unable to allocate shadow!"); + } + + populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + (void *)KASAN_SHADOW_END); + + memset(kasan_zero_page, 0, PAGE_SIZE); + + load_cr3(init_level4_pgt); +} diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile index 94f7fbe..e02c2c6 100644 --- a/arch/x86/realmode/Makefile +++ b/arch/x86/realmode/Makefile @@ -6,7 +6,7 @@ # for more details. # # - +KASAN_SANITIZE := n subdir- := rm
obj-y += init.o diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 7c0d7be..2730d77 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -6,6 +6,7 @@ # for more details. # # +KASAN_SANITIZE := n
always := realmode.bin realmode.relocs
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 5a4affe..2aacd7c 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -3,6 +3,7 @@ #
 KBUILD_CFLAGS += $(DISABLE_LTO)
+KASAN_SANITIZE := n
VDSO64-$(CONFIG_X86_64) := y VDSOX32-$(CONFIG_X86_X32_ABI) := y diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index e5b3fbe..0052b1b 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -15,6 +15,7 @@ config KASAN
 config KASAN_SHADOW_OFFSET
 	hex
+	default 0xdffffc0000000000 if X86_64
 choice
 	prompt "Instrumentation type"
From: Andrey Ryabinin a.ryabinin@samsung.com
Add kernel address sanitizer hooks to mark allocated pages' addresses as accessible in the corresponding shadow region, and to mark freed pages as inaccessible.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com
Cc: Dmitry Vyukov dvyukov@google.com
Cc: Konstantin Serebryany kcc@google.com
Cc: Dmitry Chernenkov dmitryc@google.com
Signed-off-by: Andrey Konovalov adech.fo@gmail.com
Cc: Yuri Gribov tetra2005@gmail.com
Cc: Konstantin Khlebnikov koct9i@gmail.com
Cc: Sasha Levin sasha.levin@oracle.com
Cc: Christoph Lameter cl@linux.com
Cc: Joonsoo Kim iamjoonsoo.kim@lge.com
Cc: Dave Hansen dave.hansen@intel.com
Cc: Andi Kleen andi@firstfloor.org
Cc: Ingo Molnar mingo@elte.hu
Cc: Thomas Gleixner tglx@linutronix.de
Cc: "H. Peter Anvin" hpa@zytor.com
Cc: Christoph Lameter cl@linux.com
Cc: Pekka Enberg penberg@kernel.org
Cc: David Rientjes rientjes@google.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit b8c73fc2493d42517be95cf2c89659fc6c6f4d02)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 include/linux/kasan.h |  6 ++++++
 mm/compaction.c       |  2 ++
 mm/kasan/kasan.c      | 14 ++++++++++++++
 mm/kasan/kasan.h      |  2 ++
 mm/kasan/report.c     | 11 +++++++++++
 mm/page_alloc.c       |  3 +++
 6 files changed, 38 insertions(+)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9102fda..f00c15c 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -34,6 +34,9 @@ static inline void kasan_disable_current(void)
void kasan_unpoison_shadow(const void *address, size_t size);
+void kasan_alloc_pages(struct page *page, unsigned int order);
+void kasan_free_pages(struct page *page, unsigned int order);
+
 #else /* CONFIG_KASAN */
static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -41,6 +44,9 @@ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {}
+static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
+static inline void kasan_free_pages(struct page *page, unsigned int order) {}
+
 #endif /* CONFIG_KASAN */
#endif /* LINUX_KASAN_H */ diff --git a/mm/compaction.c b/mm/compaction.c index b47f08e..81c89c3 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -16,6 +16,7 @@ #include <linux/sysfs.h> #include <linux/balloon_compaction.h> #include <linux/page-isolation.h> +#include <linux/kasan.h> #include "internal.h"
#ifdef CONFIG_COMPACTION @@ -59,6 +60,7 @@ static void map_pages(struct list_head *list) list_for_each_entry(page, list, lru) { arch_alloc_page(page, 0); kernel_map_pages(page, 1, 1); + kasan_alloc_pages(page, 0); } }
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index def8110..b516eb8 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -254,6 +254,20 @@ static __always_inline void check_memory_region(unsigned long addr, kasan_report(addr, size, write, _RET_IP_); }
+void kasan_alloc_pages(struct page *page, unsigned int order) +{ + if (likely(!PageHighMem(page))) + kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order); +} + +void kasan_free_pages(struct page *page, unsigned int order) +{ + if (likely(!PageHighMem(page))) + kasan_poison_shadow(page_address(page), + PAGE_SIZE << order, + KASAN_FREE_PAGE); +} + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 648b9c0..d3c90d5 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -6,6 +6,8 @@ #define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1)
+#define KASAN_FREE_PAGE 0xFF /* page was freed */ + struct kasan_access_info { const void *access_addr; const void *first_bad_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 5835d69..fab8e78 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -54,6 +54,9 @@ static void print_error_description(struct kasan_access_info *info) shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr);
switch (shadow_val) { + case KASAN_FREE_PAGE: + bug_type = "use after free"; + break; case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -69,6 +72,14 @@ static void print_error_description(struct kasan_access_info *info)
static void print_address_description(struct kasan_access_info *info) { + const void *addr = info->access_addr; + + if ((addr >= (void *)PAGE_OFFSET) && + (addr < high_memory)) { + struct page *page = virt_to_head_page(addr); + dump_page(page, "kasan: bad access detected"); + } + dump_stack(); }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c32cb64..903d2d1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -25,6 +25,7 @@ #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/kmemcheck.h> +#include <linux/kasan.h> #include <linux/module.h> #include <linux/suspend.h> #include <linux/pagevec.h> @@ -743,6 +744,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 	trace_mm_page_free(page, order);
 	kmemcheck_free_shadow(page, order);
+	kasan_free_pages(page, order);
if (PageAnon(page)) page->mapping = NULL; @@ -922,6 +924,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
 	arch_alloc_page(page, order);
 	kernel_map_pages(page, 1 << order, 1);
+	kasan_alloc_pages(page, order);
 	if (gfp_flags & __GFP_ZERO)
 		prep_zero_page(page, order, gfp_flags);
From: Andrey Ryabinin a.ryabinin@samsung.com
virt_to_obj takes a kmem_cache address, the address of the slab page, and an address x pointing somewhere inside a slab object, and returns the address of the beginning of that object (see the usage sketch after the diff below).
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com
Acked-by: Christoph Lameter cl@linux.com
Cc: Dmitry Vyukov dvyukov@google.com
Cc: Konstantin Serebryany kcc@google.com
Cc: Dmitry Chernenkov dmitryc@google.com
Signed-off-by: Andrey Konovalov adech.fo@gmail.com
Cc: Yuri Gribov tetra2005@gmail.com
Cc: Konstantin Khlebnikov koct9i@gmail.com
Cc: Sasha Levin sasha.levin@oracle.com
Cc: Christoph Lameter cl@linux.com
Cc: Joonsoo Kim iamjoonsoo.kim@lge.com
Cc: Dave Hansen dave.hansen@intel.com
Cc: Andi Kleen andi@firstfloor.org
Cc: Ingo Molnar mingo@elte.hu
Cc: Thomas Gleixner tglx@linutronix.de
Cc: "H. Peter Anvin" hpa@zytor.com
Cc: Pekka Enberg penberg@kernel.org
Cc: David Rientjes rientjes@google.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 912f5fbf1d3060f25d6994aed0265c55b974b2e9)
Signed-off-by: Alex Shi alex.shi@linaro.org
---
 include/linux/slub_def.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d82abd4..a26fff2 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -110,4 +110,20 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) } #endif
+ +/** + * virt_to_obj - returns address of the beginning of object. + * @s: object's kmem_cache + * @slab_page: address of slab page + * @x: address within object memory range + * + * Returns address of the beginning of object + */ +static inline void *virt_to_obj(struct kmem_cache *s, + const void *slab_page, + const void *x) +{ + return (void *)x - ((x - slab_page) % s->size); +} + #endif /* _LINUX_SLUB_DEF_H */
From: Andrey Ryabinin a.ryabinin@samsung.com
Remove static and add a function declaration to linux/slub_def.h so it can be used by the kernel address sanitizer.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 75c66def8d815201aa0386ecc7c66a5c8dbca1ee) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/linux/slub_def.h | 3 +++ mm/slub.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index a26fff2..e565d7c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -126,4 +126,7 @@ static inline void *virt_to_obj(struct kmem_cache *s, return (void *)x - ((x - slab_page) % s->size); }
+void object_err(struct kmem_cache *s, struct page *page, + u8 *object, char *reason); + #endif /* _LINUX_SLUB_DEF_H */ diff --git a/mm/slub.c b/mm/slub.c index ae7b9f1..dcc2059 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -629,7 +629,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) dump_stack(); }
-static void object_err(struct kmem_cache *s, struct page *page, +void object_err(struct kmem_cache *s, struct page *page, u8 *object, char *reason) { slab_bug(s, "%s", reason);
From: Andrey Ryabinin a.ryabinin@samsung.com
It is OK for slub to access memory that kasan has marked as inaccessible (the object's metadata). Kasan should not print a report in that case because these accesses are valid. Disabling instrumentation of slub.c alone is not enough to achieve this, because slub passes pointers to object metadata into external functions like memchr_inv().
We don't want to disable instrumentation for memchr_inv() because it is quite a generic function, and we don't want to miss bugs.
metadata_access_enable()/metadata_access_disable() are used to tell KASan where accesses to metadata start and end, so we can temporarily suppress KASan reports for them.
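As a minimal sketch of the pattern this patch introduces (the wrapper function name below is hypothetical; the helpers and POISON_INUSE match the diff), any read of object metadata is simply bracketed by the two calls so that instrumented callees such as memchr_inv() do not generate false reports:

    /* Hypothetical helper showing the bracketing pattern. */
    static int pad_is_clean(u8 *pad, size_t len)
    {
            u8 *fault;

            metadata_access_enable();       /* kasan_disable_current() */
            fault = memchr_inv(pad, POISON_INUSE, len);
            metadata_access_disable();      /* kasan_enable_current() */

            return fault == NULL;
    }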
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit a79316c6178ca419e35feef47d47f50b4e0ee9f2) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/slub.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)
diff --git a/mm/slub.c b/mm/slub.c index dcc2059..d569522 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -20,6 +20,7 @@ #include <linux/proc_fs.h> #include <linux/notifier.h> #include <linux/seq_file.h> +#include <linux/kasan.h> #include <linux/kmemcheck.h> #include <linux/cpu.h> #include <linux/cpuset.h> @@ -468,12 +469,30 @@ static char *slub_debug_slabs; static int disable_higher_order_debug;
/* + * slub is about to manipulate internal object metadata. This memory lies + * outside the range of the allocated object, so accessing it would normally + * be reported by kasan as a bounds error. metadata_access_enable() is used + * to tell kasan that these accesses are OK. + */ +static inline void metadata_access_enable(void) +{ + kasan_disable_current(); +} + +static inline void metadata_access_disable(void) +{ + kasan_enable_current(); +} + +/* * Object debugging */ static void print_section(char *text, u8 *addr, unsigned int length) { + metadata_access_enable(); print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, length, 1); + metadata_access_disable(); }
static struct track *get_track(struct kmem_cache *s, void *object, @@ -503,7 +522,9 @@ static void set_track(struct kmem_cache *s, void *object, trace.max_entries = TRACK_ADDRS_COUNT; trace.entries = p->addrs; trace.skip = 3; + metadata_access_enable(); save_stack_trace(&trace); + metadata_access_disable();
/* See rant in lockdep.c */ if (trace.nr_entries != 0 && @@ -677,7 +698,9 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, u8 *fault; u8 *end;
+ metadata_access_enable(); fault = memchr_inv(start, value, bytes); + metadata_access_disable(); if (!fault) return 1;
@@ -770,7 +793,9 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) if (!remainder) return 1;
+ metadata_access_enable(); fault = memchr_inv(end - remainder, POISON_INUSE, remainder); + metadata_access_disable(); if (!fault) return 1; while (end > fault && end[-1] == POISON_INUSE)
From: Andrey Ryabinin a.ryabinin@samsung.com
With this patch kasan is able to catch bugs in memory allocated by slub. Initially, all objects in a newly allocated slab page are marked as redzone. Later, when a slub object is allocated, the number of bytes requested by the caller is marked as accessible, and the rest of the object (including slub's metadata) is marked as redzone (inaccessible).
We also mark an object as accessible if ksize() was called for it. There are places in the kernel where ksize() is called to find out the size of the actually allocated area. Such callers may validly access the whole allocated memory, so it has to be marked as accessible.
Code in slub.c and slab_common.c may validly access an object's metadata, so instrumentation for these files is disabled.
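As a worked example (illustrative sizes only; assuming the allocation is served from the 128-byte kmalloc cache, so object_size == 128, with the 8-byte shadow granule), kmalloc(100) leaves the object's shadow as follows:

    redzone_start = round_up(object + 100, 8) = object + 104
    redzone_end   = round_up(object + 128, 8) = object + 128

    bytes [0, 100)   unpoisoned (accessible to the caller)
    bytes [100, 104) caught via the partial shadow byte of that granule
    bytes [104, 128) poisoned with KASAN_KMALLOC_REDZONE

An access such as ptr[100] or ptr[120] is therefore reported, while ksize()/krealloc() can later widen the accessible range as the changelog describes.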
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Signed-off-by: Dmitry Chernenkov dmitryc@google.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 0316bec22ec95ea2faca6406437b0b5950553b7c) Signed-off-by: Alex Shi alex.shi@linaro.org
Conflicts: skip memcg part in mm/slub.c --- include/linux/kasan.h | 27 ++++++++++++++ include/linux/slab.h | 11 ++++-- lib/Kconfig.kasan | 1 + mm/Makefile | 3 ++ mm/kasan/kasan.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 5 +++ mm/kasan/report.c | 21 +++++++++++ mm/slab_common.c | 5 ++- mm/slub.c | 31 ++++++++++++++-- 9 files changed, 197 insertions(+), 5 deletions(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index f00c15c..d5310ee 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -37,6 +37,18 @@ void kasan_unpoison_shadow(const void *address, size_t size); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order);
+void kasan_poison_slab(struct page *page); +void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); +void kasan_poison_object_data(struct kmem_cache *cache, void *object); + +void kasan_kmalloc_large(const void *ptr, size_t size); +void kasan_kfree_large(const void *ptr); +void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size); +void kasan_krealloc(const void *object, size_t new_size); + +void kasan_slab_alloc(struct kmem_cache *s, void *object); +void kasan_slab_free(struct kmem_cache *s, void *object); + #else /* CONFIG_KASAN */
static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -47,6 +59,21 @@ static inline void kasan_disable_current(void) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {}
+static inline void kasan_poison_slab(struct page *page) {} +static inline void kasan_unpoison_object_data(struct kmem_cache *cache, + void *object) {} +static inline void kasan_poison_object_data(struct kmem_cache *cache, + void *object) {} + +static inline void kasan_kmalloc_large(void *ptr, size_t size) {} +static inline void kasan_kfree_large(const void *ptr) {} +static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size) {} +static inline void kasan_krealloc(const void *object, size_t new_size) {} + +static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} +static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} + #endif /* CONFIG_KASAN */
#endif /* LINUX_KASAN_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index c265bec..5f97037 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -104,6 +104,7 @@ (unsigned long)ZERO_SIZE_PTR)
#include <linux/kmemleak.h> +#include <linux/kasan.h>
struct mem_cgroup; /* @@ -326,7 +327,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) { - return kmem_cache_alloc(s, flags); + void *ret = kmem_cache_alloc(s, flags); + + kasan_kmalloc(s, ret, size); + return ret; }
static __always_inline void * @@ -334,7 +338,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) { - return kmem_cache_alloc_node(s, gfpflags, node); + void *ret = kmem_cache_alloc_node(s, gfpflags, node); + + kasan_kmalloc(s, ret, size); + return ret; } #endif /* CONFIG_TRACING */
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0052b1b..a11ac02 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -5,6 +5,7 @@ if HAVE_ARCH_KASAN
config KASAN bool "KASan: runtime memory debugger" + depends on SLUB_DEBUG help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/mm/Makefile b/mm/Makefile index 61b1a43..3614adb 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -2,6 +2,9 @@ # Makefile for the linux memory manager. #
+KASAN_SANITIZE_slab_common.o := n +KASAN_SANITIZE_slub.o := n + mmu-y := nommu.o mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index b516eb8..dc83f07 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -31,6 +31,7 @@ #include <linux/kasan.h>
#include "kasan.h" +#include "../slab.h"
/* * Poisons the shadow memory for 'size' bytes starting from 'addr'. @@ -268,6 +269,103 @@ void kasan_free_pages(struct page *page, unsigned int order) KASAN_FREE_PAGE); }
+void kasan_poison_slab(struct page *page) +{ + kasan_poison_shadow(page_address(page), + PAGE_SIZE << compound_order(page), + KASAN_KMALLOC_REDZONE); +} + +void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) +{ + kasan_unpoison_shadow(object, cache->object_size); +} + +void kasan_poison_object_data(struct kmem_cache *cache, void *object) +{ + kasan_poison_shadow(object, + round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE), + KASAN_KMALLOC_REDZONE); +} + +void kasan_slab_alloc(struct kmem_cache *cache, void *object) +{ + kasan_kmalloc(cache, object, cache->object_size); +} + +void kasan_slab_free(struct kmem_cache *cache, void *object) +{ + unsigned long size = cache->object_size; + unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); + + /* RCU slabs could be legally used after free within the RCU period */ + if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + return; + + kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); +} + +void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size) +{ + unsigned long redzone_start; + unsigned long redzone_end; + + if (unlikely(object == NULL)) + return; + + redzone_start = round_up((unsigned long)(object + size), + KASAN_SHADOW_SCALE_SIZE); + redzone_end = round_up((unsigned long)object + cache->object_size, + KASAN_SHADOW_SCALE_SIZE); + + kasan_unpoison_shadow(object, size); + kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, + KASAN_KMALLOC_REDZONE); +} +EXPORT_SYMBOL(kasan_kmalloc); + +void kasan_kmalloc_large(const void *ptr, size_t size) +{ + struct page *page; + unsigned long redzone_start; + unsigned long redzone_end; + + if (unlikely(ptr == NULL)) + return; + + page = virt_to_page(ptr); + redzone_start = round_up((unsigned long)(ptr + size), + KASAN_SHADOW_SCALE_SIZE); + redzone_end = (unsigned long)ptr + (PAGE_SIZE << compound_order(page)); + + kasan_unpoison_shadow(ptr, size); + kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, + KASAN_PAGE_REDZONE); +} + +void kasan_krealloc(const void *object, size_t size) +{ + struct page *page; + + if (unlikely(object == ZERO_SIZE_PTR)) + return; + + page = virt_to_head_page(object); + + if (unlikely(!PageSlab(page))) + kasan_kmalloc_large(object, size); + else + kasan_kmalloc(page->slab_cache, object, size); +} + +void kasan_kfree_large(const void *ptr) +{ + struct page *page = virt_to_page(ptr); + + kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), + KASAN_FREE_PAGE); +} + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index d3c90d5..5b052ab 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -7,6 +7,11 @@ #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1)
#define KASAN_FREE_PAGE 0xFF /* page was freed */ +#define KASAN_FREE_PAGE 0xFF /* page was freed */ +#define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ +#define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ +#define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ +
struct kasan_access_info { const void *access_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index fab8e78..2760edb 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -24,6 +24,7 @@ #include <linux/kasan.h>
#include "kasan.h" +#include "../slab.h"
/* Shadow layout customization. */ #define SHADOW_BYTES_PER_BLOCK 1 @@ -55,8 +56,11 @@ static void print_error_description(struct kasan_access_info *info)
switch (shadow_val) { case KASAN_FREE_PAGE: + case KASAN_KMALLOC_FREE: bug_type = "use after free"; break; + case KASAN_PAGE_REDZONE: + case KASAN_KMALLOC_REDZONE: case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -77,6 +81,23 @@ static void print_address_description(struct kasan_access_info *info) if ((addr >= (void *)PAGE_OFFSET) && (addr < high_memory)) { struct page *page = virt_to_head_page(addr); + + if (PageSlab(page)) { + void *object; + struct kmem_cache *cache = page->slab_cache; + void *last_object; + + object = virt_to_obj(cache, page_address(page), addr); + last_object = page_address(page) + + page->objects * cache->size; + + if (unlikely(object > last_object)) + object = last_object; /* we hit into padding */ + + object_err(cache, page, object, + "kasan: bad access detected"); + return; + } dump_page(page, "kasan: bad access detected"); }
diff --git a/mm/slab_common.c b/mm/slab_common.c index dcdab81..9a18f19 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -789,6 +789,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) page = alloc_kmem_pages(flags, order); ret = page ? page_address(page) : NULL; kmemleak_alloc(ret, size, 1, flags); + kasan_kmalloc_large(ret, size); return ret; } EXPORT_SYMBOL(kmalloc_order); @@ -963,8 +964,10 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size, if (p) ks = ksize(p);
- if (ks >= new_size) + if (ks >= new_size) { + kasan_krealloc((void *)p, new_size); return (void *)p; + }
ret = kmalloc_track_caller(new_size, flags); if (ret && p) diff --git a/mm/slub.c b/mm/slub.c index d569522..b8b76d5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1251,11 +1251,13 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) { kmemleak_alloc(ptr, size, 1, flags); + kasan_kmalloc_large(ptr, size); }
static inline void kfree_hook(const void *x) { kmemleak_free(x); + kasan_kfree_large(x); }
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) @@ -1273,6 +1275,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, flags &= gfp_allowed_mask; kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); + kasan_slab_alloc(s, object); }
static inline void slab_free_hook(struct kmem_cache *s, void *x) @@ -1296,6 +1299,8 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) #endif if (!(s->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(x, s->object_size); + + kasan_slab_free(s, x); }
/* @@ -1390,8 +1395,11 @@ static void setup_object(struct kmem_cache *s, struct page *page, void *object) { setup_object_debug(s, page, object); - if (unlikely(s->ctor)) + if (unlikely(s->ctor)) { + kasan_unpoison_object_data(s, object); s->ctor(object); + kasan_poison_object_data(s, object); + } }
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -1421,6 +1429,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) if (unlikely(s->flags & SLAB_POISON)) memset(start, POISON_INUSE, PAGE_SIZE << order);
+ kasan_poison_slab(page); + for_each_object_idx(p, idx, s, start, page->objects) { setup_object(s, page, p); if (likely(idx < page->objects)) @@ -2497,6 +2507,7 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) { void *ret = slab_alloc(s, gfpflags, _RET_IP_); trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); + kasan_kmalloc(s, ret, size); return ret; } EXPORT_SYMBOL(kmem_cache_alloc_trace); @@ -2523,6 +2534,8 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
trace_kmalloc_node(_RET_IP_, ret, size, s->size, gfpflags, node); + + kasan_kmalloc(s, ret, size); return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node_trace); @@ -2906,6 +2919,7 @@ static void early_kmem_cache_node_alloc(int node) init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif + kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node)); init_kmem_cache_node(n); inc_slabs_node(kmem_cache_node, node, page->objects);
@@ -3278,6 +3292,8 @@ void *__kmalloc(size_t size, gfp_t flags)
trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+ kasan_kmalloc(s, ret, size); + return ret; } EXPORT_SYMBOL(__kmalloc); @@ -3321,12 +3337,14 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+ kasan_kmalloc(s, ret, size); + return ret; } EXPORT_SYMBOL(__kmalloc_node); #endif
-size_t ksize(const void *object) +static size_t __ksize(const void *object) { struct page *page;
@@ -3342,6 +3360,15 @@ size_t ksize(const void *object)
return slab_ksize(page->slab_cache); } + +size_t ksize(const void *object) +{ + size_t size = __ksize(object); + /* We assume that ksize callers could use whole allocated area, + so we need unpoison this area. */ + kasan_krealloc(object, size); + return size; +} EXPORT_SYMBOL(ksize);
void kfree(const void *x)
From: Andrey Ryabinin a.ryabinin@samsung.com
We need to manually unpoison the rounded-up allocation size for dname to avoid kasan reports in dentry_string_cmp(). When CONFIG_DCACHE_WORD_ACCESS=y, dentry_string_cmp() may access a few bytes beyond the size requested from kmalloc().
dentry_string_cmp() relies on the fact that the dentry name was allocated with kmalloc() and that kmalloc() internally rounds up the allocation size. So this is not a bug, but it makes kasan complain about such accesses. To avoid these reports we mark the rounded-up allocation size in the shadow as accessible.
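Concretely (a worked example with made-up numbers): for a 5-character name, the allocation asks for name->len + 1 == 6 bytes, but the word-at-a-time comparison reads a whole unsigned long (8 bytes on 64-bit), so the fix below unpoisons the rounded-up size:

    /* 6 bytes requested; round_up(6, sizeof(unsigned long)) == 8 on 64-bit,
     * so the full word read by dentry_string_cmp() is marked accessible. */
    kasan_unpoison_shadow(dname, round_up(name->len + 1, sizeof(unsigned long)));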
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Reported-by: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit df4c0e36f1b1782b0611a77c52cc240e5c4752dd) Signed-off-by: Alex Shi alex.shi@linaro.org --- fs/dcache.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/fs/dcache.c b/fs/dcache.c index d25f8fd..da349a1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -38,6 +38,8 @@ #include <linux/prefetch.h> #include <linux/ratelimit.h> #include <linux/list_lru.h> +#include <linux/kasan.h> + #include "internal.h" #include "mount.h"
@@ -1433,6 +1435,9 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } atomic_set(&p->u.count, 1); dname = p->name; + if (IS_ENABLED(CONFIG_DCACHE_WORD_ACCESS)) + kasan_unpoison_shadow(dname, + round_up(name->len + 1, sizeof(unsigned long))); } else { dname = dentry->d_iname; }
From: Andrey Ryabinin a.ryabinin@samsung.com
kmalloc() internally rounds up the allocation size, and kmemleak uses that rounded-up size as the object's size. This makes kasan complain while kmemleak scans memory or calculates an object's checksum. The simplest solution here is to disable kasan around those accesses.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Acked-by: Catalin Marinas catalin.marinas@arm.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit e79ed2f13faab8fc9d4ad76d5f5a241724e45836) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kmemleak.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 1f14ef6..c607bc0 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -98,6 +98,7 @@ #include <asm/processor.h> #include <linux/atomic.h>
+#include <linux/kasan.h> #include <linux/kmemcheck.h> #include <linux/kmemleak.h> #include <linux/memory_hotplug.h> @@ -1116,7 +1117,10 @@ static bool update_checksum(struct kmemleak_object *object) if (!kmemcheck_is_obj_initialized(object->pointer, object->size)) return false;
+ kasan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); + kasan_enable_current(); + return object->checksum != old_csum; }
@@ -1167,7 +1171,9 @@ static void scan_block(void *_start, void *_end, BYTES_PER_POINTER)) continue;
+ kasan_disable_current(); pointer = *ptr; + kasan_enable_current();
object = find_and_get_object(pointer, 1); if (!object)
From: Andrey Ryabinin a.ryabinin@samsung.com
This is a test module doing various nasty things like out-of-bounds accesses and use-after-free. It is useful for testing kernel debugging features like the kernel address sanitizer.
It mostly concentrates on testing the slab allocator, but we might want to add more tests here in the future (such as out-of-bounds accesses to stack and global variables, and so on).
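For reference, the module below is built as a module only (CONFIG_TEST_KASAN=m, which depends on KASAN) and is meant to be loaded on a KASAN-enabled kernel; each test prints its pr_info() banner and should be followed by a KASan report in the kernel log, and the init function deliberately returns -EAGAIN so the module never stays loaded.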
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 3f15801cdc2379ca4bf507f48bffd788f9e508ae) Signed-off-by: Alex Shi alex.shi@linaro.org
Conflicts: skip test_rhashtable.o in lib/Makefile --- lib/Kconfig.kasan | 8 ++ lib/Makefile | 5 +- lib/test_kasan.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 288 insertions(+), 2 deletions(-) create mode 100644 lib/test_kasan.c
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index a11ac02..4d47d87 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -42,4 +42,12 @@ config KASAN_INLINE
endchoice
+config TEST_KASAN + tristate "Module for testing kasan for bug detection" + depends on m && KASAN + help + This is a test module doing various nasty things like + out of bounds accesses, use after free. It is useful for testing + kernel debugging features like kernel address sanitizer. + endif diff --git a/lib/Makefile b/lib/Makefile index 0211d2b..211fa14 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,11 +30,12 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o +obj-$(CONFIG_TEST_BPF) += test_bpf.o +obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o +obj-$(CONFIG_TEST_KASAN) += test_kasan.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o -obj-$(CONFIG_TEST_BPF) += test_bpf.o -obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_kasan.c b/lib/test_kasan.c new file mode 100644 index 0000000..098c08e --- /dev/null +++ b/lib/test_kasan.c @@ -0,0 +1,277 @@ +/* + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin a.ryabinin@samsung.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) "kasan test: %s " fmt, __func__ + +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/module.h> + +static noinline void __init kmalloc_oob_right(void) +{ + char *ptr; + size_t size = 123; + + pr_info("out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 'x'; + kfree(ptr); +} + +static noinline void __init kmalloc_oob_left(void) +{ + char *ptr; + size_t size = 15; + + pr_info("out-of-bounds to left\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + *ptr = *(ptr - 1); + kfree(ptr); +} + +static noinline void __init kmalloc_node_oob_right(void) +{ + char *ptr; + size_t size = 4096; + + pr_info("kmalloc_node(): out-of-bounds to right\n"); + ptr = kmalloc_node(size, GFP_KERNEL, 0); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} + +static noinline void __init kmalloc_large_oob_rigth(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc large allocation: out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} + +static noinline void __init kmalloc_oob_krealloc_more(void) +{ + char *ptr1, *ptr2; + size_t size1 = 17; + size_t size2 = 19; + + pr_info("out-of-bounds after krealloc more\n"); + ptr1 = kmalloc(size1, GFP_KERNEL); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + return; + } + + ptr2[size2] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_krealloc_less(void) +{ + char *ptr1, *ptr2; + size_t size1 = 17; + size_t size2 = 15; + + pr_info("out-of-bounds after krealloc less\n"); + ptr1 = kmalloc(size1, GFP_KERNEL); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + return; + } + ptr2[size1] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_16(void) +{ + struct { + u64 words[2]; + } *ptr1, *ptr2; + + pr_info("kmalloc out-of-bounds for 16-bytes access\n"); + ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + kfree(ptr2); + return; + } + *ptr1 = *ptr2; + kfree(ptr1); + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_in_memset(void) +{ + char *ptr; + size_t size = 666; + + pr_info("out-of-bounds in memset\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr, 0, size+5); + kfree(ptr); +} + +static noinline void __init kmalloc_uaf(void) +{ + char *ptr; + size_t size = 10; + + pr_info("use-after-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + 
*(ptr + 8) = 'x'; +} + +static noinline void __init kmalloc_uaf_memset(void) +{ + char *ptr; + size_t size = 33; + + pr_info("use-after-free in memset\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + memset(ptr, 0, size); +} + +static noinline void __init kmalloc_uaf2(void) +{ + char *ptr1, *ptr2; + size_t size = 43; + + pr_info("use-after-free after another kmalloc\n"); + ptr1 = kmalloc(size, GFP_KERNEL); + if (!ptr1) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr1); + ptr2 = kmalloc(size, GFP_KERNEL); + if (!ptr2) { + pr_err("Allocation failed\n"); + return; + } + + ptr1[40] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmem_cache_oob(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache = kmem_cache_create("test_cache", + size, 0, + 0, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("out-of-bounds in kmem_cache_alloc\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + *p = p[size]; + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); +} + +static char global_array[10]; + +static noinline void __init kasan_global_oob(void) +{ + volatile int i = 3; + char *p = &global_array[ARRAY_SIZE(global_array) + i]; + + pr_info("out-of-bounds global variable\n"); + *(volatile char *)p; +} + +static noinline void __init kasan_stack_oob(void) +{ + char stack_array[10]; + volatile int i = 0; + char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; + + pr_info("out-of-bounds on stack\n"); + *(volatile char *)p; +} + +static int __init kmalloc_tests_init(void) +{ + kmalloc_oob_right(); + kmalloc_oob_left(); + kmalloc_node_oob_right(); + kmalloc_large_oob_rigth(); + kmalloc_oob_krealloc_more(); + kmalloc_oob_krealloc_less(); + kmalloc_oob_16(); + kmalloc_oob_in_memset(); + kmalloc_uaf(); + kmalloc_uaf_memset(); + kmalloc_uaf2(); + kmem_cache_oob(); + kasan_stack_oob(); + kasan_global_oob(); + return -EAGAIN; +} + +module_init(kmalloc_tests_init); +MODULE_LICENSE("GPL");
From: Andrey Ryabinin a.ryabinin@samsung.com
Instrumentation of builtin function calls was recently removed from GCC 5.0. To check the memory accessed by such functions, userspace asan always uses interceptors for them.
So now we should do the same. This patch declares memset/memmove/memcpy as weak symbols. In mm/kasan/kasan.c we have our own implementations of those functions, which check memory before accessing it.
The default memset/memmove/memcpy now always have aliases with a '__' prefix. For files built without kasan instrumentation (e.g. mm/slub.c), the original mem* functions are replaced (via #define) with the prefixed variants, because we don't want to check memory accesses there.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 393f203f5fd54421fddb1e2a263f64d3876eeadb) Signed-off-by: Alex Shi alex.shi@linaro.org --- arch/x86/boot/compressed/eboot.c | 3 +-- arch/x86/boot/compressed/misc.h | 1 + arch/x86/include/asm/string_64.h | 18 +++++++++++++++++- arch/x86/kernel/x8664_ksyms_64.c | 10 ++++++++-- arch/x86/lib/memcpy_64.S | 6 ++++-- arch/x86/lib/memmove_64.S | 4 ++++ arch/x86/lib/memset_64.S | 10 ++++++---- drivers/firmware/efi/libstub/efistub.h | 4 ++++ mm/kasan/kasan.c | 29 +++++++++++++++++++++++++++++ 9 files changed, 74 insertions(+), 11 deletions(-)
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index acdf06b..2da9cef 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -13,8 +13,7 @@ #include <asm/setup.h> #include <asm/desc.h>
-#undef memcpy /* Use memcpy from misc.c */ - +#include "../string.h" #include "eboot.h"
static efi_system_table_t *sys_table; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 24e3e56..04477d6 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -7,6 +7,7 @@ * we just keep it from happening */ #undef CONFIG_PARAVIRT +#undef CONFIG_KASAN #ifdef CONFIG_X86_32 #define _ASM_X86_DESC_H 1 #endif diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 19e2c46..e466119 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -27,11 +27,12 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t function. */
#define __HAVE_ARCH_MEMCPY 1 +extern void *__memcpy(void *to, const void *from, size_t len); + #ifndef CONFIG_KMEMCHECK #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 extern void *memcpy(void *to, const void *from, size_t len); #else -extern void *__memcpy(void *to, const void *from, size_t len); #define memcpy(dst, src, len) \ ({ \ size_t __len = (len); \ @@ -53,9 +54,11 @@ extern void *__memcpy(void *to, const void *from, size_t len);
#define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); +void *__memset(void *s, int c, size_t n);
#define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t count); +void *__memmove(void *dest, const void *src, size_t count);
int memcmp(const void *cs, const void *ct, size_t count); size_t strlen(const char *s); @@ -63,6 +66,19 @@ char *strcpy(char *dest, const char *src); char *strcat(char *dest, const char *src); int strcmp(const char *cs, const char *ct);
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#undef memcpy +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) +#endif + #endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_64_H */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 0406819..37d8fa4 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -50,13 +50,19 @@ EXPORT_SYMBOL(csum_partial); #undef memset #undef memmove
+extern void *__memset(void *, int, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t); +extern void *__memmove(void *, const void *, __kernel_size_t); extern void *memset(void *, int, __kernel_size_t); extern void *memcpy(void *, const void *, __kernel_size_t); -extern void *__memcpy(void *, const void *, __kernel_size_t); +extern void *memmove(void *, const void *, __kernel_size_t); + +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove);
EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(memmove);
#ifndef CONFIG_DEBUG_VIRTUAL diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 56313a3..89b53c9 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -53,6 +53,8 @@ .Lmemcpy_e_e: .previous
+.weak memcpy + ENTRY(__memcpy) ENTRY(memcpy) CFI_STARTPROC @@ -199,8 +201,8 @@ ENDPROC(__memcpy) * only outcome... */ .section .altinstructions, "a" - altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ + altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c - altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ + altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e .previous diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 65268a6..9c4b530 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,7 +24,10 @@ * Output: * rax: dest */ +.weak memmove + ENTRY(memmove) +ENTRY(__memmove) CFI_STARTPROC
/* Handle more 32 bytes in loop */ @@ -220,4 +223,5 @@ ENTRY(memmove) .Lmemmove_end_forward-.Lmemmove_begin_forward, \ .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs .previous +ENDPROC(__memmove) ENDPROC(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2dcb3808..6f44935 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -56,6 +56,8 @@ .Lmemset_e_e: .previous
+.weak memset + ENTRY(memset) ENTRY(__memset) CFI_STARTPROC @@ -147,8 +149,8 @@ ENDPROC(__memset) * feature to implement the right patch order. */ .section .altinstructions,"a" - altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ - .Lfinal-memset,.Lmemset_e-.Lmemset_c - altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ - .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e + altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ + .Lfinal-__memset,.Lmemset_e-.Lmemset_c + altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ + .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e .previous diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 304ab29..d1ba39c 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -5,6 +5,10 @@ /* error code which can't be mistaken for valid address */ #define EFI_ERROR (~0UL)
+#undef memcpy +#undef memset +#undef memmove + void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index dc83f07..799c52b 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -255,6 +255,35 @@ static __always_inline void check_memory_region(unsigned long addr, kasan_report(addr, size, write, _RET_IP_); }
+void __asan_loadN(unsigned long addr, size_t size); +void __asan_storeN(unsigned long addr, size_t size); + +#undef memset +void *memset(void *addr, int c, size_t len) +{ + __asan_storeN((unsigned long)addr, len); + + return __memset(addr, c, len); +} + +#undef memmove +void *memmove(void *dest, const void *src, size_t len) +{ + __asan_loadN((unsigned long)src, len); + __asan_storeN((unsigned long)dest, len); + + return __memmove(dest, src, len); +} + +#undef memcpy +void *memcpy(void *dest, const void *src, size_t len) +{ + __asan_loadN((unsigned long)src, len); + __asan_storeN((unsigned long)dest, len); + + return __memcpy(dest, src, len); +} + void kasan_alloc_pages(struct page *page, unsigned int order) { if (likely(!PageHighMem(page)))
From: Andrey Ryabinin a.ryabinin@samsung.com
Stack instrumentation allows detection of out-of-bounds memory accesses to variables allocated on the stack. The compiler adds redzones around every variable on the stack and poisons the redzones in the function's prologue.
This approach significantly increases stack usage, so all in-kernel stack sizes were doubled.
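As a rough sketch (illustrative only, not code from this series) of what the compiler-inserted instrumentation does for a function with a local buffer once --param asan-stack=1 is in effect:

    void example(void)                      /* hypothetical function */
    {
            char buf[8];
            volatile int i = 8;             /* defeat compile-time folding */

            /*
             * Prologue: the compiler reserves redzones around 'buf' in the
             * (now larger) stack frame and poisons their shadow with the
             * KASAN_STACK_LEFT/KASAN_STACK_RIGHT values (0xF1/0xF3).
             */
            buf[i] = 'x';                   /* lands in the right redzone -> report */
            /* Epilogue: the frame's shadow is unpoisoned again. */
    }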
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit c420f167db8c799d69fe43a801c58a7f02e9d57c) Signed-off-by: Alex Shi alex.shi@linaro.org
Conflicts: skip numa_balancing in include/linux/init_task.h --- arch/x86/include/asm/page_64_types.h | 12 +++++++++--- arch/x86/kernel/Makefile | 2 ++ arch/x86/mm/kasan_init_64.c | 11 +++++++++-- include/linux/init_task.h | 8 ++++++++ mm/kasan/kasan.h | 9 +++++++++ mm/kasan/report.c | 6 ++++++ scripts/Makefile.kasan | 1 + 7 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 75450b2..4edd53b 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,17 +1,23 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H
-#define THREAD_SIZE_ORDER 2 +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif + +#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1))
-#define EXCEPTION_STACK_ORDER 0 +#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
#define DOUBLEFAULT_STACK 1 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9d46ee8..2e7f86e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -17,6 +17,8 @@ CFLAGS_REMOVE_early_printk.o = -pg endif
KASAN_SANITIZE_head$(BITS).o := n +KASAN_SANITIZE_dumpstack.o := n +KASAN_SANITIZE_dumpstack_$(BITS).o := n
CFLAGS_irq.o := -I$(src)/../include/asm/trace
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 3e4d9a1..5350870 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -189,11 +189,18 @@ void __init kasan_init(void) if (map_range(&pfn_mapped[i])) panic("kasan: unable to allocate shadow!"); } - populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - (void *)KASAN_SHADOW_END); + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), + (unsigned long)kasan_mem_to_shadow(_end), + NUMA_NO_NODE); + + populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_VADDR), + (void *)KASAN_SHADOW_END);
memset(kasan_zero_page, 0, PAGE_SIZE);
load_cr3(init_level4_pgt); + init_task.kasan_depth = 0; } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 77fc43f..a041f64 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -166,6 +166,13 @@ extern struct task_group root_task_group; # define INIT_RT_MUTEXES(tsk) #endif
+#ifdef CONFIG_KASAN +# define INIT_KASAN(tsk) \ + .kasan_depth = 1, +#else +# define INIT_KASAN(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -237,6 +244,7 @@ extern struct task_group root_task_group; INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ + INIT_KASAN(tsk) \ }
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 5b052ab..1fcc1d8 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -12,6 +12,15 @@ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */
+/* + * Stack redzone shadow values + * (Those are compiler's ABI, don't change them) + */ +#define KASAN_STACK_LEFT 0xF1 +#define KASAN_STACK_MID 0xF2 +#define KASAN_STACK_RIGHT 0xF3 +#define KASAN_STACK_PARTIAL 0xF4 +
struct kasan_access_info { const void *access_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 2760edb..866732e 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -64,6 +64,12 @@ static void print_error_description(struct kasan_access_info *info) case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; + case KASAN_STACK_LEFT: + case KASAN_STACK_MID: + case KASAN_STACK_RIGHT: + case KASAN_STACK_PARTIAL: + bug_type = "out of bounds on stack"; + break; }
pr_err("BUG: KASan: %s in %pS at addr %p\n", diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 7acd6fa..2163b8c 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -9,6 +9,7 @@ CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address
CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ + --param asan-stack=1 \ --param asan-instrumentation-with-call-threshold=$(call_threshold))
ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),)
From: Andrey Ryabinin a.ryabinin@samsung.com
For instrumenting global variables, KASan needs shadow memory backing the memory used by modules. So on module loading we will need to allocate shadow memory and map it at the shadow address that corresponds to the address returned by module_alloc().
__vmalloc_node_range() could be used for this purpose, except that it puts a guard hole after the allocated area. A guard hole in the shadow memory would be a problem, because at some future point we might need shadow memory at the address occupied by that guard hole. In that case we would fail to allocate shadow for module_alloc().
Add a new vm_struct flag, 'VM_NO_GUARD', indicating that the vm area doesn't have a guard hole.
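A hedged usage sketch (not taken from this series; 'size' is a hypothetical, page-aligned length) of how a caller can request an area without a trailing guard page once the flag exists:

    /* With VM_NO_GUARD the area stays exactly 'size' bytes: no guard page
     * is reserved after it, and get_vm_area_size() simply returns
     * area->size. */
    struct vm_struct *area = get_vm_area(size, VM_ALLOC | VM_NO_GUARD);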
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 71394fe50146202f2c8d92cf50f5ebc761acf254) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/linux/vmalloc.h | 9 +++++++-- mm/vmalloc.c | 6 ++---- 2 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b87696f..1526fe7 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -16,6 +16,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ +#define VM_NO_GUARD 0x00000040 /* don't add guard page */ /* bits [20..32] reserved for arch specific ioremap internals */
/* @@ -96,8 +97,12 @@ void vmalloc_sync_all(void);
static inline size_t get_vm_area_size(const struct vm_struct *area) { - /* return actual size without guard page */ - return area->size - PAGE_SIZE; + if (!(area->flags & VM_NO_GUARD)) + /* return actual size without guard page */ + return area->size - PAGE_SIZE; + else + return area->size; + }
extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 90520af..2658d2c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1325,10 +1325,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, if (unlikely(!area)) return NULL;
- /* - * We always allocate a guard page. - */ - size += PAGE_SIZE; + if (!(flags & VM_NO_GUARD)) + size += PAGE_SIZE;
va = alloc_vmap_area(size, align, start, end, node, gfp_mask); if (IS_ERR(va)) {
From: Andrey Ryabinin a.ryabinin@samsung.com
For instrumenting global variables, KASan needs shadow memory backing the memory used by modules. So on module loading we will need to allocate shadow memory and map it at the shadow address that corresponds to the address returned by module_alloc().
__vmalloc_node_range() could be used for this purpose, except that it puts a guard hole after the allocated area. A guard hole in the shadow memory would be a problem, because at some future point we might need shadow memory at the address occupied by that guard hole. In that case we would fail to allocate shadow for module_alloc().
Now we have the VM_NO_GUARD flag that disables the guard page, so we need a way to pass it into __vmalloc_node_range(). Add a new 'vm_flags' parameter to the __vmalloc_node_range() function.
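As an illustration of the new signature, a sketch along the lines of how the module shadow allocation later in this series is expected to call it ('shadow_start'/'shadow_size' are placeholder names, not part of this patch):

    void *shadow = __vmalloc_node_range(shadow_size, 1,
                            shadow_start, shadow_start + shadow_size,
                            GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
                            PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
                            __builtin_return_address(0));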
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit cb9e3c292d0115499c660028ad35ac5501d722b5) Signed-off-by: Alex Shi alex.shi@linaro.org --- arch/arm/kernel/module.c | 2 +- arch/arm64/kernel/module.c | 4 ++-- arch/mips/kernel/module.c | 2 +- arch/parisc/kernel/module.c | 2 +- arch/s390/kernel/module.c | 2 +- arch/sparc/kernel/module.c | 2 +- arch/unicore32/kernel/module.c | 2 +- arch/x86/kernel/module.c | 2 +- include/linux/vmalloc.h | 4 +++- mm/vmalloc.c | 10 ++++++---- 10 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 6a4dffe..a7a3b15 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -41,7 +41,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index e366329..468c12a 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -33,8 +33,8 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, - __builtin_return_address(0)); + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); }
enum aarch64_reloc_op { diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index 2a52568..1833f51 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c @@ -47,7 +47,7 @@ static DEFINE_SPINLOCK(dbe_lock); void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 50dfafc..0d498ef 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -219,7 +219,7 @@ void *module_alloc(unsigned long size) * init_data correctly */ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_RWX, NUMA_NO_NODE, + PAGE_KERNEL_RWX, 0, NUMA_NO_NODE, __builtin_return_address(0)); }
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index b89b591..411a7ee 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -50,7 +50,7 @@ void *module_alloc(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 97655e0..192a617 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -29,7 +29,7 @@ static void *module_map(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #else diff --git a/arch/unicore32/kernel/module.c b/arch/unicore32/kernel/module.c index dc41f6d..e191b34 100644 --- a/arch/unicore32/kernel/module.c +++ b/arch/unicore32/kernel/module.c @@ -25,7 +25,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); }
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e69f988..e830e61 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -88,7 +88,7 @@ void *module_alloc(unsigned long size) return __vmalloc_node_range(size, 1, MODULES_VADDR + get_module_load_offset(), MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_EXEC, NUMA_NO_NODE, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); }
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 1526fe7..7d7acb3 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -76,7 +76,9 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, const void *caller); + pgprot_t prot, unsigned long vm_flags, int node, + const void *caller); + extern void vfree(const void *addr);
extern void *vmap(struct page **pages, unsigned int count, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2658d2c..50ddda06 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1620,6 +1620,7 @@ fail: * @end: vm area range end * @gfp_mask: flags for the page level allocator * @prot: protection mask for the allocated pages + * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD) * @node: node to use for allocation or NUMA_NO_NODE * @caller: caller's return address * @@ -1629,7 +1630,8 @@ fail: */ void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, const void *caller) + pgprot_t prot, unsigned long vm_flags, int node, + const void *caller) { struct vm_struct *area; void *addr; @@ -1639,8 +1641,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages) goto fail;
- area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED, - start, end, node, gfp_mask, caller); + area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED | + vm_flags, start, end, node, gfp_mask, caller); if (!area) goto fail;
@@ -1689,7 +1691,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, int node, const void *caller) { return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, - gfp_mask, prot, node, caller); + gfp_mask, prot, 0, node, caller); }
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
From: Andrey Ryabinin a.ryabinin@samsung.com
KASan uses constructors to initialize redzones for global variables. The globals instrumentation in GCC 4.9.2 produces constructors with a priority (.init_array.00099).
Currently the kernel ignores such constructors: only constructors with the default priority (.init_array) are supported.
This patch adds support for constructors with priorities. For the kernel image we put pointers to the constructors between __ctors_start/__ctors_end, and do_ctors() calls them at start-up. For modules we merge the .init_array.* sections into the resulting .init_array; module code already handles constructors in the .init_array section properly.
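For illustration only (a userspace sketch, not part of the patch): with GCC, a constructor given an explicit priority lands in a numbered .init_array.NNNNN section, which is exactly what the SORT(.init_array.*) input rules added by this patch collect. The priority value 101 is arbitrary.

    /* ctor_prio.c - build with: gcc -Wall ctor_prio.c && ./a.out */
    #include <stdio.h>

    /*
     * GCC emits this into .init_array.00101 rather than plain .init_array.
     * In userspace the C runtime walks both; in the kernel, the linker
     * script must collect .init_array.* explicitly or the pointer is lost.
     */
    __attribute__((constructor(101)))
    static void prioritized_ctor(void)
    {
        puts("constructor with priority 101 ran before main()");
    }

    int main(void)
    {
        puts("main()");
        return 0;
    }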
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 9ddf82521c86ae07af79dbe5a93c52890f2bab23) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/asm-generic/vmlinux.lds.h | 1 + scripts/module-common.lds | 3 +++ 2 files changed, 4 insertions(+)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index aa70cbd..1a1a89d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -477,6 +477,7 @@ #define KERNEL_CTORS() . = ALIGN(8); \ VMLINUX_SYMBOL(__ctors_start) = .; \ *(.ctors) \ + *(SORT(.init_array.*)) \ *(.init_array) \ VMLINUX_SYMBOL(__ctors_end) = .; #else diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 0865b3e..c25bbd1c 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -16,4 +16,7 @@ SECTIONS { __kcrctab_unused : { *(SORT(___kcrctab_unused+*)) } __kcrctab_unused_gpl : { *(SORT(___kcrctab_unused_gpl+*)) } __kcrctab_gpl_future : { *(SORT(___kcrctab_gpl_future+*)) } + + . = ALIGN(8); + .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } }
From: Andrey Ryabinin a.ryabinin@samsung.com
The MODULE_DEVICE_TABLE() macro is used to create aliases to device tables. Normally an alias should have the same type as the aliased symbol.
Device tables are arrays, so they have 'struct type##_device_id[x]' types. The alias created by MODULE_DEVICE_TABLE() has the non-array type 'struct type##_device_id'.
This inconsistency confuses the compiler: it can make a wrong assumption about the variable's size, which leads KASan to produce a false-positive report about an out-of-bounds access.
For every global variable the compiler calls __asan_register_globals(), passing information about the variable (address, size, size with redzone, name, ...). __asan_register_globals() poisons the symbol's redzone to detect possible out-of-bounds accesses.
When a symbol has an alias, __asan_register_globals() is called both for the symbol and for the alias. The compiler determines the size of a variable from the size of its type. The alias and the symbol have the same address, so if the alias has the wrong size, part of the memory that actually belongs to the symbol can be poisoned as the redzone of the alias.
By fixing the type of the alias symbol we fix its size, so __asan_register_globals() will no longer poison valid memory.
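A hypothetical userspace sketch (illustration only; names are made up, and newer compilers may warn about the intentionally incompatible alias, which is precisely the inconsistency being removed): the alias declared with the element type reports the size of one entry, while typeof() preserves the array type and its full size.

    /* alias_size.c - build with: gcc -Wall alias_size.c && ./a.out */
    #include <stdio.h>

    struct demo_device_id { unsigned int vendor, device; };

    static const struct demo_device_id my_ids[3] = {
        { 0x8086, 0x1237 }, { 0x8086, 0x7000 }, { 0, 0 },
    };

    /* Old style: scalar type, so the alias looks one element long. */
    extern const struct demo_device_id alias_wrong_type
        __attribute__((unused, alias("my_ids")));

    /* Fixed style: typeof(my_ids) keeps the array type and its size. */
    extern const typeof(my_ids) alias_right_type
        __attribute__((unused, alias("my_ids")));

    int main(void)
    {
        printf("vendor 0x%x: array %zu, wrong alias %zu, typeof alias %zu\n",
               my_ids[0].vendor, sizeof(my_ids),
               sizeof(alias_wrong_type), sizeof(alias_right_type));
        return 0;
    }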
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 6301939d97d079f0d3dbe71e750f4daf5d39fc33) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/module.h b/include/linux/module.h index 71f282a..0a88d71 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -135,7 +135,7 @@ void trim_init_extable(struct module *m); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ - extern const struct type##_device_id __mod_##type##__##name##_device_table \ +extern const typeof(name) __mod_##type##__##name##_device_table \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name)
From: Andrey Ryabinin a.ryabinin@samsung.com
This feature lets us detect out-of-bounds accesses to global variables. It works both for globals in the kernel image and for globals in modules. Currently it does not work for symbols in user-specified sections (e.g. __init, __read_mostly, ...).
The idea is simple: the compiler grows each global variable by the redzone size and adds constructors that invoke the __asan_register_globals() function. Information about the global variable (address, size, size with redzone, ...) is passed to __asan_register_globals() so we can poison the variable's redzone.
This patch also forces module_alloc() to return an 8*PAGE_SIZE aligned address, which makes shadow memory handling (kasan_module_alloc()/kasan_module_free()) simpler. Such alignment guarantees that each shadow page backing the modules' address space corresponds to only one module_alloc() allocation.
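A rough worked example (assumptions: 4 KiB pages and KASAN_SHADOW_SCALE_SHIFT = 3, as used in this series) of why that particular alignment: one shadow byte covers 8 bytes, so one shadow page covers PAGE_SIZE << 3 = 32 KiB of module space, and aligning module_alloc() to that size keeps every shadow page owned by a single allocation.

    /* module_align.c - build with: gcc -Wall module_align.c && ./a.out */
    #include <stdio.h>

    #define PAGE_SHIFT                12
    #define PAGE_SIZE                 (1UL << PAGE_SHIFT)
    #define KASAN_SHADOW_SCALE_SHIFT  3
    #define MODULE_ALIGN              (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)

    int main(void)
    {
        /* Bytes of module address space covered by one page of shadow. */
        unsigned long covered = PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT;

        printf("MODULE_ALIGN = %lu bytes\n", (unsigned long)MODULE_ALIGN);
        printf("one %lu-byte shadow page covers %lu bytes of modules\n",
               PAGE_SIZE, covered);
        /*
         * Each MODULE_ALIGN-sized chunk of module space is used by at most
         * one allocation, so its shadow page has a single owner.
         */
        return 0;
    }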
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Konstantin Serebryany kcc@google.com Cc: Dmitry Chernenkov dmitryc@google.com Signed-off-by: Andrey Konovalov adech.fo@gmail.com Cc: Yuri Gribov tetra2005@gmail.com Cc: Konstantin Khlebnikov koct9i@gmail.com Cc: Sasha Levin sasha.levin@oracle.com Cc: Christoph Lameter cl@linux.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Dave Hansen dave.hansen@intel.com Cc: Andi Kleen andi@firstfloor.org Cc: Ingo Molnar mingo@elte.hu Cc: Thomas Gleixner tglx@linutronix.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit bebf56a1b176c2e1c9efe44e7e6915532cc682cf) Signed-off-by: Alex Shi alex.shi@linaro.org --- Documentation/kasan.txt | 2 +- arch/x86/kernel/module.c | 12 +++++++++- arch/x86/mm/kasan_init_64.c | 2 +- include/linux/compiler-gcc4.h | 4 ++++ include/linux/compiler-gcc5.h | 2 ++ include/linux/kasan.h | 10 +++++++++ kernel/module.c | 2 ++ lib/Kconfig.kasan | 1 + mm/kasan/kasan.c | 52 +++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 25 +++++++++++++++++++++ mm/kasan/report.c | 22 ++++++++++++++++++ scripts/Makefile.kasan | 2 +- 12 files changed, 132 insertions(+), 4 deletions(-)
diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index f0645a8..092fc10 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -9,7 +9,7 @@ a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs.
KASan uses compile-time instrumentation for checking every memory access, -therefore you will need a certain version of GCC >= 4.9.2 +therefore you will need a certain version of GCC > 4.9.2
Currently KASan is supported only for x86_64 architecture and requires that the kernel be built with the SLUB allocator. diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e830e61..d1ac80b 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -24,6 +24,7 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/kasan.h> #include <linux/bug.h> #include <linux/mm.h> #include <linux/gfp.h> @@ -83,13 +84,22 @@ static unsigned long int get_module_load_offset(void)
void *module_alloc(unsigned long size) { + void *p; + if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - return __vmalloc_node_range(size, 1, + + p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_module_alloc(p, size) < 0)) { + vfree(p); + return NULL; + } + + return p; }
#ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 5350870..4860906 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -196,7 +196,7 @@ void __init kasan_init(void) (unsigned long)kasan_mem_to_shadow(_end), NUMA_NO_NODE);
- populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_VADDR), + populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END);
memset(kasan_zero_page, 0, PAGE_SIZE); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index d1a5582..769e198 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -85,3 +85,7 @@ #define __HAVE_BUILTIN_BSWAP16__ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#if GCC_VERSION >= 40902 +#define KASAN_ABI_VERSION 3 +#endif diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h index c8c5659..efee493 100644 --- a/include/linux/compiler-gcc5.h +++ b/include/linux/compiler-gcc5.h @@ -63,3 +63,5 @@ #define __HAVE_BUILTIN_BSWAP64__ #define __HAVE_BUILTIN_BSWAP16__ #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#define KASAN_ABI_VERSION 4 diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d5310ee..72ba725 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -49,8 +49,15 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object);
+#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) + +int kasan_module_alloc(void *addr, size_t size); +void kasan_module_free(void *addr); + #else /* CONFIG_KASAN */
+#define MODULE_ALIGN 1 + static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
static inline void kasan_enable_current(void) {} @@ -74,6 +81,9 @@ static inline void kasan_krealloc(const void *object, size_t new_size) {} static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
+static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_module_free(void *addr) {} + #endif /* CONFIG_KASAN */
#endif /* LINUX_KASAN_H */ diff --git a/kernel/module.c b/kernel/module.c index 3da0c00..8ab423e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -57,6 +57,7 @@ #include <linux/async.h> #include <linux/percpu.h> #include <linux/kmemleak.h> +#include <linux/kasan.h> #include <linux/jump_label.h> #include <linux/pfn.h> #include <linux/bsearch.h> @@ -1831,6 +1832,7 @@ static void unset_module_init_ro_nx(struct module *mod) { } void __weak module_free(struct module *mod, void *module_region) { vfree(module_region); + kasan_module_free(module_region); }
void __weak module_arch_cleanup(struct module *mod) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4d47d87..4fecaedc 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" depends on SLUB_DEBUG + select CONSTRUCTORS help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 799c52b..78fee63 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -22,6 +22,7 @@ #include <linux/memblock.h> #include <linux/memory.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/printk.h> #include <linux/sched.h> #include <linux/slab.h> @@ -395,6 +396,57 @@ void kasan_kfree_large(const void *ptr) KASAN_FREE_PAGE); }
+int kasan_module_alloc(void *addr, size_t size) +{ + void *ret; + size_t shadow_size; + unsigned long shadow_start; + + shadow_start = (unsigned long)kasan_mem_to_shadow(addr); + shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, + PAGE_SIZE); + + if (WARN_ON(!PAGE_ALIGNED(shadow_start))) + return -EINVAL; + + ret = __vmalloc_node_range(shadow_size, 1, shadow_start, + shadow_start + shadow_size, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, + __builtin_return_address(0)); + return ret ? 0 : -ENOMEM; +} + +void kasan_module_free(void *addr) +{ + vfree(kasan_mem_to_shadow(addr)); +} + +static void register_global(struct kasan_global *global) +{ + size_t aligned_size = round_up(global->size, KASAN_SHADOW_SCALE_SIZE); + + kasan_unpoison_shadow(global->beg, global->size); + + kasan_poison_shadow(global->beg + aligned_size, + global->size_with_redzone - aligned_size, + KASAN_GLOBAL_REDZONE); +} + +void __asan_register_globals(struct kasan_global *globals, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + register_global(&globals[i]); +} +EXPORT_SYMBOL(__asan_register_globals); + +void __asan_unregister_globals(struct kasan_global *globals, size_t size) +{ +} +EXPORT_SYMBOL(__asan_unregister_globals); + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1fcc1d8..4986b0a 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -11,6 +11,7 @@ #define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ +#define KASAN_GLOBAL_REDZONE 0xFA /* redzone for global variable */
/* * Stack redzone shadow values @@ -21,6 +22,10 @@ #define KASAN_STACK_RIGHT 0xF3 #define KASAN_STACK_PARTIAL 0xF4
+/* Don't break randconfig/all*config builds */ +#ifndef KASAN_ABI_VERSION +#define KASAN_ABI_VERSION 1 +#endif
struct kasan_access_info { const void *access_addr; @@ -30,6 +35,26 @@ struct kasan_access_info { unsigned long ip; };
+/* The layout of struct dictated by compiler */ +struct kasan_source_location { + const char *filename; + int line_no; + int column_no; +}; + +/* The layout of struct dictated by compiler */ +struct kasan_global { + const void *beg; /* Address of the beginning of the global variable. */ + size_t size; /* Size of the global variable. */ + size_t size_with_redzone; /* Size of the variable + size of the red zone. 32 bytes aligned */ + const void *name; + const void *module_name; /* Name of the module where the global variable is declared. */ + unsigned long has_dynamic_init; /* This needed for C++ */ +#if KASAN_ABI_VERSION >= 4 + struct kasan_source_location *location; +#endif +}; + void kasan_report_error(struct kasan_access_info *info); void kasan_report_user_access(struct kasan_access_info *info);
diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 866732e..680ceed 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -23,6 +23,8 @@ #include <linux/types.h> #include <linux/kasan.h>
+#include <asm/sections.h> + #include "kasan.h" #include "../slab.h"
@@ -61,6 +63,7 @@ static void print_error_description(struct kasan_access_info *info) break; case KASAN_PAGE_REDZONE: case KASAN_KMALLOC_REDZONE: + case KASAN_GLOBAL_REDZONE: case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -80,6 +83,20 @@ static void print_error_description(struct kasan_access_info *info) info->access_size, current->comm, task_pid_nr(current)); }
+static inline bool kernel_or_module_addr(const void *addr) +{ + return (addr >= (void *)_stext && addr < (void *)_end) + || (addr >= (void *)MODULES_VADDR + && addr < (void *)MODULES_END); +} + +static inline bool init_task_stack_addr(const void *addr) +{ + return addr >= (void *)&init_thread_union.stack && + (addr <= (void *)&init_thread_union.stack + + sizeof(init_thread_union.stack)); +} + static void print_address_description(struct kasan_access_info *info) { const void *addr = info->access_addr; @@ -107,6 +124,11 @@ static void print_address_description(struct kasan_access_info *info) dump_page(page, "kasan: bad access detected"); }
+ if (kernel_or_module_addr(addr)) { + if (!init_task_stack_addr(addr)) + pr_err("Address belongs to variable %pS\n", addr); + } + dump_stack(); }
diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 2163b8c..631619b 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -9,7 +9,7 @@ CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address
CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ - --param asan-stack=1 \ + --param asan-stack=1 --param asan-globals=1 \ --param asan-instrumentation-with-call-threshold=$(call_threshold))
ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),)
From: Andrey Ryabinin a.ryabinin@samsung.com
The current approach to handling shadow memory for modules is broken.
Shadow memory can be freed only after the memory it corresponds to is no longer used. vfree() called from interrupt context may use the memory it is freeing to store a 'struct llist_node' in it:

    void vfree(const void *addr)
    {
    ...
        if (unlikely(in_interrupt())) {
            struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
            if (llist_add((struct llist_node *)addr, &p->list))
                schedule_work(&p->wq);

Later this list node is used in free_work(), which actually frees the memory. Currently module_memfree() called in interrupt context will free the shadow before freeing the module's memory, which can provoke a kernel crash.
So shadow memory should be freed after the module's memory. However, such a deallocation order could race with kasan_module_alloc() in module_alloc().
Free the shadow right before releasing the vm area. At this point the vfree()'d memory is not used anymore and not yet available for other allocations. A new VM_KASAN flag is used to indicate that a vm area has dynamically allocated shadow memory, so kasan frees the shadow only if it was previously allocated.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Acked-by: Rusty Russell rusty@rustcorp.com.au Cc: Dmitry Vyukov dvyukov@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit a5af5aa8b67dfdba36c853b70564fd2dfe73d478) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/linux/kasan.h | 5 +++-- include/linux/vmalloc.h | 1 + kernel/module.c | 2 -- mm/kasan/kasan.c | 14 +++++++++++--- mm/vmalloc.c | 1 + 5 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 72ba725..5fa48a2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -5,6 +5,7 @@
struct kmem_cache; struct page; +struct vm_struct;
#ifdef CONFIG_KASAN
@@ -52,7 +53,7 @@ void kasan_slab_free(struct kmem_cache *s, void *object); #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
int kasan_module_alloc(void *addr, size_t size); -void kasan_module_free(void *addr); +void kasan_free_shadow(const struct vm_struct *vm);
#else /* CONFIG_KASAN */
@@ -82,7 +83,7 @@ static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_module_free(void *addr) {} +static inline void kasan_free_shadow(const struct vm_struct *vm) {}
#endif /* CONFIG_KASAN */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 7d7acb3..0ec5983 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -17,6 +17,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ +#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ /* bits [20..32] reserved for arch specific ioremap internals */
/* diff --git a/kernel/module.c b/kernel/module.c index 8ab423e..3da0c00 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -57,7 +57,6 @@ #include <linux/async.h> #include <linux/percpu.h> #include <linux/kmemleak.h> -#include <linux/kasan.h> #include <linux/jump_label.h> #include <linux/pfn.h> #include <linux/bsearch.h> @@ -1832,7 +1831,6 @@ static void unset_module_init_ro_nx(struct module *mod) { } void __weak module_free(struct module *mod, void *module_region) { vfree(module_region); - kasan_module_free(module_region); }
void __weak module_arch_cleanup(struct module *mod) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 78fee63..936d816 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -29,6 +29,7 @@ #include <linux/stacktrace.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/vmalloc.h> #include <linux/kasan.h>
#include "kasan.h" @@ -414,12 +415,19 @@ int kasan_module_alloc(void *addr, size_t size) GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, __builtin_return_address(0)); - return ret ? 0 : -ENOMEM; + + if (ret) { + find_vm_area(addr)->flags |= VM_KASAN; + return 0; + } + + return -ENOMEM; }
-void kasan_module_free(void *addr) +void kasan_free_shadow(const struct vm_struct *vm) { - vfree(kasan_mem_to_shadow(addr)); + if (vm->flags & VM_KASAN) + vfree(kasan_mem_to_shadow(vm->addr)); }
static void register_global(struct kasan_global *global) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 50ddda06..4d7725f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1419,6 +1419,7 @@ struct vm_struct *remove_vm_area(const void *addr) spin_unlock(&vmap_area_lock);
vmap_debug_free_range(va->va_start, va->va_end); + kasan_free_shadow(vm); free_unmap_vmap_area(va); vm->size -= PAGE_SIZE;
From: Andrey Ryabinin a.ryabinin@samsung.com
include/linux/moduleloader.h is a more suitable place for this macro. Also change the alignment to PAGE_SIZE for CONFIG_KASAN=n, as such alignment is already assumed in several places.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Dmitry Vyukov dvyukov@google.com Acked-by: Rusty Russell rusty@rustcorp.com.au Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit d3733e5c98e952d419e77fa721912f09d15a2806) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/linux/kasan.h | 4 ---- include/linux/moduleloader.h | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5fa48a2..5bb0744 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -50,15 +50,11 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object);
-#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) - int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm);
#else /* CONFIG_KASAN */
-#define MODULE_ALIGN 1 - static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
static inline void kasan_enable_current(void) {} diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 7eeb9bb..94f268e 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -82,4 +82,11 @@ int module_finalize(const Elf_Ehdr *hdr, /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod);
+#ifdef CONFIG_KASAN +#include <linux/kasan.h> +#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#else +#define MODULE_ALIGN PAGE_SIZE +#endif + #endif
From: Andrey Ryabinin a.ryabinin@samsung.com
It might be annoying to constantly see this:
scripts/Makefile.kasan:16: Cannot use CONFIG_KASAN: -fsanitize=kernel-address is not supported by compiler
while performing allmodconfig/allyesconfig build tests. Disable this warning if CONFIG_COMPILE_TEST=y.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Cc: Michal Marek mmarek@suse.cz Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 6e54abac1b8e0b7febffdbad37b605daef1cfcff) Signed-off-by: Alex Shi alex.shi@linaro.org --- scripts/Makefile.kasan | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 631619b..3f874d2 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -13,12 +13,16 @@ CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ --param asan-instrumentation-with-call-threshold=$(call_threshold))
ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) + ifneq ($(CONFIG_COMPILE_TEST),y) $(warning Cannot use CONFIG_KASAN: \ -fsanitize=kernel-address is not supported by compiler) + endif else ifeq ($(CFLAGS_KASAN),) - $(warning CONFIG_KASAN: compiler does not support all options.\ - Trying minimal configuration) + ifneq ($(CONFIG_COMPILE_TEST),y) + $(warning CONFIG_KASAN: compiler does not support all options.\ + Trying minimal configuration) + endif CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) endif endif
From: Joe Perches joe@perches.com
The documentation shows a need for gcc > 4.9.2, but it's really >=. The Kconfig entries don't show the required versions, so add them. Correct a latter/later typo too. Also mention that gcc 5 is required to catch out-of-bounds accesses to global and stack variables.
Signed-off-by: Joe Perches joe@perches.com Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 01e76903f655a4d88c2e09d3182436c65f6e1213) Signed-off-by: Alex Shi alex.shi@linaro.org --- Documentation/kasan.txt | 8 +++++--- lib/Kconfig.kasan | 8 ++++++-- 2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index 092fc10..4692241 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -9,7 +9,9 @@ a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs.
KASan uses compile-time instrumentation for checking every memory access, -therefore you will need a certain version of GCC > 4.9.2 +therefore you will need a gcc version of 4.9.2 or later. KASan could detect out +of bounds accesses to stack or global variables, but only if gcc 5.0 or later was +used to built the kernel.
Currently KASan is supported only for x86_64 architecture and requires that the kernel be built with the SLUB allocator. @@ -23,8 +25,8 @@ To enable KASAN configure kernel with:
and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline is compiler instrumentation types. The former produces smaller binary the -latter is 1.1 - 2 times faster. Inline instrumentation requires GCC 5.0 or -latter. +latter is 1.1 - 2 times faster. Inline instrumentation requires a gcc version +of 5.0 or later.
Currently KASAN works only with the SLUB memory allocator. For better bug detection and nicer report, enable CONFIG_STACKTRACE and put diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4fecaedc..777eda7 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -10,8 +10,11 @@ config KASAN help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. - This is strictly debugging feature. It consumes about 1/8 - of available memory and brings about ~x3 performance slowdown. + This is strictly a debugging feature and it requires a gcc version + of 4.9.2 or later. Detection of out of bounds accesses to stack or + global variables requires gcc 5.0 or later. + This feature consumes about 1/8 of available memory and brings about + ~x3 performance slowdown. For better error detection enable CONFIG_STACKTRACE, and add slub_debug=U to boot cmdline.
@@ -40,6 +43,7 @@ config KASAN_INLINE memory accesses. This is faster than outline (in some workloads it gives about x2 boost over outline instrumentation), but make kernel's .text size much bigger. + This requires a gcc version of 5.0 or later.
endchoice
From: Wang Long long.wanglong@huawei.com
Remove duplicate definition of the macro KASAN_FREE_PAGE in mm/kasan/kasan.h
Signed-off-by: Wang Long long.wanglong@huawei.com Acked-by: Andrey Ryabinin a.ryabinin@samsung.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit bffacb9132a306b7e22bb6366e5b277f20f67465) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/kasan.h | 1 - 1 file changed, 1 deletion(-)
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 4986b0a..c242adf 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -7,7 +7,6 @@ #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1)
#define KASAN_FREE_PAGE 0xFF /* page was freed */ -#define KASAN_FREE_PAGE 0xFF /* page was freed */ #define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */
From: Andrey Ryabinin ryabinin.a.a@gmail.com
The current definition of KASAN_SHADOW_OFFSET in include/linux/kasan.h will not work for the upcoming arm64 support, so move it to the arch header.
Signed-off-by: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Alexander Potapenko glider@google.com Cc: Alexey Klimov klimov.linux@gmail.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Aneesh Kumar K.V aneesh.kumar@linux.vnet.ibm.com Cc: Arnd Bergmann arnd@arndb.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: David Keitel dkeitel@codeaurora.org Cc: Dmitry Vyukov dvyukov@google.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Linus Walleij linus.walleij@linaro.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rik van Riel riel@redhat.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Will Deacon will.deacon@arm.com Cc: Yury yury.norov@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1439444244-26057-2-git-send-email-ryabinin.a.a@gmai... Signed-off-by: Ingo Molnar mingo@kernel.org (cherry picked from commit 920e277e17f12870188f4564887a95ae9ac03e31) Signed-off-by: Alex Shi alex.shi@linaro.org --- arch/x86/include/asm/kasan.h | 3 +++ include/linux/kasan.h | 1 - 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 8b22422..491e4fd 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -1,6 +1,9 @@ #ifndef _ASM_X86_KASAN_H #define _ASM_X86_KASAN_H
+#include <linux/const.h> +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + /* * Compiler uses shadow offset assuming that addresses start * from 0. Kernel addresses don't start from 0, so shadow diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5bb0744..788743f 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -10,7 +10,6 @@ struct vm_struct; #ifdef CONFIG_KASAN
#define KASAN_SHADOW_SCALE_SHIFT 3 -#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
#include <asm/kasan.h> #include <linux/sched.h>
From: Andrey Ryabinin ryabinin.a.a@gmail.com
Introduce generic kasan_populate_zero_shadow(shadow_start, shadow_end). This function maps kasan_zero_page to the [shadow_start, shadow_end] addresses.
This replaces the x86_64-specific populate_zero_shadow() and will be used for ARM64 in follow-on patches.
The main changes from the original version are:
 * Use p?d_populate*() instead of set_p?d()
 * Use the memblock allocator directly instead of vmemmap_alloc_block()
 * Use __pa() instead of __pa_nodebug(). __pa() causes trouble if we use it before kasan_early_init(); kasan_populate_zero_shadow() will be used later, so we are fine with __pa() here.
Signed-off-by: Andrey Ryabinin ryabinin.a.a@gmail.com Acked-by: Catalin Marinas catalin.marinas@arm.com Cc: Alexander Potapenko glider@google.com Cc: Alexey Klimov klimov.linux@gmail.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Aneesh Kumar K.V aneesh.kumar@linux.vnet.ibm.com Cc: Arnd Bergmann arnd@arndb.de Cc: David Keitel dkeitel@codeaurora.org Cc: Dmitry Vyukov dvyukov@google.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Linus Walleij linus.walleij@linaro.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rik van Riel riel@redhat.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Will Deacon will.deacon@arm.com Cc: Yury yury.norov@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1439444244-26057-3-git-send-email-ryabinin.a.a@gmai... Signed-off-by: Ingo Molnar mingo@kernel.org (cherry picked from commit 69786cdb379bbc6eab14cf2393c1abd879316e85) Signed-off-by: Alex Shi alex.shi@linaro.org
Conflicts: don't care arch/x86/mm/kasan_init_64.c --- arch/x86/mm/kasan_init_64.c | 9 +-- include/linux/kasan.h | 9 +++ mm/kasan/Makefile | 2 +- mm/kasan/kasan_init.c | 152 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 mm/kasan/kasan_init.c
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 4860906..d858afe 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -179,7 +179,7 @@ void __init kasan_init(void)
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
- populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, kasan_mem_to_shadow((void *)PAGE_OFFSET));
for (i = 0; i < E820_X_MAX; i++) { @@ -189,14 +189,15 @@ void __init kasan_init(void) if (map_range(&pfn_mapped[i])) panic("kasan: unable to allocate shadow!"); } - populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - kasan_mem_to_shadow((void *)__START_KERNEL_map)); + kasan_populate_zero_shadow( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + kasan_mem_to_shadow((void *)__START_KERNEL_map));
vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), (unsigned long)kasan_mem_to_shadow(_end), NUMA_NO_NODE);
- populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), + kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END);
memset(kasan_zero_page, 0, PAGE_SIZE); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 788743f..f4ffd04 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -12,8 +12,17 @@ struct vm_struct; #define KASAN_SHADOW_SCALE_SHIFT 3
#include <asm/kasan.h> +#include <asm/pgtable.h> #include <linux/sched.h>
+extern unsigned char kasan_zero_page[PAGE_SIZE]; +extern pte_t kasan_zero_pte[PTRS_PER_PTE]; +extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; +extern pud_t kasan_zero_pud[PTRS_PER_PUD]; + +void kasan_populate_zero_shadow(const void *shadow_start, + const void *shadow_end); + static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index bd837b8..6471014 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -5,4 +5,4 @@ CFLAGS_REMOVE_kasan.o = -pg # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
-obj-y := kasan.o report.o +obj-y := kasan.o report.o kasan_init.o diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c new file mode 100644 index 0000000..3f9a41c --- /dev/null +++ b/mm/kasan/kasan_init.c @@ -0,0 +1,152 @@ +/* + * This file contains some kasan initialization code. + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin ryabinin.a.a@gmail.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/bootmem.h> +#include <linux/init.h> +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/pfn.h> + +#include <asm/page.h> +#include <asm/pgalloc.h> + +/* + * This page serves two purposes: + * - It used as early shadow memory. The entire shadow region populated + * with this page, before we will be able to setup normal shadow memory. + * - Latter it reused it as zero shadow to cover large ranges of memory + * that allowed to access, but not handled by kasan (vmalloc/vmemmap ...). + */ +unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; + +#if CONFIG_PGTABLE_LEVELS > 3 +pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif +#if CONFIG_PGTABLE_LEVELS > 2 +pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; +#endif +pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; + +static __init void *early_alloc(size_t size, int node) +{ + return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), + BOOTMEM_ALLOC_ACCESSIBLE, node); +} + +static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte = pte_offset_kernel(pmd, addr); + pte_t zero_pte; + + zero_pte = pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL); + zero_pte = pte_wrprotect(zero_pte); + + while (addr + PAGE_SIZE <= end) { + set_pte_at(&init_mm, addr, pte, zero_pte); + addr += PAGE_SIZE; + pte = pte_offset_kernel(pmd, addr); + } +} + +static void __init zero_pmd_populate(pud_t *pud, unsigned long addr, + unsigned long end) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; + + do { + next = pmd_addr_end(addr, end); + + if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { + pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + continue; + } + + if (pmd_none(*pmd)) { + pmd_populate_kernel(&init_mm, pmd, + early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + } + zero_pte_populate(pmd, addr, next); + } while (pmd++, addr = next, addr != end); +} + +static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { + pmd_t *pmd; + + pud_populate(&init_mm, pud, kasan_zero_pmd); + pmd = pmd_offset(pud, addr); + pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + continue; + } + + if (pud_none(*pud)) { + pud_populate(&init_mm, pud, + early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + } + zero_pmd_populate(pud, addr, next); + } while (pud++, addr = next, addr != end); +} + +/** + * kasan_populate_zero_shadow - populate shadow memory region with + * kasan_zero_page + * @shadow_start - start of the memory range to populate + * @shadow_end - end of the memory range to populate + */ +void __init kasan_populate_zero_shadow(const void *shadow_start, + const void *shadow_end) +{ + unsigned long addr = (unsigned long)shadow_start; + 
unsigned long end = (unsigned long)shadow_end; + pgd_t *pgd = pgd_offset_k(addr); + unsigned long next; + + do { + next = pgd_addr_end(addr, end); + + if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + pud_t *pud; + pmd_t *pmd; + + /* + * kasan_zero_pud should be populated with pmds + * at this moment. + * [pud,pmd]_populate*() below needed only for + * 3,2 - level page tables where we don't have + * puds,pmds, so pgd_populate(), pud_populate() + * is noops. + */ + pgd_populate(&init_mm, pgd, kasan_zero_pud); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, kasan_zero_pmd); + pmd = pmd_offset(pud, addr); + pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + continue; + } + + if (pgd_none(*pgd)) { + pgd_populate(&init_mm, pgd, + early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + } + zero_pud_populate(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +}
From: David Rientjes rientjes@google.com
Allocating a large number of elements in atomic context could quickly deplete memory reserves, so just disallow atomic resizing entirely.
Nothing currently uses mempool_resize() with anything other than GFP_KERNEL, so convert existing callers to drop the gfp_mask.
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: David Rientjes rientjes@google.com Acked-by: Steffen Maier maier@linux.vnet.ibm.com [zfcp] Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Steve French sfrench@samba.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 11d83360452ea2a95e699da01f8e1bcc4676a5de) Signed-off-by: Alex Shi alex.shi@linaro.org --- drivers/s390/scsi/zfcp_erp.c | 4 ++-- fs/cifs/connect.c | 6 ++---- include/linux/mempool.h | 2 +- mm/mempool.c | 10 ++++++---- 4 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index c82fe65..f263e8c 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -738,11 +738,11 @@ static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *act) return ZFCP_ERP_FAILED;
if (mempool_resize(act->adapter->pool.sr_data, - act->adapter->stat_read_buf_num, GFP_KERNEL)) + act->adapter->stat_read_buf_num)) return ZFCP_ERP_FAILED;
if (mempool_resize(act->adapter->pool.status_read_req, - act->adapter->stat_read_buf_num, GFP_KERNEL)) + act->adapter->stat_read_buf_num)) return ZFCP_ERP_FAILED;
atomic_set(&act->adapter->stat_miss, act->adapter->stat_read_buf_num); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 82ebe7d..c750973 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
length = atomic_dec_return(&tcpSesAllocCount); if (length > 0) - mempool_resize(cifs_req_poolp, length + cifs_min_rcv, - GFP_KERNEL); + mempool_resize(cifs_req_poolp, length + cifs_min_rcv); }
static int @@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p)
length = atomic_inc_return(&tcpSesAllocCount); if (length > 1) - mempool_resize(cifs_req_poolp, length + cifs_min_rcv, - GFP_KERNEL); + mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
set_freezable(); while (server->tcpStatus != CifsExiting) { diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 39ed62a..b19b302 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -29,7 +29,7 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int nid);
-extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); +extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); extern void mempool_free(void *element, mempool_t *pool); diff --git a/mm/mempool.c b/mm/mempool.c index e209c98..949970d 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -113,23 +113,24 @@ EXPORT_SYMBOL(mempool_create_node); * mempool_create(). * @new_min_nr: the new minimum number of elements guaranteed to be * allocated for this pool. - * @gfp_mask: the usual allocation bitmask. * * This function shrinks/grows the pool. In the case of growing, * it cannot be guaranteed that the pool will be grown to the new * size immediately, but new mempool_free() calls will refill it. + * This function may sleep. * * Note, the caller must guarantee that no mempool_destroy is called * while this function is running. mempool_alloc() & mempool_free() * might be called (eg. from IRQ contexts) while this function executes. */ -int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) +int mempool_resize(mempool_t *pool, int new_min_nr) { void *element; void **new_elements; unsigned long flags;
BUG_ON(new_min_nr <= 0); + might_sleep();
spin_lock_irqsave(&pool->lock, flags); if (new_min_nr <= pool->min_nr) { @@ -145,7 +146,8 @@ int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) spin_unlock_irqrestore(&pool->lock, flags);
/* Grow the pool */ - new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); + new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements), + GFP_KERNEL); if (!new_elements) return -ENOMEM;
@@ -164,7 +166,7 @@ int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
while (pool->curr_nr < pool->min_nr) { spin_unlock_irqrestore(&pool->lock, flags); - element = pool->alloc(gfp_mask, pool->pool_data); + element = pool->alloc(GFP_KERNEL, pool->pool_data); if (!element) goto out; spin_lock_irqsave(&pool->lock, flags);
From: David Rientjes rientjes@google.com
All occurrences of mempools based on slab caches with object constructors have been removed from the tree, so disallow creating them.
We can only dereference mem->ctor in mm/mempool.c without including mm/slab.h in include/linux/mempool.h. So simply note the restriction, just like the comment restricting usage of __GFP_ZERO, and warn on kernels with CONFIG_DEBUG_VM() if such a mempool is allocated from.
We don't want to incur this check on every element allocation, so use VM_BUG_ON().
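A minimal kernel-side sketch (hypothetical cache and pool names, for illustration only) of the pattern this change forbids; on a CONFIG_DEBUG_VM kernel a slab-backed pool whose cache has a constructor now trips VM_BUG_ON(mem->ctor) as soon as mempool_alloc_slab() runs, including while pre-filling the reserve:

    #include <linux/module.h>
    #include <linux/mempool.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    static void demo_ctor(void *p)          /* constructor: now disallowed */
    {
        memset(p, 0, 128);
    }

    static int __init demo_init(void)
    {
        struct kmem_cache *cache;
        mempool_t *pool;

        cache = kmem_cache_create("demo_ctor_cache", 128, 0, 0, demo_ctor);
        if (!cache)
            return -ENOMEM;

        /*
         * Pre-allocating the reserve goes through mempool_alloc_slab(),
         * which with this patch hits VM_BUG_ON(mem->ctor) when
         * CONFIG_DEBUG_VM is enabled.
         */
        pool = mempool_create_slab_pool(4, cache);
        if (pool)
            mempool_destroy(pool);
        kmem_cache_destroy(cache);
        return 0;
    }
    module_init(demo_init);
    MODULE_LICENSE("GPL");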
Signed-off-by: David Rientjes rientjes@google.com Cc: Dave Kleikamp shaggy@kernel.org Cc: Christoph Hellwig hch@lst.de Cc: Sebastian Ott sebott@linux.vnet.ibm.com Cc: Mikulas Patocka mpatocka@redhat.com Cc: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit e244c9e66f6197f55f6fbb2d5e70714e262cc595) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/linux/mempool.h | 3 ++- mm/mempool.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/linux/mempool.h b/include/linux/mempool.h index b19b302..69b6951 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -36,7 +36,8 @@ extern void mempool_free(void *element, mempool_t *pool);
/* * A mempool_alloc_t and mempool_free_t that get the memory from - * a slab that is passed in through pool_data. + * a slab cache that is passed in through pool_data. + * Note: the slab cache may not have a ctor function. */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); diff --git a/mm/mempool.c b/mm/mempool.c index 949970d..b60fb85 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -15,6 +15,7 @@ #include <linux/mempool.h> #include <linux/blkdev.h> #include <linux/writeback.h> +#include "slab.h"
static void add_element(mempool_t *pool, void *element) { @@ -334,6 +335,7 @@ EXPORT_SYMBOL(mempool_free); void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { struct kmem_cache *mem = pool_data; + VM_BUG_ON(mem->ctor); return kmem_cache_alloc(mem, gfp_mask); } EXPORT_SYMBOL(mempool_alloc_slab);
From: David Rientjes rientjes@google.com
Mempools keep elements in a reserved pool for contexts in which allocation may not be possible. When an element is allocated from the reserved pool, its memory contents are the same as when it was added to the reserved pool.
Because of this, elements lack any free poisoning to detect use-after-free errors.
This patch adds free poisoning for elements backed by the slab allocator. This is possible because the mempool layer knows the object size of each element.
When an element is added to the reserved pool, it is poisoned with POISON_FREE. When it is removed from the reserved pool, the contents are checked for POISON_FREE. If there is a mismatch, a warning is emitted to the kernel log.
This is only effective for configs with CONFIG_DEBUG_SLAB or CONFIG_SLUB_DEBUG_ON.
[fabio.estevam@freescale.com: use '%zu' for printing 'size_t' variable] [arnd@arndb.de: add missing include] Signed-off-by: David Rientjes rientjes@google.com Cc: Dave Kleikamp shaggy@kernel.org Cc: Christoph Hellwig hch@lst.de Cc: Sebastian Ott sebott@linux.vnet.ibm.com Cc: Mikulas Patocka mpatocka@redhat.com Cc: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Fabio Estevam fabio.estevam@freescale.com Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit bdfedb76f4f5aa5e37380e3b71adee4a39f30fc6) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/mempool.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 2 deletions(-)
diff --git a/mm/mempool.c b/mm/mempool.c index b60fb85..2884d5b 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -6,10 +6,12 @@ * extreme VM load. * * started by Ingo Molnar, Copyright (C) 2001 + * debugging by David Rientjes, Copyright (C) 2015 */
#include <linux/mm.h> #include <linux/slab.h> +#include <linux/highmem.h> #include <linux/kmemleak.h> #include <linux/export.h> #include <linux/mempool.h> @@ -17,16 +19,102 @@ #include <linux/writeback.h> #include "slab.h"
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON) +static void poison_error(mempool_t *pool, void *element, size_t size, + size_t byte) +{ + const int nr = pool->curr_nr; + const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0); + const int end = min_t(int, byte + (BITS_PER_LONG / 8), size); + int i; + + pr_err("BUG: mempool element poison mismatch\n"); + pr_err("Mempool %p size %zu\n", pool, size); + pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : ""); + for (i = start; i < end; i++) + pr_cont("%x ", *(u8 *)(element + i)); + pr_cont("%s\n", end < size ? "..." : ""); + dump_stack(); +} + +static void __check_element(mempool_t *pool, void *element, size_t size) +{ + u8 *obj = element; + size_t i; + + for (i = 0; i < size; i++) { + u8 exp = (i < size - 1) ? POISON_FREE : POISON_END; + + if (obj[i] != exp) { + poison_error(pool, element, size, i); + return; + } + } + memset(obj, POISON_INUSE, size); +} + +static void check_element(mempool_t *pool, void *element) +{ + /* Mempools backed by slab allocator */ + if (pool->free == mempool_free_slab || pool->free == mempool_kfree) + __check_element(pool, element, ksize(element)); + + /* Mempools backed by page allocator */ + if (pool->free == mempool_free_pages) { + int order = (int)(long)pool->pool_data; + void *addr = kmap_atomic((struct page *)element); + + __check_element(pool, addr, 1UL << (PAGE_SHIFT + order)); + kunmap_atomic(addr); + } +} + +static void __poison_element(void *element, size_t size) +{ + u8 *obj = element; + + memset(obj, POISON_FREE, size - 1); + obj[size - 1] = POISON_END; +} + +static void poison_element(mempool_t *pool, void *element) +{ + /* Mempools backed by slab allocator */ + if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) + __poison_element(element, ksize(element)); + + /* Mempools backed by page allocator */ + if (pool->alloc == mempool_alloc_pages) { + int order = (int)(long)pool->pool_data; + void *addr = kmap_atomic((struct page *)element); + + __poison_element(addr, 1UL << (PAGE_SHIFT + order)); + kunmap_atomic(addr); + } +} +#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */ +static inline void check_element(mempool_t *pool, void *element) +{ +} +static inline void poison_element(mempool_t *pool, void *element) +{ +} +#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */ + static void add_element(mempool_t *pool, void *element) { BUG_ON(pool->curr_nr >= pool->min_nr); + poison_element(pool, element); pool->elements[pool->curr_nr++] = element; }
static void *remove_element(mempool_t *pool) { - BUG_ON(pool->curr_nr <= 0); - return pool->elements[--pool->curr_nr]; + void *element = pool->elements[--pool->curr_nr]; + + BUG_ON(pool->curr_nr < 0); + check_element(pool, element); + return element; }
/**
From: Andrey Ryabinin a.ryabinin@samsung.com
Mempools keep allocated objects in reserve for situations when an ordinary allocation may not be possible to satisfy. These objects shouldn't be accessed before they leave the pool.
This patch poisons elements when they enter the pool and unpoisons them when they leave it. This lets KASan detect use-after-free of a mempool's elements.
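A hypothetical buggy caller (illustration only, not from the patch) of the kind that now gets caught: once an element goes back into the reserved pool it is poisoned, so any later access is reported by KASan as a use-after-free.

    #include <linux/mempool.h>
    #include <linux/slab.h>

    static void demo_use_after_mempool_free(mempool_t *pool)
    {
        char *buf = mempool_alloc(pool, GFP_KERNEL);

        if (!buf)
            return;

        buf[0] = 'x';               /* valid use */
        mempool_free(buf, pool);

        /*
         * BUG (intentional, for illustration): if the element was taken
         * back into the reserved pool it is now poisoned, and with this
         * patch KASan reports this write as a use-after-free.
         */
        buf[0] = 'y';
    }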
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Tested-by: David Rientjes rientjes@google.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Dmitry Chernenkov drcheren@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 923936157b158f36bd6a3d86496dce82b1a957de) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/linux/kasan.h | 2 ++ mm/kasan/kasan.c | 13 +++++++++++++ mm/mempool.c | 23 +++++++++++++++++++++++ 3 files changed, 38 insertions(+)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index f4ffd04..4b9f85c 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -52,6 +52,7 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object);
void kasan_kmalloc_large(const void *ptr, size_t size); void kasan_kfree_large(const void *ptr); +void kasan_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size); void kasan_krealloc(const void *object, size_t new_size);
@@ -79,6 +80,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache,
static inline void kasan_kmalloc_large(void *ptr, size_t size) {} static inline void kasan_kfree_large(const void *ptr) {} +static inline void kasan_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size) {} static inline void kasan_krealloc(const void *object, size_t new_size) {} diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 936d816..6c513a6 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -389,6 +389,19 @@ void kasan_krealloc(const void *object, size_t size) kasan_kmalloc(page->slab_cache, object, size); }
+void kasan_kfree(void *ptr) +{ + struct page *page; + + page = virt_to_head_page(ptr); + + if (unlikely(!PageSlab(page))) + kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), + KASAN_FREE_PAGE); + else + kasan_slab_free(page->slab_cache, ptr); +} + void kasan_kfree_large(const void *ptr) { struct page *page = virt_to_page(ptr); diff --git a/mm/mempool.c b/mm/mempool.c index 2884d5b..2cc08de 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/highmem.h> +#include <linux/kasan.h> #include <linux/kmemleak.h> #include <linux/export.h> #include <linux/mempool.h> @@ -101,10 +102,31 @@ static inline void poison_element(mempool_t *pool, void *element) } #endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
+static void kasan_poison_element(mempool_t *pool, void *element) +{ + if (pool->alloc == mempool_alloc_slab) + kasan_slab_free(pool->pool_data, element); + if (pool->alloc == mempool_kmalloc) + kasan_kfree(element); + if (pool->alloc == mempool_alloc_pages) + kasan_free_pages(element, (unsigned long)pool->pool_data); +} + +static void kasan_unpoison_element(mempool_t *pool, void *element) +{ + if (pool->alloc == mempool_alloc_slab) + kasan_slab_alloc(pool->pool_data, element); + if (pool->alloc == mempool_kmalloc) + kasan_krealloc(element, (size_t)pool->pool_data); + if (pool->alloc == mempool_alloc_pages) + kasan_alloc_pages(element, (unsigned long)pool->pool_data); +} + static void add_element(mempool_t *pool, void *element) { BUG_ON(pool->curr_nr >= pool->min_nr); poison_element(pool, element); + kasan_poison_element(pool, element); pool->elements[pool->curr_nr++] = element; }
@@ -114,6 +136,7 @@ static void *remove_element(mempool_t *pool)
BUG_ON(pool->curr_nr < 0); check_element(pool, element); + kasan_unpoison_element(pool, element); return element; }
From: Sergey Senozhatsky sergey.senozhatsky@gmail.com
mempool_destroy() does not tolerate a NULL mempool_t pointer argument and performs a NULL-pointer dereference. This requires additional attention and effort from developers/reviewers and forces all mempool_destroy() callers to do a NULL check:
if (pool) mempool_destroy(pool);
Otherwise they are invalid mempool_destroy() users.
Tweak mempool_destroy() and NULL-check the pointer there.
Proposed by Andrew Morton.
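For callers this means the usual error-path guard can simply be dropped. A hypothetical before/after sketch:

        /* before: every caller has to guard against NULL itself */
        if (pool)
                mempool_destroy(pool);

        /* after: mempool_destroy() tolerates NULL, like kfree() and vfree() */
        mempool_destroy(pool);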
Link: https://lkml.org/lkml/2015/6/8/583 Signed-off-by: Sergey Senozhatsky sergey.senozhatsky@gmail.com Acked-by: David Rientjes rientjes@google.com Cc: Julia Lawall julia.lawall@lip6.fr Cc: Joe Perches joe@perches.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 4e3ca3e033d1eea62fa16c3fdbef4f20427bd0de) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/mempool.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/mm/mempool.c b/mm/mempool.c index 2cc08de..4c533bc 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -150,6 +150,9 @@ static void *remove_element(mempool_t *pool) */ void mempool_destroy(mempool_t *pool) { + if (unlikely(!pool)) + return; + while (pool->curr_nr) { void *element = remove_element(pool); pool->free(element, pool->pool_data);
From: Matthew Dawson matthew@mjdsystems.ca
When removing an element from the mempool, mark it as unpoisoned in KASAN before verifying its contents for SLUB/SLAB debugging. Otherwise KASAN flags the reads that check the element's poison pattern as use-after-free reads.
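A simplified sketch of the resulting ordering in remove_element() (see the diff below for the actual change):

        static void *remove_element(mempool_t *pool)
        {
                void *element = pool->elements[--pool->curr_nr];

                BUG_ON(pool->curr_nr < 0);
                kasan_unpoison_element(pool, element);  /* make the element readable first */
                check_element(pool, element);           /* only then verify the debug poison pattern */
                return element;
        }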
Signed-off-by: Matthew Dawson matthew@mjdsystems.ca Acked-by: Andrey Ryabinin aryabinin@virtuozzo.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 7640131032db9118a78af715ac77ba2debeeb17c) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/mempool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/mempool.c b/mm/mempool.c index 4c533bc..5e18249 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -135,8 +135,8 @@ static void *remove_element(mempool_t *pool) void *element = pool->elements[--pool->curr_nr];
BUG_ON(pool->curr_nr < 0); - check_element(pool, element); kasan_unpoison_element(pool, element); + check_element(pool, element); return element; }
From: Andrey Ryabinin a.ryabinin@samsung.com
Mempools keep allocated objects in reserve for situations when an ordinary allocation cannot be satisfied. These objects shouldn't be accessed before they leave the pool.
This patch poisons elements when they enter the pool and unpoisons them when they leave it. This lets KASan detect use-after-free of mempool elements.
Signed-off-by: Andrey Ryabinin a.ryabinin@samsung.com Tested-by: David Rientjes rientjes@google.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Dmitry Chernenkov drcheren@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 923936157b158f36bd6a3d86496dce82b1a957de) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/mempool.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/mempool.c b/mm/mempool.c index 5e18249..d4c4e4c 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -137,6 +137,7 @@ static void *remove_element(mempool_t *pool) BUG_ON(pool->curr_nr < 0); kasan_unpoison_element(pool, element); check_element(pool, element); + kasan_unpoison_element(pool, element); return element; }
From: Xishi Qiu qiuxishi@huawei.com
The shadow that corresponds to 16 bytes of memory may span 2 or 3 shadow bytes. If the memory is 8-byte aligned, the shadow takes only 2 bytes, so checking "shadow_first_bytes" is enough and there is no need to call "memory_is_poisoned_1(addr + 15)". But the test "if (likely(!last_byte))" is the wrong judgement.
e.g. for addr = 0, last_byte = 15 & KASAN_SHADOW_MASK = 7, so the code still goes on to call "memory_is_poisoned_1(addr + 15)" even though the access is 8-byte aligned.
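A standalone sketch (assuming the usual KASAN_SHADOW_SCALE_SHIFT of 3, i.e. one shadow byte per 8 bytes of memory) showing that an 8-byte-aligned 16-byte access touches exactly two shadow bytes, while an unaligned one spills into a third:

        #include <stdio.h>
        #include <stdint.h>

        #define KASAN_SHADOW_SCALE_SHIFT 3
        #define KASAN_SHADOW_MASK ((1UL << KASAN_SHADOW_SCALE_SHIFT) - 1)

        int main(void)
        {
                for (uint64_t addr = 0; addr < 8; addr++) {
                        uint64_t first = addr >> KASAN_SHADOW_SCALE_SHIFT;
                        uint64_t last = (addr + 15) >> KASAN_SHADOW_SCALE_SHIFT;

                        printf("addr=%llu: shadow bytes %llu..%llu -> %s\n",
                               (unsigned long long)addr,
                               (unsigned long long)first,
                               (unsigned long long)last,
                               (addr & KASAN_SHADOW_MASK) == 0 ?
                               "2 bytes, checking the first two is enough" :
                               "3 bytes, memory_is_poisoned_1(addr + 15) is needed");
                }
                return 0;
        }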
Signed-off-by: Xishi Qiu qiuxishi@huawei.com Acked-by: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Andrey Konovalov adech.fo@gmail.com Cc: Rusty Russell rusty@rustcorp.com.au Cc: Michal Marek mmarek@suse.cz Cc: zhongjiang@huawei.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 8d77a6d18ae9ccfd5eee1cc551ee4ac27fd41464) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/kasan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 6c513a6..3d64922 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr)
if (unlikely(*shadow_addr)) { u16 shadow_first_bytes = *(u16 *)shadow_addr; - s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK;
if (unlikely(shadow_first_bytes)) return true;
- if (likely(!last_byte)) + if (likely(IS_ALIGNED(addr, 8))) return false;
return memory_is_poisoned_1(addr + 15);
From: Andrey Ryabinin ryabinin.a.a@gmail.com
This patch adds arch specific code for kernel address sanitizer (see Documentation/kasan.txt).
1/8 of the kernel address space is reserved for shadow memory. There was no hole big enough for this, so the virtual addresses for the shadow were stolen from the vmalloc area.
At the early boot stage the whole shadow region is populated with just one physical page (kasan_zero_page). Later, this page is reused as a read-only zero shadow for memory that KASan currently doesn't track (vmalloc). After the physical memory has been mapped, pages for the shadow memory are allocated and mapped.
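For concreteness, a standalone sketch (assuming CONFIG_ARM64_VA_BITS=39) that computes where the shadow region ends up; the printed values should match KASAN_SHADOW_START/END in asm/kasan.h and KASAN_SHADOW_OFFSET in the Makefile hunk below:

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                int va_bits = 39;                              /* assumed CONFIG_ARM64_VA_BITS */
                uint64_t va_start = ~0ULL << va_bits;          /* VA_START */
                uint64_t shadow_start = va_start;              /* KASAN_SHADOW_START */
                uint64_t shadow_end = shadow_start + (1ULL << (va_bits - 3));
                uint64_t shadow_offset = shadow_end - (1ULL << 61);

                printf("KASAN_SHADOW_START  = 0x%016llx\n", (unsigned long long)shadow_start);
                printf("KASAN_SHADOW_END    = 0x%016llx\n", (unsigned long long)shadow_end);
                printf("KASAN_SHADOW_OFFSET = 0x%016llx\n", (unsigned long long)shadow_offset);
                return 0;
        }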
Functions like memset/memmove/memcpy do a lot of memory accesses. If a bad pointer is passed to one of these functions, it is important to catch this. Compiler instrumentation cannot do that, since these functions are written in assembly, so KASan replaces the memory functions with manually instrumented variants. The original functions are declared as weak symbols so that the strong definitions in mm/kasan/kasan.c can replace them. The original functions also have aliases with a '__' prefix in the name, so the non-instrumented variant can be called when needed. Some files are built without kasan instrumentation (e.g. mm/slub.c); for such files the original mem* functions are replaced (via #define) with the prefixed variants to disable memory access checks.
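The interception pattern looks roughly like this (a simplified sketch of the wrapper defined in mm/kasan/kasan.c; __memset is the original, uninstrumented assembly implementation):

        void *memset(void *addr, int c, size_t len)
        {
                /* report if any byte of [addr, addr + len) is poisoned */
                check_memory_region((unsigned long)addr, len, true);

                return __memset(addr, c, len);
        }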
Signed-off-by: Andrey Ryabinin ryabinin.a.a@gmail.com Tested-by: Linus Walleij linus.walleij@linaro.org Reviewed-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com (cherry picked from commit 39d114ddc68223022c12ae3a1573912bc4b585e5) Signed-off-by: Alex Shi alex.shi@linaro.org
BTW, commit 9f25e6ad5 ("arm64: expose number of page table levels on Kconfig level") changed CONFIG_ARM64_PGTABLE_LEVELS to CONFIG_PGTABLE_LEVELS, which would introduce many conflicts. Since only arch/arm64/mm/kasan_init.c uses two instances of this config, just change them back. --- arch/arm64/Kconfig | 1 + arch/arm64/Makefile | 7 ++ arch/arm64/include/asm/kasan.h | 36 +++++++++ arch/arm64/include/asm/pgtable.h | 7 ++ arch/arm64/include/asm/string.h | 16 ++++ arch/arm64/kernel/Makefile | 2 + arch/arm64/kernel/arm64ksyms.c | 3 + arch/arm64/kernel/head.S | 3 + arch/arm64/kernel/module.c | 16 +++- arch/arm64/kernel/setup.c | 4 + arch/arm64/lib/memcpy.S | 3 + arch/arm64/lib/memmove.S | 7 +- arch/arm64/lib/memset.S | 3 + arch/arm64/mm/Makefile | 3 + arch/arm64/mm/kasan_init.c | 165 +++++++++++++++++++++++++++++++++++++++ drivers/firmware/efi/Makefile | 8 ++ mm/kasan/kasan_init.c | 4 +- scripts/Makefile.kasan | 4 +- 18 files changed, 284 insertions(+), 8 deletions(-) create mode 100644 arch/arm64/include/asm/kasan.h create mode 100644 arch/arm64/mm/kasan_init.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 00b9c48..8843060 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -36,6 +36,7 @@ config ARM64 select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2d54c55..7ad2e9ad 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -46,6 +46,13 @@ else TEXT_OFFSET := 0x00080000 endif
+# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61) +# in 32-bit arithmetic +KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ + (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \ + + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \ + - (1 << (64 - 32 - 3)) )) ) + export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/ diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h new file mode 100644 index 0000000..71dfe14 --- /dev/null +++ b/arch/arm64/include/asm/kasan.h @@ -0,0 +1,36 @@ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN + +#include <asm/memory.h> + +/* + * KASAN_SHADOW_START: beginning of the kernel virtual addresses. + * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. + */ +#define KASAN_SHADOW_START (VA_START) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3))) + +/* + * This value is used to map an address to the corresponding shadow + * address by the following formula: + * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * + * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END] + * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET + * should satisfy the following equation: + * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61) + */ +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) + +void kasan_init(void); + +#else +static inline void kasan_init(void) { } +#endif + +#endif +#endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2c77d4a..f95a8b6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -41,7 +41,14 @@ * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) + +#ifndef CONFIG_KASAN #define VMALLOC_START (VA_START) +#else +#include <asm/kasan.h> +#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K) +#endif + #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 64d2d48..2eb714c 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t);
#define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMMOVE extern void *memmove(void *, const void *, __kernel_size_t); +extern void *__memmove(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMCHR extern void *memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET extern void *memset(void *, int, __kernel_size_t); +extern void *__memset(void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMCMP extern int memcmp(const void *, const void *, size_t);
+ +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) +#endif + #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 33cba01..773ff56 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -6,6 +6,8 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+KASAN_SANITIZE_efi-stub.o := n + CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index a85843d..3b6d8cc 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memcmp);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b9eb040..77bfa34 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -455,6 +455,9 @@ __mmap_switched: str x21, [x5] // Save FDT pointer str x24, [x6] // Save PHYS_OFFSET mov x29, #0 +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif b start_kernel ENDPROC(__mmap_switched)
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 468c12a..5112801 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -21,6 +21,7 @@ #include <linux/bitops.h> #include <linux/elf.h> #include <linux/gfp.h> +#include <linux/kasan.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> @@ -32,9 +33,18 @@
void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, - NUMA_NO_NODE, __builtin_return_address(0)); + void *p; + + p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); + + if (p && (kasan_module_alloc(p, size) < 0)) { + vfree(p); + return NULL; + } + + return p; }
enum aarch64_reloc_op { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 8669e4e..9dc321e 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -52,6 +52,7 @@ #include <asm/cputable.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> +#include <asm/kasan.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -454,6 +455,9 @@ void __init setup_arch(char **cmdline_p)
paging_init(); relocate_initrd(); + + kasan_init(); + request_standard_resources();
efi_idmap_init(); diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 36a6a62..6761393 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -68,7 +68,10 @@ stp \ptr, \regB, [\regC], \val .endm
+ .weak memcpy +ENTRY(__memcpy) ENTRY(memcpy) #include "copy_template.S" ret ENDPIPROC(memcpy) +ENDPROC(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 68e2f20..a5a4459 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -57,12 +57,14 @@ C_h .req x12 D_l .req x13 D_h .req x14
+ .weak memmove +ENTRY(__memmove) ENTRY(memmove) cmp dstin, src - b.lo memcpy + b.lo __memcpy add tmp1, src, count cmp dstin, tmp1 - b.hs memcpy /* No overlap. */ + b.hs __memcpy /* No overlap. */
add dst, dstin, count add src, src, count @@ -195,3 +197,4 @@ ENTRY(memmove) b.ne .Ltail63 ret ENDPIPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 29f405f..f2670a9 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -54,6 +54,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9
+ .weak memset +ENTRY(__memset) ENTRY(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 @@ -214,3 +216,4 @@ ENTRY(memset) b.ne .Ltail_maybe_long ret ENDPIPROC(memset) +ENDPROC(__memset) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index c56179e..00bc265 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -3,3 +3,6 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o + +obj-$(CONFIG_KASAN) += kasan_init.o +KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c new file mode 100644 index 0000000..b6a92f5 --- /dev/null +++ b/arch/arm64/mm/kasan_init.c @@ -0,0 +1,165 @@ +/* + * This file contains kasan initialization code for ARM64. + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin ryabinin.a.a@gmail.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) "kasan: " fmt +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/start_kernel.h> + +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); + +static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte; + unsigned long next; + + if (pmd_none(*pmd)) + pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + + pte = pte_offset_kernel(pmd, addr); + do { + next = addr + PAGE_SIZE; + set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page), + PAGE_KERNEL)); + } while (pte++, addr = next, addr != end && pte_none(*pte)); +} + +static void __init kasan_early_pmd_populate(pud_t *pud, + unsigned long addr, + unsigned long end) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none(*pud)) + pud_populate(&init_mm, pud, kasan_zero_pmd); + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + kasan_early_pte_populate(pmd, addr, next); + } while (pmd++, addr = next, addr != end && pmd_none(*pmd)); +} + +static void __init kasan_early_pud_populate(pgd_t *pgd, + unsigned long addr, + unsigned long end) +{ + pud_t *pud; + unsigned long next; + + if (pgd_none(*pgd)) + pgd_populate(&init_mm, pgd, kasan_zero_pud); + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + kasan_early_pmd_populate(pud, addr, next); + } while (pud++, addr = next, addr != end && pud_none(*pud)); +} + +static void __init kasan_map_early_shadow(void) +{ + unsigned long addr = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + kasan_early_pud_populate(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +void __init kasan_early_init(void) +{ + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + kasan_map_early_shadow(); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + /* + * Remove references to kasan page tables from + * swapper_pg_dir. 
pgd_clear() can't be used + * here because it's nop on 2,3-level pagetable setups + */ + for (; start < end; start += PGDIR_SIZE) + set_pgd(pgd_offset_k(start), __pgd(0)); +} + +static void __init cpu_set_ttbr1(unsigned long ttbr1) +{ + asm( + " msr ttbr1_el1, %0\n" + " isb" + : + : "r" (ttbr1)); +} + +void __init kasan_init(void) +{ + struct memblock_region *reg; + + /* + * We are going to perform proper setup of shadow memory. + * At first we should unmap early shadow (clear_pgds() call bellow). + * However, instrumented code couldn't execute without shadow memory. + * tmp_pg_dir used to keep early shadow mapped until full shadow + * setup will be finished. + */ + memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); + cpu_set_ttbr1(__pa(tmp_pg_dir)); + flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_mem_to_shadow((void *)MODULES_VADDR)); + + for_each_memblock(memory, reg) { + void *start = (void *)__phys_to_virt(reg->base); + void *end = (void *)__phys_to_virt(reg->base + reg->size); + + if (start >= end) + break; + + /* + * end + 1 here is intentional. We check several shadow bytes in + * advance to slightly speed up fastpath. In some rare cases + * we could cross boundary of mapped shadow, so we just map + * some more here. + */ + vmemmap_populate((unsigned long)kasan_mem_to_shadow(start), + (unsigned long)kasan_mem_to_shadow(end) + 1, + pfn_to_nid(virt_to_pfn(start))); + } + + memset(kasan_zero_page, 0, PAGE_SIZE); + cpu_set_ttbr1(__pa(swapper_pg_dir)); + flush_tlb_all(); + + /* At this point kasan is fully initialized. Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized\n"); +} diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index aef6a95..bef2579 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -1,6 +1,14 @@ # # Makefile for linux kernel # + +# +# ARM64 maps efi runtime services in userspace addresses +# which don't have KASAN shadow. So dereference of these addresses +# in efi_call_virt() will cause crash if this code instrumented. +# +KASAN_SANITIZE_runtime-wrappers.o := n + obj-$(CONFIG_EFI) += efi.o vars.o reboot.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 3f9a41c..f41ac9a 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -29,10 +29,10 @@ */ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 3 +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; #endif -#if CONFIG_PGTABLE_LEVELS > 2 +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 3f874d2..37323b0 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -5,10 +5,12 @@ else call_threshold := 0 endif
+KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) + CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address
CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ - -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ + -fasan-shadow-offset=$(KASAN_SHADOW_OFFSET) \ --param asan-stack=1 --param asan-globals=1 \ --param asan-instrumentation-with-call-threshold=$(call_threshold))
From: Will Deacon will.deacon@arm.com
Sparse reports some new issues introduced by the kasan patches:
arch/arm64/mm/kasan_init.c:91:13: warning: no previous prototype for 'kasan_early_init' [-Wmissing-prototypes] void __init kasan_early_init(void) ^ arch/arm64/mm/kasan_init.c:91:13: warning: symbol 'kasan_early_init' was not declared. Should it be static? [sparse]
This patch resolves the problem by adding a prototype for kasan_early_init and marking the function as asmlinkage, since it's only called from head.S.
Signed-off-by: Will Deacon will.deacon@arm.com Acked-by: Andrey Ryabinin ryabinin.a.a@gmail.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com (cherry picked from commit 83040123fde42ec532d3b632efb5f7f84024e61d) Signed-off-by: Alex Shi alex.shi@linaro.org --- arch/arm64/include/asm/kasan.h | 2 ++ arch/arm64/mm/kasan_init.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 71dfe14..2774fa3 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -5,6 +5,7 @@
#ifdef CONFIG_KASAN
+#include <linux/linkage.h> #include <asm/memory.h>
/* @@ -27,6 +28,7 @@ #define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3)))
void kasan_init(void); +asmlinkage void kasan_early_init(void);
#else static inline void kasan_init(void) { } diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index b6a92f5..cf038c7 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -88,7 +88,7 @@ static void __init kasan_map_early_shadow(void) } while (pgd++, addr = next, addr != end); }
-void __init kasan_early_init(void) +asmlinkage void __init kasan_early_init(void) { BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
From: Andrey Ryabinin aryabinin@virtuozzo.com
On KASAN + 16K_PAGES + 48BIT_VA arch/arm64/mm/kasan_init.c: In function ‘kasan_early_init’: include/linux/compiler.h:484:38: error: call to ‘__compiletime_assert_95’ declared with attribute error: BUILD_BUG_ON failed: !IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE) _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
Currently KASAN will not work with 16K_PAGES and 48BIT_VA, so forbid this configuration to avoid the above build failure.
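The failing assertion follows from simple arithmetic; a standalone sketch (assuming 16K pages, i.e. PAGE_SHIFT = 14 and 11 bits per table level, so a 48-bit VA needs 4 levels and PGDIR_SIZE = 2^47):

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                uint64_t pgdir_size = 1ULL << (14 + 3 * 11);             /* 2^47 */
                uint64_t shadow_start = ~0ULL << 48;                     /* VA_START == KASAN_SHADOW_START */
                uint64_t shadow_end = shadow_start + (1ULL << (48 - 3)); /* KASAN_SHADOW_END */

                printf("start %% PGDIR_SIZE = 0x%llx\n",
                       (unsigned long long)(shadow_start % pgdir_size)); /* 0: aligned */
                printf("end   %% PGDIR_SIZE = 0x%llx\n",
                       (unsigned long long)(shadow_end % pgdir_size));   /* non-zero: BUILD_BUG_ON fires */
                return 0;
        }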
Signed-off-by: Andrey Ryabinin aryabinin@virtuozzo.com Reported-by: Suzuki K. Poulose Suzuki.Poulose@arm.com Acked-by: Mark Rutland mark.rutland@arm.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com (cherry picked from commit f1b9032f61c0412082a240cb7245f8b79e09ae8d) Signed-off-by: Alex Shi alex.shi@linaro.org --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8843060..a4a6224 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -36,7 +36,7 @@ config ARM64 select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP + select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT
From: "Aneesh Kumar K.V" aneesh.kumar@linux.vnet.ibm.com
The function only disables/enables reporting. A later patch will add an early kasan enable/disable. Rename kasan_enabled() to kasan_report_enabled() to properly reflect its function.
Signed-off-by: Aneesh Kumar K.V aneesh.kumar@linux.vnet.ibm.com Reviewed-by: Andrey Ryabinin ryabinin.a.a@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 0ba8663cbfae066fc504b858db7cbb7d03c2b872) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/kasan.h | 2 +- mm/kasan/report.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index c242adf..a6b46cc 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -63,7 +63,7 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) << KASAN_SHADOW_SCALE_SHIFT); }
-static inline bool kasan_enabled(void) +static inline bool kasan_report_enabled(void) { return !current->kasan_depth; } diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 680ceed..ba916b1b 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -220,7 +220,7 @@ void kasan_report(unsigned long addr, size_t size, { struct kasan_access_info info;
- if (likely(!kasan_enabled())) + if (likely(!kasan_report_enabled())) return;
info.access_addr = (void *)addr;
From: "Aneesh Kumar K.V" aneesh.kumar@linux.vnet.ibm.com
MODULE_VADDR is not available on all architectures, so use is_module_address() instead.
Signed-off-by: Aneesh Kumar K.V aneesh.kumar@linux.vnet.ibm.com Reviewed-by: Andrey Ryabinin ryabinin.a.a@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 527f215b78976e94995dce7163b07539b576d519) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/report.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c index ba916b1b..9474364 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -22,6 +22,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/kasan.h> +#include <linux/module.h>
#include <asm/sections.h>
@@ -85,9 +86,11 @@ static void print_error_description(struct kasan_access_info *info)
static inline bool kernel_or_module_addr(const void *addr) { - return (addr >= (void *)_stext && addr < (void *)_end) - || (addr >= (void *)MODULES_VADDR - && addr < (void *)MODULES_END); + if (addr >= (void *)_stext && addr < (void *)_end) + return true; + if (is_module_address((unsigned long)addr)) + return true; + return false; }
static inline bool init_task_stack_addr(const void *addr)
From: "Aneesh Kumar K.V" aneesh.kumar@linux.vnet.ibm.com
We can't use generic functions like print_hex_dump() to access the kasan shadow region. That would require us to set up another kasan shadow region for the passed address (the kasan shadow address), and some architectures won't be able to do that. Hence make a copy of the shadow row and pass that to the generic functions.
Signed-off-by: Aneesh Kumar K.V aneesh.kumar@linux.vnet.ibm.com Reviewed-by: Andrey Ryabinin ryabinin.a.a@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit f2377d4eaab2aabe1938b3974b5b94f5ba4c7ead) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/report.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 9474364..2b30762 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -164,14 +164,20 @@ static void print_shadow_for_address(const void *addr) for (i = -SHADOW_ROWS_AROUND_ADDR; i <= SHADOW_ROWS_AROUND_ADDR; i++) { const void *kaddr = kasan_shadow_to_mem(shadow_row); char buffer[4 + (BITS_PER_LONG/8)*2]; + char shadow_buf[SHADOW_BYTES_PER_ROW];
snprintf(buffer, sizeof(buffer), (i == 0) ? ">%p: " : " %p: ", kaddr); - + /* + * We should not pass a shadow pointer to generic + * function, because generic functions may try to + * access kasan mapping for the passed address. + */ kasan_disable_current(); + memcpy(shadow_buf, shadow_row, SHADOW_BYTES_PER_ROW); print_hex_dump(KERN_ERR, buffer, DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1, - shadow_row, SHADOW_BYTES_PER_ROW, 0); + shadow_buf, SHADOW_BYTES_PER_ROW, 0); kasan_enable_current();
if (row_is_guilty(shadow_row, shadow))
From: "Aneesh Kumar K.V" aneesh.kumar@linux.vnet.ibm.com
When we end up calling kasan_report() in real mode, our shadow mapping for the spinlock variable will show up as poisoned. This results in us calling kasan_report_error() with the report_lock spinlock already held. To prevent this, disable kasan reporting while we are printing a kasan error report.
Signed-off-by: Aneesh Kumar K.V aneesh.kumar@linux.vnet.ibm.com Reviewed-by: Andrey Ryabinin ryabinin.a.a@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit fc5aeeaf593278f07ffa4d97296e27423ecae867) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/report.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 2b30762..a42dade 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -173,12 +173,10 @@ static void print_shadow_for_address(const void *addr) * function, because generic functions may try to * access kasan mapping for the passed address. */ - kasan_disable_current(); memcpy(shadow_buf, shadow_row, SHADOW_BYTES_PER_ROW); print_hex_dump(KERN_ERR, buffer, DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1, shadow_buf, SHADOW_BYTES_PER_ROW, 0); - kasan_enable_current();
if (row_is_guilty(shadow_row, shadow)) pr_err("%*c\n", @@ -195,6 +193,10 @@ void kasan_report_error(struct kasan_access_info *info) { unsigned long flags;
+ /* + * Make sure we don't end up in loop. + */ + kasan_disable_current(); spin_lock_irqsave(&report_lock, flags); pr_err("=================================" "=================================\n"); @@ -204,12 +206,17 @@ void kasan_report_error(struct kasan_access_info *info) pr_err("=================================" "=================================\n"); spin_unlock_irqrestore(&report_lock, flags); + kasan_enable_current(); }
void kasan_report_user_access(struct kasan_access_info *info) { unsigned long flags;
+ /* + * Make sure we don't end up in loop. + */ + kasan_disable_current(); spin_lock_irqsave(&report_lock, flags); pr_err("=================================" "=================================\n"); @@ -222,6 +229,7 @@ void kasan_report_user_access(struct kasan_access_info *info) pr_err("=================================" "=================================\n"); spin_unlock_irqrestore(&report_lock, flags); + kasan_enable_current(); }
void kasan_report(unsigned long addr, size_t size,
From: Andrey Konovalov andreyknvl@google.com
Each access with an address lower than kasan_shadow_to_mem(KASAN_SHADOW_START) is reported as a user-memory-access. This is not always true; the accessed address might not be in user space. Fix this by reporting such accesses as null-ptr-derefs or wild-memory-accesses.
There's another reason for this change. For userspace ASan we have a bunch of systems that analyze error types for the purpose of classification and deduplication. Sooner or later we will adapt them to KASAN as well, and then clearly and explicitly stated error types will bring value.
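A simplified sketch of the classification this patch introduces (mirroring the kasan_report_error() change in the diff below):

        static const char *bad_addr_bug_type(unsigned long addr)
        {
                if (addr < PAGE_SIZE)
                        return "null-ptr-deref";
                if (addr < TASK_SIZE)
                        return "user-memory-access";
                return "wild-memory-access";
        }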
Signed-off-by: Andrey Konovalov andreyknvl@google.com Cc: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: Konstantin Serebryany kcc@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit e91210766341cb356ead7fd39f07493a3d00b80f) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/kasan.c | 8 +------- mm/kasan/kasan.h | 3 --- mm/kasan/report.c | 50 +++++++++++++++++++++++--------------------------- 3 files changed, 24 insertions(+), 37 deletions(-)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 3d64922..c6dc627 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -235,18 +235,12 @@ static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size) static __always_inline void check_memory_region(unsigned long addr, size_t size, bool write) { - struct kasan_access_info info; - if (unlikely(size == 0)) return;
if (unlikely((void *)addr < kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) { - info.access_addr = (void *)addr; - info.access_size = size; - info.is_write = write; - info.ip = _RET_IP_; - kasan_report_user_access(&info); + kasan_report(addr, size, write, _RET_IP_); return; }
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index a6b46cc..4f6c62e 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -54,9 +54,6 @@ struct kasan_global { #endif };
-void kasan_report_error(struct kasan_access_info *info); -void kasan_report_user_access(struct kasan_access_info *info); - static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index a42dade..8a695bb 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -189,9 +189,10 @@ static void print_shadow_for_address(const void *addr)
static DEFINE_SPINLOCK(report_lock);
-void kasan_report_error(struct kasan_access_info *info) +static void kasan_report_error(struct kasan_access_info *info) { unsigned long flags; + const char *bug_type;
/* * Make sure we don't end up in loop. @@ -200,32 +201,26 @@ void kasan_report_error(struct kasan_access_info *info) spin_lock_irqsave(&report_lock, flags); pr_err("=================================" "=================================\n"); - print_error_description(info); - print_address_description(info); - print_shadow_for_address(info->first_bad_addr); - pr_err("=================================" - "=================================\n"); - spin_unlock_irqrestore(&report_lock, flags); - kasan_enable_current(); -} - -void kasan_report_user_access(struct kasan_access_info *info) -{ - unsigned long flags; - - /* - * Make sure we don't end up in loop. - */ - kasan_disable_current(); - spin_lock_irqsave(&report_lock, flags); - pr_err("=================================" - "=================================\n"); - pr_err("BUG: KASan: user-memory-access on address %p\n", - info->access_addr); - pr_err("%s of size %zu by task %s/%d\n", - info->is_write ? "Write" : "Read", - info->access_size, current->comm, task_pid_nr(current)); - dump_stack(); + if (info->access_addr < + kasan_shadow_to_mem((void *)KASAN_SHADOW_START)) { + if ((unsigned long)info->access_addr < PAGE_SIZE) + bug_type = "null-ptr-deref"; + else if ((unsigned long)info->access_addr < TASK_SIZE) + bug_type = "user-memory-access"; + else + bug_type = "wild-memory-access"; + pr_err("BUG: KASan: %s on address %p\n", + bug_type, info->access_addr); + pr_err("%s of size %zu by task %s/%d\n", + info->is_write ? "Write" : "Read", + info->access_size, current->comm, + task_pid_nr(current)); + dump_stack(); + } else { + print_error_description(info); + print_address_description(info); + print_shadow_for_address(info->first_bad_addr); + } pr_err("=================================" "=================================\n"); spin_unlock_irqrestore(&report_lock, flags); @@ -244,6 +239,7 @@ void kasan_report(unsigned long addr, size_t size, info.access_size = size; info.is_write = is_write; info.ip = ip; + kasan_report_error(&info); }
From: Andrey Konovalov andreyknvl@google.com
Update the names of the bad access types to better reflect the type of the access that happened and make these error types "literals" that can be used for classification and deduplication in scripts.
Signed-off-by: Andrey Konovalov andreyknvl@google.com Cc: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: Konstantin Serebryany kcc@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 0952d87fd6a6211ac51b2abdc5c066b49c651fd8) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/report.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8a695bb..69d9315c 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -49,7 +49,7 @@ static const void *find_first_bad_addr(const void *addr, size_t size)
static void print_error_description(struct kasan_access_info *info) { - const char *bug_type = "unknown crash"; + const char *bug_type = "unknown-crash"; u8 shadow_val;
info->first_bad_addr = find_first_bad_addr(info->access_addr, @@ -58,21 +58,25 @@ static void print_error_description(struct kasan_access_info *info) shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr);
switch (shadow_val) { - case KASAN_FREE_PAGE: - case KASAN_KMALLOC_FREE: - bug_type = "use after free"; + case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: + bug_type = "out-of-bounds"; break; case KASAN_PAGE_REDZONE: case KASAN_KMALLOC_REDZONE: + bug_type = "slab-out-of-bounds"; + break; case KASAN_GLOBAL_REDZONE: - case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: - bug_type = "out of bounds access"; + bug_type = "global-out-of-bounds"; break; case KASAN_STACK_LEFT: case KASAN_STACK_MID: case KASAN_STACK_RIGHT: case KASAN_STACK_PARTIAL: - bug_type = "out of bounds on stack"; + bug_type = "stack-out-of-bounds"; + break; + case KASAN_FREE_PAGE: + case KASAN_KMALLOC_FREE: + bug_type = "use-after-free"; break; }
From: Andrey Konovalov andreyknvl@google.com
Makes KASAN accurately determine the type of the bad access. If the shadow byte value is in the [0, KASAN_SHADOW_SCALE_SIZE) range we can look at the next shadow byte to determine the type of the access.
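A worked example (hypothetical object; the shadow values assume the usual encoding where KASAN_KMALLOC_REDZONE is 0xfc):

        /*
         * For p = kmalloc(5, GFP_KERNEL) the shadow of the 8-byte slot looks like:
         *
         *      05 fc ...       (5 accessible bytes, then kmalloc redzone)
         *
         * An access to p[6] finds shadow value 5, which only says "partially
         * accessible".  Looking at the next shadow byte (0xfc) lets the report
         * say "slab-out-of-bounds" instead of the generic "out-of-bounds".
         */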
Signed-off-by: Andrey Konovalov andreyknvl@google.com Cc: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: Konstantin Serebryany kcc@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit cdf6a273dc4346277ab9d148ef29f6e058624a8c) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/report.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 69d9315c..63e039f 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -50,15 +50,26 @@ static const void *find_first_bad_addr(const void *addr, size_t size) static void print_error_description(struct kasan_access_info *info) { const char *bug_type = "unknown-crash"; - u8 shadow_val; + u8 *shadow_addr;
info->first_bad_addr = find_first_bad_addr(info->access_addr, info->access_size);
- shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr); + shadow_addr = (u8 *)kasan_mem_to_shadow(info->first_bad_addr);
- switch (shadow_val) { + /* + * If shadow byte value is in [0, KASAN_SHADOW_SCALE_SIZE) we can look + * at the next shadow byte to determine the type of the bad access. + */ + if (*shadow_addr > 0 && *shadow_addr <= KASAN_SHADOW_SCALE_SIZE - 1) + shadow_addr++; + + switch (*shadow_addr) { case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: + /* + * In theory it's still possible to see these shadow values + * due to a data race in the kernel code. + */ bug_type = "out-of-bounds"; break; case KASAN_PAGE_REDZONE:
From: Andrey Konovalov andreyknvl@google.com
We decided to use KASAN as the short name of the tool and KernelAddressSanitizer as the full one. Update log messages according to that.
Signed-off-by: Andrey Konovalov andreyknvl@google.com Cc: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: Konstantin Serebryany kcc@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 25add7ec708170e4eaef1f9793a07803b2fb5c71) Signed-off-by: Alex Shi alex.shi@linaro.org --- arch/x86/mm/kasan_init_64.c | 2 ++ mm/kasan/kasan.c | 2 +- mm/kasan/report.c | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index d858afe..23dc767 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -204,4 +204,6 @@ void __init kasan_init(void)
load_cr3(init_level4_pgt); init_task.kasan_depth = 0; + + pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index c6dc627..d207424 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -518,7 +518,7 @@ static int kasan_mem_notifier(struct notifier_block *nb,
static int __init kasan_memhotplug_init(void) { - pr_err("WARNING: KASan doesn't support memory hot-add\n"); + pr_err("WARNING: KASAN doesn't support memory hot-add\n"); pr_err("Memory hot-add will be disabled\n");
hotplug_memory_notifier(kasan_mem_notifier, 0); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 63e039f..c8cc63f 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -91,7 +91,7 @@ static void print_error_description(struct kasan_access_info *info) break; }
- pr_err("BUG: KASan: %s in %pS at addr %p\n", + pr_err("BUG: KASAN: %s in %pS at addr %p\n", bug_type, (void *)info->ip, info->access_addr); pr_err("%s of size %zu by task %s/%d\n", @@ -224,7 +224,7 @@ static void kasan_report_error(struct kasan_access_info *info) bug_type = "user-memory-access"; else bug_type = "wild-memory-access"; - pr_err("BUG: KASan: %s on address %p\n", + pr_err("BUG: KASAN: %s on address %p\n", bug_type, info->access_addr); pr_err("%s of size %zu by task %s/%d\n", info->is_write ? "Write" : "Read",
From: Andrey Konovalov andreyknvl@google.com
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Andrey Konovalov andreyknvl@google.com Cc: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: Konstantin Serebryany kcc@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 0295fd5d570626817d10deadf5a2ad5e49c36a1d) Signed-off-by: Alex Shi alex.shi@linaro.org --- Documentation/kasan.txt | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-)
diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index 4692241..4e906a0 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -1,32 +1,31 @@ -Kernel address sanitizer -================ +KernelAddressSanitizer (KASAN) +==============================
0. Overview ===========
-Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides +KernelAddressSANitizer (KASAN) is a dynamic memory error detector. It provides a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs.
-KASan uses compile-time instrumentation for checking every memory access, -therefore you will need a gcc version of 4.9.2 or later. KASan could detect out -of bounds accesses to stack or global variables, but only if gcc 5.0 or later was -used to built the kernel. +KASAN uses compile-time instrumentation for checking every memory access, +therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is +required for detection of out-of-bounds accesses to stack or global variables.
-Currently KASan is supported only for x86_64 architecture and requires that the -kernel be built with the SLUB allocator. +Currently KASAN is supported only for x86_64 architecture and requires the +kernel to be built with the SLUB allocator.
1. Usage -========= +========
To enable KASAN configure kernel with:
CONFIG_KASAN = y
-and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline -is compiler instrumentation types. The former produces smaller binary the -latter is 1.1 - 2 times faster. Inline instrumentation requires a gcc version -of 5.0 or later. +and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline and +inline are compiler instrumentation types. The former produces smaller binary +the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC +version 5.0 or later.
Currently KASAN works only with the SLUB memory allocator. For better bug detection and nicer report, enable CONFIG_STACKTRACE and put @@ -42,7 +41,7 @@ similar to the following to the respective kernel Makefile: KASAN_SANITIZE := n
1.1 Error reports -========== +=================
A typical out of bounds access report looks like this:
@@ -119,14 +118,16 @@ Memory state around the buggy address: ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ==================================================================
-First sections describe slub object where bad access happened. -See 'SLUB Debug output' section in Documentation/vm/slub.txt for details. +The header of the report discribe what kind of bug happened and what kind of +access caused it. It's followed by the description of the accessed slub object +(see 'SLUB Debug output' section in Documentation/vm/slub.txt for details) and +the description of the accessed memory page.
In the last section the report shows memory state around the accessed address. -Reading this part requires some more understanding of how KASAN works. +Reading this part requires some understanding of how KASAN works.
-Each 8 bytes of memory are encoded in one shadow byte as accessible, -partially accessible, freed or they can be part of a redzone. +The state of each 8 aligned bytes of memory is encoded in one shadow byte. +Those 8 bytes can be accessible, partially accessible, freed or be a redzone. We use the following encoding for each shadow byte: 0 means that all 8 bytes of the corresponding memory region are accessible; number N (1 <= N <= 7) means that the first N bytes are accessible, and other (8 - N) bytes are not; @@ -139,7 +140,7 @@ the accessed address is partially accessible.
2. Implementation details -======================== +=========================
From a high level, our approach to memory error detection is similar to that of kmemcheck: use shadow memory to record whether each byte of memory is safe
From: Andrey Konovalov andreyknvl@google.com
Move KASAN_SANITIZE in arch/x86/boot/Makefile above the comment related to SVGA_MODE, since the comment refers to 'the next line'.
Signed-off-by: Andrey Konovalov andreyknvl@google.com Cc: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: Konstantin Serebryany kcc@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit c63f06dd15797736c31dc86e6392811d0bac34a1) Signed-off-by: Alex Shi alex.shi@linaro.org --- arch/x86/boot/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 57bbf2f..ff6dc20 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -9,13 +9,13 @@ # Changed by many, many contributors over the years. #
+KASAN_SANITIZE := n + # If you want to preset the SVGA mode, uncomment the next line and # set SVGA_MODE to whatever number you want. # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. # The number is the same as you would ordinarily press at bootup.
-KASAN_SANITIZE := n - SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
targets := vmlinux.bin setup.bin setup.elf bzImage
From: Andrey Konovalov andreyknvl@google.com
Update the reference to the kasan prototype repository on github, since it was renamed.
Signed-off-by: Andrey Konovalov andreyknvl@google.com Cc: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Dmitry Vyukov dvyukov@google.com Cc: Alexander Potapenko glider@google.com Cc: Konstantin Serebryany kcc@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 5d0926efe728e00afbd81a1e3c498222cf908d23) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/kasan.c | 2 +- mm/kasan/report.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index d207424..c5a1e26 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -4,7 +4,7 @@ * Copyright (c) 2014 Samsung Electronics Co., Ltd. * Author: Andrey Ryabinin a.ryabinin@samsung.com * - * Some of code borrowed from https://github.com/xairy/linux by + * Some code borrowed from https://github.com/xairy/kasan-prototype by * Andrey Konovalov adech.fo@gmail.com * * This program is free software; you can redistribute it and/or modify diff --git a/mm/kasan/report.c b/mm/kasan/report.c index c8cc63f..c59fc0e 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -4,7 +4,7 @@ * Copyright (c) 2014 Samsung Electronics Co., Ltd. * Author: Andrey Ryabinin a.ryabinin@samsung.com * - * Some of code borrowed from https://github.com/xairy/linux by + * Some code borrowed from https://github.com/xairy/kasan-prototype by * Andrey Konovalov adech.fo@gmail.com * * This program is free software; you can redistribute it and/or modify
From: Wang Long long.wanglong@huawei.com
Add some out of bounds testcases to test_kasan module.
Signed-off-by: Wang Long long.wanglong@huawei.com Acked-by: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Vladimir Murzin vladimir.murzin@arm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit f523e737c08f5daaec9fac017e1bc5695e6f2760) Signed-off-by: Alex Shi alex.shi@linaro.org --- lib/test_kasan.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+)
diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 098c08e..dc340b3 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -138,6 +138,71 @@ static noinline void __init kmalloc_oob_16(void) kfree(ptr2); }
+static noinline void __init kmalloc_oob_memset_2(void) +{ + char *ptr; + size_t size = 8; + + pr_info("out-of-bounds in memset2\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr+7, 0, 2); + kfree(ptr); +} + +static noinline void __init kmalloc_oob_memset_4(void) +{ + char *ptr; + size_t size = 8; + + pr_info("out-of-bounds in memset4\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr+5, 0, 4); + kfree(ptr); +} + + +static noinline void __init kmalloc_oob_memset_8(void) +{ + char *ptr; + size_t size = 8; + + pr_info("out-of-bounds in memset8\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr+1, 0, 8); + kfree(ptr); +} + +static noinline void __init kmalloc_oob_memset_16(void) +{ + char *ptr; + size_t size = 16; + + pr_info("out-of-bounds in memset16\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr+1, 0, 16); + kfree(ptr); +} + static noinline void __init kmalloc_oob_in_memset(void) { char *ptr; @@ -264,6 +329,10 @@ static int __init kmalloc_tests_init(void) kmalloc_oob_krealloc_less(); kmalloc_oob_16(); kmalloc_oob_in_memset(); + kmalloc_oob_memset_2(); + kmalloc_oob_memset_4(); + kmalloc_oob_memset_8(); + kmalloc_oob_memset_16(); kmalloc_uaf(); kmalloc_uaf_memset(); kmalloc_uaf2();
From: Wang Long long.wanglong@huawei.com
The current KASAN code can not find the following out-of-bounds bugs:
char *ptr; ptr = kmalloc(8, GFP_KERNEL); memset(ptr+7, 0, 2);
The cause of the problem is a type conversion error in the memory_is_poisoned_n() function, so this patch fixes that.
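To make the failure mode concrete (a sketch assuming ptr is 8-byte aligned and the byte after the object carries the kmalloc redzone shadow value 0xfc):

        /*
         * For memset(ptr + 7, 0, 2) the last accessed byte (ptr + 8) lands in
         * the redzone, so *last_shadow reads as 0xfc, i.e. -4 through an s8
         * pointer.  In the old check
         *
         *      (last_byte & KASAN_SHADOW_MASK) >= *last_shadow
         *
         * the left-hand side is unsigned long, so -4 is promoted to a huge
         * unsigned value and "0 >= 0xff...fc" is false: the poisoned access is
         * silently missed.  With the (long) cast the comparison stays signed,
         * "0 >= -4" is true, and the bug is reported.
         */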
Signed-off-by: Wang Long long.wanglong@huawei.com Acked-by: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Vladimir Murzin vladimir.murzin@arm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit e0d57714394f5e2ce4e2f9bbebf48e3c7a7fd3be) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/kasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index c5a1e26..1a66507 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -203,7 +203,7 @@ static __always_inline bool memory_is_poisoned_n(unsigned long addr, s8 *last_shadow = (s8 *)kasan_mem_to_shadow((void *)last_byte);
if (unlikely(ret != (unsigned long)last_shadow || - ((last_byte & KASAN_SHADOW_MASK) >= *last_shadow))) + ((long)(last_byte & KASAN_SHADOW_MASK) >= *last_shadow))) return true; } return false;
From: Xishi Qiu qiuxishi@huawei.com
Use IS_ALIGNED() to determine whether the shadow spans two bytes. It generates less code and is more readable. Also add some comments to the shadow check functions.
Signed-off-by: Xishi Qiu qiuxishi@huawei.com Acked-by: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Andrey Konovalov adech.fo@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 10f702627e139e21465f4c9d44f63527bbca163c) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/kasan.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 1a66507..7f97c85 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -86,6 +86,11 @@ static __always_inline bool memory_is_poisoned_2(unsigned long addr)
 		if (memory_is_poisoned_1(addr + 1))
 			return true;
 
+		/*
+		 * If single shadow byte covers 2-byte access, we don't
+		 * need to do anything more. Otherwise, test the first
+		 * shadow byte.
+		 */
 		if (likely(((addr + 1) & KASAN_SHADOW_MASK) != 0))
 			return false;
 
@@ -103,6 +108,11 @@ static __always_inline bool memory_is_poisoned_4(unsigned long addr)
 		if (memory_is_poisoned_1(addr + 3))
 			return true;
 
+		/*
+		 * If single shadow byte covers 4-byte access, we don't
+		 * need to do anything more. Otherwise, test the first
+		 * shadow byte.
+		 */
 		if (likely(((addr + 3) & KASAN_SHADOW_MASK) >= 3))
 			return false;
 
@@ -120,7 +130,12 @@ static __always_inline bool memory_is_poisoned_8(unsigned long addr)
 		if (memory_is_poisoned_1(addr + 7))
 			return true;
 
-		if (likely(((addr + 7) & KASAN_SHADOW_MASK) >= 7))
+		/*
+		 * If single shadow byte covers 8-byte access, we don't
+		 * need to do anything more. Otherwise, test the first
+		 * shadow byte.
+		 */
+		if (likely(IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
 			return false;
 
 		return unlikely(*(u8 *)shadow_addr);
@@ -139,7 +154,12 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr)
 		if (unlikely(shadow_first_bytes))
 			return true;
 
-		if (likely(IS_ALIGNED(addr, 8)))
+		/*
+		 * If two shadow bytes covers 16-byte access, we don't
+		 * need to do anything more. Otherwise, test the last
+		 * shadow byte.
+		 */
+		if (likely(IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
 			return false;
 
 		return memory_is_poisoned_1(addr + 15);
From: Andrey Ryabinin aryabinin@virtuozzo.com
It's recommended to have slub's user tracking enabled with CONFIG_KASAN, because:
a) User tracking disables slab merging, which improves detection of out-of-bounds accesses.
b) User tracking metadata acts as a redzone, which also improves detection of out-of-bounds accesses.
c) User tracking provides additional information about the object. This information helps in understanding bugs.
Currently it is not enabled by default. Besides recompiling the kernel with KASAN and reinstalling it, the user also has to change the boot cmdline, which is not very handy.
Enable slub user tracking by default with KASAN=y, since there is no good reason to not do this.
[akpm@linux-foundation.org: little fixes, per David] Signed-off-by: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Christoph Lameter cl@linux.com Cc: Pekka Enberg penberg@kernel.org Cc: David Rientjes rientjes@google.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
(cherry picked from commit 89d3c87e20d95e3238eac85e43de7b3cb1f39d8b) Signed-off-by: Alex Shi alex.shi@linaro.org --- Documentation/kasan.txt | 3 +-- lib/Kconfig.kasan | 3 +-- mm/slub.c | 4 +++- 3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt
index 4e906a0..58b72fb 100644
--- a/Documentation/kasan.txt
+++ b/Documentation/kasan.txt
@@ -28,8 +28,7 @@ the latter is 1.1 - 2 times faster. Inline instrumentation requires
 a GCC version 5.0 or later.
 
 Currently KASAN works only with the SLUB memory allocator.
-For better bug detection and nicer report, enable CONFIG_STACKTRACE and put
-at least 'slub_debug=U' in the boot cmdline.
+For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
 
 To disable instrumentation for specific files or directories, add a line
 similar to the following to the respective kernel Makefile:
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 777eda7..53362c1 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -15,8 +15,7 @@ config KASAN
 	  global variables requires gcc 5.0 or later.
 	  This feature consumes about 1/8 of available memory and brings about
 	  ~x3 performance slowdown.
-	  For better error detection enable CONFIG_STACKTRACE,
-	  and add slub_debug=U to boot cmdline.
+	  For better error detection enable CONFIG_STACKTRACE.
 
 config KASAN_SHADOW_OFFSET
 	hex
diff --git a/mm/slub.c b/mm/slub.c
index b8b76d5..f46deaf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -459,8 +459,10 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
 /*
  * Debug settings:
  */
-#ifdef CONFIG_SLUB_DEBUG_ON
+#if defined(CONFIG_SLUB_DEBUG_ON)
 static int slub_debug = DEBUG_DEFAULT_FLAGS;
+#elif defined(CONFIG_KASAN)
+static int slub_debug = SLAB_STORE_USER;
 #else
 static int slub_debug;
 #endif
From: Andrey Ryabinin aryabinin@virtuozzo.com
Currently we already taint the kernel in some cases. E.g. if we hit a bug in slub memory, we call object_err(), which taints the kernel with the TAINT_BAD_PAGE flag. But for other kinds of bugs the kernel is left untainted.
Always taint with TAINT_BAD_PAGE if KASAN finds a bug. This is useful for automated testing.
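As an illustration of how automated testing could consume this, here is a minimal userspace check of the taint bit (TAINT_BAD_PAGE is assumed to be bit 5, its usual position; adjust if the target kernel differs):

	#include <stdio.h>

	#define TAINT_BAD_PAGE	5	/* 'B' in the kernel taint flags */

	int main(void)
	{
		unsigned long tainted = 0;
		FILE *f = fopen("/proc/sys/kernel/tainted", "r");

		if (!f)
			return 1;
		if (fscanf(f, "%lu", &tainted) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);

		printf("TAINT_BAD_PAGE is %s\n",
		       (tainted & (1UL << TAINT_BAD_PAGE)) ? "set" : "clear");
		return 0;
	}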
Signed-off-by: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Alexander Potapenko glider@google.com Reviewed-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit eb06f43f1c94d502b7867b0998e92cdabbc060bc) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/report.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index c59fc0e..e0b3e94 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -238,6 +238,7 @@ static void kasan_report_error(struct kasan_access_info *info)
 	}
 	pr_err("================================="
 		"=================================\n");
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irqrestore(&report_lock, flags);
 	kasan_enable_current();
 }
From: Andrey Ryabinin aryabinin@virtuozzo.com
Kmemleak reports the following leak:
unreferenced object 0xfffffbfff41ea000 (size 20480):
  comm "modprobe", pid 65199, jiffies 4298875551 (age 542.568s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff82354f5e>] kmemleak_alloc+0x4e/0xc0
    [<ffffffff8152e718>] __vmalloc_node_range+0x4b8/0x740
    [<ffffffff81574072>] kasan_module_alloc+0x72/0xc0
    [<ffffffff810efe68>] module_alloc+0x78/0xb0
    [<ffffffff812f6a24>] module_alloc_update_bounds+0x14/0x70
    [<ffffffff812f8184>] layout_and_allocate+0x16f4/0x3c90
    [<ffffffff812faa1f>] load_module+0x2ff/0x6690
    [<ffffffff813010b6>] SyS_finit_module+0x136/0x170
    [<ffffffff8239bbc9>] system_call_fastpath+0x16/0x1b
    [<ffffffffffffffff>] 0xffffffffffffffff
kasan_module_alloc() allocates shadow memory for a module and frees it on module unloading. It doesn't store the pointer to the allocated shadow memory because it can be calculated from the shadowed address, i.e. kasan_mem_to_shadow(addr).
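For reference, the translation referred to here is the generic shadow mapping used throughout this series (a sketch of the helper in include/linux/kasan.h; the shift and offset are per-architecture constants):

	static inline void *kasan_mem_to_shadow(const void *addr)
	{
		return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
			+ KASAN_SHADOW_OFFSET;
	}

Since the shadow address is a pure function of the module address, there is no need to keep the returned pointer around, which is exactly what confuses kmemleak.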
Since kmemleak cannot find the pointer to the allocated shadow, it thinks that the memory has leaked.
Use kmemleak_ignore() to tell kmemleak that this is not a leak and shadow memory doesn't contain any pointers.
Signed-off-by: Andrey Ryabinin aryabinin@virtuozzo.com Acked-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit 459372545c9c0d6f491e280dccc8a54a61b60e56) Signed-off-by: Alex Shi alex.shi@linaro.org --- mm/kasan/kasan.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 7f97c85..81a2f45 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -19,6 +19,7 @@
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kmemleak.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/mm.h>
@@ -444,6 +445,7 @@ int kasan_module_alloc(void *addr, size_t size)
 
 	if (ret) {
 		find_vm_area(addr)->flags |= VM_KASAN;
+		kmemleak_ignore(ret);
 		return 0;
 	}
From: Mark Rutland mark.rutland@arm.com
Functions which the compiler has instrumented for ASAN place poison on the stack shadow upon entry and remove this poison prior to returning.
In some cases (e.g. hotplug and idle), CPUs may exit the kernel a number of levels deep in C code. If there are any instrumented functions on this critical path, these will leave portions of the idle thread stack shadow poisoned.
If a CPU returns to the kernel via a different path (e.g. a cold entry), then depending on stack frame layout subsequent calls to instrumented functions may use regions of the stack with stale poison, resulting in (spurious) KASAN splats to the console.
Contemporary GCCs always add stack shadow poisoning when ASAN is enabled, even when asked to not instrument a function [1], so we can't simply annotate functions on the critical path to avoid poisoning.
Instead, this series explicitly removes any stale poison before it can be hit. In the common hotplug case we clear the entire stack shadow in common code, before a CPU is brought online.
Architectures which perform a cold return as part of cpu idle may retain an architecture-specific amount of stack contents. To retain the poison for this retained context, the arch code must call the core KASAN code, passing a "watermark" stack pointer value beyond which shadow will be cleared. Architectures which don't perform a cold return as part of idle do not need any additional code.
This patch (of 3):
Functions which the compiler has instrumented for KASAN place poison on the stack shadow upon entry and remove this poison prior to returning.
In some cases (e.g. hotplug and idle), CPUs may exit the kernel a number of levels deep in C code. If there are any instrumented functions on this critical path, these will leave portions of the stack shadow poisoned.
If a CPU returns to the kernel via a different path (e.g. a cold entry), then depending on stack frame layout subsequent calls to instrumented functions may use regions of the stack with stale poison, resulting in (spurious) KASAN splats to the console.
To avoid this, we must clear stale poison from the stack prior to instrumented functions being called. This patch adds functions to the KASAN core for removing poison from (portions of) a task's stack. These will be used by subsequent patches to avoid problems with hotplug and idle.
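As a rough, purely illustrative sketch of how later patches are expected to use these hooks (the function names and call sites below are hypothetical and not taken from this series; the watermark variant is declared asmlinkage, presumably so it can also be called from arch assembly):

	/* Hypothetical hotplug path: clear the idle task's entire stack
	 * shadow before the CPU is brought online. */
	static void example_bringup_cpu(struct task_struct *idle)
	{
		kasan_unpoison_task_stack(idle);
		/* ... arch-specific bring-up ... */
	}

	/* Hypothetical cold-return path: clear stale poison below the
	 * watermark sp, keeping the poison for the retained frames above. */
	asmlinkage void example_cpu_resume(void *watermark_sp)
	{
		kasan_unpoison_remaining_stack(watermark_sp);
		/* ... continue into instrumented C code ... */
	}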
Signed-off-by: Mark Rutland mark.rutland@arm.com Acked-by: Catalin Marinas catalin.marinas@arm.com Reviewed-by: Andrey Ryabinin aryabinin@virtuozzo.com Cc: Alexander Potapenko glider@google.com Cc: Lorenzo Pieralisi lorenzo.pieralisi@arm.com Cc: Will Deacon will.deacon@arm.com Cc: Ingo Molnar mingo@kernel.org Cc: Peter Zijlstra peterz@infradead.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit e3ae116339f9a0c77523abc95e338fa405946e07) Signed-off-by: Alex Shi alex.shi@linaro.org --- include/linux/kasan.h | 6 +++++- mm/kasan/kasan.c | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4b9f85c..0fdc798 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_KASAN_H
 #define _LINUX_KASAN_H
 
+#include <linux/sched.h>
 #include <linux/types.h>
 
 struct kmem_cache;
@@ -13,7 +14,6 @@ struct vm_struct;
 
 #include <asm/kasan.h>
 #include <asm/pgtable.h>
-#include <linux/sched.h>
 
 extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
@@ -43,6 +43,8 @@ static inline void kasan_disable_current(void)
 
 void kasan_unpoison_shadow(const void *address, size_t size);
 
+void kasan_unpoison_task_stack(struct task_struct *task);
+
 void kasan_alloc_pages(struct page *page, unsigned int order);
 void kasan_free_pages(struct page *page, unsigned int order);
 
@@ -66,6 +68,8 @@ void kasan_free_shadow(const struct vm_struct *vm);
 
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
+static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+
 static inline void kasan_enable_current(void) {}
 static inline void kasan_disable_current(void) {}
 
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 81a2f45..8f6ff54 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
+#include <linux/linkage.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/mm.h>
@@ -60,6 +61,25 @@ void kasan_unpoison_shadow(const void *address, size_t size)
 	}
 }
 
+static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+{
+	void *base = task_stack_page(task);
+	size_t size = sp - base;
+
+	kasan_unpoison_shadow(base, size);
+}
+
+/* Unpoison the entire stack for a task. */
+void kasan_unpoison_task_stack(struct task_struct *task)
+{
+	__kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE);
+}
+
+/* Unpoison the stack for the current task beyond a watermark sp value. */
+asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+{
+	__kasan_unpoison_stack(current, sp);
+}
+
 /*
  * All functions below always inlined so compiler could