March 2020 - Linux-stable-mirror

[PATCH v2] RISC-V: Move all address space definition macros to one place

by Atish Patra

We get the following compilation error if CONFIG_SPARSEMEM_VMEMMAP is set. --------------------------------------------------------------- ./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’: ./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared (first use in this function); did you mean ‘mem_map’? #define __pfn_to_page(pfn) (vmemmap + (pfn)) ^~~~~~~ ./include/asm-generic/memory_model.h:82:21: note: in expansion of macro ‘__pfn_to_page’ #define pfn_to_page __pfn_to_page ^~~~~~~~~~~~~ ./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro ‘pfn_to_page’ return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); --------------------------------------------------------------- Fix the compilation errors by moving all the address space definition macros before including pgtable-64.h. Cc: stable(a)vger.kernel.org Fixes: 8ad8b72721d0 (riscv: Add KASAN support) Signed-off-by: Atish Patra <atish.patra(a)wdc.com> Reviewed-by: Anup Patel <anup(a)brainfault.org> --- arch/riscv/include/asm/pgtable.h | 78 +++++++++++++++++--------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e43041519edd..393f2014dfee 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -19,6 +19,47 @@ #include <asm/tlbflush.h> #include <linux/mm_types.h> +#ifdef CONFIG_MMU + +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) + +#define BPF_JIT_REGION_SIZE (SZ_128M) +#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_END (VMALLOC_END) + +/* + * Roughly size the vmemmap space to be large enough to fit enough + * struct pages to map half the virtual address space. Then + * position vmemmap directly below the VMALLOC region. 
+ */ +#define VMEMMAP_SHIFT \ + (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) +#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) +#define VMEMMAP_END (VMALLOC_START - 1) +#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) + +/* + * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel + * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. + */ +#define vmemmap ((struct page *)VMEMMAP_START) + +#define PCI_IO_SIZE SZ_16M +#define PCI_IO_END VMEMMAP_START +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) + +#define FIXADDR_TOP PCI_IO_START +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#endif + #ifdef CONFIG_64BIT #include <asm/pgtable-64.h> #else @@ -90,31 +131,6 @@ extern pgd_t swapper_pg_dir[]; #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -#define BPF_JIT_REGION_SIZE (SZ_128M) -#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) -#define BPF_JIT_REGION_END (VMALLOC_END) - -/* - * Roughly size the vmemmap space to be large enough to fit enough - * struct pages to map half the virtual address space. Then - * position vmemmap directly below the VMALLOC region. - */ -#define VMEMMAP_SHIFT \ - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) -#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) -#define VMEMMAP_END (VMALLOC_START - 1) -#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) - -/* - * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel - * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. 
- */ -#define vmemmap ((struct page *)VMEMMAP_START) - static inline int pmd_present(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -432,18 +448,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define PCI_IO_SIZE SZ_16M -#define PCI_IO_END VMEMMAP_START -#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) - -#define FIXADDR_TOP PCI_IO_START -#ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE -#else -#define FIXADDR_SIZE PGDIR_SIZE -#endif -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - /* * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. -- 2.25.1

5 years, 8 months

1
0
0 0

[PATCH] RISC-V: Move all address space definition macros to one place

by Atish Patra

If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the following compilation error. --------------------------------------------------------------- ./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’: ./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared (first use in this function); did you mean ‘mem_map’? #define __pfn_to_page(pfn) (vmemmap + (pfn)) ^~~~~~~ ./include/asm-generic/memory_model.h:82:21: note: in expansion of macro ‘__pfn_to_page’ #define pfn_to_page __pfn_to_page ^~~~~~~~~~~~~ ./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro ‘pfn_to_page’ return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); --------------------------------------------------------------- Fix the compilation errors by moving all the address space definition macros before including pgtable-64.h. Cc: stable(a)vger.kernel.org Fixes: 8ad8b72721d0 (riscv: Add KASAN support) Signed-off-by: Atish Patra <atish.patra(a)wdc.com> --- arch/riscv/include/asm/pgtable.h | 78 +++++++++++++++++--------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 453afb0a570a..4f6ee48a42e8 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -19,6 +19,47 @@ #include <asm/tlbflush.h> #include <linux/mm_types.h> +#ifdef CONFIG_MMU + +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) + +#define BPF_JIT_REGION_SIZE (SZ_128M) +#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_END (VMALLOC_END) + +/* + * Roughly size the vmemmap space to be large enough to fit enough + * struct pages to map half the virtual address space. Then + * position vmemmap directly below the VMALLOC region. 
+ */ +#define VMEMMAP_SHIFT \ + (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) +#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) +#define VMEMMAP_END (VMALLOC_START - 1) +#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) + +/* + * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel + * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. + */ +#define vmemmap ((struct page *)VMEMMAP_START) + +#define PCI_IO_SIZE SZ_16M +#define PCI_IO_END VMEMMAP_START +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) + +#define FIXADDR_TOP PCI_IO_START +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#endif + #ifdef CONFIG_64BIT #include <asm/pgtable-64.h> #else @@ -90,31 +131,6 @@ extern pgd_t swapper_pg_dir[]; #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -#define BPF_JIT_REGION_SIZE (SZ_128M) -#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) -#define BPF_JIT_REGION_END (VMALLOC_END) - -/* - * Roughly size the vmemmap space to be large enough to fit enough - * struct pages to map half the virtual address space. Then - * position vmemmap directly below the VMALLOC region. - */ -#define VMEMMAP_SHIFT \ - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) -#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) -#define VMEMMAP_END (VMALLOC_START - 1) -#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) - -/* - * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel - * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. 
- */ -#define vmemmap ((struct page *)VMEMMAP_START) - static inline int pmd_present(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -452,18 +468,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define PCI_IO_SIZE SZ_16M -#define PCI_IO_END VMEMMAP_START -#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) - -#define FIXADDR_TOP PCI_IO_START -#ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE -#else -#define FIXADDR_SIZE PGDIR_SIZE -#endif -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - /* * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. -- 2.25.0

5 years, 8 months

6
6
0 0

[nacked] kernel-taskstats-fix-wrong-nla-type-for-cgrouptaskstats-policy.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: kernel/taskstats: fix wrong nla type for {cgroup,task}stats policy has been removed from the -mm tree. Its filename was kernel-taskstats-fix-wrong-nla-type-for-cgrouptaskstats-policy.patch This patch was dropped because it was nacked ------------------------------------------------------ From: Yafang Shao <laoar.shao(a)gmail.com> Subject: kernel/taskstats: fix wrong nla type for {cgroup,task}stats policy After our server is upgraded to a newer kernel, we found that it continuously prints a warning in the kernel message. The warning is, [832984.946322] netlink: 'irmas.lc': attribute type 1 has an invalid length. irmas.lc is one of our container monitor daemons, and it will use CGROUPSTATS_CMD_GET to get the cgroupstats, that is similar with tools/accounting/getdelays.c. We can also produce this warning with getdelays. For example, after running below command $ ./getdelays -C /sys/fs/cgroup/memory then you can find a warning in dmesg, [61607.229318] netlink: 'getdelays': attribute type 1 has an invalid length. This warning is introduced in commit 6e237d099fac ("netlink: Relax attr validation for fixed length types"), which is used to check whether attributes using types NLA_U* and NLA_S* have an exact length. Regarding this issue, the root cause is cgroupstats_cmd_get_policy defines a wrong type as NLA_U32, while it should be NLA_NESTED and its minimal length is NLA_HDRLEN. That is similar to taskstats_cmd_get_policy. As this behavior change really breaks our application, we'd better cc stable as well. Link: http://lkml.kernel.org/r/1585191042-9935-1-git-send-email-laoar.shao@gmail.… Fixes: 6e237d099fac ("netlink: Relax attr validation for fixed length types") Signed-off-by: Yafang Shao <laoar.shao(a)gmail.com> Cc: Balbir Singh <bsingharora(a)gmail.com> Cc: David Ahern <dsahern(a)gmail.com> Cc: David S. 
Miller <davem(a)davemloft.net> Cc: Johannes Berg <johannes(a)sipsolutions.net> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- kernel/taskstats.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) --- a/kernel/taskstats.c~kernel-taskstats-fix-wrong-nla-type-for-cgrouptaskstats-policy +++ a/kernel/taskstats.c @@ -35,8 +35,8 @@ struct kmem_cache *taskstats_cache; static struct genl_family family; static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { - [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, - [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, + [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_NESTED }, + [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_NESTED }, [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; @@ -45,7 +45,7 @@ static const struct nla_policy taskstats * Make sure they are always aligned. */ static const struct nla_policy cgroupstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { - [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, + [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_NESTED }, }; struct listener { _ Patches currently in -mm which might be from laoar.shao(a)gmail.com are mm-memcg-fix-build-error-around-the-usage-of-kmem_caches.patch

5 years, 8 months

1
0
0 0

+ kernel-taskstats-fix-wrong-nla-type-for-cgrouptaskstats-policy.patch added to -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: kernel/taskstats: fix wrong nla type for {cgroup,task}stats policy has been added to the -mm tree. Its filename is kernel-taskstats-fix-wrong-nla-type-for-cgrouptaskstats-policy.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/kernel-taskstats-fix-wrong-nla-typ… and later at http://ozlabs.org/~akpm/mmotm/broken-out/kernel-taskstats-fix-wrong-nla-typ… Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Yafang Shao <laoar.shao(a)gmail.com> Subject: kernel/taskstats: fix wrong nla type for {cgroup,task}stats policy After our server is upgraded to a newer kernel, we found that it continuously prints a warning in the kernel message. The warning is, [832984.946322] netlink: 'irmas.lc': attribute type 1 has an invalid length. irmas.lc is one of our container monitor daemons, and it will use CGROUPSTATS_CMD_GET to get the cgroupstats, that is similar with tools/accounting/getdelays.c. We can also produce this warning with getdelays. For example, after running below command $ ./getdelays -C /sys/fs/cgroup/memory then you can find a warning in dmesg, [61607.229318] netlink: 'getdelays': attribute type 1 has an invalid length. This warning is introduced in commit 6e237d099fac ("netlink: Relax attr validation for fixed length types"), which is used to check whether attributes using types NLA_U* and NLA_S* have an exact length. Regarding this issue, the root cause is cgroupstats_cmd_get_policy defines a wrong type as NLA_U32, while it should be NLA_NESTED and its minimal length is NLA_HDRLEN. 
That is similar to taskstats_cmd_get_policy. As this behavior change really breaks our application, we'd better cc stable as well. Link: http://lkml.kernel.org/r/1585191042-9935-1-git-send-email-laoar.shao@gmail.… Signed-off-by: Yafang Shao <laoar.shao(a)gmail.com> Cc: Balbir Singh <bsingharora(a)gmail.com> Cc: David Ahern <dsahern(a)gmail.com> Cc: David S. Miller <davem(a)davemloft.net> Cc: Johannes Berg <johannes(a)sipsolutions.net> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- kernel/taskstats.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) --- a/kernel/taskstats.c~kernel-taskstats-fix-wrong-nla-type-for-cgrouptaskstats-policy +++ a/kernel/taskstats.c @@ -35,8 +35,8 @@ struct kmem_cache *taskstats_cache; static struct genl_family family; static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { - [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, - [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, + [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_NESTED }, + [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_NESTED }, [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; @@ -45,7 +45,7 @@ static const struct nla_policy taskstats * Make sure they are always aligned. */ static const struct nla_policy cgroupstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { - [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, + [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_NESTED }, }; struct listener { _ Patches currently in -mm which might be from laoar.shao(a)gmail.com are kernel-taskstats-fix-wrong-nla-type-for-cgrouptaskstats-policy.patch mm-memcg-fix-build-error-around-the-usage-of-kmem_caches.patch

5 years, 8 months

1
0
0 0

+ mm-sparse-fix-kernel-crash-with-pfn_section_valid-check-v2.patch added to -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm-sparse-fix-kernel-crash-with-pfn_section_valid-check-v2 has been added to the -mm tree. Its filename is mm-sparse-fix-kernel-crash-with-pfn_section_valid-check-v2.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-sparse-fix-kernel-crash-with-pf… and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-sparse-fix-kernel-crash-with-pf… Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com> Subject: mm-sparse-fix-kernel-crash-with-pfn_section_valid-check-v2 add comment Link: http://lkml.kernel.org/r/20200326133235.343616-1-aneesh.kumar@linux.ibm.com Fixes: d41e2f3bd546 ("mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case") Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com> Reported-by: Sachin Sant <sachinp(a)linux.vnet.ibm.com> Tested-by: Sachin Sant <sachinp(a)linux.vnet.ibm.com> Reviewed-by: Baoquan He <bhe(a)redhat.com> Acked-by: Michal Hocko <mhocko(a)suse.com> Acked-by: Pankaj Gupta <pankaj.gupta.linux(a)gmail.com> Cc: Michael Ellerman <mpe(a)ellerman.id.au> Cc: Dan Williams <dan.j.williams(a)intel.com> Cc: David Hildenbrand <david(a)redhat.com> Cc: Wei Yang <richardw.yang(a)linux.intel.com> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Mike Rapoport <rppt(a)linux.ibm.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/sparse.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) --- a/mm/sparse.c~mm-sparse-fix-kernel-crash-with-pfn_section_valid-check-v2 
+++ a/mm/sparse.c @@ -781,7 +781,11 @@ static void section_deactivate(unsigned ms->usage = NULL; } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); - /* Mark the section invalid */ + /* + * Mark the section invalid so that valid_section() + * return false. This prevents code from dereferencing + * ms->usage array. + */ ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; } _ Patches currently in -mm which might be from aneesh.kumar(a)linux.ibm.com are mm-sparse-fix-kernel-crash-with-pfn_section_valid-check.patch mm-sparse-fix-kernel-crash-with-pfn_section_valid-check-v2.patch

5 years, 8 months

1
0
0 0

[merged] libfs-fix-infoleak-in-simple_attr_read.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: libfs: fix infoleak in simple_attr_read() has been removed from the -mm tree. Its filename was libfs-fix-infoleak-in-simple_attr_read.patch This patch was dropped because it was merged into mainline or a subsystem tree ------------------------------------------------------ From: Eric Biggers <ebiggers(a)google.com> Subject: libfs: fix infoleak in simple_attr_read() Reading from a debugfs file at a nonzero position, without first reading at position 0, leaks uninitialized memory to userspace. It's a bit tricky to do this, since lseek() and pread() aren't allowed on these files, and write() doesn't update the position on them. But writing to them with splice() *does* update the position: #define _GNU_SOURCE 1 #include <fcntl.h> #include <stdio.h> #include <unistd.h> int main() { int pipes[2], fd, n, i; char buf[32]; pipe(pipes); write(pipes[1], "0", 1); fd = open("/sys/kernel/debug/fault_around_bytes", O_RDWR); splice(pipes[0], NULL, fd, NULL, 1, 0); n = read(fd, buf, sizeof(buf)); for (i = 0; i < n; i++) printf("%02x", buf[i]); printf(" "); } Output: 5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a30 Fix the infoleak by making simple_attr_read() always fill simple_attr::get_buf if it hasn't been filled yet. Link: http://lkml.kernel.org/r/20200308023849.988264-1-ebiggers@kernel.org Fixes: acaefc25d21f ("[PATCH] libfs: add simple attribute files") Signed-off-by: Eric Biggers <ebiggers(a)google.com> Reported-by: syzbot+fcab69d1ada3e8d6f06b(a)syzkaller.appspotmail.com Reported-by: Alexander Potapenko <glider(a)google.com> Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: Arnd Bergmann <arnd(a)arndb.de> Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Cc: "Rafael J. 
Wysocki" <rafael(a)kernel.org> Cc: Kees Cook <keescook(a)chromium.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/libfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) --- a/fs/libfs.c~libfs-fix-infoleak-in-simple_attr_read +++ a/fs/libfs.c @@ -891,7 +891,7 @@ int simple_attr_open(struct inode *inode { struct simple_attr *attr; - attr = kmalloc(sizeof(*attr), GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; @@ -931,9 +931,11 @@ ssize_t simple_attr_read(struct file *fi if (ret) return ret; - if (*ppos) { /* continued read */ + if (*ppos && attr->get_buf[0]) { + /* continued read */ size = strlen(attr->get_buf); - } else { /* first read */ + } else { + /* first read */ u64 val; ret = attr->get(attr->data, &val); if (ret) _ Patches currently in -mm which might be from ebiggers(a)google.com are kmod-make-request_module-return-an-error-when-autoloading-is-disabled.patch fs-filesystemsc-downgrade-user-reachable-warn_once-to-pr_warn_once.patch docs-admin-guide-document-the-kernelmodprobe-sysctl.patch selftests-kmod-fix-handling-test-numbers-above-9.patch selftests-kmod-test-disabling-module-autoloading.patch

5 years, 8 months

1
0
0 0

[merged] x86-mm-split-vmalloc_sync_all.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: x86/mm: split vmalloc_sync_all() has been removed from the -mm tree. Its filename was x86-mm-split-vmalloc_sync_all.patch This patch was dropped because it was merged into mainline or a subsystem tree ------------------------------------------------------ From: Joerg Roedel <jroedel(a)suse.de> Subject: x86/mm: split vmalloc_sync_all() Commit 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in the vunmap() code-path. While this change was necessary to maintain correctness on x86-32-pae kernels, it also adds additional cycles for architectures that don't need it. Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported severe performance regressions in micro-benchmarks because it now also calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But the vmalloc_sync_all() implementation on x86-64 is only needed for newly created mappings. To avoid the unnecessary work on x86-64 and to gain the performance back, split up vmalloc_sync_all() into two functions: * vmalloc_sync_mappings(), and * vmalloc_sync_unmappings() Most call-sites to vmalloc_sync_all() only care about new mappings being synchronized. The only exception is the new call-site added in the above mentioned commit. Shile Zhang directed us to a report of an 80% regression in reaim throughput. Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK… Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.… Fixes: 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") Signed-off-by: Joerg Roedel <jroedel(a)suse.de> Reported-by: kernel test robot <oliver.sang(a)intel.com> Reported-by: Shile Zhang <shile.zhang(a)linux.alibaba.com> Acked-by: Rafael J. 
Wysocki <rafael.j.wysocki(a)intel.com> [GHES] Tested-by: Borislav Petkov <bp(a)suse.de> Cc: Dave Hansen <dave.hansen(a)linux.intel.com> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: Ingo Molnar <mingo(a)redhat.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- arch/x86/mm/fault.c | 26 ++++++++++++++++++++++++-- drivers/acpi/apei/ghes.c | 2 +- include/linux/vmalloc.h | 5 +++-- kernel/notifier.c | 2 +- mm/nommu.c | 10 +++++++--- mm/vmalloc.c | 11 +++++++---- 6 files changed, 43 insertions(+), 13 deletions(-) --- a/arch/x86/mm/fault.c~x86-mm-split-vmalloc_sync_all +++ a/arch/x86/mm/fault.c @@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pg return pmd_k; } -void vmalloc_sync_all(void) +static void vmalloc_sync(void) { unsigned long address; @@ -217,6 +217,16 @@ void vmalloc_sync_all(void) } } +void vmalloc_sync_mappings(void) +{ + vmalloc_sync(); +} + +void vmalloc_sync_unmappings(void) +{ + vmalloc_sync(); +} + /* * 32-bit: * @@ -319,11 +329,23 @@ out: #else /* CONFIG_X86_64: */ -void vmalloc_sync_all(void) +void vmalloc_sync_mappings(void) { + /* + * 64-bit mappings might allocate new p4d/pud pages + * that need to be propagated to all tasks' PGDs. + */ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); } +void vmalloc_sync_unmappings(void) +{ + /* + * Unmappings never allocate or free p4d/pud pages. + * No work is required here. + */ +} + /* * 64-bit: * --- a/drivers/acpi/apei/ghes.c~x86-mm-split-vmalloc_sync_all +++ a/drivers/acpi/apei/ghes.c @@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes) * New allocation must be visible in all pgd before it can be found by * an NMI allocating from the pool. 
*/ - vmalloc_sync_all(); + vmalloc_sync_mappings(); rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); if (rc) --- a/include/linux/vmalloc.h~x86-mm-split-vmalloc_sync_all +++ a/include/linux/vmalloc.h @@ -141,8 +141,9 @@ extern int remap_vmalloc_range_partial(s extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -void vmalloc_sync_all(void); - +void vmalloc_sync_mappings(void); +void vmalloc_sync_unmappings(void); + /* * Lowlevel-APIs (not for driver use!) */ --- a/kernel/notifier.c~x86-mm-split-vmalloc_sync_all +++ a/kernel/notifier.c @@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die); int register_die_notifier(struct notifier_block *nb) { - vmalloc_sync_all(); + vmalloc_sync_mappings(); return atomic_notifier_chain_register(&die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); --- a/mm/nommu.c~x86-mm-split-vmalloc_sync_all +++ a/mm/nommu.c @@ -370,10 +370,14 @@ void vm_unmap_aliases(void) EXPORT_SYMBOL_GPL(vm_unmap_aliases); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement a stub for vmalloc_sync_[un]mapping() if the architecture + * chose not to have one. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) +{ +} + +void __weak vmalloc_sync_unmappings(void) { } --- a/mm/vmalloc.c~x86-mm-split-vmalloc_sync_all +++ a/mm/vmalloc.c @@ -1295,7 +1295,7 @@ static bool __purge_vmap_area_lazy(unsig * First make sure the mappings are removed from all page-tables * before they are freed. */ - vmalloc_sync_all(); + vmalloc_sync_unmappings(); /* * TODO: to calculate a flush range without looping. @@ -3128,16 +3128,19 @@ int remap_vmalloc_range(struct vm_area_s EXPORT_SYMBOL(remap_vmalloc_range); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose + * not to have one. 
* * The purpose of this function is to make sure the vmalloc area * mappings are identical in all page-tables in the system. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) { } +void __weak vmalloc_sync_unmappings(void) +{ +} static int f(pte_t *pte, unsigned long addr, void *data) { _ Patches currently in -mm which might be from jroedel(a)suse.de are

5 years, 8 months

1
0
0 0

[merged] mm-slub-prevent-kmalloc_node-crashes-and-memory-leaks.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm, slub: prevent kmalloc_node crashes and memory leaks has been removed from the -mm tree. Its filename was mm-slub-prevent-kmalloc_node-crashes-and-memory-leaks.patch This patch was dropped because it was merged into mainline or a subsystem tree ------------------------------------------------------ From: Vlastimil Babka <vbabka(a)suse.cz> Subject: mm, slub: prevent kmalloc_node crashes and memory leaks Sachin reports [1] a crash in SLUB __slab_alloc(): BUG: Kernel NULL pointer dereference on read at 0x000073b0 Faulting instruction address: 0xc0000000003d55f4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 19 PID: 1 Comm: systemd Not tainted 5.6.0-rc2-next-20200218-autotest #1 NIP: c0000000003d55f4 LR: c0000000003d5b94 CTR: 0000000000000000 REGS: c0000008b37836d0 TRAP: 0300 Not tainted (5.6.0-rc2-next-20200218-autotest) MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 24004844 XER: 00000000 CFAR: c00000000000dec4 DAR: 00000000000073b0 DSISR: 40000000 IRQMASK: 1 GPR00: c0000000003d5b94 c0000008b3783960 c00000000155d400 c0000008b301f500 GPR04: 0000000000000dc0 0000000000000002 c0000000003443d8 c0000008bb398620 GPR08: 00000008ba2f0000 0000000000000001 0000000000000000 0000000000000000 GPR12: 0000000024004844 c00000001ec52a00 0000000000000000 0000000000000000 GPR16: c0000008a1b20048 c000000001595898 c000000001750c18 0000000000000002 GPR20: c000000001750c28 c000000001624470 0000000fffffffe0 5deadbeef0000122 GPR24: 0000000000000001 0000000000000dc0 0000000000000002 c0000000003443d8 GPR28: c0000008b301f500 c0000008bb398620 0000000000000000 c00c000002287180 NIP [c0000000003d55f4] ___slab_alloc+0x1f4/0x760 LR [c0000000003d5b94] __slab_alloc+0x34/0x60 Call Trace: [c0000008b3783960] [c0000000003d5734] ___slab_alloc+0x334/0x760 (unreliable) [c0000008b3783a40] [c0000000003d5b94] __slab_alloc+0x34/0x60 [c0000008b3783a70] [c0000000003d6fa0] __kmalloc_node+0x110/0x490 
[c0000008b3783af0] [c0000000003443d8] kvmalloc_node+0x58/0x110 [c0000008b3783b30] [c0000000003fee38] mem_cgroup_css_online+0x108/0x270 [c0000008b3783b90] [c000000000235aa8] online_css+0x48/0xd0 [c0000008b3783bc0] [c00000000023eaec] cgroup_apply_control_enable+0x2ec/0x4d0 [c0000008b3783ca0] [c000000000242318] cgroup_mkdir+0x228/0x5f0 [c0000008b3783d10] [c00000000051e170] kernfs_iop_mkdir+0x90/0xf0 [c0000008b3783d50] [c00000000043dc00] vfs_mkdir+0x110/0x230 [c0000008b3783da0] [c000000000441c90] do_mkdirat+0xb0/0x1a0 [c0000008b3783e20] [c00000000000b278] system_call+0x5c/0x68 This is a PowerPC platform with following NUMA topology: available: 2 nodes (0-1) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 1 size: 35247 MB node 1 free: 30907 MB node distances: node 0 1 0: 10 40 1: 40 10 possible numa nodes: 0-31 This only happens with a mmotm patch "mm/memcontrol.c: allocate shrinker_map on appropriate NUMA node" [2] which effectively calls kmalloc_node for each possible node. SLUB however only allocates kmem_cache_node on online N_NORMAL_MEMORY nodes, and relies on node_to_mem_node to return such valid node for other nodes since commit a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating on memoryless node"). This is however not true in this configuration where the _node_numa_mem_ array is not initialized for nodes 0 and 2-31, thus it contains zeroes and get_partial() ends up accessing non-allocated kmem_cache_node. A related issue was reported by Bharata (originally by Ramachandran) [3] where a similar PowerPC configuration, but with mainline kernel without patch [2] ends up allocating large amounts of pages by kmalloc-1k kmalloc-512. This seems to have the same underlying issue with node_to_mem_node() not behaving as expected, and might probably also lead to an infinite loop with CONFIG_SLUB_CPU_PARTIAL [4]. 
This patch should fix both issues by not relying on node_to_mem_node() anymore and instead simply falling back to NUMA_NO_NODE, when kmalloc_node(node) is attempted for a node that's not online, or has no usable memory. The "usable memory" condition is also changed from node_present_pages() to N_NORMAL_MEMORY node state, as that is exactly the condition that SLUB uses to allocate kmem_cache_node structures. The check in get_partial() is removed completely, as the checks in ___slab_alloc() are now sufficient to prevent get_partial() being reached with an invalid node. [1] https://lore.kernel.org/linux-next/3381CD91-AB3D-4773-BA04-E7A072A63968@lin… [2] https://lore.kernel.org/linux-mm/fff0e636-4c36-ed10-281c-8cdb0687c839@virtu… [3] https://lore.kernel.org/linux-mm/20200317092624.GB22538@in.ibm.com/ [4] https://lore.kernel.org/linux-mm/088b5996-faae-8a56-ef9c-5b567125ae54@suse.… Link: http://lkml.kernel.org/r/20200320115533.9604-1-vbabka@suse.cz Fixes: a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating on memoryless node") Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz> Reported-by: Sachin Sant <sachinp(a)linux.vnet.ibm.com> Tested-by: Sachin Sant <sachinp(a)linux.vnet.ibm.com> Reported-by: PUVICHAKRAVARTHY RAMACHANDRAN <puvichakravarthy(a)in.ibm.com> Tested-by: Bharata B Rao <bharata(a)linux.ibm.com> Debugged-by: Srikar Dronamraju <srikar(a)linux.vnet.ibm.com> Reviewed-by: Srikar Dronamraju <srikar(a)linux.vnet.ibm.com> Cc: Mel Gorman <mgorman(a)techsingularity.net> Cc: Michael Ellerman <mpe(a)ellerman.id.au> Cc: Michal Hocko <mhocko(a)kernel.org> Cc: Christopher Lameter <cl(a)linux.com> Cc: linuxppc-dev(a)lists.ozlabs.org Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com> Cc: Pekka Enberg <penberg(a)kernel.org> Cc: David Rientjes <rientjes(a)google.com> Cc: Kirill Tkhai <ktkhai(a)virtuozzo.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Nathan Lynch <nathanl(a)linux.ibm.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton 
<akpm(a)linux-foundation.org> --- mm/slub.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) --- a/mm/slub.c~mm-slub-prevent-kmalloc_node-crashes-and-memory-leaks +++ a/mm/slub.c @@ -1973,8 +1973,6 @@ static void *get_partial(struct kmem_cac if (node == NUMA_NO_NODE) searchnode = numa_mem_id(); - else if (!node_present_pages(node)) - searchnode = node_to_mem_node(node); object = get_partial_node(s, get_node(s, searchnode), c, flags); if (object || node != NUMA_NO_NODE) @@ -2563,17 +2561,27 @@ static void *___slab_alloc(struct kmem_c struct page *page; page = c->page; - if (!page) + if (!page) { + /* + * if the node is not online or has no normal memory, just + * ignore the node constraint + */ + if (unlikely(node != NUMA_NO_NODE && + !node_state(node, N_NORMAL_MEMORY))) + node = NUMA_NO_NODE; goto new_slab; + } redo: if (unlikely(!node_match(page, node))) { - int searchnode = node; - - if (node != NUMA_NO_NODE && !node_present_pages(node)) - searchnode = node_to_mem_node(node); - - if (unlikely(!node_match(page, searchnode))) { + /* + * same as above but node_match() being false already + * implies node != NUMA_NO_NODE + */ + if (!node_state(node, N_NORMAL_MEMORY)) { + node = NUMA_NO_NODE; + goto redo; + } else { stat(s, ALLOC_NODE_MISMATCH); deactivate_slab(s, page, c->freelist, c); goto new_slab; _ Patches currently in -mm which might be from vbabka(a)suse.cz are revert-topology-add-support-for-node_to_mem_node-to-determine-the-fallback-node.patch mm-compaction-fully-assume-capture-is-not-null-in-compact_zone_order.patch mm-hugetlb-remove-unnecessary-memory-fetch-in-pageheadhuge.patch

5 years, 8 months

1
0
0 0

[merged] epoll-fix-possible-lost-wakeup-on-epoll_ctl-path.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: epoll: fix possible lost wakeup on epoll_ctl() path has been removed from the -mm tree. Its filename was epoll-fix-possible-lost-wakeup-on-epoll_ctl-path.patch This patch was dropped because it was merged into mainline or a subsystem tree ------------------------------------------------------ From: Roman Penyaev <rpenyaev(a)suse.de> Subject: epoll: fix possible lost wakeup on epoll_ctl() path This fixes possible lost wakeup introduced by commit a218cc491420. Originally modifications to ep->wq were serialized by ep->wq.lock, but in a218cc491420 a new rw lock was introduced in order to relax the fd event path, i.e. callers of the ep_poll_callback() function. After the change ep_modify and ep_insert (both are called on the epoll_ctl() path) were switched to ep->lock, but ep_poll (epoll_wait) was still using ep->wq.lock on wqueue list modification. The bug doesn't lead to any wqueue list corruptions, because the wake up path and list modifications were serialized by ep->wq.lock internally, but the actual waitqueue_active() check prior to the wake_up() call can be reordered with modifications of the ep ready list, thus a wake up can be lost. And yes, this can be healed by an explicit smp_mb(): list_add_tail(&epi->rdlink, &ep->rdllist); smp_mb(); if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); But let's make it simple, thus the current patch replaces ep->wq.lock with ep->lock for wqueue modifications, so the wake up path always observes activeness of the wqueue correctly. 
Link: http://lkml.kernel.org/r/20200214170211.561524-1-rpenyaev@suse.de Fixes: a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention") References: https://bugzilla.kernel.org/show_bug.cgi?id=205933 Signed-off-by: Roman Penyaev <rpenyaev(a)suse.de> Reported-by: Max Neunhoeffer <max(a)arangodb.com> Bisected-by: Max Neunhoeffer <max(a)arangodb.com> Tested-by: Max Neunhoeffer <max(a)arangodb.com> Cc: Jakub Kicinski <kuba(a)kernel.org> Cc: Christopher Kohlhoff <chris.kohlhoff(a)clearpool.io> Cc: Davidlohr Bueso <dbueso(a)suse.de> Cc: Jason Baron <jbaron(a)akamai.com> Cc: Jes Sorensen <jes.sorensen(a)gmail.com> Cc: <stable(a)vger.kernel.org> [5.1+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/eventpoll.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) --- a/fs/eventpoll.c~epoll-fix-possible-lost-wakeup-on-epoll_ctl-path +++ a/fs/eventpoll.c @@ -1854,9 +1854,9 @@ fetch_events: waiter = true; init_waitqueue_entry(&wait, current); - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); __add_wait_queue_exclusive(&ep->wq, &wait); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } for (;;) { @@ -1904,9 +1904,9 @@ send_events: goto fetch_events; if (waiter) { - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); __remove_wait_queue(&ep->wq, &wait); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } return res; _ Patches currently in -mm which might be from rpenyaev(a)suse.de are kselftest-introduce-new-epoll-test-case.patch

5 years, 8 months

1
0
0 0

[merged] mm-do-not-allow-madv_pageout-for-cow-pages.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm: do not allow MADV_PAGEOUT for CoW pages has been removed from the -mm tree. Its filename was mm-do-not-allow-madv_pageout-for-cow-pages.patch This patch was dropped because it was merged into mainline or a subsystem tree ------------------------------------------------------ From: Michal Hocko <mhocko(a)suse.com> Subject: mm: do not allow MADV_PAGEOUT for CoW pages Jann has brought up a very interesting point [1]. While shared pages are excluded from MADV_PAGEOUT normally, CoW pages can be easily reclaimed that way. This can lead to all sorts of hard to debug problems. E.g. performance problems outlined by Daniel [2]. There are runtime environments where there is a substantial memory shared among security domains via CoW memory and an easy way to reclaim that memory, which MADV_{COLD,PAGEOUT} offers, can lead to either performance degradation for the parent process, which might be more privileged, or even open side channel attacks. The feasibility of the latter is not really clear to me TBH but there is no real reason for exposure at this stage. It seems there is no real use case to depend on reclaiming CoW memory via madvise at this stage so it is much easier to simply disallow it and this is what this patch does. Put simply, MADV_{PAGEOUT,COLD} can operate only on the exclusively owned memory which is a straightforward semantic. 
[1] http://lkml.kernel.org/r/CAG48ez0G3JkMq61gUmyQAaCq=_TwHbi1XKzWRooxZkv08PQKu… [2] http://lkml.kernel.org/r/CAKOZueua_v8jHCpmEtTB6f3i9e2YnmX4mqdYVWhV4E=Z-n+zR… Link: http://lkml.kernel.org/r/20200312082248.GS23944@dhcp22.suse.cz Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD") Signed-off-by: Michal Hocko <mhocko(a)suse.com> Reported-by: Jann Horn <jannh(a)google.com> Acked-by: Vlastimil Babka <vbabka(a)suse.cz> Cc: Minchan Kim <minchan(a)kernel.org> Cc: Daniel Colascione <dancol(a)google.com> Cc: Dave Hansen <dave.hansen(a)intel.com> Cc: "Joel Fernandes (Google)" <joel(a)joelfernandes.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/madvise.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) --- a/mm/madvise.c~mm-do-not-allow-madv_pageout-for-cow-pages +++ a/mm/madvise.c @@ -335,12 +335,14 @@ static int madvise_cold_or_pageout_pte_r } page = pmd_page(orig_pmd); + + /* Do not interfere with other mappings of this page */ + if (page_mapcount(page) != 1) + goto huge_unlock; + if (next - addr != HPAGE_PMD_SIZE) { int err; - if (page_mapcount(page) != 1) - goto huge_unlock; - get_page(page); spin_unlock(ptl); lock_page(page); @@ -426,6 +428,10 @@ regular_page: continue; } + /* Do not interfere with other mappings of this page */ + if (page_mapcount(page) != 1) + continue; + VM_BUG_ON_PAGE(PageTransCompound(page), page); if (pte_young(ptent)) { _ Patches currently in -mm which might be from mhocko(a)suse.com are selftests-vm-drop-dependencies-on-page-flags-from-mlock2-tests.patch

5 years, 8 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror March 2020