Hi Oscar,
On Wed, 10 Mar 2021 at 03:11, Oscar Salvador osalvador@suse.de wrote:
When sizeof(struct page) is not a power of 2, sections do not span a PMD anymore and so when populating them some parts of the PMD will remain unused. Because of this, PMDs will be left behind when depopulating sections since remove_pmd_table() thinks that those unused parts are still in use.
Fix this by marking the unused parts with PAGE_UNUSED, so memchr_inv() will do the right thing and will let us free the PMD when the last user of it is gone.
This patch is based on a similar patch by David Hildenbrand:
https://lore.kernel.org/linux-mm/20200722094558.9828-9-david@redhat.com/
Signed-off-by: Oscar Salvador osalvador@suse.de Reviewed-by: David Hildenbrand david@redhat.com Acked-by: Dave Hansen dave.hansen@linux.intel.com
arch/x86/mm/init_64.c | 65 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 12 deletions(-)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9ecb3c488ac8..d93b36856ed3 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -826,6 +826,51 @@ void __init paging_init(void) zone_sizes_init(); }
+#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define PAGE_UNUSED 0xFD
+/* Returns true if the PMD is completely unused and thus it can be freed */ +static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end) +{
unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
memset((void *)addr, PAGE_UNUSED, end - addr);
return !memchr_inv((void *)start, PAGE_UNUSED, PMD_SIZE);
+}
+static void __meminit vmemmap_use_sub_pmd(unsigned long start) +{
/*
* As we expect to add in the same granularity as we remove, it's
* sufficient to mark only some piece used to block the memmap page from
* getting removed when removing some other adjacent memmap (just in
* case the first memmap never gets initialized e.g., because the memory
* block never gets onlined).
*/
memset((void *)start, 0, sizeof(struct page));
+}
+static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) +{
/*
* Could be our memmap page is filled with PAGE_UNUSED already from a
* previous remove. Make sure to reset it.
*/
vmemmap_use_sub_pmd(start);
/*
* Mark with PAGE_UNUSED the unused parts of the new memmap range
*/
if (!IS_ALIGNED(start, PMD_SIZE))
memset((void *)start, PAGE_UNUSED,
start - ALIGN_DOWN(start, PMD_SIZE));
if (!IS_ALIGNED(end, PMD_SIZE))
memset((void *)end, PAGE_UNUSED,
ALIGN(end, PMD_SIZE) - end);
+} +#endif
/*
- Memory hotplug specific functions
*/ @@ -871,8 +916,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return add_pages(nid, start_pfn, nr_pages, params); }
-#define PAGE_INUSE 0xFD
static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; @@ -1006,7 +1049,6 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, unsigned long next, pages = 0; pte_t *pte_base; pmd_t *pmd;
void *page_addr; pmd = pmd_start + pmd_index(addr); for (; addr < end; addr = next, pmd++) {
@@ -1026,20 +1068,13 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); pages++;
} else {
/* If here, we are freeing vmemmap pages. */
memset((void *)addr, PAGE_INUSE, next - addr);
page_addr = page_address(pmd_page(*pmd));
if (!memchr_inv(page_addr, PAGE_INUSE,
PMD_SIZE)) {
} else if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) &&
vmemmap_pmd_is_unused(addr, next)) { free_hugepage_table(pmd_page(*pmd), altmap);
spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock);
} } continue;
@@ -1492,11 +1527,17 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
addr_end = addr + PMD_SIZE; p_end = p + PMD_SIZE;
if (!IS_ALIGNED(addr, PMD_SIZE) ||
!IS_ALIGNED(next, PMD_SIZE))
vmemmap_use_new_sub_pmd(addr, next);
continue; } else if (altmap) return -ENOMEM; /* no fallback */ } else if (pmd_large(*pmd)) { vmemmap_verify((pte_t *)pmd, node, addr, next);
vmemmap_use_sub_pmd(addr);
While building the linux next 20210310 tag for x86_64 architecture with clang-12 and gcc-9 the following warnings / errors were noticed.
arch/x86/mm/init_64.c:1585:6: error: implicit declaration of function 'vmemmap_use_new_sub_pmd' [-Werror,-Wimplicit-function-declaration] vmemmap_use_new_sub_pmd(addr, next); ^ arch/x86/mm/init_64.c:1591:4: error: implicit declaration of function 'vmemmap_use_sub_pmd' [-Werror,-Wimplicit-function-declaration] vmemmap_use_sub_pmd(addr, next); ^ 2 errors generated. make[3]: *** [scripts/Makefile.build:271: arch/x86/mm/init_64.o] Error 1
Reported-by: Naresh Kamboju naresh.kamboju@linaro.org
Steps to reproduce: ------------------- # TuxMake is a command line tool and Python library that provides # portable and repeatable Linux kernel builds across a variety of # architectures, toolchains, kernel configurations, and make targets. # # TuxMake supports the concept of runtimes. # See https://docs.tuxmake.org/runtimes/, for that to work it requires # that you install podman or docker on your system. # # To install tuxmake on your system globally: # sudo pip3 install -U tuxmake # # See https://docs.tuxmake.org/ for complete documentation.
tuxmake --runtime podman --target-arch x86_64 --toolchain clang-12 --kconfig defconfig --kconfig-add https://builds.tuxbuild.com/1pYCPt4WlgSfSdv1BULm6ABINeJ/config
Build pipeline error link, https://gitlab.com/Linaro/lkft/mirrors/next/linux-next/-/jobs/1085496613#L42...
On Wed, Mar 10, 2021 at 11:37:53AM +0530, Naresh Kamboju wrote:
Hi Oscar,
Hi Naresh,
While building the linux next 20210310 tag for x86_64 architecture with clang-12 and gcc-9 the following warnings / errors were noticed.
arch/x86/mm/init_64.c:1585:6: error: implicit declaration of function 'vmemmap_use_new_sub_pmd' [-Werror,-Wimplicit-function-declaration] vmemmap_use_new_sub_pmd(addr, next); ^ arch/x86/mm/init_64.c:1591:4: error: implicit declaration of function 'vmemmap_use_sub_pmd' [-Werror,-Wimplicit-function-declaration] vmemmap_use_sub_pmd(addr, next); ^ 2 errors generated. make[3]: *** [scripts/Makefile.build:271: arch/x86/mm/init_64.o] Error 1
Reported-by: Naresh Kamboju naresh.kamboju@linaro.org
Yes, this was also reported by Zi Yan here [1]. Looking into your .config, seems to be the same issue as you have CONFIG_SPARSE_VMEMMAP but !CONFIG_MEMORY_HOTPLUG.
This version fixes those compilation errors.
Thanks for reporting it anyway!
[1] https://lore.kernel.org/linux-mm/YEfoH8US4YVxak7r@localhost.localdomain/T/#m...
Steps to reproduce:
# TuxMake is a command line tool and Python library that provides # portable and repeatable Linux kernel builds across a variety of # architectures, toolchains, kernel configurations, and make targets. # # TuxMake supports the concept of runtimes. # See https://docs.tuxmake.org/runtimes/, for that to work it requires # that you install podman or docker on your system. # # To install tuxmake on your system globally: # sudo pip3 install -U tuxmake # # See https://docs.tuxmake.org/ for complete documentation.
tuxmake --runtime podman --target-arch x86_64 --toolchain clang-12 --kconfig defconfig --kconfig-add https://builds.tuxbuild.com/1pYCPt4WlgSfSdv1BULm6ABINeJ/config
Build pipeline error link, https://gitlab.com/Linaro/lkft/mirrors/next/linux-next/-/jobs/1085496613#L42...
-- Linaro LKFT https://lkft.linaro.org