Le 15/09/2020 à 09:23, David Hildenbrand a écrit :
On 14.09.20 18:50, Laurent Dufour wrote:
At boot time, or when doing memory hot-add operations, if the links in sysfs can't be created, the system is still able to run, so just report the error in the kernel log rather than calling BUG_ON() and potentially making the system unusable, because the call path can be reached with locks held.
Since the number of memory blocks managed could be high, the messages are rate limited.
As a consequence, link_mem_sections() has no status to report anymore.
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/node.c  | 33 +++++++++++++++++++++------------
 include/linux/node.h | 16 +++++++---------
 mm/memory_hotplug.c  |  5 ++---
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 01ee73c9d675..249b2ba6dc81 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -761,8 +761,8 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 	return pfn_to_nid(pfn);
 }
 
-static int do_register_memory_block_under_node(int nid,
-					       struct memory_block *mem_blk)
+static void do_register_memory_block_under_node(int nid,
+						struct memory_block *mem_blk)
 {
 	int ret;
 
@@ -775,12 +775,19 @@ static int do_register_memory_block_under_node(int nid,
 	ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
 				       &mem_blk->dev.kobj,
 				       kobject_name(&mem_blk->dev.kobj));
-	if (ret)
-		return ret;
+	if (ret && ret != -EEXIST)
+		dev_err_ratelimited(&node_devices[nid]->dev,
+				    "can't create link to %s in sysfs (%d)\n",
+				    kobject_name(&mem_blk->dev.kobj), ret);
 
-	return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
+	ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj,
 				&node_devices[nid]->dev.kobj,
 				kobject_name(&node_devices[nid]->dev.kobj));
+	if (ret && ret != -EEXIST)
+		dev_err_ratelimited(&mem_blk->dev,
+				    "can't create link to %s in sysfs (%d)\n",
+				    kobject_name(&node_devices[nid]->dev.kobj),
+				    ret);
 }
 
 /* register memory section under specified node if it spans that node */
@@ -817,7 +824,8 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
 			continue;
 
 		/* The memory block is registered to the first matching node */
-		return do_register_memory_block_under_node(nid, mem_blk);
+		do_register_memory_block_under_node(nid, mem_blk);
+		return 0;
 	}
 	/* mem section does not span the specified node */
 	return 0;
@@ -832,7 +840,8 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
 {
 	int nid = *(int *)arg;
 
-	return do_register_memory_block_under_node(nid, mem_blk);
+	do_register_memory_block_under_node(nid, mem_blk);
+	return 0;
 }
 
 /*
@@ -850,8 +859,8 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
 			  kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
 }
 
-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
-		      enum meminit_context context)
+void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
+		       enum meminit_context context)
 {
 	walk_memory_blocks_func_t func;
 
@@ -860,9 +869,9 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
 	else
 		func = register_mem_block_under_node_early;
 
-	return walk_memory_blocks(PFN_PHYS(start_pfn),
-				  PFN_PHYS(end_pfn - start_pfn), (void *)&nid,
-				  func);
+	walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
+			   (void *)&nid, func);
+	return;
 }
 
 #ifdef CONFIG_HUGETLBFS
diff --git a/include/linux/node.h b/include/linux/node.h
index 014ba3ab2efd..8e5a29897936 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -99,15 +99,14 @@ extern struct node *node_devices[];
 typedef  void (*node_registration_func_t)(struct node *);
 
 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
-int link_mem_sections(int nid, unsigned long start_pfn,
-		      unsigned long end_pfn,
-		      enum meminit_context context);
+void link_mem_sections(int nid, unsigned long start_pfn,
+		       unsigned long end_pfn,
+		       enum meminit_context context);
 #else
-static inline int link_mem_sections(int nid, unsigned long start_pfn,
-				    unsigned long end_pfn,
-				    enum meminit_context context)
+static inline void link_mem_sections(int nid, unsigned long start_pfn,
+				     unsigned long end_pfn,
+				     enum meminit_context context)
 {
-	return 0;
 }
 #endif
 
@@ -130,8 +129,7 @@ static inline int register_one_node(int nid)
 		if (error)
 			return error;
 		/* link memory sections under this node */
-		error = link_mem_sections(nid, start_pfn, end_pfn,
-					  MEMINIT_EARLY);
+		link_mem_sections(nid, start_pfn, end_pfn, MEMINIT_EARLY);
 	}
 
 	return error;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 03df20078827..01e01a530d38 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1080,9 +1080,8 @@ int __ref add_memory_resource(int nid, struct resource *res)
 	}
 
 	/* link memory sections under this node.*/
-	ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
-				MEMINIT_HOTPLUG);
-	BUG_ON(ret);
+	link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
+			  MEMINIT_HOTPLUG);
 
 	/* create new memmap entry */
 	if (!strcmp(res->name, "System RAM"))

I just remembered that I still have some cleanup patches lying around that rework the whole node onlining on the add_memory() path, making it able to fail in a nice way rather than ignoring errors. Anyhow, this is good enough for now.
Acked-by: David Hildenbrand <david@redhat.com>
Thanks David.