Because of the maxnode bug there is no way to bind or migrate pages to the
last node in a multi-node NUMA system unless you lie about maxnode
when making the mbind, set_mempolicy or migrate_pages syscall.
The man pages for those syscalls describe maxnode as the number of bits in
the node bitmap ("bit mask of nodes containing up to maxnode bits").
Thus if maxnode is n we expect an n-bit bitmap, which means that the
mask of valid bits is ((1 << n) - 1). The get_nodes() decrement leads
to the mask being ((1 << (n - 1)) - 1) instead.
The three syscalls use a common helper, get_nodes(), and the first thing
this helper does is decrement maxnode by 1, which leads to using n-1 bits
of the provided mask of nodes (see get_bitmap(), a helper function of
get_nodes()).
This leads to two bugs: either the last node in the provided bitmap is
not used by any of the three syscalls, or the syscall errors out and
returns EINVAL if the only bit set in the bitmap was the last bit in
the mask of nodes (that bit is ignored because of the bug, and an
empty mask of nodes is an invalid argument).
I am surprised this bug was never caught ... it has been in the kernel
since forever.
People can use the following function to detect if the kernel has the
bug:
/*
 * Probe whether the running kernel has the maxnode bug.
 *
 * Calls set_mempolicy(MPOL_BIND) with a one-bit node mask in which only
 * bit 0 is set. The bug is reported when that call fails with EINVAL.
 * The memory policy is reset to MPOL_DEFAULT before returning, whether or
 * not the probe call succeeded.
 *
 * Returns true when the bug was detected, false otherwise.
 */
bool kernel_has_maxnodes_bug(void)
{
    unsigned long first_node_only = 1;
    bool buggy = false;

    /* errno is only meaningful when the call actually failed. */
    if (set_mempolicy(MPOL_BIND, &first_node_only, 1) != 0) {
        buggy = (errno == EINVAL);
    }

    /* Do not leave the probe's policy installed on the calling thread. */
    set_mempolicy(MPOL_DEFAULT, NULL, 0);

    return buggy;
}
You can test it with any of the three programs below:
gcc mbind.c -o mbind -lnuma
gcc set_mempolicy.c -o set_mempolicy -lnuma
gcc migrate_pages.c -o migrate_pages -lnuma
The first argument is maxnode, the second argument is the bit index to set
in the node mask (0 sets the first bit, 1 the second bit, ...).
./mbind 2 1 & sleep 2 && numastat -n -p `pidof mbind` && fg
./set_mempolicy 2 1 & sleep 2 && numastat -n -p `pidof set_mempolicy` && fg
./migrate_pages 2 1 & sleep 2 && numastat -n -p `pidof migrate_pages` && fg
mbind.c %< ----------------------------------------------------------
/*
 * Map `size` bytes of private, anonymous, readable and writable memory.
 *
 * Returns the start of the mapping, or NULL when mmap() reports
 * MAP_FAILED.
 */
void *anon_mem(size_t size)
{
    void *mapping = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANON, -1, 0);

    if (mapping == MAP_FAILED) {
        return NULL;
    }
    return mapping;
}
/*
 * Round `v` up to a multiple of `m`.
 *
 * A modulus of 0 returns `v` unchanged. Note that a value that is already
 * an exact multiple of `m` is still bumped to the NEXT multiple
 * (mround(8, 8) == 16), so the result is always strictly greater than `v`
 * when `m` is non-zero.
 */
unsigned long mround(unsigned long v, unsigned long m)
{
    unsigned long pad;

    if (m == 0) {
        return v;
    }

    /* Distance to the next multiple; equals m when v % m == 0. */
    pad = m - (v % m);
    return v + pad;
}
/*
 * Set bit number `b` in the bitmap starting at `_bitmap`.
 *
 * The bitmap is addressed byte by byte: bit `b` lives in byte b / 8 at
 * position b % 8 (least significant bit first). No bounds checking is
 * done; the caller must make sure the bitmap is large enough.
 *
 * NOTE(review): callers pass an unsigned long array; byte-wise addressing
 * presumably matches the word-wise bit numbering only on little-endian
 * machines -- confirm if this is ever run elsewhere.
 */
void bitmap_set(void *_bitmap, unsigned long b)
{
    uint8_t *bytes = _bitmap;
    unsigned long byte_index = b / 8;
    unsigned long bit_in_byte = b % 8;

    bytes[byte_index] |= (1 << bit_in_byte);
}
/*
 * mbind test: bind an anonymous mapping to one node with mbind().
 *
 * argv[1] is the maxnode value handed to mbind(), argv[2] is the index of
 * the bit to set in the node mask. After a successful bind every page of
 * the mapping is written to, then the program blocks in getchar() so that
 * an external tool can inspect the process.
 *
 * Returns 0 on success and -1 on any failure.
 */
int main(int argc, char *argv[])
{
    unsigned long *nodemask, maxnode, node;
    size_t bytes;
    int8_t *mem;
    long res;

    if (argv[1] == NULL || argv[2] == NULL) {
        printf("missing argument: %s maxnodes node\n", argv[0]);
        return -1;
    }
    maxnode = atoi(argv[1]);
    node = atoi(argv[2]);

    /* Bitmap size in bytes, padded to a whole number of unsigned longs. */
    bytes = mround(mround(maxnode, 8) >> 3,
                   sizeof(unsigned long));
    nodemask = calloc(bytes, 1);
    mem = anon_mem(NPAGES << 12);
    if (!mem || !nodemask) {
        return -1;
    }

    /* Refuse a bit index that would be written outside the bitmap. */
    if (node >= bytes * 8) {
        printf("node %lu does not fit in a %zu byte nodemask\n",
               node, bytes);
        return -1;
    }

    // Try to bind memory to node
    bitmap_set(nodemask, node);
    res = mbind(mem, NPAGES << 12, MPOL_BIND,
                nodemask, maxnode, 0);
    if (res) {
        /* maxnode is unsigned long, so it must be printed with %lu. */
        printf("mbind(mem, NPAGES << 12, MPOL_BIND, "
               "nodemask, %lu, 0) failed with %d\n",
               maxnode, errno);
        return -1;
    }

    // Write something to breakup from the zero page
    // (one byte every 4096 bytes, presumably the page size)
    for (unsigned page = 0; page < NPAGES; page++) {
        mem[page << 12] = page + 1;
    }

    // Allow numastats to gather statistics
    getchar();
    return 0;
}
set_mempolicy %< ----------------------------------------------------
/*
 * Map `size` bytes of private, anonymous, readable and writable memory.
 *
 * Returns the start of the mapping, or NULL when mmap() reports
 * MAP_FAILED.
 */
void *anon_mem(size_t size)
{
    void *mapping = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANON, -1, 0);

    if (mapping == MAP_FAILED) {
        return NULL;
    }
    return mapping;
}
/*
 * Round `v` up to a multiple of `m`.
 *
 * A modulus of 0 returns `v` unchanged. Note that a value that is already
 * an exact multiple of `m` is still bumped to the NEXT multiple
 * (mround(8, 8) == 16), so the result is always strictly greater than `v`
 * when `m` is non-zero.
 */
unsigned long mround(unsigned long v, unsigned long m)
{
    unsigned long pad;

    if (m == 0) {
        return v;
    }

    /* Distance to the next multiple; equals m when v % m == 0. */
    pad = m - (v % m);
    return v + pad;
}
/*
 * Set bit number `b` in the bitmap starting at `_bitmap`.
 *
 * The bitmap is addressed byte by byte: bit `b` lives in byte b / 8 at
 * position b % 8 (least significant bit first). No bounds checking is
 * done; the caller must make sure the bitmap is large enough.
 *
 * NOTE(review): callers pass an unsigned long array; byte-wise addressing
 * presumably matches the word-wise bit numbering only on little-endian
 * machines -- confirm if this is ever run elsewhere.
 */
void bitmap_set(void *_bitmap, unsigned long b)
{
    uint8_t *bytes = _bitmap;
    unsigned long byte_index = b / 8;
    unsigned long bit_in_byte = b % 8;

    bytes[byte_index] |= (1 << bit_in_byte);
}
/*
 * set_mempolicy test: switch the memory policy to a single node.
 *
 * argv[1] is the maxnode value handed to the second set_mempolicy() call,
 * argv[2] is the index of the bit to set in its node mask. The program
 * first binds to node 0, then applies the requested mask, writes to every
 * page of a fresh anonymous mapping and blocks in getchar() so that an
 * external tool can inspect the process.
 *
 * Returns 0 on success and -1 on any failure.
 */
int main(int argc, char *argv[])
{
    unsigned long *nodemask, maxnode, node;
    /* Mask with only bit 0 set, i.e. node 0. */
    unsigned long node0_mask = 1;
    /*
     * maxnode for the node-0 bind. It is 2 rather than 1 so that bit 0 is
     * still honoured by kernels that ignore the last bit of the mask.
     */
    const unsigned long node0_maxnode = 2;
    size_t bytes;
    int8_t *mem;
    long res;

    if (argv[1] == NULL || argv[2] == NULL) {
        printf("missing argument: %s maxnodes node\n", argv[0]);
        return -1;
    }
    maxnode = atoi(argv[1]);
    node = atoi(argv[2]);

    // bind memory to node 0 ...
    // (the mask must be passed by address, not by value)
    res = set_mempolicy(MPOL_BIND, &node0_mask, node0_maxnode);
    if (res) {
        /* Report the maxnode that was really used for this call. */
        printf("set_mempolicy(MPOL_BIND, []=1, %lu) "
               "failed with %d\n", node0_maxnode, errno);
        return -1;
    }

    /* Bitmap size in bytes, padded to a whole number of unsigned longs. */
    bytes = mround(mround(maxnode, 8) >> 3,
                   sizeof(unsigned long));
    nodemask = calloc(bytes, 1);
    mem = anon_mem(NPAGES << 12);
    if (!mem || !nodemask) {
        return -1;
    }

    /* Refuse a bit index that would be written outside the bitmap. */
    if (node >= bytes * 8) {
        printf("node %lu does not fit in a %zu byte nodemask\n",
               node, bytes);
        return -1;
    }

    // Try to bind memory to node
    bitmap_set(nodemask, node);
    res = set_mempolicy(MPOL_BIND, nodemask, maxnode);
    if (res) {
        /* maxnode is unsigned long, so it must be printed with %lu. */
        printf("set_mempolicy(MPOL_BIND, nodemask, %lu) "
               "failed with %d\n", maxnode, errno);
        return -1;
    }

    // Write something to breakup from the zero page
    // (one byte every 4096 bytes, presumably the page size)
    for (unsigned page = 0; page < NPAGES; page++) {
        mem[page << 12] = page + 1;
    }

    // Allow numastats to gather statistics
    getchar();
    return 0;
}
migrate_pages %< ----------------------------------------------------
/*
 * Map `size` bytes of private, anonymous, readable and writable memory.
 *
 * Returns the start of the mapping, or NULL when mmap() reports
 * MAP_FAILED.
 */
void *anon_mem(size_t size)
{
    void *mapping = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANON, -1, 0);

    if (mapping == MAP_FAILED) {
        return NULL;
    }
    return mapping;
}
/*
 * Round `v` up to a multiple of `m`.
 *
 * A modulus of 0 returns `v` unchanged. Note that a value that is already
 * an exact multiple of `m` is still bumped to the NEXT multiple
 * (mround(8, 8) == 16), so the result is always strictly greater than `v`
 * when `m` is non-zero.
 */
unsigned long mround(unsigned long v, unsigned long m)
{
    unsigned long pad;

    if (m == 0) {
        return v;
    }

    /* Distance to the next multiple; equals m when v % m == 0. */
    pad = m - (v % m);
    return v + pad;
}
/*
 * Set bit number `b` in the bitmap starting at `_bitmap`.
 *
 * The bitmap is addressed byte by byte: bit `b` lives in byte b / 8 at
 * position b % 8 (least significant bit first). No bounds checking is
 * done; the caller must make sure the bitmap is large enough.
 *
 * NOTE(review): callers pass an unsigned long array; byte-wise addressing
 * presumably matches the word-wise bit numbering only on little-endian
 * machines -- confirm if this is ever run elsewhere.
 */
void bitmap_set(void *_bitmap, unsigned long b)
{
    uint8_t *bytes = _bitmap;
    unsigned long byte_index = b / 8;
    unsigned long bit_in_byte = b % 8;

    bytes[byte_index] |= (1 << bit_in_byte);
}
/*
 * migrate_pages test: move this process's pages from node 0 to one node.
 *
 * argv[1] is the maxnode value handed to migrate_pages(), argv[2] is the
 * index of the bit to set in the destination node mask. The program binds
 * to node 0, writes to every page of an anonymous mapping, asks the kernel
 * to migrate the pages and then blocks in getchar() so that an external
 * tool can inspect the process.
 *
 * Returns 0 on success and -1 on any failure.
 */
int main(int argc, char *argv[])
{
    unsigned long *old_nodes, *new_nodes, maxnode, node;
    /* Mask with only bit 0 set, i.e. node 0. */
    unsigned long node0_mask = 1;
    /*
     * maxnode for the node-0 bind. It is 2 rather than 1 so that bit 0 is
     * still honoured by kernels that ignore the last bit of the mask.
     */
    const unsigned long node0_maxnode = 2;
    size_t bytes;
    int8_t *mem;
    long res;

    if (argv[1] == NULL || argv[2] == NULL) {
        printf("missing argument: %s maxnodes node\n", argv[0]);
        return -1;
    }
    maxnode = atoi(argv[1]);
    node = atoi(argv[2]);

    // bind memory to node 0 ...
    res = set_mempolicy(MPOL_BIND, &node0_mask, node0_maxnode);
    if (res) {
        /* Report the maxnode that was really used for this call. */
        printf("set_mempolicy(MPOL_BIND, []=1, %lu) "
               "failed with %d\n", node0_maxnode, errno);
        return -1;
    }

    /* Bitmap size in bytes, padded to a whole number of unsigned longs. */
    bytes = mround(mround(maxnode, 8) >> 3,
                   sizeof(unsigned long));
    old_nodes = calloc(bytes, 1);
    new_nodes = calloc(bytes, 1);
    mem = anon_mem(NPAGES << 12);
    if (!mem || !new_nodes || !old_nodes) {
        return -1;
    }

    /* Refuse a bit index that would be written outside the bitmap. */
    if (node >= bytes * 8) {
        printf("node %lu does not fit in a %zu byte nodemask\n",
               node, bytes);
        return -1;
    }

    // Write something to breakup from the zero page
    // (one byte every 4096 bytes, presumably the page size)
    for (unsigned page = 0; page < NPAGES; page++) {
        mem[page << 12] = page + 1;
    }

    // Try to bind memory to node
    bitmap_set(old_nodes, 0);
    bitmap_set(new_nodes, node);
    res = migrate_pages(getpid(), maxnode,
                        old_nodes, new_nodes);
    if (res < 0) {
        /* maxnode is unsigned long, so it must be printed with %lu. */
        printf("migrate_pages(pid, %lu, old_nodes, "
               "new_nodes) failed with %d\n",
               maxnode, errno);
        return -1;
    }
    if (res > 0) {
        /*
         * A positive return is a count of pages that could not be moved;
         * errno is not set in that case, so do not print it.
         */
        printf("migrate_pages(pid, %lu, old_nodes, "
               "new_nodes) could not move %ld page(s)\n",
               maxnode, res);
        return -1;
    }

    // Allow numastats to gather statistics
    getchar();
    return 0;
}
Signed-off-by: Jérôme Glisse <jglisse(a)google.com>
To: Andrew Morton <akpm(a)linux-foundation.org>
To: linux-mm(a)kvack.org
Cc: linux-kernel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
mm/mempolicy.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index aec756ae5637..658e5366d266 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1434,7 +1434,6 @@ static int get_bitmap(unsigned long *mask, const unsigned long __user *nmask,
static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
unsigned long maxnode)
{
- --maxnode;
nodes_clear(*nodes);
if (maxnode == 0 || !nmask)
return 0;
--
2.45.2.1089.g2a221341d9-goog
The patch titled
Subject: crash: fix x86_32 crash memory reserve dead loop bug at high
has been added to the -mm mm-nonmm-unstable branch. Its filename is
crash-fix-x86_32-crash-memory-reserve-dead-loop-bug-at-high.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Jinjie Ruan <ruanjinjie(a)huawei.com>
Subject: crash: fix x86_32 crash memory reserve dead loop bug at high
Date: Thu, 18 Jul 2024 11:54:43 +0800
On x86_32 Qemu machine with 1GB memory, the cmdline "crashkernel=512M" will
also cause system stall as below:
ACPI: Reserving FACP table memory at [mem 0x3ffe18b8-0x3ffe192b]
ACPI: Reserving DSDT table memory at [mem 0x3ffe0040-0x3ffe18b7]
ACPI: Reserving FACS table memory at [mem 0x3ffe0000-0x3ffe003f]
ACPI: Reserving APIC table memory at [mem 0x3ffe192c-0x3ffe19bb]
ACPI: Reserving HPET table memory at [mem 0x3ffe19bc-0x3ffe19f3]
ACPI: Reserving WAET table memory at [mem 0x3ffe19f4-0x3ffe1a1b]
143MB HIGHMEM available.
879MB LOWMEM available.
mapped low ram: 0 - 36ffe000
low ram: 0 - 36ffe000
(stall here)
The reason is that CRASH_ADDR_LOW_MAX is equal to CRASH_ADDR_HIGH_MAX
on x86_32: the first "low" crash kernel memory reservation for 512M fails,
then it goes into the "retry" loop and never comes out, as shown below
(consider CRASH_ADDR_LOW_MAX = CRASH_ADDR_HIGH_MAX = 512M):
-> reserve_crashkernel_generic() and high is false
-> alloc at [0, 0x20000000] fail
-> alloc at [0x20000000, 0x20000000] fail and repeatedly
(because CRASH_ADDR_LOW_MAX = CRASH_ADDR_HIGH_MAX).
Fix it by skipping meaningless calls of memblock_phys_alloc_range() with
`start = end`
After this patch, the retry dead loop is avoided and the following is printed:
cannot allocate crashkernel (size:0x20000000)
With this fix, the generic crashkernel reservation is ready to be applied
to 32-bit systems.
Link: https://lkml.kernel.org/r/20240718035444.2977105-3-ruanjinjie@huawei.com
Fixes: 9c08a2a139fe ("x86: kdump: use generic interface to simplify crashkernel reservation code")
Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
Signed-off-by: Baoquan He <bhe(a)redhat.com>
Tested-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
Cc: Albert Ou <aou(a)eecs.berkeley.edu>
Cc: Andrew Davis <afd(a)ti.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Chen Jiahao <chenjiahao16(a)huawei.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Eric DeVolder <eric.devolder(a)oracle.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Hari Bathini <hbathini(a)linux.ibm.com>
Cc: Helge Deller <deller(a)gmx.de>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Javier Martinez Canillas <javierm(a)redhat.com>
Cc: Linus Walleij <linus.walleij(a)linaro.org>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Rob Herring <robh(a)kernel.org>
Cc: Russell King <linux(a)armlinux.org.uk>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Vivek Goyal <vgoyal(a)redhat.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Zhen Lei <thunder.leizhen(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/crash_reserve.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/kernel/crash_reserve.c~crash-fix-x86_32-crash-memory-reserve-dead-loop-bug-at-high
+++ a/kernel/crash_reserve.c
@@ -413,7 +413,8 @@ retry:
search_end = CRASH_ADDR_HIGH_MAX;
search_base = CRASH_ADDR_LOW_MAX;
crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
- goto retry;
+ if (search_base != search_end)
+ goto retry;
}
/*
_
Patches currently in -mm which might be from ruanjinjie(a)huawei.com are
crash-fix-x86_32-crash-memory-reserve-dead-loop-bug.patch
crash-fix-x86_32-crash-memory-reserve-dead-loop-bug-at-high.patch
arm-use-generic-interface-to-simplify-crashkernel-reservation.patch
Changes since v1:
Fixed some formatting errors to make the patchset less confusing.
A patchset from linux-5.15 should be backported to 4.19 that can
significantly improve ext4 fs read and write performance. Unixbench test
results for linux-4.19.318 on Phytium D2000 CPU are shown below.
Test cmd: (Phytium D2000 only has 8 cores)
./Run fs -c 8
Before this patch set:
File Copy 1024 bufsize 2000 maxblocks 1124181
File Copy 256 bufsize 500 maxblocks 281885
File Copy 4096 bufsize 8000 maxblocks 3383785
File Read 1024 bufsize 2000 maxblocks 8702173
File Read 256 bufsize 500 maxblocks 3869384
File Read 4096 bufsize 8000 maxblocks 13043151
File Write 1024 bufsize 2000 maxblocks 1107185
File Write 256 bufsize 500 maxblocks 270493
File Write 4096 bufsize 8000 maxblocks 4018084
After this patch set:
File Copy 1024 bufsize 2000 maxblocks 2026206
File Copy 256 bufsize 500 maxblocks 829534
File Copy 4096 bufsize 8000 maxblocks 4066659
File Read 1024 bufsize 2000 maxblocks 8877219
File Read 256 bufsize 500 maxblocks 3997445
File Read 4096 bufsize 8000 maxblocks 13179885
File Write 1024 bufsize 2000 maxblocks 4256929
File Write 256 bufsize 500 maxblocks 1305320
File Write 4096 bufsize 8000 maxblocks 10721052
We can observe a quantum leap in the test results as a consequence of
applying this patchset
Link: https://lore.kernel.org/all/20210716122024.1105856-1-yi.zhang@huawei.com/
Original description:
This patchset aims to improve buffer write performance with delalloc.
The first patch reduces unnecessary updates of i_disksize, the next two
patches refactor the inline data write procedure and also do some small
fixes, and the last patch improves performance by removing all unnecessary
journal handles in the delalloc write procedure.
After this patch set, we could get a lot of performance improvement.
Below is the Unixbench comparison data test on my machine with 'Intel
Xeon Gold 5120' CPU and nvme SSD backend.
Test cmd:
./Run -c 56 -i 3 fstime fsbuffer fsdisk
Before this patch set:
System Benchmarks Partial Index BASELINE RESULT INDEX
File Copy 1024 bufsize 2000 maxblocks 3960.0 422965.0 1068.1
File Copy 256 bufsize 500 maxblocks 1655.0 105077.0 634.9
File Copy 4096 bufsize 8000 maxblocks 5800.0 1429092.0 2464.0
========
System Benchmarks Index Score (Partial Only) 1186.6
After this patch set:
System Benchmarks Partial Index BASELINE RESULT INDEX
File Copy 1024 bufsize 2000 maxblocks 3960.0 732716.0 1850.3
File Copy 256 bufsize 500 maxblocks 1655.0 184940.0 1117.5
File Copy 4096 bufsize 8000 maxblocks 5800.0 2427152.0 4184.7
========
System Benchmarks Index Score (Partial Only) 2053.0
Zhang Yi (4):
ext4: check and update i_disksize properly
ext4: correct the error path of ext4_write_inline_data_end()
ext4: factor out write end code of inline file
ext4: drop unnecessary journal handle in delalloc write
fs/ext4/ext4.h | 3 -
fs/ext4/inline.c | 120 ++++++++++++++++++-------------------
fs/ext4/inode.c | 150 ++++++++++++-----------------------------------
3 files changed, 99 insertions(+), 174 deletions(-)
--
2.31.1
The patch titled
Subject: mm/page_alloc: fix pcp->count race between drain_pages_zone() vs __rmqueue_pcplist()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-page_alloc-fix-pcp-count-race-between-drain_pages_zone-vs-__rmqueue_pcplist.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Li Zhijian <lizhijian(a)fujitsu.com>
Subject: mm/page_alloc: fix pcp->count race between drain_pages_zone() vs __rmqueue_pcplist()
Date: Tue, 23 Jul 2024 14:44:28 +0800
It's expected that no page should be left in pcp_list after calling
zone_pcp_disable() in offline_pages(). Previously, it's observed that
offline_pages() gets stuck [1] due to some pages remaining in pcp_list.
Cause:
There is a race condition between drain_pages_zone() and __rmqueue_pcplist()
involving the pcp->count variable. See below scenario:
CPU0 CPU1
---------------- ---------------
spin_lock(&pcp->lock);
__rmqueue_pcplist() {
zone_pcp_disable() {
/* list is empty */
if (list_empty(list)) {
/* add pages to pcp_list */
alloced = rmqueue_bulk()
mutex_lock(&pcp_batch_high_lock)
...
__drain_all_pages() {
drain_pages_zone() {
/* read pcp->count, it's 0 here */
count = READ_ONCE(pcp->count)
/* 0 means nothing to drain */
/* update pcp->count */
pcp->count += alloced << order;
...
...
spin_unlock(&pcp->lock);
In this case, after calling zone_pcp_disable() though, there are still some
pages in pcp_list. And these pages in pcp_list are neither movable nor
isolated, offline_pages() gets stuck as a result.
Solution:
Expand the scope of the pcp->lock to also protect pcp->count in
drain_pages_zone(), to ensure no pages are left in the pcp list after
zone_pcp_disable()
[1] https://lore.kernel.org/linux-mm/6a07125f-e720-404c-b2f9-e55f3f166e85@fujit…
Link: https://lkml.kernel.org/r/20240723064428.1179519-1-lizhijian@fujitsu.com
Fixes: 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock")
Signed-off-by: Li Zhijian <lizhijian(a)fujitsu.com>
Reported-by: Yao Xingtao <yaoxt.fnst(a)fujitsu.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
--- a/mm/page_alloc.c~mm-page_alloc-fix-pcp-count-race-between-drain_pages_zone-vs-__rmqueue_pcplist
+++ a/mm/page_alloc.c
@@ -2343,16 +2343,20 @@ void drain_zone_pages(struct zone *zone,
static void drain_pages_zone(unsigned int cpu, struct zone *zone)
{
struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
- int count = READ_ONCE(pcp->count);
-
- while (count) {
- int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
- count -= to_drain;
+ int count;
+ do {
spin_lock(&pcp->lock);
- free_pcppages_bulk(zone, to_drain, pcp, 0);
+ count = pcp->count;
+ if (count) {
+ int to_drain = min(count,
+ pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
+
+ free_pcppages_bulk(zone, to_drain, pcp, 0);
+ count -= to_drain;
+ }
spin_unlock(&pcp->lock);
- }
+ } while (count);
}
/*
_
Patches currently in -mm which might be from lizhijian(a)fujitsu.com are
mm-page_alloc-fix-pcp-count-race-between-drain_pages_zone-vs-__rmqueue_pcplist.patch
The patch titled
Subject: scripts/gdb: fix lx-mounts command error
has been added to the -mm mm-nonmm-unstable branch. Its filename is
scripts-gdb-fix-lx-mounts-command-error.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Subject: scripts/gdb: fix lx-mounts command error
Date: Tue, 23 Jul 2024 14:48:59 +0800
(gdb) lx-mounts
mount super_block devname pathname fstype options
Python Exception <class 'gdb.error'>: There is no member named list.
Error occurred in Python: There is no member named list.
We encounter the above issue after commit 2eea9ce4310d ("mounts: keep
list of mounts in an rbtree"). That commit moves the mounts from a list
into an rbtree.
So we can instead use the rbtree to iterate over all mount information.
Link: https://lkml.kernel.org/r/20240723064902.124154-4-kuan-ying.lee@canonical.c…
Fixes: 2eea9ce4310d ("mounts: keep list of mounts in an rbtree")
Signed-off-by: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/proc.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/scripts/gdb/linux/proc.py~scripts-gdb-fix-lx-mounts-command-error
+++ a/scripts/gdb/linux/proc.py
@@ -18,6 +18,7 @@ from linux import utils
from linux import tasks
from linux import lists
from linux import vfs
+from linux import rbtree
from struct import *
@@ -172,8 +173,7 @@ values of that process namespace"""
gdb.write("{:^18} {:^15} {:>9} {} {} options\n".format(
"mount", "super_block", "devname", "pathname", "fstype"))
- for mnt in lists.list_for_each_entry(namespace['list'],
- mount_ptr_type, "mnt_list"):
+ for mnt in rbtree.rb_inorder_for_each_entry(namespace['mounts'], mount_ptr_type, "mnt_node"):
devname = mnt['mnt_devname'].string()
devname = devname if devname else "none"
_
Patches currently in -mm which might be from kuan-ying.lee(a)canonical.com are
scripts-gdb-fix-timerlist-parsing-issue.patch
scripts-gdb-add-iteration-function-for-rbtree.patch
scripts-gdb-fix-lx-mounts-command-error.patch
scripts-gdb-add-lx-stack_depot_lookup-command.patch
scripts-gdb-add-lx-kasan_mem_to_shadow-command.patch
The patch titled
Subject: scripts/gdb: add iteration function for rbtree
has been added to the -mm mm-nonmm-unstable branch. Its filename is
scripts-gdb-add-iteration-function-for-rbtree.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Subject: scripts/gdb: add iteration function for rbtree
Date: Tue, 23 Jul 2024 14:48:58 +0800
Add inorder iteration function for rbtree usage.
This is a preparation patch for the next patch to fix the gdb mounts
issue.
Link: https://lkml.kernel.org/r/20240723064902.124154-3-kuan-ying.lee@canonical.c…
Fixes: 2eea9ce4310d ("mounts: keep list of mounts in an rbtree")
Signed-off-by: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/rbtree.py | 12 ++++++++++++
1 file changed, 12 insertions(+)
--- a/scripts/gdb/linux/rbtree.py~scripts-gdb-add-iteration-function-for-rbtree
+++ a/scripts/gdb/linux/rbtree.py
@@ -9,6 +9,18 @@ from linux import utils
rb_root_type = utils.CachedType("struct rb_root")
rb_node_type = utils.CachedType("struct rb_node")
+def rb_inorder_for_each(root):
+ def inorder(node):
+ if node:
+ yield from inorder(node['rb_left'])
+ yield node
+ yield from inorder(node['rb_right'])
+
+ yield from inorder(root['rb_node'])
+
+def rb_inorder_for_each_entry(root, gdbtype, member):
+ for node in rb_inorder_for_each(root):
+ yield utils.container_of(node, gdbtype, member)
def rb_first(root):
if root.type == rb_root_type.get_type():
_
Patches currently in -mm which might be from kuan-ying.lee(a)canonical.com are
scripts-gdb-fix-timerlist-parsing-issue.patch
scripts-gdb-add-iteration-function-for-rbtree.patch
scripts-gdb-fix-lx-mounts-command-error.patch
scripts-gdb-add-lx-stack_depot_lookup-command.patch
scripts-gdb-add-lx-kasan_mem_to_shadow-command.patch
The patch titled
Subject: scripts/gdb: fix timerlist parsing issue
has been added to the -mm mm-nonmm-unstable branch. Its filename is
scripts-gdb-fix-timerlist-parsing-issue.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Subject: scripts/gdb: fix timerlist parsing issue
Date: Tue, 23 Jul 2024 14:48:57 +0800
Patch series "Fix some GDB command error and add some GDB commands", v3.
Fix some GDB command errors and add some useful GDB commands.
This patch (of 5):
Commit a478ffb2ae23 ("tick: Move individual bit features to debuggable
mask accesses") and commit 7988e5ae2be7 ("tick: Split nohz and highres
features from nohz_mode") move 'tick_stopped' and 'nohz_mode' to flags
field which will break the gdb lx-timerlist command:
(gdb) lx-timerlist
Python Exception <class 'gdb.error'>: There is no member named nohz_mode.
Error occurred in Python: There is no member named nohz_mode.
(gdb) lx-timerlist
Python Exception <class 'gdb.error'>: There is no member named tick_stopped.
Error occurred in Python: There is no member named tick_stopped.
We move 'tick_stopped' and 'nohz_mode' to flags field instead.
Link: https://lkml.kernel.org/r/20240723064902.124154-1-kuan-ying.lee@canonical.c…
Link: https://lkml.kernel.org/r/20240723064902.124154-2-kuan-ying.lee@canonical.c…
Fixes: a478ffb2ae23 ("tick: Move individual bit features to debuggable mask accesses")
Fixes: 7988e5ae2be7 ("tick: Split nohz and highres features from nohz_mode")
Signed-off-by: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/timerlist.py | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
--- a/scripts/gdb/linux/timerlist.py~scripts-gdb-fix-timerlist-parsing-issue
+++ a/scripts/gdb/linux/timerlist.py
@@ -87,21 +87,22 @@ def print_cpu(hrtimer_bases, cpu, max_cl
text += "\n"
if constants.LX_CONFIG_TICK_ONESHOT:
- fmts = [(" .{} : {}", 'nohz_mode'),
- (" .{} : {} nsecs", 'last_tick'),
- (" .{} : {}", 'tick_stopped'),
- (" .{} : {}", 'idle_jiffies'),
- (" .{} : {}", 'idle_calls'),
- (" .{} : {}", 'idle_sleeps'),
- (" .{} : {} nsecs", 'idle_entrytime'),
- (" .{} : {} nsecs", 'idle_waketime'),
- (" .{} : {} nsecs", 'idle_exittime'),
- (" .{} : {} nsecs", 'idle_sleeptime'),
- (" .{}: {} nsecs", 'iowait_sleeptime'),
- (" .{} : {}", 'last_jiffies'),
- (" .{} : {}", 'next_timer'),
- (" .{} : {} nsecs", 'idle_expires')]
- text += "\n".join([s.format(f, ts[f]) for s, f in fmts])
+ TS_FLAG_STOPPED = 1 << 1
+ TS_FLAG_NOHZ = 1 << 4
+ text += f" .{'nohz':15s}: {int(bool(ts['flags'] & TS_FLAG_NOHZ))}\n"
+ text += f" .{'last_tick':15s}: {ts['last_tick']}\n"
+ text += f" .{'tick_stopped':15s}: {int(bool(ts['flags'] & TS_FLAG_STOPPED))}\n"
+ text += f" .{'idle_jiffies':15s}: {ts['idle_jiffies']}\n"
+ text += f" .{'idle_calls':15s}: {ts['idle_calls']}\n"
+ text += f" .{'idle_sleeps':15s}: {ts['idle_sleeps']}\n"
+ text += f" .{'idle_entrytime':15s}: {ts['idle_entrytime']} nsecs\n"
+ text += f" .{'idle_waketime':15s}: {ts['idle_waketime']} nsecs\n"
+ text += f" .{'idle_exittime':15s}: {ts['idle_exittime']} nsecs\n"
+ text += f" .{'idle_sleeptime':15s}: {ts['idle_sleeptime']} nsecs\n"
+ text += f" .{'iowait_sleeptime':15s}: {ts['iowait_sleeptime']} nsecs\n"
+ text += f" .{'last_jiffies':15s}: {ts['last_jiffies']}\n"
+ text += f" .{'next_timer':15s}: {ts['next_timer']}\n"
+ text += f" .{'idle_expires':15s}: {ts['idle_expires']} nsecs\n"
text += "\njiffies: {}\n".format(jiffies)
text += "\n"
_
Patches currently in -mm which might be from kuan-ying.lee(a)canonical.com are
scripts-gdb-fix-timerlist-parsing-issue.patch
scripts-gdb-add-iteration-function-for-rbtree.patch
scripts-gdb-fix-lx-mounts-command-error.patch
scripts-gdb-add-lx-stack_depot_lookup-command.patch
scripts-gdb-add-lx-kasan_mem_to_shadow-command.patch