Under memory pressure, erofs readahead can fail with ENOMEM because it
allocates pages with GFP_NOWAIT | __GFP_NORETRY, whereas a regular read
uses GFP_KERNEL. If readahead fails (leaving folios non-uptodate), the
original request falls back to a synchronous read, and `.read_folio()`
should then return an appropriate errno. The allocation-policy
difference is sketched below.
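A minimal sketch of that difference (illustrative only; `for_readahead`
is a hypothetical flag, and the exact erofs call sites differ):

	/* readahead is best-effort: give up quickly under pressure */
	gfp_t gfp = for_readahead ? GFP_NOWAIT | __GFP_NORETRY
				  : GFP_KERNEL;
	struct page *page = alloc_page(gfp);

	if (!page)
		return -ENOMEM;	/* the readahead path can hit this */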
However, when a readahead and a regular read race on the same
pclusters, the read can return an unintended EIO because of incorrect
error propagation.
To resolve this, this patch changes the behavior so that, when a PCL
serves a read (which means pcl.besteffort is true), it attempts the
actual decompression instead of propagating the previous error, except
for an initial EIO (a real disk I/O failure).
- Page size: 4K
- The original size of FileA: 16K
- Compress-ratio per PCL: 50% (Uncompressed 8K -> Compressed 4K)
[page0, page1] [page2, page3]
[PCL0]---------[PCL1]
- function declarations:
. pread(fd, buf, count, offset)
. readahead(fd, offset, count)
- Thread A tries to read the last 4K
- Thread B tries to readahead 8K starting at offset 4K
- RA: besteffort == false
- R:  besteffort == true
<process A> <process B>
pread(FileA, buf, 4K, 12K)
do readahead(page3) // failed with ENOMEM
wait_lock(page3)
if (!uptodate(page3))
goto do_read
readahead(FileA, 4K, 8K)
// Here create PCL-chain like below:
// [null, page1] [page2, null]
// [PCL0:RA]-----[PCL1:RA]
...
do read(page3) // finds [PCL1:RA], adds page3 into it,
// and then changes PCL1 from RA to R
...
// Now, PCL-chain is as below:
// [null, page1] [page2, page3]
// [PCL0:RA]-----[PCL1:R]
// try to decompress PCL-chain...
z_erofs_decompress_queue
err = 0;
// failed with ENOMEM, so page1, which
// is only for RA, will not be uptodate.
// That is okay.
err = decompress([PCL0:RA], err)
// However, ENOMEM is propagated to the
// next PCL, even though that PCL is not
// only for RA but also for R. As a
// result, it fails with ENOMEM without
// trying any decompression, so page2
// and page3 will not be uptodate.
** BUG HERE ** --> err = decompress([PCL1:R], err)
return err as ENOMEM
...
wait_lock(page3)
if (!uptodate(page3))
return EIO <-- Return an unexpected EIO!
...
Fixes: 2349d2fa02db ("erofs: sunset unneeded NOFAILs")
Cc: stable(a)vger.kernel.org
Reviewed-by: Jaewook Kim <jw5454.kim(a)samsung.com>
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Signed-off-by: Junbeom Yeom <junbeom.yeom(a)samsung.com>
---
v2:
- If disk I/Os were successful, attempt to decompress each pcluster.
fs/erofs/zdata.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 27b1f44d10ce..70e1597dec8a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1262,7 +1262,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
return err;
}
-static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
+static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, bool eio)
{
struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
struct z_erofs_pcluster *pcl = be->pcl;
@@ -1270,7 +1270,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
const struct z_erofs_decompressor *alg =
z_erofs_decomp[pcl->algorithmformat];
bool try_free = true;
- int i, j, jtop, err2;
+ int i, j, jtop, err2, err = eio ? -EIO : 0;
struct page *page;
bool overlapped;
const char *reason;
@@ -1413,12 +1413,12 @@ static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
.pcl = io->head,
};
struct z_erofs_pcluster *next;
- int err = io->eio ? -EIO : 0;
+ int err = 0;
for (; be.pcl != Z_EROFS_PCLUSTER_TAIL; be.pcl = next) {
DBG_BUGON(!be.pcl);
next = READ_ONCE(be.pcl->next);
- err = z_erofs_decompress_pcluster(&be, err) ?: err;
+ err = z_erofs_decompress_pcluster(&be, io->eio) ?: err;
}
return err;
}
--
2.34.1
Calling napi_disable() on an already disabled napi can cause a
deadlock. In commit 4bc12818b363 ("virtio-net: disable delayed refill
when pausing rx"), to avoid this deadlock, the delayed refill work is
disabled and cancelled when pausing RX in virtnet_rx_pause[_all]().
However, virtnet_rx_resume_all() re-enables the delayed refill work too
early, before all the receive queue napis have been enabled.
The deadlock can be reproduced by running
selftests/drivers/net/hw/xsk_reconfig.py with a multiqueue virtio-net
device and inserting a cond_resched() inside the for loop in
virtnet_rx_resume_all() to increase the success rate. Because the
worker processing the delayed refill work runs on the same CPU as
virtnet_rx_resume_all(), a reschedule is needed to trigger the
deadlock. In a real scenario, contention on netdev_lock can cause the
reschedule.
This fixes the deadlock by ensuring all receive queues' napis are
enabled before the delayed refill work is enabled in
virtnet_rx_resume_all() and virtnet_open().
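For reference, this is roughly why the ordering matters; a simplified
sketch of refill_work() (condensed from drivers/net/virtio_net.c, not
the verbatim code):

	static void refill_work(struct work_struct *work)
	{
		struct virtnet_info *vi =
			container_of(work, struct virtnet_info, refill.work);
		int i;

		for (i = 0; i < vi->curr_queue_pairs; i++) {
			struct receive_queue *rq = &vi->rq[i];

			/* blocks forever if this napi is still disabled */
			napi_disable(&rq->napi);
			try_fill_recv(vi, rq, GFP_KERNEL);
			virtnet_napi_enable(rq);
		}
	}

If the refill work is enabled and scheduled before every receive queue
napi has been enabled, the napi_disable() above runs against an already
disabled napi and never returns.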
Fixes: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx")
Reported-by: Paolo Abeni <pabeni(a)redhat.com>
Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/drv-hw-dbg/results/400961/3-…
Cc: stable(a)vger.kernel.org
Signed-off-by: Bui Quang Minh <minhquangbui99(a)gmail.com>
---
Changes in v2:
- Move try_fill_recv() before rx napi_enable()
- Link to v1: https://lore.kernel.org/netdev/20251208153419.18196-1-minhquangbui99@gmail.…
---
drivers/net/virtio_net.c | 71 +++++++++++++++++++++++++---------------
1 file changed, 45 insertions(+), 26 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8e04adb57f52..4e08880a9467 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3214,21 +3214,31 @@ static void virtnet_update_settings(struct virtnet_info *vi)
static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+ bool schedule_refill = false;
int i, err;
- enable_delayed_refill(vi);
-
+ /* - We must call try_fill_recv before enabling napi of the same receive
+ * queue so that it doesn't race with the call in virtnet_receive.
+ * - We must enable and schedule delayed refill work only when we have
+ * enabled all the receive queue's napi. Otherwise, in refill_work, we
+ * have a deadlock when calling napi_disable on an already disabled
+ * napi.
+ */
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if oom use wq. */
if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
- schedule_delayed_work(&vi->refill, 0);
+ schedule_refill = true;
err = virtnet_enable_queue_pair(vi, i);
if (err < 0)
goto err_enable_qp;
}
+ enable_delayed_refill(vi);
+ if (schedule_refill)
+ schedule_delayed_work(&vi->refill, 0);
+
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
if (vi->status & VIRTIO_NET_S_LINK_UP)
netif_carrier_on(vi->dev);
@@ -3463,39 +3473,48 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
__virtnet_rx_pause(vi, rq);
}
-static void __virtnet_rx_resume(struct virtnet_info *vi,
- struct receive_queue *rq,
- bool refill)
+static void virtnet_rx_resume_all(struct virtnet_info *vi)
{
- bool running = netif_running(vi->dev);
bool schedule_refill = false;
+ int i;
- if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
- schedule_refill = true;
- if (running)
- virtnet_napi_enable(rq);
-
- if (schedule_refill)
- schedule_delayed_work(&vi->refill, 0);
-}
+ if (netif_running(vi->dev)) {
+ /* See the comment in virtnet_open for the ordering rule
+ * of try_fill_recv, receive queue napi_enable and delayed
+ * refill enable/schedule.
+ */
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (i < vi->curr_queue_pairs)
+ if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
+ schedule_refill = true;
-static void virtnet_rx_resume_all(struct virtnet_info *vi)
-{
- int i;
+ virtnet_napi_enable(&vi->rq[i]);
+ }
- enable_delayed_refill(vi);
- for (i = 0; i < vi->max_queue_pairs; i++) {
- if (i < vi->curr_queue_pairs)
- __virtnet_rx_resume(vi, &vi->rq[i], true);
- else
- __virtnet_rx_resume(vi, &vi->rq[i], false);
+ enable_delayed_refill(vi);
+ if (schedule_refill)
+ schedule_delayed_work(&vi->refill, 0);
}
}
static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
{
- enable_delayed_refill(vi);
- __virtnet_rx_resume(vi, rq, true);
+ bool schedule_refill = false;
+
+ if (netif_running(vi->dev)) {
+ /* See the comment in virtnet_open for the ordering rule
+ * of try_fill_recv, receive queue napi_enable and delayed
+ * refill enable/schedule.
+ */
+ if (!try_fill_recv(vi, rq, GFP_KERNEL))
+ schedule_refill = true;
+
+ virtnet_napi_enable(rq);
+
+ enable_delayed_refill(vi);
+ if (schedule_refill)
+ schedule_delayed_work(&vi->refill, 0);
+ }
}
static int virtnet_rx_resize(struct virtnet_info *vi,
--
2.43.0
Since commit c6e126de43e7 ("of: Keep track of populated platform
devices"), child devices will not be created by of_platform_populate()
if they had previously been deregistered individually, leaving the
OF_POPULATED flag set in the corresponding OF nodes.
Switch to of_platform_depopulate() instead of open coding so that the
child devices are re-created when the driver is rebound.
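For reference, a sketch of the difference (condensed from
drivers/of/platform.c, not the verbatim code):

	/*
	 * of_platform_depopulate() destroys children via
	 * of_platform_device_destroy(), which clears the flag before
	 * unregistering the device:
	 *
	 *	of_node_clear_flag(dev->of_node, OF_POPULATED);
	 *	platform_device_unregister(to_platform_device(dev));
	 *
	 * The open-coded platform_device_unregister() alone leaves
	 * OF_POPULATED set, so of_platform_populate() skips the nodes
	 * on the next probe.
	 */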
Fixes: c6e126de43e7 ("of: Keep track of populated platform devices")
Cc: stable(a)vger.kernel.org # 3.16
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/mfd/qcom-pm8xxx.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/drivers/mfd/qcom-pm8xxx.c b/drivers/mfd/qcom-pm8xxx.c
index 1149f7102a36..0cf374c015ce 100644
--- a/drivers/mfd/qcom-pm8xxx.c
+++ b/drivers/mfd/qcom-pm8xxx.c
@@ -577,17 +577,11 @@ static int pm8xxx_probe(struct platform_device *pdev)
return rc;
}
-static int pm8xxx_remove_child(struct device *dev, void *unused)
-{
- platform_device_unregister(to_platform_device(dev));
- return 0;
-}
-
static void pm8xxx_remove(struct platform_device *pdev)
{
struct pm_irq_chip *chip = platform_get_drvdata(pdev);
- device_for_each_child(&pdev->dev, NULL, pm8xxx_remove_child);
+ of_platform_depopulate(&pdev->dev);
irq_domain_remove(chip->irqdomain);
}
--
2.51.2
The patch titled
Subject: AMDGPU: fix failure with periodic signal
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fix-amdgpu-failure-with-periodic-signal.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via various
branches at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there most days
------------------------------------------------------
From: Mikulas Patocka <mpatocka(a)redhat.com>
Subject: AMDGPU: fix failure with periodic signal
Date: Fri, 7 Nov 2025 18:48:01 +0100 (CET)
If a process sets up a timer that sends a signal at short intervals
and it uses OpenCL on AMDGPU at the same time, we get random errors.
Sometimes, probing the OpenCL device fails (strace shows that
open("/dev/kfd") failed with -EINTR). Sometimes we get the message
"amdgpu: init_user_pages: Failed to register MMU notifier: -4" in the
syslog.
The bug can be reproduced with this program:
http://www.jikos.cz/~mikulas/testcases/opencl/opencl-bug-small.c
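The essence of the reproducer is a short-interval timer combined with
opening the kfd device; a minimal hypothetical sketch (not the linked
test case):

	#include <fcntl.h>
	#include <signal.h>
	#include <stdio.h>
	#include <sys/time.h>
	#include <unistd.h>

	static void on_alarm(int sig) { (void)sig; }

	int main(void)
	{
		struct sigaction sa = { .sa_handler = on_alarm };
		struct itimerval it = {
			.it_interval = { 0, 100 },	/* 100us period */
			.it_value    = { 0, 100 },
		};

		sigemptyset(&sa.sa_mask);
		sigaction(SIGALRM, &sa, NULL);	/* no SA_RESTART: EINTR visible */
		setitimer(ITIMER_REAL, &it, NULL);

		for (;;) {
			int fd = open("/dev/kfd", O_RDWR);

			if (fd < 0) {	/* randomly fails with EINTR */
				perror("open(/dev/kfd)");
				return 1;
			}
			close(fd);
		}
	}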
The root cause of these failures is in the function mm_take_all_locks,
which fails with -EINTR if there is a pending signal. The -EINTR is
propagated up the call stack to userspace, which fails when it receives
this error.
There is the following call chain: kfd_open -> kfd_create_process ->
create_process -> mmu_notifier_get -> mmu_notifier_get_locked ->
__mmu_notifier_register -> mm_take_all_locks -> "return -EINTR"
If the failure happens in init_user_pages, there is the following call
chain: init_user_pages -> amdgpu_hmm_register ->
mmu_interval_notifier_insert -> mmu_notifier_register ->
__mmu_notifier_register -> mm_take_all_locks -> "return -EINTR"
In order to fix these failures, this commit changes
signal_pending(current) to fatal_signal_pending(current) in
mm_take_all_locks, so that it is interrupted only if the signal is
actually killing the process.
Also, this commit skips the pr_err in init_user_pages if the process is
being killed; in this situation, no real error occurred, so we don't
want to report one in the syslog.
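For reference, the difference between the two checks (simplified from
include/linux/sched/signal.h):

	static inline int signal_pending(struct task_struct *p)
	{
		/* any pending signal, including a periodic SIGALRM */
		return unlikely(test_tsk_thread_flag(p, TIF_SIGPENDING));
	}

	static inline int fatal_signal_pending(struct task_struct *p)
	{
		/* only a pending SIGKILL, i.e. the task is dying */
		return signal_pending(p) && __fatal_signal_pending(p);
	}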
I'm submitting this patch for the stable kernels, because this bug may
cause random failures in any OpenCL code.
Link: https://lkml.kernel.org/r/6f16b618-26fc-3031-abe8-65c2090262e7@redhat.com
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: Alexander Deucher <alexander.deucher(a)amd.com>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Jann Horn <jannh(a)google.com>
Cc: Pedro Falcato <pfalcato(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 +++++++--
mm/vma.c | 8 ++++----
2 files changed, 11 insertions(+), 6 deletions(-)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c~fix-amdgpu-failure-with-periodic-signal
+++ a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1070,8 +1070,13 @@ static int init_user_pages(struct kgd_me
ret = amdgpu_hmm_register(bo, user_addr);
if (ret) {
- pr_err("%s: Failed to register MMU notifier: %d\n",
- __func__, ret);
+ /*
+ * If we got EINTR because the process was killed, don't report
+ * it, because no error happened.
+ */
+ if (!(fatal_signal_pending(current) && ret == -EINTR))
+ pr_err("%s: Failed to register MMU notifier: %d\n",
+ __func__, ret);
goto out;
}
--- a/mm/vma.c~fix-amdgpu-failure-with-periodic-signal
+++ a/mm/vma.c
@@ -2166,14 +2166,14 @@ int mm_take_all_locks(struct mm_struct *
* is reached.
*/
for_each_vma(vmi, vma) {
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
goto out_unlock;
vma_start_write(vma);
}
vma_iter_init(&vmi, mm, 0);
for_each_vma(vmi, vma) {
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
goto out_unlock;
if (vma->vm_file && vma->vm_file->f_mapping &&
is_vm_hugetlb_page(vma))
@@ -2182,7 +2182,7 @@ int mm_take_all_locks(struct mm_struct *
vma_iter_init(&vmi, mm, 0);
for_each_vma(vmi, vma) {
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
goto out_unlock;
if (vma->vm_file && vma->vm_file->f_mapping &&
!is_vm_hugetlb_page(vma))
@@ -2191,7 +2191,7 @@ int mm_take_all_locks(struct mm_struct *
vma_iter_init(&vmi, mm, 0);
for_each_vma(vmi, vma) {
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
goto out_unlock;
if (vma->anon_vma)
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
_
Patches currently in -mm which might be from mpatocka(a)redhat.com are
fix-amdgpu-failure-with-periodic-signal.patch
The patch titled
Subject: mm/page_owner: fix memory leak in page_owner_stack_fops->release()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-page_owner-fix-memory-leak-in-page_owner_stack_fops-release.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ran Xiaokai <ran.xiaokai(a)zte.com.cn>
Subject: mm/page_owner: fix memory leak in page_owner_stack_fops->release()
Date: Fri, 19 Dec 2025 07:42:32 +0000
The page_owner_stack_fops->open() callback invokes seq_open_private(),
therefore its corresponding ->release() callback must call
seq_release_private(). Otherwise it will cause a memory leak of struct
stack_print_ctx.
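For reference, the pairing rule (sketch; simplified from
fs/seq_file.c):

	/*
	 * seq_open_private() kzalloc()s a private ctx and stores it in
	 * seq->private.  seq_release() frees only the seq_file itself,
	 * while seq_release_private() additionally kfree()s
	 * seq->private.  Hence every ->open() using seq_open_private()
	 * must pair with seq_release_private() in ->release().
	 */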
Link: https://lkml.kernel.org/r/20251219074232.136482-1-ranxiaokai627@163.com
Fixes: 765973a098037 ("mm,page_owner: display all stacks and their count")
Signed-off-by: Ran Xiaokai <ran.xiaokai(a)zte.com.cn>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Brendan Jackman <jackmanb(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Marco Elver <elver(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_owner.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/page_owner.c~mm-page_owner-fix-memory-leak-in-page_owner_stack_fops-release
+++ a/mm/page_owner.c
@@ -952,7 +952,7 @@ static const struct file_operations page
.open = page_owner_stack_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_private,
};
static int page_owner_threshold_get(void *data, u64 *val)
_
Patches currently in -mm which might be from ran.xiaokai(a)zte.com.cn are
mm-page_owner-fix-memory-leak-in-page_owner_stack_fops-release.patch
The patch titled
Subject: mm/memory-failure: fix missing ->mf_stats count in hugetlb poison
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-failure-fix-missing-mf_stats-count-in-hugetlb-poison.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jane Chu <jane.chu(a)oracle.com>
Subject: mm/memory-failure: fix missing ->mf_stats count in hugetlb poison
Date: Fri, 19 Dec 2025 12:15:58 -0700
When a newly poisoned subpage ends up in an already poisoned hugetlb
folio, 'num_poisoned_pages' is incremented, but the per-node ->mf_stats
is not. Fix the inconsistency by designating action_result() to update
them both.
While at it, define the __get_huge_page_for_hwpoison() return values as
named symbols for better readability. Also rename
folio_set_hugetlb_hwpoison() to hugetlb_update_hwpoison(), since the
function does more than conventional bit setting and is expected to
return three distinct values.
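For reference, the accounting invariant this restores (sketch;
simplified from mm/memory-failure.c):

	/*
	 * action_result() is the single place that accounts a poison
	 * event:
	 *
	 *	num_poisoned_pages_inc(pfn);		// global count
	 *	update_per_node_mf_stats(pfn, result);	// ->mf_stats
	 *
	 * Any path that bumps only num_poisoned_pages, as the old
	 * folio_set_hugetlb_hwpoison() did, leaves ->mf_stats behind.
	 */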
Link: https://lkml.kernel.org/r/20251219191559.2962716-1-jane.chu@oracle.com
Fixes: 18f41fa616ee4 ("mm: memory-failure: bump memory failure stats to pglist_data")
Signed-off-by: Jane Chu <jane.chu(a)oracle.com>
Cc: "David Hildenbrand (Red Hat)" <david(a)kernel.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Jiaqi Yan <jiaqiyan(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: William Roche <william.roche(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 56 ++++++++++++++++++++++++------------------
1 file changed, 33 insertions(+), 23 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-fix-missing-mf_stats-count-in-hugetlb-poison
+++ a/mm/memory-failure.c
@@ -1883,12 +1883,18 @@ static unsigned long __folio_free_raw_hw
return count;
}
-static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
+#define MF_HUGETLB_ALREADY_POISONED 3 /* already poisoned */
+#define MF_HUGETLB_ACC_EXISTING_POISON 4 /* accessed existing poisoned page */
+/*
+ * Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list
+ * to keep track of the poisoned pages.
+ */
+static int hugetlb_update_hwpoison(struct folio *folio, struct page *page)
{
struct llist_head *head;
struct raw_hwp_page *raw_hwp;
struct raw_hwp_page *p;
- int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
+ int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_ALREADY_POISONED : 0;
/*
* Once the hwpoison hugepage has lost reliable raw error info,
@@ -1896,20 +1902,18 @@ static int folio_set_hugetlb_hwpoison(st
* so skip to add additional raw error info.
*/
if (folio_test_hugetlb_raw_hwp_unreliable(folio))
- return -EHWPOISON;
+ return MF_HUGETLB_ALREADY_POISONED;
+
head = raw_hwp_list_head(folio);
llist_for_each_entry(p, head->first, node) {
if (p->page == page)
- return -EHWPOISON;
+ return MF_HUGETLB_ACC_EXISTING_POISON;
}
raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
if (raw_hwp) {
raw_hwp->page = page;
llist_add(&raw_hwp->node, head);
- /* the first error event will be counted in action_result(). */
- if (ret)
- num_poisoned_pages_inc(page_to_pfn(page));
} else {
/*
* Failed to save raw error info. We no longer trace all
@@ -1955,32 +1959,30 @@ void folio_clear_hugetlb_hwpoison(struct
folio_free_raw_hwp(folio, true);
}
+#define MF_HUGETLB_FREED 0 /* freed hugepage */
+#define MF_HUGETLB_IN_USED 1 /* in-use hugepage */
+#define MF_NOT_HUGETLB 2 /* not a hugepage */
+
/*
* Called from hugetlb code with hugetlb_lock held.
- *
- * Return values:
- * 0 - free hugepage
- * 1 - in-use hugepage
- * 2 - not a hugepage
- * -EBUSY - the hugepage is busy (try to retry)
- * -EHWPOISON - the hugepage is already hwpoisoned
*/
int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared)
{
struct page *page = pfn_to_page(pfn);
struct folio *folio = page_folio(page);
- int ret = 2; /* fallback to normal page handling */
+ int ret = MF_NOT_HUGETLB;
bool count_increased = false;
+ int rc;
if (!folio_test_hugetlb(folio))
goto out;
if (flags & MF_COUNT_INCREASED) {
- ret = 1;
+ ret = MF_HUGETLB_IN_USED;
count_increased = true;
} else if (folio_test_hugetlb_freed(folio)) {
- ret = 0;
+ ret = MF_HUGETLB_FREED;
} else if (folio_test_hugetlb_migratable(folio)) {
ret = folio_try_get(folio);
if (ret)
@@ -1991,8 +1993,9 @@ int __get_huge_page_for_hwpoison(unsigne
goto out;
}
- if (folio_set_hugetlb_hwpoison(folio, page)) {
- ret = -EHWPOISON;
+ rc = hugetlb_update_hwpoison(folio, page);
+ if (rc >= MF_HUGETLB_ALREADY_POISONED) {
+ ret = rc;
goto out;
}
@@ -2029,22 +2032,29 @@ static int try_memory_failure_hugetlb(un
*hugetlb = 1;
retry:
res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
- if (res == 2) { /* fallback to normal page handling */
+ switch (res) {
+ case MF_NOT_HUGETLB: /* fallback to normal page handling */
*hugetlb = 0;
return 0;
- } else if (res == -EHWPOISON) {
+ case MF_HUGETLB_ALREADY_POISONED:
+ case MF_HUGETLB_ACC_EXISTING_POISON:
if (flags & MF_ACTION_REQUIRED) {
folio = page_folio(p);
res = kill_accessing_process(current, folio_pfn(folio), flags);
}
- action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
+ if (res == MF_HUGETLB_ALREADY_POISONED)
+ action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
+ else
+ action_result(pfn, MF_MSG_HUGE, MF_FAILED);
return res;
- } else if (res == -EBUSY) {
+ case -EBUSY:
if (!(flags & MF_NO_RETRY)) {
flags |= MF_NO_RETRY;
goto retry;
}
return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
+ default:
+ break;
}
folio = page_folio(p);
_
Patches currently in -mm which might be from jane.chu(a)oracle.com are
mm-memory-failure-fix-missing-mf_stats-count-in-hugetlb-poison.patch