The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x a833a693a490ecff8ba377654c6d4d333718b6b1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051910-sharpness-improvise-0b77@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a833a693a490ecff8ba377654c6d4d333718b6b1 Mon Sep 17 00:00:00 2001
From: Wupeng Ma <mawupeng1(a)huawei.com>
Date: Thu, 10 Apr 2025 14:26:33 +0800
Subject: [PATCH] mm: hugetlb: fix incorrect fallback for subpool
During our testing with hugetlb subpool enabled, we observe that
hstate->resv_huge_pages may underflow into negative values. Root cause
analysis reveals a race condition in subpool reservation fallback handling
as follow:
hugetlb_reserve_pages()
/* Attempt subpool reservation */
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
/* Global reservation may fail after subpool allocation */
if (hugetlb_acct_memory(h, gbl_reserve) < 0)
goto out_put_pages;
out_put_pages:
/* This incorrectly restores reservation to subpool */
hugepage_subpool_put_pages(spool, chg);
When hugetlb_acct_memory() fails after subpool allocation, the current
implementation over-commits subpool reservations by returning the full
'chg' value instead of the actual allocated 'gbl_reserve' amount. This
discrepancy propagates to global reservations during subsequent releases,
eventually causing resv_huge_pages underflow.
This problem can be trigger easily with the following steps:
1. reverse hugepage for hugeltb allocation
2. mount hugetlbfs with min_size to enable hugetlb subpool
3. alloc hugepages with two task(make sure the second will fail due to
insufficient amount of hugepages)
4. with for a few seconds and repeat step 3 which will make
hstate->resv_huge_pages to go below zero.
To fix this problem, return corrent amount of pages to subpool during the
fallback after hugepage_subpool_get_pages is called.
Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com
Fixes: 1c5ecae3a93f ("hugetlbfs: add minimum size accounting to subpools")
Signed-off-by: Wupeng Ma <mawupeng1(a)huawei.com>
Tested-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6ea1be71aa42..7ae38bfb9096 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct folio *folio;
- long retval, gbl_chg;
+ long retval, gbl_chg, gbl_reserve;
map_chg_state map_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
h_cg);
out_subpool_put:
- if (map_chg)
- hugepage_subpool_put_pages(spool, 1);
+ /*
+ * put page to subpool iff the quota of subpool's rsv_hpages is used
+ * during hugepage_subpool_get_pages.
+ */
+ if (map_chg && !gbl_chg) {
+ gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -gbl_reserve);
+ }
+
+
out_end_reservation:
if (map_chg != MAP_CHG_ENFORCED)
vma_end_reservation(h, vma, addr);
@@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg = -1, add = -1;
+ long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
return true;
out_put_pages:
- /* put back original number of pages, chg */
- (void)hugepage_subpool_put_pages(spool, chg);
+ spool_resv = chg - gbl_reserve;
+ if (spool_resv) {
+ /* put sub pool's reservation back, chg - gbl_reserve */
+ gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+ /*
+ * subpool's reserved pages can not be put back due to race,
+ * return to hstate.
+ */
+ hugetlb_acct_memory(h, -gbl_resv);
+ }
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x a833a693a490ecff8ba377654c6d4d333718b6b1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051909-bluff-anchovy-3f6f@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a833a693a490ecff8ba377654c6d4d333718b6b1 Mon Sep 17 00:00:00 2001
From: Wupeng Ma <mawupeng1(a)huawei.com>
Date: Thu, 10 Apr 2025 14:26:33 +0800
Subject: [PATCH] mm: hugetlb: fix incorrect fallback for subpool
During our testing with hugetlb subpool enabled, we observe that
hstate->resv_huge_pages may underflow into negative values. Root cause
analysis reveals a race condition in subpool reservation fallback handling
as follow:
hugetlb_reserve_pages()
/* Attempt subpool reservation */
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
/* Global reservation may fail after subpool allocation */
if (hugetlb_acct_memory(h, gbl_reserve) < 0)
goto out_put_pages;
out_put_pages:
/* This incorrectly restores reservation to subpool */
hugepage_subpool_put_pages(spool, chg);
When hugetlb_acct_memory() fails after subpool allocation, the current
implementation over-commits subpool reservations by returning the full
'chg' value instead of the actual allocated 'gbl_reserve' amount. This
discrepancy propagates to global reservations during subsequent releases,
eventually causing resv_huge_pages underflow.
This problem can be trigger easily with the following steps:
1. reverse hugepage for hugeltb allocation
2. mount hugetlbfs with min_size to enable hugetlb subpool
3. alloc hugepages with two task(make sure the second will fail due to
insufficient amount of hugepages)
4. with for a few seconds and repeat step 3 which will make
hstate->resv_huge_pages to go below zero.
To fix this problem, return corrent amount of pages to subpool during the
fallback after hugepage_subpool_get_pages is called.
Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com
Fixes: 1c5ecae3a93f ("hugetlbfs: add minimum size accounting to subpools")
Signed-off-by: Wupeng Ma <mawupeng1(a)huawei.com>
Tested-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6ea1be71aa42..7ae38bfb9096 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct folio *folio;
- long retval, gbl_chg;
+ long retval, gbl_chg, gbl_reserve;
map_chg_state map_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
h_cg);
out_subpool_put:
- if (map_chg)
- hugepage_subpool_put_pages(spool, 1);
+ /*
+ * put page to subpool iff the quota of subpool's rsv_hpages is used
+ * during hugepage_subpool_get_pages.
+ */
+ if (map_chg && !gbl_chg) {
+ gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -gbl_reserve);
+ }
+
+
out_end_reservation:
if (map_chg != MAP_CHG_ENFORCED)
vma_end_reservation(h, vma, addr);
@@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg = -1, add = -1;
+ long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
return true;
out_put_pages:
- /* put back original number of pages, chg */
- (void)hugepage_subpool_put_pages(spool, chg);
+ spool_resv = chg - gbl_reserve;
+ if (spool_resv) {
+ /* put sub pool's reservation back, chg - gbl_reserve */
+ gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+ /*
+ * subpool's reserved pages can not be put back due to race,
+ * return to hstate.
+ */
+ hugetlb_acct_memory(h, -gbl_resv);
+ }
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x a833a693a490ecff8ba377654c6d4d333718b6b1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051908-repulsive-crazed-331a@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a833a693a490ecff8ba377654c6d4d333718b6b1 Mon Sep 17 00:00:00 2001
From: Wupeng Ma <mawupeng1(a)huawei.com>
Date: Thu, 10 Apr 2025 14:26:33 +0800
Subject: [PATCH] mm: hugetlb: fix incorrect fallback for subpool
During our testing with hugetlb subpool enabled, we observe that
hstate->resv_huge_pages may underflow into negative values. Root cause
analysis reveals a race condition in subpool reservation fallback handling
as follow:
hugetlb_reserve_pages()
/* Attempt subpool reservation */
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
/* Global reservation may fail after subpool allocation */
if (hugetlb_acct_memory(h, gbl_reserve) < 0)
goto out_put_pages;
out_put_pages:
/* This incorrectly restores reservation to subpool */
hugepage_subpool_put_pages(spool, chg);
When hugetlb_acct_memory() fails after subpool allocation, the current
implementation over-commits subpool reservations by returning the full
'chg' value instead of the actual allocated 'gbl_reserve' amount. This
discrepancy propagates to global reservations during subsequent releases,
eventually causing resv_huge_pages underflow.
This problem can be trigger easily with the following steps:
1. reverse hugepage for hugeltb allocation
2. mount hugetlbfs with min_size to enable hugetlb subpool
3. alloc hugepages with two task(make sure the second will fail due to
insufficient amount of hugepages)
4. with for a few seconds and repeat step 3 which will make
hstate->resv_huge_pages to go below zero.
To fix this problem, return corrent amount of pages to subpool during the
fallback after hugepage_subpool_get_pages is called.
Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com
Fixes: 1c5ecae3a93f ("hugetlbfs: add minimum size accounting to subpools")
Signed-off-by: Wupeng Ma <mawupeng1(a)huawei.com>
Tested-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6ea1be71aa42..7ae38bfb9096 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct folio *folio;
- long retval, gbl_chg;
+ long retval, gbl_chg, gbl_reserve;
map_chg_state map_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
h_cg);
out_subpool_put:
- if (map_chg)
- hugepage_subpool_put_pages(spool, 1);
+ /*
+ * put page to subpool iff the quota of subpool's rsv_hpages is used
+ * during hugepage_subpool_get_pages.
+ */
+ if (map_chg && !gbl_chg) {
+ gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -gbl_reserve);
+ }
+
+
out_end_reservation:
if (map_chg != MAP_CHG_ENFORCED)
vma_end_reservation(h, vma, addr);
@@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg = -1, add = -1;
+ long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
return true;
out_put_pages:
- /* put back original number of pages, chg */
- (void)hugepage_subpool_put_pages(spool, chg);
+ spool_resv = chg - gbl_reserve;
+ if (spool_resv) {
+ /* put sub pool's reservation back, chg - gbl_reserve */
+ gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+ /*
+ * subpool's reserved pages can not be put back due to race,
+ * return to hstate.
+ */
+ hugetlb_acct_memory(h, -gbl_resv);
+ }
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a833a693a490ecff8ba377654c6d4d333718b6b1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051907-laxative-caviar-b847@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a833a693a490ecff8ba377654c6d4d333718b6b1 Mon Sep 17 00:00:00 2001
From: Wupeng Ma <mawupeng1(a)huawei.com>
Date: Thu, 10 Apr 2025 14:26:33 +0800
Subject: [PATCH] mm: hugetlb: fix incorrect fallback for subpool
During our testing with hugetlb subpool enabled, we observe that
hstate->resv_huge_pages may underflow into negative values. Root cause
analysis reveals a race condition in subpool reservation fallback handling
as follow:
hugetlb_reserve_pages()
/* Attempt subpool reservation */
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
/* Global reservation may fail after subpool allocation */
if (hugetlb_acct_memory(h, gbl_reserve) < 0)
goto out_put_pages;
out_put_pages:
/* This incorrectly restores reservation to subpool */
hugepage_subpool_put_pages(spool, chg);
When hugetlb_acct_memory() fails after subpool allocation, the current
implementation over-commits subpool reservations by returning the full
'chg' value instead of the actual allocated 'gbl_reserve' amount. This
discrepancy propagates to global reservations during subsequent releases,
eventually causing resv_huge_pages underflow.
This problem can be trigger easily with the following steps:
1. reverse hugepage for hugeltb allocation
2. mount hugetlbfs with min_size to enable hugetlb subpool
3. alloc hugepages with two task(make sure the second will fail due to
insufficient amount of hugepages)
4. with for a few seconds and repeat step 3 which will make
hstate->resv_huge_pages to go below zero.
To fix this problem, return corrent amount of pages to subpool during the
fallback after hugepage_subpool_get_pages is called.
Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com
Fixes: 1c5ecae3a93f ("hugetlbfs: add minimum size accounting to subpools")
Signed-off-by: Wupeng Ma <mawupeng1(a)huawei.com>
Tested-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6ea1be71aa42..7ae38bfb9096 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct folio *folio;
- long retval, gbl_chg;
+ long retval, gbl_chg, gbl_reserve;
map_chg_state map_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
h_cg);
out_subpool_put:
- if (map_chg)
- hugepage_subpool_put_pages(spool, 1);
+ /*
+ * put page to subpool iff the quota of subpool's rsv_hpages is used
+ * during hugepage_subpool_get_pages.
+ */
+ if (map_chg && !gbl_chg) {
+ gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -gbl_reserve);
+ }
+
+
out_end_reservation:
if (map_chg != MAP_CHG_ENFORCED)
vma_end_reservation(h, vma, addr);
@@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg = -1, add = -1;
+ long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
return true;
out_put_pages:
- /* put back original number of pages, chg */
- (void)hugepage_subpool_put_pages(spool, chg);
+ spool_resv = chg - gbl_reserve;
+ if (spool_resv) {
+ /* put sub pool's reservation back, chg - gbl_reserve */
+ gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+ /*
+ * subpool's reserved pages can not be put back due to race,
+ * return to hstate.
+ */
+ hugetlb_acct_memory(h, -gbl_resv);
+ }
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x a833a693a490ecff8ba377654c6d4d333718b6b1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051905-getup-hatchery-57be@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a833a693a490ecff8ba377654c6d4d333718b6b1 Mon Sep 17 00:00:00 2001
From: Wupeng Ma <mawupeng1(a)huawei.com>
Date: Thu, 10 Apr 2025 14:26:33 +0800
Subject: [PATCH] mm: hugetlb: fix incorrect fallback for subpool
During our testing with hugetlb subpool enabled, we observe that
hstate->resv_huge_pages may underflow into negative values. Root cause
analysis reveals a race condition in subpool reservation fallback handling
as follow:
hugetlb_reserve_pages()
/* Attempt subpool reservation */
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
/* Global reservation may fail after subpool allocation */
if (hugetlb_acct_memory(h, gbl_reserve) < 0)
goto out_put_pages;
out_put_pages:
/* This incorrectly restores reservation to subpool */
hugepage_subpool_put_pages(spool, chg);
When hugetlb_acct_memory() fails after subpool allocation, the current
implementation over-commits subpool reservations by returning the full
'chg' value instead of the actual allocated 'gbl_reserve' amount. This
discrepancy propagates to global reservations during subsequent releases,
eventually causing resv_huge_pages underflow.
This problem can be trigger easily with the following steps:
1. reverse hugepage for hugeltb allocation
2. mount hugetlbfs with min_size to enable hugetlb subpool
3. alloc hugepages with two task(make sure the second will fail due to
insufficient amount of hugepages)
4. with for a few seconds and repeat step 3 which will make
hstate->resv_huge_pages to go below zero.
To fix this problem, return corrent amount of pages to subpool during the
fallback after hugepage_subpool_get_pages is called.
Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com
Fixes: 1c5ecae3a93f ("hugetlbfs: add minimum size accounting to subpools")
Signed-off-by: Wupeng Ma <mawupeng1(a)huawei.com>
Tested-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6ea1be71aa42..7ae38bfb9096 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct folio *folio;
- long retval, gbl_chg;
+ long retval, gbl_chg, gbl_reserve;
map_chg_state map_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
h_cg);
out_subpool_put:
- if (map_chg)
- hugepage_subpool_put_pages(spool, 1);
+ /*
+ * put page to subpool iff the quota of subpool's rsv_hpages is used
+ * during hugepage_subpool_get_pages.
+ */
+ if (map_chg && !gbl_chg) {
+ gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -gbl_reserve);
+ }
+
+
out_end_reservation:
if (map_chg != MAP_CHG_ENFORCED)
vma_end_reservation(h, vma, addr);
@@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg = -1, add = -1;
+ long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
return true;
out_put_pages:
- /* put back original number of pages, chg */
- (void)hugepage_subpool_put_pages(spool, chg);
+ spool_resv = chg - gbl_reserve;
+ if (spool_resv) {
+ /* put sub pool's reservation back, chg - gbl_reserve */
+ gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+ /*
+ * subpool's reserved pages can not be put back due to race,
+ * return to hstate.
+ */
+ hugetlb_acct_memory(h, -gbl_resv);
+ }
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x a833a693a490ecff8ba377654c6d4d333718b6b1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051959-steadily-imposing-a3e6@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a833a693a490ecff8ba377654c6d4d333718b6b1 Mon Sep 17 00:00:00 2001
From: Wupeng Ma <mawupeng1(a)huawei.com>
Date: Thu, 10 Apr 2025 14:26:33 +0800
Subject: [PATCH] mm: hugetlb: fix incorrect fallback for subpool
During our testing with hugetlb subpool enabled, we observe that
hstate->resv_huge_pages may underflow into negative values. Root cause
analysis reveals a race condition in subpool reservation fallback handling
as follow:
hugetlb_reserve_pages()
/* Attempt subpool reservation */
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
/* Global reservation may fail after subpool allocation */
if (hugetlb_acct_memory(h, gbl_reserve) < 0)
goto out_put_pages;
out_put_pages:
/* This incorrectly restores reservation to subpool */
hugepage_subpool_put_pages(spool, chg);
When hugetlb_acct_memory() fails after subpool allocation, the current
implementation over-commits subpool reservations by returning the full
'chg' value instead of the actual allocated 'gbl_reserve' amount. This
discrepancy propagates to global reservations during subsequent releases,
eventually causing resv_huge_pages underflow.
This problem can be trigger easily with the following steps:
1. reverse hugepage for hugeltb allocation
2. mount hugetlbfs with min_size to enable hugetlb subpool
3. alloc hugepages with two task(make sure the second will fail due to
insufficient amount of hugepages)
4. with for a few seconds and repeat step 3 which will make
hstate->resv_huge_pages to go below zero.
To fix this problem, return corrent amount of pages to subpool during the
fallback after hugepage_subpool_get_pages is called.
Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com
Fixes: 1c5ecae3a93f ("hugetlbfs: add minimum size accounting to subpools")
Signed-off-by: Wupeng Ma <mawupeng1(a)huawei.com>
Tested-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6ea1be71aa42..7ae38bfb9096 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct folio *folio;
- long retval, gbl_chg;
+ long retval, gbl_chg, gbl_reserve;
map_chg_state map_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
h_cg);
out_subpool_put:
- if (map_chg)
- hugepage_subpool_put_pages(spool, 1);
+ /*
+ * put page to subpool iff the quota of subpool's rsv_hpages is used
+ * during hugepage_subpool_get_pages.
+ */
+ if (map_chg && !gbl_chg) {
+ gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -gbl_reserve);
+ }
+
+
out_end_reservation:
if (map_chg != MAP_CHG_ENFORCED)
vma_end_reservation(h, vma, addr);
@@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg = -1, add = -1;
+ long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
return true;
out_put_pages:
- /* put back original number of pages, chg */
- (void)hugepage_subpool_put_pages(spool, chg);
+ spool_resv = chg - gbl_reserve;
+ if (spool_resv) {
+ /* put sub pool's reservation back, chg - gbl_reserve */
+ gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+ /*
+ * subpool's reserved pages can not be put back due to race,
+ * return to hstate.
+ */
+ hugetlb_acct_memory(h, -gbl_resv);
+ }
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 78ab4be549533432d97ea8989d2f00b508fa68d8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051939-overbuilt-leverage-7eec@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 78ab4be549533432d97ea8989d2f00b508fa68d8 Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
Date: Tue, 6 May 2025 14:55:39 +0300
Subject: [PATCH] wifi: mt76: disable napi on driver removal
A warning on driver removal started occurring after commit 9dd05df8403b
("net: warn if NAPI instance wasn't shut down"). Disable tx napi before
deleting it in mt76_dma_cleanup().
WARNING: CPU: 4 PID: 18828 at net/core/dev.c:7288 __netif_napi_del_locked+0xf0/0x100
CPU: 4 UID: 0 PID: 18828 Comm: modprobe Not tainted 6.15.0-rc4 #4 PREEMPT(lazy)
Hardware name: ASUS System Product Name/PRIME X670E-PRO WIFI, BIOS 3035 09/05/2024
RIP: 0010:__netif_napi_del_locked+0xf0/0x100
Call Trace:
<TASK>
mt76_dma_cleanup+0x54/0x2f0 [mt76]
mt7921_pci_remove+0xd5/0x190 [mt7921e]
pci_device_remove+0x47/0xc0
device_release_driver_internal+0x19e/0x200
driver_detach+0x48/0x90
bus_remove_driver+0x6d/0xf0
pci_unregister_driver+0x2e/0xb0
__do_sys_delete_module.isra.0+0x197/0x2e0
do_syscall_64+0x7b/0x160
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Tested with mt7921e but the same pattern can be actually applied to other
mt76 drivers calling mt76_dma_cleanup() during removal. Tx napi is enabled
in their *_dma_init() functions and only toggled off and on again inside
their suspend/resume/reset paths. So it should be okay to disable tx
napi in such a generic way.
Found by Linux Verification Center (linuxtesting.org).
Fixes: 2ac515a5d74f ("mt76: mt76x02: use napi polling for tx cleanup")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Tested-by: Ming Yen Hsieh <mingyen.hsieh(a)mediatek.com>
Link: https://patch.msgid.link/20250506115540.19045-1-pchelkin@ispras.ru
Signed-off-by: Felix Fietkau <nbd(a)nbd.name>
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 844af16ee551..35b4ec91979e 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -1011,6 +1011,7 @@ void mt76_dma_cleanup(struct mt76_dev *dev)
int i;
mt76_worker_disable(&dev->tx_worker);
+ napi_disable(&dev->tx_napi);
netif_napi_del(&dev->tx_napi);
for (i = 0; i < ARRAY_SIZE(dev->phys); i++) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x e8007fad5457ea547ca63bb011fdb03213571c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051959-relieving-trifocals-0eed@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e8007fad5457ea547ca63bb011fdb03213571c7e Mon Sep 17 00:00:00 2001
From: Steve Siwinski <ssiwinski(a)atto.com>
Date: Thu, 8 May 2025 16:01:22 -0400
Subject: [PATCH] scsi: sd_zbc: block: Respect bio vector limits for REPORT
ZONES buffer
The REPORT ZONES buffer size is currently limited by the HBA's maximum
segment count to ensure the buffer can be mapped. However, the block
layer further limits the number of iovec entries to 1024 when allocating
a bio.
To avoid allocation of buffers too large to be mapped, further restrict
the maximum buffer size to BIO_MAX_INLINE_VECS.
Replace the UIO_MAXIOV symbolic name with the more contextually
appropriate BIO_MAX_INLINE_VECS.
Fixes: b091ac616846 ("sd_zbc: Fix report zones buffer allocation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Steve Siwinski <ssiwinski(a)atto.com>
Link: https://lore.kernel.org/r/20250508200122.243129-1-ssiwinski@atto.com
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/block/bio.c b/block/bio.c
index 4e6c85a33d74..4be592d37fb6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -611,7 +611,7 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
{
struct bio *bio;
- if (nr_vecs > UIO_MAXIOV)
+ if (nr_vecs > BIO_MAX_INLINE_VECS)
return NULL;
return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
}
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 7a447ff600d2..a8db66428f80 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -169,6 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
unsigned int nr_zones, size_t *buflen)
{
struct request_queue *q = sdkp->disk->queue;
+ unsigned int max_segments;
size_t bufsize;
void *buf;
@@ -180,12 +181,15 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
* Furthermore, since the report zone command cannot be split, make
* sure that the allocated buffer can always be mapped by limiting the
* number of pages allocated to the HBA max segments limit.
+ * Since max segments can be larger than the max inline bio vectors,
+ * further limit the allocated buffer to BIO_MAX_INLINE_VECS.
*/
nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
bufsize = min_t(size_t, bufsize,
queue_max_hw_sectors(q) << SECTOR_SHIFT);
- bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
+ max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q));
+ bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT);
while (bufsize >= SECTOR_SIZE) {
buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index cafc7c215de8..b786ec5bcc81 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -11,6 +11,7 @@
#include <linux/uio.h>
#define BIO_MAX_VECS 256U
+#define BIO_MAX_INLINE_VECS UIO_MAXIOV
struct queue_limits;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e8007fad5457ea547ca63bb011fdb03213571c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051959-radiantly-ominous-f8f3@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e8007fad5457ea547ca63bb011fdb03213571c7e Mon Sep 17 00:00:00 2001
From: Steve Siwinski <ssiwinski(a)atto.com>
Date: Thu, 8 May 2025 16:01:22 -0400
Subject: [PATCH] scsi: sd_zbc: block: Respect bio vector limits for REPORT
ZONES buffer
The REPORT ZONES buffer size is currently limited by the HBA's maximum
segment count to ensure the buffer can be mapped. However, the block
layer further limits the number of iovec entries to 1024 when allocating
a bio.
To avoid allocation of buffers too large to be mapped, further restrict
the maximum buffer size to BIO_MAX_INLINE_VECS.
Replace the UIO_MAXIOV symbolic name with the more contextually
appropriate BIO_MAX_INLINE_VECS.
Fixes: b091ac616846 ("sd_zbc: Fix report zones buffer allocation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Steve Siwinski <ssiwinski(a)atto.com>
Link: https://lore.kernel.org/r/20250508200122.243129-1-ssiwinski@atto.com
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/block/bio.c b/block/bio.c
index 4e6c85a33d74..4be592d37fb6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -611,7 +611,7 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
{
struct bio *bio;
- if (nr_vecs > UIO_MAXIOV)
+ if (nr_vecs > BIO_MAX_INLINE_VECS)
return NULL;
return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
}
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 7a447ff600d2..a8db66428f80 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -169,6 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
unsigned int nr_zones, size_t *buflen)
{
struct request_queue *q = sdkp->disk->queue;
+ unsigned int max_segments;
size_t bufsize;
void *buf;
@@ -180,12 +181,15 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
* Furthermore, since the report zone command cannot be split, make
* sure that the allocated buffer can always be mapped by limiting the
* number of pages allocated to the HBA max segments limit.
+ * Since max segments can be larger than the max inline bio vectors,
+ * further limit the allocated buffer to BIO_MAX_INLINE_VECS.
*/
nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
bufsize = min_t(size_t, bufsize,
queue_max_hw_sectors(q) << SECTOR_SHIFT);
- bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
+ max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q));
+ bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT);
while (bufsize >= SECTOR_SIZE) {
buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index cafc7c215de8..b786ec5bcc81 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -11,6 +11,7 @@
#include <linux/uio.h>
#define BIO_MAX_VECS 256U
+#define BIO_MAX_INLINE_VECS UIO_MAXIOV
struct queue_limits;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e8007fad5457ea547ca63bb011fdb03213571c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051958-canal-tidbit-2b4d@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e8007fad5457ea547ca63bb011fdb03213571c7e Mon Sep 17 00:00:00 2001
From: Steve Siwinski <ssiwinski(a)atto.com>
Date: Thu, 8 May 2025 16:01:22 -0400
Subject: [PATCH] scsi: sd_zbc: block: Respect bio vector limits for REPORT
ZONES buffer
The REPORT ZONES buffer size is currently limited by the HBA's maximum
segment count to ensure the buffer can be mapped. However, the block
layer further limits the number of iovec entries to 1024 when allocating
a bio.
To avoid allocation of buffers too large to be mapped, further restrict
the maximum buffer size to BIO_MAX_INLINE_VECS.
Replace the UIO_MAXIOV symbolic name with the more contextually
appropriate BIO_MAX_INLINE_VECS.
Fixes: b091ac616846 ("sd_zbc: Fix report zones buffer allocation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Steve Siwinski <ssiwinski(a)atto.com>
Link: https://lore.kernel.org/r/20250508200122.243129-1-ssiwinski@atto.com
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/block/bio.c b/block/bio.c
index 4e6c85a33d74..4be592d37fb6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -611,7 +611,7 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
{
struct bio *bio;
- if (nr_vecs > UIO_MAXIOV)
+ if (nr_vecs > BIO_MAX_INLINE_VECS)
return NULL;
return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
}
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 7a447ff600d2..a8db66428f80 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -169,6 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
unsigned int nr_zones, size_t *buflen)
{
struct request_queue *q = sdkp->disk->queue;
+ unsigned int max_segments;
size_t bufsize;
void *buf;
@@ -180,12 +181,15 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
* Furthermore, since the report zone command cannot be split, make
* sure that the allocated buffer can always be mapped by limiting the
* number of pages allocated to the HBA max segments limit.
+ * Since max segments can be larger than the max inline bio vectors,
+ * further limit the allocated buffer to BIO_MAX_INLINE_VECS.
*/
nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
bufsize = min_t(size_t, bufsize,
queue_max_hw_sectors(q) << SECTOR_SHIFT);
- bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
+ max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q));
+ bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT);
while (bufsize >= SECTOR_SIZE) {
buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index cafc7c215de8..b786ec5bcc81 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -11,6 +11,7 @@
#include <linux/uio.h>
#define BIO_MAX_VECS 256U
+#define BIO_MAX_INLINE_VECS UIO_MAXIOV
struct queue_limits;