The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x a63c357b9fd56ad5fe64616f5b22835252c6a76a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012226-unmanned-marshy-5819@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
a63c357b9fd5 ("iommu/dma: Trace bounce buffer usage when mapping buffers")
f316ba0a8814 ("dma-iommu: Check that swiotlb is active before trying to use it")
a17e3026bc4d ("iommu: Move flush queue data into iommu_dma_cookie")
f7f07484542f ("iommu/iova: Move flush queue code to iommu-dma")
ea4d71bb5e3f ("iommu/iova: Consolidate flush queue code")
87f60cc65d24 ("iommu/vt-d: Use put_pages_list")
649ad9835a37 ("iommu/iova: Squash flush_cb abstraction")
d5c383f2c98a ("iommu/iova: Squash entry_dtor abstraction")
d7061627d701 ("iommu/iova: Fix race between FQ timeout and teardown")
2e727bffbe93 ("iommu/dma: Check CONFIG_SWIOTLB more broadly")
9b49bbc2c4df ("iommu/dma: Fold _swiotlb helpers into callers")
ee9d4097cc14 ("iommu/dma: Skip extra sync during unmap w/swiotlb")
06e620345d54 ("iommu/dma: Fix arch_sync_dma for map")
08ae5d4a1ae9 ("iommu/dma: Fix sync_sg with swiotlb")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a63c357b9fd56ad5fe64616f5b22835252c6a76a Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Date: Fri, 8 Dec 2023 15:41:40 -0800
Subject: [PATCH] iommu/dma: Trace bounce buffer usage when mapping buffers
When commit 82612d66d51d ("iommu: Allow the dma-iommu api to
use bounce buffers") was introduced, it did not add the logic
for tracing the bounce buffer usage from iommu_dma_map_page().
All of the users of swiotlb_tbl_map_single() trace their bounce
buffer usage, except iommu_dma_map_page(). This makes it difficult
to track SWIOTLB usage from that function. Thus, trace bounce buffer
usage from iommu_dma_map_page().
Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Cc: stable(a)vger.kernel.org # v5.15+
Cc: Tom Murphy <murphyt7(a)tcd.ie>
Cc: Lu Baolu <baolu.lu(a)linux.intel.com>
Cc: Saravana Kannan <saravanak(a)google.com>
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Link: https://lore.kernel.org/r/20231208234141.2356157-1-isaacmanjarres@google.com
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 85163a83df2f..037fcf826407 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -29,6 +29,7 @@
#include <linux/spinlock.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
+#include <trace/events/swiotlb.h>
#include "dma-iommu.h"
@@ -1156,6 +1157,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
return DMA_MAPPING_ERROR;
}
+ trace_swiotlb_bounced(dev, phys, size);
+
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
iova_mask(iovad), dir, attrs);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 8e0545c83d672750632f46e3f9ad95c48c91a0fc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024061303-heap-catalyst-0a58@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
8e0545c83d67 ("mm/vmalloc: fix vmalloc which may return null if called with __GFP_NOFAIL")
e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e0545c83d672750632f46e3f9ad95c48c91a0fc Mon Sep 17 00:00:00 2001
From: "Hailong.Liu" <hailong.liu(a)oppo.com>
Date: Fri, 10 May 2024 18:01:31 +0800
Subject: [PATCH] mm/vmalloc: fix vmalloc which may return null if called with
__GFP_NOFAIL
commit a421ef303008 ("mm: allow !GFP_KERNEL allocations for kvmalloc")
includes support for __GFP_NOFAIL, but it presents a conflict with commit
dd544141b9eb ("vmalloc: back off when the current task is OOM-killed"). A
possible scenario is as follows:
process-a
__vmalloc_node_range(GFP_KERNEL | __GFP_NOFAIL)
__vmalloc_area_node()
vm_area_alloc_pages()
--> oom-killer send SIGKILL to process-a
if (fatal_signal_pending(current)) break;
--> return NULL;
To fix this, do not check fatal_signal_pending() in vm_area_alloc_pages()
if __GFP_NOFAIL set.
This issue occurred during OPLUS KASAN TEST. Below is part of the log
-> oom-killer sends signal to process
[65731.222840] [ T1308] oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null),cpuset=/,mems_allowed=0,global_oom,task_memcg=/apps/uid_10198,task=gs.intelligence,pid=32454,uid=10198
[65731.259685] [T32454] Call trace:
[65731.259698] [T32454] dump_backtrace+0xf4/0x118
[65731.259734] [T32454] show_stack+0x18/0x24
[65731.259756] [T32454] dump_stack_lvl+0x60/0x7c
[65731.259781] [T32454] dump_stack+0x18/0x38
[65731.259800] [T32454] mrdump_common_die+0x250/0x39c [mrdump]
[65731.259936] [T32454] ipanic_die+0x20/0x34 [mrdump]
[65731.260019] [T32454] atomic_notifier_call_chain+0xb4/0xfc
[65731.260047] [T32454] notify_die+0x114/0x198
[65731.260073] [T32454] die+0xf4/0x5b4
[65731.260098] [T32454] die_kernel_fault+0x80/0x98
[65731.260124] [T32454] __do_kernel_fault+0x160/0x2a8
[65731.260146] [T32454] do_bad_area+0x68/0x148
[65731.260174] [T32454] do_mem_abort+0x151c/0x1b34
[65731.260204] [T32454] el1_abort+0x3c/0x5c
[65731.260227] [T32454] el1h_64_sync_handler+0x54/0x90
[65731.260248] [T32454] el1h_64_sync+0x68/0x6c
[65731.260269] [T32454] z_erofs_decompress_queue+0x7f0/0x2258
--> be->decompressed_pages = kvcalloc(be->nr_pages, sizeof(struct page *), GFP_KERNEL | __GFP_NOFAIL);
kernel panic by NULL pointer dereference.
erofs assume kvmalloc with __GFP_NOFAIL never return NULL.
[65731.260293] [T32454] z_erofs_runqueue+0xf30/0x104c
[65731.260314] [T32454] z_erofs_readahead+0x4f0/0x968
[65731.260339] [T32454] read_pages+0x170/0xadc
[65731.260364] [T32454] page_cache_ra_unbounded+0x874/0xf30
[65731.260388] [T32454] page_cache_ra_order+0x24c/0x714
[65731.260411] [T32454] filemap_fault+0xbf0/0x1a74
[65731.260437] [T32454] __do_fault+0xd0/0x33c
[65731.260462] [T32454] handle_mm_fault+0xf74/0x3fe0
[65731.260486] [T32454] do_mem_abort+0x54c/0x1b34
[65731.260509] [T32454] el0_da+0x44/0x94
[65731.260531] [T32454] el0t_64_sync_handler+0x98/0xb4
[65731.260553] [T32454] el0t_64_sync+0x198/0x19c
Link: https://lkml.kernel.org/r/20240510100131.1865-1-hailong.liu@oppo.com
Fixes: 9376130c390a ("mm/vmalloc: add support for __GFP_NOFAIL")
Signed-off-by: Hailong.Liu <hailong.liu(a)oppo.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Suggested-by: Barry Song <21cnbao(a)gmail.com>
Reported-by: Oven <liyangouwen1(a)oppo.com>
Reviewed-by: Barry Song <baohua(a)kernel.org>
Reviewed-by: Uladzislau Rezki (Sony) <urezki(a)gmail.com>
Cc: Chao Yu <chao(a)kernel.org>
Cc: Christoph Hellwig <hch(a)infradead.org>
Cc: Gao Xiang <xiang(a)kernel.org>
Cc: Lorenzo Stoakes <lstoakes(a)gmail.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6641be0ca80b..5d3aa2dc88a8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3498,7 +3498,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
{
unsigned int nr_allocated = 0;
gfp_t alloc_gfp = gfp;
- bool nofail = false;
+ bool nofail = gfp & __GFP_NOFAIL;
struct page *page;
int i;
@@ -3555,12 +3555,11 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* and compaction etc.
*/
alloc_gfp &= ~__GFP_NOFAIL;
- nofail = true;
}
/* High-order pages or fallback path if "bulk" fails. */
while (nr_allocated < nr_pages) {
- if (fatal_signal_pending(current))
+ if (!nofail && fatal_signal_pending(current))
break;
if (nid == NUMA_NO_NODE)
The patch titled
Subject: mm: fix incorrect vbq reference in purge_fragmented_block
has been added to the -mm mm-hotfixes-stable branch. Its filename is
mm-fix-incorrect-vbq-reference-in-purge_fragmented_block.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-stable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
Subject: mm: fix incorrect vbq reference in purge_fragmented_block
Date: Fri, 7 Jun 2024 10:31:16 +0800
xa_for_each() in _vm_unmap_aliases() loops through all vbs. However,
since commit 062eacf57ad9 ("mm: vmalloc: remove a global vmap_blocks
xarray") the vb from xarray may not be on the corresponding CPU
vmap_block_queue. Consequently, purge_fragmented_block() might use the
wrong vbq->lock to protect the free list, leading to vbq->free breakage.
Incorrect lock protection can exhaust all vmalloc space as follows:
CPU0 CPU1
+--------------------------------------------+
| +--------------------+ +-----+ |
+--> | |---->| |------+
| CPU1:vbq free_list | | vb1 |
+--- | |<----| |<-----+
| +--------------------+ +-----+ |
+--------------------------------------------+
_vm_unmap_aliases() vb_alloc()
new_vmap_block()
xa_for_each(&vbq->vmap_blocks, idx, vb)
--> vb in CPU1:vbq->freelist
purge_fragmented_block(vb)
spin_lock(&vbq->lock) spin_lock(&vbq->lock)
--> use CPU0:vbq->lock --> use CPU1:vbq->lock
list_del_rcu(&vb->free_list) list_add_tail_rcu(&vb->free_list, &vbq->free)
__list_del(vb->prev, vb->next)
next->prev = prev
+--------------------+
| |
| CPU1:vbq free_list |
+---| |<--+
| +--------------------+ |
+----------------------------+
__list_add(new, head->prev, head)
+--------------------------------------------+
| +--------------------+ +-----+ |
+--> | |---->| |------+
| CPU1:vbq free_list | | vb2 |
+--- | |<----| |<-----+
| +--------------------+ +-----+ |
+--------------------------------------------+
prev->next = next
+--------------------------------------------+
|----------------------------+ |
| +--------------------+ | +-----+ |
+--> | |--+ | |------+
| CPU1:vbq free_list | | vb2 |
+--- | |<----| |<-----+
| +--------------------+ +-----+ |
+--------------------------------------------+
Here���s a list breakdown. All vbs, which were to be added to
���prev���, cannot be used by list_for_each_entry_rcu(vb, &vbq->free,
free_list) in vb_alloc(). Thus, vmalloc space is exhausted.
This issue affects both erofs and f2fs, the stacktrace is as follows:
erofs:
[<ffffffd4ffb93ad4>] __switch_to+0x174
[<ffffffd4ffb942f0>] __schedule+0x624
[<ffffffd4ffb946f4>] schedule+0x7c
[<ffffffd4ffb947cc>] schedule_preempt_disabled+0x24
[<ffffffd4ffb962ec>] __mutex_lock+0x374
[<ffffffd4ffb95998>] __mutex_lock_slowpath+0x14
[<ffffffd4ffb95954>] mutex_lock+0x24
[<ffffffd4fef2900c>] reclaim_and_purge_vmap_areas+0x44
[<ffffffd4fef25908>] alloc_vmap_area+0x2e0
[<ffffffd4fef24ea0>] vm_map_ram+0x1b0
[<ffffffd4ff1b46f4>] z_erofs_lz4_decompress+0x278
[<ffffffd4ff1b8ac4>] z_erofs_decompress_queue+0x650
[<ffffffd4ff1b8328>] z_erofs_runqueue+0x7f4
[<ffffffd4ff1b66a8>] z_erofs_read_folio+0x104
[<ffffffd4feeb6fec>] filemap_read_folio+0x6c
[<ffffffd4feeb68c4>] filemap_fault+0x300
[<ffffffd4fef0ecac>] __do_fault+0xc8
[<ffffffd4fef0c908>] handle_mm_fault+0xb38
[<ffffffd4ffb9f008>] do_page_fault+0x288
[<ffffffd4ffb9ed64>] do_translation_fault[jt]+0x40
[<ffffffd4fec39c78>] do_mem_abort+0x58
[<ffffffd4ffb8c3e4>] el0_ia+0x70
[<ffffffd4ffb8c260>] el0t_64_sync_handler[jt]+0xb0
[<ffffffd4fec11588>] ret_to_user[jt]+0x0
f2fs:
[<ffffffd4ffb93ad4>] __switch_to+0x174
[<ffffffd4ffb942f0>] __schedule+0x624
[<ffffffd4ffb946f4>] schedule+0x7c
[<ffffffd4ffb947cc>] schedule_preempt_disabled+0x24
[<ffffffd4ffb962ec>] __mutex_lock+0x374
[<ffffffd4ffb95998>] __mutex_lock_slowpath+0x14
[<ffffffd4ffb95954>] mutex_lock+0x24
[<ffffffd4fef2900c>] reclaim_and_purge_vmap_areas+0x44
[<ffffffd4fef25908>] alloc_vmap_area+0x2e0
[<ffffffd4fef24ea0>] vm_map_ram+0x1b0
[<ffffffd4ff1a3b60>] f2fs_prepare_decomp_mem+0x144
[<ffffffd4ff1a6c24>] f2fs_alloc_dic+0x264
[<ffffffd4ff175468>] f2fs_read_multi_pages+0x428
[<ffffffd4ff17b46c>] f2fs_mpage_readpages+0x314
[<ffffffd4ff1785c4>] f2fs_readahead+0x50
[<ffffffd4feec3384>] read_pages+0x80
[<ffffffd4feec32c0>] page_cache_ra_unbounded+0x1a0
[<ffffffd4feec39e8>] page_cache_ra_order+0x274
[<ffffffd4feeb6cec>] do_sync_mmap_readahead+0x11c
[<ffffffd4feeb6764>] filemap_fault+0x1a0
[<ffffffd4ff1423bc>] f2fs_filemap_fault+0x28
[<ffffffd4fef0ecac>] __do_fault+0xc8
[<ffffffd4fef0c908>] handle_mm_fault+0xb38
[<ffffffd4ffb9f008>] do_page_fault+0x288
[<ffffffd4ffb9ed64>] do_translation_fault[jt]+0x40
[<ffffffd4fec39c78>] do_mem_abort+0x58
[<ffffffd4ffb8c3e4>] el0_ia+0x70
[<ffffffd4ffb8c260>] el0t_64_sync_handler[jt]+0xb0
[<ffffffd4fec11588>] ret_to_user[jt]+0x0
To fix this, introducee cpu within vmap_block to record which this vb
belongs to.
Link: https://lkml.kernel.org/r/20240607023116.1720640-1-zhaoyang.huang@unisoc.com
Fixes: fc1e0d980037 ("mm/vmalloc: prevent stale TLBs in fully utilized blocks")
Signed-off-by: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
Suggested-by: Hailong.Liu <hailong.liu(a)oppo.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki(a)gmail.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Christoph Hellwig <hch(a)infradead.org>
Cc: Lorenzo Stoakes <lstoakes(a)gmail.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmalloc.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
--- a/mm/vmalloc.c~mm-fix-incorrect-vbq-reference-in-purge_fragmented_block
+++ a/mm/vmalloc.c
@@ -2498,6 +2498,7 @@ struct vmap_block {
struct list_head free_list;
struct rcu_head rcu_head;
struct list_head purge;
+ unsigned int cpu;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -2625,8 +2626,15 @@ static void *new_vmap_block(unsigned int
free_vmap_area(va);
return ERR_PTR(err);
}
-
- vbq = raw_cpu_ptr(&vmap_block_queue);
+ /*
+ * list_add_tail_rcu could happened in another core
+ * rather than vb->cpu due to task migration, which
+ * is safe as list_add_tail_rcu will ensure the list's
+ * integrity together with list_for_each_rcu from read
+ * side.
+ */
+ vb->cpu = raw_smp_processor_id();
+ vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu);
spin_lock(&vbq->lock);
list_add_tail_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
@@ -2654,9 +2662,10 @@ static void free_vmap_block(struct vmap_
}
static bool purge_fragmented_block(struct vmap_block *vb,
- struct vmap_block_queue *vbq, struct list_head *purge_list,
- bool force_purge)
+ struct list_head *purge_list, bool force_purge)
{
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu);
+
if (vb->free + vb->dirty != VMAP_BBMAP_BITS ||
vb->dirty == VMAP_BBMAP_BITS)
return false;
@@ -2704,7 +2713,7 @@ static void purge_fragmented_blocks(int
continue;
spin_lock(&vb->lock);
- purge_fragmented_block(vb, vbq, &purge, true);
+ purge_fragmented_block(vb, &purge, true);
spin_unlock(&vb->lock);
}
rcu_read_unlock();
@@ -2841,7 +2850,7 @@ static void _vm_unmap_aliases(unsigned l
* not purgeable, check whether there is dirty
* space to be flushed.
*/
- if (!purge_fragmented_block(vb, vbq, &purge_list, false) &&
+ if (!purge_fragmented_block(vb, &purge_list, false) &&
vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) {
unsigned long va_start = vb->va->va_start;
unsigned long s, e;
_
Patches currently in -mm which might be from zhaoyang.huang(a)unisoc.com are
mm-fix-incorrect-vbq-reference-in-purge_fragmented_block.patch
mm-optimization-on-page-allocation-when-cma-enabled.patch
From: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
vmalloc area runs out in our ARM64 system during an erofs test as
vm_map_ram failed[1]. By following the debug log, we find that
vm_map_ram()->vb_alloc() will allocate new vb->va which corresponding
to 4MB vmalloc area as list_for_each_entry_rcu returns immediately
when vbq->free->next points to vbq->free. That is to say, 65536 times
of page fault after the list's broken will run out of the whole
vmalloc area. This should be introduced by one vbq->free->next point to
vbq->free which makes list_for_each_entry_rcu can not iterate the list
and find the BUG.
[1]
PID: 1 TASK: ffffff80802b4e00 CPU: 6 COMMAND: "init"
#0 [ffffffc08006afe0] __switch_to at ffffffc08111d5cc
#1 [ffffffc08006b040] __schedule at ffffffc08111dde0
#2 [ffffffc08006b0a0] schedule at ffffffc08111e294
#3 [ffffffc08006b0d0] schedule_preempt_disabled at ffffffc08111e3f0
#4 [ffffffc08006b140] __mutex_lock at ffffffc08112068c
#5 [ffffffc08006b180] __mutex_lock_slowpath at ffffffc08111f8f8
#6 [ffffffc08006b1a0] mutex_lock at ffffffc08111f834
#7 [ffffffc08006b1d0] reclaim_and_purge_vmap_areas at ffffffc0803ebc3c
#8 [ffffffc08006b290] alloc_vmap_area at ffffffc0803e83fc
#9 [ffffffc08006b300] vm_map_ram at ffffffc0803e78c0
Fixes: fc1e0d980037 ("mm/vmalloc: prevent stale TLBs in fully utilized blocks")
For detailed reason of broken list, please refer to below URL
https://lore.kernel.org/all/20240531024820.5507-1-hailong.liu@oppo.com/
Suggested-by: Hailong.Liu <hailong.liu(a)oppo.com>
Signed-off-by: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
---
v2: introduce cpu in vmap_block to record the right CPU number
v3: use get_cpu/put_cpu to prevent schedule between core
v4: replace get_cpu/put_cpu by another API to avoid disabling preemption
---
---
mm/vmalloc.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 22aa63f4ef63..89eb034f4ac6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2458,6 +2458,7 @@ struct vmap_block {
struct list_head free_list;
struct rcu_head rcu_head;
struct list_head purge;
+ unsigned int cpu;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -2585,8 +2586,15 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
free_vmap_area(va);
return ERR_PTR(err);
}
-
- vbq = raw_cpu_ptr(&vmap_block_queue);
+ /*
+ * list_add_tail_rcu could happened in another core
+ * rather than vb->cpu due to task migration, which
+ * is safe as list_add_tail_rcu will ensure the list's
+ * integrity together with list_for_each_rcu from read
+ * side.
+ */
+ vb->cpu = raw_smp_processor_id();
+ vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu);
spin_lock(&vbq->lock);
list_add_tail_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
@@ -2614,9 +2622,10 @@ static void free_vmap_block(struct vmap_block *vb)
}
static bool purge_fragmented_block(struct vmap_block *vb,
- struct vmap_block_queue *vbq, struct list_head *purge_list,
- bool force_purge)
+ struct list_head *purge_list, bool force_purge)
{
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu);
+
if (vb->free + vb->dirty != VMAP_BBMAP_BITS ||
vb->dirty == VMAP_BBMAP_BITS)
return false;
@@ -2664,7 +2673,7 @@ static void purge_fragmented_blocks(int cpu)
continue;
spin_lock(&vb->lock);
- purge_fragmented_block(vb, vbq, &purge, true);
+ purge_fragmented_block(vb, &purge, true);
spin_unlock(&vb->lock);
}
rcu_read_unlock();
@@ -2801,7 +2810,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
* not purgeable, check whether there is dirty
* space to be flushed.
*/
- if (!purge_fragmented_block(vb, vbq, &purge_list, false) &&
+ if (!purge_fragmented_block(vb, &purge_list, false) &&
vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) {
unsigned long va_start = vb->va->va_start;
unsigned long s, e;
--
2.25.1