Running the crypto manager self tests with
CONFIG_CRYPTO_MANAGER_EXTRA_TESTS may result in several types of errors
when using the ccp-crypto driver:
alg: skcipher: cbc-des3-ccp encryption failed on test vector 0; expected_error=0, actual_error=-5 ...
alg: skcipher: ctr-aes-ccp decryption overran dst buffer on test vector 0 ...
alg: ahash: sha224-ccp test failed (wrong result) on test vector ...
These errors are the result of improper processing of scatterlists mapped
for DMA.
Given a scatterlist in which entries are merged as part of mapping the
scatterlist for DMA, the DMA length of a merged entry will reflect the
combined length of the entries that were merged. The subsequent
scatterlist entry will contain DMA information for the scatterlist entry
after the last merged entry, but the non-DMA information will be that of
the first merged entry.
The ccp driver does not take this scatterlist merging into account. To
address this, add a second scatterlist pointer to track the current
position in the DMA mapped representation of the scatterlist. Both the DMA
representation and the original representation of the scatterlist must be
tracked as while most of the driver can use just the DMA representation,
scatterlist_map_and_copy() must use the original representation and
expects the scatterlist pointer to be accurate to the original
representation.
In order to properly walk the original scatterlist, the scatterlist must
be walked until the combined lengths of the entries seen is equal to the
DMA length of the current entry being processed in the DMA mapped
representation.
Fixes: 63b945091a070 ("crypto: ccp - CCP device driver and interface support")
Signed-off-by: John Allen <john.allen(a)amd.com>
Cc: stable(a)vger.kernel.org
---
drivers/crypto/ccp/ccp-dev.h | 1 +
drivers/crypto/ccp/ccp-ops.c | 37 +++++++++++++++++++++++++-----------
2 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 3f68262d9ab4..87a34d91fdf7 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -469,6 +469,7 @@ struct ccp_sg_workarea {
unsigned int sg_used;
struct scatterlist *dma_sg;
+ struct scatterlist *dma_sg_head;
struct device *dma_dev;
unsigned int dma_count;
enum dma_data_direction dma_dir;
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 422193690fd4..64112c736810 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -63,7 +63,7 @@ static u32 ccp_gen_jobid(struct ccp_device *ccp)
static void ccp_sg_free(struct ccp_sg_workarea *wa)
{
if (wa->dma_count)
- dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
+ dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir);
wa->dma_count = 0;
}
@@ -92,6 +92,7 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
return 0;
wa->dma_sg = sg;
+ wa->dma_sg_head = sg;
wa->dma_dev = dev;
wa->dma_dir = dma_dir;
wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
@@ -104,14 +105,28 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
{
unsigned int nbytes = min_t(u64, len, wa->bytes_left);
+ unsigned int sg_combined_len = 0;
if (!wa->sg)
return;
wa->sg_used += nbytes;
wa->bytes_left -= nbytes;
- if (wa->sg_used == wa->sg->length) {
- wa->sg = sg_next(wa->sg);
+ if (wa->sg_used == sg_dma_len(wa->dma_sg)) {
+ /* Advance to the next DMA scatterlist entry */
+ wa->dma_sg = sg_next(wa->dma_sg);
+
+ /* In the case that the DMA mapped scatterlist has entries
+ * that have been merged, the non-DMA mapped scatterlist
+ * must be advanced multiple times for each merged entry.
+ * This ensures that the current non-DMA mapped entry
+ * corresponds to the current DMA mapped entry.
+ */
+ do {
+ sg_combined_len += wa->sg->length;
+ wa->sg = sg_next(wa->sg);
+ } while (wa->sg_used > sg_combined_len);
+
wa->sg_used = 0;
}
}
@@ -299,7 +314,7 @@ static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
/* Update the structures and generate the count */
buf_count = 0;
while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
- nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
+ nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used,
dm_wa->length - buf_count);
nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
@@ -331,11 +346,11 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
* and destination. The resulting len values will always be <= UINT_MAX
* because the dma length is an unsigned int.
*/
- sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
+ sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used;
sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
if (dst) {
- sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
+ sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used;
sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
op_len = min(sg_src_len, sg_dst_len);
} else {
@@ -365,7 +380,7 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
/* Enough data in the sg element, but we need to
* adjust for any previously copied data
*/
- op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
+ op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg);
op->src.u.dma.offset = src->sg_wa.sg_used;
op->src.u.dma.length = op_len & ~(block_size - 1);
@@ -386,7 +401,7 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
/* Enough room in the sg element, but we need to
* adjust for any previously used area
*/
- op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
+ op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg);
op->dst.u.dma.offset = dst->sg_wa.sg_used;
op->dst.u.dma.length = op->src.u.dma.length;
}
@@ -2028,7 +2043,7 @@ ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
dst.sg_wa.sg_used = 0;
for (i = 1; i <= src.sg_wa.dma_count; i++) {
if (!dst.sg_wa.sg ||
- (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
+ (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) {
ret = -EINVAL;
goto e_dst;
}
@@ -2054,8 +2069,8 @@ ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_dst;
}
- dst.sg_wa.sg_used += src.sg_wa.sg->length;
- if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
+ dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg);
+ if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) {
dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
dst.sg_wa.sg_used = 0;
}
--
2.18.4
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ee470bb25d0dcdf126f586ec0ae6dca66cb340a4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp(a)suse.de>
Date: Thu, 18 Jun 2020 20:25:25 +0200
Subject: [PATCH] EDAC/amd64: Read back the scrub rate PCI register on F15h
Commit:
da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
added support for F15h, model 0x60 CPUs but in doing so, missed to read
back SCRCTRL PCI config register on F15h CPUs which are *not* model
0x60. Add that read so that doing
$ cat /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
can show the previously set DRAM scrub rate.
Fixes: da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
Reported-by: Anders Andersson <pipatron(a)gmail.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: <stable(a)vger.kernel.org> #v4.4..
Link: https://lkml.kernel.org/r/CAKkunMbNWppx_i6xSdDHLseA2QQmGJqj_crY=NF-GZML5np4…
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index ef90070a9194..6262f6370c5d 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -269,6 +269,8 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
if (pvt->model == 0x60)
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+ else
+ amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
} else {
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
}
Our tracehook logic for syscall entry/exit raises a SIGTRAP back to the
tracer following a ptrace request such as PTRACE_SYSCALL. As part of this
procedure, we clobber the reported value of one of the tracee's general
purpose registers (x7 for native tasks, r12 for compat) to indicate
whether the stop occurred on syscall entry or exit. This is a slightly
unfortunate ABI, as it prevents the tracer from accessing the real
register value and is at odds with other similar stops such as seccomp
traps.
Since we're stuck with this ABI, expand the comment in our tracehook
logic to acknowledge the issue and descibe the behaviour in more detail.
Cc: <stable(a)vger.kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Luis Machado <luis.machado(a)linaro.org>
Reported-by: Keno Fischer <keno(a)juliacomputing.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
---
arch/arm64/kernel/ptrace.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 68b7f34a08f5..d71795dc3dbd 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1811,8 +1811,20 @@ static void tracehook_report_syscall(struct pt_regs *regs,
unsigned long saved_reg;
/*
- * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
- * used to denote syscall entry/exit:
+ * We have some ABI weirdness here in the way that we handle syscall
+ * exit stops because we indicate whether or not the stop has been
+ * signalled from syscall entry or syscall exit by clobbering a general
+ * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
+ * and restoring its old value after the stop. This means that:
+ *
+ * - Any writes by the tracer to this register during the stop are
+ * ignored/discarded.
+ *
+ * - The actual value of the register is not available during the stop,
+ * so the tracer cannot save it and restore it later.
+ *
+ * - Syscall stops behave differently to seccomp and pseudo-step traps
+ * (the latter do not nobble any registers).
*/
regno = (is_compat_task() ? 12 : 7);
saved_reg = regs->regs[regno];
--
2.27.0.212.ge8ba1cc988-goog
As we allow for parallel threads to create vma instances in parallel,
and we only filter out the duplicates upon reacquiring the spinlock for
the rbtree, we have to free the loser of the constructors' race. When
freeing, we should also drop any resource references acquired for the
redundant vma.
Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.5+
---
drivers/gpu/drm/i915/i915_vma.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1f63c4a1f055..7fe1f317cd2b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -198,6 +198,7 @@ vma_create(struct drm_i915_gem_object *obj,
cmp = i915_vma_compare(pos, vm, view);
if (cmp == 0) {
spin_unlock(&obj->vma.lock);
+ i915_vm_put(vm);
i915_vma_free(vma);
return pos;
}
--
2.20.1
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From fa7041d9d2fc7401cece43f305eb5b87b7017fc4 Mon Sep 17 00:00:00 2001
From: Stylon Wang <stylon.wang(a)amd.com>
Date: Fri, 12 Jun 2020 19:04:18 +0800
Subject: [PATCH] drm/amd/display: Fix ineffective setting of max bpc property
[Why]
Regression was introduced where setting max bpc property has no effect
on the atomic check and final commit. It has the same effect as max bpc
being stuck at 8.
[How]
Correctly propagate max bpc with the new connector state.
Signed-off-by: Stylon Wang <stylon.wang(a)amd.com>
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas(a)amd.com>
Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7ced9f87be97..10ac8076d4f2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5024,7 +5024,8 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
struct drm_connector *connector = &aconnector->base;
struct amdgpu_device *adev = connector->dev->dev_private;
struct dc_stream_state *stream;
- int requested_bpc = connector->state ? connector->state->max_requested_bpc : 8;
+ const struct drm_connector_state *drm_state = dm_state ? &dm_state->base : NULL;
+ int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8;
enum dc_status dc_result = DC_OK;
do {
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 243bce09c91b0145aeaedd5afba799d81841c030 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd(a)google.com>
Date: Thu, 25 Jun 2020 20:29:59 -0700
Subject: [PATCH] mm: fix swap cache node allocation mask
Chris Murphy reports that a slightly overcommitted load, testing swap
and zram along with i915, splats and keeps on splatting, when it had
better fail less noisily:
gnome-shell: page allocation failure: order:0,
mode:0x400d0(__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_RECLAIMABLE),
nodemask=(null),cpuset=/,mems_allowed=0
CPU: 2 PID: 1155 Comm: gnome-shell Not tainted 5.7.0-1.fc33.x86_64 #1
Call Trace:
dump_stack+0x64/0x88
warn_alloc.cold+0x75/0xd9
__alloc_pages_slowpath.constprop.0+0xcfa/0xd30
__alloc_pages_nodemask+0x2df/0x320
alloc_slab_page+0x195/0x310
allocate_slab+0x3c5/0x440
___slab_alloc+0x40c/0x5f0
__slab_alloc+0x1c/0x30
kmem_cache_alloc+0x20e/0x220
xas_nomem+0x28/0x70
add_to_swap_cache+0x321/0x400
__read_swap_cache_async+0x105/0x240
swap_cluster_readahead+0x22c/0x2e0
shmem_swapin+0x8e/0xc0
shmem_swapin_page+0x196/0x740
shmem_getpage_gfp+0x3a2/0xa60
shmem_read_mapping_page_gfp+0x32/0x60
shmem_get_pages+0x155/0x5e0 [i915]
__i915_gem_object_get_pages+0x68/0xa0 [i915]
i915_vma_pin+0x3fe/0x6c0 [i915]
eb_add_vma+0x10b/0x2c0 [i915]
i915_gem_do_execbuffer+0x704/0x3430 [i915]
i915_gem_execbuffer2_ioctl+0x1ea/0x3e0 [i915]
drm_ioctl_kernel+0x86/0xd0 [drm]
drm_ioctl+0x206/0x390 [drm]
ksys_ioctl+0x82/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5b/0xf0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported on 5.7, but it goes back really to 3.1: when
shmem_read_mapping_page_gfp() was implemented for use by i915, and
allowed for __GFP_NORETRY and __GFP_NOWARN flags in most places, but
missed swapin's "& GFP_KERNEL" mask for page tree node allocation in
__read_swap_cache_async() - that was to mask off HIGHUSER_MOVABLE bits
from what page cache uses, but GFP_RECLAIM_MASK is now what's needed.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208085
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2006151330070.11064@eggly.anvils
Fixes: 68da9f055755 ("tmpfs: pass gfp to shmem_getpage_gfp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Analyzed-by: Vlastimil Babka <vbabka(a)suse.cz>
Analyzed-by: Matthew Wilcox <willy(a)infradead.org>
Tested-by: Chris Murphy <lists(a)colorremedies.com>
Cc: <stable(a)vger.kernel.org> [3.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e98ff460e9e9..05889e8e3c97 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-
+#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+ if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 432cd2a10f1c10cead91fe706ff5dc52f06d642a Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 8 Jun 2020 13:32:55 +0100
Subject: [PATCH] btrfs: fix data block group relocation failure due to
concurrent scrub
When running relocation of a data block group while scrub is running in
parallel, it is possible that the relocation will fail and abort the
current transaction with an -EINVAL error:
[134243.988595] BTRFS info (device sdc): found 14 extents, stage: move data extents
[134243.999871] ------------[ cut here ]------------
[134244.000741] BTRFS: Transaction aborted (error -22)
[134244.001692] WARNING: CPU: 0 PID: 26954 at fs/btrfs/ctree.c:1071 __btrfs_cow_block+0x6a7/0x790 [btrfs]
[134244.003380] Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
[134244.012577] CPU: 0 PID: 26954 Comm: btrfs Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
[134244.014162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[134244.016184] RIP: 0010:__btrfs_cow_block+0x6a7/0x790 [btrfs]
[134244.017151] Code: 48 c7 c7 (...)
[134244.020549] RSP: 0018:ffffa41607863888 EFLAGS: 00010286
[134244.021515] RAX: 0000000000000000 RBX: ffff9614bdfe09c8 RCX: 0000000000000000
[134244.022822] RDX: 0000000000000001 RSI: ffffffffb3d63980 RDI: 0000000000000001
[134244.024124] RBP: ffff961589e8c000 R08: 0000000000000000 R09: 0000000000000001
[134244.025424] R10: ffffffffc0ae5955 R11: 0000000000000000 R12: ffff9614bd530d08
[134244.026725] R13: ffff9614ced41b88 R14: ffff9614bdfe2a48 R15: 0000000000000000
[134244.028024] FS: 00007f29b63c08c0(0000) GS:ffff9615ba600000(0000) knlGS:0000000000000000
[134244.029491] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[134244.030560] CR2: 00007f4eb339b000 CR3: 0000000130d6e006 CR4: 00000000003606f0
[134244.031997] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[134244.033153] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[134244.034484] Call Trace:
[134244.034984] btrfs_cow_block+0x12b/0x2b0 [btrfs]
[134244.035859] do_relocation+0x30b/0x790 [btrfs]
[134244.036681] ? do_raw_spin_unlock+0x49/0xc0
[134244.037460] ? _raw_spin_unlock+0x29/0x40
[134244.038235] relocate_tree_blocks+0x37b/0x730 [btrfs]
[134244.039245] relocate_block_group+0x388/0x770 [btrfs]
[134244.040228] btrfs_relocate_block_group+0x161/0x2e0 [btrfs]
[134244.041323] btrfs_relocate_chunk+0x36/0x110 [btrfs]
[134244.041345] btrfs_balance+0xc06/0x1860 [btrfs]
[134244.043382] ? btrfs_ioctl_balance+0x27c/0x310 [btrfs]
[134244.045586] btrfs_ioctl_balance+0x1ed/0x310 [btrfs]
[134244.045611] btrfs_ioctl+0x1880/0x3760 [btrfs]
[134244.049043] ? do_raw_spin_unlock+0x49/0xc0
[134244.049838] ? _raw_spin_unlock+0x29/0x40
[134244.050587] ? __handle_mm_fault+0x11b3/0x14b0
[134244.051417] ? ksys_ioctl+0x92/0xb0
[134244.052070] ksys_ioctl+0x92/0xb0
[134244.052701] ? trace_hardirqs_off_thunk+0x1a/0x1c
[134244.053511] __x64_sys_ioctl+0x16/0x20
[134244.054206] do_syscall_64+0x5c/0x280
[134244.054891] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[134244.055819] RIP: 0033:0x7f29b51c9dd7
[134244.056491] Code: 00 00 00 (...)
[134244.059767] RSP: 002b:00007ffcccc1dd08 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
[134244.061168] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f29b51c9dd7
[134244.062474] RDX: 00007ffcccc1dda0 RSI: 00000000c4009420 RDI: 0000000000000003
[134244.063771] RBP: 0000000000000003 R08: 00005565cea4b000 R09: 0000000000000000
[134244.065032] R10: 0000000000000541 R11: 0000000000000202 R12: 00007ffcccc2060a
[134244.066327] R13: 00007ffcccc1dda0 R14: 0000000000000002 R15: 00007ffcccc1dec0
[134244.067626] irq event stamp: 0
[134244.068202] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[134244.069351] hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134244.070909] softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134244.072392] softirqs last disabled at (0): [<0000000000000000>] 0x0
[134244.073432] ---[ end trace bd7c03622e0b0a99 ]---
The -EINVAL error comes from the following chain of function calls:
__btrfs_cow_block() <-- aborts the transaction
btrfs_reloc_cow_block()
replace_file_extents()
get_new_location() <-- returns -EINVAL
When relocating a data block group, for each allocated extent of the block
group, we preallocate another extent (at prealloc_file_extent_cluster()),
associated with the data relocation inode, and then dirty all its pages.
These preallocated extents have, and must have, the same size that extents
from the data block group being relocated have.
Later before we start the relocation stage that updates pointers (bytenr
field of file extent items) to point to the the new extents, we trigger
writeback for the data relocation inode. The expectation is that writeback
will write the pages to the previously preallocated extents, that it
follows the NOCOW path. That is generally the case, however, if a scrub
is running it may have turned the block group that contains those extents
into RO mode, in which case writeback falls back to the COW path.
However in the COW path instead of allocating exactly one extent with the
expected size, the allocator may end up allocating several smaller extents
due to free space fragmentation - because we tell it at cow_file_range()
that the minimum allocation size can match the filesystem's sector size.
This later breaks the relocation's expectation that an extent associated
to a file extent item in the data relocation inode has the same size as
the respective extent pointed by a file extent item in another tree - in
this case the extent to which the relocation inode poins to is smaller,
causing relocation.c:get_new_location() to return -EINVAL.
For example, if we are relocating a data block group X that has a logical
address of X and the block group has an extent allocated at the logical
address X + 128KiB with a size of 64KiB:
1) At prealloc_file_extent_cluster() we allocate an extent for the data
relocation inode with a size of 64KiB and associate it to the file
offset 128KiB (X + 128KiB - X) of the data relocation inode. This
preallocated extent was allocated at block group Z;
2) A scrub running in parallel turns block group Z into RO mode and
starts scrubing its extents;
3) Relocation triggers writeback for the data relocation inode;
4) When running delalloc (btrfs_run_delalloc_range()), we try first the
NOCOW path because the data relocation inode has BTRFS_INODE_PREALLOC
set in its flags. However, because block group Z is in RO mode, the
NOCOW path (run_delalloc_nocow()) falls back into the COW path, by
calling cow_file_range();
5) At cow_file_range(), in the first iteration of the while loop we call
btrfs_reserve_extent() to allocate a 64KiB extent and pass it a minimum
allocation size of 4KiB (fs_info->sectorsize). Due to free space
fragmentation, btrfs_reserve_extent() ends up allocating two extents
of 32KiB each, each one on a different iteration of that while loop;
6) Writeback of the data relocation inode completes;
7) Relocation proceeds and ends up at relocation.c:replace_file_extents(),
with a leaf which has a file extent item that points to the data extent
from block group X, that has a logical address (bytenr) of X + 128KiB
and a size of 64KiB. Then it calls get_new_location(), which does a
lookup in the data relocation tree for a file extent item starting at
offset 128KiB (X + 128KiB - X) and belonging to the data relocation
inode. It finds a corresponding file extent item, however that item
points to an extent that has a size of 32KiB, which doesn't match the
expected size of 64KiB, resuling in -EINVAL being returned from this
function and propagated up to __btrfs_cow_block(), which aborts the
current transaction.
To fix this make sure that at cow_file_range() when we call the allocator
we pass it a minimum allocation size corresponding the desired extent size
if the inode belongs to the data relocation tree, otherwise pass it the
filesystem's sector size as the minimum allocation size.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 12b5d61f23bb..62c3f4972ff6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -985,6 +985,7 @@ static noinline int cow_file_range(struct inode *inode,
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -1035,10 +1036,26 @@ static noinline int cow_file_range(struct inode *inode,
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ffcb9d44572afbaf8fa6dbf5115bff6dab7b299e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:19 +0100
Subject: [PATCH] btrfs: fix race between block group removal and block group
creation
There is a race between block group removal and block group creation
when the removal is completed by a task running fitrim or scrub. When
this happens we end up failing the block group creation with an error
-EEXIST since we attempt to insert a duplicate block group item key
in the extent tree. That results in a transaction abort.
The race happens like this:
1) Task A is doing a fitrim, and at btrfs_trim_block_group() it freezes
block group X with btrfs_freeze_block_group() (until very recently
that was named btrfs_get_block_group_trimming());
2) Task B starts removing block group X, either because it's now unused
or due to relocation for example. So at btrfs_remove_block_group(),
while holding the chunk mutex and the block group's lock, it sets
the 'removed' flag of the block group and it sets the local variable
'remove_em' to false, because the block group is currently frozen
(its 'frozen' counter is > 0, until very recently this counter was
named 'trimming');
3) Task B unlocks the block group and the chunk mutex;
4) Task A is done trimming the block group and unfreezes the block group
by calling btrfs_unfreeze_block_group() (until very recently this was
named btrfs_put_block_group_trimming()). In this function we lock the
block group and set the local variable 'cleanup' to true because we
were able to decrement the block group's 'frozen' counter down to 0 and
the flag 'removed' is set in the block group.
Since 'cleanup' is set to true, it locks the chunk mutex and removes
the extent mapping representing the block group from the mapping tree;
5) Task C allocates a new block group Y and it picks up the logical address
that block group X had as the logical address for Y, because X was the
block group with the highest logical address and now the second block
group with the highest logical address, the last in the fs mapping tree,
ends at an offset corresponding to block group X's logical address (this
logical address selection is done at volumes.c:find_next_chunk()).
At this point the new block group Y does not have yet its item added
to the extent tree (nor the corresponding device extent items and
chunk item in the device and chunk trees). The new group Y is added to
the list of pending block groups in the transaction handle;
6) Before task B proceeds to removing the block group item for block
group X from the extent tree, which has a key matching:
(X logical offset, BTRFS_BLOCK_GROUP_ITEM_KEY, length)
task C while ending its transaction handle calls
btrfs_create_pending_block_groups(), which finds block group Y and
tries to insert the block group item for Y into the exten tree, which
fails with -EEXIST since logical offset is the same that X had and
task B hasn't yet deleted the key from the extent tree.
This failure results in a transaction abort, producing a stack like
the following:
------------[ cut here ]------------
BTRFS: Transaction aborted (error -17)
WARNING: CPU: 2 PID: 19736 at fs/btrfs/block-group.c:2074 btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
CPU: 2 PID: 19736 Comm: fsstress Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Code: ff ff ff 48 8b 55 50 f0 48 (...)
RSP: 0018:ffffa4160a1c7d58 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff961581909d98 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffffffffb3d63990 RDI: 0000000000000001
RBP: ffff9614f3356a58 R08: 0000000000000000 R09: 0000000000000001
R10: ffff9615b65b0040 R11: 0000000000000000 R12: ffff961581909c10
R13: ffff9615b0c32000 R14: ffff9614f3356ab0 R15: ffff9614be779000
FS: 00007f2ce2841e80(0000) GS:ffff9615bae00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000555f18780000 CR3: 0000000131d34005 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
btrfs_start_dirty_block_groups+0x398/0x4e0 [btrfs]
btrfs_commit_transaction+0xd0/0xc50 [btrfs]
? btrfs_attach_transaction_barrier+0x1e/0x50 [btrfs]
? __ia32_sys_fdatasync+0x20/0x20
iterate_supers+0xdb/0x180
ksys_sync+0x60/0xb0
__ia32_sys_sync+0xa/0x10
do_syscall_64+0x5c/0x280
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f2ce1d4d5b7
Code: 83 c4 08 48 3d 01 (...)
RSP: 002b:00007ffd8b558c58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a2
RAX: ffffffffffffffda RBX: 000000000000002c RCX: 00007f2ce1d4d5b7
RDX: 00000000ffffffff RSI: 00000000186ba07b RDI: 000000000000002c
RBP: 0000555f17b9e520 R08: 0000000000000012 R09: 000000000000ce00
R10: 0000000000000078 R11: 0000000000000202 R12: 0000000000000032
R13: 0000000051eb851f R14: 00007ffd8b558cd0 R15: 0000555f1798ec20
irq event stamp: 0
hardirqs last enabled at (0): [<0000000000000000>] 0x0
hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last disabled at (0): [<0000000000000000>] 0x0
---[ end trace bd7c03622e0b0a9c ]---
Fix this simply by making btrfs_remove_block_group() remove the block
group's item from the extent tree before it flags the block group as
removed. Also make the free space deletion from the free space tree
before flagging the block group as removed, to avoid a similar race
with adding and removing free space entries for the free space tree.
Fixes: 04216820fe83d5 ("Btrfs: fix race between fs trimming and block group remove/allocation")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6462dd0b155c..c037ef514b64 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1092,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
+ /*
+ * Remove the free space for the block group from the free space tree
+ * and the block group's item from the extent tree before marking the
+ * block group as removed. This is to prevent races with tasks that
+ * freeze and unfreeze a block group, this task and another task
+ * allocating a new block group - the unfreeze task ends up removing
+ * the block group's extent map before the task calling this function
+ * deletes the block group item from the extent tree, allowing for
+ * another task to attempt to create another block group with the same
+ * item key (and failing with -EEXIST and a transaction abort).
+ */
+ ret = remove_block_group_free_space(trans, block_group);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_item(trans, path, block_group);
+ if (ret < 0)
+ goto out;
+
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
@@ -1126,14 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, block_group);
- if (ret)
- goto out;
-
- ret = remove_block_group_item(trans, path, block_group);
- if (ret < 0)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
This series of commits fixes a problem with closing l2cap connection
if socket has unACKed frames. Due an to an infinite loop in l2cap_wait_ack
the userspace process gets stuck in close() and then the kernel crashes
with the following report:
Call trace:
[<ffffffc000ace0b4>] l2cap_do_send+0x2c/0xec
[<ffffffc000acf5f8>] l2cap_send_sframe+0x178/0x260
[<ffffffc000acf740>] l2cap_send_rr_or_rnr+0x60/0x84
[<ffffffc000acf980>] l2cap_ack_timeout+0x60/0xac
[<ffffffc0000b35b8>] process_one_work+0x140/0x384
[<ffffffc0000b393c>] worker_thread+0x140/0x4e4
[<ffffffc0000b8c48>] kthread+0xdc/0xf0
All kernels below v4.3 are affected.
-------------------------
Commit log:
Alexey Dobriyan (1):
Bluetooth: Stop sabotaging list poisoning
Dean Jenkins (8):
Bluetooth: L2CAP ERTM shutdown protect sk and chan
Bluetooth: Make __l2cap_wait_ack more efficient
Bluetooth: Add BT_DBG to l2cap_sock_shutdown()
Bluetooth: __l2cap_wait_ack() use msecs_to_jiffies()
Bluetooth: __l2cap_wait_ack() add defensive timeout
Bluetooth: Unwind l2cap_sock_shutdown()
Bluetooth: Reorganize mutex lock in l2cap_sock_shutdown()
Bluetooth: l2cap_disconnection_req priority over shutdown
Tedd Ho-Jeong An (1):
Bluetooth: Reinitialize the list after deletion for session user list
include/net/bluetooth/l2cap.h | 2 +
net/bluetooth/l2cap_core.c | 12 ++---
net/bluetooth/l2cap_sock.c | 94 +++++++++++++++++++++++++++--------
3 files changed, 78 insertions(+), 30 deletions(-)
--
2.17.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9fecd13202f520f3f25d5b1c313adb740fe19773 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:06 +0100
Subject: [PATCH] btrfs: fix a block group ref counter leak after failure to
remove block group
When removing a block group, if we fail to delete the block group's item
from the extent tree, we jump to the 'out' label and end up decrementing
the block group's reference count once only (by 1), resulting in a counter
leak because the block group at that point was already removed from the
block group cache rbtree - so we have to decrement the reference count
twice, once for the rbtree and once for our lookup at the start of the
function.
There is a second bug where if removing the free space tree entries (the
call to remove_block_group_free_space()) fails we end up jumping to the
'out_put_group' label but end up decrementing the reference count only
once, when we should have done it twice, since we have already removed
the block group from the block group cache rbtree. This happens because
the reference count decrement for the rbtree reference happens after
attempting to remove the free space tree entries, which is far away from
the place where we remove the block group from the rbtree.
To make things less error prone, decrement the reference count for the
rbtree immediately after removing the block group from it. This also
eleminates the need for two different exit labels on error, renaming
'out_put_label' to just 'out' and removing the old 'out'.
Fixes: f6033c5e333238 ("btrfs: fix block group leak when removing fails")
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 176e8a292fd1..6462dd0b155c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -940,7 +940,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out_put_group;
+ goto out;
}
/*
@@ -978,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
- goto out_put_group;
+ goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
@@ -1001,13 +1001,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
- goto out_put_group;
+ goto out;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out_put_group;
+ goto out;
btrfs_release_path(path);
}
@@ -1016,6 +1016,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
+ /* Once for the block groups rbtree */
+ btrfs_put_block_group(block_group);
+
if (fs_info->first_logical_byte == block_group->start)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -1125,10 +1128,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = remove_block_group_free_space(trans, block_group);
if (ret)
- goto out_put_group;
-
- /* Once for the block groups rbtree */
- btrfs_put_block_group(block_group);
+ goto out;
ret = remove_block_group_item(trans, path, block_group);
if (ret < 0)
@@ -1145,10 +1145,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_extent_map(em);
}
-out_put_group:
+out:
/* Once for the lookup reference */
btrfs_put_block_group(block_group);
-out:
if (remove_rsv)
btrfs_delayed_refs_rsv_release(fs_info, 1);
btrfs_free_path(path);
From: Dave Hansen <dave.hansen(a)linux.intel.com>
I went to go add a new RECLAIM_* mode for the zone_reclaim_mode
sysctl. Like a good kernel developer, I also went to go update the
documentation. I noticed that the bits in the documentation didn't
match the bits in the #defines.
The VM never explicitly checks the RECLAIM_ZONE bit. The bit is,
however implicitly checked when checking 'node_reclaim_mode==0'.
The RECLAIM_ZONE #define was removed in a cleanup. That, by itself
is fine.
But, when the bit was removed (bit 0) the _other_ bit locations also
got changed. That's not OK because the bit values are documented to
mean one specific thing and users surely rely on them meaning that one
thing and not changing from kernel to kernel. The end result is that
if someone had a script that did:
sysctl vm.zone_reclaim_mode=1
That script went from doing nothing to writing out pages during
node reclaim after the commit in question. That's not great.
Put the bits back the way they were and add a comment so something
like this is a bit harder to do again. Update the documentation to
make it clear that the first bit is ignored.
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Fixes: 648b5cf368e0 ("mm/vmscan: remove unused RECLAIM_OFF/RECLAIM_ZONE")
Cc: Ben Widawsky <ben.widawsky(a)intel.com>
Cc: Alex Shi <alex.shi(a)linux.alibaba.com>
Cc: Daniel Wagner <dwagner(a)suse.de>
Cc: "Tobin C. Harding" <tobin(a)kernel.org>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Qian Cai <cai(a)lca.pw>
Cc: Daniel Wagner <dwagner(a)suse.de>
Cc: stable(a)vger.kernel.org
---
b/Documentation/admin-guide/sysctl/vm.rst | 10 +++++-----
b/mm/vmscan.c | 9 +++++++--
2 files changed, 12 insertions(+), 7 deletions(-)
diff -puN Documentation/admin-guide/sysctl/vm.rst~mm-vmscan-restore-old-zone_reclaim_mode-abi Documentation/admin-guide/sysctl/vm.rst
--- a/Documentation/admin-guide/sysctl/vm.rst~mm-vmscan-restore-old-zone_reclaim_mode-abi 2020-07-01 08:22:11.354955336 -0700
+++ b/Documentation/admin-guide/sysctl/vm.rst 2020-07-01 08:22:11.360955336 -0700
@@ -948,11 +948,11 @@ that benefit from having their data cach
left disabled as the caching effect is likely to be more important than
data locality.
-zone_reclaim may be enabled if it's known that the workload is partitioned
-such that each partition fits within a NUMA node and that accessing remote
-memory would cause a measurable performance reduction. The page allocator
-will then reclaim easily reusable pages (those page cache pages that are
-currently not used) before allocating off node pages.
+Consider enabling one or more zone_reclaim mode bits if it's known that the
+workload is partitioned such that each partition fits within a NUMA node
+and that accessing remote memory would cause a measurable performance
+reduction. The page allocator will take additional actions before
+allocating off node pages.
Allowing zone reclaim to write out pages stops processes that are
writing large amounts of data from dirtying pages on other nodes. Zone
diff -puN mm/vmscan.c~mm-vmscan-restore-old-zone_reclaim_mode-abi mm/vmscan.c
--- a/mm/vmscan.c~mm-vmscan-restore-old-zone_reclaim_mode-abi 2020-07-01 08:22:11.356955336 -0700
+++ b/mm/vmscan.c 2020-07-01 08:22:11.362955336 -0700
@@ -4090,8 +4090,13 @@ module_init(kswapd_init)
*/
int node_reclaim_mode __read_mostly;
-#define RECLAIM_WRITE (1<<0) /* Writeout pages during reclaim */
-#define RECLAIM_UNMAP (1<<1) /* Unmap pages during reclaim */
+/*
+ * These bit locations are exposed in the vm.zone_reclaim_mode sysctl
+ * ABI. New bits are OK, but existing bits can never change.
+ */
+#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
+#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
+#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */
/*
* Priority for NODE_RECLAIM. This determines the fraction of pages
_
Hi,
Here is a series of patches for 4.4.y to build perf-tools on
newer toolchain. This also includes a perf-probe fix.
Thank you,
---
Arnaldo Carvalho de Melo (1):
perf annotate: Use asprintf when formatting objdump command line
Changbin Du (1):
perf: Make perf able to build with latest libbfd
Jiri Olsa (1):
perf tools: Fix snprint warnings for gcc 8
Masami Hiramatsu (1):
perf probe: Fix to check blacklist address correctly
Sergey Senozhatsky (1):
tools/lib/subcmd/pager.c: do not alias select() params
tools/perf/builtin-script.c | 24 ++++++++++++------------
tools/perf/tests/attr.c | 4 ++--
tools/perf/tests/pmu.c | 2 +-
tools/perf/util/annotate.c | 14 +++++++++++---
tools/perf/util/cgroup.c | 2 +-
tools/perf/util/pager.c | 5 ++++-
tools/perf/util/parse-events.c | 4 ++--
tools/perf/util/pmu.c | 2 +-
tools/perf/util/probe-event.c | 21 +++++++++++++++------
tools/perf/util/srcline.c | 16 +++++++++++++++-
10 files changed, 64 insertions(+), 30 deletions(-)
--
Masami Hiramatsu (Linaro) <mhiramat(a)kernel.org>
Hey,
following commit from 5.8-rc1 would be beneficial for 5.4+ kernels.
We've verified it fixes audio driver probe errors on systems with Intel
gen10/11/12 display hardware:
Link: https://github.com/thesofproject/linux/issues/1847
commit 1c664c15cf0a31784b217a84fa0128ce46f17a84
Author: Kai Vehmanen <kai.vehmanen(a)linux.intel.com>
Date: Tue Mar 24 17:32:12 2020 +0200
drm/i915: use forced codec wake on all gen9+ platforms
Commit 632f3ab95fe2 ("drm/i915/audio: add codec wakeup override
enabled/disable callback"), added logic to toggle Codec Wake on gen9.
This is used by audio driver when it resets the HDA controller.
It seems explicit toggling of the wakeline can help to fix problems
with probe failing on some gen12 platforms. And based on specs, there
is no reason why this programming sequence should not be applied to
all
gen9+ platforms. No side-effects are seen on gen10/11. So apply
the wake-logic to all gen9+ platforms.
Link: https://github.com/thesofproject/linux/issues/1847
Signed-off-by: Kai Vehmanen <kai.vehmanen(a)linux.intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200324153212.6303-1-kai.veh…
Br, Kai
This change regresses the QCA6174A-3 bluetooth chip, preventing
firmware from being properly loaded. Without this change, the
chip works as intended.
The device is the Kukui Chromebook using the Mediatek chipset
and the 8250_mtk uart. Initial controller baudrate is 115200
and operating speed is 3000000. Our entire suite of bluetooth
tests now fail on this platform due to an apparent failure to
sync its firmware on initialization.
The driver is in the cros tree at drivers/bluetooth/hci_qca.c
and uses the serdev interface. Specifically, this is the
QCA_ROME chipset.
Daniel Winkler (1):
Revert "serial: 8250: Fix max baud limit in generic 8250 port"
drivers/tty/serial/8250/8250_port.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--
2.27.0.212.ge8ba1cc988-goog
From: Jeremy Kerr <jk(a)ozlabs.org>
[ Upstream commit e869e7a17798d85829fa7d4f9bbe1eebd4b2d3f6 ]
Using a AX88179 device (0b95:1790), I see two bytes of appended data on
every RX packet. For example, this 48-byte ping, using 0xff as a
payload byte:
04:20:22.528472 IP 192.168.1.1 > 192.168.1.2: ICMP echo request, id 2447, seq 1, length 64
0x0000: 000a cd35 ea50 000a cd35 ea4f 0800 4500
0x0010: 0054 c116 4000 4001 f63e c0a8 0101 c0a8
0x0020: 0102 0800 b633 098f 0001 87ea cd5e 0000
0x0030: 0000 dcf2 0600 0000 0000 ffff ffff ffff
0x0040: ffff ffff ffff ffff ffff ffff ffff ffff
0x0050: ffff ffff ffff ffff ffff ffff ffff ffff
0x0060: ffff 961f
Those last two bytes - 96 1f - aren't part of the original packet.
In the ax88179 RX path, the usbnet rx_fixup function trims a 2-byte
'alignment pseudo header' from the start of the packet, and sets the
length from a per-packet field populated by hardware. It looks like that
length field *includes* the 2-byte header; the current driver assumes
that it's excluded.
This change trims the 2-byte alignment header after we've set the packet
length, so the resulting packet length is correct. While we're moving
the comment around, this also fixes the spelling of 'pseudo'.
Signed-off-by: Jeremy Kerr <jk(a)ozlabs.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/usb/ax88179_178a.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index e3f2e6098db40..2dcc8a039d42e 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1396,10 +1396,10 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
}
if (pkt_cnt == 0) {
- /* Skip IP alignment psudo header */
- skb_pull(skb, 2);
skb->len = pkt_len;
- skb_set_tail_pointer(skb, pkt_len);
+ /* Skip IP alignment pseudo header */
+ skb_pull(skb, 2);
+ skb_set_tail_pointer(skb, skb->len);
skb->truesize = pkt_len + sizeof(struct sk_buff);
ax88179_rx_checksum(skb, pkt_hdr);
return 1;
@@ -1408,8 +1408,9 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
ax_skb = skb_clone(skb, GFP_ATOMIC);
if (ax_skb) {
ax_skb->len = pkt_len;
- ax_skb->data = skb->data + 2;
- skb_set_tail_pointer(ax_skb, pkt_len);
+ /* Skip IP alignment pseudo header */
+ skb_pull(ax_skb, 2);
+ skb_set_tail_pointer(ax_skb, ax_skb->len);
ax_skb->truesize = pkt_len + sizeof(struct sk_buff);
ax88179_rx_checksum(ax_skb, pkt_hdr);
usbnet_skb_return(dev, ax_skb);
--
2.25.1
From: Thierry Reding <treding(a)nvidia.com>
[ Upstream commit d9a0a05bf8c76e6dc79230669a8b5d685b168c30 ]
Currently when a host1x device driver is unregistered, it is not
detached from the host1x controller, which means that the device
will stay around and when the driver is registered again, it may
bind to the old, stale device rather than the new one that was
created from scratch upon driver registration. This in turn can
cause various weird crashes within the driver core because it is
confronted with a device that was already deleted.
Fix this by detaching the driver from the host1x controller when
it is unregistered. This ensures that the deleted device also is
no longer present in the device list that drivers will bind to.
Reported-by: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Signed-off-by: Thierry Reding <treding(a)nvidia.com>
Tested-by: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Signed-off-by: Thierry Reding <treding(a)nvidia.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/host1x/bus.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index c27858ae05529..6ef89e8a515a9 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -542,8 +542,17 @@ EXPORT_SYMBOL(host1x_driver_register_full);
void host1x_driver_unregister(struct host1x_driver *driver)
{
+ struct host1x *host1x;
+
driver_unregister(&driver->driver);
+ mutex_lock(&devices_lock);
+
+ list_for_each_entry(host1x, &devices, list)
+ host1x_detach_driver(host1x, driver);
+
+ mutex_unlock(&devices_lock);
+
mutex_lock(&drivers_lock);
list_del_init(&driver->list);
mutex_unlock(&drivers_lock);
--
2.25.1
From: Tony Lindgren <tony(a)atomide.com>
[ Upstream commit 0df12a01f4857495816b05f048c4c31439446e35 ]
We can currently sometimes get "RXS timed out" errors and "EOT timed out"
errors with spi transfers.
These errors can be made easy to reproduce by reading the cpcap iio
values in a loop while keeping the CPUs busy by also reading /dev/urandom.
The "RXS timed out" errors we can fix by adding spi-cpol and spi-cpha
in addition to the spi-cs-high property we already have.
The "EOT timed out" errors we can fix by increasing the spi clock rate
to 9.6 MHz. Looks similar MC13783 PMIC says it works at spi clock rates
up to 20 MHz, so let's assume we can pick any rate up to 20 MHz also
for cpcap.
Cc: maemo-leste(a)lists.dyne.org
Cc: Merlijn Wajer <merlijn(a)wizzup.org>
Cc: Pavel Machek <pavel(a)ucw.cz>
Cc: Sebastian Reichel <sre(a)kernel.org>
Signed-off-by: Tony Lindgren <tony(a)atomide.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
index bcced922b2807..b4779b0ece96d 100644
--- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
+++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
@@ -16,8 +16,10 @@ cpcap: pmic@0 {
#interrupt-cells = <2>;
#address-cells = <1>;
#size-cells = <0>;
- spi-max-frequency = <3000000>;
+ spi-max-frequency = <9600000>;
spi-cs-high;
+ spi-cpol;
+ spi-cpha;
cpcap_adc: adc {
compatible = "motorola,mapphone-cpcap-adc";
--
2.25.1
Commit bac8486116b0 ("iavf: Refactor the watchdog state machine") inverted
the logic for when to update statistics. Statistics should be updated when
no other commands are pending, instead they were only requested when a
command was processed. iavf_request_stats() would see a pending request
and not request statistics to be updated. This caused statistics to never
be updated; fix the logic.
CC: stable(a)vger.kernel.org
Fixes: bac8486116b0 ("iavf: Refactor the watchdog state machine")
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
Tested-by: Andrew Bowers <andrewx.bowers(a)intel.com>
---
drivers/net/ethernet/intel/iavf/iavf_main.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index b90ad1abbabb..48c956d90b90 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1937,7 +1937,10 @@ static void iavf_watchdog_task(struct work_struct *work)
iavf_send_api_ver(adapter);
}
} else {
- if (!iavf_process_aq_command(adapter) &&
+ /* An error will be returned if no commands were
+ * processed; use this opportunity to update stats
+ */
+ if (iavf_process_aq_command(adapter) &&
adapter->state == __IAVF_RUNNING)
iavf_request_stats(adapter);
}
--
2.26.2
Hi
[This is an automated email]
This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all
The bot has tested the following trees: v5.7.6, v5.4.49, v4.19.130, v4.14.186, v4.9.228, v4.4.228.
v5.7.6: Build OK!
v5.4.49: Build OK!
v4.19.130: Build OK!
v4.14.186: Build OK!
v4.9.228: Build OK!
v4.4.228: Failed to apply! Possible dependencies:
5d6c31910bc07 ("xattr: Add __vfs_{get,set,remove}xattr helpers")
6b2553918d8b4 ("replace ->follow_link() with new method that could stay in RCU mode")
aa80deab33a8f ("namei: page_getlink() and page_follow_link_light() are the same thing")
cd3417c8fc950 ("kill free_page_put_link()")
ce23e64013348 ("->getxattr(): pass dentry and inode as separate arguments")
fceef393a5381 ("switch ->get_link() to delayed_call, kill ->put_link()")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks
Sasha
When the kernel panics, one page worth of kmsg data is written to an allocated
page. The Hypervisor is notified of the page address trough the MSR. This
panic information is collected on the host. Since we are only collecting one
page of data, the full panic message may not be collected.
Each line of the panic message is prefixed with the log level of that
particular message in the form <N>, where N is the log level. The typical
4 Kbytes contains anywhere from 50 to 100 lines with that log level prefix.
hv_dmsg_dump() makes a call to kmsg_dump_get_buffer(). The second argument in
the call is a bool described as: ‘@syslog: include the “<4>” Prefixes’.
With this change, we will not write the log level to the allocated page. This
will provide additional room in the allocated page for more informative panic
information.
Requesting in stable kernels, since many kernels running in production are
stable releases.
Cc: stable(a)vger.kernel.org
Signed-off-by: Joseph Salisbury <joseph.salisbury(a)microsoft.com>
---
drivers/hv/vmbus_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 9147ee9d5f7d..d69f4efa3719 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1368,7 +1368,7 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper,
* Write dump contents to the page. No need to synchronize; panic should
* be single-threaded.
*/
- kmsg_dump_get_buffer(dumper, true, hv_panic_page, HV_HYP_PAGE_SIZE,
+ kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE,
&bytes_written);
if (bytes_written)
hyperv_report_panic_msg(panic_pa, bytes_written);
--
2.17.1
This is a note to let you know that I've just added the patch titled
coresight: etmv4: Fix CPU power management setup in probe() function
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 9b6a3f3633a5cc928b78627764793b60cb62e0f6 Mon Sep 17 00:00:00 2001
From: Mike Leach <mike.leach(a)linaro.org>
Date: Wed, 1 Jul 2020 10:08:52 -0600
Subject: coresight: etmv4: Fix CPU power management setup in probe() function
The current probe() function calls a pair of cpuhp_xxx API functions to
setup CPU hotplug handling. The hotplug lock is held for the duration of
the two calls and other CPU related code using cpus_read_lock() /
cpus_read_unlock() calls.
The problem is that on error states, goto: statements bypass the
cpus_read_unlock() call. This code has increased in complexity as the
driver has developed.
This patch introduces a pair of helper functions etm4_pm_setup_cpuslocked()
and etm4_pm_clear() which correct the issues above and group the PM code a
little better.
The two functions etm4_cpu_pm_register() and etm4_cpu_pm_unregister() are
dropped as these call cpu_pm_register_notifier() / ..unregister_notifier()
dependent on CONFIG_CPU_PM - but this define is used to nop these functions
out in the pm headers - so the wrapper functions are superfluous.
Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states")
Fixes: e9f5d63f84fe ("hwtracing/coresight-etm4x: Use cpuhp_setup_state_nocalls_cpuslocked()")
Fixes: 58eb457be028 ("hwtracing/coresight-etm4x: Convert to hotplug state machine")
Signed-off-by: Mike Leach <mike.leach(a)linaro.org>
Cc: stable <stable(a)vger.kernel.org>
Reviewed-by: Mathieu Poirier <mathieu.poirier(a)linaro.org>
Link: https://lore.kernel.org/r/20200701160852.2782823-3-mathieu.poirier@linaro.o…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/hwtracing/coresight/coresight-etm4x.c | 82 ++++++++++++-------
1 file changed, 53 insertions(+), 29 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 747afc875f91..0c35cd5e0d1d 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -1388,18 +1388,57 @@ static struct notifier_block etm4_cpu_pm_nb = {
.notifier_call = etm4_cpu_pm_notify,
};
-static int etm4_cpu_pm_register(void)
+/* Setup PM. Called with cpus locked. Deals with error conditions and counts */
+static int etm4_pm_setup_cpuslocked(void)
{
- if (IS_ENABLED(CONFIG_CPU_PM))
- return cpu_pm_register_notifier(&etm4_cpu_pm_nb);
+ int ret;
- return 0;
+ if (etm4_count++)
+ return 0;
+
+ ret = cpu_pm_register_notifier(&etm4_cpu_pm_nb);
+ if (ret)
+ goto reduce_count;
+
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+ "arm/coresight4:starting",
+ etm4_starting_cpu, etm4_dying_cpu);
+
+ if (ret)
+ goto unregister_notifier;
+
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/coresight4:online",
+ etm4_online_cpu, NULL);
+
+ /* HP dyn state ID returned in ret on success */
+ if (ret > 0) {
+ hp_online = ret;
+ return 0;
+ }
+
+ /* failed dyn state - remove others */
+ cpuhp_remove_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING);
+
+unregister_notifier:
+ cpu_pm_unregister_notifier(&etm4_cpu_pm_nb);
+
+reduce_count:
+ --etm4_count;
+ return ret;
}
-static void etm4_cpu_pm_unregister(void)
+static void etm4_pm_clear(void)
{
- if (IS_ENABLED(CONFIG_CPU_PM))
- cpu_pm_unregister_notifier(&etm4_cpu_pm_nb);
+ if (--etm4_count != 0)
+ return;
+
+ cpu_pm_unregister_notifier(&etm4_cpu_pm_nb);
+ cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
+ if (hp_online) {
+ cpuhp_remove_state_nocalls(hp_online);
+ hp_online = 0;
+ }
}
static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
@@ -1453,24 +1492,15 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
etm4_init_arch_data, drvdata, 1))
dev_err(dev, "ETM arch init failed\n");
- if (!etm4_count++) {
- cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
- "arm/coresight4:starting",
- etm4_starting_cpu, etm4_dying_cpu);
- ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
- "arm/coresight4:online",
- etm4_online_cpu, NULL);
- if (ret < 0)
- goto err_arch_supported;
- hp_online = ret;
+ ret = etm4_pm_setup_cpuslocked();
+ cpus_read_unlock();
- ret = etm4_cpu_pm_register();
- if (ret)
- goto err_arch_supported;
+ /* etm4_pm_setup_cpuslocked() does its own cleanup - exit on error */
+ if (ret) {
+ etmdrvdata[drvdata->cpu] = NULL;
+ return ret;
}
- cpus_read_unlock();
-
if (etm4_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
goto err_arch_supported;
@@ -1517,13 +1547,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
err_arch_supported:
etmdrvdata[drvdata->cpu] = NULL;
- if (--etm4_count == 0) {
- etm4_cpu_pm_unregister();
-
- cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
- if (hp_online)
- cpuhp_remove_state_nocalls(hp_online);
- }
+ etm4_pm_clear();
return ret;
}
--
2.27.0
While e3a3c3a20555 ("UIO: fix uio_pdrv_genirq with device tree but no
interrupt") added support for using uio_pdrv_genirq for devices without
interrupt for device tree platforms, the removal of uio_pdrv in
26dac3c49d56 ("uio: Remove uio_pdrv and use uio_pdrv_genirq instead")
broke the support for non device tree platforms.
This change fixes this, so that uio_pdrv_genirq can be used without
interrupt on all platforms.
This still leaves the support that uio_pdrv had for custom interrupt
handler lacking, as uio_pdrv_genirq does not handle it (yet).
Fixes: 26dac3c49d56 ("uio: Remove uio_pdrv and use uio_pdrv_genirq instead")
Signed-off-by: Esben Haabendal <esben(a)geanix.com>
Cc: stable(a)vger.kernel.org
---
drivers/uio/uio_pdrv_genirq.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/uio/uio_pdrv_genirq.c b/drivers/uio/uio_pdrv_genirq.c
index 1d69dd49c6d2..b60173bc93ce 100644
--- a/drivers/uio/uio_pdrv_genirq.c
+++ b/drivers/uio/uio_pdrv_genirq.c
@@ -161,7 +161,7 @@ static int uio_pdrv_genirq_probe(struct platform_device *pdev)
if (!uioinfo->irq) {
ret = platform_get_irq_optional(pdev, 0);
uioinfo->irq = ret;
- if (ret == -ENXIO && pdev->dev.of_node)
+ if (ret == -ENXIO)
uioinfo->irq = UIO_IRQ_NONE;
else if (ret == -EPROBE_DEFER)
return ret;
--
2.4.11
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512
I'm announcing the release of the 5.7.7 kernel.
All users of the 5.7 kernel series must upgrade.
The updated 5.7.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.7.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
Thanks,
Sasha
Makefile | 2 +-
arch/arm64/kernel/fpsimd.c | 25 ++++++++++++++++++-------
drivers/nvme/host/core.c | 8 --------
drivers/nvme/host/multipath.c | 36 ++++++++++++++++++++++++++----------
drivers/nvme/host/nvme.h | 2 ++
drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
6 files changed, 61 insertions(+), 28 deletions(-)
Aaron Plattner (1):
ALSA: hda: Add NVIDIA codec IDs 9a & 9d through a0 to patch table
Adam Ford (1):
drm/panel-simple: fix connector type for LogicPD Type28 Display
Aditya Pakki (3):
rocker: fix incorrect error handling in dma_rings_init
RDMA/rvt: Fix potential memory leak caused by rvt_alloc_rq
test_objagg: Fix potential memory leak in error handling
Al Cooper (1):
xhci: Fix enumeration issue when setting max packet size for FS devices.
Al Viro (1):
fix a braino in "sparc32: fix register window handling in genregs32_[gs]et()"
Alexander Lobakin (10):
net: ethtool: add missing string for NETIF_F_GSO_TUNNEL_REMCSUM
net: ethtool: add missing NETIF_F_GSO_FRAGLIST feature string
net: qed: fix left elements count calculation
net: qed: fix async event callbacks unregistering
net: qede: stop adding events on an already destroyed workqueue
net: qed: fix NVMe login fails over VFs
net: qed: fix excessive QM ILT lines consumption
net: qede: fix PTP initialization on recovery
net: qede: fix use-after-free on recovery and AER handling
net: qed: reset ILT block sizes before recomputing to fix crashes
Alexander Usyskin (1):
mei: me: add tiger lake point device ids for H platforms.
Anand Moon (1):
Revert "usb: dwc3: exynos: Add support for Exynos5422 suspend clk"
Anson Huang (1):
soc: imx8m: Correct i.MX8MP UID fuse offset
Anton Eidelman (2):
nvme-multipath: fix deadlock between ana_work and scan_work
nvme-multipath: fix deadlock due to head->lock
Ard Biesheuvel (1):
net: phy: mscc: avoid skcipher API for single block AES encryption
Arseny Solokha (1):
powerpc/fsl_booke/32: Fix build with CONFIG_RANDOMIZE_BASE
Arvind Sankar (1):
efi/x86: Setup stack correctly for efi_pe_entry
Babu Moger (1):
x86/resctrl: Fix memory bandwidth counter width for AMD
Ben Widawsky (1):
mm/memory_hotplug.c: fix false softlockup during pfn range removal
Bernard Zhao (1):
drm/amd: fix potential memleak in err branch
Borislav Petkov (1):
EDAC/amd64: Read back the scrub rate PCI register on F15h
Chaitanya Kulkarni (1):
nvmet: fail outstanding host posted AEN req
Charles Keepax (1):
regmap: Fix memory leak from regmap_register_patch
Christoffer Nielsen (1):
ALSA: usb-audio: Add registration quirk for Kingston HyperX Cloud Flight S
Christopher Swenson (1):
ALSA: usb-audio: Set 48 kHz rate for Rodecaster
Chuck Lever (2):
SUNRPC: Properly set the @subbuf parameter of xdr_buf_subsegment()
xprtrdma: Fix handling of RDMA_ERROR replies
Chuhong Yuan (1):
USB: ohci-sm501: Add missed iounmap() in remove
Claudiu Beznea (3):
net: macb: undo operations in case of failure
net: macb: call pm_runtime_put_sync on failure path
net: macb: free resources on failure path of at91ether_open()
Claudiu Manoil (1):
enetc: Fix tx rings bitmap iteration range, irq handling
Colin Ian King (1):
qed: add missing error test for DBG_STATUS_NO_MATCHING_FRAMING_MODE
Cong Wang (1):
genetlink: clean up family attributes allocations
Dan Carpenter (3):
x86/resctrl: Fix a NULL vs IS_ERR() static checker warning in rdt_cdp_peer_get()
usb: gadget: udc: Potential Oops in error handling code
Staging: rtl8723bs: prevent buffer overflow in update_sta_support_rate()
Daniel Gomez (1):
drm: rcar-du: Fix build error
Daniel Vetter (1):
drm/fb-helper: Fix vt restore
Dave Martin (1):
arm64/sve: Eliminate data races on sve_default_vl
David Christensen (1):
tg3: driver sleeps indefinitely when EEH errors exceed eeh_max_freezes
David Howells (3):
rxrpc: Fix notification call on completion of discarded calls
rxrpc: Fix handling of rwind from an ACK packet
afs: Fix storage of cell names
David Milburn (1):
nvmet: cleanups the loop in nvmet_async_events_process
David Rientjes (3):
dma-direct: re-encrypt memory if dma_direct_alloc_pages() fails
dma-direct: check return value when encrypting or decrypting memory
dma-direct: add missing set_memory_decrypted() for coherent mapping
Dejin Zheng (1):
net: phy: smsc: fix printing too many logs
Denis Efremov (2):
drm/amd/display: Use kfree() to free rgb_user in calculate_user_regamma_ramp()
drm/radeon: fix fb_div check in ni_init_smc_spll_table()
Denis Kirjanov (1):
tcp: don't ignore ECN CWR on pure ACK
Dennis Dalessandro (1):
IB/hfi1: Fix module use count flaw due to leftover module put calls
Dinghao Liu (1):
hwrng: ks-sa - Fix runtime PM imbalance on error
Dmitry Baryshkov (1):
pinctrl: qcom: spmi-gpio: fix warning about irq chip reusage
Doug Berger (1):
net: bcmgenet: use hardware padding of runt frames
Drew Fustini (1):
ARM: dts: am335x-pocketbeagle: Fix mmc0 Write Protect
Eddie James (1):
i2c: fsi: Fix the port number field in status register
Eric Dumazet (2):
net: increment xmit_recursion level in dev_direct_xmit()
tcp: grow window for OOO packets only for SACK flows
Fabian Vogt (1):
efi/tpm: Verify event log header before parsing
Fan Guo (1):
RDMA/mad: Fix possible memory leak in ib_mad_post_receive_mads()
Filipe Manana (7):
btrfs: fix a block group ref counter leak after failure to remove block group
btrfs: fix bytes_may_use underflow when running balance and scrub in parallel
btrfs: fix data block group relocation failure due to concurrent scrub
btrfs: check if a log root exists before locking the log_mutex on unlink
btrfs: fix hang on snapshot creation after RWF_NOWAIT write
btrfs: fix failure of RWF_NOWAIT write into prealloc extent beyond eof
btrfs: fix RWF_NOWAIT write not failling when we need to cow
Florian Fainelli (3):
net: phy: Check harder for errors in get_phy_id()
of: of_mdio: Correct loop scanning logic
net: dsa: bcm_sf2: Fix node reference count
Frieder Schrempf (2):
ARM: dts: imx6ul-kontron: Move watchdog from Kontron i.MX6UL/ULL board to SoM
ARM: dts: imx6ul-kontron: Change WDOG_ANY signal from push-pull to open-drain
Gal Pressman (1):
RDMA/efa: Set maximum pkeys device attribute
Gao Xiang (1):
erofs: fix partially uninitialized misuse in z_erofs_onlinepage_fixup
Gaurav Singh (2):
ethtool: Fix check in ethtool_rx_flow_rule_create
bpf, xdp, samples: Fix null pointer dereference in *_user code
Geliang Tang (1):
mptcp: drop sndr_key in mptcp_syn_options
Harish (1):
selftests/powerpc: Fix build failure in ebb tests
Heikki Krogerus (1):
usb: typec: mux: intel_pmc_mux: Fix DP alternate mode entry
Heiner Kallweit (1):
r8169: fix firmware not resetting tp->ocp_base
Huaisheng Ye (1):
dm writecache: correct uncommitted_block when discarding uncommitted entry
Huy Nguyen (1):
xfrm: Fix double ESP trailer insertion in IPsec crypto offload.
Ido Schimmel (1):
mlxsw: spectrum: Do not rely on machine endianness
Igor Mammedov (1):
kvm: lapic: fix broken vcpu hotplug
Ilya Ponetayev (1):
sch_cake: don't try to reallocate or unshare skb unconditionally
Jason A. Donenfeld (5):
wireguard: device: avoid circular netns references
wireguard: receive: account for napi_gro_receive never returning GRO_DROP
socionext: account for napi_gro_receive never returning GRO_DROP
wil6210: account for napi_gro_receive never returning GRO_DROP
ACPI: configfs: Disallow loading ACPI tables when locked down
Jeremy Kerr (1):
net: usb: ax88179_178a: fix packet alignment padding
Jiping Ma (1):
arm64: perf: Report the PC value in REGS_ABI_32 mode
Jiri Slaby (1):
syscalls: Fix offset type of ksys_ftruncate()
Joakim Tjernlund (1):
cdc-acm: Add DISABLE_ECHO quirk for Microchip/SMSC chip
Johannes Weiner (1):
mm: memcontrol: handle div0 crash race condition in memory.low
John van der Kamp (1):
drm/amdgpu/display: Unlock mutex on error
Julian Wiedmann (1):
s390/qeth: fix error handling for isolation mode cmds
Junxiao Bi (4):
ocfs2: avoid inode removal while nfsd is accessing it
ocfs2: load global_inode_alloc
ocfs2: fix value of OCFS2_INVALID_SLOT
ocfs2: fix panic on nfs server over ocfs2
Juri Lelli (2):
sched/deadline: Initialize ->dl_boosted
sched/core: Fix PI boosting between RT and DEADLINE tasks
Kai-Heng Feng (3):
xhci: Poll for U0 after disabling USB2 LPM
xhci: Return if xHCI doesn't support LPM
ALSA: hda/realtek: Add mute LED and micmute LED support for HP systems
Kees Cook (1):
x86/cpu: Use pinning mask for CR4 bits needing to be 0
Keith Busch (1):
nvme-multipath: set bdi capabilities once
Krzysztof Kozlowski (1):
spi: spi-fsl-dspi: Free DMA memory with matching function
Laurence Tratt (1):
ALSA: usb-audio: Add implicit feedback quirk for SSL2+.
Leon Romanovsky (1):
RDMA/core: Check that type_attrs is not NULL prior access
Li Jun (1):
usb: typec: tcpci_rt1711h: avoid screaming irq causing boot hangs
Longfang Liu (1):
USB: ehci: reopen solution for Synopsys HC bug
Lorenzo Bianconi (2):
openvswitch: take into account de-fragmentation/gso_size in execute_check_pkt_len
samples/bpf: xdp_redirect_cpu: Set MAX_CPUS according to NR_CPUS
Lu Baolu (3):
iommu/vt-d: Set U/S bit in first level page table by default
iommu/vt-d: Enable PCI ACS for platform opt in hint
iommu/vt-d: Update scalable mode paging structure coherency
Luis Chamberlain (1):
blktrace: break out of blktrace setup on concurrent calls
Macpaul Lin (2):
usb: host: xhci-mtk: avoid runtime suspend when removing hcd
ALSA: usb-audio: add quirk for Samsung USBC Headset (AKG)
Mans Rullgard (1):
i2c: core: check returned size of emulated smbus block read
Marcelo Ricardo Leitner (1):
sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket
Mark Zhang (1):
RDMA/cma: Protect bind_list and listen_list while finding matching cm id
Martin (1):
bareudp: Fixed multiproto mode configuration
Martin Fuzzey (1):
regulator: da9063: fix LDO9 suspend and warning.
Masahiro Yamada (1):
kbuild: improve cc-option to clean up all temporary files
Masami Hiramatsu (2):
kprobes: Suppress the suspicious RCU warning on kprobes
tracing: Fix event trigger to accept redundant spaces
Mathias Nyman (1):
xhci: Fix incorrect EP_STATE_MASK
Matt Fleming (1):
x86/asm/64: Align start of __clear_user() loop to 16-bytes
Matthew Hagan (3):
ARM: bcm: Select ARM_TIMER_SP804 for ARCH_BCM_NSP
ARM: dts: NSP: Disable PL330 by default, add dma-coherent property
ARM: dts: NSP: Correct FA2 mailbox node
Mauricio Faria de Oliveira (1):
bcache: check and adjust logical block size for backing devices
Michael Chan (3):
bnxt_en: Store the running firmware version code.
bnxt_en: Do not enable legacy TX push on older firmware.
bnxt_en: Fix statistics counters issue during ifdown with older firmware.
Michal Kalderon (1):
RDMA/qedr: Fix KASAN: use-after-free in ucma_event_handler+0x532
Mikulas Patocka (1):
dm writecache: add cond_resched to loop in persistent_memory_claim()
Minas Harutyunyan (1):
usb: dwc2: Postponed gadget registration to the udc class driver
Muchun Song (1):
mm/memcontrol.c: add missed css_put()
Nathan Chancellor (2):
s390/vdso: Use $(LD) instead of $(CC) to link vDSO
ACPI: sysfs: Fix pm_profile_attr type
Nathan Huckleberry (1):
riscv/atomic: Fix sign extension for RV64I
Navid Emamdoost (1):
sata_rcar: handle pm_runtime_get_sync failure cases
Neal Cardwell (2):
tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT
bpf: tcp: bpf_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT
Olga Kornievskaia (1):
NFSv4 fix CLOSE not waiting for direct IO compeletion
Oskar Holmlund (2):
ARM: dts: Fix am33xx.dtsi USB ranges length
ARM: dts: Fix am33xx.dtsi ti,sysc-mask wrong softreset flag
Pavel Begunkov (1):
io_uring: fix hanging iopoll in case of -EAGAIN
Peter Chen (3):
usb: cdns3: trace: using correct dir value
usb: cdns3: ep0: fix the test mode set incorrectly
usb: cdns3: ep0: add spinlock for cdns3_check_new_setup
Philipp Fent (1):
efi/libstub: Fix path separator regression
Pierre-Louis Bossart (1):
ASoC: soc-pcm: fix checks for multi-cpu FE dailinks
Qiushi Wu (2):
efi/esrt: Fix reference count leak in esre_create_sysfs_entry.
ASoC: rockchip: Fix a reference count leak.
Rafał Miłecki (1):
ARM: dts: BCM5301X: Add missing memory "device_type" for Luxul XWC-2000
Rahul Lakkireddy (2):
cxgb4: move handling L2T ARP failures to caller
cxgb4: move PTP lock and unlock to caller in Tx path
Reinette Chatre (2):
x86/cpu: Move resctrl CPUID code to resctrl/
x86/resctrl: Support CPUID enumeration of MBM counter width
Robin Gong (3):
regualtor: pfuze100: correct sw1a/sw2 on pfuze3000
arm64: dts: imx8mm-evk: correct ldo1/ldo2 voltage range
arm64: dts: imx8mn-ddr4-evk: correct ldo1/ldo2 voltage range
Roman Bolshakov (1):
scsi: qla2xxx: Keep initiator ports after RSCN
Russell King (3):
net: phylink: fix ethtool -A with attached PHYs
net: phylink: ensure manual pause mode configuration takes effect
netfilter: ipset: fix unaligned atomic access
Sabrina Dubroca (1):
geneve: allow changing DF behavior after creation
Sagi Grimberg (2):
nvme: fix possible deadlock when I/O is blocked
nvme: don't protect ns mutation with ns->head->lock
Sami Tolvanen (1):
recordmcount: support >64k sections
Sascha Ortmann (1):
tracing/boottime: Fix kprobe multiple events
Sasha Levin (1):
Linux 5.7.7-rc1
Sean Christopherson (3):
KVM: nVMX: Plumb L2 GPA through to PML emulation
KVM: VMX: Stop context switching MSR_IA32_UMWAIT_CONTROL
x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup
SeongJae Park (1):
scsi: lpfc: Avoid another null dereference in lpfc_sli4_hba_unset()
Shannon Nelson (2):
ionic: update the queue count on open
ionic: tame the watchdog timer on reconfig
Shay Drory (1):
IB/mad: Fix use after free when destroying MAD agent
Shengjiu Wang (1):
ASoC: fsl_ssi: Fix bclk calculation for mono channel
Srinivas Kandagatla (3):
ASoC: q6asm: handle EOS correctly
ASoc: q6afe: add support to get port direction
ASoC: qcom: common: set correct directions for dailinks
Stanislav Fomichev (1):
bpf: Don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE
Steffen Maier (1):
scsi: zfcp: Fix panic on ERP timeout for previously dismissed ERP action
Steven Rostedt (VMware) (1):
ring-buffer: Zero out time extend if it is nested and not absolute
Stylon Wang (1):
drm/amd/display: Enable output_bpc property on all outputs
Sven Auhagen (1):
mvpp2: ethtool rxtx stats fix
Sven Schnelle (4):
s390/seccomp: pass syscall arguments via seccomp_data
s390/ptrace: return -ENOSYS when invalid syscall is supplied
s390/ptrace: pass invalid syscall numbers to tracing
s390/ptrace: fix setting syscall number
Taehee Yoo (3):
net: core: reduce recursion limit value
ip6_gre: fix use-after-free in ip6gre_tunnel_lookup()
ip_tunnel: fix use-after-free in ip_tunnel_lookup()
Takashi Iwai (3):
ALSA: usb-audio: Fix potential use-after-free of streams
ALSA: usb-audio: Fix OOB access of mixer element list
ALSA: hda/realtek - Add quirk for MSI GE63 laptop
Tang Bin (1):
usb: host: ehci-exynos: Fix error check in exynos_ehci_probe()
Tariq Toukan (1):
net: Do not clear the sock TX queue in sk_set_socket()
Thierry Reding (1):
Revert "i2c: tegra: Fix suspending in active runtime PM state"
Thomas Falcon (2):
ibmveth: Fix max MTU limit
ibmvnic: Harden device login requests
Thomas Martitz (1):
net: bridge: enfore alignment for ethernet address
Todd Kjos (1):
binder: fix null deref of proc->context
Toke Høiland-Jørgensen (3):
sch_cake: don't call diffserv parsing code when it is not needed
sch_cake: fix a few style nits
devmap: Use bpf_map_area_alloc() for allocating hash buckets
Tom Seewald (1):
RDMA/siw: Fix pointer-to-int-cast warning in siw_rx_pbl()
Tomas Winkler (1):
mei: me: disable mei interface on Mehlow server platforms
Tomasz Meresiński (1):
usb: add USB_QUIRK_DELAY_INIT for Logitech C922
Tomi Valkeinen (1):
drm/panel-simple: fix connector type for newhaven_nhd_43_480272ef_atxl
Tony Lindgren (6):
bus: ti-sysc: Flush posted write on enable and disable
bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit
bus: ti-sysc: Ignore clockactivity unless specified as a quirk
bus: ti-sysc: Fix uninitialized framedonetv_irq
ARM: OMAP2+: Fix legacy mode dss_reset
ARM: dts: Fix duovero smsc interrupt for suspend
Trond Myklebust (1):
pNFS/flexfiles: Fix list corruption if the mirror count changes
Vasily Averin (1):
sunrpc: fixed rollback in rpc_gssd_dummy_populate()
Vasundhara Volam (1):
bnxt_en: Read VPD info only for PFs
Vidya Sagar (1):
pinctrl: tegra: Use noirq suspend/resume callbacks
Vincent Chen (1):
clk: sifive: allocate sufficient memory for struct __prci_data
Vincent Guittot (1):
sched/cfs: change initial value of runnable_avg
Vincenzo Frascino (1):
s390/vdso: fix vDSO clock_getres()
Vishal Verma (1):
nvdimm/region: always show the 'align' attribute
Vitaly Kuznetsov (1):
Revert "KVM: VMX: Micro-optimize vmexit time when not exposing PMU"
Vlastimil Babka (1):
mm, compaction: make capture control handling safe wrt interrupts
Waiman Long (2):
mm, slab: fix sign conversion problem in memcg_uncharge_slab()
mm/slab: use memzero_explicit() in kzfree()
Wang Hai (1):
mld: fix memory leak in ipv6_mc_destroy_dev()
Wei Yongjun (1):
mptcp: fix memory leak in mptcp_subflow_create_socket()
Weiping Zhang (1):
block: update hctx map when use multiple maps
Wenhui Sheng (1):
drm/amdgpu: add fw release for sdma v5_0
Will Deacon (1):
arm64: sve: Fix build failure when ARM64_SVE=y and SYSCTL=n
Willem de Bruijn (1):
selftests/net: report etf errors correctly
Xiaoyao Li (1):
KVM: X86: Fix MSR range of APIC registers in X2APIC mode
Xiyu Yang (1):
cifs: Fix cached_fid refcnt leak in open_shroot
Yang Yingliang (1):
net: fix memleak in register_netdevice()
Yash Shah (1):
RISC-V: Don't allow write+exec only page mapping request in mmap
Ye Bin (1):
ata/libata: Fix usage of page address by page_address in ata_scsi_mode_select_xlat function
Yick W. Tse (1):
ALSA: usb-audio: add quirk for Denon DCD-1500RE
Yoshihiro Shimoda (1):
usb: renesas_usbhs: getting residue from callback_result
Zekun Shen (1):
net: alx: fix race condition in alx_remove
Zhang Xiaoxu (2):
cifs/smb3: Fix data inconsistent when punch hole
cifs/smb3: Fix data inconsistent when zero file range
Zheng Bin (1):
loop: replace kill_bdev with invalidate_bdev
guodeqing (1):
net: Fix the arp error in some cases
yu kuai (2):
block/bio-integrity: don't free 'buf' if bio_integrity_add_page() failed
ARM: imx5: add missing put_device() call in imx_suspend_alloc_ocram()
-----BEGIN PGP SIGNATURE-----
iQIzBAEBCgAdFiEE4n5dijQDou9mhzu83qZv95d3LNwFAl78lGQACgkQ3qZv95d3
LNw59Q//XtCa8cJn1j0kdhLbwvPmHwwF+gZOjNKm/7phhYGdAFyVi3OSO5qd+s0V
JmeU+ycDrQ5xkNNA4nZR3oToqIUueg+JYFg4UytPzjuGO2kBFsgSRh2kQxjGyIvh
JZeiuPvUje7hk5DT95amKupWchUJV25vrCCBGWo+nFQnhbofQemOXIfdP0qjlhOQ
pJB2bDGGZdVIratvsvWVttuGwyTpflgvebnyY3NpFjaGHP2jQ2l/ghjooNl7/OlM
C4JDpEEiY/KEsx0w3mCYVHY/Twbv+gEXk49uhVHc0BiIcH6kg8zLNx7XJqWjNxuB
wcks5v4CJ+CK54FRmEnAHtWA6vR6TvwYixRpALbnvZlcQIUQqVY796ilTe7qFmjk
V2VfpV0EwuthKTd1I+klAlnbHCFwlVuylwNJn425SfQLHJP3sq7Ptsp2dzIu02yt
0fWgMtF/s4wTVzFU5j+E+U7Dulxp3llIGXhdMQpfo0OoOvddGeorZt3MjbbhNOo7
59MBYw3WJeEoRyL1d7uWyqdlYU4d9/c9Xd94aGFsKM8p6K7kMp2S/Pqqyb0rEdTN
OQBrS/UVYjczn+8BpG2exzz4d56kSUYbGaB2yAklv9pTWjwqThiximozIdP51VOY
MyGYhtY0X8R5JdNlJnbv+gSQ9PTZJNjplMmZSQqHudNTVSJgAC0=
=X90X
-----END PGP SIGNATURE-----
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: b8fba93561c9 - Revert "tty: hvc: Fix data abort due to race in hvc_open"
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
s390x:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - DaCapo Benchmark Suite
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ❌ Storage blktests
Host 4:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ❌ Networking firewall: basic netfilter test
🚧 ❌ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
x86_64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ❌ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ power-management: cpupower/sanity test
🚧 ✅ Storage blktests
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Since 5.7, we've been using task_work to trigger async running of
requests in the context of the original task. This generally works
great, but there's a case where if the task is currently blocked
in the kernel waiting on a condition to become true, it won't process
task_work. Even though the task is woken, it just checks whatever
condition it's waiting on, and goes back to sleep if it's still false.
This is a problem if that very condition only becomes true when that
task_work is run. An example of that is the task registering an eventfd
with io_uring, and it's now blocked waiting on an eventfd read. That
read could depend on a completion event, and that completion event
won't get trigged until task_work has been run.
Use the TWA_SIGNAL notification for task_work, so that we ensure that
the task always runs the work when queued.
Cc: stable(a)vger.kernel.org # v5.7
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
---
fs/io_uring.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e507737f044e..476f03b42777 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4072,6 +4072,23 @@ struct io_poll_table {
int error;
};
+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
+ int notify)
+{
+ const bool is_sqthread = (req->ctx->flags & IORING_SETUP_SQPOLL) != 0;
+ struct task_struct *tsk = req->task;
+ int ret;
+
+ if (is_sqthread)
+ notify = 0;
+
+ ret = task_work_add(tsk, cb, notify);
+
+ if (!ret && is_sqthread)
+ wake_up_process(tsk);
+ return ret;
+}
+
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
__poll_t mask, task_work_func_t func)
{
@@ -4095,13 +4112,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
* of executing it. We can't safely execute it anyway, as we may not
* have the needed state needed for it anyway.
*/
- ret = task_work_add(tsk, &req->task_work, true);
+ ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
if (unlikely(ret)) {
WRITE_ONCE(poll->canceled, true);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, true);
+ task_work_add(tsk, &req->task_work, 0);
+ wake_up_process(tsk);
}
- wake_up_process(tsk);
return 1;
}
@@ -6182,15 +6199,16 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
do {
prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
TASK_INTERRUPTIBLE);
+ /* make sure we run task_work before checking for signals */
if (current->task_works)
task_work_run();
- if (io_should_wake(&iowq, false))
- break;
- schedule();
if (signal_pending(current)) {
ret = -EINTR;
break;
}
+ if (io_should_wake(&iowq, false))
+ break;
+ schedule();
} while (1);
finish_wait(&ctx->wait, &iowq.wq);
--
2.27.0
Controller ID's (cntlid) for NVMe devices were introduced in version
1.1.0 of the specification. Controllers that follow the older 1.0.0 spec
don't set this field so it doesn't make sense to validate it. On the
contrary, when using SR-IOV this check breaks VFs as they are all part
of the same NVMe subsystem.
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
Cc: <stable(a)vger.kernel.org> # 5.4+
---
drivers/nvme/host/core.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 28f4388c1337..c4a991acc949 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2773,7 +2773,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
put_device(&subsys->dev);
subsys = found;
- if (!nvme_validate_cntlid(subsys, ctrl, id)) {
+ if (ctrl->vs >= NVME_VS(1, 1, 0) &&
+ !nvme_validate_cntlid(subsys, ctrl, id)) {
ret = -EINVAL;
goto out_put_subsystem;
}
@@ -2883,7 +2884,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
goto out_free;
}
- if (!(ctrl->ops->flags & NVME_F_FABRICS))
+ if (!(ctrl->ops->flags & NVME_F_FABRICS) && ctrl->vs >= NVME_VS(1, 1, 0))
ctrl->cntlid = le16_to_cpu(id->cntlid);
if (!ctrl->identified) {
--
2.16.6
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
With the recent addition of filesystem checksum types other than CRC32c,
it is not anymore hard-coded which checksum type a btrfs filesystem uses.
Up to now there is no good way to read the filesystem checksum, apart from
reading the filesystem UUID and then query sysfs for the checksum type.
Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl
command which usually is used to query filesystem features. Also add a
flags member indicating that the kernel responded with a set csum_type and
csum_size field.
For compatibility reasons, only return the csum_type and csum_size if the
BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE flag was passed to the kernel. Also
clear any unknown flags so we don't pass false positives to user-space
newer than the kernel.
To simplify further additions to the ioctl, also switch the padding to a
u8 array. Pahole was used to verify the result of this switch:
pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko
struct btrfs_ioctl_fs_info_args {
__u64 max_id; /* 0 8 */
__u64 num_devices; /* 8 8 */
__u8 fsid[16]; /* 16 16 */
__u32 nodesize; /* 32 4 */
__u32 sectorsize; /* 36 4 */
__u32 clone_alignment; /* 40 4 */
__u32 flags; /* 44 4 */
__u16 csum_type; /* 48 2 */
__u16 csum_size; /* 50 2 */
__u8 reserved[972]; /* 52 972 */
/* size: 1024, cachelines: 16, members: 10 */
};
Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms")
Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm")
CC: stable(a)vger.kernel.org # 5.5+
Signed-off-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
---
Changes to v3:
* make flags in/out (David)
* make csum return opt-in (Hans)
Changes to v2:
* add additional csum_size (David)
* rename flag value to BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE to reflect
additional size
Changes to v1:
* add 'out' comment to be consistent (Hans)
* remove le16_to_cpu() (kbuild robot)
* switch padding to be all u8 (David)
---
fs/btrfs/ioctl.c | 16 +++++++++++++---
include/uapi/linux/btrfs.h | 14 ++++++++++++--
2 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b3e4c632d80c..4d70b918f656 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3198,11 +3198,15 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_fs_info_args *fi_args;
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ u32 inflags;
int ret = 0;
- fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
- if (!fi_args)
- return -ENOMEM;
+ fi_args = memdup_user(arg, sizeof(*fi_args));
+ if (IS_ERR(fi_args))
+ return PTR_ERR(fi_args);
+
+ inflags = fi_args->flags;
+ fi_args->flags = 0;
rcu_read_lock();
fi_args->num_devices = fs_devices->num_devices;
@@ -3218,6 +3222,12 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
fi_args->sectorsize = fs_info->sectorsize;
fi_args->clone_alignment = fs_info->sectorsize;
+ if (inflags & BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE) {
+ fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
+ fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE;
+ }
+
if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
ret = -EFAULT;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index e6b6cb0f8bc6..c130eaea416e 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -250,10 +250,20 @@ struct btrfs_ioctl_fs_info_args {
__u32 nodesize; /* out */
__u32 sectorsize; /* out */
__u32 clone_alignment; /* out */
- __u32 reserved32;
- __u64 reserved[122]; /* pad to 1k */
+ __u32 flags; /* in/out */
+ __u16 csum_type; /* out */
+ __u16 csum_size; /* out */
+ __u8 reserved[972]; /* pad to 1k */
};
+/*
+ * fs_info ioctl flags
+ *
+ * Used by:
+ * struct btrfs_ioctl_fs_info_args
+ */
+#define BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE (1 << 0)
+
/*
* feature flags
*
--
2.26.2
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 009bce1df0bb5eb970b9eb98d963861f7fe353c7
Gitweb: https://git.kernel.org/tip/009bce1df0bb5eb970b9eb98d963861f7fe353c7
Author: Sean Christopherson <sean.j.christopherson(a)intel.com>
AuthorDate: Fri, 05 Jun 2020 12:26:05 -07:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Tue, 30 Jun 2020 14:09:31 +02:00
x86/split_lock: Don't write MSR_TEST_CTRL on CPUs that aren't whitelisted
Choo! Choo! All aboard the Split Lock Express, with direct service to
Wreckage!
Skip split_lock_verify_msr() if the CPU isn't whitelisted as a possible
SLD-enabled CPU model to avoid writing MSR_TEST_CTRL. MSR_TEST_CTRL
exists, and is writable, on many generations of CPUs. Writing the MSR,
even with '0', can result in bizarre, undocumented behavior.
This fixes a crash on Haswell when resuming from suspend with a live KVM
guest. Because APs use the standard SMP boot flow for resume, they will
go through split_lock_init() and the subsequent RDMSR/WRMSR sequence,
which runs even when sld_state==sld_off to ensure SLD is disabled. On
Haswell (at least, my Haswell), writing MSR_TEST_CTRL with '0' will
succeed and _may_ take the SMT _sibling_ out of VMX root mode.
When KVM has an active guest, KVM performs VMXON as part of CPU onlining
(see kvm_starting_cpu()). Because SMP boot is serialized, the resulting
flow is effectively:
on_each_ap_cpu() {
WRMSR(MSR_TEST_CTRL, 0)
VMXON
}
As a result, the WRMSR can disable VMX on a different CPU that has
already done VMXON. This ultimately results in a #UD on VMPTRLD when
KVM regains control and attempt run its vCPUs.
The above voodoo was confirmed by reworking KVM's VMXON flow to write
MSR_TEST_CTRL prior to VMXON, and to serialize the sequence as above.
Further verification of the insanity was done by redoing VMXON on all
APs after the initial WRMSR->VMXON sequence. The additional VMXON,
which should VM-Fail, occasionally succeeded, and also eliminated the
unexpected #UD on VMPTRLD.
The damage done by writing MSR_TEST_CTRL doesn't appear to be limited
to VMX, e.g. after suspend with an active KVM guest, subsequent reboots
almost always hang (even when fudging VMXON), a #UD on a random Jcc was
observed, suspend/resume stability is qualitatively poor, and so on and
so forth.
kernel BUG at arch/x86/kvm/x86.c:386!
CPU: 1 PID: 2592 Comm: CPU 6/KVM Tainted: G D
Hardware name: ASUS Q87M-E/Q87M-E, BIOS 1102 03/03/2014
RIP: 0010:kvm_spurious_fault+0xf/0x20
Call Trace:
vmx_vcpu_load_vmcs+0x1fb/0x2b0
vmx_vcpu_load+0x3e/0x160
kvm_arch_vcpu_load+0x48/0x260
finish_task_switch+0x140/0x260
__schedule+0x460/0x720
_cond_resched+0x2d/0x40
kvm_arch_vcpu_ioctl_run+0x82e/0x1ca0
kvm_vcpu_ioctl+0x363/0x5c0
ksys_ioctl+0x88/0xa0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x4c/0x170
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Fixes: dbaba47085b0c ("x86/split_lock: Rework the initialization flow of split lock detection")
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/20200605192605.7439-1-sean.j.christopherson@intel…
---
arch/x86/kernel/cpu/intel.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index c25a67a..0ab48f1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -50,6 +50,13 @@ static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;
/*
+ * With a name like MSR_TEST_CTL it should go without saying, but don't touch
+ * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
+ * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
+ */
+static bool cpu_model_supports_sld __ro_after_init;
+
+/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
* CPU models in which having conflicting memory types still leads to
@@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on)
static void split_lock_init(void)
{
- split_lock_verify_msr(sld_state != sld_off);
+ if (cpu_model_supports_sld)
+ split_lock_verify_msr(sld_state != sld_off);
}
static void split_lock_warn(unsigned long ip)
@@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
return;
}
+ cpu_model_supports_sld = true;
split_lock_setup();
}
GENMASK and it's callees conduct checking to ensure the passed
parameters are valid. One of those checks is for '< 0'. So if an
unsigned value is passed, in an invalid comparison takes place.
Judging from the current code, it looks as though 'unsigned int'
is the correct type to use, so simply cast these small values
with no chance of being false negative to signed int for
comparison/error checking purposes.
Squashes the following W=1 warnings:
In file included from /home/lee/projects/linux/kernel/include/linux/bits.h:23,
from /home/lee/projects/linux/kernel/include/linux/bitops.h:5,
from /home/lee/projects/linux/kernel/include/linux/kernel.h:12,
from /home/lee/projects/linux/kernel/include/linux/mfd/syscon/atmel-smc.h:14,
from /home/lee/projects/linux/kernel/drivers/mfd/atmel-smc.c:11:
/home/lee/projects/linux/kernel/drivers/mfd/atmel-smc.c: In function ‘atmel_smc_cs_encode_ncycles’:
/home/lee/projects/linux/kernel/include/linux/bits.h:26:28: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
26 | __builtin_constant_p((l) > (h)), (l) > (h), 0)))
| ^
/home/lee/projects/linux/kernel/include/linux/build_bug.h:16:62: note: in definition of macro ‘BUILD_BUG_ON_ZERO’
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
| ^
/home/lee/projects/linux/kernel/include/linux/bits.h:39:3: note: in expansion of macro ‘GENMASK_INPUT_CHECK’
39 | (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
| ^~~~~~~~~~~~~~~~~~~
/home/lee/projects/linux/kernel/drivers/mfd/atmel-smc.c:49:25: note: in expansion of macro ‘GENMASK’
49 | unsigned int lsbmask = GENMASK(msbpos - 1, 0);
| ^~~~~~~
/home/lee/projects/linux/kernel/include/linux/bits.h:26:40: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
26 | __builtin_constant_p((l) > (h)), (l) > (h), 0)))
| ^
/home/lee/projects/linux/kernel/include/linux/build_bug.h:16:62: note: in definition of macro ‘BUILD_BUG_ON_ZERO’
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
| ^
/home/lee/projects/linux/kernel/include/linux/bits.h:39:3: note: in expansion of macro ‘GENMASK_INPUT_CHECK’
39 | (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
| ^~~~~~~~~~~~~~~~~~~
Cc: <stable(a)vger.kernel.org>
Cc: Nicolas Ferre <nicolas.ferre(a)microchip.com>
Cc: Alexandre Belloni <alexandre.belloni(a)bootlin.com>
Cc: Ludovic Desroches <ludovic.desroches(a)microchip.com>
Cc: Boris Brezillon <boris.brezillon(a)free-electrons.com>
Signed-off-by: Lee Jones <lee.jones(a)linaro.org>
---
drivers/mfd/atmel-smc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/mfd/atmel-smc.c b/drivers/mfd/atmel-smc.c
index 1fa2ec950e7df..17bbe9d1fa740 100644
--- a/drivers/mfd/atmel-smc.c
+++ b/drivers/mfd/atmel-smc.c
@@ -46,8 +46,8 @@ static int atmel_smc_cs_encode_ncycles(unsigned int ncycles,
unsigned int msbfactor,
unsigned int *encodedval)
{
- unsigned int lsbmask = GENMASK(msbpos - 1, 0);
- unsigned int msbmask = GENMASK(msbwidth - 1, 0);
+ unsigned int lsbmask = GENMASK((int)msbpos - 1, 0);
+ unsigned int msbmask = GENMASK((int)msbwidth - 1, 0);
unsigned int msb, lsb;
int ret = 0;
--
2.25.1
plane->index is NOT the index of the color plane in a YUV frame.
Actually, a YUV frame is represented by a single drm_plane, even though
it contains three Y, U, V planes.
Cc: stable(a)vger.kernel.org # v5.3
Fixes: 90b86fcc47b4 ("DRM: Add KMS driver for the Ingenic JZ47xx SoCs")
Signed-off-by: Paul Cercueil <paul(a)crapouillou.net>
---
Notes:
v2: No change
drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
index a15f9a1940c6..924c8daf071a 100644
--- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
+++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
@@ -386,7 +386,7 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane,
addr = drm_fb_cma_get_gem_addr(state->fb, state, 0);
width = state->src_w >> 16;
height = state->src_h >> 16;
- cpp = state->fb->format->cpp[plane->index];
+ cpp = state->fb->format->cpp[0];
priv->dma_hwdesc->addr = addr;
priv->dma_hwdesc->cmd = width * height * cpp / 4;
--
2.27.0
Bit 8 would be the "global" bit, which does not quite make sense for non-leaf
page table entries. Intel ignores it; AMD ignores it in PDEs and PDPEs, but
reserves it in PML4Es.
Probably, earlier versions of the AMD manual documented it as reserved in PDPEs
as well, and that behavior made it into KVM as well as kvm-unit-tests; fix it.
Cc: stable(a)vger.kernel.org
Reported-by: Nadav Amit <namit(a)vmware.com>
Fixes: a0c0feb57992 ("KVM: x86: reserve bit 8 of non-leaf PDPEs and PML4Es in 64-bit mode on AMD", 2014-09-03)
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/mmu/mmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 76817d13c86e..6d6a0ae7800c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4449,7 +4449,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- nonleaf_bit8_rsvd | gbpages_bit_rsvd |
+ gbpages_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
--
2.26.2
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 005c34ae4b44f085120d7f371121ec7ded677761
Gitweb: https://git.kernel.org/tip/005c34ae4b44f085120d7f371121ec7ded677761
Author: Marc Zyngier <maz(a)kernel.org>
AuthorDate: Sun, 21 Jun 2020 14:43:15 +01:00
Committer: Marc Zyngier <maz(a)kernel.org>
CommitterDate: Sun, 21 Jun 2020 15:24:46 +01:00
irqchip/gic: Atomically update affinity
The GIC driver uses a RMW sequence to update the affinity, and
relies on the gic_lock_irqsave/gic_unlock_irqrestore sequences
to update it atomically.
But these sequences only expand into anything meaningful if
the BL_SWITCHER option is selected, which almost never happens.
It also turns out that using a RMW and locks is just as silly,
as the GIC distributor supports byte accesses for the GICD_TARGETRn
registers, which when used make the update atomic by definition.
Drop the terminally broken code and replace it by a byte write.
Fixes: 04c8b0f82c7d ("irqchip/gic: Make locking a BL_SWITCHER only feature")
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
---
drivers/irqchip/irq-gic.c | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 00de05a..c17fabd 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -329,10 +329,8 @@ static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
{
- void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
- unsigned int cpu, shift = (gic_irq(d) % 4) * 8;
- u32 val, mask, bit;
- unsigned long flags;
+ void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + gic_irq(d);
+ unsigned int cpu;
if (!force)
cpu = cpumask_any_and(mask_val, cpu_online_mask);
@@ -342,13 +340,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
return -EINVAL;
- gic_lock_irqsave(flags);
- mask = 0xff << shift;
- bit = gic_cpu_map[cpu] << shift;
- val = readl_relaxed(reg) & ~mask;
- writel_relaxed(val | bit, reg);
- gic_unlock_irqrestore(flags);
-
+ writeb_relaxed(gic_cpu_map[cpu], reg);
irq_data_update_effective_affinity(d, cpumask_of(cpu));
return IRQ_SET_MASK_OK_DONE;
With the recent addition of filesystem checksum types other than CRC32c,
it is not anymore hard-coded which checksum type a btrfs filesystem uses.
Up to now there is no good way to read the filesystem checksum, apart from
reading the filesystem UUID and then query sysfs for the checksum type.
Add a new csum_type field to the BTRFS_IOC_FS_INFO ioctl command which
usually is used to query filesystem features. Also add a flags member
indicating that the kernel responded with a set csum_type field.
To simplify further additions to the ioctl, also switch the padding to a
u8 array. Pahole was used to verify the result of this switch:
pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko
struct btrfs_ioctl_fs_info_args {
__u64 max_id; /* 0 8 */
__u64 num_devices; /* 8 8 */
__u8 fsid[16]; /* 16 16 */
__u32 nodesize; /* 32 4 */
__u32 sectorsize; /* 36 4 */
__u32 clone_alignment; /* 40 4 */
__u32 flags; /* 44 4 */
__u16 csum_type; /* 48 2 */
__u16 csum_size; /* 50 2 */
__u8 reserved[972]; /* 52 972 */
/* size: 1024, cachelines: 16, members: 10 */
};
Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms")
Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm")
Cc: stable(a)vger.kernel.org
Signed-off-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
---
Changes to v2:
* add additional csum_size (David)
* rename flag value to BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE to reflect
additional size
Changes to v1:
* add 'out' comment to be consistent (Hans)
* remove le16_to_cpu() (kbuild robot)
* switch padding to be all u8 (David)
---
fs/btrfs/ioctl.c | 3 +++
include/uapi/linux/btrfs.h | 14 ++++++++++++--
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b3e4c632d80c..cfedcdf446c3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3217,6 +3217,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
fi_args->nodesize = fs_info->nodesize;
fi_args->sectorsize = fs_info->sectorsize;
fi_args->clone_alignment = fs_info->sectorsize;
+ fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
+ fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE;
if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
ret = -EFAULT;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index e6b6cb0f8bc6..2de3ef3c5c71 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -250,10 +250,20 @@ struct btrfs_ioctl_fs_info_args {
__u32 nodesize; /* out */
__u32 sectorsize; /* out */
__u32 clone_alignment; /* out */
- __u32 reserved32;
- __u64 reserved[122]; /* pad to 1k */
+ __u32 flags; /* out */
+ __u16 csum_type; /* out */
+ __u16 csum_size; /* out */
+ __u8 reserved[972]; /* pad to 1k */
};
+/*
+ * fs_info ioctl flags
+ *
+ * Used by:
+ * struct btrfs_ioctl_fs_info_args
+ */
+#define BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE (1 << 0)
+
/*
* feature flags
*
--
2.26.2
Hi,
The same kmod source code can be built on KASAN-disabled kernel
(5.7.0), but after enable it with CONFIG_KASAN=y, the kmod can't build
on the new installed KASAN-enabled kernel with below error message,
seems no relevant answers for this issue with google:
root@build-ws:/home/mm/slab# make
make -C /lib/modules/5.7.0/build M=/home/mm/slab modules
make[1]: Entering directory '/home/linux-5.7'
CC [M] /home/mm/slab/tap_slab.o
MODPOST 1 modules
ERROR: modpost: "__asan_register_globals" [/home/mm/slab/tap_slab.ko] undefined!
ERROR: modpost: "__asan_unregister_globals"
[/home/mm/slab/tap_slab.ko] undefined!
ERROR: modpost: "__asan_load8_noabort" [/home/mm/slab/tap_slab.ko] undefined!
scripts/Makefile.modpost:94: recipe for target '__modpost' failed
make[2]: *** [__modpost] Error 1
Makefile:1642: recipe for target 'modules' failed
make[1]: *** [modules] Error 2
make[1]: Leaving directory '/home/linux-5.7'
Makefile:6: recipe for target 'default' failed
make: *** [default] Error 2
===
Regards,
Richard
On Mon, Jun 29, 2020 at 04:28:05PM +0200, SeongJae Park wrote:
> Hello,
>
>
> With my little script, I found below commits in the mainline tree are more than
> 1 week old and fixing commits that back-ported in v5.4..v5.4.49, but not merged
> in the stable/linux-5.4.y tree. Are those need to be merged in but missed or
> dealyed?
>
> 9210c075cef2 ("nvme-pci: avoid race between nvme_reap_pending_cqes() and nvme_poll()")
> 9fecd13202f5 ("btrfs: fix a block group ref counter leak after failure to remove block group")
> 9d964e1b82d8 ("fix a braino in "sparc32: fix register window handling in genregs32_[gs]et()"")
> 8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding")
> 6e2f83884c09 ("bnxt_en: Fix AER reset logic on 57500 chips.")
> efb94790852a ("drm/panel-simple: fix connector type for LogicPD Type28 Display")
> ff58bbc7b970 ("ALSA: usb-audio: Fix potential use-after-free of streams")
> ff58bbc7b970 ("ALSA: usb-audio: Fix potential use-after-free of streams")
> 8dbe4c5d5e40 ("net: dsa: bcm_sf2: Fix node reference count")
> ca8826095e4d ("selftests/net: report etf errors correctly")
> 5a8d7f126c97 ("of: of_mdio: Correct loop scanning logic")
> d35d3660e065 ("binder: fix null deref of proc->context")
>
> The script found several more commits but I exclude those here, because those
> seems not applicable on 5.4.y or fixing trivial problems only. If I'm not
> following a proper process for this kind of reports, please let me know.
For commits that only have a "Fixes:" tag, and not a "cc: stable..."
tag, wait a few weeks, or a month, for us to catch up with them. We
usually get to them eventually, but it takes us a while as we have lots
more to deal with by developers and maintainers that are properly
tagging patches for this type of thing.
Some of the above commits are queued up already, but not all of them.
I'll take a look at the list after this next round of patches go out,
and will let you know.
And yes, we do want this type of list, it's greatly appreciated.
thanks,
greg k-h
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7b2377486767503d47265e4d487a63c651f6b55d Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1(a)huawei.com>
Date: Mon, 15 Jun 2020 11:33:23 +0800
Subject: [PATCH] dm zoned: assign max_io_len correctly
The unit of max_io_len is sector instead of byte (spotted through
code review), so fix it.
Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hou Tao <houtao1(a)huawei.com>
Reviewed-by: Damien Le Moal <damien.lemoal(a)wdc.com>
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index a907a9446c0b..cf915009c306 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -890,7 +890,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
/* Set target (no write same support) */
- ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9;
+ ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_zeroes_bios = 1;
If wakeup event occurred by extcon event, it needs to call
ci_irq again since the first ci_irq calling at extcon notifier
only wakes up controller, but do noop for event handling,
it causes the extcon use case can't work well from low power mode.
Cc: <stable(a)vger.kernel.org>
Fixes: 3ecb3e09b042 ("usb: chipidea: Use extcon framework for VBUS and ID detect")
Reported-by: Philippe Schenker <philippe.schenker(a)toradex.com>
Tested-by: Philippe Schenker <philippe.schenker(a)toradex.com>
Signed-off-by: Peter Chen <peter.chen(a)nxp.com>
---
drivers/usb/chipidea/core.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index e8ce300ad490..9e10dcfeb98f 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -1313,6 +1313,29 @@ static void ci_controller_suspend(struct ci_hdrc *ci)
enable_irq(ci->irq);
}
+/*
+ * Handle the wakeup interrupt triggered by extcon connector
+ * We need to call ci_irq again for extcon since the first
+ * interrupt (wakeup int) only let the controller be out of
+ * low power mode, but not handle any interrupts.
+ */
+static void ci_extcon_wakeup_int(struct ci_hdrc *ci)
+{
+ struct ci_hdrc_cable *cable_id, *cable_vbus;
+ u32 otgsc = hw_read_otgsc(ci, ~0);
+
+ cable_id = &ci->platdata->id_extcon;
+ cable_vbus = &ci->platdata->vbus_extcon;
+
+ if (!IS_ERR(cable_id->edev) && ci->is_otg &&
+ (otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS))
+ ci_irq(ci->irq, ci);
+
+ if (!IS_ERR(cable_vbus->edev) && ci->is_otg &&
+ (otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS))
+ ci_irq(ci->irq, ci);
+}
+
static int ci_controller_resume(struct device *dev)
{
struct ci_hdrc *ci = dev_get_drvdata(dev);
@@ -1343,6 +1366,7 @@ static int ci_controller_resume(struct device *dev)
enable_irq(ci->irq);
if (ci_otg_is_fsm_mode(ci))
ci_otg_fsm_wakeup_by_srp(ci);
+ ci_extcon_wakeup_int(ci);
}
return 0;
--
2.17.1
The (struct __prci_data).hw_clks.hws is an array with dynamic elements.
Using struct_size(pd, hw_clks.hws, ARRAY_SIZE(__prci_init_clocks))
instead of sizeof(*pd) to get the correct memory size of
struct __prci_data for sifive/fu540-prci. After applying this
modifications, the kernel runs smoothly with CONFIG_SLAB_FREELIST_RANDOM
enabled on the HiFive unleashed board.
Fixes: 30b8e27e3b58 ("clk: sifive: add a driver for the SiFive FU540 PRCI IP block")
Cc: stable(a)vger.kernel.org
Signed-off-by: Vincent Chen <vincent.chen(a)sifive.com>
---
drivers/clk/sifive/fu540-prci.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/clk/sifive/fu540-prci.c b/drivers/clk/sifive/fu540-prci.c
index 6282ee2f361c..a8901f90a61a 100644
--- a/drivers/clk/sifive/fu540-prci.c
+++ b/drivers/clk/sifive/fu540-prci.c
@@ -586,7 +586,10 @@ static int sifive_fu540_prci_probe(struct platform_device *pdev)
struct __prci_data *pd;
int r;
- pd = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL);
+ pd = devm_kzalloc(dev,
+ struct_size(pd, hw_clks.hws,
+ ARRAY_SIZE(__prci_init_clocks)),
+ GFP_KERNEL);
if (!pd)
return -ENOMEM;
--
2.7.4
Hello,
With my little script, I found below commits in the mainline tree are more than
1 week old and fixing commits that back-ported in v5.4..v5.4.49 but not merged
in the stable/linux-5.4.y tree. Are those need to be merged in but missed or
dealyed?
9210c075cef2 ("nvme-pci: avoid race between nvme_reap_pending_cqes() and nvme_poll()")
9fecd13202f5 ("btrfs: fix a block group ref counter leak after failure to remove block group")
9d964e1b82d8 ("fix a braino in "sparc32: fix register window handling in genregs32_[gs]et()"")
8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding")
6e2f83884c09 ("bnxt_en: Fix AER reset logic on 57500 chips.")
efb94790852a ("drm/panel-simple: fix connector type for LogicPD Type28 Display")
ff58bbc7b970 ("ALSA: usb-audio: Fix potential use-after-free of streams")
ff58bbc7b970 ("ALSA: usb-audio: Fix potential use-after-free of streams")
8dbe4c5d5e40 ("net: dsa: bcm_sf2: Fix node reference count")
ca8826095e4d ("selftests/net: report etf errors correctly")
5a8d7f126c97 ("of: of_mdio: Correct loop scanning logic")
d35d3660e065 ("binder: fix null deref of proc->context")
The script found several more commits but I exclude those here, because those
seems not applicable on 5.4.y or fixing trivial problems only. If I'm not
following a proper process for this kind of reports, please let me know.
Thanks,
SeongJae Park
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 243bce09c91b0145aeaedd5afba799d81841c030 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd(a)google.com>
Date: Thu, 25 Jun 2020 20:29:59 -0700
Subject: [PATCH] mm: fix swap cache node allocation mask
Chris Murphy reports that a slightly overcommitted load, testing swap
and zram along with i915, splats and keeps on splatting, when it had
better fail less noisily:
gnome-shell: page allocation failure: order:0,
mode:0x400d0(__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_RECLAIMABLE),
nodemask=(null),cpuset=/,mems_allowed=0
CPU: 2 PID: 1155 Comm: gnome-shell Not tainted 5.7.0-1.fc33.x86_64 #1
Call Trace:
dump_stack+0x64/0x88
warn_alloc.cold+0x75/0xd9
__alloc_pages_slowpath.constprop.0+0xcfa/0xd30
__alloc_pages_nodemask+0x2df/0x320
alloc_slab_page+0x195/0x310
allocate_slab+0x3c5/0x440
___slab_alloc+0x40c/0x5f0
__slab_alloc+0x1c/0x30
kmem_cache_alloc+0x20e/0x220
xas_nomem+0x28/0x70
add_to_swap_cache+0x321/0x400
__read_swap_cache_async+0x105/0x240
swap_cluster_readahead+0x22c/0x2e0
shmem_swapin+0x8e/0xc0
shmem_swapin_page+0x196/0x740
shmem_getpage_gfp+0x3a2/0xa60
shmem_read_mapping_page_gfp+0x32/0x60
shmem_get_pages+0x155/0x5e0 [i915]
__i915_gem_object_get_pages+0x68/0xa0 [i915]
i915_vma_pin+0x3fe/0x6c0 [i915]
eb_add_vma+0x10b/0x2c0 [i915]
i915_gem_do_execbuffer+0x704/0x3430 [i915]
i915_gem_execbuffer2_ioctl+0x1ea/0x3e0 [i915]
drm_ioctl_kernel+0x86/0xd0 [drm]
drm_ioctl+0x206/0x390 [drm]
ksys_ioctl+0x82/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5b/0xf0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported on 5.7, but it goes back really to 3.1: when
shmem_read_mapping_page_gfp() was implemented for use by i915, and
allowed for __GFP_NORETRY and __GFP_NOWARN flags in most places, but
missed swapin's "& GFP_KERNEL" mask for page tree node allocation in
__read_swap_cache_async() - that was to mask off HIGHUSER_MOVABLE bits
from what page cache uses, but GFP_RECLAIM_MASK is now what's needed.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208085
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2006151330070.11064@eggly.anvils
Fixes: 68da9f055755 ("tmpfs: pass gfp to shmem_getpage_gfp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Analyzed-by: Vlastimil Babka <vbabka(a)suse.cz>
Analyzed-by: Matthew Wilcox <willy(a)infradead.org>
Tested-by: Chris Murphy <lists(a)colorremedies.com>
Cc: <stable(a)vger.kernel.org> [3.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e98ff460e9e9..05889e8e3c97 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-
+#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+ if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 243bce09c91b0145aeaedd5afba799d81841c030 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd(a)google.com>
Date: Thu, 25 Jun 2020 20:29:59 -0700
Subject: [PATCH] mm: fix swap cache node allocation mask
Chris Murphy reports that a slightly overcommitted load, testing swap
and zram along with i915, splats and keeps on splatting, when it had
better fail less noisily:
gnome-shell: page allocation failure: order:0,
mode:0x400d0(__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_RECLAIMABLE),
nodemask=(null),cpuset=/,mems_allowed=0
CPU: 2 PID: 1155 Comm: gnome-shell Not tainted 5.7.0-1.fc33.x86_64 #1
Call Trace:
dump_stack+0x64/0x88
warn_alloc.cold+0x75/0xd9
__alloc_pages_slowpath.constprop.0+0xcfa/0xd30
__alloc_pages_nodemask+0x2df/0x320
alloc_slab_page+0x195/0x310
allocate_slab+0x3c5/0x440
___slab_alloc+0x40c/0x5f0
__slab_alloc+0x1c/0x30
kmem_cache_alloc+0x20e/0x220
xas_nomem+0x28/0x70
add_to_swap_cache+0x321/0x400
__read_swap_cache_async+0x105/0x240
swap_cluster_readahead+0x22c/0x2e0
shmem_swapin+0x8e/0xc0
shmem_swapin_page+0x196/0x740
shmem_getpage_gfp+0x3a2/0xa60
shmem_read_mapping_page_gfp+0x32/0x60
shmem_get_pages+0x155/0x5e0 [i915]
__i915_gem_object_get_pages+0x68/0xa0 [i915]
i915_vma_pin+0x3fe/0x6c0 [i915]
eb_add_vma+0x10b/0x2c0 [i915]
i915_gem_do_execbuffer+0x704/0x3430 [i915]
i915_gem_execbuffer2_ioctl+0x1ea/0x3e0 [i915]
drm_ioctl_kernel+0x86/0xd0 [drm]
drm_ioctl+0x206/0x390 [drm]
ksys_ioctl+0x82/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5b/0xf0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported on 5.7, but it goes back really to 3.1: when
shmem_read_mapping_page_gfp() was implemented for use by i915, and
allowed for __GFP_NORETRY and __GFP_NOWARN flags in most places, but
missed swapin's "& GFP_KERNEL" mask for page tree node allocation in
__read_swap_cache_async() - that was to mask off HIGHUSER_MOVABLE bits
from what page cache uses, but GFP_RECLAIM_MASK is now what's needed.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208085
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2006151330070.11064@eggly.anvils
Fixes: 68da9f055755 ("tmpfs: pass gfp to shmem_getpage_gfp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Analyzed-by: Vlastimil Babka <vbabka(a)suse.cz>
Analyzed-by: Matthew Wilcox <willy(a)infradead.org>
Tested-by: Chris Murphy <lists(a)colorremedies.com>
Cc: <stable(a)vger.kernel.org> [3.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e98ff460e9e9..05889e8e3c97 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-
+#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+ if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}
This is a note to let you know that I've just added the patch titled
mei: bus: don't clean driver pointer
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From e852c2c251ed9c23ae6e3efebc5ec49adb504207 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin(a)intel.com>
Date: Mon, 29 Jun 2020 01:53:59 +0300
Subject: mei: bus: don't clean driver pointer
It's not needed to set driver to NULL in mei_cl_device_remove()
which is bus_type remove() handler as this is done anyway
in __device_release_driver().
Actually this is causing an endless loop in driver_detach()
on ubuntu patched kernel, while removing (rmmod) the mei_hdcp module.
The reason list_empty(&drv->p->klist_devices.k_list) is always not-empty.
as the check is always true in __device_release_driver()
if (dev->driver != drv)
return;
The non upstream patch is causing this behavior, titled:
'vfio -- release device lock before userspace requests'
Nevertheless the fix is correct also for the upstream.
Link: https://patchwork.ozlabs.org/project/ubuntu-kernel/patch/20180912085046.340…
Cc: <stable(a)vger.kernel.org>
Cc: Andy Whitcroft <apw(a)canonical.com>
Signed-off-by: Alexander Usyskin <alexander.usyskin(a)intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler(a)intel.com>
Link: https://lore.kernel.org/r/20200628225359.2185929-1-tomas.winkler@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/misc/mei/bus.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 8d468e0a950a..f476dbc7252b 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -745,9 +745,8 @@ static int mei_cl_device_remove(struct device *dev)
mei_cl_bus_module_put(cldev);
module_put(THIS_MODULE);
- dev->driver = NULL;
- return ret;
+ return ret;
}
static ssize_t name_show(struct device *dev, struct device_attribute *a,
--
2.27.0
From: Steven Price <steven.price(a)arm.com>
If SVE is enabled then 'ret' can be assigned the return value of
kvm_vcpu_enable_sve() which may be 0 causing future "goto out" sites to
erroneously return 0 on failure rather than -EINVAL as expected.
Remove the initialisation of 'ret' and make setting the return value
explicit to avoid this situation in the future.
Fixes: 9a3cdf26e336 ("KVM: arm64/sve: Allow userspace to enable SVE for vcpus")
Cc: stable(a)vger.kernel.org
Reported-by: James Morse <james.morse(a)arm.com>
Signed-off-by: Steven Price <steven.price(a)arm.com>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20200617105456.28245-1-steven.price@arm.com
---
arch/arm64/kvm/reset.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index d3b209023727..6ed36be51b4b 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -245,7 +245,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
- int ret = -EINVAL;
+ int ret;
bool loaded;
u32 pstate;
@@ -269,15 +269,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
- if (kvm_vcpu_enable_ptrauth(vcpu))
+ if (kvm_vcpu_enable_ptrauth(vcpu)) {
+ ret = -EINVAL;
goto out;
+ }
}
switch (vcpu->arch.target) {
default:
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
- if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
+ if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) {
+ ret = -EINVAL;
goto out;
+ }
pstate = VCPU_RESET_PSTATE_SVC;
} else {
pstate = VCPU_RESET_PSTATE_EL1;
--
2.27.0
Hello Greg,
Can you please consider including the following patch in the stable linux-4.14.y and stable linux-5.4.y branch?
This is to fix CVE-2020-12655
d0c7feaf87678371c2c09b3709400be416b2dc62(xfs: add agf freeblocks verify in xfs_agf_verify)
Thanks,
Yishan Chen
Hello
commit[1] introduced regression that will lead blktest nvme/004 failed on v5.7.5, and commits [2] fixed this issue on latest linux tree.
But commit[2] cannot be directly applied to stable tree due to dependceny[3], could you help backport the fix and dependency to stable tree, thanks.
[1]
64f5e9cdd711 nvmet: fix memory leak when removing namespaces and controllers concurrently
[2]
819f7b88b48f nvmet: fail outstanding host posted AEN req
[3]
1cdf9f7670a7 nvmet: cleanups the loop in nvmet_async_events_process
696ece751366 nvmet: add async event tracing support
Best Regards,
Yi Zhang
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7b2377486767503d47265e4d487a63c651f6b55d Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1(a)huawei.com>
Date: Mon, 15 Jun 2020 11:33:23 +0800
Subject: [PATCH] dm zoned: assign max_io_len correctly
The unit of max_io_len is sector instead of byte (spotted through
code review), so fix it.
Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hou Tao <houtao1(a)huawei.com>
Reviewed-by: Damien Le Moal <damien.lemoal(a)wdc.com>
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index a907a9446c0b..cf915009c306 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -890,7 +890,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
/* Set target (no write same support) */
- ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9;
+ ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_zeroes_bios = 1;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7b2377486767503d47265e4d487a63c651f6b55d Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1(a)huawei.com>
Date: Mon, 15 Jun 2020 11:33:23 +0800
Subject: [PATCH] dm zoned: assign max_io_len correctly
The unit of max_io_len is sector instead of byte (spotted through
code review), so fix it.
Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hou Tao <houtao1(a)huawei.com>
Reviewed-by: Damien Le Moal <damien.lemoal(a)wdc.com>
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index a907a9446c0b..cf915009c306 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -890,7 +890,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
/* Set target (no write same support) */
- ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9;
+ ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_zeroes_bios = 1;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7b2377486767503d47265e4d487a63c651f6b55d Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1(a)huawei.com>
Date: Mon, 15 Jun 2020 11:33:23 +0800
Subject: [PATCH] dm zoned: assign max_io_len correctly
The unit of max_io_len is sector instead of byte (spotted through
code review), so fix it.
Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hou Tao <houtao1(a)huawei.com>
Reviewed-by: Damien Le Moal <damien.lemoal(a)wdc.com>
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index a907a9446c0b..cf915009c306 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -890,7 +890,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
/* Set target (no write same support) */
- ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9;
+ ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_zeroes_bios = 1;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ee470bb25d0dcdf126f586ec0ae6dca66cb340a4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp(a)suse.de>
Date: Thu, 18 Jun 2020 20:25:25 +0200
Subject: [PATCH] EDAC/amd64: Read back the scrub rate PCI register on F15h
Commit:
da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
added support for F15h, model 0x60 CPUs but in doing so, missed to read
back SCRCTRL PCI config register on F15h CPUs which are *not* model
0x60. Add that read so that doing
$ cat /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
can show the previously set DRAM scrub rate.
Fixes: da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
Reported-by: Anders Andersson <pipatron(a)gmail.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: <stable(a)vger.kernel.org> #v4.4..
Link: https://lkml.kernel.org/r/CAKkunMbNWppx_i6xSdDHLseA2QQmGJqj_crY=NF-GZML5np4…
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index ef90070a9194..6262f6370c5d 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -269,6 +269,8 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
if (pvt->model == 0x60)
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+ else
+ amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
} else {
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ee470bb25d0dcdf126f586ec0ae6dca66cb340a4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp(a)suse.de>
Date: Thu, 18 Jun 2020 20:25:25 +0200
Subject: [PATCH] EDAC/amd64: Read back the scrub rate PCI register on F15h
Commit:
da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
added support for F15h, model 0x60 CPUs but in doing so, missed to read
back SCRCTRL PCI config register on F15h CPUs which are *not* model
0x60. Add that read so that doing
$ cat /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
can show the previously set DRAM scrub rate.
Fixes: da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
Reported-by: Anders Andersson <pipatron(a)gmail.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: <stable(a)vger.kernel.org> #v4.4..
Link: https://lkml.kernel.org/r/CAKkunMbNWppx_i6xSdDHLseA2QQmGJqj_crY=NF-GZML5np4…
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index ef90070a9194..6262f6370c5d 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -269,6 +269,8 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
if (pvt->model == 0x60)
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+ else
+ amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
} else {
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ee470bb25d0dcdf126f586ec0ae6dca66cb340a4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp(a)suse.de>
Date: Thu, 18 Jun 2020 20:25:25 +0200
Subject: [PATCH] EDAC/amd64: Read back the scrub rate PCI register on F15h
Commit:
da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
added support for F15h, model 0x60 CPUs but in doing so, missed to read
back SCRCTRL PCI config register on F15h CPUs which are *not* model
0x60. Add that read so that doing
$ cat /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
can show the previously set DRAM scrub rate.
Fixes: da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
Reported-by: Anders Andersson <pipatron(a)gmail.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: <stable(a)vger.kernel.org> #v4.4..
Link: https://lkml.kernel.org/r/CAKkunMbNWppx_i6xSdDHLseA2QQmGJqj_crY=NF-GZML5np4…
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index ef90070a9194..6262f6370c5d 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -269,6 +269,8 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
if (pvt->model == 0x60)
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+ else
+ amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
} else {
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ee470bb25d0dcdf126f586ec0ae6dca66cb340a4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp(a)suse.de>
Date: Thu, 18 Jun 2020 20:25:25 +0200
Subject: [PATCH] EDAC/amd64: Read back the scrub rate PCI register on F15h
Commit:
da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
added support for F15h, model 0x60 CPUs but in doing so, missed to read
back SCRCTRL PCI config register on F15h CPUs which are *not* model
0x60. Add that read so that doing
$ cat /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
can show the previously set DRAM scrub rate.
Fixes: da92110dfdfa ("EDAC, amd64_edac: Extend scrub rate support to F15hM60h")
Reported-by: Anders Andersson <pipatron(a)gmail.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: <stable(a)vger.kernel.org> #v4.4..
Link: https://lkml.kernel.org/r/CAKkunMbNWppx_i6xSdDHLseA2QQmGJqj_crY=NF-GZML5np4…
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index ef90070a9194..6262f6370c5d 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -269,6 +269,8 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
if (pvt->model == 0x60)
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+ else
+ amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
} else {
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 243bce09c91b0145aeaedd5afba799d81841c030 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd(a)google.com>
Date: Thu, 25 Jun 2020 20:29:59 -0700
Subject: [PATCH] mm: fix swap cache node allocation mask
Chris Murphy reports that a slightly overcommitted load, testing swap
and zram along with i915, splats and keeps on splatting, when it had
better fail less noisily:
gnome-shell: page allocation failure: order:0,
mode:0x400d0(__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_RECLAIMABLE),
nodemask=(null),cpuset=/,mems_allowed=0
CPU: 2 PID: 1155 Comm: gnome-shell Not tainted 5.7.0-1.fc33.x86_64 #1
Call Trace:
dump_stack+0x64/0x88
warn_alloc.cold+0x75/0xd9
__alloc_pages_slowpath.constprop.0+0xcfa/0xd30
__alloc_pages_nodemask+0x2df/0x320
alloc_slab_page+0x195/0x310
allocate_slab+0x3c5/0x440
___slab_alloc+0x40c/0x5f0
__slab_alloc+0x1c/0x30
kmem_cache_alloc+0x20e/0x220
xas_nomem+0x28/0x70
add_to_swap_cache+0x321/0x400
__read_swap_cache_async+0x105/0x240
swap_cluster_readahead+0x22c/0x2e0
shmem_swapin+0x8e/0xc0
shmem_swapin_page+0x196/0x740
shmem_getpage_gfp+0x3a2/0xa60
shmem_read_mapping_page_gfp+0x32/0x60
shmem_get_pages+0x155/0x5e0 [i915]
__i915_gem_object_get_pages+0x68/0xa0 [i915]
i915_vma_pin+0x3fe/0x6c0 [i915]
eb_add_vma+0x10b/0x2c0 [i915]
i915_gem_do_execbuffer+0x704/0x3430 [i915]
i915_gem_execbuffer2_ioctl+0x1ea/0x3e0 [i915]
drm_ioctl_kernel+0x86/0xd0 [drm]
drm_ioctl+0x206/0x390 [drm]
ksys_ioctl+0x82/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5b/0xf0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported on 5.7, but it goes back really to 3.1: when
shmem_read_mapping_page_gfp() was implemented for use by i915, and
allowed for __GFP_NORETRY and __GFP_NOWARN flags in most places, but
missed swapin's "& GFP_KERNEL" mask for page tree node allocation in
__read_swap_cache_async() - that was to mask off HIGHUSER_MOVABLE bits
from what page cache uses, but GFP_RECLAIM_MASK is now what's needed.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208085
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2006151330070.11064@eggly.anvils
Fixes: 68da9f055755 ("tmpfs: pass gfp to shmem_getpage_gfp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Analyzed-by: Vlastimil Babka <vbabka(a)suse.cz>
Analyzed-by: Matthew Wilcox <willy(a)infradead.org>
Tested-by: Chris Murphy <lists(a)colorremedies.com>
Cc: <stable(a)vger.kernel.org> [3.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e98ff460e9e9..05889e8e3c97 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-
+#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+ if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 243bce09c91b0145aeaedd5afba799d81841c030 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd(a)google.com>
Date: Thu, 25 Jun 2020 20:29:59 -0700
Subject: [PATCH] mm: fix swap cache node allocation mask
Chris Murphy reports that a slightly overcommitted load, testing swap
and zram along with i915, splats and keeps on splatting, when it had
better fail less noisily:
gnome-shell: page allocation failure: order:0,
mode:0x400d0(__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_RECLAIMABLE),
nodemask=(null),cpuset=/,mems_allowed=0
CPU: 2 PID: 1155 Comm: gnome-shell Not tainted 5.7.0-1.fc33.x86_64 #1
Call Trace:
dump_stack+0x64/0x88
warn_alloc.cold+0x75/0xd9
__alloc_pages_slowpath.constprop.0+0xcfa/0xd30
__alloc_pages_nodemask+0x2df/0x320
alloc_slab_page+0x195/0x310
allocate_slab+0x3c5/0x440
___slab_alloc+0x40c/0x5f0
__slab_alloc+0x1c/0x30
kmem_cache_alloc+0x20e/0x220
xas_nomem+0x28/0x70
add_to_swap_cache+0x321/0x400
__read_swap_cache_async+0x105/0x240
swap_cluster_readahead+0x22c/0x2e0
shmem_swapin+0x8e/0xc0
shmem_swapin_page+0x196/0x740
shmem_getpage_gfp+0x3a2/0xa60
shmem_read_mapping_page_gfp+0x32/0x60
shmem_get_pages+0x155/0x5e0 [i915]
__i915_gem_object_get_pages+0x68/0xa0 [i915]
i915_vma_pin+0x3fe/0x6c0 [i915]
eb_add_vma+0x10b/0x2c0 [i915]
i915_gem_do_execbuffer+0x704/0x3430 [i915]
i915_gem_execbuffer2_ioctl+0x1ea/0x3e0 [i915]
drm_ioctl_kernel+0x86/0xd0 [drm]
drm_ioctl+0x206/0x390 [drm]
ksys_ioctl+0x82/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5b/0xf0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported on 5.7, but it goes back really to 3.1: when
shmem_read_mapping_page_gfp() was implemented for use by i915, and
allowed for __GFP_NORETRY and __GFP_NOWARN flags in most places, but
missed swapin's "& GFP_KERNEL" mask for page tree node allocation in
__read_swap_cache_async() - that was to mask off HIGHUSER_MOVABLE bits
from what page cache uses, but GFP_RECLAIM_MASK is now what's needed.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208085
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2006151330070.11064@eggly.anvils
Fixes: 68da9f055755 ("tmpfs: pass gfp to shmem_getpage_gfp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Analyzed-by: Vlastimil Babka <vbabka(a)suse.cz>
Analyzed-by: Matthew Wilcox <willy(a)infradead.org>
Tested-by: Chris Murphy <lists(a)colorremedies.com>
Cc: <stable(a)vger.kernel.org> [3.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e98ff460e9e9..05889e8e3c97 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-
+#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+ if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 243bce09c91b0145aeaedd5afba799d81841c030 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd(a)google.com>
Date: Thu, 25 Jun 2020 20:29:59 -0700
Subject: [PATCH] mm: fix swap cache node allocation mask
Chris Murphy reports that a slightly overcommitted load, testing swap
and zram along with i915, splats and keeps on splatting, when it had
better fail less noisily:
gnome-shell: page allocation failure: order:0,
mode:0x400d0(__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_RECLAIMABLE),
nodemask=(null),cpuset=/,mems_allowed=0
CPU: 2 PID: 1155 Comm: gnome-shell Not tainted 5.7.0-1.fc33.x86_64 #1
Call Trace:
dump_stack+0x64/0x88
warn_alloc.cold+0x75/0xd9
__alloc_pages_slowpath.constprop.0+0xcfa/0xd30
__alloc_pages_nodemask+0x2df/0x320
alloc_slab_page+0x195/0x310
allocate_slab+0x3c5/0x440
___slab_alloc+0x40c/0x5f0
__slab_alloc+0x1c/0x30
kmem_cache_alloc+0x20e/0x220
xas_nomem+0x28/0x70
add_to_swap_cache+0x321/0x400
__read_swap_cache_async+0x105/0x240
swap_cluster_readahead+0x22c/0x2e0
shmem_swapin+0x8e/0xc0
shmem_swapin_page+0x196/0x740
shmem_getpage_gfp+0x3a2/0xa60
shmem_read_mapping_page_gfp+0x32/0x60
shmem_get_pages+0x155/0x5e0 [i915]
__i915_gem_object_get_pages+0x68/0xa0 [i915]
i915_vma_pin+0x3fe/0x6c0 [i915]
eb_add_vma+0x10b/0x2c0 [i915]
i915_gem_do_execbuffer+0x704/0x3430 [i915]
i915_gem_execbuffer2_ioctl+0x1ea/0x3e0 [i915]
drm_ioctl_kernel+0x86/0xd0 [drm]
drm_ioctl+0x206/0x390 [drm]
ksys_ioctl+0x82/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5b/0xf0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported on 5.7, but it goes back really to 3.1: when
shmem_read_mapping_page_gfp() was implemented for use by i915, and
allowed for __GFP_NORETRY and __GFP_NOWARN flags in most places, but
missed swapin's "& GFP_KERNEL" mask for page tree node allocation in
__read_swap_cache_async() - that was to mask off HIGHUSER_MOVABLE bits
from what page cache uses, but GFP_RECLAIM_MASK is now what's needed.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208085
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2006151330070.11064@eggly.anvils
Fixes: 68da9f055755 ("tmpfs: pass gfp to shmem_getpage_gfp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Analyzed-by: Vlastimil Babka <vbabka(a)suse.cz>
Analyzed-by: Matthew Wilcox <willy(a)infradead.org>
Tested-by: Chris Murphy <lists(a)colorremedies.com>
Cc: <stable(a)vger.kernel.org> [3.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e98ff460e9e9..05889e8e3c97 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-
+#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+ if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 260a63395f90f67d6ab89e4266af9e3dc34a77e9 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 15 Jun 2020 18:49:13 +0100
Subject: [PATCH] btrfs: fix RWF_NOWAIT write not failling when we need to cow
If we attempt to do a RWF_NOWAIT write against a file range for which we
can only do NOCOW for a part of it, due to the existence of holes or
shared extents for example, we proceed with the write as if it were
possible to NOCOW the whole range.
Example:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/sdj/bar
$ chattr +C /mnt/sdj/bar
$ xfs_io -d -c "pwrite -S 0xab -b 256K 0 256K" /mnt/bar
wrote 262144/262144 bytes at offset 0
256 KiB, 1 ops; 0.0003 sec (694.444 MiB/sec and 2777.7778 ops/sec)
$ xfs_io -c "fpunch 64K 64K" /mnt/bar
$ sync
$ xfs_io -d -c "pwrite -N -V 1 -b 128K -S 0xfe 0 128K" /mnt/bar
wrote 131072/131072 bytes at offset 0
128 KiB, 1 ops; 0.0007 sec (160.051 MiB/sec and 1280.4097 ops/sec)
This last write should fail with -EAGAIN since the file range from 64K to
128K is a hole. On xfs it fails, as expected, but on ext4 it currently
succeeds because apparently it is expensive to check if there are extents
allocated for the whole range, but I'll check with the ext4 people.
Fix the issue by checking if check_can_nocow() returns a number of
NOCOW'able bytes smaller then the requested number of bytes, and if it
does return -EAGAIN.
Fixes: edf064e7c6fec3 ("btrfs: nowait aio support")
CC: stable(a)vger.kernel.org # 4.14+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 04faa04fccd1..6d5d905281c6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1904,18 +1904,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
if (iocb->ki_flags & IOCB_NOWAIT) {
+ size_t nocow_bytes = count;
+
/*
* We will allocate space in case nodatacow is not set,
* so bail
*/
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) ||
- check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) {
inode_unlock(inode);
return -EAGAIN;
}
/* check_can_nocow() locks the snapshot lock on success */
btrfs_drew_write_unlock(&root->snapshot_lock);
+ /*
+ * There are holes in the range or parts of the range that must
+ * be COWed (shared extents, RO block groups, etc), so just bail
+ * out.
+ */
+ if (nocow_bytes < count) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
}
current->backing_dev_info = inode_to_bdi(inode);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 260a63395f90f67d6ab89e4266af9e3dc34a77e9 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 15 Jun 2020 18:49:13 +0100
Subject: [PATCH] btrfs: fix RWF_NOWAIT write not failling when we need to cow
If we attempt to do a RWF_NOWAIT write against a file range for which we
can only do NOCOW for a part of it, due to the existence of holes or
shared extents for example, we proceed with the write as if it were
possible to NOCOW the whole range.
Example:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/sdj/bar
$ chattr +C /mnt/sdj/bar
$ xfs_io -d -c "pwrite -S 0xab -b 256K 0 256K" /mnt/bar
wrote 262144/262144 bytes at offset 0
256 KiB, 1 ops; 0.0003 sec (694.444 MiB/sec and 2777.7778 ops/sec)
$ xfs_io -c "fpunch 64K 64K" /mnt/bar
$ sync
$ xfs_io -d -c "pwrite -N -V 1 -b 128K -S 0xfe 0 128K" /mnt/bar
wrote 131072/131072 bytes at offset 0
128 KiB, 1 ops; 0.0007 sec (160.051 MiB/sec and 1280.4097 ops/sec)
This last write should fail with -EAGAIN since the file range from 64K to
128K is a hole. On xfs it fails, as expected, but on ext4 it currently
succeeds because apparently it is expensive to check if there are extents
allocated for the whole range, but I'll check with the ext4 people.
Fix the issue by checking if check_can_nocow() returns a number of
NOCOW'able bytes smaller then the requested number of bytes, and if it
does return -EAGAIN.
Fixes: edf064e7c6fec3 ("btrfs: nowait aio support")
CC: stable(a)vger.kernel.org # 4.14+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 04faa04fccd1..6d5d905281c6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1904,18 +1904,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
if (iocb->ki_flags & IOCB_NOWAIT) {
+ size_t nocow_bytes = count;
+
/*
* We will allocate space in case nodatacow is not set,
* so bail
*/
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) ||
- check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) {
inode_unlock(inode);
return -EAGAIN;
}
/* check_can_nocow() locks the snapshot lock on success */
btrfs_drew_write_unlock(&root->snapshot_lock);
+ /*
+ * There are holes in the range or parts of the range that must
+ * be COWed (shared extents, RO block groups, etc), so just bail
+ * out.
+ */
+ if (nocow_bytes < count) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
}
current->backing_dev_info = inode_to_bdi(inode);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 260a63395f90f67d6ab89e4266af9e3dc34a77e9 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 15 Jun 2020 18:49:13 +0100
Subject: [PATCH] btrfs: fix RWF_NOWAIT write not failling when we need to cow
If we attempt to do a RWF_NOWAIT write against a file range for which we
can only do NOCOW for a part of it, due to the existence of holes or
shared extents for example, we proceed with the write as if it were
possible to NOCOW the whole range.
Example:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/sdj/bar
$ chattr +C /mnt/sdj/bar
$ xfs_io -d -c "pwrite -S 0xab -b 256K 0 256K" /mnt/bar
wrote 262144/262144 bytes at offset 0
256 KiB, 1 ops; 0.0003 sec (694.444 MiB/sec and 2777.7778 ops/sec)
$ xfs_io -c "fpunch 64K 64K" /mnt/bar
$ sync
$ xfs_io -d -c "pwrite -N -V 1 -b 128K -S 0xfe 0 128K" /mnt/bar
wrote 131072/131072 bytes at offset 0
128 KiB, 1 ops; 0.0007 sec (160.051 MiB/sec and 1280.4097 ops/sec)
This last write should fail with -EAGAIN since the file range from 64K to
128K is a hole. On xfs it fails, as expected, but on ext4 it currently
succeeds because apparently it is expensive to check if there are extents
allocated for the whole range, but I'll check with the ext4 people.
Fix the issue by checking if check_can_nocow() returns a number of
NOCOW'able bytes smaller then the requested number of bytes, and if it
does return -EAGAIN.
Fixes: edf064e7c6fec3 ("btrfs: nowait aio support")
CC: stable(a)vger.kernel.org # 4.14+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 04faa04fccd1..6d5d905281c6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1904,18 +1904,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
if (iocb->ki_flags & IOCB_NOWAIT) {
+ size_t nocow_bytes = count;
+
/*
* We will allocate space in case nodatacow is not set,
* so bail
*/
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) ||
- check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) {
inode_unlock(inode);
return -EAGAIN;
}
/* check_can_nocow() locks the snapshot lock on success */
btrfs_drew_write_unlock(&root->snapshot_lock);
+ /*
+ * There are holes in the range or parts of the range that must
+ * be COWed (shared extents, RO block groups, etc), so just bail
+ * out.
+ */
+ if (nocow_bytes < count) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
}
current->backing_dev_info = inode_to_bdi(inode);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f2cb2f39ccc30fa13d3ac078d461031a63960e5b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 15 Jun 2020 18:46:01 +0100
Subject: [PATCH] btrfs: fix hang on snapshot creation after RWF_NOWAIT write
If we do a successful RWF_NOWAIT write we end up locking the snapshot lock
of the inode, through a call to check_can_nocow(), but we never unlock it.
This means the next attempt to create a snapshot on the subvolume will
hang forever.
Trivial reproducer:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/foobar
$ chattr +C /mnt/foobar
$ xfs_io -d -c "pwrite -S 0xab 0 64K" /mnt/foobar
$ xfs_io -d -c "pwrite -N -V 1 -S 0xfe 0 64K" /mnt/foobar
$ btrfs subvolume snapshot -r /mnt /mnt/snap
--> hangs
Fix this by unlocking the snapshot lock if check_can_nocow() returned
success.
Fixes: edf064e7c6fec3 ("btrfs: nowait aio support")
CC: stable(a)vger.kernel.org # 4.14+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2c14312b05e8..04faa04fccd1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1914,6 +1914,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
inode_unlock(inode);
return -EAGAIN;
}
+ /* check_can_nocow() locks the snapshot lock on success */
+ btrfs_drew_write_unlock(&root->snapshot_lock);
}
current->backing_dev_info = inode_to_bdi(inode);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f2cb2f39ccc30fa13d3ac078d461031a63960e5b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 15 Jun 2020 18:46:01 +0100
Subject: [PATCH] btrfs: fix hang on snapshot creation after RWF_NOWAIT write
If we do a successful RWF_NOWAIT write we end up locking the snapshot lock
of the inode, through a call to check_can_nocow(), but we never unlock it.
This means the next attempt to create a snapshot on the subvolume will
hang forever.
Trivial reproducer:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/foobar
$ chattr +C /mnt/foobar
$ xfs_io -d -c "pwrite -S 0xab 0 64K" /mnt/foobar
$ xfs_io -d -c "pwrite -N -V 1 -S 0xfe 0 64K" /mnt/foobar
$ btrfs subvolume snapshot -r /mnt /mnt/snap
--> hangs
Fix this by unlocking the snapshot lock if check_can_nocow() returned
success.
Fixes: edf064e7c6fec3 ("btrfs: nowait aio support")
CC: stable(a)vger.kernel.org # 4.14+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2c14312b05e8..04faa04fccd1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1914,6 +1914,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
inode_unlock(inode);
return -EAGAIN;
}
+ /* check_can_nocow() locks the snapshot lock on success */
+ btrfs_drew_write_unlock(&root->snapshot_lock);
}
current->backing_dev_info = inode_to_bdi(inode);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f2cb2f39ccc30fa13d3ac078d461031a63960e5b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 15 Jun 2020 18:46:01 +0100
Subject: [PATCH] btrfs: fix hang on snapshot creation after RWF_NOWAIT write
If we do a successful RWF_NOWAIT write we end up locking the snapshot lock
of the inode, through a call to check_can_nocow(), but we never unlock it.
This means the next attempt to create a snapshot on the subvolume will
hang forever.
Trivial reproducer:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/foobar
$ chattr +C /mnt/foobar
$ xfs_io -d -c "pwrite -S 0xab 0 64K" /mnt/foobar
$ xfs_io -d -c "pwrite -N -V 1 -S 0xfe 0 64K" /mnt/foobar
$ btrfs subvolume snapshot -r /mnt /mnt/snap
--> hangs
Fix this by unlocking the snapshot lock if check_can_nocow() returned
success.
Fixes: edf064e7c6fec3 ("btrfs: nowait aio support")
CC: stable(a)vger.kernel.org # 4.14+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2c14312b05e8..04faa04fccd1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1914,6 +1914,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
inode_unlock(inode);
return -EAGAIN;
}
+ /* check_can_nocow() locks the snapshot lock on success */
+ btrfs_drew_write_unlock(&root->snapshot_lock);
}
current->backing_dev_info = inode_to_bdi(inode);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 432cd2a10f1c10cead91fe706ff5dc52f06d642a Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 8 Jun 2020 13:32:55 +0100
Subject: [PATCH] btrfs: fix data block group relocation failure due to
concurrent scrub
When running relocation of a data block group while scrub is running in
parallel, it is possible that the relocation will fail and abort the
current transaction with an -EINVAL error:
[134243.988595] BTRFS info (device sdc): found 14 extents, stage: move data extents
[134243.999871] ------------[ cut here ]------------
[134244.000741] BTRFS: Transaction aborted (error -22)
[134244.001692] WARNING: CPU: 0 PID: 26954 at fs/btrfs/ctree.c:1071 __btrfs_cow_block+0x6a7/0x790 [btrfs]
[134244.003380] Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
[134244.012577] CPU: 0 PID: 26954 Comm: btrfs Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
[134244.014162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[134244.016184] RIP: 0010:__btrfs_cow_block+0x6a7/0x790 [btrfs]
[134244.017151] Code: 48 c7 c7 (...)
[134244.020549] RSP: 0018:ffffa41607863888 EFLAGS: 00010286
[134244.021515] RAX: 0000000000000000 RBX: ffff9614bdfe09c8 RCX: 0000000000000000
[134244.022822] RDX: 0000000000000001 RSI: ffffffffb3d63980 RDI: 0000000000000001
[134244.024124] RBP: ffff961589e8c000 R08: 0000000000000000 R09: 0000000000000001
[134244.025424] R10: ffffffffc0ae5955 R11: 0000000000000000 R12: ffff9614bd530d08
[134244.026725] R13: ffff9614ced41b88 R14: ffff9614bdfe2a48 R15: 0000000000000000
[134244.028024] FS: 00007f29b63c08c0(0000) GS:ffff9615ba600000(0000) knlGS:0000000000000000
[134244.029491] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[134244.030560] CR2: 00007f4eb339b000 CR3: 0000000130d6e006 CR4: 00000000003606f0
[134244.031997] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[134244.033153] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[134244.034484] Call Trace:
[134244.034984] btrfs_cow_block+0x12b/0x2b0 [btrfs]
[134244.035859] do_relocation+0x30b/0x790 [btrfs]
[134244.036681] ? do_raw_spin_unlock+0x49/0xc0
[134244.037460] ? _raw_spin_unlock+0x29/0x40
[134244.038235] relocate_tree_blocks+0x37b/0x730 [btrfs]
[134244.039245] relocate_block_group+0x388/0x770 [btrfs]
[134244.040228] btrfs_relocate_block_group+0x161/0x2e0 [btrfs]
[134244.041323] btrfs_relocate_chunk+0x36/0x110 [btrfs]
[134244.041345] btrfs_balance+0xc06/0x1860 [btrfs]
[134244.043382] ? btrfs_ioctl_balance+0x27c/0x310 [btrfs]
[134244.045586] btrfs_ioctl_balance+0x1ed/0x310 [btrfs]
[134244.045611] btrfs_ioctl+0x1880/0x3760 [btrfs]
[134244.049043] ? do_raw_spin_unlock+0x49/0xc0
[134244.049838] ? _raw_spin_unlock+0x29/0x40
[134244.050587] ? __handle_mm_fault+0x11b3/0x14b0
[134244.051417] ? ksys_ioctl+0x92/0xb0
[134244.052070] ksys_ioctl+0x92/0xb0
[134244.052701] ? trace_hardirqs_off_thunk+0x1a/0x1c
[134244.053511] __x64_sys_ioctl+0x16/0x20
[134244.054206] do_syscall_64+0x5c/0x280
[134244.054891] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[134244.055819] RIP: 0033:0x7f29b51c9dd7
[134244.056491] Code: 00 00 00 (...)
[134244.059767] RSP: 002b:00007ffcccc1dd08 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
[134244.061168] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f29b51c9dd7
[134244.062474] RDX: 00007ffcccc1dda0 RSI: 00000000c4009420 RDI: 0000000000000003
[134244.063771] RBP: 0000000000000003 R08: 00005565cea4b000 R09: 0000000000000000
[134244.065032] R10: 0000000000000541 R11: 0000000000000202 R12: 00007ffcccc2060a
[134244.066327] R13: 00007ffcccc1dda0 R14: 0000000000000002 R15: 00007ffcccc1dec0
[134244.067626] irq event stamp: 0
[134244.068202] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[134244.069351] hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134244.070909] softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134244.072392] softirqs last disabled at (0): [<0000000000000000>] 0x0
[134244.073432] ---[ end trace bd7c03622e0b0a99 ]---
The -EINVAL error comes from the following chain of function calls:
__btrfs_cow_block() <-- aborts the transaction
btrfs_reloc_cow_block()
replace_file_extents()
get_new_location() <-- returns -EINVAL
When relocating a data block group, for each allocated extent of the block
group, we preallocate another extent (at prealloc_file_extent_cluster()),
associated with the data relocation inode, and then dirty all its pages.
These preallocated extents have, and must have, the same size that extents
from the data block group being relocated have.
Later before we start the relocation stage that updates pointers (bytenr
field of file extent items) to point to the the new extents, we trigger
writeback for the data relocation inode. The expectation is that writeback
will write the pages to the previously preallocated extents, that it
follows the NOCOW path. That is generally the case, however, if a scrub
is running it may have turned the block group that contains those extents
into RO mode, in which case writeback falls back to the COW path.
However in the COW path instead of allocating exactly one extent with the
expected size, the allocator may end up allocating several smaller extents
due to free space fragmentation - because we tell it at cow_file_range()
that the minimum allocation size can match the filesystem's sector size.
This later breaks the relocation's expectation that an extent associated
to a file extent item in the data relocation inode has the same size as
the respective extent pointed by a file extent item in another tree - in
this case the extent to which the relocation inode poins to is smaller,
causing relocation.c:get_new_location() to return -EINVAL.
For example, if we are relocating a data block group X that has a logical
address of X and the block group has an extent allocated at the logical
address X + 128KiB with a size of 64KiB:
1) At prealloc_file_extent_cluster() we allocate an extent for the data
relocation inode with a size of 64KiB and associate it to the file
offset 128KiB (X + 128KiB - X) of the data relocation inode. This
preallocated extent was allocated at block group Z;
2) A scrub running in parallel turns block group Z into RO mode and
starts scrubing its extents;
3) Relocation triggers writeback for the data relocation inode;
4) When running delalloc (btrfs_run_delalloc_range()), we try first the
NOCOW path because the data relocation inode has BTRFS_INODE_PREALLOC
set in its flags. However, because block group Z is in RO mode, the
NOCOW path (run_delalloc_nocow()) falls back into the COW path, by
calling cow_file_range();
5) At cow_file_range(), in the first iteration of the while loop we call
btrfs_reserve_extent() to allocate a 64KiB extent and pass it a minimum
allocation size of 4KiB (fs_info->sectorsize). Due to free space
fragmentation, btrfs_reserve_extent() ends up allocating two extents
of 32KiB each, each one on a different iteration of that while loop;
6) Writeback of the data relocation inode completes;
7) Relocation proceeds and ends up at relocation.c:replace_file_extents(),
with a leaf which has a file extent item that points to the data extent
from block group X, that has a logical address (bytenr) of X + 128KiB
and a size of 64KiB. Then it calls get_new_location(), which does a
lookup in the data relocation tree for a file extent item starting at
offset 128KiB (X + 128KiB - X) and belonging to the data relocation
inode. It finds a corresponding file extent item, however that item
points to an extent that has a size of 32KiB, which doesn't match the
expected size of 64KiB, resuling in -EINVAL being returned from this
function and propagated up to __btrfs_cow_block(), which aborts the
current transaction.
To fix this make sure that at cow_file_range() when we call the allocator
we pass it a minimum allocation size corresponding the desired extent size
if the inode belongs to the data relocation tree, otherwise pass it the
filesystem's sector size as the minimum allocation size.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 12b5d61f23bb..62c3f4972ff6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -985,6 +985,7 @@ static noinline int cow_file_range(struct inode *inode,
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -1035,10 +1036,26 @@ static noinline int cow_file_range(struct inode *inode,
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 432cd2a10f1c10cead91fe706ff5dc52f06d642a Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 8 Jun 2020 13:32:55 +0100
Subject: [PATCH] btrfs: fix data block group relocation failure due to
concurrent scrub
When running relocation of a data block group while scrub is running in
parallel, it is possible that the relocation will fail and abort the
current transaction with an -EINVAL error:
[134243.988595] BTRFS info (device sdc): found 14 extents, stage: move data extents
[134243.999871] ------------[ cut here ]------------
[134244.000741] BTRFS: Transaction aborted (error -22)
[134244.001692] WARNING: CPU: 0 PID: 26954 at fs/btrfs/ctree.c:1071 __btrfs_cow_block+0x6a7/0x790 [btrfs]
[134244.003380] Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
[134244.012577] CPU: 0 PID: 26954 Comm: btrfs Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
[134244.014162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[134244.016184] RIP: 0010:__btrfs_cow_block+0x6a7/0x790 [btrfs]
[134244.017151] Code: 48 c7 c7 (...)
[134244.020549] RSP: 0018:ffffa41607863888 EFLAGS: 00010286
[134244.021515] RAX: 0000000000000000 RBX: ffff9614bdfe09c8 RCX: 0000000000000000
[134244.022822] RDX: 0000000000000001 RSI: ffffffffb3d63980 RDI: 0000000000000001
[134244.024124] RBP: ffff961589e8c000 R08: 0000000000000000 R09: 0000000000000001
[134244.025424] R10: ffffffffc0ae5955 R11: 0000000000000000 R12: ffff9614bd530d08
[134244.026725] R13: ffff9614ced41b88 R14: ffff9614bdfe2a48 R15: 0000000000000000
[134244.028024] FS: 00007f29b63c08c0(0000) GS:ffff9615ba600000(0000) knlGS:0000000000000000
[134244.029491] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[134244.030560] CR2: 00007f4eb339b000 CR3: 0000000130d6e006 CR4: 00000000003606f0
[134244.031997] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[134244.033153] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[134244.034484] Call Trace:
[134244.034984] btrfs_cow_block+0x12b/0x2b0 [btrfs]
[134244.035859] do_relocation+0x30b/0x790 [btrfs]
[134244.036681] ? do_raw_spin_unlock+0x49/0xc0
[134244.037460] ? _raw_spin_unlock+0x29/0x40
[134244.038235] relocate_tree_blocks+0x37b/0x730 [btrfs]
[134244.039245] relocate_block_group+0x388/0x770 [btrfs]
[134244.040228] btrfs_relocate_block_group+0x161/0x2e0 [btrfs]
[134244.041323] btrfs_relocate_chunk+0x36/0x110 [btrfs]
[134244.041345] btrfs_balance+0xc06/0x1860 [btrfs]
[134244.043382] ? btrfs_ioctl_balance+0x27c/0x310 [btrfs]
[134244.045586] btrfs_ioctl_balance+0x1ed/0x310 [btrfs]
[134244.045611] btrfs_ioctl+0x1880/0x3760 [btrfs]
[134244.049043] ? do_raw_spin_unlock+0x49/0xc0
[134244.049838] ? _raw_spin_unlock+0x29/0x40
[134244.050587] ? __handle_mm_fault+0x11b3/0x14b0
[134244.051417] ? ksys_ioctl+0x92/0xb0
[134244.052070] ksys_ioctl+0x92/0xb0
[134244.052701] ? trace_hardirqs_off_thunk+0x1a/0x1c
[134244.053511] __x64_sys_ioctl+0x16/0x20
[134244.054206] do_syscall_64+0x5c/0x280
[134244.054891] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[134244.055819] RIP: 0033:0x7f29b51c9dd7
[134244.056491] Code: 00 00 00 (...)
[134244.059767] RSP: 002b:00007ffcccc1dd08 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
[134244.061168] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f29b51c9dd7
[134244.062474] RDX: 00007ffcccc1dda0 RSI: 00000000c4009420 RDI: 0000000000000003
[134244.063771] RBP: 0000000000000003 R08: 00005565cea4b000 R09: 0000000000000000
[134244.065032] R10: 0000000000000541 R11: 0000000000000202 R12: 00007ffcccc2060a
[134244.066327] R13: 00007ffcccc1dda0 R14: 0000000000000002 R15: 00007ffcccc1dec0
[134244.067626] irq event stamp: 0
[134244.068202] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[134244.069351] hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134244.070909] softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134244.072392] softirqs last disabled at (0): [<0000000000000000>] 0x0
[134244.073432] ---[ end trace bd7c03622e0b0a99 ]---
The -EINVAL error comes from the following chain of function calls:
__btrfs_cow_block() <-- aborts the transaction
btrfs_reloc_cow_block()
replace_file_extents()
get_new_location() <-- returns -EINVAL
When relocating a data block group, for each allocated extent of the block
group, we preallocate another extent (at prealloc_file_extent_cluster()),
associated with the data relocation inode, and then dirty all its pages.
These preallocated extents have, and must have, the same size that extents
from the data block group being relocated have.
Later before we start the relocation stage that updates pointers (bytenr
field of file extent items) to point to the the new extents, we trigger
writeback for the data relocation inode. The expectation is that writeback
will write the pages to the previously preallocated extents, that it
follows the NOCOW path. That is generally the case, however, if a scrub
is running it may have turned the block group that contains those extents
into RO mode, in which case writeback falls back to the COW path.
However in the COW path instead of allocating exactly one extent with the
expected size, the allocator may end up allocating several smaller extents
due to free space fragmentation - because we tell it at cow_file_range()
that the minimum allocation size can match the filesystem's sector size.
This later breaks the relocation's expectation that an extent associated
to a file extent item in the data relocation inode has the same size as
the respective extent pointed by a file extent item in another tree - in
this case the extent to which the relocation inode poins to is smaller,
causing relocation.c:get_new_location() to return -EINVAL.
For example, if we are relocating a data block group X that has a logical
address of X and the block group has an extent allocated at the logical
address X + 128KiB with a size of 64KiB:
1) At prealloc_file_extent_cluster() we allocate an extent for the data
relocation inode with a size of 64KiB and associate it to the file
offset 128KiB (X + 128KiB - X) of the data relocation inode. This
preallocated extent was allocated at block group Z;
2) A scrub running in parallel turns block group Z into RO mode and
starts scrubing its extents;
3) Relocation triggers writeback for the data relocation inode;
4) When running delalloc (btrfs_run_delalloc_range()), we try first the
NOCOW path because the data relocation inode has BTRFS_INODE_PREALLOC
set in its flags. However, because block group Z is in RO mode, the
NOCOW path (run_delalloc_nocow()) falls back into the COW path, by
calling cow_file_range();
5) At cow_file_range(), in the first iteration of the while loop we call
btrfs_reserve_extent() to allocate a 64KiB extent and pass it a minimum
allocation size of 4KiB (fs_info->sectorsize). Due to free space
fragmentation, btrfs_reserve_extent() ends up allocating two extents
of 32KiB each, each one on a different iteration of that while loop;
6) Writeback of the data relocation inode completes;
7) Relocation proceeds and ends up at relocation.c:replace_file_extents(),
with a leaf which has a file extent item that points to the data extent
from block group X, that has a logical address (bytenr) of X + 128KiB
and a size of 64KiB. Then it calls get_new_location(), which does a
lookup in the data relocation tree for a file extent item starting at
offset 128KiB (X + 128KiB - X) and belonging to the data relocation
inode. It finds a corresponding file extent item, however that item
points to an extent that has a size of 32KiB, which doesn't match the
expected size of 64KiB, resuling in -EINVAL being returned from this
function and propagated up to __btrfs_cow_block(), which aborts the
current transaction.
To fix this make sure that at cow_file_range() when we call the allocator
we pass it a minimum allocation size corresponding the desired extent size
if the inode belongs to the data relocation tree, otherwise pass it the
filesystem's sector size as the minimum allocation size.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 12b5d61f23bb..62c3f4972ff6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -985,6 +985,7 @@ static noinline int cow_file_range(struct inode *inode,
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -1035,10 +1036,26 @@ static noinline int cow_file_range(struct inode *inode,
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ffcb9d44572afbaf8fa6dbf5115bff6dab7b299e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:19 +0100
Subject: [PATCH] btrfs: fix race between block group removal and block group
creation
There is a race between block group removal and block group creation
when the removal is completed by a task running fitrim or scrub. When
this happens we end up failing the block group creation with an error
-EEXIST since we attempt to insert a duplicate block group item key
in the extent tree. That results in a transaction abort.
The race happens like this:
1) Task A is doing a fitrim, and at btrfs_trim_block_group() it freezes
block group X with btrfs_freeze_block_group() (until very recently
that was named btrfs_get_block_group_trimming());
2) Task B starts removing block group X, either because it's now unused
or due to relocation for example. So at btrfs_remove_block_group(),
while holding the chunk mutex and the block group's lock, it sets
the 'removed' flag of the block group and it sets the local variable
'remove_em' to false, because the block group is currently frozen
(its 'frozen' counter is > 0, until very recently this counter was
named 'trimming');
3) Task B unlocks the block group and the chunk mutex;
4) Task A is done trimming the block group and unfreezes the block group
by calling btrfs_unfreeze_block_group() (until very recently this was
named btrfs_put_block_group_trimming()). In this function we lock the
block group and set the local variable 'cleanup' to true because we
were able to decrement the block group's 'frozen' counter down to 0 and
the flag 'removed' is set in the block group.
Since 'cleanup' is set to true, it locks the chunk mutex and removes
the extent mapping representing the block group from the mapping tree;
5) Task C allocates a new block group Y and it picks up the logical address
that block group X had as the logical address for Y, because X was the
block group with the highest logical address and now the second block
group with the highest logical address, the last in the fs mapping tree,
ends at an offset corresponding to block group X's logical address (this
logical address selection is done at volumes.c:find_next_chunk()).
At this point the new block group Y does not have yet its item added
to the extent tree (nor the corresponding device extent items and
chunk item in the device and chunk trees). The new group Y is added to
the list of pending block groups in the transaction handle;
6) Before task B proceeds to removing the block group item for block
group X from the extent tree, which has a key matching:
(X logical offset, BTRFS_BLOCK_GROUP_ITEM_KEY, length)
task C while ending its transaction handle calls
btrfs_create_pending_block_groups(), which finds block group Y and
tries to insert the block group item for Y into the exten tree, which
fails with -EEXIST since logical offset is the same that X had and
task B hasn't yet deleted the key from the extent tree.
This failure results in a transaction abort, producing a stack like
the following:
------------[ cut here ]------------
BTRFS: Transaction aborted (error -17)
WARNING: CPU: 2 PID: 19736 at fs/btrfs/block-group.c:2074 btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
CPU: 2 PID: 19736 Comm: fsstress Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Code: ff ff ff 48 8b 55 50 f0 48 (...)
RSP: 0018:ffffa4160a1c7d58 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff961581909d98 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffffffffb3d63990 RDI: 0000000000000001
RBP: ffff9614f3356a58 R08: 0000000000000000 R09: 0000000000000001
R10: ffff9615b65b0040 R11: 0000000000000000 R12: ffff961581909c10
R13: ffff9615b0c32000 R14: ffff9614f3356ab0 R15: ffff9614be779000
FS: 00007f2ce2841e80(0000) GS:ffff9615bae00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000555f18780000 CR3: 0000000131d34005 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
btrfs_start_dirty_block_groups+0x398/0x4e0 [btrfs]
btrfs_commit_transaction+0xd0/0xc50 [btrfs]
? btrfs_attach_transaction_barrier+0x1e/0x50 [btrfs]
? __ia32_sys_fdatasync+0x20/0x20
iterate_supers+0xdb/0x180
ksys_sync+0x60/0xb0
__ia32_sys_sync+0xa/0x10
do_syscall_64+0x5c/0x280
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f2ce1d4d5b7
Code: 83 c4 08 48 3d 01 (...)
RSP: 002b:00007ffd8b558c58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a2
RAX: ffffffffffffffda RBX: 000000000000002c RCX: 00007f2ce1d4d5b7
RDX: 00000000ffffffff RSI: 00000000186ba07b RDI: 000000000000002c
RBP: 0000555f17b9e520 R08: 0000000000000012 R09: 000000000000ce00
R10: 0000000000000078 R11: 0000000000000202 R12: 0000000000000032
R13: 0000000051eb851f R14: 00007ffd8b558cd0 R15: 0000555f1798ec20
irq event stamp: 0
hardirqs last enabled at (0): [<0000000000000000>] 0x0
hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last disabled at (0): [<0000000000000000>] 0x0
---[ end trace bd7c03622e0b0a9c ]---
Fix this simply by making btrfs_remove_block_group() remove the block
group's item from the extent tree before it flags the block group as
removed. Also make the free space deletion from the free space tree
before flagging the block group as removed, to avoid a similar race
with adding and removing free space entries for the free space tree.
Fixes: 04216820fe83d5 ("Btrfs: fix race between fs trimming and block group remove/allocation")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6462dd0b155c..c037ef514b64 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1092,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
+ /*
+ * Remove the free space for the block group from the free space tree
+ * and the block group's item from the extent tree before marking the
+ * block group as removed. This is to prevent races with tasks that
+ * freeze and unfreeze a block group, this task and another task
+ * allocating a new block group - the unfreeze task ends up removing
+ * the block group's extent map before the task calling this function
+ * deletes the block group item from the extent tree, allowing for
+ * another task to attempt to create another block group with the same
+ * item key (and failing with -EEXIST and a transaction abort).
+ */
+ ret = remove_block_group_free_space(trans, block_group);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_item(trans, path, block_group);
+ if (ret < 0)
+ goto out;
+
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
@@ -1126,14 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, block_group);
- if (ret)
- goto out;
-
- ret = remove_block_group_item(trans, path, block_group);
- if (ret < 0)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ffcb9d44572afbaf8fa6dbf5115bff6dab7b299e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:19 +0100
Subject: [PATCH] btrfs: fix race between block group removal and block group
creation
There is a race between block group removal and block group creation
when the removal is completed by a task running fitrim or scrub. When
this happens we end up failing the block group creation with an error
-EEXIST since we attempt to insert a duplicate block group item key
in the extent tree. That results in a transaction abort.
The race happens like this:
1) Task A is doing a fitrim, and at btrfs_trim_block_group() it freezes
block group X with btrfs_freeze_block_group() (until very recently
that was named btrfs_get_block_group_trimming());
2) Task B starts removing block group X, either because it's now unused
or due to relocation for example. So at btrfs_remove_block_group(),
while holding the chunk mutex and the block group's lock, it sets
the 'removed' flag of the block group and it sets the local variable
'remove_em' to false, because the block group is currently frozen
(its 'frozen' counter is > 0, until very recently this counter was
named 'trimming');
3) Task B unlocks the block group and the chunk mutex;
4) Task A is done trimming the block group and unfreezes the block group
by calling btrfs_unfreeze_block_group() (until very recently this was
named btrfs_put_block_group_trimming()). In this function we lock the
block group and set the local variable 'cleanup' to true because we
were able to decrement the block group's 'frozen' counter down to 0 and
the flag 'removed' is set in the block group.
Since 'cleanup' is set to true, it locks the chunk mutex and removes
the extent mapping representing the block group from the mapping tree;
5) Task C allocates a new block group Y and it picks up the logical address
that block group X had as the logical address for Y, because X was the
block group with the highest logical address and now the second block
group with the highest logical address, the last in the fs mapping tree,
ends at an offset corresponding to block group X's logical address (this
logical address selection is done at volumes.c:find_next_chunk()).
At this point the new block group Y does not have yet its item added
to the extent tree (nor the corresponding device extent items and
chunk item in the device and chunk trees). The new group Y is added to
the list of pending block groups in the transaction handle;
6) Before task B proceeds to removing the block group item for block
group X from the extent tree, which has a key matching:
(X logical offset, BTRFS_BLOCK_GROUP_ITEM_KEY, length)
task C while ending its transaction handle calls
btrfs_create_pending_block_groups(), which finds block group Y and
tries to insert the block group item for Y into the exten tree, which
fails with -EEXIST since logical offset is the same that X had and
task B hasn't yet deleted the key from the extent tree.
This failure results in a transaction abort, producing a stack like
the following:
------------[ cut here ]------------
BTRFS: Transaction aborted (error -17)
WARNING: CPU: 2 PID: 19736 at fs/btrfs/block-group.c:2074 btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
CPU: 2 PID: 19736 Comm: fsstress Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Code: ff ff ff 48 8b 55 50 f0 48 (...)
RSP: 0018:ffffa4160a1c7d58 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff961581909d98 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffffffffb3d63990 RDI: 0000000000000001
RBP: ffff9614f3356a58 R08: 0000000000000000 R09: 0000000000000001
R10: ffff9615b65b0040 R11: 0000000000000000 R12: ffff961581909c10
R13: ffff9615b0c32000 R14: ffff9614f3356ab0 R15: ffff9614be779000
FS: 00007f2ce2841e80(0000) GS:ffff9615bae00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000555f18780000 CR3: 0000000131d34005 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
btrfs_start_dirty_block_groups+0x398/0x4e0 [btrfs]
btrfs_commit_transaction+0xd0/0xc50 [btrfs]
? btrfs_attach_transaction_barrier+0x1e/0x50 [btrfs]
? __ia32_sys_fdatasync+0x20/0x20
iterate_supers+0xdb/0x180
ksys_sync+0x60/0xb0
__ia32_sys_sync+0xa/0x10
do_syscall_64+0x5c/0x280
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f2ce1d4d5b7
Code: 83 c4 08 48 3d 01 (...)
RSP: 002b:00007ffd8b558c58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a2
RAX: ffffffffffffffda RBX: 000000000000002c RCX: 00007f2ce1d4d5b7
RDX: 00000000ffffffff RSI: 00000000186ba07b RDI: 000000000000002c
RBP: 0000555f17b9e520 R08: 0000000000000012 R09: 000000000000ce00
R10: 0000000000000078 R11: 0000000000000202 R12: 0000000000000032
R13: 0000000051eb851f R14: 00007ffd8b558cd0 R15: 0000555f1798ec20
irq event stamp: 0
hardirqs last enabled at (0): [<0000000000000000>] 0x0
hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last disabled at (0): [<0000000000000000>] 0x0
---[ end trace bd7c03622e0b0a9c ]---
Fix this simply by making btrfs_remove_block_group() remove the block
group's item from the extent tree before it flags the block group as
removed. Also make the free space deletion from the free space tree
before flagging the block group as removed, to avoid a similar race
with adding and removing free space entries for the free space tree.
Fixes: 04216820fe83d5 ("Btrfs: fix race between fs trimming and block group remove/allocation")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6462dd0b155c..c037ef514b64 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1092,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
+ /*
+ * Remove the free space for the block group from the free space tree
+ * and the block group's item from the extent tree before marking the
+ * block group as removed. This is to prevent races with tasks that
+ * freeze and unfreeze a block group, this task and another task
+ * allocating a new block group - the unfreeze task ends up removing
+ * the block group's extent map before the task calling this function
+ * deletes the block group item from the extent tree, allowing for
+ * another task to attempt to create another block group with the same
+ * item key (and failing with -EEXIST and a transaction abort).
+ */
+ ret = remove_block_group_free_space(trans, block_group);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_item(trans, path, block_group);
+ if (ret < 0)
+ goto out;
+
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
@@ -1126,14 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, block_group);
- if (ret)
- goto out;
-
- ret = remove_block_group_item(trans, path, block_group);
- if (ret < 0)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ffcb9d44572afbaf8fa6dbf5115bff6dab7b299e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:19 +0100
Subject: [PATCH] btrfs: fix race between block group removal and block group
creation
There is a race between block group removal and block group creation
when the removal is completed by a task running fitrim or scrub. When
this happens we end up failing the block group creation with an error
-EEXIST since we attempt to insert a duplicate block group item key
in the extent tree. That results in a transaction abort.
The race happens like this:
1) Task A is doing a fitrim, and at btrfs_trim_block_group() it freezes
block group X with btrfs_freeze_block_group() (until very recently
that was named btrfs_get_block_group_trimming());
2) Task B starts removing block group X, either because it's now unused
or due to relocation for example. So at btrfs_remove_block_group(),
while holding the chunk mutex and the block group's lock, it sets
the 'removed' flag of the block group and it sets the local variable
'remove_em' to false, because the block group is currently frozen
(its 'frozen' counter is > 0, until very recently this counter was
named 'trimming');
3) Task B unlocks the block group and the chunk mutex;
4) Task A is done trimming the block group and unfreezes the block group
by calling btrfs_unfreeze_block_group() (until very recently this was
named btrfs_put_block_group_trimming()). In this function we lock the
block group and set the local variable 'cleanup' to true because we
were able to decrement the block group's 'frozen' counter down to 0 and
the flag 'removed' is set in the block group.
Since 'cleanup' is set to true, it locks the chunk mutex and removes
the extent mapping representing the block group from the mapping tree;
5) Task C allocates a new block group Y and it picks up the logical address
that block group X had as the logical address for Y, because X was the
block group with the highest logical address and now the second block
group with the highest logical address, the last in the fs mapping tree,
ends at an offset corresponding to block group X's logical address (this
logical address selection is done at volumes.c:find_next_chunk()).
At this point the new block group Y does not have yet its item added
to the extent tree (nor the corresponding device extent items and
chunk item in the device and chunk trees). The new group Y is added to
the list of pending block groups in the transaction handle;
6) Before task B proceeds to removing the block group item for block
group X from the extent tree, which has a key matching:
(X logical offset, BTRFS_BLOCK_GROUP_ITEM_KEY, length)
task C while ending its transaction handle calls
btrfs_create_pending_block_groups(), which finds block group Y and
tries to insert the block group item for Y into the exten tree, which
fails with -EEXIST since logical offset is the same that X had and
task B hasn't yet deleted the key from the extent tree.
This failure results in a transaction abort, producing a stack like
the following:
------------[ cut here ]------------
BTRFS: Transaction aborted (error -17)
WARNING: CPU: 2 PID: 19736 at fs/btrfs/block-group.c:2074 btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
CPU: 2 PID: 19736 Comm: fsstress Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Code: ff ff ff 48 8b 55 50 f0 48 (...)
RSP: 0018:ffffa4160a1c7d58 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff961581909d98 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffffffffb3d63990 RDI: 0000000000000001
RBP: ffff9614f3356a58 R08: 0000000000000000 R09: 0000000000000001
R10: ffff9615b65b0040 R11: 0000000000000000 R12: ffff961581909c10
R13: ffff9615b0c32000 R14: ffff9614f3356ab0 R15: ffff9614be779000
FS: 00007f2ce2841e80(0000) GS:ffff9615bae00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000555f18780000 CR3: 0000000131d34005 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
btrfs_start_dirty_block_groups+0x398/0x4e0 [btrfs]
btrfs_commit_transaction+0xd0/0xc50 [btrfs]
? btrfs_attach_transaction_barrier+0x1e/0x50 [btrfs]
? __ia32_sys_fdatasync+0x20/0x20
iterate_supers+0xdb/0x180
ksys_sync+0x60/0xb0
__ia32_sys_sync+0xa/0x10
do_syscall_64+0x5c/0x280
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f2ce1d4d5b7
Code: 83 c4 08 48 3d 01 (...)
RSP: 002b:00007ffd8b558c58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a2
RAX: ffffffffffffffda RBX: 000000000000002c RCX: 00007f2ce1d4d5b7
RDX: 00000000ffffffff RSI: 00000000186ba07b RDI: 000000000000002c
RBP: 0000555f17b9e520 R08: 0000000000000012 R09: 000000000000ce00
R10: 0000000000000078 R11: 0000000000000202 R12: 0000000000000032
R13: 0000000051eb851f R14: 00007ffd8b558cd0 R15: 0000555f1798ec20
irq event stamp: 0
hardirqs last enabled at (0): [<0000000000000000>] 0x0
hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last disabled at (0): [<0000000000000000>] 0x0
---[ end trace bd7c03622e0b0a9c ]---
Fix this simply by making btrfs_remove_block_group() remove the block
group's item from the extent tree before it flags the block group as
removed. Also make the free space deletion from the free space tree
before flagging the block group as removed, to avoid a similar race
with adding and removing free space entries for the free space tree.
Fixes: 04216820fe83d5 ("Btrfs: fix race between fs trimming and block group remove/allocation")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6462dd0b155c..c037ef514b64 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1092,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
+ /*
+ * Remove the free space for the block group from the free space tree
+ * and the block group's item from the extent tree before marking the
+ * block group as removed. This is to prevent races with tasks that
+ * freeze and unfreeze a block group, this task and another task
+ * allocating a new block group - the unfreeze task ends up removing
+ * the block group's extent map before the task calling this function
+ * deletes the block group item from the extent tree, allowing for
+ * another task to attempt to create another block group with the same
+ * item key (and failing with -EEXIST and a transaction abort).
+ */
+ ret = remove_block_group_free_space(trans, block_group);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_item(trans, path, block_group);
+ if (ret < 0)
+ goto out;
+
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
@@ -1126,14 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, block_group);
- if (ret)
- goto out;
-
- ret = remove_block_group_item(trans, path, block_group);
- if (ret < 0)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ffcb9d44572afbaf8fa6dbf5115bff6dab7b299e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:19 +0100
Subject: [PATCH] btrfs: fix race between block group removal and block group
creation
There is a race between block group removal and block group creation
when the removal is completed by a task running fitrim or scrub. When
this happens we end up failing the block group creation with an error
-EEXIST since we attempt to insert a duplicate block group item key
in the extent tree. That results in a transaction abort.
The race happens like this:
1) Task A is doing a fitrim, and at btrfs_trim_block_group() it freezes
block group X with btrfs_freeze_block_group() (until very recently
that was named btrfs_get_block_group_trimming());
2) Task B starts removing block group X, either because it's now unused
or due to relocation for example. So at btrfs_remove_block_group(),
while holding the chunk mutex and the block group's lock, it sets
the 'removed' flag of the block group and it sets the local variable
'remove_em' to false, because the block group is currently frozen
(its 'frozen' counter is > 0, until very recently this counter was
named 'trimming');
3) Task B unlocks the block group and the chunk mutex;
4) Task A is done trimming the block group and unfreezes the block group
by calling btrfs_unfreeze_block_group() (until very recently this was
named btrfs_put_block_group_trimming()). In this function we lock the
block group and set the local variable 'cleanup' to true because we
were able to decrement the block group's 'frozen' counter down to 0 and
the flag 'removed' is set in the block group.
Since 'cleanup' is set to true, it locks the chunk mutex and removes
the extent mapping representing the block group from the mapping tree;
5) Task C allocates a new block group Y and it picks up the logical address
that block group X had as the logical address for Y, because X was the
block group with the highest logical address and now the second block
group with the highest logical address, the last in the fs mapping tree,
ends at an offset corresponding to block group X's logical address (this
logical address selection is done at volumes.c:find_next_chunk()).
At this point the new block group Y does not have yet its item added
to the extent tree (nor the corresponding device extent items and
chunk item in the device and chunk trees). The new group Y is added to
the list of pending block groups in the transaction handle;
6) Before task B proceeds to removing the block group item for block
group X from the extent tree, which has a key matching:
(X logical offset, BTRFS_BLOCK_GROUP_ITEM_KEY, length)
task C while ending its transaction handle calls
btrfs_create_pending_block_groups(), which finds block group Y and
tries to insert the block group item for Y into the exten tree, which
fails with -EEXIST since logical offset is the same that X had and
task B hasn't yet deleted the key from the extent tree.
This failure results in a transaction abort, producing a stack like
the following:
------------[ cut here ]------------
BTRFS: Transaction aborted (error -17)
WARNING: CPU: 2 PID: 19736 at fs/btrfs/block-group.c:2074 btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
CPU: 2 PID: 19736 Comm: fsstress Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Code: ff ff ff 48 8b 55 50 f0 48 (...)
RSP: 0018:ffffa4160a1c7d58 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff961581909d98 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffffffffb3d63990 RDI: 0000000000000001
RBP: ffff9614f3356a58 R08: 0000000000000000 R09: 0000000000000001
R10: ffff9615b65b0040 R11: 0000000000000000 R12: ffff961581909c10
R13: ffff9615b0c32000 R14: ffff9614f3356ab0 R15: ffff9614be779000
FS: 00007f2ce2841e80(0000) GS:ffff9615bae00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000555f18780000 CR3: 0000000131d34005 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
btrfs_start_dirty_block_groups+0x398/0x4e0 [btrfs]
btrfs_commit_transaction+0xd0/0xc50 [btrfs]
? btrfs_attach_transaction_barrier+0x1e/0x50 [btrfs]
? __ia32_sys_fdatasync+0x20/0x20
iterate_supers+0xdb/0x180
ksys_sync+0x60/0xb0
__ia32_sys_sync+0xa/0x10
do_syscall_64+0x5c/0x280
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f2ce1d4d5b7
Code: 83 c4 08 48 3d 01 (...)
RSP: 002b:00007ffd8b558c58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a2
RAX: ffffffffffffffda RBX: 000000000000002c RCX: 00007f2ce1d4d5b7
RDX: 00000000ffffffff RSI: 00000000186ba07b RDI: 000000000000002c
RBP: 0000555f17b9e520 R08: 0000000000000012 R09: 000000000000ce00
R10: 0000000000000078 R11: 0000000000000202 R12: 0000000000000032
R13: 0000000051eb851f R14: 00007ffd8b558cd0 R15: 0000555f1798ec20
irq event stamp: 0
hardirqs last enabled at (0): [<0000000000000000>] 0x0
hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last disabled at (0): [<0000000000000000>] 0x0
---[ end trace bd7c03622e0b0a9c ]---
Fix this simply by making btrfs_remove_block_group() remove the block
group's item from the extent tree before it flags the block group as
removed. Also make the free space deletion from the free space tree
before flagging the block group as removed, to avoid a similar race
with adding and removing free space entries for the free space tree.
Fixes: 04216820fe83d5 ("Btrfs: fix race between fs trimming and block group remove/allocation")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6462dd0b155c..c037ef514b64 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1092,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
+ /*
+ * Remove the free space for the block group from the free space tree
+ * and the block group's item from the extent tree before marking the
+ * block group as removed. This is to prevent races with tasks that
+ * freeze and unfreeze a block group, this task and another task
+ * allocating a new block group - the unfreeze task ends up removing
+ * the block group's extent map before the task calling this function
+ * deletes the block group item from the extent tree, allowing for
+ * another task to attempt to create another block group with the same
+ * item key (and failing with -EEXIST and a transaction abort).
+ */
+ ret = remove_block_group_free_space(trans, block_group);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_item(trans, path, block_group);
+ if (ret < 0)
+ goto out;
+
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
@@ -1126,14 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, block_group);
- if (ret)
- goto out;
-
- ret = remove_block_group_item(trans, path, block_group);
- if (ret < 0)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ffcb9d44572afbaf8fa6dbf5115bff6dab7b299e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:19 +0100
Subject: [PATCH] btrfs: fix race between block group removal and block group
creation
There is a race between block group removal and block group creation
when the removal is completed by a task running fitrim or scrub. When
this happens we end up failing the block group creation with an error
-EEXIST since we attempt to insert a duplicate block group item key
in the extent tree. That results in a transaction abort.
The race happens like this:
1) Task A is doing a fitrim, and at btrfs_trim_block_group() it freezes
block group X with btrfs_freeze_block_group() (until very recently
that was named btrfs_get_block_group_trimming());
2) Task B starts removing block group X, either because it's now unused
or due to relocation for example. So at btrfs_remove_block_group(),
while holding the chunk mutex and the block group's lock, it sets
the 'removed' flag of the block group and it sets the local variable
'remove_em' to false, because the block group is currently frozen
(its 'frozen' counter is > 0, until very recently this counter was
named 'trimming');
3) Task B unlocks the block group and the chunk mutex;
4) Task A is done trimming the block group and unfreezes the block group
by calling btrfs_unfreeze_block_group() (until very recently this was
named btrfs_put_block_group_trimming()). In this function we lock the
block group and set the local variable 'cleanup' to true because we
were able to decrement the block group's 'frozen' counter down to 0 and
the flag 'removed' is set in the block group.
Since 'cleanup' is set to true, it locks the chunk mutex and removes
the extent mapping representing the block group from the mapping tree;
5) Task C allocates a new block group Y and it picks up the logical address
that block group X had as the logical address for Y, because X was the
block group with the highest logical address and now the second block
group with the highest logical address, the last in the fs mapping tree,
ends at an offset corresponding to block group X's logical address (this
logical address selection is done at volumes.c:find_next_chunk()).
At this point the new block group Y does not have yet its item added
to the extent tree (nor the corresponding device extent items and
chunk item in the device and chunk trees). The new group Y is added to
the list of pending block groups in the transaction handle;
6) Before task B proceeds to removing the block group item for block
group X from the extent tree, which has a key matching:
(X logical offset, BTRFS_BLOCK_GROUP_ITEM_KEY, length)
task C while ending its transaction handle calls
btrfs_create_pending_block_groups(), which finds block group Y and
tries to insert the block group item for Y into the exten tree, which
fails with -EEXIST since logical offset is the same that X had and
task B hasn't yet deleted the key from the extent tree.
This failure results in a transaction abort, producing a stack like
the following:
------------[ cut here ]------------
BTRFS: Transaction aborted (error -17)
WARNING: CPU: 2 PID: 19736 at fs/btrfs/block-group.c:2074 btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Modules linked in: btrfs blake2b_generic xor raid6_pq (...)
CPU: 2 PID: 19736 Comm: fsstress Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs]
Code: ff ff ff 48 8b 55 50 f0 48 (...)
RSP: 0018:ffffa4160a1c7d58 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff961581909d98 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffffffffb3d63990 RDI: 0000000000000001
RBP: ffff9614f3356a58 R08: 0000000000000000 R09: 0000000000000001
R10: ffff9615b65b0040 R11: 0000000000000000 R12: ffff961581909c10
R13: ffff9615b0c32000 R14: ffff9614f3356ab0 R15: ffff9614be779000
FS: 00007f2ce2841e80(0000) GS:ffff9615bae00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000555f18780000 CR3: 0000000131d34005 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
btrfs_start_dirty_block_groups+0x398/0x4e0 [btrfs]
btrfs_commit_transaction+0xd0/0xc50 [btrfs]
? btrfs_attach_transaction_barrier+0x1e/0x50 [btrfs]
? __ia32_sys_fdatasync+0x20/0x20
iterate_supers+0xdb/0x180
ksys_sync+0x60/0xb0
__ia32_sys_sync+0xa/0x10
do_syscall_64+0x5c/0x280
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f2ce1d4d5b7
Code: 83 c4 08 48 3d 01 (...)
RSP: 002b:00007ffd8b558c58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a2
RAX: ffffffffffffffda RBX: 000000000000002c RCX: 00007f2ce1d4d5b7
RDX: 00000000ffffffff RSI: 00000000186ba07b RDI: 000000000000002c
RBP: 0000555f17b9e520 R08: 0000000000000012 R09: 000000000000ce00
R10: 0000000000000078 R11: 0000000000000202 R12: 0000000000000032
R13: 0000000051eb851f R14: 00007ffd8b558cd0 R15: 0000555f1798ec20
irq event stamp: 0
hardirqs last enabled at (0): [<0000000000000000>] 0x0
hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
softirqs last disabled at (0): [<0000000000000000>] 0x0
---[ end trace bd7c03622e0b0a9c ]---
Fix this simply by making btrfs_remove_block_group() remove the block
group's item from the extent tree before it flags the block group as
removed. Also make the free space deletion from the free space tree
before flagging the block group as removed, to avoid a similar race
with adding and removing free space entries for the free space tree.
Fixes: 04216820fe83d5 ("Btrfs: fix race between fs trimming and block group remove/allocation")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6462dd0b155c..c037ef514b64 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1092,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
+ /*
+ * Remove the free space for the block group from the free space tree
+ * and the block group's item from the extent tree before marking the
+ * block group as removed. This is to prevent races with tasks that
+ * freeze and unfreeze a block group, this task and another task
+ * allocating a new block group - the unfreeze task ends up removing
+ * the block group's extent map before the task calling this function
+ * deletes the block group item from the extent tree, allowing for
+ * another task to attempt to create another block group with the same
+ * item key (and failing with -EEXIST and a transaction abort).
+ */
+ ret = remove_block_group_free_space(trans, block_group);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_item(trans, path, block_group);
+ if (ret < 0)
+ goto out;
+
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
@@ -1126,14 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, block_group);
- if (ret)
- goto out;
-
- ret = remove_block_group_item(trans, path, block_group);
- if (ret < 0)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6bd335b469f945f75474c11e3f577f85409f39c3 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 8 Jun 2020 13:33:05 +0100
Subject: [PATCH] btrfs: fix bytes_may_use underflow when running balance and
scrub in parallel
When balance and scrub are running in parallel it is possible to end up
with an underflow of the bytes_may_use counter of the data space_info
object, which triggers a warning like the following:
[134243.793196] BTRFS info (device sdc): relocating block group 1104150528 flags data
[134243.806891] ------------[ cut here ]------------
[134243.807561] WARNING: CPU: 1 PID: 26884 at fs/btrfs/space-info.h:125 btrfs_add_reserved_bytes+0x1da/0x280 [btrfs]
[134243.808819] Modules linked in: btrfs blake2b_generic xor (...)
[134243.815779] CPU: 1 PID: 26884 Comm: kworker/u8:8 Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
[134243.816944] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[134243.818389] Workqueue: writeback wb_workfn (flush-btrfs-108483)
[134243.819186] RIP: 0010:btrfs_add_reserved_bytes+0x1da/0x280 [btrfs]
[134243.819963] Code: 0b f2 85 (...)
[134243.822271] RSP: 0018:ffffa4160aae7510 EFLAGS: 00010287
[134243.822929] RAX: 000000000000c000 RBX: ffff96159a8c1000 RCX: 0000000000000000
[134243.823816] RDX: 0000000000008000 RSI: 0000000000000000 RDI: ffff96158067a810
[134243.824742] RBP: ffff96158067a800 R08: 0000000000000001 R09: 0000000000000000
[134243.825636] R10: ffff961501432a40 R11: 0000000000000000 R12: 000000000000c000
[134243.826532] R13: 0000000000000001 R14: ffffffffffff4000 R15: ffff96158067a810
[134243.827432] FS: 0000000000000000(0000) GS:ffff9615baa00000(0000) knlGS:0000000000000000
[134243.828451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[134243.829184] CR2: 000055bd7e414000 CR3: 00000001077be004 CR4: 00000000003606e0
[134243.830083] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[134243.830975] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[134243.831867] Call Trace:
[134243.832211] find_free_extent+0x4a0/0x16c0 [btrfs]
[134243.832846] btrfs_reserve_extent+0x91/0x180 [btrfs]
[134243.833487] cow_file_range+0x12d/0x490 [btrfs]
[134243.834080] fallback_to_cow+0x82/0x1b0 [btrfs]
[134243.834689] ? release_extent_buffer+0x121/0x170 [btrfs]
[134243.835370] run_delalloc_nocow+0x33f/0xa30 [btrfs]
[134243.836032] btrfs_run_delalloc_range+0x1ea/0x6d0 [btrfs]
[134243.836725] ? find_lock_delalloc_range+0x221/0x250 [btrfs]
[134243.837450] writepage_delalloc+0xe8/0x150 [btrfs]
[134243.838059] __extent_writepage+0xe8/0x4c0 [btrfs]
[134243.838674] extent_write_cache_pages+0x237/0x530 [btrfs]
[134243.839364] extent_writepages+0x44/0xa0 [btrfs]
[134243.839946] do_writepages+0x23/0x80
[134243.840401] __writeback_single_inode+0x59/0x700
[134243.841006] writeback_sb_inodes+0x267/0x5f0
[134243.841548] __writeback_inodes_wb+0x87/0xe0
[134243.842091] wb_writeback+0x382/0x590
[134243.842574] ? wb_workfn+0x4a2/0x6c0
[134243.843030] wb_workfn+0x4a2/0x6c0
[134243.843468] process_one_work+0x26d/0x6a0
[134243.843978] worker_thread+0x4f/0x3e0
[134243.844452] ? process_one_work+0x6a0/0x6a0
[134243.844981] kthread+0x103/0x140
[134243.845400] ? kthread_create_worker_on_cpu+0x70/0x70
[134243.846030] ret_from_fork+0x3a/0x50
[134243.846494] irq event stamp: 0
[134243.846892] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[134243.847682] hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134243.848687] softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134243.849913] softirqs last disabled at (0): [<0000000000000000>] 0x0
[134243.850698] ---[ end trace bd7c03622e0b0a96 ]---
[134243.851335] ------------[ cut here ]------------
When relocating a data block group, for each extent allocated in the
block group we preallocate another extent with the same size for the
data relocation inode (we do it at prealloc_file_extent_cluster()).
We reserve space by calling btrfs_check_data_free_space(), which ends
up incrementing the data space_info's bytes_may_use counter, and
then call btrfs_prealloc_file_range() to allocate the extent, which
always decrements the bytes_may_use counter by the same amount.
The expectation is that writeback of the data relocation inode always
follows a NOCOW path, by writing into the preallocated extents. However,
when starting writeback we might end up falling back into the COW path,
because the block group that contains the preallocated extent was turned
into RO mode by a scrub running in parallel. The COW path then calls the
extent allocator which ends up calling btrfs_add_reserved_bytes(), and
this function decrements the bytes_may_use counter of the data space_info
object by an amount corresponding to the size of the allocated extent,
despite we haven't previously incremented it. When the counter currently
has a value smaller then the allocated extent we reset the counter to 0
and emit a warning, otherwise we just decrement it and slowly mess up
with this counter which is crucial for space reservation, the end result
can be granting reserved space to tasks when there isn't really enough
free space, and having the tasks fail later in critical places where
error handling consists of a transaction abort or hitting a BUG_ON().
Fix this by making sure that if we fallback to the COW path for a data
relocation inode, we increment the bytes_may_use counter of the data
space_info object. The COW path will then decrement it at
btrfs_add_reserved_bytes() on success or through its error handling part
by a call to extent_clear_unlock_delalloc() (which ends up calling
btrfs_clear_delalloc_extent() that does the decrement operation) in case
of an error.
Test case btrfs/061 from fstests could sporadically trigger this.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 62c3f4972ff6..62b49d2db928 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1378,6 +1378,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
int *page_started, unsigned long *nr_written)
{
const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 range_start = start;
@@ -1408,18 +1410,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
* data space info, which we incremented in the step above.
*
* If we need to fallback to cow and the inode corresponds to a free
- * space cache inode, we must also increment bytes_may_use of the data
- * space_info for the same reason. Space caches always get a prealloc
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
* extent for them, however scrub or balance may have set the block
- * group that contains that extent to RO mode.
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
*/
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
- if (count > 0 || is_space_ino) {
- const u64 bytes = is_space_ino ? range_bytes : count;
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6bd335b469f945f75474c11e3f577f85409f39c3 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 8 Jun 2020 13:33:05 +0100
Subject: [PATCH] btrfs: fix bytes_may_use underflow when running balance and
scrub in parallel
When balance and scrub are running in parallel it is possible to end up
with an underflow of the bytes_may_use counter of the data space_info
object, which triggers a warning like the following:
[134243.793196] BTRFS info (device sdc): relocating block group 1104150528 flags data
[134243.806891] ------------[ cut here ]------------
[134243.807561] WARNING: CPU: 1 PID: 26884 at fs/btrfs/space-info.h:125 btrfs_add_reserved_bytes+0x1da/0x280 [btrfs]
[134243.808819] Modules linked in: btrfs blake2b_generic xor (...)
[134243.815779] CPU: 1 PID: 26884 Comm: kworker/u8:8 Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
[134243.816944] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[134243.818389] Workqueue: writeback wb_workfn (flush-btrfs-108483)
[134243.819186] RIP: 0010:btrfs_add_reserved_bytes+0x1da/0x280 [btrfs]
[134243.819963] Code: 0b f2 85 (...)
[134243.822271] RSP: 0018:ffffa4160aae7510 EFLAGS: 00010287
[134243.822929] RAX: 000000000000c000 RBX: ffff96159a8c1000 RCX: 0000000000000000
[134243.823816] RDX: 0000000000008000 RSI: 0000000000000000 RDI: ffff96158067a810
[134243.824742] RBP: ffff96158067a800 R08: 0000000000000001 R09: 0000000000000000
[134243.825636] R10: ffff961501432a40 R11: 0000000000000000 R12: 000000000000c000
[134243.826532] R13: 0000000000000001 R14: ffffffffffff4000 R15: ffff96158067a810
[134243.827432] FS: 0000000000000000(0000) GS:ffff9615baa00000(0000) knlGS:0000000000000000
[134243.828451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[134243.829184] CR2: 000055bd7e414000 CR3: 00000001077be004 CR4: 00000000003606e0
[134243.830083] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[134243.830975] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[134243.831867] Call Trace:
[134243.832211] find_free_extent+0x4a0/0x16c0 [btrfs]
[134243.832846] btrfs_reserve_extent+0x91/0x180 [btrfs]
[134243.833487] cow_file_range+0x12d/0x490 [btrfs]
[134243.834080] fallback_to_cow+0x82/0x1b0 [btrfs]
[134243.834689] ? release_extent_buffer+0x121/0x170 [btrfs]
[134243.835370] run_delalloc_nocow+0x33f/0xa30 [btrfs]
[134243.836032] btrfs_run_delalloc_range+0x1ea/0x6d0 [btrfs]
[134243.836725] ? find_lock_delalloc_range+0x221/0x250 [btrfs]
[134243.837450] writepage_delalloc+0xe8/0x150 [btrfs]
[134243.838059] __extent_writepage+0xe8/0x4c0 [btrfs]
[134243.838674] extent_write_cache_pages+0x237/0x530 [btrfs]
[134243.839364] extent_writepages+0x44/0xa0 [btrfs]
[134243.839946] do_writepages+0x23/0x80
[134243.840401] __writeback_single_inode+0x59/0x700
[134243.841006] writeback_sb_inodes+0x267/0x5f0
[134243.841548] __writeback_inodes_wb+0x87/0xe0
[134243.842091] wb_writeback+0x382/0x590
[134243.842574] ? wb_workfn+0x4a2/0x6c0
[134243.843030] wb_workfn+0x4a2/0x6c0
[134243.843468] process_one_work+0x26d/0x6a0
[134243.843978] worker_thread+0x4f/0x3e0
[134243.844452] ? process_one_work+0x6a0/0x6a0
[134243.844981] kthread+0x103/0x140
[134243.845400] ? kthread_create_worker_on_cpu+0x70/0x70
[134243.846030] ret_from_fork+0x3a/0x50
[134243.846494] irq event stamp: 0
[134243.846892] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[134243.847682] hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134243.848687] softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134243.849913] softirqs last disabled at (0): [<0000000000000000>] 0x0
[134243.850698] ---[ end trace bd7c03622e0b0a96 ]---
[134243.851335] ------------[ cut here ]------------
When relocating a data block group, for each extent allocated in the
block group we preallocate another extent with the same size for the
data relocation inode (we do it at prealloc_file_extent_cluster()).
We reserve space by calling btrfs_check_data_free_space(), which ends
up incrementing the data space_info's bytes_may_use counter, and
then call btrfs_prealloc_file_range() to allocate the extent, which
always decrements the bytes_may_use counter by the same amount.
The expectation is that writeback of the data relocation inode always
follows a NOCOW path, by writing into the preallocated extents. However,
when starting writeback we might end up falling back into the COW path,
because the block group that contains the preallocated extent was turned
into RO mode by a scrub running in parallel. The COW path then calls the
extent allocator which ends up calling btrfs_add_reserved_bytes(), and
this function decrements the bytes_may_use counter of the data space_info
object by an amount corresponding to the size of the allocated extent,
despite we haven't previously incremented it. When the counter currently
has a value smaller then the allocated extent we reset the counter to 0
and emit a warning, otherwise we just decrement it and slowly mess up
with this counter which is crucial for space reservation, the end result
can be granting reserved space to tasks when there isn't really enough
free space, and having the tasks fail later in critical places where
error handling consists of a transaction abort or hitting a BUG_ON().
Fix this by making sure that if we fallback to the COW path for a data
relocation inode, we increment the bytes_may_use counter of the data
space_info object. The COW path will then decrement it at
btrfs_add_reserved_bytes() on success or through its error handling part
by a call to extent_clear_unlock_delalloc() (which ends up calling
btrfs_clear_delalloc_extent() that does the decrement operation) in case
of an error.
Test case btrfs/061 from fstests could sporadically trigger this.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 62c3f4972ff6..62b49d2db928 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1378,6 +1378,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
int *page_started, unsigned long *nr_written)
{
const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 range_start = start;
@@ -1408,18 +1410,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
* data space info, which we incremented in the step above.
*
* If we need to fallback to cow and the inode corresponds to a free
- * space cache inode, we must also increment bytes_may_use of the data
- * space_info for the same reason. Space caches always get a prealloc
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
* extent for them, however scrub or balance may have set the block
- * group that contains that extent to RO mode.
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
*/
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
- if (count > 0 || is_space_ino) {
- const u64 bytes = is_space_ino ? range_bytes : count;
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6bd335b469f945f75474c11e3f577f85409f39c3 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 8 Jun 2020 13:33:05 +0100
Subject: [PATCH] btrfs: fix bytes_may_use underflow when running balance and
scrub in parallel
When balance and scrub are running in parallel it is possible to end up
with an underflow of the bytes_may_use counter of the data space_info
object, which triggers a warning like the following:
[134243.793196] BTRFS info (device sdc): relocating block group 1104150528 flags data
[134243.806891] ------------[ cut here ]------------
[134243.807561] WARNING: CPU: 1 PID: 26884 at fs/btrfs/space-info.h:125 btrfs_add_reserved_bytes+0x1da/0x280 [btrfs]
[134243.808819] Modules linked in: btrfs blake2b_generic xor (...)
[134243.815779] CPU: 1 PID: 26884 Comm: kworker/u8:8 Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
[134243.816944] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[134243.818389] Workqueue: writeback wb_workfn (flush-btrfs-108483)
[134243.819186] RIP: 0010:btrfs_add_reserved_bytes+0x1da/0x280 [btrfs]
[134243.819963] Code: 0b f2 85 (...)
[134243.822271] RSP: 0018:ffffa4160aae7510 EFLAGS: 00010287
[134243.822929] RAX: 000000000000c000 RBX: ffff96159a8c1000 RCX: 0000000000000000
[134243.823816] RDX: 0000000000008000 RSI: 0000000000000000 RDI: ffff96158067a810
[134243.824742] RBP: ffff96158067a800 R08: 0000000000000001 R09: 0000000000000000
[134243.825636] R10: ffff961501432a40 R11: 0000000000000000 R12: 000000000000c000
[134243.826532] R13: 0000000000000001 R14: ffffffffffff4000 R15: ffff96158067a810
[134243.827432] FS: 0000000000000000(0000) GS:ffff9615baa00000(0000) knlGS:0000000000000000
[134243.828451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[134243.829184] CR2: 000055bd7e414000 CR3: 00000001077be004 CR4: 00000000003606e0
[134243.830083] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[134243.830975] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[134243.831867] Call Trace:
[134243.832211] find_free_extent+0x4a0/0x16c0 [btrfs]
[134243.832846] btrfs_reserve_extent+0x91/0x180 [btrfs]
[134243.833487] cow_file_range+0x12d/0x490 [btrfs]
[134243.834080] fallback_to_cow+0x82/0x1b0 [btrfs]
[134243.834689] ? release_extent_buffer+0x121/0x170 [btrfs]
[134243.835370] run_delalloc_nocow+0x33f/0xa30 [btrfs]
[134243.836032] btrfs_run_delalloc_range+0x1ea/0x6d0 [btrfs]
[134243.836725] ? find_lock_delalloc_range+0x221/0x250 [btrfs]
[134243.837450] writepage_delalloc+0xe8/0x150 [btrfs]
[134243.838059] __extent_writepage+0xe8/0x4c0 [btrfs]
[134243.838674] extent_write_cache_pages+0x237/0x530 [btrfs]
[134243.839364] extent_writepages+0x44/0xa0 [btrfs]
[134243.839946] do_writepages+0x23/0x80
[134243.840401] __writeback_single_inode+0x59/0x700
[134243.841006] writeback_sb_inodes+0x267/0x5f0
[134243.841548] __writeback_inodes_wb+0x87/0xe0
[134243.842091] wb_writeback+0x382/0x590
[134243.842574] ? wb_workfn+0x4a2/0x6c0
[134243.843030] wb_workfn+0x4a2/0x6c0
[134243.843468] process_one_work+0x26d/0x6a0
[134243.843978] worker_thread+0x4f/0x3e0
[134243.844452] ? process_one_work+0x6a0/0x6a0
[134243.844981] kthread+0x103/0x140
[134243.845400] ? kthread_create_worker_on_cpu+0x70/0x70
[134243.846030] ret_from_fork+0x3a/0x50
[134243.846494] irq event stamp: 0
[134243.846892] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[134243.847682] hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134243.848687] softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134243.849913] softirqs last disabled at (0): [<0000000000000000>] 0x0
[134243.850698] ---[ end trace bd7c03622e0b0a96 ]---
[134243.851335] ------------[ cut here ]------------
When relocating a data block group, for each extent allocated in the
block group we preallocate another extent with the same size for the
data relocation inode (we do it at prealloc_file_extent_cluster()).
We reserve space by calling btrfs_check_data_free_space(), which ends
up incrementing the data space_info's bytes_may_use counter, and
then call btrfs_prealloc_file_range() to allocate the extent, which
always decrements the bytes_may_use counter by the same amount.
The expectation is that writeback of the data relocation inode always
follows a NOCOW path, by writing into the preallocated extents. However,
when starting writeback we might end up falling back into the COW path,
because the block group that contains the preallocated extent was turned
into RO mode by a scrub running in parallel. The COW path then calls the
extent allocator which ends up calling btrfs_add_reserved_bytes(), and
this function decrements the bytes_may_use counter of the data space_info
object by an amount corresponding to the size of the allocated extent,
despite we haven't previously incremented it. When the counter currently
has a value smaller then the allocated extent we reset the counter to 0
and emit a warning, otherwise we just decrement it and slowly mess up
with this counter which is crucial for space reservation, the end result
can be granting reserved space to tasks when there isn't really enough
free space, and having the tasks fail later in critical places where
error handling consists of a transaction abort or hitting a BUG_ON().
Fix this by making sure that if we fallback to the COW path for a data
relocation inode, we increment the bytes_may_use counter of the data
space_info object. The COW path will then decrement it at
btrfs_add_reserved_bytes() on success or through its error handling part
by a call to extent_clear_unlock_delalloc() (which ends up calling
btrfs_clear_delalloc_extent() that does the decrement operation) in case
of an error.
Test case btrfs/061 from fstests could sporadically trigger this.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 62c3f4972ff6..62b49d2db928 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1378,6 +1378,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
int *page_started, unsigned long *nr_written)
{
const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 range_start = start;
@@ -1408,18 +1410,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
* data space info, which we incremented in the step above.
*
* If we need to fallback to cow and the inode corresponds to a free
- * space cache inode, we must also increment bytes_may_use of the data
- * space_info for the same reason. Space caches always get a prealloc
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
* extent for them, however scrub or balance may have set the block
- * group that contains that extent to RO mode.
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
*/
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
- if (count > 0 || is_space_ino) {
- const u64 bytes = is_space_ino ? range_bytes : count;
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6bd335b469f945f75474c11e3f577f85409f39c3 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 8 Jun 2020 13:33:05 +0100
Subject: [PATCH] btrfs: fix bytes_may_use underflow when running balance and
scrub in parallel
When balance and scrub are running in parallel it is possible to end up
with an underflow of the bytes_may_use counter of the data space_info
object, which triggers a warning like the following:
[134243.793196] BTRFS info (device sdc): relocating block group 1104150528 flags data
[134243.806891] ------------[ cut here ]------------
[134243.807561] WARNING: CPU: 1 PID: 26884 at fs/btrfs/space-info.h:125 btrfs_add_reserved_bytes+0x1da/0x280 [btrfs]
[134243.808819] Modules linked in: btrfs blake2b_generic xor (...)
[134243.815779] CPU: 1 PID: 26884 Comm: kworker/u8:8 Tainted: G W 5.6.0-rc7-btrfs-next-58 #5
[134243.816944] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[134243.818389] Workqueue: writeback wb_workfn (flush-btrfs-108483)
[134243.819186] RIP: 0010:btrfs_add_reserved_bytes+0x1da/0x280 [btrfs]
[134243.819963] Code: 0b f2 85 (...)
[134243.822271] RSP: 0018:ffffa4160aae7510 EFLAGS: 00010287
[134243.822929] RAX: 000000000000c000 RBX: ffff96159a8c1000 RCX: 0000000000000000
[134243.823816] RDX: 0000000000008000 RSI: 0000000000000000 RDI: ffff96158067a810
[134243.824742] RBP: ffff96158067a800 R08: 0000000000000001 R09: 0000000000000000
[134243.825636] R10: ffff961501432a40 R11: 0000000000000000 R12: 000000000000c000
[134243.826532] R13: 0000000000000001 R14: ffffffffffff4000 R15: ffff96158067a810
[134243.827432] FS: 0000000000000000(0000) GS:ffff9615baa00000(0000) knlGS:0000000000000000
[134243.828451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[134243.829184] CR2: 000055bd7e414000 CR3: 00000001077be004 CR4: 00000000003606e0
[134243.830083] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[134243.830975] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[134243.831867] Call Trace:
[134243.832211] find_free_extent+0x4a0/0x16c0 [btrfs]
[134243.832846] btrfs_reserve_extent+0x91/0x180 [btrfs]
[134243.833487] cow_file_range+0x12d/0x490 [btrfs]
[134243.834080] fallback_to_cow+0x82/0x1b0 [btrfs]
[134243.834689] ? release_extent_buffer+0x121/0x170 [btrfs]
[134243.835370] run_delalloc_nocow+0x33f/0xa30 [btrfs]
[134243.836032] btrfs_run_delalloc_range+0x1ea/0x6d0 [btrfs]
[134243.836725] ? find_lock_delalloc_range+0x221/0x250 [btrfs]
[134243.837450] writepage_delalloc+0xe8/0x150 [btrfs]
[134243.838059] __extent_writepage+0xe8/0x4c0 [btrfs]
[134243.838674] extent_write_cache_pages+0x237/0x530 [btrfs]
[134243.839364] extent_writepages+0x44/0xa0 [btrfs]
[134243.839946] do_writepages+0x23/0x80
[134243.840401] __writeback_single_inode+0x59/0x700
[134243.841006] writeback_sb_inodes+0x267/0x5f0
[134243.841548] __writeback_inodes_wb+0x87/0xe0
[134243.842091] wb_writeback+0x382/0x590
[134243.842574] ? wb_workfn+0x4a2/0x6c0
[134243.843030] wb_workfn+0x4a2/0x6c0
[134243.843468] process_one_work+0x26d/0x6a0
[134243.843978] worker_thread+0x4f/0x3e0
[134243.844452] ? process_one_work+0x6a0/0x6a0
[134243.844981] kthread+0x103/0x140
[134243.845400] ? kthread_create_worker_on_cpu+0x70/0x70
[134243.846030] ret_from_fork+0x3a/0x50
[134243.846494] irq event stamp: 0
[134243.846892] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[134243.847682] hardirqs last disabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134243.848687] softirqs last enabled at (0): [<ffffffffb2abdedf>] copy_process+0x74f/0x2020
[134243.849913] softirqs last disabled at (0): [<0000000000000000>] 0x0
[134243.850698] ---[ end trace bd7c03622e0b0a96 ]---
[134243.851335] ------------[ cut here ]------------
When relocating a data block group, for each extent allocated in the
block group we preallocate another extent with the same size for the
data relocation inode (we do it at prealloc_file_extent_cluster()).
We reserve space by calling btrfs_check_data_free_space(), which ends
up incrementing the data space_info's bytes_may_use counter, and
then call btrfs_prealloc_file_range() to allocate the extent, which
always decrements the bytes_may_use counter by the same amount.
The expectation is that writeback of the data relocation inode always
follows a NOCOW path, by writing into the preallocated extents. However,
when starting writeback we might end up falling back into the COW path,
because the block group that contains the preallocated extent was turned
into RO mode by a scrub running in parallel. The COW path then calls the
extent allocator which ends up calling btrfs_add_reserved_bytes(), and
this function decrements the bytes_may_use counter of the data space_info
object by an amount corresponding to the size of the allocated extent,
despite we haven't previously incremented it. When the counter currently
has a value smaller then the allocated extent we reset the counter to 0
and emit a warning, otherwise we just decrement it and slowly mess up
with this counter which is crucial for space reservation, the end result
can be granting reserved space to tasks when there isn't really enough
free space, and having the tasks fail later in critical places where
error handling consists of a transaction abort or hitting a BUG_ON().
Fix this by making sure that if we fallback to the COW path for a data
relocation inode, we increment the bytes_may_use counter of the data
space_info object. The COW path will then decrement it at
btrfs_add_reserved_bytes() on success or through its error handling part
by a call to extent_clear_unlock_delalloc() (which ends up calling
btrfs_clear_delalloc_extent() that does the decrement operation) in case
of an error.
Test case btrfs/061 from fstests could sporadically trigger this.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 62c3f4972ff6..62b49d2db928 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1378,6 +1378,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
int *page_started, unsigned long *nr_written)
{
const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 range_start = start;
@@ -1408,18 +1410,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
* data space info, which we incremented in the step above.
*
* If we need to fallback to cow and the inode corresponds to a free
- * space cache inode, we must also increment bytes_may_use of the data
- * space_info for the same reason. Space caches always get a prealloc
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
* extent for them, however scrub or balance may have set the block
- * group that contains that extent to RO mode.
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
*/
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
- if (count > 0 || is_space_ino) {
- const u64 bytes = is_space_ino ? range_bytes : count;
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9fecd13202f520f3f25d5b1c313adb740fe19773 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:06 +0100
Subject: [PATCH] btrfs: fix a block group ref counter leak after failure to
remove block group
When removing a block group, if we fail to delete the block group's item
from the extent tree, we jump to the 'out' label and end up decrementing
the block group's reference count once only (by 1), resulting in a counter
leak because the block group at that point was already removed from the
block group cache rbtree - so we have to decrement the reference count
twice, once for the rbtree and once for our lookup at the start of the
function.
There is a second bug where if removing the free space tree entries (the
call to remove_block_group_free_space()) fails we end up jumping to the
'out_put_group' label but end up decrementing the reference count only
once, when we should have done it twice, since we have already removed
the block group from the block group cache rbtree. This happens because
the reference count decrement for the rbtree reference happens after
attempting to remove the free space tree entries, which is far away from
the place where we remove the block group from the rbtree.
To make things less error prone, decrement the reference count for the
rbtree immediately after removing the block group from it. This also
eleminates the need for two different exit labels on error, renaming
'out_put_label' to just 'out' and removing the old 'out'.
Fixes: f6033c5e333238 ("btrfs: fix block group leak when removing fails")
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 176e8a292fd1..6462dd0b155c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -940,7 +940,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out_put_group;
+ goto out;
}
/*
@@ -978,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
- goto out_put_group;
+ goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
@@ -1001,13 +1001,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
- goto out_put_group;
+ goto out;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out_put_group;
+ goto out;
btrfs_release_path(path);
}
@@ -1016,6 +1016,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
+ /* Once for the block groups rbtree */
+ btrfs_put_block_group(block_group);
+
if (fs_info->first_logical_byte == block_group->start)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -1125,10 +1128,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = remove_block_group_free_space(trans, block_group);
if (ret)
- goto out_put_group;
-
- /* Once for the block groups rbtree */
- btrfs_put_block_group(block_group);
+ goto out;
ret = remove_block_group_item(trans, path, block_group);
if (ret < 0)
@@ -1145,10 +1145,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_extent_map(em);
}
-out_put_group:
+out:
/* Once for the lookup reference */
btrfs_put_block_group(block_group);
-out:
if (remove_rsv)
btrfs_delayed_refs_rsv_release(fs_info, 1);
btrfs_free_path(path);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9fecd13202f520f3f25d5b1c313adb740fe19773 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:06 +0100
Subject: [PATCH] btrfs: fix a block group ref counter leak after failure to
remove block group
When removing a block group, if we fail to delete the block group's item
from the extent tree, we jump to the 'out' label and end up decrementing
the block group's reference count once only (by 1), resulting in a counter
leak because the block group at that point was already removed from the
block group cache rbtree - so we have to decrement the reference count
twice, once for the rbtree and once for our lookup at the start of the
function.
There is a second bug where if removing the free space tree entries (the
call to remove_block_group_free_space()) fails we end up jumping to the
'out_put_group' label but end up decrementing the reference count only
once, when we should have done it twice, since we have already removed
the block group from the block group cache rbtree. This happens because
the reference count decrement for the rbtree reference happens after
attempting to remove the free space tree entries, which is far away from
the place where we remove the block group from the rbtree.
To make things less error prone, decrement the reference count for the
rbtree immediately after removing the block group from it. This also
eleminates the need for two different exit labels on error, renaming
'out_put_label' to just 'out' and removing the old 'out'.
Fixes: f6033c5e333238 ("btrfs: fix block group leak when removing fails")
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 176e8a292fd1..6462dd0b155c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -940,7 +940,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out_put_group;
+ goto out;
}
/*
@@ -978,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
- goto out_put_group;
+ goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
@@ -1001,13 +1001,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
- goto out_put_group;
+ goto out;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out_put_group;
+ goto out;
btrfs_release_path(path);
}
@@ -1016,6 +1016,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
+ /* Once for the block groups rbtree */
+ btrfs_put_block_group(block_group);
+
if (fs_info->first_logical_byte == block_group->start)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -1125,10 +1128,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = remove_block_group_free_space(trans, block_group);
if (ret)
- goto out_put_group;
-
- /* Once for the block groups rbtree */
- btrfs_put_block_group(block_group);
+ goto out;
ret = remove_block_group_item(trans, path, block_group);
if (ret < 0)
@@ -1145,10 +1145,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_extent_map(em);
}
-out_put_group:
+out:
/* Once for the lookup reference */
btrfs_put_block_group(block_group);
-out:
if (remove_rsv)
btrfs_delayed_refs_rsv_release(fs_info, 1);
btrfs_free_path(path);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9fecd13202f520f3f25d5b1c313adb740fe19773 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 1 Jun 2020 19:12:06 +0100
Subject: [PATCH] btrfs: fix a block group ref counter leak after failure to
remove block group
When removing a block group, if we fail to delete the block group's item
from the extent tree, we jump to the 'out' label and end up decrementing
the block group's reference count once only (by 1), resulting in a counter
leak because the block group at that point was already removed from the
block group cache rbtree - so we have to decrement the reference count
twice, once for the rbtree and once for our lookup at the start of the
function.
There is a second bug where if removing the free space tree entries (the
call to remove_block_group_free_space()) fails we end up jumping to the
'out_put_group' label but end up decrementing the reference count only
once, when we should have done it twice, since we have already removed
the block group from the block group cache rbtree. This happens because
the reference count decrement for the rbtree reference happens after
attempting to remove the free space tree entries, which is far away from
the place where we remove the block group from the rbtree.
To make things less error prone, decrement the reference count for the
rbtree immediately after removing the block group from it. This also
eleminates the need for two different exit labels on error, renaming
'out_put_label' to just 'out' and removing the old 'out'.
Fixes: f6033c5e333238 ("btrfs: fix block group leak when removing fails")
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 176e8a292fd1..6462dd0b155c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -940,7 +940,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out_put_group;
+ goto out;
}
/*
@@ -978,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
- goto out_put_group;
+ goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
@@ -1001,13 +1001,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
- goto out_put_group;
+ goto out;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out_put_group;
+ goto out;
btrfs_release_path(path);
}
@@ -1016,6 +1016,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
+ /* Once for the block groups rbtree */
+ btrfs_put_block_group(block_group);
+
if (fs_info->first_logical_byte == block_group->start)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -1125,10 +1128,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = remove_block_group_free_space(trans, block_group);
if (ret)
- goto out_put_group;
-
- /* Once for the block groups rbtree */
- btrfs_put_block_group(block_group);
+ goto out;
ret = remove_block_group_item(trans, path, block_group);
if (ret < 0)
@@ -1145,10 +1145,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_extent_map(em);
}
-out_put_group:
+out:
/* Once for the lookup reference */
btrfs_put_block_group(block_group);
-out:
if (remove_rsv)
btrfs_delayed_refs_rsv_release(fs_info, 1);
btrfs_free_path(path);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a13b9d0b97211579ea63b96c606de79b963c0f47 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook(a)chromium.org>
Date: Mon, 8 Jun 2020 20:15:09 -0700
Subject: [PATCH] x86/cpu: Use pinning mask for CR4 bits needing to be 0
The X86_CR4_FSGSBASE bit of CR4 should not change after boot[1]. Older
kernels should enforce this bit to zero, and newer kernels need to
enforce it depending on boot-time configuration (e.g. "nofsgsbase").
To support a pinned bit being either 1 or 0, use an explicit mask in
combination with the expected pinned bit values.
[1] https://lore.kernel.org/lkml/20200527103147.GI325280@hirez.programming.kick…
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/202006082013.71E29A42@keescook
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 043d93cdcaad..95c090a45b4b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -347,6 +347,9 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_clear_bits(X86_CR4_UMIP);
}
+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -371,20 +374,20 @@ EXPORT_SYMBOL(native_write_cr0);
void native_write_cr4(unsigned long val)
{
- unsigned long bits_missing = 0;
+ unsigned long bits_changed = 0;
set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
if (static_branch_likely(&cr_pinning)) {
- if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
- bits_missing = ~val & cr4_pinned_bits;
- val |= bits_missing;
+ if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+ bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+ val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
goto set_register;
}
- /* Warn after we've set the missing bits. */
- WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
- bits_missing);
+ /* Warn after we've corrected the changed bits. */
+ WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+ bits_changed);
}
}
#if IS_MODULE(CONFIG_LKDTM)
@@ -419,7 +422,7 @@ void cr4_init(void)
if (boot_cpu_has(X86_FEATURE_PCID))
cr4 |= X86_CR4_PCIDE;
if (static_branch_likely(&cr_pinning))
- cr4 |= cr4_pinned_bits;
+ cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
__write_cr4(cr4);
@@ -434,10 +437,7 @@ void cr4_init(void)
*/
static void __init setup_cr_pinning(void)
{
- unsigned long mask;
-
- mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
- cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
static_key_enable(&cr_pinning.key);
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a13b9d0b97211579ea63b96c606de79b963c0f47 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook(a)chromium.org>
Date: Mon, 8 Jun 2020 20:15:09 -0700
Subject: [PATCH] x86/cpu: Use pinning mask for CR4 bits needing to be 0
The X86_CR4_FSGSBASE bit of CR4 should not change after boot[1]. Older
kernels should enforce this bit to zero, and newer kernels need to
enforce it depending on boot-time configuration (e.g. "nofsgsbase").
To support a pinned bit being either 1 or 0, use an explicit mask in
combination with the expected pinned bit values.
[1] https://lore.kernel.org/lkml/20200527103147.GI325280@hirez.programming.kick…
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/202006082013.71E29A42@keescook
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 043d93cdcaad..95c090a45b4b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -347,6 +347,9 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_clear_bits(X86_CR4_UMIP);
}
+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -371,20 +374,20 @@ EXPORT_SYMBOL(native_write_cr0);
void native_write_cr4(unsigned long val)
{
- unsigned long bits_missing = 0;
+ unsigned long bits_changed = 0;
set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
if (static_branch_likely(&cr_pinning)) {
- if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
- bits_missing = ~val & cr4_pinned_bits;
- val |= bits_missing;
+ if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+ bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+ val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
goto set_register;
}
- /* Warn after we've set the missing bits. */
- WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
- bits_missing);
+ /* Warn after we've corrected the changed bits. */
+ WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+ bits_changed);
}
}
#if IS_MODULE(CONFIG_LKDTM)
@@ -419,7 +422,7 @@ void cr4_init(void)
if (boot_cpu_has(X86_FEATURE_PCID))
cr4 |= X86_CR4_PCIDE;
if (static_branch_likely(&cr_pinning))
- cr4 |= cr4_pinned_bits;
+ cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
__write_cr4(cr4);
@@ -434,10 +437,7 @@ void cr4_init(void)
*/
static void __init setup_cr_pinning(void)
{
- unsigned long mask;
-
- mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
- cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
static_key_enable(&cr_pinning.key);
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a13b9d0b97211579ea63b96c606de79b963c0f47 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook(a)chromium.org>
Date: Mon, 8 Jun 2020 20:15:09 -0700
Subject: [PATCH] x86/cpu: Use pinning mask for CR4 bits needing to be 0
The X86_CR4_FSGSBASE bit of CR4 should not change after boot[1]. Older
kernels should enforce this bit to zero, and newer kernels need to
enforce it depending on boot-time configuration (e.g. "nofsgsbase").
To support a pinned bit being either 1 or 0, use an explicit mask in
combination with the expected pinned bit values.
[1] https://lore.kernel.org/lkml/20200527103147.GI325280@hirez.programming.kick…
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/202006082013.71E29A42@keescook
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 043d93cdcaad..95c090a45b4b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -347,6 +347,9 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_clear_bits(X86_CR4_UMIP);
}
+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -371,20 +374,20 @@ EXPORT_SYMBOL(native_write_cr0);
void native_write_cr4(unsigned long val)
{
- unsigned long bits_missing = 0;
+ unsigned long bits_changed = 0;
set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
if (static_branch_likely(&cr_pinning)) {
- if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
- bits_missing = ~val & cr4_pinned_bits;
- val |= bits_missing;
+ if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+ bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+ val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
goto set_register;
}
- /* Warn after we've set the missing bits. */
- WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
- bits_missing);
+ /* Warn after we've corrected the changed bits. */
+ WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+ bits_changed);
}
}
#if IS_MODULE(CONFIG_LKDTM)
@@ -419,7 +422,7 @@ void cr4_init(void)
if (boot_cpu_has(X86_FEATURE_PCID))
cr4 |= X86_CR4_PCIDE;
if (static_branch_likely(&cr_pinning))
- cr4 |= cr4_pinned_bits;
+ cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
__write_cr4(cr4);
@@ -434,10 +437,7 @@ void cr4_init(void)
*/
static void __init setup_cr_pinning(void)
{
- unsigned long mask;
-
- mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
- cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
static_key_enable(&cr_pinning.key);
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a13b9d0b97211579ea63b96c606de79b963c0f47 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook(a)chromium.org>
Date: Mon, 8 Jun 2020 20:15:09 -0700
Subject: [PATCH] x86/cpu: Use pinning mask for CR4 bits needing to be 0
The X86_CR4_FSGSBASE bit of CR4 should not change after boot[1]. Older
kernels should enforce this bit to zero, and newer kernels need to
enforce it depending on boot-time configuration (e.g. "nofsgsbase").
To support a pinned bit being either 1 or 0, use an explicit mask in
combination with the expected pinned bit values.
[1] https://lore.kernel.org/lkml/20200527103147.GI325280@hirez.programming.kick…
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/202006082013.71E29A42@keescook
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 043d93cdcaad..95c090a45b4b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -347,6 +347,9 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_clear_bits(X86_CR4_UMIP);
}
+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -371,20 +374,20 @@ EXPORT_SYMBOL(native_write_cr0);
void native_write_cr4(unsigned long val)
{
- unsigned long bits_missing = 0;
+ unsigned long bits_changed = 0;
set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
if (static_branch_likely(&cr_pinning)) {
- if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
- bits_missing = ~val & cr4_pinned_bits;
- val |= bits_missing;
+ if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+ bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+ val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
goto set_register;
}
- /* Warn after we've set the missing bits. */
- WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
- bits_missing);
+ /* Warn after we've corrected the changed bits. */
+ WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+ bits_changed);
}
}
#if IS_MODULE(CONFIG_LKDTM)
@@ -419,7 +422,7 @@ void cr4_init(void)
if (boot_cpu_has(X86_FEATURE_PCID))
cr4 |= X86_CR4_PCIDE;
if (static_branch_likely(&cr_pinning))
- cr4 |= cr4_pinned_bits;
+ cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
__write_cr4(cr4);
@@ -434,10 +437,7 @@ void cr4_init(void)
*/
static void __init setup_cr_pinning(void)
{
- unsigned long mask;
-
- mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
- cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
static_key_enable(&cr_pinning.key);
}
From: Gao Xiang <hsiangkao(a)redhat.com>
commit 3c597282887fd55181578996dca52ce697d985a5 upstream.
Hongyu reported "id != index" in z_erofs_onlinepage_fixup() with
specific aarch64 environment easily, which wasn't shown before.
After digging into that, I found that high 32 bits of page->private
was set to 0xaaaaaaaa rather than 0 (due to z_erofs_onlinepage_init
behavior with specific compiler options). Actually we only use low
32 bits to keep the page information since page->private is only 4
bytes on most 32-bit platforms. However z_erofs_onlinepage_fixup()
uses the upper 32 bits by mistake.
Let's fix it now.
Reported-and-tested-by: Hongyu Jin <hongyu.jin(a)unisoc.com>
Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support")
Cc: <stable(a)vger.kernel.org> # 4.19+
Reviewed-by: Chao Yu <yuchao0(a)huawei.com>
Link: https://lore.kernel.org/r/20200618234349.22553-1-hsiangkao@aol.com
Signed-off-by: Gao Xiang <hsiangkao(a)redhat.com>
---
This fix has been merged into Linus's tree just now (today).
Since the patch could not directly be applied to 4.19, manually handle this.
drivers/staging/erofs/unzip_vle.h | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
index 684ff06fc7bf..630fd1f4f123 100644
--- a/drivers/staging/erofs/unzip_vle.h
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -169,22 +169,22 @@ static inline void z_erofs_onlinepage_init(struct page *page)
static inline void z_erofs_onlinepage_fixup(struct page *page,
uintptr_t index, bool down)
{
- unsigned long *p, o, v, id;
-repeat:
- p = &page_private(page);
- o = READ_ONCE(*p);
+ union z_erofs_onlinepage_converter u = { .v = &page_private(page) };
+ int orig, orig_index, val;
- id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
- if (id) {
+repeat:
+ orig = atomic_read(u.o);
+ orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+ if (orig_index) {
if (!index)
return;
- BUG_ON(id != index);
+ DBG_BUGON(orig_index != index);
}
- v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
- ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned)down);
- if (cmpxchg(p, o, v) != o)
+ val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
+ ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
+ if (atomic_cmpxchg(u.o, orig, val) != orig)
goto repeat;
}
--
2.24.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: c291ca510bff - loop: replace kill_bdev with invalidate_bdev
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 💥 jvm - DaCapo Benchmark Suite
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
Host 4:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
From: Alexander Usyskin <alexander.usyskin(a)intel.com>
It's not needed to set driver to NULL in mei_cl_device_remove()
which is bus_type remove() handler as this is done anyway
in __device_release_driver().
Actually this is causing an endless loop in driver_detach()
on ubuntu patched kernel, while removing (rmmod) the mei_hdcp module.
The reason list_empty(&drv->p->klist_devices.k_list) is always not-empty.
as the check is always true in __device_release_driver()
if (dev->driver != drv)
return;
The non upstream patch is causing this behavior, titled:
'vfio -- release device lock before userspace requests'
Nevertheless the fix is correct also for the upstream.
Link: https://patchwork.ozlabs.org/project/ubuntu-kernel/patch/20180912085046.340…
Cc: <stable(a)vger.kernel.org>
Cc: Andy Whitcroft <apw(a)canonical.com>
Signed-off-by: Alexander Usyskin <alexander.usyskin(a)intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler(a)intel.com>
---
drivers/misc/mei/bus.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 8d468e0a950a..f476dbc7252b 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -745,9 +745,8 @@ static int mei_cl_device_remove(struct device *dev)
mei_cl_bus_module_put(cldev);
module_put(THIS_MODULE);
- dev->driver = NULL;
- return ret;
+ return ret;
}
static ssize_t name_show(struct device *dev, struct device_attribute *a,
--
2.25.4
If system memory is migrated to device private memory and no GPU MMU
page table entry exists, the GPU will fault and call hmm_range_fault()
to get the PFN for the page. Since the .dev_private_owner pointer in
struct hmm_range is not set, hmm_range_fault returns an error which
results in the GPU program stopping with a fatal fault.
Fix this by setting .dev_private_owner appropriately.
Fixes: 08ddddda667b ("mm/hmm: check the device private page owner in hmm_range_fault()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg(a)mellanox.com>
---
This is based on Linux-5.8.0-rc2 and is for Ben Skeggs nouveau tree.
It doesn't depend on any of the other nouveau/HMM changes I have
recently posted.
Resending to include stable(a)vger.org and adding Jason's reviewed-by.
drivers/gpu/drm/nouveau/nouveau_svm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index ba9f9359c30e..6586d9d39874 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -562,6 +562,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
.end = notifier->notifier.interval_tree.last + 1,
.pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
.hmm_pfns = hmm_pfns,
+ .dev_private_owner = drm->dev,
};
struct mm_struct *mm = notifier->notifier.mm;
int ret;
--
2.20.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6b69040247e14b43419a520f841f2b3052833df9 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Date: Tue, 23 Jun 2020 07:31:54 -0400
Subject: [PATCH] cifs/smb3: Fix data inconsistent when zero file range
CIFS implements the fallocate(FALLOC_FL_ZERO_RANGE) with send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the
remote file to zero, but local page cache not update, then the data
inconsistent with server, which leads the xfstest generic/008 failed.
So we need to remove the local page caches before send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. After next read, it will
re-cache it.
Fixes: 30175628bf7f5 ("[SMB3] Enable fallocate -z support for SMB3 mounts")
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Reviewed-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Cc: stable(a)vger.kernel.org # v3.17
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 28553d45604e..876a0d9e3d46 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3188,6 +3188,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid,
ses->Suid, offset, len);
+ /*
+ * We zero the range through ioctl, so we need remove the page caches
+ * first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From acc91c2d8de4ef46ed751c5f9df99ed9a109b100 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Date: Tue, 23 Jun 2020 07:31:53 -0400
Subject: [PATCH] cifs/smb3: Fix data inconsistent when punch hole
When punch hole success, we also can read old data from file:
# strace -e trace=pread64,fallocate xfs_io -f -c "pread 20 40" \
-c "fpunch 20 40" -c"pread 20 40" file
pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40
fallocate(3, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 20, 40) = 0
pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40
CIFS implements the fallocate(FALLOCATE_FL_PUNCH_HOLE) with send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the
remote file to zero, but local page caches not updated, then the
local page caches inconsistent with server.
Also can be found by xfstests generic/316.
So, we need to remove the page caches before send the SMB
ioctl(FSCTL_SET_ZERO_DATA) to server.
Fixes: 31742c5a33176 ("enable fallocate punch hole ("fallocate -p") for SMB3")
Suggested-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Cc: stable(a)vger.kernel.org # v3.17
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 876a0d9e3d46..d9fdafa5eb60 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3259,6 +3259,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+ /*
+ * We implement the punch hole through ioctl, so we need remove the page
+ * caches first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
fsctl_buf.FileOffset = cpu_to_le64(offset);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b3d71abd135e6919ca0b6cab463738472653ddfb Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Date: Wed, 24 Jun 2020 16:59:49 +0300
Subject: [PATCH] xhci: Poll for U0 after disabling USB2 LPM
USB2 devices with LPM enabled may interrupt the system suspend:
[ 932.510475] usb 1-7: usb suspend, wakeup 0
[ 932.510549] hub 1-0:1.0: hub_suspend
[ 932.510581] usb usb1: bus suspend, wakeup 0
[ 932.510590] xhci_hcd 0000:00:14.0: port 9 not suspended
[ 932.510593] xhci_hcd 0000:00:14.0: port 8 not suspended
..
[ 932.520323] xhci_hcd 0000:00:14.0: Port change event, 1-7, id 7, portsc: 0x400e03
..
[ 932.591405] PM: pci_pm_suspend(): hcd_pci_suspend+0x0/0x30 returns -16
[ 932.591414] PM: dpm_run_callback(): pci_pm_suspend+0x0/0x160 returns -16
[ 932.591418] PM: Device 0000:00:14.0 failed to suspend async: error -16
During system suspend, USB core will let HC suspends the device if it
doesn't have remote wakeup enabled and doesn't have any children.
However, from the log above we can see that the usb 1-7 doesn't get bus
suspended due to not in U0. After a while the port finished U2 -> U0
transition, interrupts the suspend process.
The observation is that after disabling LPM, port doesn't transit to U0
immediately and can linger in U2. xHCI spec 4.23.5.2 states that the
maximum exit latency for USB2 LPM should be BESL + 10us. The BESL for
the affected device is advertised as 400us, which is still not enough
based on my testing result.
So let's use the maximum permitted latency, 10000, to poll for U0
status to solve the issue.
Cc: stable(a)vger.kernel.org
Signed-off-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200624135949.22611-6-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index f97106e2860f..ed468eed299c 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4475,6 +4475,9 @@ static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd,
mutex_lock(hcd->bandwidth_mutex);
xhci_change_max_exit_latency(xhci, udev, 0);
mutex_unlock(hcd->bandwidth_mutex);
+ readl_poll_timeout(ports[port_num]->addr, pm_val,
+ (pm_val & PORT_PLS_MASK) == XDEV_U0,
+ 100, 10000);
return 0;
}
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 220345e98f1cdc768eeb6e3364a0fa7ab9647fe7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai(a)suse.de>
Date: Wed, 24 Jun 2020 14:23:40 +0200
Subject: [PATCH] ALSA: usb-audio: Fix OOB access of mixer element list
The USB-audio mixer code holds a linked list of usb_mixer_elem_list,
and several operations are performed for each mixer element. A few of
them (snd_usb_mixer_notify_id() and snd_usb_mixer_interrupt_v2())
assume each mixer element being a usb_mixer_elem_info object that is a
subclass of usb_mixer_elem_list, cast via container_of() and access it
members. This may result in an out-of-bound access when a
non-standard list element has been added, as spotted by syzkaller
recently.
This patch adds a new field, is_std_info, in usb_mixer_elem_list to
indicate that the element is the usb_mixer_elem_info type or not, and
skip the access to such an element if needed.
Reported-by: syzbot+fb14314433463ad51625(a)syzkaller.appspotmail.com
Reported-by: syzbot+2405ca3401e943c538b5(a)syzkaller.appspotmail.com
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200624122340.9615-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 15769f266790..eab0fd4fd7c3 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -581,8 +581,9 @@ static int check_matrix_bitmap(unsigned char *bmap,
* if failed, give up and free the control instance.
*/
-int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
- struct snd_kcontrol *kctl)
+int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list,
+ struct snd_kcontrol *kctl,
+ bool is_std_info)
{
struct usb_mixer_interface *mixer = list->mixer;
int err;
@@ -596,6 +597,7 @@ int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
return err;
}
list->kctl = kctl;
+ list->is_std_info = is_std_info;
list->next_id_elem = mixer->id_elems[list->id];
mixer->id_elems[list->id] = list;
return 0;
@@ -3234,8 +3236,11 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid)
unitid = delegate_notify(mixer, unitid, NULL, NULL);
for_each_mixer_elem(list, mixer, unitid) {
- struct usb_mixer_elem_info *info =
- mixer_elem_list_to_info(list);
+ struct usb_mixer_elem_info *info;
+
+ if (!list->is_std_info)
+ continue;
+ info = mixer_elem_list_to_info(list);
/* invalidate cache, so the value is read from the device */
info->cached = 0;
snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
@@ -3315,6 +3320,8 @@ static void snd_usb_mixer_interrupt_v2(struct usb_mixer_interface *mixer,
if (!list->kctl)
continue;
+ if (!list->is_std_info)
+ continue;
info = mixer_elem_list_to_info(list);
if (count > 1 && info->control != control)
diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h
index 41ec9dc4139b..c29e27ac43a7 100644
--- a/sound/usb/mixer.h
+++ b/sound/usb/mixer.h
@@ -66,6 +66,7 @@ struct usb_mixer_elem_list {
struct usb_mixer_elem_list *next_id_elem; /* list of controls with same id */
struct snd_kcontrol *kctl;
unsigned int id;
+ bool is_std_info;
usb_mixer_elem_dump_func_t dump;
usb_mixer_elem_resume_func_t resume;
};
@@ -103,8 +104,12 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid);
int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval,
int request, int validx, int value_set);
-int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
- struct snd_kcontrol *kctl);
+int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list,
+ struct snd_kcontrol *kctl,
+ bool is_std_info);
+
+#define snd_usb_mixer_add_control(list, kctl) \
+ snd_usb_mixer_add_list(list, kctl, true)
void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list,
struct usb_mixer_interface *mixer,
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index b6bcf2f92383..cec1cfd7edb7 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -158,7 +158,8 @@ static int add_single_ctl_with_resume(struct usb_mixer_interface *mixer,
return -ENOMEM;
}
kctl->private_free = snd_usb_mixer_elem_free;
- return snd_usb_mixer_add_control(list, kctl);
+ /* don't use snd_usb_mixer_add_control() here, this is a special list element */
+ return snd_usb_mixer_add_list(list, kctl, false);
}
/*
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 8dfcd4110882 - btrfs: fix a block group ref counter leak after failure to remove block group
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
x86_64:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: efc8aa58c75c - btrfs: fix a block group ref counter leak after failure to remove block group
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 3:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 4:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: c93288eb14eb - btrfs: fix a block group ref counter leak after failure to remove block group
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
s390x:
Host 1:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
x86_64:
Host 1:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 3:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ❌ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 9292f8b0e944 - btrfs: fix a block group ref counter leak after failure to remove block group
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 3:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
s390x:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
x86_64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
The RT5670_PWR_ANLG1 register has 3 bits to select the LDO voltage,
so the correct mask is 0x7 not 0x3.
Because of this wrong mask we were programming the ldo bits
to a setting of binary 001 (0x05 & 0x03) instead of binary 101
when moving to SND_SOC_BIAS_PREPARE.
According to the datasheet 001 is a reserved value, so no idea
what it did, since the driver was working fine before I guess we
got lucky and it does something which is ok.
Fixes: 5e8351de740d ("ASoC: add RT5670 CODEC driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
sound/soc/codecs/rt5670.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/soc/codecs/rt5670.h b/sound/soc/codecs/rt5670.h
index a8c3e44770b8..de0203369b7c 100644
--- a/sound/soc/codecs/rt5670.h
+++ b/sound/soc/codecs/rt5670.h
@@ -757,7 +757,7 @@
#define RT5670_PWR_VREF2_BIT 4
#define RT5670_PWR_FV2 (0x1 << 3)
#define RT5670_PWR_FV2_BIT 3
-#define RT5670_LDO_SEL_MASK (0x3)
+#define RT5670_LDO_SEL_MASK (0x7)
#define RT5670_LDO_SEL_SFT 0
/* Power Management for Analog 2 (0x64) */
--
2.26.2
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6b69040247e14b43419a520f841f2b3052833df9 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Date: Tue, 23 Jun 2020 07:31:54 -0400
Subject: [PATCH] cifs/smb3: Fix data inconsistent when zero file range
CIFS implements the fallocate(FALLOC_FL_ZERO_RANGE) with send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the
remote file to zero, but local page cache not update, then the data
inconsistent with server, which leads the xfstest generic/008 failed.
So we need to remove the local page caches before send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. After next read, it will
re-cache it.
Fixes: 30175628bf7f5 ("[SMB3] Enable fallocate -z support for SMB3 mounts")
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Reviewed-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Cc: stable(a)vger.kernel.org # v3.17
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 28553d45604e..876a0d9e3d46 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3188,6 +3188,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid,
ses->Suid, offset, len);
+ /*
+ * We zero the range through ioctl, so we need remove the page caches
+ * first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6b69040247e14b43419a520f841f2b3052833df9 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Date: Tue, 23 Jun 2020 07:31:54 -0400
Subject: [PATCH] cifs/smb3: Fix data inconsistent when zero file range
CIFS implements the fallocate(FALLOC_FL_ZERO_RANGE) with send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the
remote file to zero, but local page cache not update, then the data
inconsistent with server, which leads the xfstest generic/008 failed.
So we need to remove the local page caches before send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. After next read, it will
re-cache it.
Fixes: 30175628bf7f5 ("[SMB3] Enable fallocate -z support for SMB3 mounts")
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Reviewed-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Cc: stable(a)vger.kernel.org # v3.17
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 28553d45604e..876a0d9e3d46 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3188,6 +3188,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid,
ses->Suid, offset, len);
+ /*
+ * We zero the range through ioctl, so we need remove the page caches
+ * first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6b69040247e14b43419a520f841f2b3052833df9 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Date: Tue, 23 Jun 2020 07:31:54 -0400
Subject: [PATCH] cifs/smb3: Fix data inconsistent when zero file range
CIFS implements the fallocate(FALLOC_FL_ZERO_RANGE) with send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the
remote file to zero, but local page cache not update, then the data
inconsistent with server, which leads the xfstest generic/008 failed.
So we need to remove the local page caches before send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. After next read, it will
re-cache it.
Fixes: 30175628bf7f5 ("[SMB3] Enable fallocate -z support for SMB3 mounts")
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Reviewed-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Cc: stable(a)vger.kernel.org # v3.17
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 28553d45604e..876a0d9e3d46 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3188,6 +3188,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid,
ses->Suid, offset, len);
+ /*
+ * We zero the range through ioctl, so we need remove the page caches
+ * first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From acc91c2d8de4ef46ed751c5f9df99ed9a109b100 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Date: Tue, 23 Jun 2020 07:31:53 -0400
Subject: [PATCH] cifs/smb3: Fix data inconsistent when punch hole
When punch hole success, we also can read old data from file:
# strace -e trace=pread64,fallocate xfs_io -f -c "pread 20 40" \
-c "fpunch 20 40" -c"pread 20 40" file
pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40
fallocate(3, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 20, 40) = 0
pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40
CIFS implements the fallocate(FALLOCATE_FL_PUNCH_HOLE) with send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the
remote file to zero, but local page caches not updated, then the
local page caches inconsistent with server.
Also can be found by xfstests generic/316.
So, we need to remove the page caches before send the SMB
ioctl(FSCTL_SET_ZERO_DATA) to server.
Fixes: 31742c5a33176 ("enable fallocate punch hole ("fallocate -p") for SMB3")
Suggested-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Cc: stable(a)vger.kernel.org # v3.17
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 876a0d9e3d46..d9fdafa5eb60 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3259,6 +3259,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+ /*
+ * We implement the punch hole through ioctl, so we need remove the page
+ * caches first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
fsctl_buf.FileOffset = cpu_to_le64(offset);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From acc91c2d8de4ef46ed751c5f9df99ed9a109b100 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Date: Tue, 23 Jun 2020 07:31:53 -0400
Subject: [PATCH] cifs/smb3: Fix data inconsistent when punch hole
When punch hole success, we also can read old data from file:
# strace -e trace=pread64,fallocate xfs_io -f -c "pread 20 40" \
-c "fpunch 20 40" -c"pread 20 40" file
pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40
fallocate(3, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 20, 40) = 0
pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40
CIFS implements the fallocate(FALLOCATE_FL_PUNCH_HOLE) with send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the
remote file to zero, but local page caches not updated, then the
local page caches inconsistent with server.
Also can be found by xfstests generic/316.
So, we need to remove the page caches before send the SMB
ioctl(FSCTL_SET_ZERO_DATA) to server.
Fixes: 31742c5a33176 ("enable fallocate punch hole ("fallocate -p") for SMB3")
Suggested-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Cc: stable(a)vger.kernel.org # v3.17
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 876a0d9e3d46..d9fdafa5eb60 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3259,6 +3259,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+ /*
+ * We implement the punch hole through ioctl, so we need remove the page
+ * caches first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
fsctl_buf.FileOffset = cpu_to_le64(offset);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From acc91c2d8de4ef46ed751c5f9df99ed9a109b100 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Date: Tue, 23 Jun 2020 07:31:53 -0400
Subject: [PATCH] cifs/smb3: Fix data inconsistent when punch hole
When punch hole success, we also can read old data from file:
# strace -e trace=pread64,fallocate xfs_io -f -c "pread 20 40" \
-c "fpunch 20 40" -c"pread 20 40" file
pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40
fallocate(3, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 20, 40) = 0
pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40
CIFS implements the fallocate(FALLOCATE_FL_PUNCH_HOLE) with send SMB
ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the
remote file to zero, but local page caches not updated, then the
local page caches inconsistent with server.
Also can be found by xfstests generic/316.
So, we need to remove the page caches before send the SMB
ioctl(FSCTL_SET_ZERO_DATA) to server.
Fixes: 31742c5a33176 ("enable fallocate punch hole ("fallocate -p") for SMB3")
Suggested-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov(a)microsoft.com>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Cc: stable(a)vger.kernel.org # v3.17
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 876a0d9e3d46..d9fdafa5eb60 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3259,6 +3259,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+ /*
+ * We implement the punch hole through ioctl, so we need remove the page
+ * caches first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
fsctl_buf.FileOffset = cpu_to_le64(offset);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b3d71abd135e6919ca0b6cab463738472653ddfb Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Date: Wed, 24 Jun 2020 16:59:49 +0300
Subject: [PATCH] xhci: Poll for U0 after disabling USB2 LPM
USB2 devices with LPM enabled may interrupt the system suspend:
[ 932.510475] usb 1-7: usb suspend, wakeup 0
[ 932.510549] hub 1-0:1.0: hub_suspend
[ 932.510581] usb usb1: bus suspend, wakeup 0
[ 932.510590] xhci_hcd 0000:00:14.0: port 9 not suspended
[ 932.510593] xhci_hcd 0000:00:14.0: port 8 not suspended
..
[ 932.520323] xhci_hcd 0000:00:14.0: Port change event, 1-7, id 7, portsc: 0x400e03
..
[ 932.591405] PM: pci_pm_suspend(): hcd_pci_suspend+0x0/0x30 returns -16
[ 932.591414] PM: dpm_run_callback(): pci_pm_suspend+0x0/0x160 returns -16
[ 932.591418] PM: Device 0000:00:14.0 failed to suspend async: error -16
During system suspend, USB core will let HC suspends the device if it
doesn't have remote wakeup enabled and doesn't have any children.
However, from the log above we can see that the usb 1-7 doesn't get bus
suspended due to not in U0. After a while the port finished U2 -> U0
transition, interrupts the suspend process.
The observation is that after disabling LPM, port doesn't transit to U0
immediately and can linger in U2. xHCI spec 4.23.5.2 states that the
maximum exit latency for USB2 LPM should be BESL + 10us. The BESL for
the affected device is advertised as 400us, which is still not enough
based on my testing result.
So let's use the maximum permitted latency, 10000, to poll for U0
status to solve the issue.
Cc: stable(a)vger.kernel.org
Signed-off-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200624135949.22611-6-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index f97106e2860f..ed468eed299c 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4475,6 +4475,9 @@ static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd,
mutex_lock(hcd->bandwidth_mutex);
xhci_change_max_exit_latency(xhci, udev, 0);
mutex_unlock(hcd->bandwidth_mutex);
+ readl_poll_timeout(ports[port_num]->addr, pm_val,
+ (pm_val & PORT_PLS_MASK) == XDEV_U0,
+ 100, 10000);
return 0;
}
}
The RT5670_PWR_ANLG1 register has 3 bits to select the LDO voltage,
so the correct mask is 0x7 not 0x3.
Because of this wrong mask we were programming the ldo bits
to a setting of binary 001 (0x05 & 0x03) instead of binary 101
when moving to SND_SOC_BIAS_PREPARE.
According to the datasheet 001 is a reserved value, so no idea
what it did, since the driver was working fine before I guess we
got lucky and it does something which is ok.
Fixes: 5e8351de740d ("ASoC: add RT5670 CODEC driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
sound/soc/codecs/rt5670.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/soc/codecs/rt5670.h b/sound/soc/codecs/rt5670.h
index a8c3e44770b8..de0203369b7c 100644
--- a/sound/soc/codecs/rt5670.h
+++ b/sound/soc/codecs/rt5670.h
@@ -757,7 +757,7 @@
#define RT5670_PWR_VREF2_BIT 4
#define RT5670_PWR_FV2 (0x1 << 3)
#define RT5670_PWR_FV2_BIT 3
-#define RT5670_LDO_SEL_MASK (0x3)
+#define RT5670_LDO_SEL_MASK (0x7)
#define RT5670_LDO_SEL_SFT 0
/* Power Management for Analog 2 (0x64) */
--
2.26.2
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b3d71abd135e6919ca0b6cab463738472653ddfb Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Date: Wed, 24 Jun 2020 16:59:49 +0300
Subject: [PATCH] xhci: Poll for U0 after disabling USB2 LPM
USB2 devices with LPM enabled may interrupt the system suspend:
[ 932.510475] usb 1-7: usb suspend, wakeup 0
[ 932.510549] hub 1-0:1.0: hub_suspend
[ 932.510581] usb usb1: bus suspend, wakeup 0
[ 932.510590] xhci_hcd 0000:00:14.0: port 9 not suspended
[ 932.510593] xhci_hcd 0000:00:14.0: port 8 not suspended
..
[ 932.520323] xhci_hcd 0000:00:14.0: Port change event, 1-7, id 7, portsc: 0x400e03
..
[ 932.591405] PM: pci_pm_suspend(): hcd_pci_suspend+0x0/0x30 returns -16
[ 932.591414] PM: dpm_run_callback(): pci_pm_suspend+0x0/0x160 returns -16
[ 932.591418] PM: Device 0000:00:14.0 failed to suspend async: error -16
During system suspend, USB core will let HC suspends the device if it
doesn't have remote wakeup enabled and doesn't have any children.
However, from the log above we can see that the usb 1-7 doesn't get bus
suspended due to not in U0. After a while the port finished U2 -> U0
transition, interrupts the suspend process.
The observation is that after disabling LPM, port doesn't transit to U0
immediately and can linger in U2. xHCI spec 4.23.5.2 states that the
maximum exit latency for USB2 LPM should be BESL + 10us. The BESL for
the affected device is advertised as 400us, which is still not enough
based on my testing result.
So let's use the maximum permitted latency, 10000, to poll for U0
status to solve the issue.
Cc: stable(a)vger.kernel.org
Signed-off-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200624135949.22611-6-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index f97106e2860f..ed468eed299c 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4475,6 +4475,9 @@ static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd,
mutex_lock(hcd->bandwidth_mutex);
xhci_change_max_exit_latency(xhci, udev, 0);
mutex_unlock(hcd->bandwidth_mutex);
+ readl_poll_timeout(ports[port_num]->addr, pm_val,
+ (pm_val & PORT_PLS_MASK) == XDEV_U0,
+ 100, 10000);
return 0;
}
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 220345e98f1cdc768eeb6e3364a0fa7ab9647fe7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai(a)suse.de>
Date: Wed, 24 Jun 2020 14:23:40 +0200
Subject: [PATCH] ALSA: usb-audio: Fix OOB access of mixer element list
The USB-audio mixer code holds a linked list of usb_mixer_elem_list,
and several operations are performed for each mixer element. A few of
them (snd_usb_mixer_notify_id() and snd_usb_mixer_interrupt_v2())
assume each mixer element being a usb_mixer_elem_info object that is a
subclass of usb_mixer_elem_list, cast via container_of() and access it
members. This may result in an out-of-bound access when a
non-standard list element has been added, as spotted by syzkaller
recently.
This patch adds a new field, is_std_info, in usb_mixer_elem_list to
indicate that the element is the usb_mixer_elem_info type or not, and
skip the access to such an element if needed.
Reported-by: syzbot+fb14314433463ad51625(a)syzkaller.appspotmail.com
Reported-by: syzbot+2405ca3401e943c538b5(a)syzkaller.appspotmail.com
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200624122340.9615-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 15769f266790..eab0fd4fd7c3 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -581,8 +581,9 @@ static int check_matrix_bitmap(unsigned char *bmap,
* if failed, give up and free the control instance.
*/
-int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
- struct snd_kcontrol *kctl)
+int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list,
+ struct snd_kcontrol *kctl,
+ bool is_std_info)
{
struct usb_mixer_interface *mixer = list->mixer;
int err;
@@ -596,6 +597,7 @@ int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
return err;
}
list->kctl = kctl;
+ list->is_std_info = is_std_info;
list->next_id_elem = mixer->id_elems[list->id];
mixer->id_elems[list->id] = list;
return 0;
@@ -3234,8 +3236,11 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid)
unitid = delegate_notify(mixer, unitid, NULL, NULL);
for_each_mixer_elem(list, mixer, unitid) {
- struct usb_mixer_elem_info *info =
- mixer_elem_list_to_info(list);
+ struct usb_mixer_elem_info *info;
+
+ if (!list->is_std_info)
+ continue;
+ info = mixer_elem_list_to_info(list);
/* invalidate cache, so the value is read from the device */
info->cached = 0;
snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
@@ -3315,6 +3320,8 @@ static void snd_usb_mixer_interrupt_v2(struct usb_mixer_interface *mixer,
if (!list->kctl)
continue;
+ if (!list->is_std_info)
+ continue;
info = mixer_elem_list_to_info(list);
if (count > 1 && info->control != control)
diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h
index 41ec9dc4139b..c29e27ac43a7 100644
--- a/sound/usb/mixer.h
+++ b/sound/usb/mixer.h
@@ -66,6 +66,7 @@ struct usb_mixer_elem_list {
struct usb_mixer_elem_list *next_id_elem; /* list of controls with same id */
struct snd_kcontrol *kctl;
unsigned int id;
+ bool is_std_info;
usb_mixer_elem_dump_func_t dump;
usb_mixer_elem_resume_func_t resume;
};
@@ -103,8 +104,12 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid);
int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval,
int request, int validx, int value_set);
-int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
- struct snd_kcontrol *kctl);
+int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list,
+ struct snd_kcontrol *kctl,
+ bool is_std_info);
+
+#define snd_usb_mixer_add_control(list, kctl) \
+ snd_usb_mixer_add_list(list, kctl, true)
void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list,
struct usb_mixer_interface *mixer,
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index b6bcf2f92383..cec1cfd7edb7 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -158,7 +158,8 @@ static int add_single_ctl_with_resume(struct usb_mixer_interface *mixer,
return -ENOMEM;
}
kctl->private_free = snd_usb_mixer_elem_free;
- return snd_usb_mixer_add_control(list, kctl);
+ /* don't use snd_usb_mixer_add_control() here, this is a special list element */
+ return snd_usb_mixer_add_list(list, kctl, false);
}
/*
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 220345e98f1cdc768eeb6e3364a0fa7ab9647fe7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai(a)suse.de>
Date: Wed, 24 Jun 2020 14:23:40 +0200
Subject: [PATCH] ALSA: usb-audio: Fix OOB access of mixer element list
The USB-audio mixer code holds a linked list of usb_mixer_elem_list,
and several operations are performed for each mixer element. A few of
them (snd_usb_mixer_notify_id() and snd_usb_mixer_interrupt_v2())
assume each mixer element being a usb_mixer_elem_info object that is a
subclass of usb_mixer_elem_list, cast via container_of() and access it
members. This may result in an out-of-bound access when a
non-standard list element has been added, as spotted by syzkaller
recently.
This patch adds a new field, is_std_info, in usb_mixer_elem_list to
indicate that the element is the usb_mixer_elem_info type or not, and
skip the access to such an element if needed.
Reported-by: syzbot+fb14314433463ad51625(a)syzkaller.appspotmail.com
Reported-by: syzbot+2405ca3401e943c538b5(a)syzkaller.appspotmail.com
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200624122340.9615-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 15769f266790..eab0fd4fd7c3 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -581,8 +581,9 @@ static int check_matrix_bitmap(unsigned char *bmap,
* if failed, give up and free the control instance.
*/
-int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
- struct snd_kcontrol *kctl)
+int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list,
+ struct snd_kcontrol *kctl,
+ bool is_std_info)
{
struct usb_mixer_interface *mixer = list->mixer;
int err;
@@ -596,6 +597,7 @@ int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
return err;
}
list->kctl = kctl;
+ list->is_std_info = is_std_info;
list->next_id_elem = mixer->id_elems[list->id];
mixer->id_elems[list->id] = list;
return 0;
@@ -3234,8 +3236,11 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid)
unitid = delegate_notify(mixer, unitid, NULL, NULL);
for_each_mixer_elem(list, mixer, unitid) {
- struct usb_mixer_elem_info *info =
- mixer_elem_list_to_info(list);
+ struct usb_mixer_elem_info *info;
+
+ if (!list->is_std_info)
+ continue;
+ info = mixer_elem_list_to_info(list);
/* invalidate cache, so the value is read from the device */
info->cached = 0;
snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
@@ -3315,6 +3320,8 @@ static void snd_usb_mixer_interrupt_v2(struct usb_mixer_interface *mixer,
if (!list->kctl)
continue;
+ if (!list->is_std_info)
+ continue;
info = mixer_elem_list_to_info(list);
if (count > 1 && info->control != control)
diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h
index 41ec9dc4139b..c29e27ac43a7 100644
--- a/sound/usb/mixer.h
+++ b/sound/usb/mixer.h
@@ -66,6 +66,7 @@ struct usb_mixer_elem_list {
struct usb_mixer_elem_list *next_id_elem; /* list of controls with same id */
struct snd_kcontrol *kctl;
unsigned int id;
+ bool is_std_info;
usb_mixer_elem_dump_func_t dump;
usb_mixer_elem_resume_func_t resume;
};
@@ -103,8 +104,12 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid);
int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval,
int request, int validx, int value_set);
-int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list,
- struct snd_kcontrol *kctl);
+int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list,
+ struct snd_kcontrol *kctl,
+ bool is_std_info);
+
+#define snd_usb_mixer_add_control(list, kctl) \
+ snd_usb_mixer_add_list(list, kctl, true)
void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list,
struct usb_mixer_interface *mixer,
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index b6bcf2f92383..cec1cfd7edb7 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -158,7 +158,8 @@ static int add_single_ctl_with_resume(struct usb_mixer_interface *mixer,
return -ENOMEM;
}
kctl->private_free = snd_usb_mixer_elem_free;
- return snd_usb_mixer_add_control(list, kctl);
+ /* don't use snd_usb_mixer_add_control() here, this is a special list element */
+ return snd_usb_mixer_add_list(list, kctl, false);
}
/*
From: Eric Biggers <ebiggers(a)google.com>
If the minix filesystem tries to map a very large logical block number
to its on-disk location, block_to_path() can return offsets that are too
large, causing out-of-bounds memory accesses when accessing indirect
index blocks. This should be prevented by the check against the maximum
file size, but this doesn't work because the maximum file size is read
directly from the on-disk superblock and isn't validated itself.
Fix this by validating the maximum file size at mount time.
Reported-by: syzbot+c7d9ec7a1a7272dd71b3(a)syzkaller.appspotmail.com
Reported-by: syzbot+3b7b03a0c28948054fb5(a)syzkaller.appspotmail.com
Reported-by: syzbot+6e056ee473568865f3e6(a)syzkaller.appspotmail.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/minix/inode.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 2bca95abe8f4..0dd929346f3f 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -150,6 +150,23 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
return 0;
}
+static bool minix_check_superblock(struct minix_sb_info *sbi)
+{
+ if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+ return false;
+
+ /*
+ * s_max_size must not exceed the block mapping limitation. This check
+ * is only needed for V1 filesystems, since V2/V3 support an extra level
+ * of indirect blocks which places the limit well above U32_MAX.
+ */
+ if (sbi->s_version == MINIX_V1 &&
+ sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE)
+ return false;
+
+ return true;
+}
+
static int minix_fill_super(struct super_block *s, void *data, int silent)
{
struct buffer_head *bh;
@@ -228,11 +245,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
} else
goto out_no_fs;
+ if (!minix_check_superblock(sbi))
+ goto out_illegal_sb;
+
/*
* Allocate the buffer map to keep the superblock small.
*/
- if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
- goto out_illegal_sb;
i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
map = kzalloc(i, GFP_KERNEL);
if (!map)
--
2.27.0
This is a note to let you know that I've just added the patch titled
Revert "tty: xilinx_uartps: Fix missing id assignment to the console"
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 76ed2e105796710cf5b8a4ba43c81eceed948b70 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka(a)siemens.com>
Date: Thu, 18 Jun 2020 10:11:40 +0200
Subject: Revert "tty: xilinx_uartps: Fix missing id assignment to the console"
This reverts commit 2ae11c46d5fdc46cb396e35911c713d271056d35.
It turned out to break the ultra96-rev1, e.g., which uses uart1 as
serial0 (and stdout-path = "serial0:115200n8").
Fixes: 2ae11c46d5fd ("tty: xilinx_uartps: Fix missing id assignment to the console")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Jan Kiszka <jan.kiszka(a)siemens.com>
Reviewed-by: Michal Simek <michal.simek(a)xilinx.com>
Tested-by: Michal Simek <michal.simek(a)xilinx.com>
Link: https://lore.kernel.org/r/f4092727-d8f5-5f91-2c9f-76643aace993@siemens.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/serial/xilinx_uartps.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index b9d672af8b65..672cfa075e28 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -1465,7 +1465,6 @@ static int cdns_uart_probe(struct platform_device *pdev)
cdns_uart_uart_driver.nr = CDNS_UART_NR_PORTS;
#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE
cdns_uart_uart_driver.cons = &cdns_uart_console;
- cdns_uart_console.index = id;
#endif
rc = uart_register_driver(&cdns_uart_uart_driver);
--
2.27.0
This is a note to let you know that I've just added the patch titled
Revert "serial: core: Refactor uart_unlock_and_check_sysrq()"
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 10652a9e9fe3fbcaca090f99cd3060ac3fee2913 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Wed, 10 Jun 2020 17:22:30 +0200
Subject: Revert "serial: core: Refactor uart_unlock_and_check_sysrq()"
This reverts commit da9a5aa3402db0ff3b57216d8dbf2478e1046cae.
In order to ease backporting a fix for a sysrq regression, revert this
rewrite which was since added on top.
The other sysrq helpers now bail out early when sysrq is not enabled;
it's better to keep that pattern here as well.
Note that the __releases() attribute won't be needed after the follow-on
fix either.
Fixes: da9a5aa3402d ("serial: core: Refactor uart_unlock_and_check_sysrq()")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200610152232.16925-2-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/serial/serial_core.c | 23 +++++++++++++----------
include/linux/serial_core.h | 3 ++-
2 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 13fb92ae3710..fcdb6bfbe2cf 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -3239,19 +3239,22 @@ int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch)
}
EXPORT_SYMBOL_GPL(uart_prepare_sysrq_char);
-void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long flags)
-__releases(&port->lock)
+void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags)
{
- if (port->has_sysrq) {
- int sysrq_ch = port->sysrq_ch;
+ int sysrq_ch;
- port->sysrq_ch = 0;
- spin_unlock_irqrestore(&port->lock, flags);
- if (sysrq_ch)
- handle_sysrq(sysrq_ch);
- } else {
- spin_unlock_irqrestore(&port->lock, flags);
+ if (!port->has_sysrq) {
+ spin_unlock_irqrestore(&port->lock, irqflags);
+ return;
}
+
+ sysrq_ch = port->sysrq_ch;
+ port->sysrq_ch = 0;
+
+ spin_unlock_irqrestore(&port->lock, irqflags);
+
+ if (sysrq_ch)
+ handle_sysrq(sysrq_ch);
}
EXPORT_SYMBOL_GPL(uart_unlock_and_check_sysrq);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 9fd550e7946a..ef4921ddbe97 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -464,7 +464,8 @@ extern void uart_insert_char(struct uart_port *port, unsigned int status,
extern int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch);
extern int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch);
-extern void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long flags);
+extern void uart_unlock_and_check_sysrq(struct uart_port *port,
+ unsigned long irqflags);
extern int uart_handle_break(struct uart_port *port);
/*
--
2.27.0
The patch titled
Subject: mm/memcontrol.c: add missed css_put()
has been removed from the -mm tree. Its filename was
mm-memcontrol-fix-do-not-put-the-css-reference.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Muchun Song <songmuchun(a)bytedance.com>
Subject: mm/memcontrol.c: add missed css_put()
We should put the css reference when memory allocation failed.
Link: http://lkml.kernel.org/r/20200614122653.98829-1-songmuchun@bytedance.com
Fixes: f0a3a24b532d ("mm: memcg/slab: rework non-root kmem_cache lifecycle management")
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Acked-by: Roman Gushchin <guro(a)fb.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: Qian Cai <cai(a)lca.pw>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/mm/memcontrol.c~mm-memcontrol-fix-do-not-put-the-css-reference
+++ a/mm/memcontrol.c
@@ -2772,8 +2772,10 @@ static void memcg_schedule_kmem_cache_cr
return;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
- if (!cw)
+ if (!cw) {
+ css_put(&memcg->css);
return;
+ }
cw->memcg = memcg;
cw->cachep = cachep;
_
Patches currently in -mm which might be from songmuchun(a)bytedance.com are
The patch titled
Subject: mm: memcontrol: handle div0 crash race condition in memory.low
has been removed from the -mm tree. Its filename was
mm-memcontrol-handle-div0-crash-race-condition-in-memorylow.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Johannes Weiner <hannes(a)cmpxchg.org>
Subject: mm: memcontrol: handle div0 crash race condition in memory.low
Tejun reports seeing rare div0 crashes in memory.low stress testing:
[37228.504582] RIP: 0010:mem_cgroup_calculate_protection+0xed/0x150
[37228.505059] Code: 0f 46 d1 4c 39 d8 72 57 f6 05 16 d6 42 01 40 74 1f 4c 39 d8 76 1a 4c 39 d1 76 15 4c 29 d1 4c 29 d8 4d 29 d9 31 d2 48 0f af c1 <49> f7 f1 49 01 c2 4c 89 96 38 01 00 00 5d c3 48 0f af c7 31 d2 49
[37228.506254] RSP: 0018:ffffa14e01d6fcd0 EFLAGS: 00010246
[37228.506769] RAX: 000000000243e384 RBX: 0000000000000000 RCX: 0000000000008f4b
[37228.507319] RDX: 0000000000000000 RSI: ffff8b89bee84000 RDI: 0000000000000000
[37228.507869] RBP: ffffa14e01d6fcd0 R08: ffff8b89ca7d40f8 R09: 0000000000000000
[37228.508376] R10: 0000000000000000 R11: 00000000006422f7 R12: 0000000000000000
[37228.508881] R13: ffff8b89d9617000 R14: ffff8b89bee84000 R15: ffffa14e01d6fdb8
[37228.509397] FS: 0000000000000000(0000) GS:ffff8b8a1f1c0000(0000) knlGS:0000000000000000
[37228.509917] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[37228.510442] CR2: 00007f93b1fc175b CR3: 000000016100a000 CR4: 0000000000340ea0
[37228.511076] Call Trace:
[37228.511561] shrink_node+0x1e5/0x6c0
[37228.512044] balance_pgdat+0x32d/0x5f0
[37228.512521] kswapd+0x1d7/0x3d0
[37228.513346] ? wait_woken+0x80/0x80
[37228.514170] kthread+0x11c/0x160
[37228.514983] ? balance_pgdat+0x5f0/0x5f0
[37228.515797] ? kthread_park+0x90/0x90
[37228.516593] ret_from_fork+0x1f/0x30
This happens when parent_usage == siblings_protected. We check that usage
is bigger than protected, which should imply parent_usage being bigger
than siblings_protected. However, we don't read (or even update) these
values atomically, and they can be out of sync as the memory state changes
under us. A bit of fluctuation around the target protection isn't a big
deal, but we need to handle the div0 case.
Check the parent state explicitly to make sure we have a reasonable
positive value for the divisor.
Link: http://lkml.kernel.org/r/20200615140658.601684-1-hannes@cmpxchg.org
Fixes: 8a931f801340 ("mm: memcontrol: recursive memory.low protection")
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reported-by: Tejun Heo <tj(a)kernel.org>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Chris Down <chris(a)chrisdown.name>
Cc: Roman Gushchin <guro(a)fb.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/mm/memcontrol.c~mm-memcontrol-handle-div0-crash-race-condition-in-memorylow
+++ a/mm/memcontrol.c
@@ -6360,11 +6360,16 @@ static unsigned long effective_protectio
* We're using unprotected memory for the weight so that if
* some cgroups DO claim explicit protection, we don't protect
* the same bytes twice.
+ *
+ * Check both usage and parent_usage against the respective
+ * protected values. One should imply the other, but they
+ * aren't read atomically - make sure the division is sane.
*/
if (!(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT))
return ep;
-
- if (parent_effective > siblings_protected && usage > protected) {
+ if (parent_effective > siblings_protected &&
+ parent_usage > siblings_protected &&
+ usage > protected) {
unsigned long unclaimed;
unclaimed = parent_effective - siblings_protected;
_
Patches currently in -mm which might be from hannes(a)cmpxchg.org are
mm-memcontrol-decouple-reference-counting-from-page-accounting.patch
The patch titled
Subject: mm: fix swap cache node allocation mask
has been removed from the -mm tree. Its filename was
mm-fix-swap-cache-node-allocation-mask.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm: fix swap cache node allocation mask
https://bugzilla.kernel.org/show_bug.cgi?id=208085 reports that a slightly
overcommitted load, testing swap and zram along with i915, splats and
keeps on splatting, when it had better fail less noisily:
gnome-shell: page allocation failure: order:0,
mode:0x400d0(__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_RECLAIMABLE),
nodemask=(null),cpuset=/,mems_allowed=0
CPU: 2 PID: 1155 Comm: gnome-shell Not tainted 5.7.0-1.fc33.x86_64 #1
Call Trace:
dump_stack+0x64/0x88
warn_alloc.cold+0x75/0xd9
__alloc_pages_slowpath.constprop.0+0xcfa/0xd30
__alloc_pages_nodemask+0x2df/0x320
alloc_slab_page+0x195/0x310
allocate_slab+0x3c5/0x440
___slab_alloc+0x40c/0x5f0
__slab_alloc+0x1c/0x30
kmem_cache_alloc+0x20e/0x220
xas_nomem+0x28/0x70
add_to_swap_cache+0x321/0x400
__read_swap_cache_async+0x105/0x240
swap_cluster_readahead+0x22c/0x2e0
shmem_swapin+0x8e/0xc0
shmem_swapin_page+0x196/0x740
shmem_getpage_gfp+0x3a2/0xa60
shmem_read_mapping_page_gfp+0x32/0x60
shmem_get_pages+0x155/0x5e0 [i915]
__i915_gem_object_get_pages+0x68/0xa0 [i915]
i915_vma_pin+0x3fe/0x6c0 [i915]
eb_add_vma+0x10b/0x2c0 [i915]
i915_gem_do_execbuffer+0x704/0x3430 [i915]
i915_gem_execbuffer2_ioctl+0x1ea/0x3e0 [i915]
drm_ioctl_kernel+0x86/0xd0 [drm]
drm_ioctl+0x206/0x390 [drm]
ksys_ioctl+0x82/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5b/0xf0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported on 5.7, but it goes back really to 3.1: when
shmem_read_mapping_page_gfp() was implemented for use by i915, and
allowed for __GFP_NORETRY and __GFP_NOWARN flags in most places, but
missed swapin's "& GFP_KERNEL" mask for page tree node allocation in
__read_swap_cache_async() - that was to mask off HIGHUSER_MOVABLE bits
from what page cache uses, but GFP_RECLAIM_MASK is now what's needed.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2006151330070.11064@eggly.anvils
Fixes: 68da9f055755 ("tmpfs: pass gfp to shmem_getpage_gfp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Analyzed-by: Vlastimil Babka <vbabka(a)suse.cz>
Analyzed-by: Matthew Wilcox <willy(a)infradead.org>
Tested-by: Chris Murphy <lists(a)colorremedies.com>
Cc: <stable(a)vger.kernel.org> [3.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swap_state.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/swap_state.c~mm-fix-swap-cache-node-allocation-mask
+++ a/mm/swap_state.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-
+#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+ if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-vmstat-add-events-for-pmd-based-thp-migration-without-split-fix.patch
The patch titled
Subject: mm/slab: use memzero_explicit() in kzfree()
has been removed from the -mm tree. Its filename was
mm-slab-use-memzero_explicit-in-kzfree.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Waiman Long <longman(a)redhat.com>
Subject: mm/slab: use memzero_explicit() in kzfree()
The kzfree() function is normally used to clear some sensitive
information, like encryption keys, in the buffer before freeing it back to
the pool. Memset() is currently used for buffer clearing. However
unlikely, there is still a non-zero probability that the compiler may
choose to optimize away the memory clearing especially if LTO is being
used in the future. To make sure that this optimization will never
happen, memzero_explicit(), which is introduced in v3.18, is now used in
kzfree() to future-proof it.
Link: http://lkml.kernel.org/r/20200616154311.12314-2-longman@redhat.com
Fixes: 3ef0e5ba4673 ("slab: introduce kzfree()")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: David Howells <dhowells(a)redhat.com>
Cc: Jarkko Sakkinen <jarkko.sakkinen(a)linux.intel.com>
Cc: James Morris <jmorris(a)namei.org>
Cc: "Serge E. Hallyn" <serge(a)hallyn.com>
Cc: Joe Perches <joe(a)perches.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
Cc: "Jason A . Donenfeld" <Jason(a)zx2c4.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab_common.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/slab_common.c~mm-slab-use-memzero_explicit-in-kzfree
+++ a/mm/slab_common.c
@@ -1726,7 +1726,7 @@ void kzfree(const void *p)
if (unlikely(ZERO_OR_NULL_PTR(mem)))
return;
ks = ksize(mem);
- memset(mem, 0, ks);
+ memzero_explicit(mem, ks);
kfree(mem);
}
EXPORT_SYMBOL(kzfree);
_
Patches currently in -mm which might be from longman(a)redhat.com are
mm-treewide-rename-kzfree-to-kfree_sensitive.patch
sched-mm-optimize-current_gfp_context.patch
The patch titled
Subject: mm, slab: fix sign conversion problem in memcg_uncharge_slab()
has been removed from the -mm tree. Its filename was
mm-slab-fix-sign-conversion-problem-in-memcg_uncharge_slab.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Waiman Long <longman(a)redhat.com>
Subject: mm, slab: fix sign conversion problem in memcg_uncharge_slab()
It was found that running the LTP test on a PowerPC system could produce
erroneous values in /proc/meminfo, like:
MemTotal: 531915072 kB
MemFree: 507962176 kB
MemAvailable: 1100020596352 kB
Using bisection, the problem is tracked down to commit 9c315e4d7d8c ("mm:
memcg/slab: cache page number in memcg_(un)charge_slab()").
In memcg_uncharge_slab() with a "int order" argument:
unsigned int nr_pages = 1 << order;
:
mod_lruvec_state(lruvec, cache_vmstat_idx(s), -nr_pages);
The mod_lruvec_state() function will eventually call the
__mod_zone_page_state() which accepts a long argument. Depending on the
compiler and how inlining is done, "-nr_pages" may be treated as a
negative number or a very large positive number. Apparently, it was
treated as a large positive number in that PowerPC system leading to
incorrect stat counts. This problem hasn't been seen in x86-64 yet,
perhaps the gcc compiler there has some slight difference in behavior.
It is fixed by making nr_pages a signed value. For consistency, a similar
change is applied to memcg_charge_slab() as well.
Link: http://lkml.kernel.org/r/20200620184719.10994-1-longman@redhat.com
Fixes: 9c315e4d7d8c ("mm: memcg/slab: cache page number in memcg_(un)charge_slab()").
Signed-off-by: Waiman Long <longman(a)redhat.com>
Acked-by: Roman Gushchin <guro(a)fb.com>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/slab.h~mm-slab-fix-sign-conversion-problem-in-memcg_uncharge_slab
+++ a/mm/slab.h
@@ -348,7 +348,7 @@ static __always_inline int memcg_charge_
gfp_t gfp, int order,
struct kmem_cache *s)
{
- unsigned int nr_pages = 1 << order;
+ int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
int ret;
@@ -388,7 +388,7 @@ out:
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
- unsigned int nr_pages = 1 << order;
+ int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
_
Patches currently in -mm which might be from longman(a)redhat.com are
mm-treewide-rename-kzfree-to-kfree_sensitive.patch
sched-mm-optimize-current_gfp_context.patch
The patch titled
Subject: ocfs2: fix value of OCFS2_INVALID_SLOT
has been removed from the -mm tree. Its filename was
ocfs2-fix-value-of-ocfs2_invalid_slot.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Junxiao Bi <junxiao.bi(a)oracle.com>
Subject: ocfs2: fix value of OCFS2_INVALID_SLOT
In the ocfs2 disk layout, slot number is 16 bits, but in ocfs2
implementation, slot number is 32 bits. Usually this will not cause any
issue, because slot number is converted from u16 to u32, but
OCFS2_INVALID_SLOT was defined as -1, when an invalid slot number from
disk was obtained, its value was (u16)-1, and it was converted to u32.
Then the following checking in get_local_system_inode will be always
skipped:
static struct inode **get_local_system_inode(struct ocfs2_super *osb,
int type,
u32 slot)
{
BUG_ON(slot == OCFS2_INVALID_SLOT);
...
}
Link: http://lkml.kernel.org/r/20200616183829.87211-5-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/ocfs2_fs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/ocfs2/ocfs2_fs.h~ocfs2-fix-value-of-ocfs2_invalid_slot
+++ a/fs/ocfs2/ocfs2_fs.h
@@ -290,7 +290,7 @@
#define OCFS2_MAX_SLOTS 255
/* Slot map indicator for an empty slot */
-#define OCFS2_INVALID_SLOT -1
+#define OCFS2_INVALID_SLOT ((u16)-1)
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
_
Patches currently in -mm which might be from junxiao.bi(a)oracle.com are
The patch titled
Subject: ocfs2: load global_inode_alloc
has been removed from the -mm tree. Its filename was
ocfs2-load-global_inode_alloc.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Junxiao Bi <junxiao.bi(a)oracle.com>
Subject: ocfs2: load global_inode_alloc
Set global_inode_alloc as OCFS2_FIRST_ONLINE_SYSTEM_INODE, that will make
it load during mount. It can be used to test whether some global/system
inodes are valid. One use case is that nfsd will test whether root inode
is valid.
Link: http://lkml.kernel.org/r/20200616183829.87211-3-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/ocfs2_fs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/ocfs2/ocfs2_fs.h~ocfs2-load-global_inode_alloc
+++ a/fs/ocfs2/ocfs2_fs.h
@@ -326,8 +326,8 @@ struct ocfs2_system_inode_info {
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
SLOT_MAP_SYSTEM_INODE,
-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
USER_QUOTA_SYSTEM_INODE,
_
Patches currently in -mm which might be from junxiao.bi(a)oracle.com are
The patch titled
Subject: ocfs2: avoid inode removal while nfsd is accessing it
has been removed from the -mm tree. Its filename was
ocfs2-avoid-inode-removed-while-nfsd-access-it.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Junxiao Bi <junxiao.bi(a)oracle.com>
Subject: ocfs2: avoid inode removal while nfsd is accessing it
Patch series "ocfs2: fix nfsd over ocfs2 issues", v2.
This is a series of patches to fix issues on nfsd over ocfs2. patch 1 is
to avoid inode removed while nfsd access it patch 2 & 3 is to fix a panic
issue.
This patch (of 4):
When nfsd is getting file dentry using handle or parent dentry of some
dentry, one cluster lock is used to avoid inode removed from other node,
but it still could be removed from local node, so use a rw lock to avoid
this.
Link: http://lkml.kernel.org/r/20200616183829.87211-1-junxiao.bi@oracle.com
Link: http://lkml.kernel.org/r/20200616183829.87211-2-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/dlmglue.c | 17 ++++++++++++++++-
fs/ocfs2/ocfs2.h | 1 +
2 files changed, 17 insertions(+), 1 deletion(-)
--- a/fs/ocfs2/dlmglue.c~ocfs2-avoid-inode-removed-while-nfsd-access-it
+++ a/fs/ocfs2/dlmglue.c
@@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init
&ocfs2_nfs_sync_lops, osb);
}
+static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
+{
+ ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ init_rwsem(&osb->nfs_sync_rwlock);
+}
+
void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
{
struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
@@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_sup
if (ocfs2_is_hard_readonly(osb))
return -EROFS;
+ if (ex)
+ down_write(&osb->nfs_sync_rwlock);
+ else
+ down_read(&osb->nfs_sync_rwlock);
+
if (ocfs2_mount_local(osb))
return 0;
@@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_
if (!ocfs2_mount_local(osb))
ocfs2_cluster_unlock(osb, lockres,
ex ? LKM_EXMODE : LKM_PRMODE);
+ if (ex)
+ up_write(&osb->nfs_sync_rwlock);
+ else
+ up_read(&osb->nfs_sync_rwlock);
}
int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
@@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *o
local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
- ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ ocfs2_nfs_sync_lock_init(osb);
ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
osb->cconn = conn;
--- a/fs/ocfs2/ocfs2.h~ocfs2-avoid-inode-removed-while-nfsd-access-it
+++ a/fs/ocfs2/ocfs2.h
@@ -395,6 +395,7 @@ struct ocfs2_super
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
struct ocfs2_lock_res osb_nfs_sync_lockres;
+ struct rw_semaphore nfs_sync_rwlock;
struct ocfs2_lock_res osb_trim_fs_lockres;
struct mutex obs_trim_fs_mutex;
struct ocfs2_dlm_debug *osb_dlm_debug;
_
Patches currently in -mm which might be from junxiao.bi(a)oracle.com are
The patch titled
Subject: mm, compaction: make capture control handling safe wrt interrupts
has been removed from the -mm tree. Its filename was
mm-compaction-make-capture-control-handling-safe-wrt-interrupts.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Vlastimil Babka <vbabka(a)suse.cz>
Subject: mm, compaction: make capture control handling safe wrt interrupts
Hugh reports:
: While stressing compaction, one run oopsed on NULL capc->cc in
: __free_one_page()'s task_capc(zone): compact_zone_order() had been
: interrupted, and a page was being freed in the return from interrupt.
:
: Though you would not expect it from the source, both gccs I was using (a
: 4.8.1 and a 7.5.0) had chosen to compile compact_zone_order() with the
: ".cc = &cc" implemented by mov %rbx,-0xb0(%rbp) immediately before callq
: compact_zone - long after the "current->capture_control = &capc". An
: interrupt in between those finds capc->cc NULL (zeroed by an earlier rep
: stos).
:
: This could presumably be fixed by a barrier() before setting
: current->capture_control in compact_zone_order(); but would also need more
: care on return from compact_zone(), in order not to risk leaking a page
: captured by interrupt just before capture_control is reset.
:
: Maybe that is the preferable fix, but I felt safer for task_capc() to
: exclude the rather surprising possibility of capture at interrupt time.
I have checked that gcc10 also behaves the same.
The advantage of fix in compact_zone_order() is that we don't add another
test in the page freeing hot path, and that it might prevent future
problems if we stop exposing pointers to uninitialized structures in
current task.
So this patch implements the suggestion for compact_zone_order() with
barrier() (and WRITE_ONCE() to prevent store tearing) for setting
current->capture_control, and prevents page leaking with
WRITE_ONCE/READ_ONCE in the proper order.
Link: http://lkml.kernel.org/r/20200616082649.27173-1-vbabka@suse.cz
Fixes: 5e1f0f098b46 ("mm, compaction: capture a page under direct compaction")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Reported-by: Hugh Dickins <hughd(a)google.com>
Suggested-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Hugh Dickins <hughd(a)google.com>
Cc: Alex Shi <alex.shi(a)linux.alibaba.com>
Cc: Li Wang <liwang(a)redhat.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: <stable(a)vger.kernel.org> [5.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/compaction.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
--- a/mm/compaction.c~mm-compaction-make-capture-control-handling-safe-wrt-interrupts
+++ a/mm/compaction.c
@@ -2316,15 +2316,26 @@ static enum compact_result compact_zone_
.page = NULL,
};
- current->capture_control = &capc;
+ /*
+ * Make sure the structs are really initialized before we expose the
+ * capture control, in case we are interrupted and the interrupt handler
+ * frees a page.
+ */
+ barrier();
+ WRITE_ONCE(current->capture_control, &capc);
ret = compact_zone(&cc, &capc);
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
- *capture = capc.page;
- current->capture_control = NULL;
+ /*
+ * Make sure we hide capture control first before we read the captured
+ * page pointer, otherwise an interrupt could free and capture a page
+ * and we would leak it.
+ */
+ WRITE_ONCE(current->capture_control, NULL);
+ *capture = READ_ONCE(capc.page);
return ret;
}
_
Patches currently in -mm which might be from vbabka(a)suse.cz are
mm-slub-extend-slub_debug-syntax-for-multiple-blocks.patch
mm-slub-make-some-slub_debug-related-attributes-read-only.patch
mm-slub-remove-runtime-allocation-order-changes.patch
mm-slub-make-remaining-slub_debug-related-attributes-read-only.patch
mm-slub-make-reclaim_account-attribute-read-only.patch
mm-slub-introduce-static-key-for-slub_debug.patch
mm-slub-introduce-kmem_cache_debug_flags.patch
mm-slub-introduce-kmem_cache_debug_flags-fix.patch
mm-slub-extend-checks-guarded-by-slub_debug-static-key.patch
mm-slab-slub-move-and-improve-cache_from_obj.patch
mm-slab-slub-improve-error-reporting-and-overhead-of-cache_from_obj.patch
mm-slab-slub-improve-error-reporting-and-overhead-of-cache_from_obj-fix.patch
mm-page_alloc-use-unlikely-in-task_capc.patch
Hi,
Please consider applying the following patches to the listed stable
releases.
The following patches were found to be missing in stable releases by the
Chrome OS missing patch robot. The patches meet the following criteria.
- The patch includes a Fixes: tag
Note that the Fixes: tag does not always point to the correct upstream
SHA. In that case the correct upstream SHA is listed below.
- The patch referenced in the Fixes: tag has been applied to the listed
stable release
- The patch has not been applied to that stable release
All patches have been applied to the listed stable releases and to at least
one Chrome OS branch. Resulting images have been build- and runtime-tested
(where applicable) on real hardware and with virtual hardware on
kerneltests.org.
Thanks,
Guenter
---
Upstream commit ba69ead9e9e9 ("scsi: scsi_devinfo: handle non-terminated strings")
upstream: v4.15-rc4
Fixes: b8018b973c7c ("scsi: scsi_devinfo: fixup string compare")
in linux-4.4.y: ac8002f7cd0a
in linux-4.9.y: d74a350d7035
in linux-4.14.y: 13cd297f15ed
upstream: v4.15-rc1
Affected branches:
linux-4.4.y
linux-4.9.y
linux-4.14.y
Upstream commit 0d0d9a388a85 ("l2tp: Allow duplicate session creation with UDP")
upstream: v5.6-rc1
Fixes: dbdbc73b4478 ("l2tp: fix duplicate session creation")
in linux-4.4.y: e613c6208242
in linux-4.9.y: d9face6fc62a
upstream: v4.11-rc6
Affected branches:
linux-4.4.y
linux-4.9.y
linux-4.14.y (already applied)
linux-4.19.y (already applied)
linux-5.4.y (already applied)
Upstream commit 64611a15ca9d ("dm crypt: avoid truncating the logical block size")
upstream: v5.8-rc1
Fixes: ad6bf88a6c19 ("block: fix an integer overflow in logical block size")
in linux-4.4.y: b8cd70b724f0
in linux-4.9.y: 5dbde467ccd6
in linux-4.14.y: 0c7a7d8e62bd
in linux-4.19.y: a7f79052d1af
in linux-5.4.y: 6eed26e35cfd
upstream: v5.5-rc7
Affected branches:
linux-4.4.y (conflicts - backport needed)
linux-4.9.y (conflicts - backport needed)
linux-4.14.y
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit f29801030ac6 ("e1000e: Disable TSO for buffer overrun workaround")
upstream: v5.8-rc1
Fixes: b10effb92e27 ("e1000e: fix buffer overrun while the I219 is processing DMA transactions")
in linux-4.14.y: 0f478f25d50c
upstream: v4.15-rc1
Affected branches:
linux-4.14.y
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit 7c6d2ecbda83 ("net: be more gentle about silly gso requests coming from user")
upstream: v5.7
Fixes: 6dd912f82680 ("net: check untrusted gso_size at kernel entry")
in linux-4.14.y: 5c336039843d
in linux-4.19.y: 8920e8ae16a8
in linux-5.4.y: a93417dfc1b0
in linux-5.6.y: 17e1f3e32239
upstream: v5.7
Affected branches:
linux-4.14.y
linux-4.19.y
linux-5.4.y (already applied)
linux-5.6.y (already applied)
Upstream commit 8418897f1bf8 ("ext4: fix error pointer dereference")
upstream: v5.8-rc1
Fixes: fb265c9cb49e ("ext4: add ext4_sb_bread() to disambiguate ENOMEM cases")
in linux-4.14.y: 9da1f6d06b7a
in linux-4.19.y: b878c8a7f08f
upstream: v5.0-rc1
Affected branches:
linux-4.14.y
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit a75ca9303175 ("block/bio-integrity: don't free 'buf' if bio_integrity_add_page() failed")
upstream: v5.8-rc1
Fixes: e7bf90e5afe3 ("block/bio-integrity: fix a memory leak bug")
in linux-4.14.y: c35f5d31451f
in linux-4.19.y: af50d6a1c245
upstream: v5.3-rc1
Affected branches:
linux-4.14.y
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit be32acff4380 ("scsi: ufs: Don't update urgent bkops level when toggling auto bkops")
upstream: v5.8-rc1
Fixes: 24366c2afbb0 ("scsi: ufs: Recheck bkops level if bkops is disabled")
in linux-4.14.y: c909605d73ab
in linux-4.19.y: 028a925c0540
in linux-5.4.y: 3c9edf55817a
upstream: v5.6-rc1
Affected branches:
linux-4.14.y
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit 2f02fd3fa13e ("fanotify: fix ignore mask logic for events on child and on dir")
upstream: v5.8-rc1
Fixes: b469e7e47c8a ("fanotify: fix handling of events on child sub-directory")
in linux-4.9.y: 987d8ff3a2d8
in linux-4.14.y: 515160e3c4f2
in linux-4.19.y: 20663629f6ae
upstream: v4.20-rc3
Affected branches:
linux-4.9.y (conflicts - backport needed)
linux-4.14.y (conflicts - backport needed)
linux-4.19.y
linux-5.4.y (already applied)
linux-5.6.y (already applied)
linux-5.7.y (already applied)
Upstream commit 607fa205a7e4 ("ASoC: core: only convert non DPCM link to DPCM link")
upstream: v5.8-rc1
Fixes: 218fe9b7ec7f ("ASoC: soc-core: Set dpcm_playback / dpcm_capture")
in linux-4.19.y: 57f633cfe3fb
in linux-5.4.y: 5585d2a98904
upstream: v5.5-rc6
Affected branches:
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit 3e5095eebe01 ("PCI: vmd: Filter resource type bits from shadow register")
upstream: v5.8-rc1
Fixes: a1a30170138c ("PCI: vmd: Fix shadow offsets to reflect spec changes")
in linux-4.19.y: 956ce989c41f
upstream: v5.4-rc1
Affected branches:
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit 8a325dd06f23 ("of: Fix a refcounting bug in __of_attach_node_sysfs()")
upstream: v5.8-rc1
Fixes: 5b2c2f5a0ea3 ("of: overlay: add missing of_node_get() in __of_attach_node_sysfs")
in linux-4.19.y: 9af27fab0061
upstream: v5.0-rc1
Affected branches:
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit 8bc3b5e4b70d ("xfs: clean up the error handling in xfs_swap_extents")
upstream: v5.8-rc1
Fixes: 96987eea537d ("xfs: cancel COW blocks before swapext")
in linux-4.19.y: a585ac0e767b
upstream: v4.20-rc1
Affected branches:
linux-4.19.y
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit fe17e6cdc0fe ("ASoC: SOF: imx8: Fix randbuild error")
upstream: v5.8-rc1
Fixes: f9ad75468453 ("ASoC: SOF: imx: fix reverse CONFIG_SND_SOC_SOF_OF dependency")
in linux-5.4.y: d98020a3fddd
upstream: v5.5-rc1
Affected branches:
linux-5.4.y
linux-5.6.y
linux-5.7.y
Upstream commit 10ce77e4817f ("ALSA: usb-audio: Add duplex sound support for USB devices using implicit feedback")
upstream: v5.8-rc1
Fixes: c249177944b6 ("ALSA: usb-audio: add implicit fb quirk for MOTU M Series")
in linux-5.4.y: da2d50868e59
upstream: v5.6-rc1
Affected branches:
linux-5.4.y
linux-5.6.y
linux-5.7.y
Hi folks,
I'm the maintainer in Debian for strace. Trying to reproduce
https://bugs.debian.org/963462 on my machine (Thinkpad T470), I've
found a repeatable hard lockup running the strace testsuite. Each time
it seems to have failed in a slightly different place in the testsuite
(suggesting it's not one particular syscall test that's triggering the
failure). I initially found this using Debian's current Buster kernel
(4.19.118+2+deb10u1), then backtracking I found that 4.19.98+1+deb10u1
worked fine.
I've bisected to find the failure point along the linux-4.19.y stable
branch and what I've got to is the following commit:
e58f543fc7c0926f31a49619c1a3648e49e8d233 is the first bad commit
commit e58f543fc7c0926f31a49619c1a3648e49e8d233
Author: Jann Horn <jannh(a)google.com>
Date: Thu Sep 13 18:12:09 2018 +0200
apparmor: don't try to replace stale label in ptrace access check
[ Upstream commit 1f8266ff58840d698a1e96d2274189de1bdf7969 ]
As a comment above begin_current_label_crit_section() explains,
begin_current_label_crit_section() must run in sleepable context because
when label_is_stale() is true, aa_replace_current_label() runs, which uses
prepare_creds(), which can sleep.
Until now, the ptrace access check (which runs with a task lock held)
violated this rule.
Also add a might_sleep() assertion to begin_current_label_crit_section(),
because asserts are less likely to be ignored than comments.
Fixes: b2d09ae449ced ("apparmor: move ptrace checks to using labels")
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: John Johansen <john.johansen(a)canonical.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
:040000 040000 ca92f885a38c1747b812116f19de6967084a647e 865a227665e460e159502f21e8a16e6fa590bf50 M security
Considering I'm running strace build tests to provoke this bug,
finding the failure in a commit talking about ptrace changes does look
very suspicious...!
Annoyingly, I can't reproduce this on my disparate other machines
here, suggesting it's maybe(?) timing related.
Hope this helps - happy to give more information, test things, etc.
--
Steve McIntyre, Cambridge, UK. steve(a)einval.com
"Managing a volunteer open source project is a lot like herding
kittens, except the kittens randomly appear and disappear because they
have day jobs." -- Matt Mackall
This is a note to let you know that I've just added the patch titled
usb: cdns3: trace: using correct dir value
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From ba3a80fe0fb67d8790f62b7bc60df97406d89871 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen(a)nxp.com>
Date: Tue, 23 Jun 2020 11:09:17 +0800
Subject: usb: cdns3: trace: using correct dir value
It should use the correct direction value from register, not depends
on previous software setting. It fixed the EP number wrong issue at
trace when the TRBERR interrupt occurs for EP0IN.
When the EP0IN IOC has finished, software prepares the setup packet
request, the expected direction is OUT, but at that time, the TRBERR
for EP0IN may occur since it is DMULT mode, the DMA does not stop
until TRBERR has met.
Cc: <stable(a)vger.kernel.org>
Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
Reviewed-by: Pawel Laszczak <pawell(a)cadence.com>
Signed-off-by: Peter Chen <peter.chen(a)nxp.com>
Signed-off-by: Felipe Balbi <balbi(a)kernel.org>
---
drivers/usb/cdns3/trace.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/cdns3/trace.h b/drivers/usb/cdns3/trace.h
index 8d121e207fd8..755c56582257 100644
--- a/drivers/usb/cdns3/trace.h
+++ b/drivers/usb/cdns3/trace.h
@@ -156,7 +156,7 @@ DECLARE_EVENT_CLASS(cdns3_log_ep0_irq,
__dynamic_array(char, str, CDNS3_MSG_MAX)
),
TP_fast_assign(
- __entry->ep_dir = priv_dev->ep0_data_dir;
+ __entry->ep_dir = priv_dev->selected_ep;
__entry->ep_sts = ep_sts;
),
TP_printk("%s", cdns3_decode_ep0_irq(__get_str(str),
--
2.27.0
This is a note to let you know that I've just added the patch titled
usb: cdns3: ep0: add spinlock for cdns3_check_new_setup
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 2587a029fa2a877d0a8dda955ef1b24c94b4bd0e Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen(a)nxp.com>
Date: Tue, 23 Jun 2020 11:09:18 +0800
Subject: usb: cdns3: ep0: add spinlock for cdns3_check_new_setup
The other thread may access other endpoints when the cdns3_check_new_setup
is handling, add spinlock to protect it.
Cc: <stable(a)vger.kernel.org>
Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
Reviewed-by: Pawel Laszczak <pawell(a)cadence.com>
Signed-off-by: Peter Chen <peter.chen(a)nxp.com>
Signed-off-by: Felipe Balbi <balbi(a)kernel.org>
---
drivers/usb/cdns3/ep0.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/cdns3/ep0.c b/drivers/usb/cdns3/ep0.c
index 04e49582fb55..61ec5bb2b0ca 100644
--- a/drivers/usb/cdns3/ep0.c
+++ b/drivers/usb/cdns3/ep0.c
@@ -705,15 +705,17 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep,
int ret = 0;
u8 zlp = 0;
+ spin_lock_irqsave(&priv_dev->lock, flags);
trace_cdns3_ep0_queue(priv_dev, request);
/* cancel the request if controller receive new SETUP packet. */
- if (cdns3_check_new_setup(priv_dev))
+ if (cdns3_check_new_setup(priv_dev)) {
+ spin_unlock_irqrestore(&priv_dev->lock, flags);
return -ECONNRESET;
+ }
/* send STATUS stage. Should be called only for SET_CONFIGURATION */
if (priv_dev->ep0_stage == CDNS3_STATUS_STAGE) {
- spin_lock_irqsave(&priv_dev->lock, flags);
cdns3_select_ep(priv_dev, 0x00);
erdy_sent = !priv_dev->hw_configured_flag;
@@ -738,7 +740,6 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep,
return 0;
}
- spin_lock_irqsave(&priv_dev->lock, flags);
if (!list_empty(&priv_ep->pending_req_list)) {
dev_err(priv_dev->dev,
"can't handle multiple requests for ep0\n");
--
2.27.0
This is a note to let you know that I've just added the patch titled
usb: cdns3: ep0: fix the test mode set incorrectly
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From b51e1cf64f93acebb6d8afbacd648a6ecefc39b4 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen(a)nxp.com>
Date: Tue, 23 Jun 2020 11:09:16 +0800
Subject: usb: cdns3: ep0: fix the test mode set incorrectly
The 'tmode' is ctrl->wIndex, changing it as the real test
mode value for register assignment.
Cc: <stable(a)vger.kernel.org>
Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
Reviewed-by: Jun Li <jun.li(a)nxp.com>
Signed-off-by: Peter Chen <peter.chen(a)nxp.com>
Signed-off-by: Felipe Balbi <balbi(a)kernel.org>
---
drivers/usb/cdns3/ep0.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/cdns3/ep0.c b/drivers/usb/cdns3/ep0.c
index 82645a2a0f52..04e49582fb55 100644
--- a/drivers/usb/cdns3/ep0.c
+++ b/drivers/usb/cdns3/ep0.c
@@ -327,7 +327,8 @@ static int cdns3_ep0_feature_handle_device(struct cdns3_device *priv_dev,
if (!set || (tmode & 0xff) != 0)
return -EINVAL;
- switch (tmode >> 8) {
+ tmode >>= 8;
+ switch (tmode) {
case TEST_J:
case TEST_K:
case TEST_SE0_NAK:
--
2.27.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03fe7aaf0c3d40ef7feff2bdc7180c146989586a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk(a)kernel.org>
Date: Wed, 10 Jun 2020 17:41:57 +0200
Subject: [PATCH] spi: spi-fsl-dspi: Free DMA memory with matching function
Driver allocates DMA memory with dma_alloc_coherent() but frees it with
dma_unmap_single().
This causes DMA warning during system shutdown (with DMA debugging) on
Toradex Colibri VF50 module:
WARNING: CPU: 0 PID: 1 at ../kernel/dma/debug.c:1036 check_unmap+0x3fc/0xb04
DMA-API: fsl-edma 40098000.dma-controller: device driver frees DMA memory with wrong function
[device address=0x0000000087040000] [size=8 bytes] [mapped as coherent] [unmapped as single]
Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree)
(unwind_backtrace) from [<8010bb34>] (show_stack+0x10/0x14)
(show_stack) from [<8011ced8>] (__warn+0xf0/0x108)
(__warn) from [<8011cf64>] (warn_slowpath_fmt+0x74/0xb8)
(warn_slowpath_fmt) from [<8017d170>] (check_unmap+0x3fc/0xb04)
(check_unmap) from [<8017d900>] (debug_dma_unmap_page+0x88/0x90)
(debug_dma_unmap_page) from [<80601d68>] (dspi_release_dma+0x88/0x110)
(dspi_release_dma) from [<80601e4c>] (dspi_shutdown+0x5c/0x80)
(dspi_shutdown) from [<805845f8>] (device_shutdown+0x17c/0x220)
(device_shutdown) from [<80143ef8>] (kernel_restart+0xc/0x50)
(kernel_restart) from [<801441cc>] (__do_sys_reboot+0x18c/0x210)
(__do_sys_reboot) from [<80100060>] (ret_fast_syscall+0x0/0x28)
DMA-API: Mapped at:
dma_alloc_attrs+0xa4/0x130
dspi_probe+0x568/0x7b4
platform_drv_probe+0x6c/0xa4
really_probe+0x208/0x348
driver_probe_device+0x5c/0xb4
Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid")
Signed-off-by: Krzysztof Kozlowski <krzk(a)kernel.org>
Acked-by: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/1591803717-11218-1-git-send-email-krzk@kernel.org
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index a35faced0456..58190c94561f 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -588,14 +588,14 @@ static void dspi_release_dma(struct fsl_dspi *dspi)
return;
if (dma->chan_tx) {
- dma_unmap_single(dma->chan_tx->device->dev, dma->tx_dma_phys,
- dma_bufsize, DMA_TO_DEVICE);
+ dma_free_coherent(dma->chan_tx->device->dev, dma_bufsize,
+ dma->tx_dma_buf, dma->tx_dma_phys);
dma_release_channel(dma->chan_tx);
}
if (dma->chan_rx) {
- dma_unmap_single(dma->chan_rx->device->dev, dma->rx_dma_phys,
- dma_bufsize, DMA_FROM_DEVICE);
+ dma_free_coherent(dma->chan_rx->device->dev, dma_bufsize,
+ dma->rx_dma_buf, dma->rx_dma_phys);
dma_release_channel(dma->chan_rx);
}
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03fe7aaf0c3d40ef7feff2bdc7180c146989586a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk(a)kernel.org>
Date: Wed, 10 Jun 2020 17:41:57 +0200
Subject: [PATCH] spi: spi-fsl-dspi: Free DMA memory with matching function
Driver allocates DMA memory with dma_alloc_coherent() but frees it with
dma_unmap_single().
This causes DMA warning during system shutdown (with DMA debugging) on
Toradex Colibri VF50 module:
WARNING: CPU: 0 PID: 1 at ../kernel/dma/debug.c:1036 check_unmap+0x3fc/0xb04
DMA-API: fsl-edma 40098000.dma-controller: device driver frees DMA memory with wrong function
[device address=0x0000000087040000] [size=8 bytes] [mapped as coherent] [unmapped as single]
Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree)
(unwind_backtrace) from [<8010bb34>] (show_stack+0x10/0x14)
(show_stack) from [<8011ced8>] (__warn+0xf0/0x108)
(__warn) from [<8011cf64>] (warn_slowpath_fmt+0x74/0xb8)
(warn_slowpath_fmt) from [<8017d170>] (check_unmap+0x3fc/0xb04)
(check_unmap) from [<8017d900>] (debug_dma_unmap_page+0x88/0x90)
(debug_dma_unmap_page) from [<80601d68>] (dspi_release_dma+0x88/0x110)
(dspi_release_dma) from [<80601e4c>] (dspi_shutdown+0x5c/0x80)
(dspi_shutdown) from [<805845f8>] (device_shutdown+0x17c/0x220)
(device_shutdown) from [<80143ef8>] (kernel_restart+0xc/0x50)
(kernel_restart) from [<801441cc>] (__do_sys_reboot+0x18c/0x210)
(__do_sys_reboot) from [<80100060>] (ret_fast_syscall+0x0/0x28)
DMA-API: Mapped at:
dma_alloc_attrs+0xa4/0x130
dspi_probe+0x568/0x7b4
platform_drv_probe+0x6c/0xa4
really_probe+0x208/0x348
driver_probe_device+0x5c/0xb4
Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid")
Signed-off-by: Krzysztof Kozlowski <krzk(a)kernel.org>
Acked-by: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/1591803717-11218-1-git-send-email-krzk@kernel.org
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index a35faced0456..58190c94561f 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -588,14 +588,14 @@ static void dspi_release_dma(struct fsl_dspi *dspi)
return;
if (dma->chan_tx) {
- dma_unmap_single(dma->chan_tx->device->dev, dma->tx_dma_phys,
- dma_bufsize, DMA_TO_DEVICE);
+ dma_free_coherent(dma->chan_tx->device->dev, dma_bufsize,
+ dma->tx_dma_buf, dma->tx_dma_phys);
dma_release_channel(dma->chan_tx);
}
if (dma->chan_rx) {
- dma_unmap_single(dma->chan_rx->device->dev, dma->rx_dma_phys,
- dma_bufsize, DMA_FROM_DEVICE);
+ dma_free_coherent(dma->chan_rx->device->dev, dma_bufsize,
+ dma->rx_dma_buf, dma->rx_dma_phys);
dma_release_channel(dma->chan_rx);
}
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03fe7aaf0c3d40ef7feff2bdc7180c146989586a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk(a)kernel.org>
Date: Wed, 10 Jun 2020 17:41:57 +0200
Subject: [PATCH] spi: spi-fsl-dspi: Free DMA memory with matching function
Driver allocates DMA memory with dma_alloc_coherent() but frees it with
dma_unmap_single().
This causes DMA warning during system shutdown (with DMA debugging) on
Toradex Colibri VF50 module:
WARNING: CPU: 0 PID: 1 at ../kernel/dma/debug.c:1036 check_unmap+0x3fc/0xb04
DMA-API: fsl-edma 40098000.dma-controller: device driver frees DMA memory with wrong function
[device address=0x0000000087040000] [size=8 bytes] [mapped as coherent] [unmapped as single]
Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree)
(unwind_backtrace) from [<8010bb34>] (show_stack+0x10/0x14)
(show_stack) from [<8011ced8>] (__warn+0xf0/0x108)
(__warn) from [<8011cf64>] (warn_slowpath_fmt+0x74/0xb8)
(warn_slowpath_fmt) from [<8017d170>] (check_unmap+0x3fc/0xb04)
(check_unmap) from [<8017d900>] (debug_dma_unmap_page+0x88/0x90)
(debug_dma_unmap_page) from [<80601d68>] (dspi_release_dma+0x88/0x110)
(dspi_release_dma) from [<80601e4c>] (dspi_shutdown+0x5c/0x80)
(dspi_shutdown) from [<805845f8>] (device_shutdown+0x17c/0x220)
(device_shutdown) from [<80143ef8>] (kernel_restart+0xc/0x50)
(kernel_restart) from [<801441cc>] (__do_sys_reboot+0x18c/0x210)
(__do_sys_reboot) from [<80100060>] (ret_fast_syscall+0x0/0x28)
DMA-API: Mapped at:
dma_alloc_attrs+0xa4/0x130
dspi_probe+0x568/0x7b4
platform_drv_probe+0x6c/0xa4
really_probe+0x208/0x348
driver_probe_device+0x5c/0xb4
Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid")
Signed-off-by: Krzysztof Kozlowski <krzk(a)kernel.org>
Acked-by: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/1591803717-11218-1-git-send-email-krzk@kernel.org
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index a35faced0456..58190c94561f 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -588,14 +588,14 @@ static void dspi_release_dma(struct fsl_dspi *dspi)
return;
if (dma->chan_tx) {
- dma_unmap_single(dma->chan_tx->device->dev, dma->tx_dma_phys,
- dma_bufsize, DMA_TO_DEVICE);
+ dma_free_coherent(dma->chan_tx->device->dev, dma_bufsize,
+ dma->tx_dma_buf, dma->tx_dma_phys);
dma_release_channel(dma->chan_tx);
}
if (dma->chan_rx) {
- dma_unmap_single(dma->chan_rx->device->dev, dma->rx_dma_phys,
- dma_bufsize, DMA_FROM_DEVICE);
+ dma_free_coherent(dma->chan_rx->device->dev, dma_bufsize,
+ dma->rx_dma_buf, dma->rx_dma_phys);
dma_release_channel(dma->chan_rx);
}
}
Do not fail probing when device_init_wakeup fails. Also clear wakeup
on remove.
device_init_wakeup fails when the device is already enabled as wakeup
device. Hence, the driver fails to probe the device if:
- The device has already been enabled for wakeup (via /proc/acpi/wakeup)
- The driver has been unloaded and is being loaded again.
This goal of the patch is to fix the above cases.
Overwhelming majority of the drivers do not consider device_init_wakeup
failure as a fatal error and proceed regardless of whether it succeeds
or not.
Changes since v2:
- disabled wakeup in remove
- CC'ing stable
- description fixed
Changes since v1:
- added Fixes tag
Fixes: cd70de2d356ee ("media: platform: Add ChromeOS EC CEC driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dariusz Marcinkiewicz <darekm(a)google.com>
---
drivers/media/cec/platform/cros-ec/cros-ec-cec.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c
index 0e7e2772f08f..3881ed7bc3d9 100644
--- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c
+++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c
@@ -277,12 +277,6 @@ static int cros_ec_cec_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, cros_ec_cec);
cros_ec_cec->cros_ec = cros_ec;
- ret = device_init_wakeup(&pdev->dev, 1);
- if (ret) {
- dev_err(&pdev->dev, "failed to initialize wakeup\n");
- return ret;
- }
-
cros_ec_cec->adap = cec_allocate_adapter(&cros_ec_cec_ops, cros_ec_cec,
DRV_NAME,
CEC_CAP_DEFAULTS |
@@ -310,6 +304,8 @@ static int cros_ec_cec_probe(struct platform_device *pdev)
if (ret < 0)
goto out_probe_notify;
+ device_init_wakeup(&pdev->dev, 1);
+
return 0;
out_probe_notify:
@@ -339,6 +335,8 @@ static int cros_ec_cec_remove(struct platform_device *pdev)
cros_ec_cec->adap);
cec_unregister_adapter(cros_ec_cec->adap);
+ device_init_wakeup(&pdev->dev, 0);
+
return 0;
}
--
2.27.0.212.ge8ba1cc988-goog
Building the current 5.8 kernel for a e500 machine with
CONFIG_RANDOMIZE_BASE set yields the following failure:
arch/powerpc/mm/nohash/kaslr_booke.c: In function 'kaslr_early_init':
arch/powerpc/mm/nohash/kaslr_booke.c:387:2: error: implicit declaration
of function 'flush_icache_range'; did you mean 'flush_tlb_range'?
[-Werror=implicit-function-declaration]
Indeed, including asm/cacheflush.h into kaslr_booke.c fixes the build.
The issue dates back to the introduction of that file and probably went
unnoticed because there's no in-tree defconfig with CONFIG_RANDOMIZE_BASE
set.
Fixes: 2b0e86cc5de6 ("powerpc/fsl_booke/32: implement KASLR infrastructure")
Cc: stable(a)vger.kernel.org
Signed-off-by: Arseny Solokha <asolokha(a)kb.kras.ru>
---
arch/powerpc/mm/nohash/kaslr_booke.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 4a75f2d9bf0e..bce0e5349978 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -14,6 +14,7 @@
#include <linux/memblock.h>
#include <linux/libfdt.h>
#include <linux/crash_core.h>
+#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/kdump.h>
--
2.27.0
From: Johannes Weiner <hannes(a)cmpxchg.org>
Subject: mm: memcontrol: handle div0 crash race condition in memory.low
Tejun reports seeing rare div0 crashes in memory.low stress testing:
[37228.504582] RIP: 0010:mem_cgroup_calculate_protection+0xed/0x150
[37228.505059] Code: 0f 46 d1 4c 39 d8 72 57 f6 05 16 d6 42 01 40 74 1f 4c 39 d8 76 1a 4c 39 d1 76 15 4c 29 d1 4c 29 d8 4d 29 d9 31 d2 48 0f af c1 <49> f7 f1 49 01 c2 4c 89 96 38 01 00 00 5d c3 48 0f af c7 31 d2 49
[37228.506254] RSP: 0018:ffffa14e01d6fcd0 EFLAGS: 00010246
[37228.506769] RAX: 000000000243e384 RBX: 0000000000000000 RCX: 0000000000008f4b
[37228.507319] RDX: 0000000000000000 RSI: ffff8b89bee84000 RDI: 0000000000000000
[37228.507869] RBP: ffffa14e01d6fcd0 R08: ffff8b89ca7d40f8 R09: 0000000000000000
[37228.508376] R10: 0000000000000000 R11: 00000000006422f7 R12: 0000000000000000
[37228.508881] R13: ffff8b89d9617000 R14: ffff8b89bee84000 R15: ffffa14e01d6fdb8
[37228.509397] FS: 0000000000000000(0000) GS:ffff8b8a1f1c0000(0000) knlGS:0000000000000000
[37228.509917] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[37228.510442] CR2: 00007f93b1fc175b CR3: 000000016100a000 CR4: 0000000000340ea0
[37228.511076] Call Trace:
[37228.511561] shrink_node+0x1e5/0x6c0
[37228.512044] balance_pgdat+0x32d/0x5f0
[37228.512521] kswapd+0x1d7/0x3d0
[37228.513346] ? wait_woken+0x80/0x80
[37228.514170] kthread+0x11c/0x160
[37228.514983] ? balance_pgdat+0x5f0/0x5f0
[37228.515797] ? kthread_park+0x90/0x90
[37228.516593] ret_from_fork+0x1f/0x30
This happens when parent_usage == siblings_protected. We check that usage
is bigger than protected, which should imply parent_usage being bigger
than siblings_protected. However, we don't read (or even update) these
values atomically, and they can be out of sync as the memory state changes
under us. A bit of fluctuation around the target protection isn't a big
deal, but we need to handle the div0 case.
Check the parent state explicitly to make sure we have a reasonable
positive value for the divisor.
Link: http://lkml.kernel.org/r/20200615140658.601684-1-hannes@cmpxchg.org
Fixes: 8a931f801340 ("mm: memcontrol: recursive memory.low protection")
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reported-by: Tejun Heo <tj(a)kernel.org>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Chris Down <chris(a)chrisdown.name>
Cc: Roman Gushchin <guro(a)fb.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/mm/memcontrol.c~mm-memcontrol-handle-div0-crash-race-condition-in-memorylow
+++ a/mm/memcontrol.c
@@ -6360,11 +6360,16 @@ static unsigned long effective_protectio
* We're using unprotected memory for the weight so that if
* some cgroups DO claim explicit protection, we don't protect
* the same bytes twice.
+ *
+ * Check both usage and parent_usage against the respective
+ * protected values. One should imply the other, but they
+ * aren't read atomically - make sure the division is sane.
*/
if (!(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT))
return ep;
-
- if (parent_effective > siblings_protected && usage > protected) {
+ if (parent_effective > siblings_protected &&
+ parent_usage > siblings_protected &&
+ usage > protected) {
unsigned long unclaimed;
unclaimed = parent_effective - siblings_protected;
_
From: Waiman Long <longman(a)redhat.com>
Subject: mm/slab: use memzero_explicit() in kzfree()
The kzfree() function is normally used to clear some sensitive
information, like encryption keys, in the buffer before freeing it back to
the pool. Memset() is currently used for buffer clearing. However
unlikely, there is still a non-zero probability that the compiler may
choose to optimize away the memory clearing especially if LTO is being
used in the future. To make sure that this optimization will never
happen, memzero_explicit(), which is introduced in v3.18, is now used in
kzfree() to future-proof it.
Link: http://lkml.kernel.org/r/20200616154311.12314-2-longman@redhat.com
Fixes: 3ef0e5ba4673 ("slab: introduce kzfree()")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: David Howells <dhowells(a)redhat.com>
Cc: Jarkko Sakkinen <jarkko.sakkinen(a)linux.intel.com>
Cc: James Morris <jmorris(a)namei.org>
Cc: "Serge E. Hallyn" <serge(a)hallyn.com>
Cc: Joe Perches <joe(a)perches.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
Cc: "Jason A . Donenfeld" <Jason(a)zx2c4.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab_common.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/slab_common.c~mm-slab-use-memzero_explicit-in-kzfree
+++ a/mm/slab_common.c
@@ -1726,7 +1726,7 @@ void kzfree(const void *p)
if (unlikely(ZERO_OR_NULL_PTR(mem)))
return;
ks = ksize(mem);
- memset(mem, 0, ks);
+ memzero_explicit(mem, ks);
kfree(mem);
}
EXPORT_SYMBOL(kzfree);
_
From: Waiman Long <longman(a)redhat.com>
Subject: mm, slab: fix sign conversion problem in memcg_uncharge_slab()
It was found that running the LTP test on a PowerPC system could produce
erroneous values in /proc/meminfo, like:
MemTotal: 531915072 kB
MemFree: 507962176 kB
MemAvailable: 1100020596352 kB
Using bisection, the problem is tracked down to commit 9c315e4d7d8c ("mm:
memcg/slab: cache page number in memcg_(un)charge_slab()").
In memcg_uncharge_slab() with a "int order" argument:
unsigned int nr_pages = 1 << order;
:
mod_lruvec_state(lruvec, cache_vmstat_idx(s), -nr_pages);
The mod_lruvec_state() function will eventually call the
__mod_zone_page_state() which accepts a long argument. Depending on the
compiler and how inlining is done, "-nr_pages" may be treated as a
negative number or a very large positive number. Apparently, it was
treated as a large positive number in that PowerPC system leading to
incorrect stat counts. This problem hasn't been seen in x86-64 yet,
perhaps the gcc compiler there has some slight difference in behavior.
It is fixed by making nr_pages a signed value. For consistency, a similar
change is applied to memcg_charge_slab() as well.
Link: http://lkml.kernel.org/r/20200620184719.10994-1-longman@redhat.com
Fixes: 9c315e4d7d8c ("mm: memcg/slab: cache page number in memcg_(un)charge_slab()").
Signed-off-by: Waiman Long <longman(a)redhat.com>
Acked-by: Roman Gushchin <guro(a)fb.com>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/slab.h~mm-slab-fix-sign-conversion-problem-in-memcg_uncharge_slab
+++ a/mm/slab.h
@@ -348,7 +348,7 @@ static __always_inline int memcg_charge_
gfp_t gfp, int order,
struct kmem_cache *s)
{
- unsigned int nr_pages = 1 << order;
+ int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
int ret;
@@ -388,7 +388,7 @@ out:
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
- unsigned int nr_pages = 1 << order;
+ int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
_
From: Junxiao Bi <junxiao.bi(a)oracle.com>
Subject: ocfs2: fix value of OCFS2_INVALID_SLOT
In the ocfs2 disk layout, slot number is 16 bits, but in ocfs2
implementation, slot number is 32 bits. Usually this will not cause any
issue, because slot number is converted from u16 to u32, but
OCFS2_INVALID_SLOT was defined as -1, when an invalid slot number from
disk was obtained, its value was (u16)-1, and it was converted to u32.
Then the following checking in get_local_system_inode will be always
skipped:
static struct inode **get_local_system_inode(struct ocfs2_super *osb,
int type,
u32 slot)
{
BUG_ON(slot == OCFS2_INVALID_SLOT);
...
}
Link: http://lkml.kernel.org/r/20200616183829.87211-5-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/ocfs2_fs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/ocfs2/ocfs2_fs.h~ocfs2-fix-value-of-ocfs2_invalid_slot
+++ a/fs/ocfs2/ocfs2_fs.h
@@ -290,7 +290,7 @@
#define OCFS2_MAX_SLOTS 255
/* Slot map indicator for an empty slot */
-#define OCFS2_INVALID_SLOT -1
+#define OCFS2_INVALID_SLOT ((u16)-1)
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
_
From: Junxiao Bi <junxiao.bi(a)oracle.com>
Subject: ocfs2: load global_inode_alloc
Set global_inode_alloc as OCFS2_FIRST_ONLINE_SYSTEM_INODE, that will make
it load during mount. It can be used to test whether some global/system
inodes are valid. One use case is that nfsd will test whether root inode
is valid.
Link: http://lkml.kernel.org/r/20200616183829.87211-3-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/ocfs2_fs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/ocfs2/ocfs2_fs.h~ocfs2-load-global_inode_alloc
+++ a/fs/ocfs2/ocfs2_fs.h
@@ -326,8 +326,8 @@ struct ocfs2_system_inode_info {
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
SLOT_MAP_SYSTEM_INODE,
-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
USER_QUOTA_SYSTEM_INODE,
_
From: Junxiao Bi <junxiao.bi(a)oracle.com>
Subject: ocfs2: avoid inode removal while nfsd is accessing it
Patch series "ocfs2: fix nfsd over ocfs2 issues", v2.
This is a series of patches to fix issues on nfsd over ocfs2. patch 1 is
to avoid inode removed while nfsd access it patch 2 & 3 is to fix a panic
issue.
This patch (of 4):
When nfsd is getting file dentry using handle or parent dentry of some
dentry, one cluster lock is used to avoid inode removed from other node,
but it still could be removed from local node, so use a rw lock to avoid
this.
Link: http://lkml.kernel.org/r/20200616183829.87211-1-junxiao.bi@oracle.com
Link: http://lkml.kernel.org/r/20200616183829.87211-2-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/dlmglue.c | 17 ++++++++++++++++-
fs/ocfs2/ocfs2.h | 1 +
2 files changed, 17 insertions(+), 1 deletion(-)
--- a/fs/ocfs2/dlmglue.c~ocfs2-avoid-inode-removed-while-nfsd-access-it
+++ a/fs/ocfs2/dlmglue.c
@@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init
&ocfs2_nfs_sync_lops, osb);
}
+static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
+{
+ ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ init_rwsem(&osb->nfs_sync_rwlock);
+}
+
void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
{
struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
@@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_sup
if (ocfs2_is_hard_readonly(osb))
return -EROFS;
+ if (ex)
+ down_write(&osb->nfs_sync_rwlock);
+ else
+ down_read(&osb->nfs_sync_rwlock);
+
if (ocfs2_mount_local(osb))
return 0;
@@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_
if (!ocfs2_mount_local(osb))
ocfs2_cluster_unlock(osb, lockres,
ex ? LKM_EXMODE : LKM_PRMODE);
+ if (ex)
+ up_write(&osb->nfs_sync_rwlock);
+ else
+ up_read(&osb->nfs_sync_rwlock);
}
int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
@@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *o
local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
- ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ ocfs2_nfs_sync_lock_init(osb);
ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
osb->cconn = conn;
--- a/fs/ocfs2/ocfs2.h~ocfs2-avoid-inode-removed-while-nfsd-access-it
+++ a/fs/ocfs2/ocfs2.h
@@ -395,6 +395,7 @@ struct ocfs2_super
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
struct ocfs2_lock_res osb_nfs_sync_lockres;
+ struct rw_semaphore nfs_sync_rwlock;
struct ocfs2_lock_res osb_trim_fs_lockres;
struct mutex obs_trim_fs_mutex;
struct ocfs2_dlm_debug *osb_dlm_debug;
_
From: Vlastimil Babka <vbabka(a)suse.cz>
Subject: mm, compaction: make capture control handling safe wrt interrupts
Hugh reports:
: While stressing compaction, one run oopsed on NULL capc->cc in
: __free_one_page()'s task_capc(zone): compact_zone_order() had been
: interrupted, and a page was being freed in the return from interrupt.
:
: Though you would not expect it from the source, both gccs I was using (a
: 4.8.1 and a 7.5.0) had chosen to compile compact_zone_order() with the
: ".cc = &cc" implemented by mov %rbx,-0xb0(%rbp) immediately before callq
: compact_zone - long after the "current->capture_control = &capc". An
: interrupt in between those finds capc->cc NULL (zeroed by an earlier rep
: stos).
:
: This could presumably be fixed by a barrier() before setting
: current->capture_control in compact_zone_order(); but would also need more
: care on return from compact_zone(), in order not to risk leaking a page
: captured by interrupt just before capture_control is reset.
:
: Maybe that is the preferable fix, but I felt safer for task_capc() to
: exclude the rather surprising possibility of capture at interrupt time.
I have checked that gcc10 also behaves the same.
The advantage of fix in compact_zone_order() is that we don't add another
test in the page freeing hot path, and that it might prevent future
problems if we stop exposing pointers to uninitialized structures in
current task.
So this patch implements the suggestion for compact_zone_order() with
barrier() (and WRITE_ONCE() to prevent store tearing) for setting
current->capture_control, and prevents page leaking with
WRITE_ONCE/READ_ONCE in the proper order.
Link: http://lkml.kernel.org/r/20200616082649.27173-1-vbabka@suse.cz
Fixes: 5e1f0f098b46 ("mm, compaction: capture a page under direct compaction")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Reported-by: Hugh Dickins <hughd(a)google.com>
Suggested-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Hugh Dickins <hughd(a)google.com>
Cc: Alex Shi <alex.shi(a)linux.alibaba.com>
Cc: Li Wang <liwang(a)redhat.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: <stable(a)vger.kernel.org> [5.1+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/compaction.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
--- a/mm/compaction.c~mm-compaction-make-capture-control-handling-safe-wrt-interrupts
+++ a/mm/compaction.c
@@ -2316,15 +2316,26 @@ static enum compact_result compact_zone_
.page = NULL,
};
- current->capture_control = &capc;
+ /*
+ * Make sure the structs are really initialized before we expose the
+ * capture control, in case we are interrupted and the interrupt handler
+ * frees a page.
+ */
+ barrier();
+ WRITE_ONCE(current->capture_control, &capc);
ret = compact_zone(&cc, &capc);
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
- *capture = capc.page;
- current->capture_control = NULL;
+ /*
+ * Make sure we hide capture control first before we read the captured
+ * page pointer, otherwise an interrupt could free and capture a page
+ * and we would leak it.
+ */
+ WRITE_ONCE(current->capture_control, NULL);
+ *capture = READ_ONCE(capc.page);
return ret;
}
_
Hello Greg,
Can you please consider including the following patch in the stable linux-4.14.y branch?
This is to fix CVE-2020-12655
d0c7feaf87678371c2c09b3709400be416b2dc62(xfs: add agf freeblocks verify in xfs_agf_verify)
Thanks,
Yishan Chen
Har har, after I moved the slab freelist pointer into the middle of the
slab, now it looks like the contents are getting poisoned. Adjust the
test to avoid the freelist pointer again.
Fixes: 3202fa62fb43 ("slub: relocate freelist pointer to middle of object")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kees Cook <keescook(a)chromium.org>
---
drivers/misc/lkdtm/heap.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c
index 3c5cec85edce..1323bc16f113 100644
--- a/drivers/misc/lkdtm/heap.c
+++ b/drivers/misc/lkdtm/heap.c
@@ -58,11 +58,12 @@ void lkdtm_READ_AFTER_FREE(void)
int *base, *val, saw;
size_t len = 1024;
/*
- * The slub allocator uses the first word to store the free
- * pointer in some configurations. Use the middle of the
- * allocation to avoid running into the freelist
+ * The slub allocator will use the either the first word or
+ * the middle of the allocation to store the free pointer,
+ * depending on configurations. Store in the second word to
+ * avoid running into the freelist.
*/
- size_t offset = (len / sizeof(*base)) / 2;
+ size_t offset = sizeof(*base);
base = kmalloc(len, GFP_KERNEL);
if (!base) {
--
2.25.1
The eth_addr member is passed to ether_addr functions that require
2-byte alignment, therefore the member must be properly aligned
to avoid unaligned accesses.
The problem is in place since the initial merge of multicast to unicast:
commit 6db6f0eae6052b70885562e1733896647ec1d807 bridge: multicast to unicast
Fixes: 6db6f0eae605 ("bridge: multicast to unicast")
Cc: Roopa Prabhu <roopa(a)cumulusnetworks.com>
Cc: Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com>
Cc: David S. Miller <davem(a)davemloft.net>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: Felix Fietkau <nbd(a)nbd.name>
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Martitz <t.martitz(a)avm.de>
---
net/bridge/br_private.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7501be4eeba0..22cb2f1993ef 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -217,8 +217,8 @@ struct net_bridge_port_group {
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
+ unsigned char eth_addr[ETH_ALEN]; /* 2-byte aligned */
unsigned char flags;
- unsigned char eth_addr[ETH_ALEN];
};
struct net_bridge_mdb_entry {
--
2.27.0
The skcipher API dynamically instantiates the transformation object
on request that implements the requested algorithm optimally on the
given platform. This notion of optimality only matters for cases like
bulk network or disk encryption, where performance can be a bottleneck,
or in cases where the algorithm itself is not known at compile time.
In the mscc case, we are dealing with AES encryption of a single
block, and so neither concern applies, and we are better off using
the AES library interface, which is lightweight and safe for this
kind of use.
Note that the scatterlist API does not permit references to buffers
that are located on the stack, so the existing code is incorrect in
any case, but avoiding the skcipher and scatterlist APIs entirely is
the most straight-forward approach to fixing this.
Cc: Antoine Tenart <antoine.tenart(a)bootlin.com>
Cc: Andrew Lunn <andrew(a)lunn.ch>
Cc: Florian Fainelli <f.fainelli(a)gmail.com>
Cc: Heiner Kallweit <hkallweit1(a)gmail.com>
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Fixes: 28c5107aa904e ("net: phy: mscc: macsec support")
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
v2:
- select CRYPTO_LIB_AES only if MACSEC is enabled
- add Eric's R-b
drivers/net/phy/Kconfig | 3 +-
drivers/net/phy/mscc/mscc_macsec.c | 40 +++++---------------
2 files changed, 10 insertions(+), 33 deletions(-)
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index f25702386d83..e351d65533aa 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -480,8 +480,7 @@ config MICROCHIP_T1_PHY
config MICROSEMI_PHY
tristate "Microsemi PHYs"
depends on MACSEC || MACSEC=n
- select CRYPTO_AES
- select CRYPTO_ECB
+ select CRYPTO_LIB_AES if MACSEC
help
Currently supports VSC8514, VSC8530, VSC8531, VSC8540 and VSC8541 PHYs
diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c
index b4d3dc4068e2..d53ca884b5c9 100644
--- a/drivers/net/phy/mscc/mscc_macsec.c
+++ b/drivers/net/phy/mscc/mscc_macsec.c
@@ -10,7 +10,7 @@
#include <linux/phy.h>
#include <dt-bindings/net/mscc-phy-vsc8531.h>
-#include <crypto/skcipher.h>
+#include <crypto/aes.h>
#include <net/macsec.h>
@@ -500,39 +500,17 @@ static u32 vsc8584_macsec_flow_context_id(struct macsec_flow *flow)
static int vsc8584_macsec_derive_key(const u8 key[MACSEC_KEYID_LEN],
u16 key_len, u8 hkey[16])
{
- struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
- struct skcipher_request *req = NULL;
- struct scatterlist src, dst;
- DECLARE_CRYPTO_WAIT(wait);
- u32 input[4] = {0};
+ const u8 input[AES_BLOCK_SIZE] = {0};
+ struct crypto_aes_ctx ctx;
int ret;
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- req = skcipher_request_alloc(tfm, GFP_KERNEL);
- if (!req) {
- ret = -ENOMEM;
- goto out;
- }
-
- skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
- CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done,
- &wait);
- ret = crypto_skcipher_setkey(tfm, key, key_len);
- if (ret < 0)
- goto out;
-
- sg_init_one(&src, input, 16);
- sg_init_one(&dst, hkey, 16);
- skcipher_request_set_crypt(req, &src, &dst, 16, NULL);
-
- ret = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+ ret = aes_expandkey(&ctx, key, key_len);
+ if (ret)
+ return ret;
-out:
- skcipher_request_free(req);
- crypto_free_skcipher(tfm);
- return ret;
+ aes_encrypt(&ctx, hkey, input);
+ memzero_explicit(&ctx, sizeof(ctx));
+ return 0;
}
static int vsc8584_macsec_transformation(struct phy_device *phydev,
--
2.27.0