From: David Sterba <dsterba(a)suse.cz>
There's a mistake in backport of upstream commit 2175bf57dc95 ("btrfs:
fix possible free space tree corruption with online conversion") as
5.4.95 commit e1ae9aab8029.
The enum value BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED has been added to the
wrong enum set, colliding with value of BTRFS_FS_QUOTA_ENABLE. This
could cause problems during the tree conversion, where the quotas
wouldn't be set up properly but the related code executed anyway due to
the bit set.
Link: https://lore.kernel.org/linux-btrfs/20210219111741.95DD.409509F4@e16-tech.c…
Reported-by: Wang Yugui <wangyugui(a)e16-tech.com>
CC: stable(a)vger.kernel.org # 5.4.95+
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
This is the same fix that went to 5.10.x, with a refreshed diff so it applies
cleanly on 5.4.x and with updated references.
fs/btrfs/ctree.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index cda5534d3d0e..7960359dbc70 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -136,9 +136,6 @@ enum {
BTRFS_FS_STATE_DEV_REPLACING,
/* The btrfs_fs_info created for self-tests */
BTRFS_FS_STATE_DUMMY_FS_INFO,
-
- /* Indicate that we can't trust the free space tree for caching yet */
- BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,
};
#define BTRFS_BACKREF_REV_MAX 256
@@ -527,6 +524,9 @@ enum {
* so we don't need to offload checksums to workqueues.
*/
BTRFS_FS_CSUM_IMPL_FAST,
+
+ /* Indicate that we can't trust the free space tree for caching yet */
+ BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,
};
struct btrfs_fs_info {
--
2.29.2
From: Frieder Schrempf <frieder.schrempf(a)kontron.de>
The driver uses the DVS registers PCA9450_REG_BUCKxOUT_DVS0 to set the
voltage for the buck regulators 1, 2 and 3. This has no effect as the
PRESET_EN bit is set by default. This causes the preset values to be
used instead, which are set to 850 mV by default.
To fix this we reset the PRESET_EN bit at time of initialization.
Fixes: 0935ff5f1f0a ("regulator: pca9450: add pca9450 pmic driver")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Frieder Schrempf <frieder.schrempf(a)kontron.de>
---
drivers/regulator/pca9450-regulator.c | 8 ++++++++
include/linux/regulator/pca9450.h | 3 +++
2 files changed, 11 insertions(+)
diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c
index 833d398c6aa2..cf329341cb2f 100644
--- a/drivers/regulator/pca9450-regulator.c
+++ b/drivers/regulator/pca9450-regulator.c
@@ -797,6 +797,14 @@ static int pca9450_i2c_probe(struct i2c_client *i2c,
return ret;
}
+ /* Reset PRESET_EN bit in BUCK123_DVS to use DVS registers */
+ ret = regmap_update_bits(pca9450->regmap, PCA9450_REG_BUCK123_DVS,
+ BUCK123_PRESET_EN, ~BUCK123_PRESET_EN);
+ if (ret) {
+ dev_err(&i2c->dev, "Failed to reset PRESET_EN bit\n");
+ return ret;
+ }
+
/* Set reset behavior on assertion of WDOG_B signal */
ret = regmap_update_bits(pca9450->regmap, PCA9450_REG_RESET_CTRL,
WDOG_B_CFG_MASK, WDOG_B_CFG_COLD_LDO12);
diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h
index ccdb5320a240..71902f41c919 100644
--- a/include/linux/regulator/pca9450.h
+++ b/include/linux/regulator/pca9450.h
@@ -147,6 +147,9 @@ enum {
#define BUCK6_FPWM 0x04
#define BUCK6_ENMODE_MASK 0x03
+/* PCA9450_REG_BUCK123_PRESET_EN bit */
+#define BUCK123_PRESET_EN 0x80
+
/* PCA9450_BUCK1OUT_DVS0 bits */
#define BUCK1OUT_DVS0_MASK 0x7F
#define BUCK1OUT_DVS0_DEFAULT 0x14
--
2.25.1
From: Filipe Manana <fdmanana(a)suse.com>
Whenever we attempt to do a non-aligned direct IO write with O_DSYNC, we
end up triggering an assertion and crashing. Example reproducer:
$ cat test.sh
#!/bin/bash
DEV=/dev/sdj
MNT=/mnt/sdj
mkfs.btrfs -f $DEV > /dev/null
mount $DEV $MNT
# Do a direct IO write with O_DSYNC into a non-aligned range...
xfs_io -f -d -s -c "pwrite -S 0xab -b 64K 1111 64K" $MNT/foobar
umount $MNT
When running the reproducer an assertion fails and produces the following
trace:
[ 2418.403134] assertion failed: !current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA, in fs/btrfs/space-info.c:1467
[ 2418.403745] ------------[ cut here ]------------
[ 2418.404306] kernel BUG at fs/btrfs/ctree.h:3286!
[ 2418.404862] invalid opcode: 0000 [#2] PREEMPT SMP DEBUG_PAGEALLOC PTI
[ 2418.405451] CPU: 1 PID: 64705 Comm: xfs_io Tainted: G D 5.10.15-btrfs-next-87 #1
[ 2418.406026] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[ 2418.407228] RIP: 0010:assertfail.constprop.0+0x18/0x26 [btrfs]
[ 2418.407835] Code: e6 48 c7 (...)
[ 2418.409078] RSP: 0018:ffffb06080d13c98 EFLAGS: 00010246
[ 2418.409696] RAX: 000000000000006c RBX: ffff994c1debbf08 RCX: 0000000000000000
[ 2418.410302] RDX: 0000000000000000 RSI: 0000000000000027 RDI: 00000000ffffffff
[ 2418.410904] RBP: ffff994c21770000 R08: 0000000000000000 R09: 0000000000000000
[ 2418.411504] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000010000
[ 2418.412111] R13: ffff994c22198400 R14: ffff994c21770000 R15: 0000000000000000
[ 2418.412713] FS: 00007f54fd7aff00(0000) GS:ffff994d35200000(0000) knlGS:0000000000000000
[ 2418.413326] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2418.413933] CR2: 000056549596d000 CR3: 000000010b928003 CR4: 0000000000370ee0
[ 2418.414528] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2418.415109] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 2418.415669] Call Trace:
[ 2418.416254] btrfs_reserve_data_bytes.cold+0x22/0x22 [btrfs]
[ 2418.416812] btrfs_check_data_free_space+0x4c/0xa0 [btrfs]
[ 2418.417380] btrfs_buffered_write+0x1b0/0x7f0 [btrfs]
[ 2418.418315] btrfs_file_write_iter+0x2a9/0x770 [btrfs]
[ 2418.418920] new_sync_write+0x11f/0x1c0
[ 2418.419430] vfs_write+0x2bb/0x3b0
[ 2418.419972] __x64_sys_pwrite64+0x90/0xc0
[ 2418.420486] do_syscall_64+0x33/0x80
[ 2418.420979] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 2418.421486] RIP: 0033:0x7f54fda0b986
[ 2418.421981] Code: 48 c7 c0 (...)
[ 2418.423019] RSP: 002b:00007ffc40569c38 EFLAGS: 00000246 ORIG_RAX: 0000000000000012
[ 2418.423547] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f54fda0b986
[ 2418.424075] RDX: 0000000000010000 RSI: 000056549595e000 RDI: 0000000000000003
[ 2418.424596] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000400
[ 2418.425119] R10: 0000000000000400 R11: 0000000000000246 R12: 00000000ffffffff
[ 2418.425644] R13: 0000000000000400 R14: 0000000000010000 R15: 0000000000000000
[ 2418.426148] Modules linked in: btrfs blake2b_generic (...)
[ 2418.429540] ---[ end trace ef2aeb44dc0afa34 ]---
1) At btrfs_file_write_iter() we set current->journal_info to
BTRFS_DIO_SYNC_STUB;
2) We then call __btrfs_direct_write(), which calls btrfs_direct_IO();
3) We can't do the direct IO write because it starts at a non-aligned
offset (1111). So at btrfs_direct_IO() we return -EINVAL (coming from
check_direct_IO() which does the alignment check), but we leave
current->journal_info set to BTRFS_DIO_SYNC_STUB - we only clear it
at btrfs_dio_iomap_begin(), because we assume we always get there;
4) Then at __btrfs_direct_write() we see that the attempt to do the
direct IO write was not successful, 0 bytes written, so we fallback
to a buffered write by calling btrfs_buffered_write();
5) There we call btrfs_check_data_free_space() which in turn calls
btrfs_alloc_data_chunk_ondemand() and that calls
btrfs_reserve_data_bytes() with flush == BTRFS_RESERVE_FLUSH_DATA;
6) Then at btrfs_reserve_data_bytes() we have current->journal_info set to
BTRFS_DIO_SYNC_STUB, therefore not NULL, and flush has the value
BTRFS_RESERVE_FLUSH_DATA, triggering the second assertion:
int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
enum btrfs_reserve_flush_enum flush)
{
struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
int ret;
ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE);
ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);
(...)
So fix that by setting the journal to NULL whenever check_direct_IO()
returns a failure.
This bug only affects 5.10 kernels, and the regression was introduced in
5.10-rc1 by commit 0eb79294dbe328 ("btrfs: dio iomap DSYNC workaround").
The bug does not exist in 5.11 kernels due to commit ecfdc08b8cc65d
("btrfs: remove dio iomap DSYNC workaround"), which depends on a large
patchset that went into the merge window for 5.11. So this is a fix only
for 5.10.x stable kernels, as there are people hitting this bug.
Fixes: 0eb79294dbe328 ("btrfs: dio iomap DSYNC workaround")
CC: stable(a)vger.kernel.org # 5.10 (and only 5.10)
CC: David Sterba <dsterba(a)suse.cz>
Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1181605
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
---
fs/btrfs/inode.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index acc47e2ffb46..b536d21541a9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8026,8 +8026,12 @@ ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
bool relock = false;
ssize_t ret;
- if (check_direct_IO(fs_info, iter, offset))
+ if (check_direct_IO(fs_info, iter, offset)) {
+ ASSERT(current->journal_info == NULL ||
+ current->journal_info == BTRFS_DIO_SYNC_STUB);
+ current->journal_info = NULL;
return 0;
+ }
count = iov_iter_count(iter);
if (iov_iter_rw(iter) == WRITE) {
--
2.28.0
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 88bf56d04bc3564542049ec4ec168a8b60d0b48c Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs(a)linux.alibaba.com>
Date: Thu, 17 Dec 2020 23:41:18 +0800
Subject: [PATCH] kvm: check tlbs_dirty directly
In kvm_mmu_notifier_invalidate_range_start(), tlbs_dirty is used as:
need_tlb_flush |= kvm->tlbs_dirty;
with need_tlb_flush's type being int and tlbs_dirty's type being long.
It means that tlbs_dirty is always used as int and the higher 32 bits
are useless. We need to check tlbs_dirty in a correct way and this
change checks it directly without propagating it to need_tlb_flush.
Note: it's _extremely_ unlikely this neglecting of higher 32 bits can
cause problems in practice. It would require encountering tlbs_dirty
on a 4 billion count boundary, and KVM would need to be using shadow
paging or be running a nested guest.
Cc: stable(a)vger.kernel.org
Fixes: a4ee1ca4a36e ("KVM: MMU: delay flush all tlbs on sync_page path")
Signed-off-by: Lai Jiangshan <laijs(a)linux.alibaba.com>
Message-Id: <20201217154118.16497-1-jiangshanlai(a)gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3abcb2ce5b7d..19dae28904f7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -485,9 +485,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
kvm->mmu_notifier_count++;
need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
range->flags);
- need_tlb_flush |= kvm->tlbs_dirty;
/* we've to flush the tlb before the pages can be freed */
- if (need_tlb_flush)
+ if (need_tlb_flush || kvm->tlbs_dirty)
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0a88fa221ce911c331bf700d2214c5b2f77414d3 Mon Sep 17 00:00:00 2001
From: Manish Narani <manish.narani(a)xilinx.com>
Date: Tue, 17 Nov 2020 12:43:35 +0530
Subject: [PATCH] usb: gadget: u_ether: Fix MTU size mismatch with RX packet
size
Fix the MTU size issue with RX packet size as the host sends the packet
with extra bytes containing ethernet header. This causes failure when
user sets the MTU size to the maximum i.e. 15412. In this case the
ethernet packet received will be of length 15412 plus the ethernet header
length. This patch fixes the issue where there is a check that RX packet
length must not be more than max packet length.
Fixes: bba787a860fa ("usb: gadget: ether: Allow jumbo frames")
Signed-off-by: Manish Narani <manish.narani(a)xilinx.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/1605597215-122027-1-git-send-email-manish.narani@…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 31ea76adcc0d..c019f2b0c0af 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -45,9 +45,10 @@
#define UETH__VERSION "29-May-2008"
/* Experiments show that both Linux and Windows hosts allow up to 16k
- * frame sizes. Set the max size to 15k+52 to prevent allocating 32k
+ * frame sizes. Set the max MTU size to 15k+52 to prevent allocating 32k
* blocks and still have efficient handling. */
-#define GETHER_MAX_ETH_FRAME_LEN 15412
+#define GETHER_MAX_MTU_SIZE 15412
+#define GETHER_MAX_ETH_FRAME_LEN (GETHER_MAX_MTU_SIZE + ETH_HLEN)
struct eth_dev {
/* lock is held while accessing port_usb
@@ -786,7 +787,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
/* MTU range: 14 - 15412 */
net->min_mtu = ETH_HLEN;
- net->max_mtu = GETHER_MAX_ETH_FRAME_LEN;
+ net->max_mtu = GETHER_MAX_MTU_SIZE;
dev->gadget = g;
SET_NETDEV_DEV(net, &g->dev);
@@ -848,7 +849,7 @@ struct net_device *gether_setup_name_default(const char *netname)
/* MTU range: 14 - 15412 */
net->min_mtu = ETH_HLEN;
- net->max_mtu = GETHER_MAX_ETH_FRAME_LEN;
+ net->max_mtu = GETHER_MAX_MTU_SIZE;
return net;
}
From: Mike Rapoport <rppt(a)linux.ibm.com>
Hi,
@Andrew, this is based on v5.11-mmotm-2021-02-18-18-29 with the previous
version reverted
Commit 73a6e474cb37 ("mm: memmap_init: iterate over memblock regions rather
that check each PFN") exposed several issues with the memory map
initialization and these patches fix those issues.
Initially there were crashes during compaction that Qian Cai reported back
in April [1]. It seemed back then that the problem was fixed, but a few
weeks ago Andrea Arcangeli hit the same bug [2] and there was an additional
discussion at [3].
I didn't appreciate the variety of ways BIOSes can report memory in the first
megabyte, so previous versions of this set caused all kinds of troubles.
The last version that implicitly extended node/zone to cover the complete
section might also have unexpected side effects, so this time I'm trying to
move forward in baby steps.
This is mostly a return to the first version that simply merges
init_unavailable_pages() into memmap_init() so that the only effective
change would be more sensible zone/node links in unavailable struct pages.
For now, I've dropped the patch that tried to make ZONE_DMA span pfn 0
because it didn't cause any issues for a really long time and there are way
too many hidden mines around this.
I have an ugly workaround for the "pfn 0" issue that IMHO is the safest way to
deal with it until it can be gradually fixed properly:
https://git.kernel.org/pub/scm/linux/kernel/git/rppt/linux.git/commit/?id=a…
v6:
* only interleave initialization of unavailable pages in memmap_init(), so
that it essentially includes init_unavailable_pages().
v5: https://lore.kernel.org/lkml/20210208110820.6269-1-rppt@kernel.org
* extend node/zone spans to cover complete sections, this allows to interleave
the initialization of unavailable pages with "normal" memory map init.
* drop modifications to x86 early setup
v4: https://lore.kernel.org/lkml/20210130221035.4169-1-rppt@kernel.org/
* make sure pages in the range 0 - start_pfn_of_lowest_zone are initialized
even if an architecture hides them from the generic mm
* finally make pfn 0 on x86 to be a part of memory visible to the generic
mm as reserved memory.
v3: https://lore.kernel.org/lkml/20210111194017.22696-1-rppt@kernel.org
* use architectural zone constraints to set zone links for struct pages
corresponding to the holes
* drop implicit update of memblock.memory
* add a patch that sets pfn 0 to E820_TYPE_RAM on x86
v2: https://lore.kernel.org/lkml/20201209214304.6812-1-rppt@kernel.org/
* added patch that adds all regions in memblock.reserved that do not
overlap with memblock.memory to memblock.memory in the beginning of
free_area_init()
[1] https://lore.kernel.org/lkml/8C537EB7-85EE-4DCF-943E-3CC0ED0DF56D@lca.pw
[2] https://lore.kernel.org/lkml/20201121194506.13464-1-aarcange@redhat.com
[3] https://lore.kernel.org/mm-commits/20201206005401.qKuAVgOXr%akpm@linux-foun…
Mike Rapoport (1):
mm/page_alloc.c: refactor initialization of struct page for holes in
memory layout
mm/page_alloc.c | 144 ++++++++++++++++++++----------------------------
1 file changed, 61 insertions(+), 83 deletions(-)
--
2.28.0
Hi
517b693351a2 ("Bluetooth: btusb: Always fallback to alt 1 for WBS")
was applied to mainline fixing (restoring) behaviour to pre 5.7. As
the commit message describes in effect, WBS was broken for all USB-BT
adapters that do not support alt 6.
Can you consider applying it back to 5.10.y?
Regards,
Salvatore
It is possible that 'data' passed to kfree() is set to an error value
instead of allocated space. Make sure kfree() doesn't get called with an
invalid pointer.
Fixes: 5a6338cce9f4 ("mailbox: arm_mhuv2: Add driver")
Cc: v5.11 <stable(a)vger.kernel.org> # v5.11
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: Viresh Kumar <viresh.kumar(a)linaro.org>
---
drivers/mailbox/arm_mhuv2.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/mailbox/arm_mhuv2.c b/drivers/mailbox/arm_mhuv2.c
index cdfb1939fabf..d997f8ebfa98 100644
--- a/drivers/mailbox/arm_mhuv2.c
+++ b/drivers/mailbox/arm_mhuv2.c
@@ -699,7 +699,9 @@ static irqreturn_t mhuv2_receiver_interrupt(int irq, void *arg)
ret = IRQ_HANDLED;
}
- kfree(data);
+ if (!IS_ERR(data))
+ kfree(data);
+
return ret;
}
--
2.25.0.rc1.19.g042ed3e048af
Some binaries, for example the output of golang, may be marked as FPXX,
while in fact they are still FP32.
Since FPXX binaries can work with both FR=1 and FR=0, we introduce a
config option CONFIG_MIPS_O32_FPXX_USE_FR0 to force them to use FR=0 here.
https://go-review.googlesource.com/c/go/+/239217
https://go-review.googlesource.com/c/go/+/237058
v3->v4:
introduce a config option: CONFIG_MIPS_O32_FPXX_USE_FR0
v2->v3:
commit message: add Signed-off-by and Cc to stable.
v1->v2:
Fix bad commit message: in fact, we are switching to FR=0
Signed-off-by: YunQiang Su <yunqiang.su(a)cipunited.com>
Cc: stable(a)vger.kernel.org # 4.19+
---
arch/mips/Kconfig | 11 +++++++++++
arch/mips/kernel/elf.c | 13 ++++++++++---
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 0a17bedf4f0d..442db620636f 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -3100,6 +3100,17 @@ config MIPS_O32_FP64_SUPPORT
If unsure, say N.
+config MIPS_O32_FPXX_USE_FR0
+ bool "Use FR=0 mode for O32 FPXX binaries" if !CPU_MIPSR6
+ depends on MIPS_O32_FP64_SUPPORT
+ help
+ O32 FPXX can works on both FR=0 and FR=1 mode, so by default, the
+ mode preferred by hardware is used.
+
+ While some binaries may be marked as FPXX by mistake, for example
+ output of golang: they are in fact FP32 mode. To compatiable with
+ these binaries, we should use FR=0 mode for them.
+
config USE_OF
bool
select OF
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 7b045d2a0b51..443ced26ee60 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -234,9 +234,10 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
* fpxx case. This is because, in any-ABI (or no-ABI) we have no FPU
* instructions so we don't care about the mode. We will simply use
* the one preferred by the hardware. In fpxx case, that ABI can
- * handle both FR=1 and FR=0, so, again, we simply choose the one
- * preferred by the hardware. Next, if we only use single-precision
- * FPU instructions, and the default ABI FPU mode is not good
+ * handle both FR=1 and FR=0. Here, we may need to use FR=0, because
+ * some binaries may be mark as FPXX by mistake (ie, output of golang).
+ * - If we only use single-precision FPU instructions,
+ * and the default ABI FPU mode is not good
* (ie single + any ABI combination), we set again the FPU mode to the
* one is preferred by the hardware. Next, if we know that the code
* will only use single-precision instructions, shown by single being
@@ -248,8 +249,14 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
*/
if (prog_req.fre && !prog_req.frdefault && !prog_req.fr1)
state->overall_fp_mode = FP_FRE;
+#if CONFIG_MIPS_O32_FPXX_USE_FR0
+ else if (prog_req.fr1 && prog_req.frdefault)
+ state->overall_fp_mode = FP_FR0;
+ else if (prog_req.single && !prog_req.frdefault)
+#else
else if ((prog_req.fr1 && prog_req.frdefault) ||
(prog_req.single && !prog_req.frdefault))
+#endif
/* Make sure 64-bit MIPS III/IV/64R1 will not pick FR1 */
state->overall_fp_mode = ((raw_current_cpu_data.fpu_id & MIPS_FPIR_F64) &&
cpu_has_mips_r2_r6) ?
--
2.20.1
[+cc stable(a)vger.kernel.org]
Hi,
On Sat, Feb 06, 2021 at 04:56:53PM +0100, Sjoerd Simons wrote:
> On Fri, Dec 18, 2020 at 10:23:08PM +0100, Marcel Holtmann wrote:
> > Hi Trent,
> >
> > > When alt mode 6 is not available, fallback to the kernel <= 5.7 behavior
> > > of always using alt mode 1.
> > >
> > > Prior to kernel 5.8, btusb would always use alt mode 1 for WBS (Wide
> > > Band Speech aka mSBC aka transparent SCO). In commit baac6276c0a9
> > > ("Bluetooth: btusb: handle mSBC audio over USB Endpoints") this
> > > was changed to use alt mode 6, which is the recommended mode in the
> > > Bluetooth spec (Specifications of the Bluetooth System, v5.0, Vol 4.B
> > > §2.2.1). However, many if not most BT USB adapters do not support alt
> > > mode 6. In fact, I have been unable to find any which do.
>
> > patch has been applied to bluetooth-next tree.
>
> For easier application to the stable tree(s) this should probably get:
> Fixes: baac6276c0a9 ("Bluetooth: btusb: handle mSBC audio over USB Endpoints")
>
> In my testing this indeed fixes mSBC audio with both a Belkin (Broadcom
> BCM20702A, 050d:065a) and an Intel Bluetooth (8087:0a2b) adapters.
>
> Tested-By: Sjoerd Simons <sjoerd(a)collabora.com>
Tested on Intel AX200 Bluetooth (8087:0029):
Tested-by: Sebastian Reichel <sre(a)kernel.org>
The patch has been merged to Linus' tree today and I think it should
be applied to the 5.10 tree, which is used by Debian. This patch is
required to use BT headset with bidirectional-audio in acceptable
quality (That also requires proper userspace software, e.g. pipewire
0.3.22, which Sjoerd uploaded to Debian experimental).
Patch applies cleanly on 5.10.
Thanks,
-- Sebastian
From: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
It seems that there are config combinations in which this driver gets enabled
and hence selects the MFD, but without HAS_IOMEM getting pulled in
via some other route. MFD is entirely contained in an
if HAS_IOMEM block, leading to the build issue in this bugzilla.
https://bugzilla.kernel.org/show_bug.cgi?id=209889
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index bf7d22fa4be2..6605c263949c 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -923,6 +923,7 @@ config STM32_ADC_CORE
depends on ARCH_STM32 || COMPILE_TEST
depends on OF
depends on REGULATOR
+ depends on HAS_IOMEM
select IIO_BUFFER
select MFD_STM32_TIMERS
select IIO_STM32_TIMER_TRIGGER
--
2.30.0
Some binaries, for example the output of golang, may be marked as FPXX,
while in fact they are still FP32.
Since FPXX binaries can work with both FR=1 and FR=0, we force them to
use FR=0 here.
https://go-review.googlesource.com/c/go/+/239217
https://go-review.googlesource.com/c/go/+/237058
v2->v3:
commit message: add Signed-off-by and Cc to stable.
v1->v2:
Fix bad commit message: in fact, we are switching to FR=0
Signed-off-by: YunQiang Su <yunqiang.su(a)cipunited.com>
Cc: stable(a)vger.kernel.org # 4.19+
---
arch/mips/kernel/elf.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 7b045d2a0b51..bf798ce0ec0e 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -234,9 +234,10 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
* fpxx case. This is because, in any-ABI (or no-ABI) we have no FPU
* instructions so we don't care about the mode. We will simply use
* the one preferred by the hardware. In fpxx case, that ABI can
- * handle both FR=1 and FR=0, so, again, we simply choose the one
- * preferred by the hardware. Next, if we only use single-precision
- * FPU instructions, and the default ABI FPU mode is not good
+ * handle both FR=1 and FR=0. Here, we use FR=0, because some
+ * binaries may be mark as FPXX by mistake (ie, output of golang).
+ * - If we only use single-precision FPU instructions,
+ * and the default ABI FPU mode is not good
* (ie single + any ABI combination), we set again the FPU mode to the
* one is preferred by the hardware. Next, if we know that the code
* will only use single-precision instructions, shown by single being
@@ -248,8 +249,9 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
*/
if (prog_req.fre && !prog_req.frdefault && !prog_req.fr1)
state->overall_fp_mode = FP_FRE;
- else if ((prog_req.fr1 && prog_req.frdefault) ||
- (prog_req.single && !prog_req.frdefault))
+ else if (prog_req.fr1 && prog_req.frdefault)
+ state->overall_fp_mode = FP_FR0;
+ else if (prog_req.single && !prog_req.frdefault)
/* Make sure 64-bit MIPS III/IV/64R1 will not pick FR1 */
state->overall_fp_mode = ((raw_current_cpu_data.fpu_id & MIPS_FPIR_F64) &&
cpu_has_mips_r2_r6) ?
--
2.20.1
There is a short window where percpu_refs have already dropped to zero, but
we try to do resurrect(). Play nicer and wait for ->release() to happen
in this case and proceed as if everything is ok. One downside for ctx refs
is that we can ignore signal_pending() on a rare occasion, but someone
else should check for it later if needed.
Cc: <stable(a)vger.kernel.org> # 5.5+
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
---
fs/io_uring.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b00ab7138410..ce197af2d3c6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1104,6 +1104,21 @@ static inline void io_set_resource_node(struct io_kiocb *req)
}
}
+static bool io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
+{
+ if (!percpu_ref_tryget(ref)) {
+ /* already at zero, wait for ->release() */
+ if (!try_wait_for_completion(compl))
+ synchronize_rcu();
+ return false;
+ }
+
+ percpu_ref_resurrect(ref);
+ reinit_completion(compl);
+ percpu_ref_put(ref);
+ return true;
+}
+
static bool io_match_task(struct io_kiocb *head,
struct task_struct *task,
struct files_struct *files)
@@ -7353,13 +7368,11 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
flush_delayed_work(&ctx->rsrc_put_work);
ret = wait_for_completion_interruptible(&data->done);
- if (!ret)
+ if (!ret || !io_refs_resurrect(&data->refs, &data->done))
break;
- percpu_ref_resurrect(&data->refs);
io_sqe_rsrc_set_node(ctx, data, backup_node);
backup_node = NULL;
- reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
ret = io_run_task_work_sig();
mutex_lock(&ctx->uring_lock);
@@ -10094,10 +10107,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
mutex_lock(&ctx->uring_lock);
- if (ret) {
- percpu_ref_resurrect(&ctx->refs);
- goto out_quiesce;
- }
+ if (ret && io_refs_resurrect(&ctx->refs, &ctx->ref_comp))
+ return ret;
}
if (ctx->restricted) {
@@ -10189,7 +10200,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (io_register_op_must_quiesce(opcode)) {
/* bring the ctx back to life */
percpu_ref_reinit(&ctx->refs);
-out_quiesce:
reinit_completion(&ctx->ref_comp);
}
return ret;
--
2.24.0
There is a short window where percpu_refs have already dropped to zero, but
we try to do resurrect(). Play nicer and wait for ->release() to happen
in this case and proceed as if everything is ok. One little downside is
that we can ignore signal_pending() on a rare occasion, but someone
else should check for it later if needed.
Cc: <stable(a)vger.kernel.org> # 5.5+
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
---
fs/io_uring.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f2fdebaf28fe..6ea4633e5ed5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1104,6 +1104,21 @@ static inline void io_set_resource_node(struct io_kiocb *req)
}
}
+static bool io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
+{
+ if (!percpu_ref_tryget(ref)) {
+ /* already at zero, wait for ->release() */
+ if (!try_wait_for_completion(compl))
+ synchronize_rcu();
+ return false;
+ }
+
+ percpu_ref_resurrect(ref);
+ reinit_completion(compl);
+ percpu_ref_put(ref);
+ return true;
+}
+
static bool io_match_task(struct io_kiocb *head,
struct task_struct *task,
struct files_struct *files)
@@ -10094,10 +10109,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
mutex_lock(&ctx->uring_lock);
- if (ret) {
- percpu_ref_resurrect(&ctx->refs);
- goto out_quiesce;
- }
+ if (ret && io_refs_resurrect(&ctx->refs, &ctx->ref_comp))
+ return ret;
}
if (ctx->restricted) {
@@ -10189,7 +10202,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (io_register_op_must_quiesce(opcode)) {
/* bring the ctx back to life */
percpu_ref_reinit(&ctx->refs);
-out_quiesce:
reinit_completion(&ctx->ref_comp);
}
return ret;
--
2.24.0
Older versions of libelf cannot recognize the compressed section.
However, it's only required to fix the compressed section info when compiling with the CONFIG_DEBUG_INFO_COMPRESSED flag set.
Compiling the compressed_section_fix function only when necessary will make it easier to enable the BTF function,
since the tool resolve_btfids is compiled with the host toolchain and
the host toolchain might be older than the cross compile toolchain.
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Kun-Chuan Hsieh <jetswayss(a)gmail.com>
---
tools/bpf/resolve_btfids/main.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 7409d7860aa6..ad40346c6631 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -260,6 +260,7 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
return btf_id__add(root, id, false);
}
+#ifdef CONFIG_DEBUG_INFO_COMPRESSED
/*
* The data of compressed section should be aligned to 4
* (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
@@ -292,6 +293,7 @@ static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
}
return 0;
}
+#endif
static int elf_collect(struct object *obj)
{
@@ -370,8 +372,10 @@ static int elf_collect(struct object *obj)
obj->efile.idlist_addr = sh.sh_addr;
}
+#ifdef CONFIG_DEBUG_INFO_COMPRESSED
if (compressed_section_fix(elf, scn, &sh))
return -1;
+#endif
}
return 0;
--
2.25.1
Some binaries, for example the output of golang, may be marked as FPXX,
while in fact they are still FP32.
Since FPXX binaries can work with both FR=1 and FR=0, we force them to
use FR=0 here.
https://go-review.googlesource.com/c/go/+/239217
https://go-review.googlesource.com/c/go/+/237058
v1->v2:
Fix bad commit message: in fact, we are switching to FR=0
---
arch/mips/kernel/elf.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 7b045d2a0b51..bf798ce0ec0e 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -234,9 +234,10 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
* fpxx case. This is because, in any-ABI (or no-ABI) we have no FPU
* instructions so we don't care about the mode. We will simply use
* the one preferred by the hardware. In fpxx case, that ABI can
- * handle both FR=1 and FR=0, so, again, we simply choose the one
- * preferred by the hardware. Next, if we only use single-precision
- * FPU instructions, and the default ABI FPU mode is not good
+ * handle both FR=1 and FR=0. Here, we use FR=0, because some
+ * binaries may be mark as FPXX by mistake (ie, output of golang).
+ * - If we only use single-precision FPU instructions,
+ * and the default ABI FPU mode is not good
* (ie single + any ABI combination), we set again the FPU mode to the
* one is preferred by the hardware. Next, if we know that the code
* will only use single-precision instructions, shown by single being
@@ -248,8 +249,9 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
*/
if (prog_req.fre && !prog_req.frdefault && !prog_req.fr1)
state->overall_fp_mode = FP_FRE;
- else if ((prog_req.fr1 && prog_req.frdefault) ||
- (prog_req.single && !prog_req.frdefault))
+ else if (prog_req.fr1 && prog_req.frdefault)
+ state->overall_fp_mode = FP_FR0;
+ else if (prog_req.single && !prog_req.frdefault)
/* Make sure 64-bit MIPS III/IV/64R1 will not pick FR1 */
state->overall_fp_mode = ((raw_current_cpu_data.fpu_id & MIPS_FPIR_F64) &&
cpu_has_mips_r2_r6) ?
--
2.20.1
Some binaries, for example the output of golang, may be marked as FPXX,
while in fact they are still FP32.
Since FPXX binary can work with both FR=1 and FR=0, we force it to
use FR=0 here.
https://go-review.googlesource.com/c/go/+/239217
https://go-review.googlesource.com/c/go/+/237058
---
arch/mips/kernel/elf.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 7b045d2a0b51..bf798ce0ec0e 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -234,9 +234,10 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
* fpxx case. This is because, in any-ABI (or no-ABI) we have no FPU
* instructions so we don't care about the mode. We will simply use
* the one preferred by the hardware. In fpxx case, that ABI can
- * handle both FR=1 and FR=0, so, again, we simply choose the one
- * preferred by the hardware. Next, if we only use single-precision
- * FPU instructions, and the default ABI FPU mode is not good
+ * handle both FR=1 and FR=0. Here, we use FR=0, because some
+ * binaries may be mark as FPXX by mistake (ie, output of golang).
+ * - If we only use single-precision FPU instructions,
+ * and the default ABI FPU mode is not good
* (ie single + any ABI combination), we set again the FPU mode to the
* one is preferred by the hardware. Next, if we know that the code
* will only use single-precision instructions, shown by single being
@@ -248,8 +249,9 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
*/
if (prog_req.fre && !prog_req.frdefault && !prog_req.fr1)
state->overall_fp_mode = FP_FRE;
- else if ((prog_req.fr1 && prog_req.frdefault) ||
- (prog_req.single && !prog_req.frdefault))
+ else if (prog_req.fr1 && prog_req.frdefault)
+ state->overall_fp_mode = FP_FR0;
+ else if (prog_req.single && !prog_req.frdefault)
/* Make sure 64-bit MIPS III/IV/64R1 will not pick FR1 */
state->overall_fp_mode = ((raw_current_cpu_data.fpu_id & MIPS_FPIR_F64) &&
cpu_has_mips_r2_r6) ?
--
2.20.1
There is a short window where percpu_refs are already turned zero, but
we try to do resurrect(). Play nicer and wait for all users to leave RCU
section.
Cc: <stable(a)vger.kernel.org> # 5.5+
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
---
fs/io_uring.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f3af499b12a9..ce5fccf00367 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7351,6 +7351,7 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
break;
percpu_ref_resurrect(&data->refs);
+ synchronize_rcu();
io_sqe_rsrc_set_node(ctx, data, backup_node);
reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
@@ -10089,6 +10090,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (ret) {
percpu_ref_resurrect(&ctx->refs);
+ synchronize_rcu();
goto out_quiesce;
}
}
--
2.24.0
This is shown with Samsung Chromebook Pro (Caroline) with TPM 1.2
(SLB 9670):
[ 4.324298] TPM returned invalid status
[ 4.324806] WARNING: CPU: 2 PID: 1 at drivers/char/tpm/tpm_tis_core.c:275 tpm_tis_status+0x86/0x8f
Background
==========
TCG PC Client Platform TPM Profile (PTP) Specification, paragraph 6.1 FIFO
Interface Locality Usage per Register, Table 39 Register Behavior Based on
Locality Setting for FIFO - a read attempt to TPM_STS_x Registers returns
0xFF in case of lack of locality.
The fix
=======
Decorate tpm_get_timeouts() with request_locality() and release_locality().
Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()")
Cc: James Bottomley <James.Bottomley(a)HansenPartnership.com>
Cc: Guenter Roeck <linux(a)roeck-us.net>
Cc: Laurent Bigonville <bigon(a)debian.org>
Cc: stable(a)vger.kernel.org
Reported-by: Lukasz Majczak <lma(a)semihalf.com>
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
drivers/char/tpm/tpm_tis_core.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 431919d5f48a..30843954aa36 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -1019,11 +1019,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
init_waitqueue_head(&priv->read_queue);
init_waitqueue_head(&priv->int_queue);
if (irq != -1) {
- /* Before doing irq testing issue a command to the TPM in polling mode
+ /*
+ * Before doing irq testing issue a command to the TPM in polling mode
* to make sure it works. May as well use that command to set the
* proper timeouts for the driver.
*/
- if (tpm_get_timeouts(chip)) {
+
+ rc = request_locality(chip, 0);
+ if (rc < 0)
+ goto out_err;
+
+ rc = tpm_get_timeouts(chip);
+
+ release_locality(chip, 0);
+
+ if (rc) {
dev_err(dev, "Could not get TPM timeouts and durations\n");
rc = -ENODEV;
goto out_err;
--
2.30.1
CCW_CMD_READ_STATUS was introduced with revision 2 of virtio-ccw,
and drivers should only rely on it being implemented when they
negotiated at least that revision with the device.
However, virtio_ccw_get_status() issued READ_STATUS for any
device operating at least at revision 1. If the device accepts
READ_STATUS regardless of the negotiated revision (which some
implementations like QEMU do, even though the spec currently does
not allow it), everything works as intended. While a device
rejecting the command should also be handled gracefully, we will
not be able to see any changes the device makes to the status,
such as setting NEEDS_RESET or setting the status to zero after
a completed reset.
We negotiated the revision to at most 1, as we never bumped the
maximum revision; let's do that now and properly send READ_STATUS
only if we are operating at least at revision 2.
Cc: stable(a)vger.kernel.org
Fixes: 7d3ce5ab9430 ("virtio/s390: support READ_STATUS command for virtio-ccw")
Reviewed-by: Halil Pasic <pasic(a)linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck(a)redhat.com>
---
v1->v2:
tweak patch description and cc:stable
---
drivers/s390/virtio/virtio_ccw.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 5730572b52cd..54e686dca6de 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -117,7 +117,7 @@ struct virtio_rev_info {
};
/* the highest virtio-ccw revision we support */
-#define VIRTIO_CCW_REV_MAX 1
+#define VIRTIO_CCW_REV_MAX 2
struct virtio_ccw_vq_info {
struct virtqueue *vq;
@@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev)
u8 old_status = vcdev->dma_area->status;
struct ccw1 *ccw;
- if (vcdev->revision < 1)
+ if (vcdev->revision < 2)
return vcdev->dma_area->status;
ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
--
2.26.2
The patch titled
Subject: ntfs: check for valid standard information attribute
has been added to the -mm tree. Its filename is
ntfs-check-for-valid-standard-information-attribute.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/ntfs-check-for-valid-standard-inf…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/ntfs-check-for-valid-standard-inf…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Rustam Kovhaev <rkovhaev(a)gmail.com>
Subject: ntfs: check for valid standard information attribute
Mounting a corrupted filesystem with NTFS resulted in a kernel crash.
We should check for valid STANDARD_INFORMATION attribute offset and length
before trying to access it.
Link: https://lkml.kernel.org/r/20210217155930.1506815-1-rkovhaev@gmail.com
Link: https://syzkaller.appspot.com/bug?extid=c584225dabdea2f71969
Signed-off-by: Rustam Kovhaev <rkovhaev(a)gmail.com>
Reported-by: syzbot+c584225dabdea2f71969(a)syzkaller.appspotmail.com
Tested-by: syzbot+c584225dabdea2f71969(a)syzkaller.appspotmail.com
Acked-by: Anton Altaparmakov <anton(a)tuxera.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ntfs/inode.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/fs/ntfs/inode.c~ntfs-check-for-valid-standard-information-attribute
+++ a/fs/ntfs/inode.c
@@ -629,6 +629,12 @@ static int ntfs_read_locked_inode(struct
}
a = ctx->attr;
/* Get the standard information attribute value. */
+ if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
+ + le32_to_cpu(a->data.resident.value_length) >
+ (u8 *)ctx->mrec + vol->mft_record_size) {
+ ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
+ goto unm_err_out;
+ }
si = (STANDARD_INFORMATION*)((u8*)a +
le16_to_cpu(a->data.resident.value_offset));
_
Patches currently in -mm which might be from rkovhaev(a)gmail.com are
ntfs-check-for-valid-standard-information-attribute.patch
The patch titled
Subject: mm/vmscan: restore zone_reclaim_mode ABI
has been added to the -mm tree. Its filename is
mm-vmscan-restore-zone_reclaim_mode-abi.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-vmscan-restore-zone_reclaim_mo…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-vmscan-restore-zone_reclaim_mo…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Dave Hansen <dave.hansen(a)linux.intel.com>
Subject: mm/vmscan: restore zone_reclaim_mode ABI
I went to go add a new RECLAIM_* mode for the zone_reclaim_mode sysctl.
Like a good kernel developer, I also went to go update the documentation.
I noticed that the bits in the documentation didn't match the bits in the
#defines.
The VM never explicitly checks the RECLAIM_ZONE bit. The bit is, however,
implicitly checked when checking 'node_reclaim_mode==0'. The RECLAIM_ZONE
#define was removed in a cleanup. That, by itself, is fine.
But, when the bit was removed (bit 0) the _other_ bit locations also got
changed. That's not OK because the bit values are documented to mean one
specific thing. Users surely do not expect the meaning to change from
kernel to kernel.
The end result is that if someone had a script that did:
sysctl vm.zone_reclaim_mode=1
it would have gone from enabling node reclaim for clean unmapped pages to
writing out pages during node reclaim after the commit in question.
That's not great.
Put the bits back the way they were and add a comment so something like
this is a bit harder to do again. Update the documentation to make it
clear that the first bit is ignored.
Link: https://lkml.kernel.org/r/20210219172555.FF0CDF23@viggo.jf.intel.com
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Fixes: 648b5cf368e0 ("mm/vmscan: remove unused RECLAIM_OFF/RECLAIM_ZONE")
Reviewed-by: Ben Widawsky <ben.widawsky(a)intel.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Acked-by: David Rientjes <rientjes(a)google.com>
Acked-by: Christoph Lameter <cl(a)linux.com>
Cc: Alex Shi <alex.shi(a)linux.alibaba.com>
Cc: Daniel Wagner <dwagner(a)suse.de>
Cc: "Tobin C. Harding" <tobin(a)kernel.org>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Qian Cai <cai(a)lca.pw>
Cc: Daniel Wagner <dwagner(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
Documentation/admin-guide/sysctl/vm.rst | 10 +++++-----
mm/vmscan.c | 9 +++++++--
2 files changed, 12 insertions(+), 7 deletions(-)
--- a/Documentation/admin-guide/sysctl/vm.rst~mm-vmscan-restore-zone_reclaim_mode-abi
+++ a/Documentation/admin-guide/sysctl/vm.rst
@@ -983,11 +983,11 @@ that benefit from having their data cach
left disabled as the caching effect is likely to be more important than
data locality.
-zone_reclaim may be enabled if it's known that the workload is partitioned
-such that each partition fits within a NUMA node and that accessing remote
-memory would cause a measurable performance reduction. The page allocator
-will then reclaim easily reusable pages (those page cache pages that are
-currently not used) before allocating off node pages.
+Consider enabling one or more zone_reclaim mode bits if it's known that the
+workload is partitioned such that each partition fits within a NUMA node
+and that accessing remote memory would cause a measurable performance
+reduction. The page allocator will take additional actions before
+allocating off node pages.
Allowing zone reclaim to write out pages stops processes that are
writing large amounts of data from dirtying pages on other nodes. Zone
--- a/mm/vmscan.c~mm-vmscan-restore-zone_reclaim_mode-abi
+++ a/mm/vmscan.c
@@ -4085,8 +4085,13 @@ module_init(kswapd_init)
*/
int node_reclaim_mode __read_mostly;
-#define RECLAIM_WRITE (1<<0) /* Writeout pages during reclaim */
-#define RECLAIM_UNMAP (1<<1) /* Unmap pages during reclaim */
+/*
+ * These bit locations are exposed in the vm.zone_reclaim_mode sysctl
+ * ABI. New bits are OK, but existing bits can never change.
+ */
+#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
+#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
+#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */
/*
* Priority for NODE_RECLAIM. This determines the fraction of pages
_
Patches currently in -mm which might be from dave.hansen(a)linux.intel.com are
mm-vmscan-restore-zone_reclaim_mode-abi.patch
From: David Sterba <dsterba(a)suse.cz>
There's a mistake in backport of upstream commit 2175bf57dc95 ("btrfs:
fix possible free space tree corruption with online conversion") as
5.10.13 commit 2175bf57dc95.
The enum value BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED has been added to the
wrong enum set, colliding with value of BTRFS_FS_QUOTA_ENABLE. This
could cause problems during the tree conversion, where the quotas
wouldn't be set up properly but the related code executed anyway due to
the bit set.
Link: https://lore.kernel.org/linux-btrfs/20210219111741.95DD.409509F4@e16-tech.c…
Reported-by: Wang Yugui <wangyugui(a)e16-tech.com>
CC: stable(a)vger.kernel.org # 5.10.13+
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/ctree.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 30ea9780725f..b6884eda9ff6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -146,9 +146,6 @@ enum {
BTRFS_FS_STATE_DEV_REPLACING,
/* The btrfs_fs_info created for self-tests */
BTRFS_FS_STATE_DUMMY_FS_INFO,
-
- /* Indicate that we can't trust the free space tree for caching yet */
- BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,
};
#define BTRFS_BACKREF_REV_MAX 256
@@ -562,6 +559,9 @@ enum {
/* Indicate that the discard workqueue can service discards. */
BTRFS_FS_DISCARD_RUNNING,
+
+ /* Indicate that we can't trust the free space tree for caching yet */
+ BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,
};
/*
--
2.29.2
From: Dave Hansen <dave.hansen(a)linux.intel.com>
I went to go add a new RECLAIM_* mode for the zone_reclaim_mode
sysctl. Like a good kernel developer, I also went to go update the
documentation. I noticed that the bits in the documentation didn't
match the bits in the #defines.
The VM never explicitly checks the RECLAIM_ZONE bit. The bit is,
however, implicitly checked when checking 'node_reclaim_mode==0'.
The RECLAIM_ZONE #define was removed in a cleanup. That, by itself,
is fine.
But, when the bit was removed (bit 0) the _other_ bit locations also
got changed. That's not OK because the bit values are documented to
mean one specific thing. Users surely do not expect the meaning to
change from kernel to kernel.
The end result is that if someone had a script that did:
sysctl vm.zone_reclaim_mode=1
it would have gone from enabling node reclaim for clean unmapped
pages to writing out pages during node reclaim after the commit in
question. That's not great.
Put the bits back the way they were and add a comment so something
like this is a bit harder to do again. Update the documentation to
make it clear that the first bit is ignored.
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Fixes: 648b5cf368e0 ("mm/vmscan: remove unused RECLAIM_OFF/RECLAIM_ZONE")
Reviewed-by: Ben Widawsky <ben.widawsky(a)intel.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Acked-by: David Rientjes <rientjes(a)google.com>
Acked-by: Christoph Lameter <cl(a)linux.com>
Cc: Alex Shi <alex.shi(a)linux.alibaba.com>
Cc: Daniel Wagner <dwagner(a)suse.de>
Cc: "Tobin C. Harding" <tobin(a)kernel.org>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Qian Cai <cai(a)lca.pw>
Cc: Daniel Wagner <dwagner(a)suse.de>
Cc: stable(a)vger.kernel.org
--
Changes from v2:
* Update description to indicate that bit0 was used for clean
unmapped page node reclaim.
---
b/Documentation/admin-guide/sysctl/vm.rst | 10 +++++-----
b/mm/vmscan.c | 9 +++++++--
2 files changed, 12 insertions(+), 7 deletions(-)
diff -puN Documentation/admin-guide/sysctl/vm.rst~mm-vmscan-restore-old-zone_reclaim_mode-abi Documentation/admin-guide/sysctl/vm.rst
--- a/Documentation/admin-guide/sysctl/vm.rst~mm-vmscan-restore-old-zone_reclaim_mode-abi 2021-02-19 09:25:26.656663105 -0800
+++ b/Documentation/admin-guide/sysctl/vm.rst 2021-02-19 09:25:26.662663105 -0800
@@ -983,11 +983,11 @@ that benefit from having their data cach
left disabled as the caching effect is likely to be more important than
data locality.
-zone_reclaim may be enabled if it's known that the workload is partitioned
-such that each partition fits within a NUMA node and that accessing remote
-memory would cause a measurable performance reduction. The page allocator
-will then reclaim easily reusable pages (those page cache pages that are
-currently not used) before allocating off node pages.
+Consider enabling one or more zone_reclaim mode bits if it's known that the
+workload is partitioned such that each partition fits within a NUMA node
+and that accessing remote memory would cause a measurable performance
+reduction. The page allocator will take additional actions before
+allocating off node pages.
Allowing zone reclaim to write out pages stops processes that are
writing large amounts of data from dirtying pages on other nodes. Zone
diff -puN mm/vmscan.c~mm-vmscan-restore-old-zone_reclaim_mode-abi mm/vmscan.c
--- a/mm/vmscan.c~mm-vmscan-restore-old-zone_reclaim_mode-abi 2021-02-19 09:25:26.658663105 -0800
+++ b/mm/vmscan.c 2021-02-19 09:25:26.665663105 -0800
@@ -4095,8 +4095,13 @@ module_init(kswapd_init)
*/
int node_reclaim_mode __read_mostly;
-#define RECLAIM_WRITE (1<<0) /* Writeout pages during reclaim */
-#define RECLAIM_UNMAP (1<<1) /* Unmap pages during reclaim */
+/*
+ * These bit locations are exposed in the vm.zone_reclaim_mode sysctl
+ * ABI. New bits are OK, but existing bits can never change.
+ */
+#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
+#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
+#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */
/*
* Priority for NODE_RECLAIM. This determines the fraction of pages
_
>> On Fri, Feb 19, 2021 at 02:43:34PM +0800, Wen Yang wrote:
>> From: Peter Zijlstra <peterz(a)infradead.org>
>>
>> commit 19dbdcb8039cff16669a05136a29180778d16d0a upstream.
>>
>> It's clearly documented that smp function calls cannot be invoked from
>> softirq handling context. Unfortunately nothing enforces that or emits a
>> warning.
>>
>> A single function call can be invoked from softirq context only via
>> smp_call_function_single_async().
>>
>> The only legit context is task context, so add a warning to that effect.
>>
>> Reported-by: luferry <luferry(a)163.com>
>> Signed-off-by: Peter Zijlstra <peterz(a)infradead.org>
>> Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
>> Link: https://lkml.kernel.org/r/20190718160601.GP3402@hirez.programming.kicks-ass…
>> Cc: stable <stable(a)vger.kernel.org> # 4.9.x
>> Signed-off-by: Wen Yang <simon.wy(a)alibaba-inc.com>
>> ---
>> kernel/smp.c | 16 ++++++++++++++++
>> 1 file changed, 16 insertions(+)
>>
>> diff --git a/kernel/smp.c b/kernel/smp.c
>> index 399905f..f2b29c4 100644
>> --- a/kernel/smp.c
>> +++ b/kernel/smp.c
>> @@ -276,6 +276,14 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
>> WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
>> && !oops_in_progress);
>>
>> + /*
>> + * When @wait we can deadlock when we interrupt between llist_add() and
>> + * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
>> + * csd_lock() on because the interrupt context uses the same csd
>> + * storage.
>> + */
>> + WARN_ON_ONCE(!in_task());
>> +
>> csd = &csd_stack;
>> if (!wait) {
>> csd = this_cpu_ptr(&csd_data);
>> @@ -401,6 +409,14 @@ void smp_call_function_many(const struct cpumask *mask,
>> WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
>> && !oops_in_progress && !early_boot_irqs_disabled);
>>
>> + /*
>> + * When @wait we can deadlock when we interrupt between llist_add() and
>> + * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
>> + * csd_lock() on because the interrupt context uses the same csd
>> + * storage.
>> + */
>> + WARN_ON_ONCE(!in_task());
>> +
>> /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
>> cpu = cpumask_first_and(mask, cpu_online_mask);
>> if (cpu == this_cpu)
>> --
>> 1.8.3.1
>>
>
> Why do you want this in the 4.9.y kernel tree only, and not all others?
> What bug/problem does this fix? It seems that it will only report
> problems that other code has, not fix existing code. If anything, it's
> going to start causing machines to reboot that have "panic on warn" set,
> is that a good thing to do?
4.9, 4.14 and 4.19 should all need it.
We found that some third-party kernel modules occasionally cause kernel
panics (such as watchdog resets). Further analysis showed that
functions such as smp_call_function()/on_each_cpu() are called from interrupt
context or softirq context.
Since these usages are illegal and cannot be prohibited, we should add a warning
to enhance the robustness of the stable kernel and/or facilitate the analysis of
the problems.
thanks,
Wen
From: Joerg Roedel <jroedel(a)suse.de>
The code in the NMI handler to adjust the #VC handler IST stack is
needed in case an NMI hits when the #VC handler is still using its IST
stack.
But the check for this condition also needs to look if the regs->sp
value is trusted, meaning it was not set by user-space. Extend the
check to not use regs->sp when the NMI interrupted user-space code or
the SYSCALL gap.
Reported-by: Andy Lutomirski <luto(a)kernel.org>
Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI handler")
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
arch/x86/kernel/sev-es.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 84c1821819af..0df38b185d53 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -144,7 +144,9 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs)
old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
/* Make room on the IST stack */
- if (on_vc_stack(regs->sp))
+ if (on_vc_stack(regs->sp) &&
+ !user_mode(regs) &&
+ !from_syscall_gap(regs))
new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
else
new_ist = old_ist - sizeof(old_ist);
--
2.30.0
From: Peter Zijlstra <peterz(a)infradead.org>
commit 19dbdcb8039cff16669a05136a29180778d16d0a upstream.
It's clearly documented that smp function calls cannot be invoked from
softirq handling context. Unfortunately nothing enforces that or emits a
warning.
A single function call can be invoked from softirq context only via
smp_call_function_single_async().
The only legit context is task context, so add a warning to that effect.
Reported-by: luferry <luferry(a)163.com>
Signed-off-by: Peter Zijlstra <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Link: https://lkml.kernel.org/r/20190718160601.GP3402@hirez.programming.kicks-ass…
Cc: stable <stable(a)vger.kernel.org> # 4.9.x
Signed-off-by: Wen Yang <simon.wy(a)alibaba-inc.com>
---
kernel/smp.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/kernel/smp.c b/kernel/smp.c
index 399905f..f2b29c4 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -276,6 +276,14 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
csd = &csd_stack;
if (!wait) {
csd = this_cpu_ptr(&csd_data);
@@ -401,6 +409,14 @@ void smp_call_function_many(const struct cpumask *mask,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress && !early_boot_irqs_disabled);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
if (cpu == this_cpu)
--
1.8.3.1
From: Peter Zijlstra <peterz(a)infradead.org>
commit 19dbdcb8039cff16669a05136a29180778d16d0a upstream.
It's clearly documented that smp function calls cannot be invoked from
softirq handling context. Unfortunately nothing enforces that or emits a
warning.
A single function call can be invoked from softirq context only via
smp_call_function_single_async().
The only legit context is task context, so add a warning to that effect.
Reported-by: luferry <luferry(a)163.com>
Signed-off-by: Peter Zijlstra <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Link: https://lkml.kernel.org/r/20190718160601.GP3402@hirez.programming.kicks-ass…
Cc: stable <stable(a)vger.kernel.org> # 4.19.x
Signed-off-by: Wen Yang <simon.wy(a)alibaba-inc.com>
---
kernel/smp.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/kernel/smp.c b/kernel/smp.c
index 084c8b3..9afcbb4 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -290,6 +290,14 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
csd = &csd_stack;
if (!wait) {
csd = this_cpu_ptr(&csd_data);
@@ -415,6 +423,14 @@ void smp_call_function_many(const struct cpumask *mask,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress && !early_boot_irqs_disabled);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
if (cpu == this_cpu)
--
1.8.3.1
From: Peter Zijlstra <peterz(a)infradead.org>
commit 19dbdcb8039cff16669a05136a29180778d16d0a upstream.
It's clearly documented that smp function calls cannot be invoked from
softirq handling context. Unfortunately nothing enforces that or emits a
warning.
A single function call can be invoked from softirq context only via
smp_call_function_single_async().
The only legit context is task context, so add a warning to that effect.
Reported-by: luferry <luferry(a)163.com>
Signed-off-by: Peter Zijlstra <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Link: https://lkml.kernel.org/r/20190718160601.GP3402@hirez.programming.kicks-ass…
Cc: stable <stable(a)vger.kernel.org> # 4.14.x
Signed-off-by: Wen Yang <simon.wy(a)alibaba-inc.com>
---
kernel/smp.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/kernel/smp.c b/kernel/smp.c
index c94dd85..7d00d3e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -290,6 +290,14 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
csd = &csd_stack;
if (!wait) {
csd = this_cpu_ptr(&csd_data);
@@ -415,6 +423,14 @@ void smp_call_function_many(const struct cpumask *mask,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress && !early_boot_irqs_disabled);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
if (cpu == this_cpu)
--
1.8.3.1
Hi Greg,
Could you please backport the commit below to the "5.10.y" stable release?
Looks like this commit was already pulled to "5.10.y" stable tree weeks
ago.
This fix is required for Chelsio adapters. Without this fix the number
of connections supported by isert (over a Chelsio adapter) will be significantly lower.
--------------------------------------------------------
commit ID: dae7a75f1f19bffb579daf148f8d8addd2726772
IB/isert: add module param to set sg_tablesize for IO cmd
Author: Max Gurtovoy <mgurtovoy(a)nvidia.com>
Fixes: 317000b ("IB/isert: allocate RW ctxs according to max IO size")
Thanks,
Krishnamraju.
page structs are not guaranteed to be contiguous for gigantic pages. The
routine update_and_free_page can encounter a gigantic page, yet it assumes
page structs are contiguous when setting page flags in subpages.
If update_and_free_page encounters non-contiguous page structs, we can
see “BUG: Bad page state in process …” errors.
Non-contiguous page structs are generally not an issue. However, they can
exist with a specific kernel configuration and hotplug operations. For
example: Configure the kernel with CONFIG_SPARSEMEM and
!CONFIG_SPARSEMEM_VMEMMAP. Then, hotplug add memory for the area where the
gigantic page will be allocated.
Zi Yan outlined steps to reproduce here [1].
[1] https://lore.kernel.org/linux-mm/16F7C58B-4D79-41C5-9B64-A1A1628F4AF2@nvidi…
Fixes: 944d9fec8d7a ("hugetlb: add support for gigantic page allocation at runtime")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
---
mm/hugetlb.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4bdb58ab14cb..94e9fa803294 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1312,14 +1312,16 @@ static inline void destroy_compound_gigantic_page(struct page *page,
static void update_and_free_page(struct hstate *h, struct page *page)
{
int i;
+ struct page *subpage = page;
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
- for (i = 0; i < pages_per_huge_page(h); i++) {
- page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
+ for (i = 0; i < pages_per_huge_page(h);
+ i++, subpage = mem_map_next(subpage, page, i)) {
+ subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_referenced | 1 << PG_dirty |
1 << PG_active | 1 << PG_private |
1 << PG_writeback);
--
2.29.2
freq_qos_update_request() returns 1 if the effective constraint value
has changed, 0 if the effective constraint value has not changed, or a
negative error code on failures.
The frequency constraints for CPUs can be set by different parts of the
kernel. If the maximum frequency constraint set by other parts of the
kernel is set at a lower value than the one corresponding to cooling
state 0, then we will never be able to cool down the system as
freq_qos_update_request() will keep on returning 0 and we will skip
updating cpufreq_state and thermal pressure.
Fix that by doing the updates even in the case where
freq_qos_update_request() returns 0, as we have effectively set the
constraint to a new value even if the consolidated value of the
actual constraint is unchanged because of external factors.
Cc: v5.7+ <stable(a)vger.kernel.org> # v5.7+
Reported-by: Thara Gopinath <thara.gopinath(a)linaro.org>
Fixes: f12e4f66ab6a ("thermal/cpu-cooling: Update thermal pressure in case of a maximum frequency capping")
Signed-off-by: Viresh Kumar <viresh.kumar(a)linaro.org>
---
Hi Guys,
This needs to go in 5.12-rc.
Thara, please give this a try and give your tested-by :).
drivers/thermal/cpufreq_cooling.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index f5af2571f9b7..10af3341e5ea 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -485,7 +485,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
frequency = get_state_freq(cpufreq_cdev, state);
ret = freq_qos_update_request(&cpufreq_cdev->qos_req, frequency);
- if (ret > 0) {
+ if (ret >= 0) {
cpufreq_cdev->cpufreq_state = state;
cpus = cpufreq_cdev->policy->cpus;
max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus));
--
2.25.0.rc1.19.g042ed3e048af
pstore_compress() and decompress_record() use a mistyped config option
name ("PSTORE_COMPRESSION" instead of "PSTORE_COMPRESS").
As a result, compression and decompression of pstore records are always
disabled.
Use the correct config option name.
Signed-off-by: Jiri Bohac <jbohac(a)suse.cz>
Fixes: fd49e03280e596e54edb93a91bc96170f8e97e4a ("pstore: Fix linking when crypto API disabled")
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 32f64abc277c..d963ae7902f9 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -269,7 +269,7 @@ static int pstore_compress(const void *in, void *out,
{
int ret;
- if (!IS_ENABLED(CONFIG_PSTORE_COMPRESSION))
+ if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS))
return -EINVAL;
ret = crypto_comp_compress(tfm, in, inlen, out, &outlen);
@@ -671,7 +671,7 @@ static void decompress_record(struct pstore_record *record)
int unzipped_len;
char *unzipped, *workspace;
- if (!IS_ENABLED(CONFIG_PSTORE_COMPRESSION) || !record->compressed)
+ if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !record->compressed)
return;
/* Only PSTORE_TYPE_DMESG support compression. */
--
Jiri Bohac <jbohac(a)suse.cz>
SUSE Labs, Prague, Czechia
Compaction always operates on pages from a single given zone when isolating
both pages to migrate and freepages. Pageblock boundaries are intersected with
zone boundaries to be safe in case zone starts or ends in the middle of
pageblock. The use of pageblock_pfn_to_page() protects against non-contiguous
pageblocks.
The functions fast_isolate_freepages() and fast_isolate_around() don't
currently protect the fast freepage isolation thoroughly enough against these
corner cases, and can result in freepage isolation operating outside of zone
boundaries:
- in fast_isolate_freepages() if we get a pfn from the first pageblock of a
zone that starts in the middle of that pageblock, 'highest' can be a pfn
outside of the zone. If we fail to isolate anything in this function, we
may then call fast_isolate_around() on a pfn outside of the zone and there
effectively do a set_pageblock_skip(page_to_pfn(highest)) which may currently
hit a VM_BUG_ON() in some configurations
- fast_isolate_around() checks only the zone end boundary and not beginning,
nor that the pageblock is contiguous (with pageblock_pfn_to_page()) so it's
possible that we end up calling isolate_freepages_block() on a range of pfn's
from two different zones and end up e.g. isolating freepages under the wrong
zone's lock.
This patch should fix the above issues.
Fixes: 5a811889de10 ("mm, compaction: use free lists to quickly locate a migration target")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
---
mm/compaction.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
Hi, as promised here's a fix for issues that I think exist regardless of the
memblock stuff, but were partially exposed by that. I will see if I can manage
to test that it does prevent the known symptoms (it should if I didn't miss
anything).
diff --git a/mm/compaction.c b/mm/compaction.c
index 190ccdaa6c19..22a35521e358 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1288,7 +1288,7 @@ static void
fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
{
unsigned long start_pfn, end_pfn;
- struct page *page = pfn_to_page(pfn);
+ struct page *page;
/* Do not search around if there are enough pages already */
if (cc->nr_freepages >= cc->nr_migratepages)
@@ -1299,8 +1299,12 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
return;
/* Pageblock boundaries */
- start_pfn = pageblock_start_pfn(pfn);
- end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)) - 1;
+ start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn);
+ end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone));
+
+ page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone);
+ if (!page)
+ return;
/* Scan before */
if (start_pfn != pfn) {
@@ -1402,7 +1406,8 @@ fast_isolate_freepages(struct compact_control *cc)
pfn = page_to_pfn(freepage);
if (pfn >= highest)
- highest = pageblock_start_pfn(pfn);
+ highest = max(pageblock_start_pfn(pfn),
+ cc->zone->zone_start_pfn);
if (pfn >= low_pfn) {
cc->fast_search_fail = 0;
@@ -1472,7 +1477,8 @@ fast_isolate_freepages(struct compact_control *cc)
} else {
if (cc->direct_compaction && pfn_valid(min_pfn)) {
page = pageblock_pfn_to_page(min_pfn,
- pageblock_end_pfn(min_pfn),
+ min(pageblock_end_pfn(min_pfn),
+ zone_end_pfn(cc->zone)),
cc->zone);
cc->free_pfn = min_pfn;
}
--
2.30.0
This is a note to let you know that I've just added the patch titled
debugfs: do not attempt to create a new file before the filesystem is
to my driver-core git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
in the driver-core-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
>From 56348560d495d2501e87db559a61de717cd3ab02 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Date: Thu, 18 Feb 2021 11:08:18 +0100
Subject: debugfs: do not attempt to create a new file before the filesystem is
initialized
Some subsystems want to add debugfs files at early boot, way before
debugfs is initialized. This seems to work somehow as the vfs layer
will not allow it to happen, but let's be explicit and test to ensure we
are properly up and running before allowing files to be created.
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Cc: stable <stable(a)vger.kernel.org>
Reported-by: Michael Walle <michael(a)walle.cc>
Reported-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20210218100818.3622317-2-gregkh@linuxfoundation.o…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/debugfs/inode.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index bbeb563cbe78..86c7f0489620 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -318,6 +318,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!(debugfs_allow & DEBUGFS_ALLOW_API))
return ERR_PTR(-EPERM);
+ if (!debugfs_initialized())
+ return ERR_PTR(-ENOENT);
+
pr_debug("creating file '%s'\n", name);
if (IS_ERR(parent))
--
2.30.1
This is a note to let you know that I've just added the patch titled
debugfs: be more robust at handling improper input in
to my driver-core git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
in the driver-core-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
>From bc6de804d36b3709d54fa22bd128cbac91c11526 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Date: Thu, 18 Feb 2021 11:08:17 +0100
Subject: debugfs: be more robust at handling improper input in
debugfs_lookup()
debugfs_lookup() doesn't like it if it is passed an illegal name
pointer, or if the filesystem isn't even initialized yet. If either of
these happen, it will crash the system, so fix it up by properly testing
for valid input and that we are up and running before trying to find a
file in the filesystem.
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Cc: stable <stable(a)vger.kernel.org>
Reported-by: Michael Walle <michael(a)walle.cc>
Tested-by: Michael Walle <michael(a)walle.cc>
Tested-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20210218100818.3622317-1-gregkh@linuxfoundation.o…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/debugfs/inode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 2fcf66473436..bbeb563cbe78 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -297,7 +297,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
{
struct dentry *dentry;
- if (IS_ERR(parent))
+ if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent))
return NULL;
if (!parent)
--
2.30.1
This is a note to let you know that I've just added the patch titled
debugfs: do not attempt to create a new file before the filesystem is
to my driver-core git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
in the driver-core-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the driver-core-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
>From 56348560d495d2501e87db559a61de717cd3ab02 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Date: Thu, 18 Feb 2021 11:08:18 +0100
Subject: debugfs: do not attempt to create a new file before the filesystem is
initialized
Some subsystems want to add debugfs files at early boot, way before
debugfs is initialized. This seems to work somehow as the vfs layer
will not allow it to happen, but let's be explicit and test to ensure we
are properly up and running before allowing files to be created.
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Cc: stable <stable(a)vger.kernel.org>
Reported-by: Michael Walle <michael(a)walle.cc>
Reported-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20210218100818.3622317-2-gregkh@linuxfoundation.o…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/debugfs/inode.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index bbeb563cbe78..86c7f0489620 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -318,6 +318,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!(debugfs_allow & DEBUGFS_ALLOW_API))
return ERR_PTR(-EPERM);
+ if (!debugfs_initialized())
+ return ERR_PTR(-ENOENT);
+
pr_debug("creating file '%s'\n", name);
if (IS_ERR(parent))
--
2.30.1
This is a note to let you know that I've just added the patch titled
debugfs: be more robust at handling improper input in
to my driver-core git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
in the driver-core-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the driver-core-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
>From bc6de804d36b3709d54fa22bd128cbac91c11526 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Date: Thu, 18 Feb 2021 11:08:17 +0100
Subject: debugfs: be more robust at handling improper input in
debugfs_lookup()
debugfs_lookup() doesn't like it if it is passed an illegal name
pointer, or if the filesystem isn't even initialized yet. If either of
these happen, it will crash the system, so fix it up by properly testing
for valid input and that we are up and running before trying to find a
file in the filesystem.
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Cc: stable <stable(a)vger.kernel.org>
Reported-by: Michael Walle <michael(a)walle.cc>
Tested-by: Michael Walle <michael(a)walle.cc>
Tested-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20210218100818.3622317-1-gregkh@linuxfoundation.o…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/debugfs/inode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 2fcf66473436..bbeb563cbe78 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -297,7 +297,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
{
struct dentry *dentry;
- if (IS_ERR(parent))
+ if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent))
return NULL;
if (!parent)
--
2.30.1
From: Federico Pellegrin <fede(a)evolware.org>
According to the datasheet PA7 can be set to either function A, B or
C (see table 6-2 of DS60001579D). The previous value would permit just
configuring with function C.
Signed-off-by: Federico Pellegrin <fede(a)evolware.org>
Fixes: 1e5f532c2737 ("ARM: dts: at91: sam9x60: add device tree for soc and board")
Cc: <stable(a)vger.kernel.org> # 5.6+
Cc: Sandeep Sheriker Mallikarjun <sandeepsheriker.mallikarjun(a)microchip.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre(a)microchip.com>
---
arch/arm/boot/dts/at91-sam9x60ek.dts | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts
index 73b6b1f89de9..4c40ae571154 100644
--- a/arch/arm/boot/dts/at91-sam9x60ek.dts
+++ b/arch/arm/boot/dts/at91-sam9x60ek.dts
@@ -336,7 +336,7 @@ ethernet-phy@0 {
&pinctrl {
atmel,mux-mask = <
/* A B C */
- 0xFFFFFE7F 0xC0E0397F 0xEF00019D /* pioA */
+ 0xFFFFFEFF 0xC0E039FF 0xEF00019D /* pioA */
0x03FFFFFF 0x02FC7E68 0x00780000 /* pioB */
0xffffffff 0xF83FFFFF 0xB800F3FC /* pioC */
0x003FFFFF 0x003F8000 0x00000000 /* pioD */
--
2.30.0
debugfs_lookup() doesn't like it if it is passed an illegal name
pointer, or if the filesystem isn't even initialized yet. If either of
these happen, it will crash the system, so fix it up by properly testing
for valid input and that we are up and running before trying to find a
file in the filesystem.
Reported-by: Michael Walle <michael(a)walle.cc>
Tested-by: Michael Walle <michael(a)walle.cc>
Tested-by: Marc Zyngier <maz(a)kernel.org>
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/debugfs/inode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 2fcf66473436..bbeb563cbe78 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -297,7 +297,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
{
struct dentry *dentry;
- if (IS_ERR(parent))
+ if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent))
return NULL;
if (!parent)
--
2.30.1
Good day,
Im Donald Flor the Purchase Manager of Trankiem Exports Trading Co.Ltd.
We are sourcing for new suppliers from your location on the products you are selling . Please let me know if you can accept new orders on your products.
Kindly send me an email and I will forward our PO immediately to place a trial order.
Thanks & regards,
Department of Trankiem Hu
Donald Flor
The `wacom_feature_mapping` function is careful to only set the
touch_max value a single time, but this care does not extend to the
`wacom_wac_finger_event` function. In particular, if a device sends
multiple HID_DG_CONTACTMAX items in a single feature report, the
driver will end up retaining the value of the last item.
The HID descriptor for the Cintiq Companion 2 does exactly this. It
incorrectly sets a "Report Count" of 2, which will cause the driver
to process two HID_DG_CONTACTMAX items. The first item has the actual
count, while the second item should have been declared as a constant
zero. The constant zero is the value the driver ends up using, however,
since it is the last HID_DG_CONTACTMAX in the report.
Report ID (16),
Usage (Contact Count Maximum), ; Contact count maximum (55h, static value)
Report Count (2),
Logical Maximum (10),
Feature (Variable),
To address this, we add a check that the touch_max is not already set
within the `wacom_wac_finger_event` function that processes the
HID_DG_CONTACTMAX item. We emit a warning if the value is set and ignore
the updated value.
This could potentially cause problems if there is a tablet which has
a similar issue but requires the last item to be used. This is unlikely,
however, since it would have to have a different non-zero value for
HID_DG_CONTACTMAX earlier in the same report, which makes no sense
except in the case of a firmware bug. Note that cases where the
HID_DG_CONTACTMAX items are in different reports is already handled
(and similarly ignored) by `wacom_feature_mapping` as mentioned above.
Link: https://github.com/linuxwacom/input-wacom/issues/223
Fixes: 184eccd40389 ("HID: wacom: generic: read HID_DG_CONTACTMAX from any feature report")
Signed-off-by: Jason Gerecke <jason.gerecke(a)wacom.com>
CC: stable(a)vger.kernel.org
---
drivers/hid/wacom_wac.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 1bd0eb71559c..44d715c12f6a 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2600,7 +2600,12 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
wacom_wac->is_invalid_bt_frame = !value;
return;
case HID_DG_CONTACTMAX:
- features->touch_max = value;
+ if (!features->touch_max) {
+ features->touch_max = value;
+ } else {
+ hid_warn(hdev, "%s: ignoring attempt to overwrite non-zero touch_max "
+ "%d -> %d\n", __func__, features->touch_max, value);
+ }
return;
}
--
2.30.1
Commit 65b709586e222fa6ffd4166ac7fdb5d5dad113ee upstream.
Before this patch, if 'offset + len' was equal to
sizeof(struct virtio_net_config), the entire buffer wasn't filled,
returning incorrect values to the caller.
Since 'vdpasim->config' type is 'struct virtio_net_config', we can
safely copy its content under this condition.
Commit 65b709586e22 ("vdpa_sim: add get_config callback in
vdpasim_dev_attr") unintentionally solved it upstream while
refactoring vdpa_sim.c to support multiple devices. But we don't want
to backport it to stable branches as it contains many changes.
Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator")
Cc: <stable(a)vger.kernel.org> # 5.10.x
Signed-off-by: Stefano Garzarella <sgarzare(a)redhat.com>
---
drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 6a90fdb9cbfc..8ca178d7b02f 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -572,7 +572,7 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- if (offset + len < sizeof(struct virtio_net_config))
+ if (offset + len <= sizeof(struct virtio_net_config))
memcpy(buf, (u8 *)&vdpasim->config + offset, len);
}
--
2.29.2
The upstream commit fef912bf860e8e7e48a2bfb978a356bba743a8b7 ("block:
genhd: add 'groups' argument to device_add_disk") and the following
patches fix a race condition of udev for several devices, including
nvme, aoe, zram and virtio.
The stable tree commit 9e07f4e243791e00a4086ad86e573705cf7b2c65 ("zram:
close udev startup race condition as default groups") only fixes zram,
leaving other devices unfixed.
This udev race does cause real trouble. We recently found that this
issue can cause missing '/dev/disk/by-id/XXXX' symlink of virtio-blk
devices on 4.19.
Note that this patch set follows the approach of stable commit
9e07f4e243791e00a4086ad86e573705cf7b2c65 ("zram: close udev startup race
condition as default groups") of merging the preparation patch (commit
fef912bf860e) and the fixing patch (commit 98af4d4df889).
Jeffle Xu (3):
virtio-blk: close udev startup race condition as default groups
aoe: close udev startup race condition as default groups
nvme: close udev startup race condition as default groups
drivers/block/aoe/aoe.h | 1 -
drivers/block/aoe/aoeblk.c | 20 +++----
drivers/block/aoe/aoedev.c | 1 -
drivers/block/virtio_blk.c | 67 +++++++++++++---------
drivers/nvme/host/core.c | 20 +++----
drivers/nvme/host/lightnvm.c | 105 ++++++++++++++--------------------
drivers/nvme/host/multipath.c | 10 +---
drivers/nvme/host/nvme.h | 10 +---
8 files changed, 103 insertions(+), 131 deletions(-)
--
2.27.0
This is the start of the stable review cycle for the 5.10.16 release.
There are 54 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat, 13 Feb 2021 15:01:39 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.16-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.10.16-rc1
Phillip Lougher <phillip(a)squashfs.org.uk>
squashfs: add more sanity checks in xattr id lookup
Phillip Lougher <phillip(a)squashfs.org.uk>
squashfs: add more sanity checks in inode lookup
Phillip Lougher <phillip(a)squashfs.org.uk>
squashfs: add more sanity checks in id lookup
Phillip Lougher <phillip(a)squashfs.org.uk>
squashfs: avoid out of bounds writes in decompressors
Johannes Weiner <hannes(a)cmpxchg.org>
Revert "mm: memcontrol: avoid workload stalls when lowering memory.high"
Joachim Henke <joachim.henke(a)t-systems.com>
nilfs2: make splice write available again
Ville Syrjälä <ville.syrjala(a)linux.intel.com>
drm/i915: Skip vswing programming for TBT
Ville Syrjälä <ville.syrjala(a)linux.intel.com>
drm/i915: Fix ICL MG PHY vswing handling
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Fix verifier jsgt branch analysis on max bound
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Fix 32 bit src register truncation on div/mod
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Fix verifier jmp32 pruning decision logic
Mark Brown <broonie(a)kernel.org>
regulator: Fix lockdep warning resolving supplies
Baolin Wang <baolin.wang(a)linux.alibaba.com>
blk-cgroup: Use cond_resched() when destroy blkgs
Qii Wang <qii.wang(a)mediatek.com>
i2c: mediatek: Move suspend and resume handling to NOIRQ phase
Dave Wysochanski <dwysocha(a)redhat.com>
SUNRPC: Handle 0 length opaque XDR object data properly
Dave Wysochanski <dwysocha(a)redhat.com>
SUNRPC: Move simple_get_bytes and simple_get_netobj into private header
Johannes Berg <johannes.berg(a)intel.com>
iwlwifi: queue: bail out on invalid freeing
Johannes Berg <johannes.berg(a)intel.com>
iwlwifi: mvm: guard against device removal in reprobe
Luca Coelho <luciano.coelho(a)intel.com>
iwlwifi: pcie: add rules to match Qu with Hr2
Gregory Greenman <gregory.greenman(a)intel.com>
iwlwifi: mvm: invalidate IDs of internal stations at mvm start
Johannes Berg <johannes.berg(a)intel.com>
iwlwifi: pcie: fix context info memory leak
Emmanuel Grumbach <emmanuel.grumbach(a)intel.com>
iwlwifi: pcie: add a NULL check in iwl_pcie_txq_unmap
Johannes Berg <johannes.berg(a)intel.com>
iwlwifi: mvm: take mutex for calling iwl_mvm_get_sync_time()
Sara Sharon <sara.sharon(a)intel.com>
iwlwifi: mvm: skip power command when unbinding vif during CSA
Libin Yang <libin.yang(a)intel.com>
ASoC: Intel: sof_sdw: set proper flags for Dell TGL-H SKU 0A5E
Eliot Blennerhassett <eliot(a)blennerhassett.gen.nz>
ASoC: ak4458: correct reset polarity
Bard Liao <bard.liao(a)intel.com>
ALSA: hda: intel-dsp-config: add PCI id for TGL-H
Trond Myklebust <trond.myklebust(a)hammerspace.com>
pNFS/NFSv4: Improve rejection of out-of-order layouts
Trond Myklebust <trond.myklebust(a)hammerspace.com>
pNFS/NFSv4: Try to return invalid layout in pnfs_layout_process()
Pan Bian <bianpan2016(a)163.com>
chtls: Fix potential resource leak
Ricardo Ribalda <ribalda(a)chromium.org>
ASoC: Intel: Skylake: Zero snd_ctl_elem_value
Shay Bar <shay.bar(a)celeno.com>
mac80211: 160MHz with extended NSS BW in CSA
Ben Skeggs <bskeggs(a)redhat.com>
drm/nouveau/nvif: fix method count when pushing an array
James Schulman <james.schulman(a)cirrus.com>
ASoC: wm_adsp: Fix control name parsing for multi-fw
David Collins <collinsd(a)codeaurora.org>
regulator: core: avoid regulator_resolve_supply() race condition
Cong Wang <cong.wang(a)bytedance.com>
af_key: relax availability checks for skb size calculation
Raoni Fassina Firmino <raoni(a)linux.ibm.com>
powerpc/64/signal: Fix regression in __kernel_sigtramp_rt64() semantics
Kent Gibson <warthog618(a)gmail.com>
gpiolib: cdev: clear debounce period if line set to output
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: drop mm/files between task_work_submit
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: reinforce cancel on flush during exit
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: fix sqo ownership false positive warning
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: fix list corruption for splice file_get
Hao Xu <haoxu(a)linux.alibaba.com>
io_uring: fix flush cqring overflow list while TASK_INTERRUPTIBLE
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: fix cancellation taking mutex while TASK_UNINTERRUPTIBLE
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: replace inflight_wait with tctx->wait
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: fix __io_uring_files_cancel() with TASK_UNINTERRUPTIBLE
Jens Axboe <axboe(a)kernel.dk>
io_uring: if we see flush on exit, cancel related tasks
Jens Axboe <axboe(a)kernel.dk>
io_uring: account io_uring internal files as REQ_F_INFLIGHT
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: fix files cancellation
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: always batch cancel in *cancel_files()
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: pass files into kill timeouts/poll
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: don't iterate io_uring_cancel_files()
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: add a {task,files} pair matching helper
Pavel Begunkov <asml.silence(a)gmail.com>
io_uring: simplify io_task_match()
-------------
Diffstat:
Makefile | 4 +-
arch/powerpc/kernel/vdso.c | 2 +-
arch/powerpc/kernel/vdso64/sigtramp.S | 11 +-
arch/powerpc/kernel/vdso64/vdso64.lds.S | 1 +
block/blk-cgroup.c | 18 +-
drivers/gpio/gpiolib-cdev.c | 2 +
drivers/gpu/drm/i915/display/intel_ddi.c | 13 +-
drivers/gpu/drm/nouveau/include/nvif/push.h | 216 ++++++-------
drivers/i2c/busses/i2c-mt65xx.c | 19 +-
.../chelsio/inline_crypto/chtls/chtls_cm.c | 7 +-
drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 25 ++
drivers/net/wireless/intel/iwlwifi/iwl-config.h | 3 +
.../net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 3 +
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +
drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 7 +-
drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 6 +
.../wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c | 11 +-
drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 10 +
drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 5 +
drivers/net/wireless/intel/iwlwifi/queue/tx.c | 6 +-
drivers/regulator/core.c | 44 ++-
fs/io-wq.c | 10 -
fs/io-wq.h | 1 -
fs/io_uring.c | 360 ++++++++-------------
fs/nfs/pnfs.c | 30 +-
fs/nilfs2/file.c | 1 +
fs/squashfs/block.c | 8 +-
fs/squashfs/export.c | 41 ++-
fs/squashfs/id.c | 40 ++-
fs/squashfs/squashfs_fs_sb.h | 1 +
fs/squashfs/super.c | 6 +-
fs/squashfs/xattr.h | 10 +-
fs/squashfs/xattr_id.c | 66 +++-
include/linux/sunrpc/xdr.h | 3 +-
kernel/bpf/verifier.c | 38 +--
mm/memcontrol.c | 5 +-
net/key/af_key.c | 6 +-
net/mac80211/spectmgmt.c | 10 +-
net/sunrpc/auth_gss/auth_gss.c | 30 +-
net/sunrpc/auth_gss/auth_gss_internal.h | 45 +++
net/sunrpc/auth_gss/gss_krb5_mech.c | 31 +-
sound/hda/intel-dsp-config.c | 4 +
sound/soc/codecs/ak4458.c | 22 +-
sound/soc/codecs/wm_adsp.c | 3 +
sound/soc/intel/boards/sof_sdw.c | 10 +
sound/soc/intel/skylake/skl-topology.c | 2 +-
46 files changed, 683 insertions(+), 516 deletions(-)
From: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
The following sequence of operations results in a refcount warning:
1. Open device /dev/tpmrm
2. Remove module tpm_tis_spi
3. Write a TPM command to the file descriptor opened at step 1.
------------[ cut here ]------------
WARNING: CPU: 3 PID: 1161 at lib/refcount.c:25 kobject_get+0xa0/0xa4
refcount_t: addition on 0; use-after-free.
Modules linked in: tpm_tis_spi tpm_tis_core tpm mdio_bcm_unimac brcmfmac
sha256_generic libsha256 sha256_arm hci_uart btbcm bluetooth cfg80211 vc4
brcmutil ecdh_generic ecc snd_soc_core crc32_arm_ce libaes
raspberrypi_hwmon ac97_bus snd_pcm_dmaengine bcm2711_thermal snd_pcm
snd_timer genet snd phy_generic soundcore [last unloaded: spi_bcm2835]
CPU: 3 PID: 1161 Comm: hold_open Not tainted 5.10.0ls-main-dirty #2
Hardware name: BCM2711
[<c0410c3c>] (unwind_backtrace) from [<c040b580>] (show_stack+0x10/0x14)
[<c040b580>] (show_stack) from [<c1092174>] (dump_stack+0xc4/0xd8)
[<c1092174>] (dump_stack) from [<c0445a30>] (__warn+0x104/0x108)
[<c0445a30>] (__warn) from [<c0445aa8>] (warn_slowpath_fmt+0x74/0xb8)
[<c0445aa8>] (warn_slowpath_fmt) from [<c08435d0>] (kobject_get+0xa0/0xa4)
[<c08435d0>] (kobject_get) from [<bf0a715c>] (tpm_try_get_ops+0x14/0x54 [tpm])
[<bf0a715c>] (tpm_try_get_ops [tpm]) from [<bf0a7d6c>] (tpm_common_write+0x38/0x60 [tpm])
[<bf0a7d6c>] (tpm_common_write [tpm]) from [<c05a7ac0>] (vfs_write+0xc4/0x3c0)
[<c05a7ac0>] (vfs_write) from [<c05a7ee4>] (ksys_write+0x58/0xcc)
[<c05a7ee4>] (ksys_write) from [<c04001a0>] (ret_fast_syscall+0x0/0x4c)
Exception stack(0xc226bfa8 to 0xc226bff0)
bfa0: 00000000 000105b4 00000003 beafe664 00000014 00000000
bfc0: 00000000 000105b4 000103f8 00000004 00000000 00000000 b6f9c000 beafe684
bfe0: 0000006c beafe648 0001056c b6eb6944
---[ end trace d4b8409def9b8b1f ]---
The reason for this warning is the attempt to get the chip->dev reference
in tpm_common_write() although the reference counter is already zero.
Since commit 8979b02aaf1d ("tpm: Fix reference count to main device") the
extra reference used to prevent a premature zero counter is never taken,
because the required TPM_CHIP_FLAG_TPM2 flag is never set.
Fix this by moving the TPM 2 character device handling from
tpm_chip_alloc() to tpm_add_char_device() which is called at a later point
in time when the flag has been set in case of TPM2.
Commit fdc915f7f719 ("tpm: expose spaces via a device link /dev/tpmrm<n>")
already introduced function tpm_devs_release() to release the extra
reference but did not implement the required put on chip->devs that results
in the call of this function.
Fix this by putting chip->devs in tpm_chip_unregister().
Finally move the new implementation for the TPM 2 handling into a new
function to avoid multiple checks for the TPM_CHIP_FLAG_TPM2 flag in the
good case and error cases.
Fixes: fdc915f7f719 ("tpm: expose spaces via a device link /dev/tpmrm<n>")
Fixes: 8979b02aaf1d ("tpm: Fix reference count to main device")
Co-developed-by: Jason Gunthorpe <jgg(a)ziepe.ca>
Signed-off-by: Jason Gunthorpe <jgg(a)ziepe.ca>
Signed-off-by: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Cc: stable(a)vger.kernel.org
---
drivers/char/tpm/tpm-chip.c | 80 ++++++++++++++++++++++++++++-----------------
1 file changed, 50 insertions(+), 30 deletions(-)
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index ddaeceb..44cac3a 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -344,7 +344,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
chip->dev_num = rc;
device_initialize(&chip->dev);
- device_initialize(&chip->devs);
chip->dev.class = tpm_class;
chip->dev.class->shutdown_pre = tpm_class_shutdown;
@@ -352,39 +351,20 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
chip->dev.parent = pdev;
chip->dev.groups = chip->groups;
- chip->devs.parent = pdev;
- chip->devs.class = tpmrm_class;
- chip->devs.release = tpm_devs_release;
- /* get extra reference on main device to hold on
- * behalf of devs. This holds the chip structure
- * while cdevs is in use. The corresponding put
- * is in the tpm_devs_release (TPM2 only)
- */
- if (chip->flags & TPM_CHIP_FLAG_TPM2)
- get_device(&chip->dev);
-
if (chip->dev_num == 0)
chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR);
else
chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num);
- chip->devs.devt =
- MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
-
rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num);
if (rc)
goto out;
- rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
- if (rc)
- goto out;
if (!pdev)
chip->flags |= TPM_CHIP_FLAG_VIRTUAL;
cdev_init(&chip->cdev, &tpm_fops);
- cdev_init(&chip->cdevs, &tpmrm_fops);
chip->cdev.owner = THIS_MODULE;
- chip->cdevs.owner = THIS_MODULE;
rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE);
if (rc) {
@@ -396,7 +376,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
return chip;
out:
- put_device(&chip->devs);
put_device(&chip->dev);
return ERR_PTR(rc);
}
@@ -431,6 +410,46 @@ struct tpm_chip *tpmm_chip_alloc(struct device *pdev,
}
EXPORT_SYMBOL_GPL(tpmm_chip_alloc);
+static int tpm_add_tpm2_char_device(struct tpm_chip *chip)
+{
+ int rc;
+
+ device_initialize(&chip->devs);
+ chip->devs.parent = chip->dev.parent;
+ chip->devs.class = tpmrm_class;
+
+ rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
+ if (rc)
+ goto out_put_devs;
+ /*
+ * get extra reference on main device to hold on behalf of devs.
+ * This holds the chip structure while cdevs is in use. The
+ * corresponding put is in the tpm_devs_release.
+ */
+ get_device(&chip->dev);
+ chip->devs.release = tpm_devs_release;
+ chip->devs.devt =
+ MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
+ cdev_init(&chip->cdevs, &tpmrm_fops);
+ chip->cdevs.owner = THIS_MODULE;
+
+ rc = cdev_device_add(&chip->cdevs, &chip->devs);
+ if (rc) {
+ dev_err(&chip->devs,
+ "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
+ dev_name(&chip->devs), MAJOR(chip->devs.devt),
+ MINOR(chip->devs.devt), rc);
+ goto out_put_devs;
+ }
+
+ return 0;
+
+out_put_devs:
+ put_device(&chip->devs);
+
+ return rc;
+}
+
static int tpm_add_char_device(struct tpm_chip *chip)
{
int rc;
@@ -445,14 +464,9 @@ static int tpm_add_char_device(struct tpm_chip *chip)
}
if (chip->flags & TPM_CHIP_FLAG_TPM2) {
- rc = cdev_device_add(&chip->cdevs, &chip->devs);
- if (rc) {
- dev_err(&chip->devs,
- "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
- dev_name(&chip->devs), MAJOR(chip->devs.devt),
- MINOR(chip->devs.devt), rc);
- return rc;
- }
+ rc = tpm_add_tpm2_char_device(chip);
+ if (rc)
+ goto del_cdev;
}
/* Make the chip available. */
@@ -460,6 +474,10 @@ static int tpm_add_char_device(struct tpm_chip *chip)
idr_replace(&dev_nums_idr, chip, chip->dev_num);
mutex_unlock(&idr_lock);
+ return 0;
+
+del_cdev:
+ cdev_device_del(&chip->cdev, &chip->dev);
return rc;
}
@@ -640,8 +658,10 @@ void tpm_chip_unregister(struct tpm_chip *chip)
if (IS_ENABLED(CONFIG_HW_RANDOM_TPM))
hwrng_unregister(&chip->hwrng);
tpm_bios_log_teardown(chip);
- if (chip->flags & TPM_CHIP_FLAG_TPM2)
+ if (chip->flags & TPM_CHIP_FLAG_TPM2) {
cdev_device_del(&chip->cdevs, &chip->devs);
+ put_device(&chip->devs);
+ }
tpm_del_char_device(chip);
}
EXPORT_SYMBOL_GPL(tpm_chip_unregister);
--
2.7.4
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 88bf56d04bc3564542049ec4ec168a8b60d0b48c Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs(a)linux.alibaba.com>
Date: Thu, 17 Dec 2020 23:41:18 +0800
Subject: [PATCH] kvm: check tlbs_dirty directly
In kvm_mmu_notifier_invalidate_range_start(), tlbs_dirty is used as:
need_tlb_flush |= kvm->tlbs_dirty;
with need_tlb_flush's type being int and tlbs_dirty's type being long.
It means that tlbs_dirty is always truncated to int and the higher 32 bits
are ignored. We need to check tlbs_dirty in a correct way and this
change checks it directly without propagating it to need_tlb_flush.
Note: it's _extremely_ unlikely this neglecting of higher 32 bits can
cause problems in practice. It would require encountering tlbs_dirty
on a 4 billion count boundary, and KVM would need to be using shadow
paging or be running a nested guest.
Cc: stable(a)vger.kernel.org
Fixes: a4ee1ca4a36e ("KVM: MMU: delay flush all tlbs on sync_page path")
Signed-off-by: Lai Jiangshan <laijs(a)linux.alibaba.com>
Message-Id: <20201217154118.16497-1-jiangshanlai(a)gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3abcb2ce5b7d..19dae28904f7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -485,9 +485,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
kvm->mmu_notifier_count++;
need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
range->flags);
- need_tlb_flush |= kvm->tlbs_dirty;
/* we've to flush the tlb before the pages can be freed */
- if (need_tlb_flush)
+ if (need_tlb_flush || kvm->tlbs_dirty)
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
The patch titled
Subject: mm: memcontrol: fix swap undercounting in cgroup2
has been added to the -mm tree. Its filename is
mm-memcontrol-fix-swap-undercounting-in-cgroup2.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-memcontrol-fix-swap-undercount…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-memcontrol-fix-swap-undercount…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Muchun Song <songmuchun(a)bytedance.com>
Subject: mm: memcontrol: fix swap undercounting in cgroup2
When pages are swapped in, the VM may retain the swap copy to avoid
repeated writes in the future. It's also retained if shared pages are
faulted back in some processes, but not in others. During that time we
have an in-memory copy of the page, as well as an on-swap copy. Cgroup1
and cgroup2 handle these overlapping lifetimes slightly differently due to
the nature of how they account memory and swap:
Cgroup1 has a unified memory+swap counter that tracks a data page
regardless whether it's in-core or swapped out. On swapin, we transfer
the charge from the swap entry to the newly allocated swapcache page, even
though the swap entry might stick around for a while. That's why we have
a mem_cgroup_uncharge_swap() call inside mem_cgroup_charge().
Cgroup2 tracks memory and swap as separate, independent resources and thus
has split memory and swap counters. On swapin, we charge the newly
allocated swapcache page as memory, while the swap slot in turn must
remain charged to the swap counter as long as it's allocated too.
The cgroup2 logic was broken by commit 2d1c498072de ("mm: memcontrol: make
swap tracking an integral part of memory control"), because it
accidentally removed the do_memsw_account() check in the branch inside
mem_cgroup_charge() that was supposed to tell the difference between the
charge transfer in cgroup1 and the separate counters in cgroup2.
As a result, cgroup2 currently undercounts retained swap to varying
degrees: swap slots are cached up to 50% of the configured limit or total
available swap space; partially faulted back shared pages are only limited
by physical capacity. This in turn allows cgroups to significantly
overconsume their allotted swap space.
Add the do_memsw_account() check back to fix this problem.
Link: https://lkml.kernel.org/r/20210217153237.92484-1-songmuchun@bytedance.com
Fixes: 2d1c498072de ("mm: memcontrol: make swap tracking an integral part of memory control")
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb(a)google.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [5.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
--- a/mm/memcontrol.c~mm-memcontrol-fix-swap-undercounting-in-cgroup2
+++ a/mm/memcontrol.c
@@ -6748,7 +6748,19 @@ int mem_cgroup_charge(struct page *page,
memcg_check_events(memcg, page);
local_irq_enable();
- if (PageSwapCache(page)) {
+ /*
+ * Cgroup1's unified memory+swap counter has been charged with the
+ * new swapcache page, finish the transfer by uncharging the swap
+ * slot. The swap slot would also get uncharged when it dies, but
+ * it can stick around indefinitely and we'd count the page twice
+ * the entire time.
+ *
+ * Cgroup2 has separate resource counters for memory and swap,
+ * so this is a non-issue here. Memory and swap charge lifetimes
+ * correspond 1:1 to page and swap slot lifetimes: we charge the
+ * page to memory here, and uncharge swap when the slot is freed.
+ */
+ if (do_memsw_account() && PageSwapCache(page)) {
swp_entry_t entry = { .val = page_private(page) };
/*
* The swap entry might not get freed for a long time,
_
Patches currently in -mm which might be from songmuchun(a)bytedance.com are
mm-memcontrol-optimize-per-lruvec-stats-counter-memory-usage.patch
mm-memcontrol-fix-nr_anon_thps-accounting-in-charge-moving.patch
mm-memcontrol-convert-nr_anon_thps-account-to-pages.patch
mm-memcontrol-convert-nr_file_thps-account-to-pages.patch
mm-memcontrol-convert-nr_shmem_thps-account-to-pages.patch
mm-memcontrol-convert-nr_shmem_pmdmapped-account-to-pages.patch
mm-memcontrol-convert-nr_file_pmdmapped-account-to-pages.patch
mm-memcontrol-make-the-slab-calculation-consistent.patch
mm-memcontrol-replace-the-loop-with-a-list_for_each_entry.patch
mm-memcontrol-fix-swap-undercounting-in-cgroup2.patch
hugetlb-convert-page_huge_active-hpagemigratable-flag-fix.patch
The patch titled
Subject: mm, compaction: make fast_isolate_freepages() stay within zone
has been added to the -mm tree. Its filename is
mm-compaction-make-fast_isolate_freepages-stay-within-zone.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-compaction-make-fast_isolate_f…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-compaction-make-fast_isolate_f…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Vlastimil Babka <vbabka(a)suse.cz>
Subject: mm, compaction: make fast_isolate_freepages() stay within zone
Compaction always operates on pages from a single given zone when
isolating both pages to migrate and freepages. Pageblock boundaries are
intersected with zone boundaries to be safe in case zone starts or ends in
the middle of pageblock. The use of pageblock_pfn_to_page() protects
against non-contiguous pageblocks.
The functions fast_isolate_freepages() and fast_isolate_around() don't
currently protect the fast freepage isolation thoroughly enough against
these corner cases, and can result in freepage isolation operating outside
of zone boundaries:
- in fast_isolate_freepages() if we get a pfn from the first pageblock
of a zone that starts in the middle of that pageblock, 'highest' can be
a pfn outside of the zone. If we fail to isolate anything in this
function, we may then call fast_isolate_around() on a pfn outside of the
zone and there effectively do a set_pageblock_skip(page_to_pfn(highest))
which may currently hit a VM_BUG_ON() in some configurations
- fast_isolate_around() checks only the zone end boundary and not
beginning, nor that the pageblock is contiguous (with
pageblock_pfn_to_page()) so it's possible that we end up calling
isolate_freepages_block() on a range of pfn's from two different zones
and end up e.g. isolating freepages under the wrong zone's lock.
This patch should fix the above issues.
Link: https://lkml.kernel.org/r/20210217173300.6394-1-vbabka@suse.cz
Fixes: 5a811889de10 ("mm, compaction: use free lists to quickly locate a migration target")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Acked-by: David Rientjes <rientjes(a)google.com>
Acked-by: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/compaction.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
--- a/mm/compaction.c~mm-compaction-make-fast_isolate_freepages-stay-within-zone
+++ a/mm/compaction.c
@@ -1284,7 +1284,7 @@ static void
fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
{
unsigned long start_pfn, end_pfn;
- struct page *page = pfn_to_page(pfn);
+ struct page *page;
/* Do not search around if there are enough pages already */
if (cc->nr_freepages >= cc->nr_migratepages)
@@ -1295,8 +1295,12 @@ fast_isolate_around(struct compact_contr
return;
/* Pageblock boundaries */
- start_pfn = pageblock_start_pfn(pfn);
- end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)) - 1;
+ start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn);
+ end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone));
+
+ page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone);
+ if (!page)
+ return;
/* Scan before */
if (start_pfn != pfn) {
@@ -1398,7 +1402,8 @@ fast_isolate_freepages(struct compact_co
pfn = page_to_pfn(freepage);
if (pfn >= highest)
- highest = pageblock_start_pfn(pfn);
+ highest = max(pageblock_start_pfn(pfn),
+ cc->zone->zone_start_pfn);
if (pfn >= low_pfn) {
cc->fast_search_fail = 0;
@@ -1468,7 +1473,8 @@ fast_isolate_freepages(struct compact_co
} else {
if (cc->direct_compaction && pfn_valid(min_pfn)) {
page = pageblock_pfn_to_page(min_pfn,
- pageblock_end_pfn(min_pfn),
+ min(pageblock_end_pfn(min_pfn),
+ zone_end_pfn(cc->zone)),
cc->zone);
cc->free_pfn = min_pfn;
}
_
Patches currently in -mm which might be from vbabka(a)suse.cz are
mm-slub-stop-freeing-kmem_cache_node-structures-on-node-offline.patch
mm-slab-slub-stop-taking-memory-hotplug-lock.patch
mm-slab-slub-stop-taking-cpu-hotplug-lock.patch
mm-slub-splice-cpu-and-page-freelists-in-deactivate_slab.patch
mm-slub-remove-slub_memcg_sysfs-boot-param-and-config_slub_memcg_sysfs_on.patch
mm-compaction-make-fast_isolate_freepages-stay-within-zone.patch
maintainers-add-uapi-directories-to-api-abi-section.patch
The patch titled
Subject: hugetlb: fix copy_huge_page_from_user contig page struct assumption
has been added to the -mm tree. Its filename is
hugetlb-fix-copy_huge_page_from_user-contig-page-struct-assumption.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/hugetlb-fix-copy_huge_page_from_u…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/hugetlb-fix-copy_huge_page_from_u…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlb: fix copy_huge_page_from_user contig page struct assumption
page structs are not guaranteed to be contiguous for gigantic pages. The
routine copy_huge_page_from_user can encounter gigantic pages, yet it
assumes page structs are contiguous when copying pages from user space.
Since page structs for the target gigantic page are not contiguous, the
data copied from user space could overwrite other pages not associated
with the gigantic page and cause data corruption.
Non-contiguous page structs are generally not an issue. However, they can
exist with a specific kernel configuration and hotplug operations. For
example: Configure the kernel with CONFIG_SPARSEMEM and
!CONFIG_SPARSEMEM_VMEMMAP. Then, hotplug add memory for the area where
the gigantic page will be allocated.
Link: https://lkml.kernel.org/r/20210217184926.33567-2-mike.kravetz@oracle.com
Fixes: 8fb5debc5fcd ("userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Davidlohr Bueso <dbueso(a)suse.de>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Joao Martins <joao.m.martins(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
--- a/mm/memory.c~hugetlb-fix-copy_huge_page_from_user-contig-page-struct-assumption
+++ a/mm/memory.c
@@ -5173,17 +5173,19 @@ long copy_huge_page_from_user(struct pag
void *page_kaddr;
unsigned long i, rc = 0;
unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
+ struct page *subpage = dst_page;
- for (i = 0; i < pages_per_huge_page; i++) {
+ for (i = 0; i < pages_per_huge_page;
+ i++, subpage = mem_map_next(subpage, dst_page, i)) {
if (allow_pagefault)
- page_kaddr = kmap(dst_page + i);
+ page_kaddr = kmap(subpage);
else
- page_kaddr = kmap_atomic(dst_page + i);
+ page_kaddr = kmap_atomic(subpage);
rc = copy_from_user(page_kaddr,
(const void __user *)(src + i * PAGE_SIZE),
PAGE_SIZE);
if (allow_pagefault)
- kunmap(dst_page + i);
+ kunmap(subpage);
else
kunmap_atomic(page_kaddr);
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlb-fix-update_and_free_page-contig-page-struct-assumption.patch
hugetlb-fix-copy_huge_page_from_user-contig-page-struct-assumption.patch
hugetlb-use-pageprivate-for-hugetlb-specific-page-flags.patch
hugetlb-convert-page_huge_active-hpagemigratable-flag.patch
hugetlb-convert-pagehugetemporary-to-hpagetemporary-flag.patch
hugetlb-convert-pagehugefreed-to-hpagefreed-flag.patch
mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch
hugetlbfs-remove-special-hugetlbfs_set_page_dirty.patch
The patch titled
Subject: hugetlb: fix update_and_free_page contig page struct assumption
has been added to the -mm tree. Its filename is
hugetlb-fix-update_and_free_page-contig-page-struct-assumption.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/hugetlb-fix-update_and_free_page-…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/hugetlb-fix-update_and_free_page-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlb: fix update_and_free_page contig page struct assumption
page structs are not guaranteed to be contiguous for gigantic pages. The
routine update_and_free_page can encounter a gigantic page, yet it assumes
page structs are contiguous when setting page flags in subpages.
If update_and_free_page encounters non-contiguous page structs, we can see
“BUG: Bad page state in process …” errors.
Non-contiguous page structs are generally not an issue. However, they can
exist with a specific kernel configuration and hotplug operations. For
example: Configure the kernel with CONFIG_SPARSEMEM and
!CONFIG_SPARSEMEM_VMEMMAP. Then, hotplug add memory for the area where
the gigantic page will be allocated. Zi Yan outlined steps to reproduce
here [1].
[1] https://lore.kernel.org/linux-mm/16F7C58B-4D79-41C5-9B64-A1A1628F4AF2@nvidi…
Link: https://lkml.kernel.org/r/20210217184926.33567-1-mike.kravetz@oracle.com
Fixes: 944d9fec8d7a ("hugetlb: add support for gigantic page allocation at runtime")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Davidlohr Bueso <dbueso(a)suse.de>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Joao Martins <joao.m.martins(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/mm/hugetlb.c~hugetlb-fix-update_and_free_page-contig-page-struct-assumption
+++ a/mm/hugetlb.c
@@ -1321,14 +1321,16 @@ static inline void destroy_compound_giga
static void update_and_free_page(struct hstate *h, struct page *page)
{
int i;
+ struct page *subpage = page;
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
- for (i = 0; i < pages_per_huge_page(h); i++) {
- page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
+ for (i = 0; i < pages_per_huge_page(h);
+ i++, subpage = mem_map_next(subpage, page, i)) {
+ subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_referenced | 1 << PG_dirty |
1 << PG_active | 1 << PG_private |
1 << PG_writeback);
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlb-fix-update_and_free_page-contig-page-struct-assumption.patch
hugetlb-fix-copy_huge_page_from_user-contig-page-struct-assumption.patch
hugetlb-use-pageprivate-for-hugetlb-specific-page-flags.patch
hugetlb-convert-page_huge_active-hpagemigratable-flag.patch
hugetlb-convert-pagehugetemporary-to-hpagetemporary-flag.patch
hugetlb-convert-pagehugefreed-to-hpagefreed-flag.patch
mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch
hugetlbfs-remove-special-hugetlbfs_set_page_dirty.patch
In __btrfs_return_cluster_to_free_space we will bail doing the cleanup
of the cluster if the block group we passed in doesn't match the block
group on the cluster. However we drop a reference to block_group, as
the cluster holds a reference to the block group while it's attached to
the cluster. If cluster->block_group != block_group however then this
is an extra put, which means we'll go negative and free this block group
down the line, leading to a UAF.
Fix this by simply bailing if the block group we passed in does not
match the block group on the cluster.
CC: stable(a)vger.kernel.org
Fixes: fa9c0d795f7b ("Btrfs: rework allocation clustering")
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
---
fs/btrfs/free-space-cache.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0d6dcb5ff963..8be36cc6cbd8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2711,8 +2711,10 @@ static void __btrfs_return_cluster_to_free_space(
struct rb_node *node;
spin_lock(&cluster->lock);
- if (cluster->block_group != block_group)
- goto out;
+ if (cluster->block_group != block_group) {
+ spin_unlock(&cluster->lock);
+ return;
+ }
cluster->block_group = NULL;
cluster->window_start = 0;
@@ -2750,8 +2752,6 @@ static void __btrfs_return_cluster_to_free_space(
entry->offset, &entry->offset_index, bitmap);
}
cluster->root = RB_ROOT;
-
-out:
spin_unlock(&cluster->lock);
btrfs_put_block_group(block_group);
}
--
2.26.2
The function sync_runqueues_membarrier_state() should copy the
membarrier state from the @mm received as parameter to each runqueue
currently running tasks using that mm.
However, the use of smp_call_function_many() skips the current runqueue,
which is unintended. Replace by a call to on_each_cpu_mask().
Fixes: 227a4aadc75b ("sched/membarrier: Fix p->mm->membarrier_state racy load")
Link: https://lore.kernel.org/r/74F1E842-4A84-47BF-B6C2-5407DFDD4A4A@gmail.com
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Reported-by: Nadav Amit <nadav.amit(a)gmail.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: stable(a)vger.kernel.org # 5.4.x+
---
kernel/sched/membarrier.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 08ae45ad9261..f311bf85d211 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -471,9 +471,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
}
rcu_read_unlock();
- preempt_disable();
- smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
- preempt_enable();
+ on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);
free_cpumask_var(tmpmask);
cpus_read_unlock();
--
2.17.1
When pages are swapped in, the VM may retain the swap copy to avoid
repeated writes in the future. It's also retained if shared pages are
faulted back in some processes, but not in others. During that time we
have an in-memory copy of the page, as well as an on-swap copy. Cgroup1
and cgroup2 handle these overlapping lifetimes slightly differently
due to the nature of how they account memory and swap:
Cgroup1 has a unified memory+swap counter that tracks a data page
regardless whether it's in-core or swapped out. On swapin, we transfer
the charge from the swap entry to the newly allocated swapcache page,
even though the swap entry might stick around for a while. That's why
we have a mem_cgroup_uncharge_swap() call inside mem_cgroup_charge().
Cgroup2 tracks memory and swap as separate, independent resources and
thus has split memory and swap counters. On swapin, we charge the
newly allocated swapcache page as memory, while the swap slot in turn
must remain charged to the swap counter as long as it's allocated too.
The cgroup2 logic was broken by commit 2d1c498072de ("mm: memcontrol:
make swap tracking an integral part of memory control"), because it
accidentally removed the do_memsw_account() check in the branch inside
mem_cgroup_charge() that was supposed to tell the difference between
the charge transfer in cgroup1 and the separate counters in cgroup2.
As a result, cgroup2 currently undercounts retained swap to varying
degrees: swap slots are cached up to 50% of the configured limit or
total available swap space; partially faulted back shared pages are
only limited by physical capacity. This in turn allows cgroups to
significantly overconsume their allotted swap space.
Add the do_memsw_account() check back to fix this problem.
Fixes: 2d1c498072de ("mm: memcontrol: make swap tracking an integral part of memory control")
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb(a)google.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: stable(a)vger.kernel.org # 5.8+
---
v3:
- Replace !cgroup_subsys_on_dfl(memory_cgrp_subsys) with do_memsw_account().
Thanks to Shakeel.
v2:
- update commit log and add a comment to the code. Many thanks to Johannes.
mm/memcontrol.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ed5cc78a8dbf..b5a66b98af74 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6771,7 +6771,19 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
memcg_check_events(memcg, page);
local_irq_enable();
- if (PageSwapCache(page)) {
+ /*
+ * Cgroup1's unified memory+swap counter has been charged with the
+ * new swapcache page, finish the transfer by uncharging the swap
+ * slot. The swap slot would also get uncharged when it dies, but
+ * it can stick around indefinitely and we'd count the page twice
+ * the entire time.
+ *
+ * Cgroup2 has separate resource counters for memory and swap,
+ * so this is a non-issue here. Memory and swap charge lifetimes
+ * correspond 1:1 to page and swap slot lifetimes: we charge the
+ * page to memory here, and uncharge swap when the slot is freed.
+ */
+ if (do_memsw_account() && PageSwapCache(page)) {
swp_entry_t entry = { .val = page_private(page) };
/*
* The swap entry might not get freed for a long time,
--
2.11.0
When pages are swapped in, the VM may retain the swap copy to avoid
repeated writes in the future. It's also retained if shared pages are
faulted back in some processes, but not in others. During that time we
have an in-memory copy of the page, as well as an on-swap copy. Cgroup1
and cgroup2 handle these overlapping lifetimes slightly differently
due to the nature of how they account memory and swap:
Cgroup1 has a unified memory+swap counter that tracks a data page
regardless whether it's in-core or swapped out. On swapin, we transfer
the charge from the swap entry to the newly allocated swapcache page,
even though the swap entry might stick around for a while. That's why
we have a mem_cgroup_uncharge_swap() call inside mem_cgroup_charge().
Cgroup2 tracks memory and swap as separate, independent resources and
thus has split memory and swap counters. On swapin, we charge the
newly allocated swapcache page as memory, while the swap slot in turn
must remain charged to the swap counter as long as it's allocated too.
The cgroup2 logic was broken by commit 2d1c498072de ("mm: memcontrol:
make swap tracking an integral part of memory control"), because it
accidentally removed the do_memsw_account() check in the branch inside
mem_cgroup_charge() that was supposed to tell the difference between
the charge transfer in cgroup1 and the separate counters in cgroup2.
As a result, cgroup2 currently undercounts retained swap to varying
degrees: swap slots are cached up to 50% of the configured limit or
total available swap space; partially faulted back shared pages are
only limited by physical capacity. This in turn allows cgroups to
significantly overconsume their allotted swap space.
Add the do_memsw_account() check back to fix this problem.
Fixes: 2d1c498072de ("mm: memcontrol: make swap tracking an integral part of memory control")
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb(a)google.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: stable(a)vger.kernel.org # 5.8+
---
v2:
- update commit log and add a comment to the code. Many thanks to Johannes.
mm/memcontrol.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ed5cc78a8dbf..2efbb4f71d5f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6771,7 +6771,19 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
memcg_check_events(memcg, page);
local_irq_enable();
- if (PageSwapCache(page)) {
+ /*
+ * Cgroup1's unified memory+swap counter has been charged with the
+ * new swapcache page, finish the transfer by uncharging the swap
+ * slot. The swap slot would also get uncharged when it dies, but
+ * it can stick around indefinitely and we'd count the page twice
+ * the entire time.
+ *
+ * Cgroup2 has separate resource counters for memory and swap,
+ * so this is a non-issue here. Memory and swap charge lifetimes
+ * correspond 1:1 to page and swap slot lifetimes: we charge the
+ * page to memory here, and uncharge swap when the slot is freed.
+ */
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && PageSwapCache(page)) {
swp_entry_t entry = { .val = page_private(page) };
/*
* The swap entry might not get freed for a long time,
--
2.11.0
So, here is a hopefully improved version with the following changes:
* No more late wake up debugging, objtool should debug that later with
noinstr code calling into the scheduler (Peter suggestion)
* Dropped the double rdp fetch patch, just keep the fix part for now
* Properly protect irq work call from rcu_user_enter() inside
instrumentation_begin()
* Handle CONFIG_KVM_XFER_TO_GUEST_WORK (as per Peter suggestion)
git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
sched/idle-v4
HEAD: d3e956d0b693a572bd5f56241816a6390c5b2797
Thanks,
Frederic
---
Frederic Weisbecker (5):
rcu: Pull deferred rcuog wake up to rcu_eqs_enter() callers
rcu/nocb: Perform deferred wake up before last idle's need_resched() check
rcu/nocb: Trigger self-IPI on late deferred wake up before user resume
entry: Explicitly flush pending rcuog wakeup before last rescheduling point
entry/kvm: Explicitly flush pending rcuog wakeup before last rescheduling point
arch/x86/kvm/x86.c | 1 +
include/linux/entry-kvm.h | 14 +++++++++++++
include/linux/rcupdate.h | 2 ++
kernel/entry/common.c | 7 +++++++
kernel/rcu/tree.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-
kernel/rcu/tree.h | 2 +-
kernel/rcu/tree_plugin.h | 31 +++++++++++++++++++--------
kernel/sched/idle.c | 3 +++
8 files changed, 102 insertions(+), 11 deletions(-)
From: Mike Rapoport <rppt(a)linux.ibm.com>
There could be struct pages that are not backed by actual physical memory.
This can happen when the actual memory bank is not a multiple of
SECTION_SIZE or when an architecture does not register memory holes
reserved by the firmware as memblock.memory.
Such pages are currently initialized using init_unavailable_mem() function
that iterates through PFNs in holes in memblock.memory and if there is a
struct page corresponding to a PFN, the fields of this page are set to
default values and it is marked as Reserved.
init_unavailable_mem() does not take into account zone and node the page
belongs to and sets both zone and node links in struct page to zero.
On a system that has firmware reserved holes in a zone above ZONE_DMA, for
instance in a configuration below:
# grep -A1 E820 /proc/iomem
7a17b000-7a216fff : Unknown E820 type
7a217000-7bffffff : System RAM
unset zone link in struct page will trigger
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
because there are pages in both ZONE_DMA32 and ZONE_DMA (unset zone link
in struct page) in the same pageblock.
Moreover, it is possible that the lowest node and zone start is not aligned
to the section boundary, for example on x86:
[ 0.078898] Zone ranges:
[ 0.078899] DMA [mem 0x0000000000001000-0x0000000000ffffff]
...
[ 0.078910] Early memory node ranges
[ 0.078912] node 0: [mem 0x0000000000001000-0x000000000009cfff]
[ 0.078913] node 0: [mem 0x0000000000100000-0x000000003fffffff]
and thus with SPARSEMEM memory model the beginning of the memory map will
have struct pages that are not spanned by any node and zone.
Update detection of node boundaries in get_pfn_range_for_nid() so that the
node range will be expanded to cover memory map section. Since zone spans
are derived from the node span, there always will be a zone that covers the
part of the memory map with unavailable pages.
Interleave initialization of the unavailable pages with the normal
initialization of memory map, so that zone and node information will be
properly set on struct pages that are not backed by the actual memory.
Fixes: 73a6e474cb37 ("mm: memmap_init: iterate over memblock regions rather
that check each PFN")
Reported-by: Andrea Arcangeli <aarcange(a)redhat.com>
Signed-off-by: Mike Rapoport <rppt(a)linux.ibm.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Qian Cai <cai(a)lca.pw>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
---
mm/page_alloc.c | 160 +++++++++++++++++++++++-------------------------
1 file changed, 75 insertions(+), 85 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6446778cbc6b..1c3f7521028f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6257,22 +6257,84 @@ static void __meminit zone_init_free_lists(struct zone *zone)
}
}
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
+/*
+ * Only struct pages that correspond to ranges defined by memblock.memory
+ * are zeroed and initialized by going through __init_single_page() during
+ * memmap_init_zone().
+ *
+ * But, there could be struct pages that correspond to holes in
+ * memblock.memory. This can happen because of the following reasons:
+ * - phyiscal memory bank size is not necessarily the exact multiple of the
+ * arbitrary section size
+ * - early reserved memory may not be listed in memblock.memory
+ * - memory layouts defined with memmap= kernel parameter may not align
+ * nicely with memmap sections
+ *
+ * Explicitly initialize those struct pages so that:
+ * - PG_Reserved is set
+ * - zone and node links point to zone and node that span the page
+ */
+static u64 __meminit init_unavailable_range(unsigned long spfn,
+ unsigned long epfn,
+ int zone, int node)
+{
+ unsigned long pfn;
+ u64 pgcnt = 0;
+
+ for (pfn = spfn; pfn < epfn; pfn++) {
+ if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+ pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+ + pageblock_nr_pages - 1;
+ continue;
+ }
+ __init_single_page(pfn_to_page(pfn), pfn, zone, node);
+ __SetPageReserved(pfn_to_page(pfn));
+ pgcnt++;
+ }
+
+ return pgcnt;
+}
+#else
+static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn,
+ int zone, int node)
+{
+ return 0;
+}
+#endif
+
void __meminit __weak memmap_init_zone(struct zone *zone)
{
unsigned long zone_start_pfn = zone->zone_start_pfn;
unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone);
unsigned long start_pfn, end_pfn;
+ unsigned long hole_pfn = 0;
+ u64 pgcnt = 0;
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
+ hole_pfn = clamp(hole_pfn, zone_start_pfn, zone_end_pfn);
if (end_pfn > start_pfn)
memmap_init_range(end_pfn - start_pfn, nid,
zone_id, start_pfn, zone_end_pfn,
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+
+ if (hole_pfn < start_pfn)
+ pgcnt += init_unavailable_range(hole_pfn, start_pfn,
+ zone_id, nid);
+ hole_pfn = end_pfn;
}
+
+ if (hole_pfn < zone_end_pfn)
+ pgcnt += init_unavailable_range(hole_pfn, zone_end_pfn,
+ zone_id, nid);
+
+ if (pgcnt)
+ pr_info(" %s zone: %lld pages in unavailable ranges\n",
+ zone->name, pgcnt);
}
static int zone_batchsize(struct zone *zone)
@@ -6519,8 +6581,19 @@ void __init get_pfn_range_for_nid(unsigned int nid,
*end_pfn = max(*end_pfn, this_end_pfn);
}
- if (*start_pfn == -1UL)
+ if (*start_pfn == -1UL) {
*start_pfn = 0;
+ return;
+ }
+
+#ifdef CONFIG_SPARSEMEM
+ /*
+ * Sections in the memory map may not match actual populated
+ * memory, extend the node span to cover the entire section.
+ */
+ *start_pfn = round_down(*start_pfn, PAGES_PER_SECTION);
+ *end_pfn = round_up(*end_pfn, PAGES_PER_SECTION);
+#endif
}
/*
@@ -7069,88 +7142,6 @@ void __init free_area_init_memoryless_node(int nid)
free_area_init_node(nid);
}
-#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
-/*
- * Initialize all valid struct pages in the range [spfn, epfn) and mark them
- * PageReserved(). Return the number of struct pages that were initialized.
- */
-static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
-{
- unsigned long pfn;
- u64 pgcnt = 0;
-
- for (pfn = spfn; pfn < epfn; pfn++) {
- if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
- pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
- + pageblock_nr_pages - 1;
- continue;
- }
- /*
- * Use a fake node/zone (0) for now. Some of these pages
- * (in memblock.reserved but not in memblock.memory) will
- * get re-initialized via reserve_bootmem_region() later.
- */
- __init_single_page(pfn_to_page(pfn), pfn, 0, 0);
- __SetPageReserved(pfn_to_page(pfn));
- pgcnt++;
- }
-
- return pgcnt;
-}
-
-/*
- * Only struct pages that are backed by physical memory are zeroed and
- * initialized by going through __init_single_page(). But, there are some
- * struct pages which are reserved in memblock allocator and their fields
- * may be accessed (for example page_to_pfn() on some configuration accesses
- * flags). We must explicitly initialize those struct pages.
- *
- * This function also addresses a similar issue where struct pages are left
- * uninitialized because the physical address range is not covered by
- * memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=, or when the highest physical
- * address (max_pfn) does not end on a section boundary.
- */
-static void __init init_unavailable_mem(void)
-{
- phys_addr_t start, end;
- u64 i, pgcnt;
- phys_addr_t next = 0;
-
- /*
- * Loop through unavailable ranges not covered by memblock.memory.
- */
- pgcnt = 0;
- for_each_mem_range(i, &start, &end) {
- if (next < start)
- pgcnt += init_unavailable_range(PFN_DOWN(next),
- PFN_UP(start));
- next = end;
- }
-
- /*
- * Early sections always have a fully populated memmap for the whole
- * section - see pfn_valid(). If the last section has holes at the
- * end and that section is marked "online", the memmap will be
- * considered initialized. Make sure that memmap has a well defined
- * state.
- */
- pgcnt += init_unavailable_range(PFN_DOWN(next),
- round_up(max_pfn, PAGES_PER_SECTION));
-
- /*
- * Struct pages that do not have backing memory. This could be because
- * firmware is using some of this memory, or for some other reasons.
- */
- if (pgcnt)
- pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
-}
-#else
-static inline void __init init_unavailable_mem(void)
-{
-}
-#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
-
#if MAX_NUMNODES > 1
/*
* Figure out the number of possible node ids.
@@ -7510,7 +7501,7 @@ void __init free_area_init(unsigned long *max_zone_pfn)
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
- start_pfn = find_min_pfn_with_active_regions();
+ start_pfn = 0;
descending = arch_has_descending_max_zone_pfns();
for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -7574,7 +7565,6 @@ void __init free_area_init(unsigned long *max_zone_pfn)
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
- init_unavailable_mem();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid);
--
2.28.0
v1: https://lore.kernel.org/stable/20210211162519.215418-1-sgarzare@redhat.com/
v2:
- backport the upstream patch and related patches needed
Commit 65b709586e22 ("vdpa_sim: add get_config callback in
vdpasim_dev_attr") unintentionally solved an issue in vdpasim_get_config()
upstream while refactoring vdpa_sim.c to support multiple devices.
Before that patch, if 'offset + len' was equal to
sizeof(struct virtio_net_config), the entire buffer wasn't filled,
returning incorrect values to the caller.
Since 'vdpasim->config' type is 'struct virtio_net_config', we can
safely copy its content under this condition.
The minimum set of patches to backport the patch that fixes the issue, is the
following:
423248d60d2b vdpa_sim: remove hard-coded virtq count
6c6e28fe4579 vdpa_sim: add struct vdpasim_dev_attr for device attributes
cf1a3b35382c vdpa_sim: store parsed MAC address in a buffer
f37cbbc65178 vdpa_sim: make 'config' generic and usable for any device type
65b709586e22 vdpa_sim: add get_config callback in vdpasim_dev_attr
The patches apply fairly cleanly. There are a few contextual differences
due to the lack of the other patches:
$ git backport-diff -u master -r linux-5.10.y..HEAD
Key:
[----] : patches are identical
[####] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively
001/5:[----] [--] 'vdpa_sim: remove hard-coded virtq count'
002/5:[----] [-C] 'vdpa_sim: add struct vdpasim_dev_attr for device attributes'
003/5:[----] [--] 'vdpa_sim: store parsed MAC address in a buffer'
004/5:[----] [-C] 'vdpa_sim: make 'config' generic and usable for any device type'
005/5:[----] [-C] 'vdpa_sim: add get_config callback in vdpasim_dev_attr'
Thanks,
Stefano
Max Gurtovoy (1):
vdpa_sim: remove hard-coded virtq count
Stefano Garzarella (4):
vdpa_sim: add struct vdpasim_dev_attr for device attributes
vdpa_sim: store parsed MAC address in a buffer
vdpa_sim: make 'config' generic and usable for any device type
vdpa_sim: add get_config callback in vdpasim_dev_attr
drivers/vdpa/vdpa_sim/vdpa_sim.c | 83 +++++++++++++++++++++++---------
1 file changed, 60 insertions(+), 23 deletions(-)
--
2.29.2
Before this commit lis3lv02d_get_pwron_wait() had a WARN_ONCE() to catch
a potential divide by 0. WARN macros should only be used to catch internal
kernel bugs and that is not the case here. We have been receiving a lot of
bug reports about kernel backtraces caused by this WARN.
The div value being checked comes from the lis3->odrs[] array. Which
is sized to be a power-of-2 matching the number of bits in lis3->odr_mask.
The only lis3 model where this array is not entirely filled with non-zero
values, IOW the only model where we can hit the div == 0 check, is the
3dc ("8 bits 3DC sensor") model:
int lis3_3dc_rates[16] = {0, 1, 10, 25, 50, 100, 200, 400, 1600, 5000};
Note the 0 value at index 0, according to the datasheet an odr index of 0
means "Power-down mode". HP typically uses a lis3 accelerometer for HDD
fall protection. What I believe is happening here is that on newer
HP devices, which only contain an SSD, the BIOS is leaving the lis3 device
powered-down since it is not used for HDD fall protection.
Note that the lis3_3dc_rates array initializer only specifies 10 values,
which matches the datasheet. So it also contains 6 zero values at the end.
Replace the WARN with a normal check, which treats an odr index of 0
as power-down and uses a normal dev_err() to report the error in case
odr index points past the initialized part of the array.
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=785814
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1817027
BugLink: https://bugs.centos.org/view.php?id=10720
Fixes: 1510dd5954be ("lis3lv02d: avoid divide by zero due to unchecked")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/misc/lis3lv02d/lis3lv02d.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index dd65cedf3b12..9d14bf444481 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -208,7 +208,7 @@ static int lis3_3dc_rates[16] = {0, 1, 10, 25, 50, 100, 200, 400, 1600, 5000};
static int lis3_3dlh_rates[4] = {50, 100, 400, 1000};
/* ODR is Output Data Rate */
-static int lis3lv02d_get_odr(struct lis3lv02d *lis3)
+static int lis3lv02d_get_odr_index(struct lis3lv02d *lis3)
{
u8 ctrl;
int shift;
@@ -216,15 +216,23 @@ static int lis3lv02d_get_odr(struct lis3lv02d *lis3)
lis3->read(lis3, CTRL_REG1, &ctrl);
ctrl &= lis3->odr_mask;
shift = ffs(lis3->odr_mask) - 1;
- return lis3->odrs[(ctrl >> shift)];
+ return (ctrl >> shift);
}
static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3)
{
- int div = lis3lv02d_get_odr(lis3);
+ int odr_idx = lis3lv02d_get_odr_index(lis3);
+ int div = lis3->odrs[odr_idx];
- if (WARN_ONCE(div == 0, "device returned spurious data"))
+ if (div == 0) {
+ if (odr_idx == 0) {
+ /* Power-down mode, not sampling no need to sleep */
+ return 0;
+ }
+
+ dev_err(&lis3->pdev->dev, "Error unknown odrs-index: %d\n", odr_idx);
return -ENXIO;
+ }
/* LIS3 power on delay is quite long */
msleep(lis3->pwron_delay / div);
@@ -816,9 +824,12 @@ static ssize_t lis3lv02d_rate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+ int odr_idx;
lis3lv02d_sysfs_poweron(lis3);
- return sprintf(buf, "%d\n", lis3lv02d_get_odr(lis3));
+
+ odr_idx = lis3lv02d_get_odr_index(lis3);
+ return sprintf(buf, "%d\n", lis3->odrs[odr_idx]);
}
static ssize_t lis3lv02d_rate_set(struct device *dev,
--
2.30.1
This is the start of the stable review cycle for the 5.4.99 release.
There are 60 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 17 Feb 2021 15:27:00 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.99-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.99-rc1
Miklos Szeredi <mszeredi(a)redhat.com>
ovl: expand warning in ovl_d_real()
Sabyrzhan Tasbolatov <snovitoll(a)gmail.com>
net/qrtr: restrict user-controlled length in qrtr_tun_write_iter()
Sabyrzhan Tasbolatov <snovitoll(a)gmail.com>
net/rds: restrict iovecs length for RDS_CMSG_RDMA_ARGS
Stefano Garzarella <sgarzare(a)redhat.com>
vsock: fix locking in vsock_shutdown()
Stefano Garzarella <sgarzare(a)redhat.com>
vsock/virtio: update credit only if socket is not closed
Edwin Peer <edwin.peer(a)broadcom.com>
net: watchdog: hold device global xmit lock during tx disable
Norbert Slusarek <nslusarek(a)gmx.net>
net/vmw_vsock: improve locking in vsock_connect_timeout()
NeilBrown <neilb(a)suse.de>
net: fix iteration for sctp transport seq_files
Eric Dumazet <edumazet(a)google.com>
net: gro: do not keep too many GRO packets in napi->rx_list
Vladimir Oltean <vladimir.oltean(a)nxp.com>
net: dsa: call teardown method on probe failure
Willem de Bruijn <willemb(a)google.com>
udp: fix skb_copy_and_csum_datagram with odd segment sizes
David Howells <dhowells(a)redhat.com>
rxrpc: Fix clearance of Tx/Rx ring when releasing a call
Serge Semin <Sergey.Semin(a)baikalelectronics.ru>
usb: dwc3: ulpi: Replace CPU-based busyloop with Protocol-based one
Felipe Balbi <balbi(a)kernel.org>
usb: dwc3: ulpi: fix checkpatch warning
Randy Dunlap <rdunlap(a)infradead.org>
h8300: fix PREEMPTION build, TI_PRE_COUNT undefined
Alain Volmat <alain.volmat(a)foss.st.com>
i2c: stm32f7: fix configuration of the digital filter
Jernej Skrabec <jernej.skrabec(a)siol.net>
clk: sunxi-ng: mp: fix parent rate change flag check
Jernej Skrabec <jernej.skrabec(a)siol.net>
drm/sun4i: dw-hdmi: Fix max. frequency for H6
Jernej Skrabec <jernej.skrabec(a)siol.net>
drm/sun4i: Fix H6 HDMI PHY configuration
Jernej Skrabec <jernej.skrabec(a)siol.net>
drm/sun4i: tcon: set sync polarity for tcon1 channel
Fangrui Song <maskray(a)google.com>
firmware_loader: align .builtin_fw to 8
Yufeng Mo <moyufeng(a)huawei.com>
net: hns3: add a check for queue_id in hclge_reset_vf_queue()
Borislav Petkov <bp(a)suse.de>
x86/build: Disable CET instrumentation in the kernel for 32-bit too
Florian Westphal <fw(a)strlen.de>
netfilter: conntrack: skip identical origin tuple in same zone only
Sukadev Bhattiprolu <sukadev(a)linux.ibm.com>
ibmvnic: Clear failover_pending if unable to schedule
Mohammad Athari Bin Ismail <mohammad.athari.ismail(a)intel.com>
net: stmmac: set TxQ mode back to DCB after disabling CBS
Vadim Fedorenko <vfedorenko(a)novek.ru>
selftests: txtimestamp: fix compilation issue
Vladimir Oltean <vladimir.oltean(a)nxp.com>
net: enetc: initialize the RFS and RSS memories
Juergen Gross <jgross(a)suse.com>
xen/netback: avoid race in xenvif_rx_ring_slots_available()
Sven Auhagen <sven.auhagen(a)voleatech.de>
netfilter: flowtable: fix tcp and udp header checksum update
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nftables: fix possible UAF over chains from packet path in netns
Jozsef Kadlecsik <kadlec(a)mail.kfki.hu>
netfilter: xt_recent: Fix attempt to update deleted entry
Bui Quang Minh <minhquangbui99(a)gmail.com>
bpf: Check for integer overflow when using roundup_pow_of_two()
Maxime Ripard <maxime(a)cerno.tech>
drm/vc4: hvs: Fix buffer overflow with the dlist handling
Lorenzo Bianconi <lorenzo(a)kernel.org>
mt76: dma: fix a possible memory leak in mt76_add_fragment()
Mark Rutland <mark.rutland(a)arm.com>
lkdtm: don't move ctors to .rodata
Thomas Gleixner <tglx(a)linutronix.de>
vmlinux.lds.h: Create section for protection against instrumentation
Russell King <rmk+kernel(a)armlinux.org.uk>
ARM: kexec: fix oops after TLB are invalidated
Russell King <rmk+kernel(a)armlinux.org.uk>
ARM: ensure the signal page contains defined contents
Alexandre Belloni <alexandre.belloni(a)bootlin.com>
ARM: dts: lpc32xx: Revert set default clock rate of HCLK PLL
Lin Feng <linf(a)wangsu.com>
bfq-iosched: Revert "bfq: Fix computation of shallow depth"
Alexandre Ghiti <alex(a)ghiti.fr>
riscv: virt_addr_valid must check the address belongs to linear mapping
Victor Lu <victorchengchi.lu(a)amd.com>
drm/amd/display: Decrement refcount of dc_sink before reassignment
Victor Lu <victorchengchi.lu(a)amd.com>
drm/amd/display: Free atomic state after drm_atomic_commit
Victor Lu <victorchengchi.lu(a)amd.com>
drm/amd/display: Fix dc_sink kref count in emulated_link_detect
Sung Lee <sung.lee(a)amd.com>
drm/amd/display: Add more Clock Sources to DCN2.1
Claus Stovgaard <claus.stovgaard(a)gmail.com>
nvme-pci: ignore the subsysem NQN on Phison E16
Amir Goldstein <amir73il(a)gmail.com>
ovl: skip getxattr of security labels
Miklos Szeredi <mszeredi(a)redhat.com>
cap: fix conversions on getxattr
Miklos Szeredi <mszeredi(a)redhat.com>
ovl: perform vfs_getxattr() with mounter creds
Hans de Goede <hdegoede(a)redhat.com>
platform/x86: hp-wmi: Disable tablet-mode reporting by default
Tony Lindgren <tony(a)atomide.com>
ARM: OMAP2+: Fix suspcious RCU usage splats for omap_enter_idle_coupled
Bjorn Andersson <bjorn.andersson(a)linaro.org>
arm64: dts: qcom: sdm845: Reserve LPASS clocks in gcc
Marc Zyngier <maz(a)kernel.org>
arm64: dts: rockchip: Fix PCIe DT properties on rk3399
Odin Ugedal <odin(a)uged.al>
cgroup: fix psi monitor for root cgroup
Julien Grall <jgrall(a)amazon.com>
arm/xen: Don't probe xenbus as part of an early initcall
Steven Rostedt (VMware) <rostedt(a)goodmis.org>
tracing: Check length before giving out the filter buffer
Steven Rostedt (VMware) <rostedt(a)goodmis.org>
tracing: Do not count ftrace events in top level enable output
Nikita Shubin <nikita.shubin(a)maquefel.me>
gpio: ep93xx: Fix single irqchip with multi gpiochips
Nikita Shubin <nikita.shubin(a)maquefel.me>
gpio: ep93xx: fix BUG_ON port F usage
-------------
Diffstat:
Makefile | 4 +-
arch/arm/boot/dts/lpc32xx.dtsi | 3 -
arch/arm/include/asm/kexec-internal.h | 12 ++
arch/arm/kernel/asm-offsets.c | 5 +
arch/arm/kernel/machine_kexec.c | 20 +-
arch/arm/kernel/relocate_kernel.S | 38 ++--
arch/arm/kernel/signal.c | 14 +-
arch/arm/mach-omap2/cpuidle44xx.c | 16 +-
arch/arm/xen/enlighten.c | 2 -
arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 4 +-
.../boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 4 +-
arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 +-
arch/h8300/kernel/asm-offsets.c | 3 +
arch/powerpc/kernel/vmlinux.lds.S | 1 +
arch/riscv/include/asm/page.h | 5 +-
arch/x86/Makefile | 6 +-
block/bfq-iosched.c | 8 +-
drivers/clk/sunxi-ng/ccu_mp.c | 2 +-
drivers/gpio/gpio-ep93xx.c | 216 +++++++++++----------
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 +--
.../gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 10 +
drivers/gpu/drm/sun4i/sun4i_tcon.c | 25 +++
drivers/gpu/drm/sun4i/sun4i_tcon.h | 6 +
drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 6 +-
drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c | 26 +--
drivers/gpu/drm/vc4/vc4_plane.c | 18 +-
drivers/i2c/busses/i2c-stm32f7.c | 11 +-
drivers/misc/lkdtm/Makefile | 2 +-
drivers/misc/lkdtm/rodata.c | 2 +-
drivers/net/ethernet/freescale/enetc/enetc_hw.h | 2 +
drivers/net/ethernet/freescale/enetc/enetc_pf.c | 59 ++++++
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 +
drivers/net/ethernet/ibm/ibmvnic.c | 17 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 7 +-
drivers/net/wireless/mediatek/mt76/dma.c | 8 +-
drivers/net/xen-netback/rx.c | 9 +-
drivers/nvme/host/pci.c | 2 +
drivers/platform/x86/hp-wmi.c | 14 +-
drivers/usb/dwc3/ulpi.c | 20 +-
drivers/xen/xenbus/xenbus.h | 1 -
drivers/xen/xenbus/xenbus_probe.c | 2 +-
fs/overlayfs/copy_up.c | 15 +-
fs/overlayfs/inode.c | 2 +
fs/overlayfs/super.c | 13 +-
include/asm-generic/sections.h | 3 +
include/asm-generic/vmlinux.lds.h | 12 +-
include/linux/compiler.h | 53 +++++
include/linux/compiler_types.h | 4 +
include/linux/netdevice.h | 2 +
include/linux/uio.h | 8 +-
include/xen/xenbus.h | 2 -
kernel/bpf/stackmap.c | 2 +
kernel/cgroup/cgroup.c | 4 +-
kernel/trace/trace.c | 2 +-
kernel/trace/trace_events.c | 3 +-
lib/iov_iter.c | 24 ++-
net/core/datagram.c | 12 +-
net/core/dev.c | 11 +-
net/dsa/dsa2.c | 7 +-
net/netfilter/nf_conntrack_core.c | 3 +-
net/netfilter/nf_flow_table_core.c | 4 +-
net/netfilter/nf_tables_api.c | 25 ++-
net/netfilter/xt_recent.c | 12 +-
net/qrtr/tun.c | 6 +
net/rds/rdma.c | 3 +
net/rxrpc/call_object.c | 2 -
net/sctp/proc.c | 16 +-
net/vmw_vsock/af_vsock.c | 13 +-
net/vmw_vsock/hyperv_transport.c | 4 -
net/vmw_vsock/virtio_transport_common.c | 4 +-
scripts/mod/modpost.c | 2 +-
security/commoncap.c | 67 ++++---
.../networking/timestamping/txtimestamp.c | 6 +-
73 files changed, 666 insertions(+), 321 deletions(-)
When compiling under OpenEmbedded, the following error is seen
as of recently:
/srv/oe/build/tmp/hosttools/ld: cannot find /lib/libc.so.6 inside /
/srv/oe/build/tmp/hosttools/ld: cannot find /usr/lib/libc_nonshared.a inside /
/srv/oe/build/tmp/hosttools/ld: cannot find /lib/ld-linux-x86-64.so.2 inside /
collect2: error: ld returned 1 exit status
make[2]: *** [scripts/Makefile.host:95: scripts/extract-cert] Error 1
This is because 2cea4a7a1885 ("scripts: use pkg-config to
locate libcrypto") now calls for `pkg-config --libs libcrypto`
and inserts that into the Makefile rules as LDLIBS when
building extract-cert.c.
The problem is that --libs will include both -l and -L, which
will be out of order when compiling/linking.
This (very ugly) command is what's produced with OpenEmbedded:
gcc -Wp,-MMD,scripts/.extract-cert.d -Wall -Wmissing-prototypes -Wstrict-prototypes \
-O2 -fomit-frame-pointer -std=gnu89 \
-isystem/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot-native/usr/include \
-O2 -pipe -L/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot-native/usr/lib \
-L/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot-native/lib \
-Wl,-rpath-link,/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot-native/usr/lib \
-Wl,-rpath-link,/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot-native/lib \
-Wl,-rpath,/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot-native/usr/lib \
-Wl,-rpath,/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot-native/lib \
-Wl,-O1 -I/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot-native/usr/include \
-I ./scripts -o scripts/extract-cert \
/oe/build/tmp/work-shared/intel-corei7-64/kernel-source/scripts/extract-cert.c \
-L/oe/build/tmp/work/MACHINE/linux/5.10+gitAUTOINC+b01f250d83-r0/recipe-sysroot/usr//lib \
-lcrypto
As per `make`'s documentation:
LDFLAGS
Extra flags to give to compilers when they are supposed to
invoke the linker, ‘ld’, such as -L. Libraries (-lfoo)
should be added to the LDLIBS variable instead.
LDLIBS
Library flags or names given to compilers when they are
supposed to invoke the linker, ‘ld’. LOADLIBES is a
deprecated (but still supported) alternative to LDLIBS.
Non-library linker flags, such as -L, should go in the
LDFLAGS variable.
Fixes: 2cea4a7a1885 ("scripts: use pkg-config to locate libcrypto")
Cc: stable(a)vger.kernel.org # 5.6.x
Reported-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Signed-off-by: Daniel Díaz <daniel.diaz(a)linaro.org>
---
scripts/Makefile | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/scripts/Makefile b/scripts/Makefile
index 9de3c03b94aa..4b4e938b4ba7 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -3,7 +3,8 @@
# scripts contains sources for various helper programs used throughout
# the kernel for the build process.
-CRYPTO_LIBS = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto)
+CRYPTO_LDFLAGS = $(shell pkg-config --libs-only-L libcrypto 2> /dev/null)
+CRYPTO_LDLIBS = $(shell pkg-config --libs-only-l libcrypto 2> /dev/null || echo -lcrypto)
CRYPTO_CFLAGS = $(shell pkg-config --cflags libcrypto 2> /dev/null)
hostprogs-always-$(CONFIG_BUILD_BIN2C) += bin2c
@@ -17,9 +18,11 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include
HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include
-HOSTLDLIBS_sign-file = $(CRYPTO_LIBS)
+HOSTLDFLAGS_sign-file = $(CRYPTO_LDFLAGS)
+HOSTLDLIBS_sign-file = $(CRYPTO_LDLIBS)
HOSTCFLAGS_extract-cert.o = $(CRYPTO_CFLAGS)
-HOSTLDLIBS_extract-cert = $(CRYPTO_LIBS)
+HOSTLDFLAGS_extract-cert = $(CRYPTO_LDFLAGS)
+HOSTLDLIBS_extract-cert = $(CRYPTO_LDLIBS)
ifdef CONFIG_UNWINDER_ORC
ifeq ($(ARCH),x86_64)
--
2.25.1
As per UAC2 Audio Data Formats spec (2.3.1.1 USB Packets),
if the sampling rate is a constant, the allowable variation
of number of audio slots per virtual frame is +/- 1 audio slot.
It means that endpoint should be able to accept/send +1 audio
slot.
Previous endpoint max_packet_size calculation code
was adding sometimes +1 audio slot due to DIV_ROUND_UP
behaviour which was rounding up to closest integer.
However this doesn't work if the numbers are divisible.
It had no any impact with Linux hosts which ignore
this issue, but in case of more strict Windows it
caused rejected enumeration
Thus always add +1 audio slot to endpoint's max packet size
Fixes: 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth")
Cc: Peter Chen <peter.chen(a)freescale.com>
Cc: <stable(a)vger.kernel.org> #v4.3+
Signed-off-by: Ruslan Bilovol <ruslan.bilovol(a)gmail.com>
---
drivers/usb/gadget/function/f_uac2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 740cb64..c62cccb 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -478,7 +478,7 @@ static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts,
}
max_size_bw = num_channels(chmask) * ssize *
- DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1)));
+ ((srate / (factor / (1 << (ep_desc->bInterval - 1)))) + 1);
ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw,
max_size_ep));
--
1.9.1
From: Catalin Marinas <catalin.marinas(a)arm.com>
The ptrace(PTRACE_PEEKMTETAGS) implementation checks whether the user
page has valid tags (mapped with PROT_MTE) by testing the PG_mte_tagged
page flag. If this bit is cleared, ptrace(PTRACE_PEEKMTETAGS) returns
-EIO.
A newly created (PROT_MTE) mapping points to the zero page which had its
tags zeroed during cpu_enable_mte(). If there were no prior writes to
this mapping, ptrace(PTRACE_PEEKMTETAGS) fails with -EIO since the zero
page does not have the PG_mte_tagged flag set.
Set PG_mte_tagged on the zero page when its tags are cleared during
boot. In addition, to avoid ptrace(PTRACE_PEEKMTETAGS) succeeding on
!PROT_MTE mappings pointing to the zero page, change the
__access_remote_tags() check to (vm_flags & VM_MTE) instead of
PG_mte_tagged.
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
Fixes: 34bfeea4a9e9 ("arm64: mte: Clear the tags when a page is mapped in user-space with PROT_MTE")
Cc: <stable(a)vger.kernel.org> # 5.10.x
Cc: Will Deacon <will(a)kernel.org>
Reported-by: Luis Machado <luis.machado(a)linaro.org>
Tested-by: Luis Machado <luis.machado(a)linaro.org>
Reviewed-by: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Link: https://lore.kernel.org/r/20210210180316.23654-1-catalin.marinas@arm.com
---
arch/arm64/kernel/cpufeature.c | 6 +-----
arch/arm64/kernel/mte.c | 3 ++-
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e99edde..3e6331b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1701,16 +1701,12 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_MTE
static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
{
- static bool cleared_zero_page = false;
-
/*
* Clear the tags in the zero page. This needs to be done via the
* linear map which has the Tagged attribute.
*/
- if (!cleared_zero_page) {
- cleared_zero_page = true;
+ if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags))
mte_clear_page_tags(lm_alias(empty_zero_page));
- }
kasan_init_hw_tags_cpu();
}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index dc9ada6..80b62fe 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -329,11 +329,12 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
* would cause the existing tags to be cleared if the page
* was never mapped with PROT_MTE.
*/
- if (!test_bit(PG_mte_tagged, &page->flags)) {
+ if (!(vma->vm_flags & VM_MTE)) {
ret = -EOPNOTSUPP;
put_page(page);
break;
}
+ WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));
/* limit access to the end of the page */
offset = offset_in_page(addr);
--
2.7.4
Good Day Sir/Ms.,
We are pleased to invite you/your company to quote the following
item listed below:
Product/Model No: TM9653 PRESSURE REGULATOR
Product Name:MEKO
Qty. 30 units
Compulsory, kindly send your quotation to: quotation@procurement-
pfizer.com for immediate approval.
Kind Regards,
Albert Bourla
PFIZER B.V Supply Chain Manager
Tel: +31(0)208080 880
ADDRESS: Rivium Westlaan 142, 2909 LD
Capelle aan den IJssel, Netherlands
Userspace has discovered the functionality offered by SYS_kcmp and has
started to depend upon it. In particular, Mesa uses SYS_kcmp for
os_same_file_description() in order to identify when two fds (e.g. device
or dmabuf) point to the same struct file. Since they depend on it for
core functionality, lift SYS_kcmp out of the non-default
CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.
Rasmus Villemoes also pointed out that systemd uses SYS_kcmp to
deduplicate the per-service file descriptor store.
Note that some distributions such as Ubuntu are already enabling
CHECKPOINT_RESTORE in their configs and so, by extension, SYS_kcmp.
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3046
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Will Drewry <wad(a)chromium.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Dave Airlie <airlied(a)gmail.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Lucas Stach <l.stach(a)pengutronix.de>
Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Cc: Cyrill Gorcunov <gorcunov(a)gmail.com>
Cc: stable(a)vger.kernel.org
Acked-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> # DRM depends on kcmp
Acked-by: Rasmus Villemoes <linux(a)rasmusvillemoes.dk> # systemd uses kcmp
---
v2:
- Default n.
- Borrow help message from man kcmp.
- Export get_epoll_tfile_raw_ptr() for CONFIG_KCMP
v3:
- Select KCMP for CONFIG_DRM
---
drivers/gpu/drm/Kconfig | 3 +++
fs/eventpoll.c | 4 ++--
include/linux/eventpoll.h | 2 +-
init/Kconfig | 11 +++++++++++
kernel/Makefile | 2 +-
tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +-
6 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 0973f408d75f..af6c6d214d91 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -15,6 +15,9 @@ menuconfig DRM
select I2C_ALGOBIT
select DMA_SHARED_BUFFER
select SYNC_FILE
+# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
+# device and dmabuf fd. Let's make sure that is available for our userspace.
+ select KCMP
help
Kernel-level support for the Direct Rendering Infrastructure (DRI)
introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a829af074eb5..3196474cbe24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -979,7 +979,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
return epir;
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#ifdef CONFIG_KCMP
static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
{
struct rb_node *rbp;
@@ -1021,7 +1021,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
return file_raw;
}
-#endif /* CONFIG_CHECKPOINT_RESTORE */
+#endif /* CONFIG_KCMP */
/**
* Adds a new entry to the tail of the list in a lockless way, i.e.
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 0350393465d4..593322c946e6 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -18,7 +18,7 @@ struct file;
#ifdef CONFIG_EPOLL
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#ifdef CONFIG_KCMP
struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
#endif
diff --git a/init/Kconfig b/init/Kconfig
index b77c60f8b963..9cc7436b2f73 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1194,6 +1194,7 @@ endif # NAMESPACES
config CHECKPOINT_RESTORE
bool "Checkpoint/restore support"
select PROC_CHILDREN
+ select KCMP
default n
help
Enables additional kernel features in a sake of checkpoint/restore.
@@ -1737,6 +1738,16 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool
+config KCMP
+ bool "Enable kcmp() system call" if EXPERT
+ help
+ Enable the kernel resource comparison system call. It provides
+ user-space with the ability to compare two processes to see if they
+ share a common resource, such as a file descriptor or even virtual
+ memory space.
+
+ If unsure, say N.
+
config RSEQ
bool "Enable rseq() system call" if EXPERT
default y
diff --git a/kernel/Makefile b/kernel/Makefile
index aa7368c7eabf..320f1f3941b7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -51,7 +51,7 @@ obj-y += livepatch/
obj-y += dma/
obj-y += entry/
-obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
+obj-$(CONFIG_KCMP) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 26c72f2b61b1..1b6c7d33c4ff 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -315,7 +315,7 @@ TEST(kcmp)
ret = __filecmp(getpid(), getpid(), 1, 1);
EXPECT_EQ(ret, 0);
if (ret != 0 && errno == ENOSYS)
- SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
+ SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
}
TEST(mode_strict_support)
--
2.20.1