The first kiocb_set_cancel_fn() argument may point at a struct kiocb
that is not embedded inside struct aio_kiocb. With the current code,
depending on the compiler, the req->ki_ctx read happens either before
the IOCB_AIO_RW test or after that test. Move the req->ki_ctx read such
that it is guaranteed that the IOCB_AIO_RW test happens first.
Reported-by: Eric Biggers <ebiggers(a)kernel.org>
Cc: Benjamin LaHaise <ben(a)communityfibre.ca>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Avi Kivity <avi(a)scylladb.com>
Cc: Sandeep Dhavale <dhavale(a)google.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: stable(a)vger.kernel.org
Fixes: b820de741ae4 ("fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
fs/aio.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index da18dbcfcb22..9cdaa2faa536 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -589,8 +589,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
- struct kioctx *ctx = req->ki_ctx;
+ struct aio_kiocb *req;
+ struct kioctx *ctx;
unsigned long flags;
/*
@@ -600,9 +600,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
if (!(iocb->ki_flags & IOCB_AIO_RW))
return;
+ req = container_of(iocb, struct aio_kiocb, rw);
+
if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
return;
+ ctx = req->ki_ctx;
+
spin_lock_irqsave(&ctx->ctx_lock, flags);
list_add_tail(&req->ki_list, &ctx->active_reqs);
req->ki_cancel = cancel;
Currently arm64's switch_mm() doesn't always have an smp_mb()
which the core scheduler code has depended upon since commit:
commit 223baf9d17f25 ("sched: Fix performance regression introduced by mm_cid")
If switch_mm() doesn't call smp_mb(), sched_mm_cid_remote_clear()
can unset the activly used cid when it fails to observe active task after it
sets lazy_put.
By adding an smp_mb() in arm64's check_and_switch_context(),
Guarantee to observe active task after sched_mm_cid_remote_clear()
success to set lazy_put.
Signed-off-by: levi.yun <yeoreum.yun(a)arm.com>
Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid")
Cc: <stable(a)vger.kernel.org> # 6.4.x
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Aaron Lu <aaron.lu(a)intel.com>
---
I'm really sorry if you got this multiple times.
I had some problems with the SMTP server...
arch/arm64/mm/context.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 188197590fc9..7a9e8e6647a0 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -268,6 +268,11 @@ void check_and_switch_context(struct mm_struct *mm)
*/
if (!system_uses_ttbr0_pan())
cpu_switch_mm(mm->pgd, mm);
+
+ /*
+ * See the comments on switch_mm_cid describing user -> user transition.
+ */
+ smp_mb();
}
unsigned long arm64_mm_context_get(struct mm_struct *mm)
--
LEVI:{C3F47F37-75D8-414A-A8BA-3980EC8A46D7}
Patch "fs/aio: Make io_cancel() generate completions again" is based on the
assumption that calling kiocb->ki_cancel() does not complete R/W requests.
This is incorrect: the two drivers that call kiocb_set_cancel_fn() callers
set a cancellation function that calls usb_ep_dequeue(). According to its
documentation, usb_ep_dequeue() calls the completion routine with status
-ECONNRESET. Hence this revert.
Cc: Benjamin LaHaise <ben(a)communityfibre.ca>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Avi Kivity <avi(a)scylladb.com>
Cc: Sandeep Dhavale <dhavale(a)google.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+b91eb2ed18f599dd3c31(a)syzkaller.appspotmail.com
Fixes: 54cbc058d86b ("fs/aio: Make io_cancel() generate completions again")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
fs/aio.c | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 28223f511931..da18dbcfcb22 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2165,11 +2165,14 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
#endif
/* sys_io_cancel:
- * Attempts to cancel an iocb previously passed to io_submit(). If the
- * operation is successfully cancelled 0 is returned. May fail with
- * -EFAULT if any of the data structures pointed to are invalid. May
- * fail with -EINVAL if aio_context specified by ctx_id is invalid. Will
- * fail with -ENOSYS if not implemented.
+ * Attempts to cancel an iocb previously passed to io_submit. If
+ * the operation is successfully cancelled, the resulting event is
+ * copied into the memory pointed to by result without being placed
+ * into the completion queue and 0 is returned. May fail with
+ * -EFAULT if any of the data structures pointed to are invalid.
+ * May fail with -EINVAL if aio_context specified by ctx_id is
+ * invalid. May fail with -EAGAIN if the iocb specified was not
+ * cancelled. Will fail with -ENOSYS if not implemented.
*/
SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
@@ -2200,12 +2203,14 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
}
spin_unlock_irq(&ctx->ctx_lock);
- /*
- * The result argument is no longer used - the io_event is always
- * delivered via the ring buffer.
- */
- if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
- aio_complete_rw(&kiocb->rw, -EINTR);
+ if (!ret) {
+ /*
+ * The result argument is no longer used - the io_event is
+ * always delivered via the ring buffer. -EINPROGRESS indicates
+ * cancellation is progress:
+ */
+ ret = -EINPROGRESS;
+ }
percpu_ref_put(&ctx->users);
This is the start of the stable review cycle for the 5.15.151 release.
There are 84 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.151-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.15.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.15.151-rc1
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Gal Pressman <gal(a)nvidia.com>
Revert "tls: rx: move counting TlsDecryptErrors for sync"
Jakub Kicinski <kuba(a)kernel.org>
net: tls: fix async vs NIC crypto offload
Martynas Pumputis <m(a)lambda.lt>
bpf: Derive source IP addr via bpf_*_fib_lookup()
Louis DeLosSantos <louis.delos.devel(a)gmail.com>
bpf: Add table ID to bpf_fib_lookup BPF helper
Martin KaFai Lau <martin.lau(a)kernel.org>
bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Teach lockdep about icc_bw_lock order"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Fix locking for runpm vs reclaim"
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Kuniyuki Iwashima <kuniyu(a)amazon.com>
af_unix: Drop oob_skb ref before purging queue in GC.
Max Krummenacher <max.krummenacher(a)toradex.com>
Revert "drm/bridge: lt8912b: Register and attach our DSI device at probe"
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Abeni <pabeni(a)redhat.com>
mptcp: push at DSS boundaries
Geliang Tang <tanggeliang(a)kylinos.cn>
mptcp: add needs_id for netlink appending addr
Jean Sacren <sakiwit(a)gmail.com>
mptcp: clean up harmless false expressions
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter in v6
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter
Paolo Abeni <pabeni(a)redhat.com>
mptcp: rename timer related helper to less confusing names
Paolo Abeni <pabeni(a)redhat.com>
mptcp: process pending subflow error on close
Paolo Abeni <pabeni(a)redhat.com>
mptcp: move __mptcp_error_report in protocol.c
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Zong Li <zong.li(a)sifive.com>
riscv: add CALLER_ADDRx support
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Tadeusz Struk <tstruk(a)gigaio.com>
dmaengine: ptdma: use consistent DMA masks
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
ALSA: firewire-lib: fix to check cycle continuity
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Jiri Slaby (SUSE) <jirislaby(a)kernel.org>
fbcon: always restore the old font data in fbcon_do_set_font()
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Sabrina Dubroca <sd(a)queasysnail.net>
tls: decrement decrypt_pending if no async completion will be called
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: use async as an in-out argument
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: assume crypto always calls our callback
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: move counting TlsDecryptErrors for sync
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't track the async count
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: factor out writing ContentType to cmsg
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: wrap decryption arguments in a structure
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't report text length from the bowels of decrypt
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: drop unnecessary arguments from tls_setup_from_iter()
Jakub Kicinski <kuba(a)kernel.org>
tls: hw: rx: use return value of tls_device_decrypted() to carry status
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: refactor decrypt_skb_update()
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't issue wake ups when data is decrypted
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the decryption status in socket context
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the record type in socket context
Oleksij Rempel <linux(a)rempel-privat.de>
igb: extend PTP timestamp adjustments to i211
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Florian Westphal <fw(a)strlen.de>
netfilter: bridge: confirm multicast packets before passing them up the stack
Florian Westphal <fw(a)strlen.de>
netfilter: let reset rules clean out conntrack entries
Florian Westphal <fw(a)strlen.de>
netfilter: make function op structures const
Florian Westphal <fw(a)strlen.de>
netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook
Florian Westphal <fw(a)strlen.de>
netfilter: nfnetlink_queue: silence bogus compiler warning
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Jakub Raczynski <j.raczynski(a)samsung.com>
stmmac: Clear variable when destroying workqueue
Justin Iurman <justin.iurman(a)uliege.be>
uapi: in6: replace temporary label with rfc9486
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Jakub Kicinski <kuba(a)kernel.org>
veth: try harder when allocating queue memory
Vasily Averin <vvs(a)openvz.org>
net: enable memcg accounting for veth queues
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Jakub Kicinski <kuba(a)kernel.org>
net: veth: clear GRO when clearing XDP even when down
Doug Smythies <dsmythies(a)telus.net>
cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf call back
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: disallow timeout for anonymous sets
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/ftrace.h | 5 +
arch/riscv/include/asm/pgtable.h | 2 +-
arch/riscv/kernel/Makefile | 2 +
arch/riscv/kernel/return_address.c | 48 ++++
arch/x86/kernel/cpu/intel.c | 178 ++++++------
drivers/cpufreq/intel_pstate.c | 3 +
drivers/dma/fsl-qdma.c | 25 +-
drivers/dma/ptdma/ptdma-dmaengine.c | 2 -
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/gpu/drm/bridge/lontium-lt8912b.c | 11 +-
drivers/interconnect/core.c | 18 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 +++-
drivers/mtd/nand/spi/gigadevice.c | 6 +-
drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +-
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/net/veth.c | 40 +--
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
drivers/video/fbdev/core/fbcon.c | 8 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 +-
fs/cachefiles/bind.c | 3 +
fs/hugetlbfs/inode.c | 6 +-
include/linux/netfilter.h | 14 +-
include/net/ipv6_stubs.h | 5 +
include/net/netfilter/nf_conntrack.h | 8 +
include/net/strparser.h | 4 +
include/net/tls.h | 11 +-
include/uapi/linux/bpf.h | 37 ++-
include/uapi/linux/in6.h | 2 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/bridge/br_netfilter_hooks.c | 96 +++++++
net/bridge/netfilter/nf_conntrack_bridge.c | 30 ++
net/core/filter.c | 67 ++++-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +-
net/ipv4/netfilter/nf_reject_ipv4.c | 1 +
net/ipv6/addrconf.c | 7 +-
net/ipv6/af_inet6.c | 1 +
net/ipv6/netfilter/nf_reject_ipv6.c | 1 +
net/mptcp/diag.c | 3 +
net/mptcp/pm_netlink.c | 30 +-
net/mptcp/protocol.c | 123 +++++++--
net/mptcp/subflow.c | 36 ---
net/netfilter/core.c | 45 +--
net/netfilter/nf_conntrack_core.c | 21 +-
net/netfilter/nf_conntrack_netlink.c | 4 +-
net/netfilter/nf_conntrack_proto_tcp.c | 35 +++
net/netfilter/nf_nat_core.c | 2 +-
net/netfilter/nf_tables_api.c | 7 +
net/netfilter/nfnetlink_queue.c | 10 +-
net/netfilter/nft_compat.c | 20 ++
net/netlink/af_netlink.c | 2 +-
net/tls/tls_device.c | 6 +-
net/tls/tls_sw.c | 316 ++++++++++------------
net/unix/garbage.c | 22 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
sound/firewire/amdtp-stream.c | 2 +-
tools/include/uapi/linux/bpf.h | 37 ++-
tools/testing/selftests/net/mptcp/config | 2 +
72 files changed, 1046 insertions(+), 529 deletions(-)
Larry Finger <Larry.Finger(a)gmail.com> wrote:
> From: Nick Morrow <morrownr(a)gmail.com>
>
> Add VID/PIDs that are known to be missing for this driver.
>
> Removed /* 8811CU */ and /* 8821CU */ as they are redundant
> since the file is specific to those chips.
>
> Removed /* TOTOLINK A650UA v3 */ as the manufacturer. It has a REALTEK
> VID so it may not be specific to this adapter.
>
> Verified and tested.
>
> Cc: stable(a)vger.kernel.org
> Signed-off-by: Nick Morrow <morrownr(a)gmail.com>
> Signed-off-by: Larry Finger <Larry.Finger(a)lwfinger.net>
> Acked-by: Ping-Ke Shih <pkshih(a)realtek.com>
Patch applied to wireless-next.git, thanks.
b8a62478f3b1 wifi: rtw88: Add missing VID/PIDs for 8811CU and 8821CU
--
https://patchwork.kernel.org/project/linux-wireless/patch/4ume7mjw63u7.XlMU…https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatc…
The patch titled
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
has been added to the -mm mm-unstable branch. Its filename is
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
Date: Tue, 5 Mar 2024 15:13:49 +0000
There was previously a theoretical window where swapoff() could run and
teardown a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from a
test case. But there has been agreement based on code review that this is
possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was valid. This wasn't present in get_swap_device() so
I've added it. I couldn't find any existing get_swap_device() call sites
where this extra check would cause any false alarms.
Details of how to provoke one possible issue (thanks to David Hildenbrand
for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still references by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff to succeed after process 2 reclaimed the swap cache
but before process1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Link: https://lkml.kernel.org/r/20240305151349.3781428-1-ryan.roberts@arm.com
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
--- a/mm/swapfile.c~mm-swap-fix-race-between-free_swap_and_cache-and-swapoff
+++ a/mm/swapfile.c
@@ -1281,7 +1281,9 @@ struct swap_info_struct *get_swap_device
smp_rmb();
offset = swp_offset(entry);
if (offset >= si->max)
- goto put_out;
+ goto bad_offset;
+ if (data_race(!si->swap_map[swp_offset(entry)]))
+ goto bad_free;
return si;
bad_nofile:
@@ -1289,9 +1291,14 @@ bad_nofile:
out:
return NULL;
put_out:
- pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
percpu_ref_put(&si->users);
return NULL;
+bad_offset:
+ pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
+ goto put_out;
+bad_free:
+ pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);
+ goto put_out;
}
static unsigned char __swap_entry_free(struct swap_info_struct *p,
@@ -1609,13 +1616,14 @@ int free_swap_and_cache(swp_entry_t entr
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index c7d5566623ad..c37af50f312d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -424,8 +424,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of
mmiowb()") remove all mmiowb() in drivers, but it says:
"NOTE: mmiowb() has only ever guaranteed ordering in conjunction with
spin_unlock(). However, pairing each mmiowb() removal in this patch with
the corresponding call to spin_unlock() is not at all trivial, so there
is a small chance that this change may regress any drivers incorrectly
relying on mmiowb() to order MMIO writes between CPUs using lock-free
synchronisation."
The mmio in radeon_ring_commit() is protected by a mutex rather than a
spinlock, but in the mutex fastpath it behaves similar to spinlock. We
can add mmiowb() calls in the radeon driver but the maintainer says he
doesn't like such a workaround, and radeon is not the only example of
mutex protected mmio.
So we should extend the mmiowb tracking system from spinlock to mutex,
and maybe other locking primitives. This is not easy and error prone, so
we solve it in the architectural code, by simply defining the __io_aw()
hook as mmiowb(). And we no longer need to override queued_spin_unlock()
so use the generic definition.
Without this, we get such an error when run 'glxgears' on weak ordering
architectures such as LoongArch:
radeon 0000:04:00.0: ring 0 stalled for more than 10324msec
radeon 0000:04:00.0: ring 3 stalled for more than 10240msec
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3)
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.…
Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loon…
Cc: stable(a)vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/include/asm/Kbuild | 1 +
arch/loongarch/include/asm/io.h | 2 ++
arch/loongarch/include/asm/qspinlock.h | 18 ------------------
3 files changed, 3 insertions(+), 18 deletions(-)
delete mode 100644 arch/loongarch/include/asm/qspinlock.h
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index a97c0edbb866..2dbec7853ae8 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += early_ioremap.h
generic-y += qrwlock.h
+generic-y += qspinlock.h
generic-y += rwsem.h
generic-y += segment.h
generic-y += user.h
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index c486c2341b66..4a8adcca329b 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -71,6 +71,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
+#define __io_aw() mmiowb()
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
deleted file mode 100644
index 34f43f8ad591..000000000000
--- a/arch/loongarch/include/asm/qspinlock.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_QSPINLOCK_H
-#define _ASM_QSPINLOCK_H
-
-#include <asm-generic/qspinlock_types.h>
-
-#define queued_spin_unlock queued_spin_unlock
-
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- compiletime_assert_atomic_type(lock->locked);
- c_sync();
- WRITE_ONCE(lock->locked, 0);
-}
-
-#include <asm-generic/qspinlock.h>
-
-#endif /* _ASM_QSPINLOCK_H */
--
2.43.0
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reinstate commit 88b065943cb5 ("drm/i915/dsi: Do display on
sequence later on icl+"), for the most part. Turns out some
machines (eg. Chuwi Minibook X) really do need that updated order.
It is also the order the Windows driver uses.
However we can't just undo the revert since that would again
break Lenovo 82TQ. After staring at the VBT sequences for both
machines I've concluded that the Lenovo 82TQ sequences look
somewhat broken:
- INIT_OTP is not present at all
- what should be in INIT_OTP is found in DISPLAY_ON
- what should be in DISPLAY_ON is found in BACKLIGHT_ON
(along with the actual backlight stuff)
The Chuwi Minibook X on the other hand has a full complement
of sequences in its VBT.
So let's try to deal with the broken sequences in the
Lenovo 82TQ VBT by simply swapping the (non-existent)
INIT_OTP sequence with the DISPLAY_ON sequence. Thus we
execute DISPLAY_ON when intending to execute INIT_OTP,
and execute nothing at all when intending to execute
DISPLAY_ON. That should be 100% equivalent to the
revert, for such broken VBTs.
Cc: stable(a)vger.kernel.org
Fixes: dc524d05974f ("Revert "drm/i915/dsi: Do display on sequence later on icl+"")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/10071
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10334
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/icl_dsi.c | 3 +-
drivers/gpu/drm/i915/display/intel_bios.c | 43 +++++++++++++++++++----
2 files changed, 39 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index eda4a8b88590..ac456a2275db 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1155,7 +1155,6 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
}
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
/* ensure all panel commands dispatched before enabling transcoder */
wait_for_cmds_dispatched_to_panel(encoder);
@@ -1256,6 +1255,8 @@ static void gen11_dsi_enable(struct intel_atomic_state *state,
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
+ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
+
/* step7: enable backlight */
intel_backlight_enable(crtc_state, conn_state);
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 343726de9aa7..373291d10af9 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1955,16 +1955,12 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915,
* these devices we split the init OTP sequence into a deassert sequence and
* the actual init OTP part.
*/
-static void fixup_mipi_sequences(struct drm_i915_private *i915,
- struct intel_panel *panel)
+static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
{
u8 *init_otp;
int len;
- /* Limit this to VLV for now. */
- if (!IS_VALLEYVIEW(i915))
- return;
-
/* Limit this to v1 vid-mode sequences */
if (panel->vbt.dsi.config->is_cmd_mode ||
panel->vbt.dsi.seq_version != 1)
@@ -2000,6 +1996,41 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915,
panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
}
+/*
+ * Some machines (eg. Lenovo 82TQ) appear to have broken
+ * VBT sequences:
+ * - INIT_OTP is not present at all
+ * - what should be in INIT_OTP is in DISPLAY_ON
+ * - what should be in DISPLAY_ON is in BACKLIGHT_ON
+ * (along with the actual backlight stuff)
+ *
+ * To make those work we simply swap DISPLAY_ON and INIT_OTP.
+ *
+ * TODO: Do we need to limit this to specific machines,
+ * or examine the contents of the sequences to
+ * avoid false positives?
+ */
+static void icl_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] &&
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) {
+ drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n");
+
+ swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP],
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]);
+ }
+}
+
+static void fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (DISPLAY_VER(i915) >= 11)
+ icl_fixup_mipi_sequences(i915, panel);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_fixup_mipi_sequences(i915, panel);
+}
+
static void
parse_mipi_sequence(struct drm_i915_private *i915,
struct intel_panel *panel)
--
2.43.0
With the to-be-fixed commit, the reset_work handler cleared 'host->mrq'
outside of the spinlock protected critical section. That leaves a small
race window during execution of 'tmio_mmc_reset()' where the done_work
handler could grab a pointer to the now invalid 'host->mrq'. Both would
use it to call mmc_request_done() causing problems (see link below).
However, 'host->mrq' cannot simply be cleared earlier inside the
critical section. That would allow new mrqs to come in asynchronously
while the actual reset of the controller still needs to be done. So,
like 'tmio_mmc_set_ios()', an ERR_PTR is used to prevent new mrqs from
coming in but still avoiding concurrency between work handlers.
Reported-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Closes: https://lore.kernel.org/all/20240220061356.3001761-1-dirk.behme@de.bosch.co…
Fixes: df3ef2d3c92c ("mmc: protect the tmio_mmc driver against a theoretical race")
Signed-off-by: Wolfram Sang <wsa+renesas(a)sang-engineering.com>
Tested-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Reviewed-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Cc: stable(a)vger.kernel.org # 3.0+
---
Change since v1/RFT: added Dirk's tags and stable tag
@Ulf: this is nasty, subtle stuff. Would be awesome to have it in 6.8
already!
drivers/mmc/host/tmio_mmc_core.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index be7f18fd4836..c253d176db69 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -259,6 +259,8 @@ static void tmio_mmc_reset_work(struct work_struct *work)
else
mrq->cmd->error = -ETIMEDOUT;
+ /* No new calls yet, but disallow concurrent tmio_mmc_done_work() */
+ host->mrq = ERR_PTR(-EBUSY);
host->cmd = NULL;
host->data = NULL;
--
2.43.0
If VM_BIND is enabled on the client the legacy submission ioctl can't be
used, however if a client tries to do so regardless it will return an
error. In this case the clients mutex remained unlocked leading to a
deadlock inside nouveau_drm_postclose or any other nouveau ioctl call.
Fixes: b88baab82871 ("drm/nouveau: implement new VM_BIND uAPI")
Cc: Danilo Krummrich <dakr(a)redhat.com>
Cc: <stable(a)vger.kernel.org> # v6.6+
Signed-off-by: Karol Herbst <kherbst(a)redhat.com>
Reviewed-by: Lyude Paul <lyude(a)redhat.com>
Reviewed-by: Danilo Krummrich <dakr(a)redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240304183157.1587152-1-kher…
---
drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 49c2bcbef1299..5a887d67dc0e8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -764,7 +764,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
return -ENOMEM;
if (unlikely(nouveau_cli_uvmm(cli)))
- return -ENOSYS;
+ return nouveau_abi16_put(abi16, -ENOSYS);
list_for_each_entry(temp, &abi16->channels, head) {
if (temp->chan->chid == req->channel) {
--
2.44.0
Ring expansion checker may incorrectly assume a completely full ring
is empty, missing the need for expansion.
This is due to a special empty ring case where the dequeue ends up
ahead of the enqueue pointer. This is seen when enqueued TRBs fill up
exactly a segment, with enqueue then pointing to the end link TRB.
Once those TRBs are handled the dequeue pointer will follow the link
TRB and end up pointing to the first entry on the next segment, past
the enqueue.
This same enqueue - dequeue condition can be true if a ring is full,
with enqueue ending on that last link TRB before the dequeue pointer
on the next segment.
This can be seen when queuing several ~510 small URBs via usbfs in
one go before a single one is handled (i.e. dequeue not moved from first
entry in segment).
Expand the ring already when enqueue reaches the link TRB before the
dequeue segment, instead of expanding it when enqueue moves into the
dequeue segment.
Reported-by: Chris Yokum <linux-usb(a)mail.totalphase.com>
Closes: https://lore.kernel.org/all/949223224.833962.1709339266739.JavaMail.zimbra@…
Tested-by: Chris Yokum <linux-usb(a)mail.totalphase.com>
Fixes: f5af638f0609 ("xhci: Fix transfer ring expansion size calculation")
Cc: stable(a)vger.kernel.org # v6.5+
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index f0d8a607ff21..4f64b814d4aa 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -326,7 +326,13 @@ static unsigned int xhci_ring_expansion_needed(struct xhci_hcd *xhci, struct xhc
/* how many trbs will be queued past the enqueue segment? */
trbs_past_seg = enq_used + num_trbs - (TRBS_PER_SEGMENT - 1);
- if (trbs_past_seg <= 0)
+ /*
+ * Consider expanding the ring already if num_trbs fills the current
+ * segment (i.e. trbs_past_seg == 0), not only when num_trbs goes into
+ * the next segment. Avoids confusing full ring with special empty ring
+ * case below
+ */
+ if (trbs_past_seg < 0)
return 0;
/* Empty ring special case, enqueue stuck on link trb while dequeue advanced */
--
2.25.1
This is the start of the stable review cycle for the 5.10.212 release.
There are 42 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.212-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.10.212-rc1
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Chuanhong Guo <gch981213(a)gmail.com>
mtd: spinand: gigadevice: fix Quad IO for GD5F1GQ5UExxG
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Baokun Li <libaokun1(a)huawei.com>
ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Zong Li <zong.li(a)sifive.com>
riscv: add CALLER_ADDRx support
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Reto Schneider <reto.schneider(a)husqvarnagroup.com>
mtd: spinand: gigadevice: Support GD5F1GQ5UExxG
zhenwei pi <pizhenwei(a)bytedance.com>
crypto: virtio/akcipher - Fix stack overflow on memcpy
Hans de Goede <hdegoede(a)redhat.com>
platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/ftrace.h | 5 +
arch/riscv/include/asm/pgtable.h | 2 +-
arch/riscv/kernel/Makefile | 2 +
arch/riscv/kernel/return_address.c | 48 ++++++
arch/x86/kernel/cpu/intel.c | 178 +++++++++++----------
.../crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +-
drivers/dma/fsl-qdma.c | 25 +--
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 ++++--
drivers/mtd/nand/spi/gigadevice.c | 81 ++++++++--
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/platform/x86/touchscreen_dmi.c | 4 +-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 ++-
fs/cachefiles/bind.c | 3 +
fs/ext4/mballoc.c | 39 ++---
fs/hugetlbfs/inode.c | 6 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +++-
net/ipv6/addrconf.c | 7 +-
net/mptcp/diag.c | 3 +
net/mptcp/protocol.c | 49 ++++++
net/netfilter/nft_compat.c | 20 +++
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
39 files changed, 485 insertions(+), 196 deletions(-)
[BUG]
When using zoned devices (zbc), scrub would always report super block
errors like the following:
# btrfs scrub start -fB /mnt/btrfs/
Starting scrub on devid 1
scrub done for b7b5c759-1baa-4561-a0ca-b8d0babcde56
Scrub started: Tue Mar 5 12:49:14 2024
Status: finished
Duration: 0:00:00
Total to scrub: 288.00KiB
Rate: 288.00KiB/s
Error summary: super=2
Corrected: 0
Uncorrectable: 0
Unverified: 0
[CAUSE]
Since the very beginning of scrub, we always go with btrfs_sb_offset()
to grab the super blocks.
This is fine for regular btrfs filesystems, but for zoned btrfs, super
blocks are stored in dedicated zones with a ring buffer like structure.
This means the old btrfs_sb_offset() is not able to give the correct
bytenr for us to grabbing the super blocks, thus except the primary
super block, the rest would be garbage and cause the above false alerts.
[FIX]
Instead of btrfs_sb_offset(), go with btrfs_sb_log_location() which is
zoned friendly, to grab the correct super block location.
This would introduce new error patterns, as btrfs_sb_log_location() can
fail with extra errors.
Here for -ENOENT we just end the scrub as there are no more super
blocks.
For other errors, we record it as a super block error and exit.
Reported-by: WA AM <waautomata(a)gmail.com>
Link: https://lore.kernel.org/all/CANU2Z0EvUzfYxczLgGUiREoMndE9WdQnbaawV5Fv5gNXpt…
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: Johannes Thumshirn <Johannes.Thumshirn(a)wdc.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/scrub.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
---
Changelog:
v2:
- Use READ to replace the number 0
- Continue checking the next super block if we hit a non-ENOENT error
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c4bd0e60db59..201b547aac4c 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2788,7 +2788,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev)
{
int i;
- u64 bytenr;
u64 gen;
int ret = 0;
struct page *page;
@@ -2812,7 +2811,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
gen = btrfs_get_last_trans_committed(fs_info);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
+ u64 bytenr;
+
+ ret = btrfs_sb_log_location(scrub_dev, i, READ, &bytenr);
+ if (ret == -ENOENT)
+ break;
+ if (ret < 0) {
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.super_errors++;
+ spin_unlock(&sctx->stat_lock);
+ continue;
+ }
if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes)
break;
--
2.44.0
[BUG]
When using zoned devices (zbc), scrub would always report super block
errors like the following:
# btrfs scrub start -fB /mnt/btrfs/
Starting scrub on devid 1
scrub done for b7b5c759-1baa-4561-a0ca-b8d0babcde56
Scrub started: Tue Mar 5 12:49:14 2024
Status: finished
Duration: 0:00:00
Total to scrub: 288.00KiB
Rate: 288.00KiB/s
Error summary: super=2
Corrected: 0
Uncorrectable: 0
Unverified: 0
[CAUSE]
Since the very beginning of scrub, we always go with btrfs_sb_offset()
to grab the super blocks.
This is fine for regular btrfs filesystems, but for zoned btrfs, super
blocks are stored in dedicated zones with a ring buffer like structure.
This means the old btrfs_sb_offset() is not able to give the correct
bytenr for us to grabbing the super blocks, thus except the primary
super block, the rest would be garbage and cause the above false alerts.
[FIX]
Instead of btrfs_sb_offset(), go with btrfs_sb_log_location() which is
zoned friendly, to grab the correct super block location.
This would introduce new error patterns, as btrfs_sb_log_location() can
fail with extra errors.
Here for -ENOENT we just end the scrub as there are no more super
blocks.
For other errors, we record it as a super block error and exit.
Reported-by: WA AM <waautomata(a)gmail.com>
Link: https://lore.kernel.org/all/CANU2Z0EvUzfYxczLgGUiREoMndE9WdQnbaawV5Fv5gNXpt…
CC: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Johannes Thumshirn <Johannes.Thumshirn(a)wdc.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/scrub.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c4bd0e60db59..e1b67baa4072 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2788,7 +2788,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev)
{
int i;
- u64 bytenr;
u64 gen;
int ret = 0;
struct page *page;
@@ -2812,7 +2811,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
gen = btrfs_get_last_trans_committed(fs_info);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
+ u64 bytenr;
+
+ ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr);
+ if (ret == -ENOENT)
+ break;
+ if (ret < 0) {
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.super_errors++;
+ spin_unlock(&sctx->stat_lock);
+ break;
+ }
if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes)
break;
--
2.44.0
There are indications that ASPM L0s is not working very well on this
machine so disable it also for the NVMe and modem controllers for now.
Note that this is done as a precaution based on problems with the Wi-Fi
on the X13s as well as the NVMe, modem and Wi-Fi on the sc8280xp-crd
reference design (the NVMe controller on my X13s does not support L0s
and the machine lacks a modem).
Fixes: 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops")
Cc: stable(a)vger.kernel.org # 6.7
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index 9567b82db9a5..057e4d9d3c0f 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -695,6 +695,8 @@ keyboard@68 {
};
&pcie2a {
+ aspm-no-l0s;
+
perst-gpios = <&tlmm 143 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 145 GPIO_ACTIVE_LOW>;
@@ -714,6 +716,8 @@ &pcie2a_phy {
};
&pcie3a {
+ aspm-no-l0s;
+
perst-gpios = <&tlmm 151 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>;
--
2.43.0
There are indications that ASPM L0s is not working very well on this
machine so disable it also for the modem and Wi-Fi controllers for now.
This specifically avoids having the modem and Wi-Fi controllers bounce
in an out of L0s when not used (the modem still bounces in and out of
L1) as well as intermittent Correctable errors on the Wi-Fi link when
not used.
Fixes: 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops")
Cc: stable(a)vger.kernel.org # 6.7
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
arch/arm64/boot/dts/qcom/sc8280xp-crd.dts | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
index 7e94a68d5d9f..8fc0380f65a0 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
@@ -546,6 +546,8 @@ &pcie2a_phy {
};
&pcie3a {
+ aspm-no-l0s;
+
perst-gpios = <&tlmm 151 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>;
@@ -566,6 +568,7 @@ &pcie3a_phy {
&pcie4 {
max-link-speed = <2>;
+ aspm-no-l0s;
perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>;
--
2.43.0
Commit 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting
1.9.0 ops") started enabling ASPM unconditionally when the hardware
claims to support it. This triggers Correctable Errors for some PCIe
devices on machines like the Lenovo ThinkPad X13s, which could indicate
an incomplete driver ASPM implementation or that the hardware does in
fact not support L0s.
Add support for disabling ASPM L0s in the devicetree when it is not
supported on a particular machine and controller.
Note that only the 1.9.0 ops enable ASPM currently.
Fixes: 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops")
Cc: stable(a)vger.kernel.org # 6.7
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-qcom.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 09d485df34b9..0fb5dc06d2ef 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -273,6 +273,25 @@ static int qcom_pcie_start_link(struct dw_pcie *pci)
return 0;
}
+static void qcom_pcie_clear_aspm_l0s(struct dw_pcie *pci)
+{
+ u16 offset;
+ u32 val;
+
+ if (!of_property_read_bool(pci->dev->of_node, "aspm-no-l0s"))
+ return;
+
+ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+
+ dw_pcie_dbi_ro_wr_en(pci);
+
+ val = readl(pci->dbi_base + offset + PCI_EXP_LNKCAP);
+ val &= ~PCI_EXP_LNKCAP_ASPM_L0S;
+ writel(val, pci->dbi_base + offset + PCI_EXP_LNKCAP);
+
+ dw_pcie_dbi_ro_wr_dis(pci);
+}
+
static void qcom_pcie_clear_hpc(struct dw_pcie *pci)
{
u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
@@ -962,6 +981,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
{
+ qcom_pcie_clear_aspm_l0s(pcie->pci);
qcom_pcie_clear_hpc(pcie->pci);
return 0;
--
2.43.0
Since kernel version 5.4.217 LTS, there has been an issue with the kernel live patching feature becoming unavailable.
When compiling the sample code for kernel live patching, the following message is displayed when enabled:
livepatch: klp_check_stack: kworker/u256:6:23490 has an unreliable stack
Reproduction steps:
1.git checkout v5.4.269 -b v5.4.269
2.make defconfig
3. Set CONFIG_LIVEPATCH=y、CONFIG_SAMPLE_LIVEPATCH=m
4. make -j bzImage
5. make samples/livepatch/livepatch-sample.ko
6. qemu-system-x86_64 -kernel arch/x86_64/boot/bzImage -nographic -append "console=ttyS0" -initrd initrd.img -m 1024M
7. insmod livepatch-sample.ko
Kernel live patch cannot complete successfully.
After some debugging, the immediate cause of the patch failure is an error in stack checking. The logs are as follows:
[ 340.974853] livepatch: klp_check_stack: kworker/u256:0:23486 has an unreliable stack
[ 340.974858] livepatch: klp_check_stack: kworker/u256:1:23487 has an unreliable stack
[ 340.974863] livepatch: klp_check_stack: kworker/u256:2:23488 has an unreliable stack
[ 340.974868] livepatch: klp_check_stack: kworker/u256:5:23489 has an unreliable stack
[ 340.974872] livepatch: klp_check_stack: kworker/u256:6:23490 has an unreliable stack
......
BTW,if you use the v5.4.217 tag for testing, make sure to set CONFIG_RETPOLINE = y and CONFIG_LIVEPATCH = y, and other steps are consistent with v5.4.269
After investigation, The problem is strongly related to the commit 8afd1c7da2b0 ("x86/speculation: Change FILL_RETURN_BUFFER to work with objtool"),
which would cause incorrect ORC entries to be generated, and the v5.4.217 version can undo this commit to make kernel livepatch work normally.
It is a back-ported upstream patch with some code adjustments,from the git log, the author also mentioned no intra-function call validation support.
Based on commit 6e1f54a4985b63bc1b55a09e5e75a974c5d6719b (Linux 5.4.269), This patchset adds stack validation support for intra-function calls,
allowing the kernel live patching feature to work correctly.
Alexandre Chartre (2):
objtool: is_fentry_call() crashes if call has no destination
objtool: Add support for intra-function calls
Rui Qi (1):
x86/speculation: Support intra-function call validation
arch/x86/include/asm/nospec-branch.h | 7 ++
include/linux/frame.h | 11 ++++
.../Documentation/stack-validation.txt | 8 +++
tools/objtool/arch/x86/decode.c | 6 ++
tools/objtool/check.c | 64 +++++++++++++++++--
5 files changed, 91 insertions(+), 5 deletions(-)
--
2.39.2 (Apple Git-143)
> From: Parav Pandit <parav(a)nvidia.com>
> Sent: Tuesday, March 5, 2024 12:06 PM
>
> When the PCI device is surprise removed, requests won't complete from the
> device. These IOs are never completed and disk deletion hangs indefinitely.
>
> Fix it by aborting the IOs which the device will never complete when the VQ is
> broken.
>
> With this fix now fio completes swiftly.
> An alternative of IO timeout has been considered, however timeout can take
> very long time. For unresponsive device, quickly completing the request with
> error enables users and upper layer to react quickly.
>
> Verified with multiple device unplug cycles with pending IOs in virtio used ring
> and some pending with device.
>
> Also verified without surprise removal.
>
> Fixes: 43bb40c5b926 ("virtio_pci: Support surprise removal of virtio pci
> device")
> Cc: stable(a)vger.kernel.org
> Reported-by: lirongqing(a)baidu.com
> Closes:
> https://lore.kernel.org/virtualization/c45dd68698cd47238c55fb73ca9b4741
> @baidu.com/
> Co-developed-by: Chaitanya Kulkarni <kch(a)nvidia.com>
> Signed-off-by: Chaitanya Kulkarni <kch(a)nvidia.com>
> Signed-off-by: Parav Pandit <parav(a)nvidia.com>
> ---
Please ignore this patch, I am still debugging and verifying it.
Incorrectly it got CCed to stable.
I am sorry for the noise.
> changelog:
> v0->v1:
> - addressed comments from Ming and Michael
> - changed the flow to not depend on broken vq anymore to avoid
> the race of missing the detection
> - flow updated to quiesce request queue and device, followed by
> syncing with any ongoing interrupt handler or callbacks,
> finally finishing the requests which are not completed by device
> with error.
> ---
> drivers/block/virtio_blk.c | 46 +++++++++++++++++++++++++++++++++++-
> --
> 1 file changed, 43 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index
> 2bf14a0e2815..1956172b4b1a 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -1562,9 +1562,52 @@ static int virtblk_probe(struct virtio_device *vdev)
> return err;
> }
>
> +static bool virtblk_cancel_request(struct request *rq, void *data) {
> + struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
> +
> + vbr->in_hdr.status = VIRTIO_BLK_S_IOERR;
> + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq))
> + blk_mq_complete_request(rq);
> +
> + return true;
> +}
> +
> static void virtblk_remove(struct virtio_device *vdev) {
> struct virtio_blk *vblk = vdev->priv;
> + int i;
> +
> + /* Block upper layer to not get any new requests */
> + blk_mq_quiesce_queue(vblk->disk->queue);
> +
> + mutex_lock(&vblk->vdev_mutex);
> +
> + /* Stop all the virtqueues and configuration change notification and
> also
> + * synchronize with pending interrupt handlers.
> + */
> + virtio_reset_device(vdev);
> +
> + mutex_unlock(&vblk->vdev_mutex);
> +
> + /* Syncronize with any callback handlers for request completion */
> + for (i = 0; i < vblk->num_vqs; i++)
> + virtblk_done(vblk->vqs[i].vq);
> +
> + blk_sync_queue(vblk->disk->queue);
> +
> + /* At this point block layer and device/transport are quiet;
> + * hence, safely complete all the pending requests with error.
> + */
> + blk_mq_tagset_busy_iter(&vblk->tag_set, virtblk_cancel_request,
> vblk);
> + blk_mq_tagset_wait_completed_request(&vblk->tag_set);
> +
> + /*
> + * Unblock any pending dispatch I/Os before we destroy device. From
> + * del_gendisk() -> __blk_mark_disk_dead(disk) will set GD_DEAD flag,
> + * that will make sure any new I/O from bio_queue_enter() to fail.
> + */
> + blk_mq_unquiesce_queue(vblk->disk->queue);
>
> /* Make sure no work handler is accessing the device. */
> flush_work(&vblk->config_work);
> @@ -1574,9 +1617,6 @@ static void virtblk_remove(struct virtio_device
> *vdev)
>
> mutex_lock(&vblk->vdev_mutex);
>
> - /* Stop all the virtqueues. */
> - virtio_reset_device(vdev);
> -
> /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
> vblk->vdev = NULL;
>
> --
> 2.34.1
The quilt patch titled
Subject: init/Kconfig: lower GCC version check for -Warray-bounds
has been removed from the -mm tree. Its filename was
init-kconfig-lower-gcc-version-check-for-warray-bounds.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kees Cook <keescook(a)chromium.org>
Subject: init/Kconfig: lower GCC version check for -Warray-bounds
Date: Fri, 23 Feb 2024 09:08:27 -0800
We continue to see false positives from -Warray-bounds even in GCC 10,
which is getting reported in a few places[1] still:
security/security.c:811:2: warning: `memcpy' offset 32 is out of the bounds [0, 0] [-Warray-bounds]
Lower the GCC version check from 11 to 10.
Link: https://lkml.kernel.org/r/20240223170824.work.768-kees@kernel.org
Reported-by: Lu Yao <yaolu(a)kylinos.cn>
Closes: https://lore.kernel.org/lkml/20240117014541.8887-1-yaolu@kylinos.cn/
Link: https://lore.kernel.org/linux-next/65d84438.620a0220.7d171.81a7@mx.google.c… [1]
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reviewed-by: Paul Moore <paul(a)paul-moore.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: "Gustavo A. R. Silva" <gustavoars(a)kernel.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Marc Aur��le La France <tsi(a)tuyoix.net>
Cc: Masahiro Yamada <masahiroy(a)kernel.org>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
init/Kconfig | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/init/Kconfig~init-kconfig-lower-gcc-version-check-for-warray-bounds
+++ a/init/Kconfig
@@ -876,14 +876,14 @@ config CC_IMPLICIT_FALLTHROUGH
default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
-# Currently, disable gcc-11+ array-bounds globally.
+# Currently, disable gcc-10+ array-bounds globally.
# It's still broken in gcc-13, so no upper bound yet.
-config GCC11_NO_ARRAY_BOUNDS
+config GCC10_NO_ARRAY_BOUNDS
def_bool y
config CC_NO_ARRAY_BOUNDS
bool
- default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS
+ default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS
# Currently, disable -Wstringop-overflow for GCC globally.
config GCC_NO_STRINGOP_OVERFLOW
_
Patches currently in -mm which might be from keescook(a)chromium.org are
The quilt patch titled
Subject: mm, mmap: fix vma_merge() case 7 with vma_ops->close
has been removed from the -mm tree. Its filename was
mm-mmap-fix-vma_merge-case-7-with-vma_ops-close.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Vlastimil Babka <vbabka(a)suse.cz>
Subject: mm, mmap: fix vma_merge() case 7 with vma_ops->close
Date: Thu, 22 Feb 2024 22:59:31 +0100
When debugging issues with a workload using SysV shmem, Michal Hocko has
come up with a reproducer that shows how a series of mprotect() operations
can result in an elevated shm_nattch and thus leak of the resource.
The problem is caused by wrong assumptions in vma_merge() commit
714965ca8252 ("mm/mmap: start distinguishing if vma can be removed in
mergeability test"). The shmem vmas have a vma_ops->close callback that
decrements shm_nattch, and we remove the vma without calling it.
vma_merge() has thus historically avoided merging vma's with
vma_ops->close and commit 714965ca8252 was supposed to keep it that way.
It relaxed the checks for vma_ops->close in can_vma_merge_after() assuming
that it is never called on a vma that would be a candidate for removal.
However, the vma_merge() code does also use the result of this check in
the decision to remove a different vma in the merge case 7.
A robust solution would be to refactor vma_merge() code in a way that the
vma_ops->close check is only done for vma's that are actually going to be
removed, and not as part of the preliminary checks. That would both solve
the existing bug, and also allow additional merges that the checks
currently prevent unnecessarily in some cases.
However to fix the existing bug first with a minimized risk, and for
easier stable backports, this patch only adds a vma_ops->close check to
the buggy case 7 specifically. All other cases of vma removal are covered
by the can_vma_merge_before() check that includes the test for
vma_ops->close.
The reproducer code, adapted from Michal Hocko's code:
int main(int argc, char *argv[]) {
int segment_id;
size_t segment_size = 20 * PAGE_SIZE;
char * sh_mem;
struct shmid_ds shmid_ds;
key_t key = 0x1234;
segment_id = shmget(key, segment_size,
IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR);
sh_mem = (char *)shmat(segment_id, NULL, 0);
mprotect(sh_mem + 2*PAGE_SIZE, PAGE_SIZE, PROT_NONE);
mprotect(sh_mem + PAGE_SIZE, PAGE_SIZE, PROT_WRITE);
mprotect(sh_mem + 2*PAGE_SIZE, PAGE_SIZE, PROT_WRITE);
shmdt(sh_mem);
shmctl(segment_id, IPC_STAT, &shmid_ds);
printf("nattch after shmdt(): %lu (expected: 0)\n", shmid_ds.shm_nattch);
if (shmctl(segment_id, IPC_RMID, 0))
printf("IPCRM failed %d\n", errno);
return (shmid_ds.shm_nattch) ? 1 : 0;
}
Link: https://lkml.kernel.org/r/20240222215930.14637-2-vbabka@suse.cz
Fixes: 714965ca8252 ("mm/mmap: start distinguishing if vma can be removed in mergeability test")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Reported-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Lorenzo Stoakes <lstoakes(a)gmail.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mmap.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
--- a/mm/mmap.c~mm-mmap-fix-vma_merge-case-7-with-vma_ops-close
+++ a/mm/mmap.c
@@ -954,13 +954,21 @@ static struct vm_area_struct
} else if (merge_prev) { /* case 2 */
if (curr) {
vma_start_write(curr);
- err = dup_anon_vma(prev, curr, &anon_dup);
if (end == curr->vm_end) { /* case 7 */
+ /*
+ * can_vma_merge_after() assumed we would not be
+ * removing prev vma, so it skipped the check
+ * for vm_ops->close, but we are removing curr
+ */
+ if (curr->vm_ops && curr->vm_ops->close)
+ err = -EINVAL;
remove = curr;
} else { /* case 5 */
adjust = curr;
adj_start = (end - curr->vm_start);
}
+ if (!err)
+ err = dup_anon_vma(prev, curr, &anon_dup);
}
} else { /* merge_next */
vma_start_write(next);
_
Patches currently in -mm which might be from vbabka(a)suse.cz are
The quilt patch titled
Subject: mm: userfaultfd: fix unexpected change to src_folio when UFFDIO_MOVE fails
has been removed from the -mm tree. Its filename was
mm-userfaultfd-fix-unexpected-change-to-src_folio-when-uffdio_move-fails.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Qi Zheng <zhengqi.arch(a)bytedance.com>
Subject: mm: userfaultfd: fix unexpected change to src_folio when UFFDIO_MOVE fails
Date: Thu, 22 Feb 2024 16:08:15 +0800
After ptep_clear_flush(), if we find that src_folio is pinned we will fail
UFFDIO_MOVE and put src_folio back to src_pte entry, but the change to
src_folio->{mapping,index} is not restored in this process. This is not
what we expected, so fix it.
This can cause the rmap for that page to be invalid, possibly resulting
in memory corruption. At least swapout+migration would no longer work,
because we might fail to locate the mappings of that folio.
Link: https://lkml.kernel.org/r/20240222080815.46291-1-zhengqi.arch@bytedance.com
Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI")
Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/userfaultfd.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/userfaultfd.c~mm-userfaultfd-fix-unexpected-change-to-src_folio-when-uffdio_move-fails
+++ a/mm/userfaultfd.c
@@ -914,9 +914,6 @@ static int move_present_pte(struct mm_st
goto out;
}
- folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
orig_src_pte = ptep_clear_flush(src_vma, src_addr, src_pte);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {
@@ -925,6 +922,9 @@ static int move_present_pte(struct mm_st
goto out;
}
+ folio_move_anon_rmap(src_folio, dst_vma);
+ WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
_
Patches currently in -mm which might be from zhengqi.arch(a)bytedance.com are
mm-pgtable-correct-the-wrong-comment-about-ptdesc-__page_flags.patch
mm-pgtable-add-missing-pt_index-to-struct-ptdesc.patch
s390-supplement-for-ptdesc-conversion.patch
The quilt patch titled
Subject: mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations
has been removed from the -mm tree. Its filename was
mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Vlastimil Babka <vbabka(a)suse.cz>
Subject: mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations
Date: Wed, 21 Feb 2024 12:43:58 +0100
Sven reports an infinite loop in __alloc_pages_slowpath() for costly order
__GFP_RETRY_MAYFAIL allocations that are also GFP_NOIO. Such combination
can happen in a suspend/resume context where a GFP_KERNEL allocation can
have __GFP_IO masked out via gfp_allowed_mask.
Quoting Sven:
1. try to do a "costly" allocation (order > PAGE_ALLOC_COSTLY_ORDER)
with __GFP_RETRY_MAYFAIL set.
2. page alloc's __alloc_pages_slowpath tries to get a page from the
freelist. This fails because there is nothing free of that costly
order.
3. page alloc tries to reclaim by calling __alloc_pages_direct_reclaim,
which bails out because a zone is ready to be compacted; it pretends
to have made a single page of progress.
4. page alloc tries to compact, but this always bails out early because
__GFP_IO is not set (it's not passed by the snd allocator, and even
if it were, we are suspending so the __GFP_IO flag would be cleared
anyway).
5. page alloc believes reclaim progress was made (because of the
pretense in item 3) and so it checks whether it should retry
compaction. The compaction retry logic thinks it should try again,
because:
a) reclaim is needed because of the early bail-out in item 4
b) a zonelist is suitable for compaction
6. goto 2. indefinite stall.
(end quote)
The immediate root cause is confusing the COMPACT_SKIPPED returned from
__alloc_pages_direct_compact() (step 4) due to lack of __GFP_IO to be
indicating a lack of order-0 pages, and in step 5 evaluating that in
should_compact_retry() as a reason to retry, before incrementing and
limiting the number of retries. There are however other places that
wrongly assume that compaction can happen while we lack __GFP_IO.
To fix this, introduce gfp_compaction_allowed() to abstract the __GFP_IO
evaluation and switch the open-coded test in try_to_compact_pages() to use
it.
Also use the new helper in:
- compaction_ready(), which will make reclaim not bail out in step 3, so
there's at least one attempt to actually reclaim, even if chances are
small for a costly order
- in_reclaim_compaction() which will make should_continue_reclaim()
return false and we don't over-reclaim unnecessarily
- in __alloc_pages_slowpath() to set a local variable can_compact,
which is then used to avoid retrying reclaim/compaction for costly
allocations (step 5) if we can't compact and also to skip the early
compaction attempt that we do in some cases
Link: https://lkml.kernel.org/r/20240221114357.13655-2-vbabka@suse.cz
Fixes: 3250845d0526 ("Revert "mm, oom: prevent premature OOM killer invocation for high order request"")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Reported-by: Sven van Ashbrook <svenva(a)chromium.org>
Closes: https://lore.kernel.org/all/CAG-rBihs_xMKb3wrMO1%2B-%2Bp4fowP9oy1pa_OTkfxBz…
Tested-by: Karthikeyan Ramasubramanian <kramasub(a)chromium.org>
Cc: Brian Geffon <bgeffon(a)google.com>
Cc: Curtis Malainey <cujomalainey(a)chromium.org>
Cc: Jaroslav Kysela <perex(a)perex.cz>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Takashi Iwai <tiwai(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/gfp.h | 9 +++++++++
mm/compaction.c | 7 +------
mm/page_alloc.c | 10 ++++++----
mm/vmscan.c | 5 ++++-
4 files changed, 20 insertions(+), 11 deletions(-)
--- a/include/linux/gfp.h~mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations
+++ a/include/linux/gfp.h
@@ -353,6 +353,15 @@ static inline bool gfp_has_io_fs(gfp_t g
return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS);
}
+/*
+ * Check if the gfp flags allow compaction - GFP_NOIO is a really
+ * tricky context because the migration might require IO.
+ */
+static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
+{
+ return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
+}
+
extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_CONTIG_ALLOC
--- a/mm/compaction.c~mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations
+++ a/mm/compaction.c
@@ -2723,16 +2723,11 @@ enum compact_result try_to_compact_pages
unsigned int alloc_flags, const struct alloc_context *ac,
enum compact_priority prio, struct page **capture)
{
- int may_perform_io = (__force int)(gfp_mask & __GFP_IO);
struct zoneref *z;
struct zone *zone;
enum compact_result rc = COMPACT_SKIPPED;
- /*
- * Check if the GFP flags allow compaction - GFP_NOIO is really
- * tricky context because the migration might require IO
- */
- if (!may_perform_io)
+ if (!gfp_compaction_allowed(gfp_mask))
return COMPACT_SKIPPED;
trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
--- a/mm/page_alloc.c~mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations
+++ a/mm/page_alloc.c
@@ -4041,6 +4041,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
struct alloc_context *ac)
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+ bool can_compact = gfp_compaction_allowed(gfp_mask);
const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
struct page *page = NULL;
unsigned int alloc_flags;
@@ -4111,7 +4112,7 @@ restart:
* Don't try this for allocations that are allowed to ignore
* watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
*/
- if (can_direct_reclaim &&
+ if (can_direct_reclaim && can_compact &&
(costly_order ||
(order > 0 && ac->migratetype != MIGRATE_MOVABLE))
&& !gfp_pfmemalloc_allowed(gfp_mask)) {
@@ -4209,9 +4210,10 @@ retry:
/*
* Do not retry costly high order allocations unless they are
- * __GFP_RETRY_MAYFAIL
+ * __GFP_RETRY_MAYFAIL and we can compact
*/
- if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
+ if (costly_order && (!can_compact ||
+ !(gfp_mask & __GFP_RETRY_MAYFAIL)))
goto nopage;
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -4224,7 +4226,7 @@ retry:
* implementation of the compaction depends on the sufficient amount
* of free memory (see __compaction_suitable)
*/
- if (did_some_progress > 0 &&
+ if (did_some_progress > 0 && can_compact &&
should_compact_retry(ac, order, alloc_flags,
compact_result, &compact_priority,
&compaction_retries))
--- a/mm/vmscan.c~mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations
+++ a/mm/vmscan.c
@@ -5753,7 +5753,7 @@ static void shrink_lruvec(struct lruvec
/* Use reclaim/compaction for costly allocs or under memory pressure */
static bool in_reclaim_compaction(struct scan_control *sc)
{
- if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
+ if (gfp_compaction_allowed(sc->gfp_mask) && sc->order &&
(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
sc->priority < DEF_PRIORITY - 2))
return true;
@@ -5998,6 +5998,9 @@ static inline bool compaction_ready(stru
{
unsigned long watermark;
+ if (!gfp_compaction_allowed(sc->gfp_mask))
+ return false;
+
/* Allocation can already succeed, nothing to do */
if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
sc->reclaim_idx, 0))
_
Patches currently in -mm which might be from vbabka(a)suse.cz are
From: Maximilian Heyne <mheyne(a)amazon.de>
Commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream
shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taking. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.
This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
channel:
CPU0 CPU1
shutdown_pirq {
xen_evtchn_close(e)
__startup_pirq {
EVTCHNOP_bind_pirq
-> returns just freed evtchn e
set_evtchn_to_irq(e, irq)
}
xen_irq_info_cleanup() {
set_evtchn_to_irq(e, -1)
}
}
Assume here event channel e refers here to the same event channel
number.
After this race the evtchn_to_irq mapping for e is invalid (-1).
- __startup_pirq races with __unbind_from_irq in a similar way. Because
__startup_pirq doesn't take irq_mapping_update_lock it can grab the
evtchn that __unbind_from_irq is currently freeing and cleaning up. In
this case even though the event channel is allocated, its mapping can
be unset in evtchn_to_irq.
The fix is to first cleanup the mappings and then close the event
channel. In this way, when an event channel gets allocated it's
potential previous evtchn_to_irq mappings are guaranteed to be unset already.
This is also the reverse order of the allocation where first the event
channel is allocated and then the mappings are setup.
On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device which results in a call to shutdown_pirq.
With many nvme devices it's therefore likely to hit this race during
boot because there will be multiple calls to shutdown_pirq and
startup_pirq are running potentially in parallel.
------------[ cut here ]------------
blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
kernel BUG at drivers/xen/events/events_base.c:499!
invalid opcode: 0000 [#1] SMP PTI
CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
Workqueue: nvme-reset-wq nvme_reset_work
RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
? show_trace_log_lvl+0x1c1/0x2d9
? show_trace_log_lvl+0x1c1/0x2d9
? set_affinity_irq+0xdc/0x1c0
? __die_body.cold+0x8/0xd
? die+0x2b/0x50
? do_trap+0x90/0x110
? bind_evtchn_to_cpu+0xdf/0xf0
? do_error_trap+0x65/0x80
? bind_evtchn_to_cpu+0xdf/0xf0
? exc_invalid_op+0x4e/0x70
? bind_evtchn_to_cpu+0xdf/0xf0
? asm_exc_invalid_op+0x12/0x20
? bind_evtchn_to_cpu+0xdf/0xf0
? bind_evtchn_to_cpu+0xc5/0xf0
set_affinity_irq+0xdc/0x1c0
irq_do_set_affinity+0x1d7/0x1f0
irq_setup_affinity+0xd6/0x1a0
irq_startup+0x8a/0xf0
__setup_irq+0x639/0x6d0
? nvme_suspend+0x150/0x150
request_threaded_irq+0x10c/0x180
? nvme_suspend+0x150/0x150
pci_request_irq+0xa8/0xf0
? __blk_mq_free_request+0x74/0xa0
queue_request_irq+0x6f/0x80
nvme_create_queue+0x1af/0x200
nvme_create_io_queues+0xbd/0xf0
nvme_setup_io_queues+0x246/0x320
? nvme_irq_check+0x30/0x30
nvme_reset_work+0x1c8/0x400
process_one_work+0x1b0/0x350
worker_thread+0x49/0x310
? process_one_work+0x350/0x350
kthread+0x11b/0x140
? __kthread_bind_mask+0x60/0x60
ret_from_fork+0x22/0x30
Modules linked in:
---[ end trace a11715de1eee1873 ]---
Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
Cc: stable(a)vger.kernel.org
Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
[apanyaki: backport to v5.10-stable]
Signed-off-by: Andrew Paniakin <apanyaki(a)amazon.com>
---
Compare to upstream patch this one does not have close_evtchn flag
because there is no need to handle static event channels.
This feature was added only in 58f6259b7a08f ("xen/evtchn: Introduce new
IOCTL to bind static evtchn")
drivers/xen/events/events_base.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 24e39984914f..f608663bdfd5 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -885,8 +885,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -929,8 +929,6 @@ static void __unbind_from_irq(unsigned int irq)
if (VALID_EVTCHN(evtchn)) {
unsigned int cpu = cpu_from_irq(irq);
- xen_evtchn_close(evtchn);
-
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -943,6 +941,7 @@ static void __unbind_from_irq(unsigned int irq)
}
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
xen_free_irq(irq);
--
2.40.1
During the handoff from earlycon to the real console driver, we have
two separate drivers operating on the same device concurrently. In the
case of the 8250 driver these concurrent accesses cause problems due
to the driver's use of banked registers, controlled by LCR.DLAB. It is
possible for the setup(), config_port(), pm() and set_mctrl() callbacks
to set DLAB, which can cause the earlycon code that intends to access
TX to instead access DLL, leading to missed output and corruption on
the serial line due to unintended modifications to the baud rate.
In particular, for setup() we have:
univ8250_console_setup()
-> serial8250_console_setup()
-> uart_set_options()
-> serial8250_set_termios()
-> serial8250_do_set_termios()
-> serial8250_do_set_divisor()
For config_port() we have:
serial8250_config_port()
-> autoconfig()
For pm() we have:
serial8250_pm()
-> serial8250_do_pm()
-> serial8250_set_sleep()
For set_mctrl() we have (for some devices):
serial8250_set_mctrl()
-> omap8250_set_mctrl()
-> __omap8250_set_mctrl()
To avoid such problems, let's make it so that the console is locked
during pre-registration calls to these callbacks, which will prevent
the earlycon driver from running concurrently.
Remove the partial solution to this problem in the 8250 driver
that locked the console only during autoconfig_irq(), as this would
result in a deadlock with the new approach. The console continues
to be locked during autoconfig_irq() because it can only be called
through uart_configure_port().
Although this patch introduces more locking than strictly necessary
(and in particular it also locks during the call to rs485_config()
which is not affected by this issue as far as I can tell), it follows
the principle that it is the responsibility of the generic console
code to manage the earlycon handoff by ensuring that earlycon and real
console driver code cannot run concurrently, and not the individual
drivers.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Link: https://linux-review.googlesource.com/id/I7cf8124dcebf8618e6b2ee543fa5b2553…
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
---
drivers/tty/serial/8250/8250_port.c | 6 ------
drivers/tty/serial/serial_core.c | 12 ++++++++++++
kernel/printk/printk.c | 21 ++++++++++++++++++---
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 8ca061d3bbb9..1d65055dde27 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1329,9 +1329,6 @@ static void autoconfig_irq(struct uart_8250_port *up)
inb_p(ICP);
}
- if (uart_console(port))
- console_lock();
-
/* forget possible initially masked and pending IRQ */
probe_irq_off(probe_irq_on());
save_mcr = serial8250_in_MCR(up);
@@ -1371,9 +1368,6 @@ static void autoconfig_irq(struct uart_8250_port *up)
if (port->flags & UPF_FOURPORT)
outb_p(save_ICP, ICP);
- if (uart_console(port))
- console_unlock();
-
port->irq = (irq > 0) ? irq : 0;
}
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index d6a58a9e072a..ff85ebd3a007 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2608,7 +2608,12 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
port->type = PORT_UNKNOWN;
flags |= UART_CONFIG_TYPE;
}
+ /* Synchronize with possible boot console. */
+ if (uart_console(port))
+ console_lock();
port->ops->config_port(port, flags);
+ if (uart_console(port))
+ console_unlock();
}
if (port->type != PORT_UNKNOWN) {
@@ -2616,6 +2621,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
uart_report_port(drv, port);
+ /* Synchronize with possible boot console. */
+ if (uart_console(port))
+ console_lock();
+
/* Power up port for set_mctrl() */
uart_change_pm(state, UART_PM_STATE_ON);
@@ -2632,6 +2641,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
uart_rs485_config(port);
+ if (uart_console(port))
+ console_unlock();
+
/*
* If this driver supports console, and it hasn't been
* successfully registered yet, try to re-register it.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f2444b581e16..f51e4e5a869d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3263,6 +3263,21 @@ static int __init keep_bootcon_setup(char *str)
early_param("keep_bootcon", keep_bootcon_setup);
+static int console_call_setup(struct console *newcon, char *options)
+{
+ int err;
+
+ if (!newcon->setup)
+ return 0;
+
+ /* Synchronize with possible boot console. */
+ console_lock();
+ err = newcon->setup(newcon, options);
+ console_unlock();
+
+ return err;
+}
+
/*
* This is called by register_console() to try to match
* the newly registered console with any of the ones selected
@@ -3298,8 +3313,8 @@ static int try_enable_preferred_console(struct console *newcon,
if (_braille_register_console(newcon, c))
return 0;
- if (newcon->setup &&
- (err = newcon->setup(newcon, c->options)) != 0)
+ err = console_call_setup(newcon, c->options);
+ if (err != 0)
return err;
}
newcon->flags |= CON_ENABLED;
@@ -3325,7 +3340,7 @@ static void try_enable_default_console(struct console *newcon)
if (newcon->index < 0)
newcon->index = 0;
- if (newcon->setup && newcon->setup(newcon, NULL) != 0)
+ if (console_call_setup(newcon, NULL) != 0)
return;
newcon->flags |= CON_ENABLED;
--
2.44.0.rc1.240.g4c46232300-goog
During the handoff from earlycon to the real console driver, we have
two separate drivers operating on the same device concurrently. In the
case of the 8250 driver these concurrent accesses cause problems due
to the driver's use of banked registers, controlled by LCR.DLAB. It is
possible for the setup(), config_port(), pm() and set_mctrl() callbacks
to set DLAB, which can cause the earlycon code that intends to access
TX to instead access DLL, leading to missed output and corruption on
the serial line due to unintended modifications to the baud rate.
In particular, for setup() we have:
univ8250_console_setup()
-> serial8250_console_setup()
-> uart_set_options()
-> serial8250_set_termios()
-> serial8250_do_set_termios()
-> serial8250_do_set_divisor()
For config_port() we have:
serial8250_config_port()
-> autoconfig()
For pm() we have:
serial8250_pm()
-> serial8250_do_pm()
-> serial8250_set_sleep()
For set_mctrl() we have (for some devices):
serial8250_set_mctrl()
-> omap8250_set_mctrl()
-> __omap8250_set_mctrl()
To avoid such problems, let's make it so that the console is locked
during pre-registration calls to these callbacks, which will prevent
the earlycon driver from running concurrently.
Remove the partial solution to this problem in the 8250 driver
that locked the console only during autoconfig_irq(), as this would
result in a deadlock with the new approach. The console continues
to be locked during autoconfig_irq() because it can only be called
through uart_configure_port().
Although this patch introduces more locking than strictly necessary
(and in particular it also locks during the call to rs485_config()
which is not affected by this issue as far as I can tell), it follows
the principle that it is the responsibility of the generic console
code to manage the earlycon handoff by ensuring that earlycon and real
console driver code cannot run concurrently, and not the individual
drivers.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Link: https://linux-review.googlesource.com/id/I7cf8124dcebf8618e6b2ee543fa5b2553…
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
---
v3:
- switch to if (err)
v2:
- add some comments
drivers/tty/serial/8250/8250_port.c | 6 ------
drivers/tty/serial/serial_core.c | 12 ++++++++++++
kernel/printk/printk.c | 21 ++++++++++++++++++---
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 8ca061d3bbb9..1d65055dde27 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1329,9 +1329,6 @@ static void autoconfig_irq(struct uart_8250_port *up)
inb_p(ICP);
}
- if (uart_console(port))
- console_lock();
-
/* forget possible initially masked and pending IRQ */
probe_irq_off(probe_irq_on());
save_mcr = serial8250_in_MCR(up);
@@ -1371,9 +1368,6 @@ static void autoconfig_irq(struct uart_8250_port *up)
if (port->flags & UPF_FOURPORT)
outb_p(save_ICP, ICP);
- if (uart_console(port))
- console_unlock();
-
port->irq = (irq > 0) ? irq : 0;
}
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index d6a58a9e072a..ff85ebd3a007 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2608,7 +2608,12 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
port->type = PORT_UNKNOWN;
flags |= UART_CONFIG_TYPE;
}
+ /* Synchronize with possible boot console. */
+ if (uart_console(port))
+ console_lock();
port->ops->config_port(port, flags);
+ if (uart_console(port))
+ console_unlock();
}
if (port->type != PORT_UNKNOWN) {
@@ -2616,6 +2621,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
uart_report_port(drv, port);
+ /* Synchronize with possible boot console. */
+ if (uart_console(port))
+ console_lock();
+
/* Power up port for set_mctrl() */
uart_change_pm(state, UART_PM_STATE_ON);
@@ -2632,6 +2641,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
uart_rs485_config(port);
+ if (uart_console(port))
+ console_unlock();
+
/*
* If this driver supports console, and it hasn't been
* successfully registered yet, try to re-register it.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f2444b581e16..89f2aa2e1172 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3263,6 +3263,21 @@ static int __init keep_bootcon_setup(char *str)
early_param("keep_bootcon", keep_bootcon_setup);
+static int console_call_setup(struct console *newcon, char *options)
+{
+ int err;
+
+ if (!newcon->setup)
+ return 0;
+
+ /* Synchronize with possible boot console. */
+ console_lock();
+ err = newcon->setup(newcon, options);
+ console_unlock();
+
+ return err;
+}
+
/*
* This is called by register_console() to try to match
* the newly registered console with any of the ones selected
@@ -3298,8 +3313,8 @@ static int try_enable_preferred_console(struct console *newcon,
if (_braille_register_console(newcon, c))
return 0;
- if (newcon->setup &&
- (err = newcon->setup(newcon, c->options)) != 0)
+ err = console_call_setup(newcon, c->options);
+ if (err)
return err;
}
newcon->flags |= CON_ENABLED;
@@ -3325,7 +3340,7 @@ static void try_enable_default_console(struct console *newcon)
if (newcon->index < 0)
newcon->index = 0;
- if (newcon->setup && newcon->setup(newcon, NULL) != 0)
+ if (console_call_setup(newcon, NULL) != 0)
return;
newcon->flags |= CON_ENABLED;
--
2.44.0.278.ge034bb2e1d-goog
From: Maximilian Heyne <mheyne(a)amazon.de>
Commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream
shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taking. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.
This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
channel:
CPU0 CPU1
shutdown_pirq {
xen_evtchn_close(e)
__startup_pirq {
EVTCHNOP_bind_pirq
-> returns just freed evtchn e
set_evtchn_to_irq(e, irq)
}
xen_irq_info_cleanup() {
set_evtchn_to_irq(e, -1)
}
}
Assume here event channel e refers here to the same event channel
number.
After this race the evtchn_to_irq mapping for e is invalid (-1).
- __startup_pirq races with __unbind_from_irq in a similar way. Because
__startup_pirq doesn't take irq_mapping_update_lock it can grab the
evtchn that __unbind_from_irq is currently freeing and cleaning up. In
this case even though the event channel is allocated, its mapping can
be unset in evtchn_to_irq.
The fix is to first cleanup the mappings and then close the event
channel. In this way, when an event channel gets allocated it's
potential previous evtchn_to_irq mappings are guaranteed to be unset already.
This is also the reverse order of the allocation where first the event
channel is allocated and then the mappings are setup.
On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device which results in a call to shutdown_pirq.
With many nvme devices it's therefore likely to hit this race during
boot because there will be multiple calls to shutdown_pirq and
startup_pirq are running potentially in parallel.
------------[ cut here ]------------
blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
kernel BUG at drivers/xen/events/events_base.c:499!
invalid opcode: 0000 [#1] SMP PTI
CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
Workqueue: nvme-reset-wq nvme_reset_work
RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
? show_trace_log_lvl+0x1c1/0x2d9
? show_trace_log_lvl+0x1c1/0x2d9
? set_affinity_irq+0xdc/0x1c0
? __die_body.cold+0x8/0xd
? die+0x2b/0x50
? do_trap+0x90/0x110
? bind_evtchn_to_cpu+0xdf/0xf0
? do_error_trap+0x65/0x80
? bind_evtchn_to_cpu+0xdf/0xf0
? exc_invalid_op+0x4e/0x70
? bind_evtchn_to_cpu+0xdf/0xf0
? asm_exc_invalid_op+0x12/0x20
? bind_evtchn_to_cpu+0xdf/0xf0
? bind_evtchn_to_cpu+0xc5/0xf0
set_affinity_irq+0xdc/0x1c0
irq_do_set_affinity+0x1d7/0x1f0
irq_setup_affinity+0xd6/0x1a0
irq_startup+0x8a/0xf0
__setup_irq+0x639/0x6d0
? nvme_suspend+0x150/0x150
request_threaded_irq+0x10c/0x180
? nvme_suspend+0x150/0x150
pci_request_irq+0xa8/0xf0
? __blk_mq_free_request+0x74/0xa0
queue_request_irq+0x6f/0x80
nvme_create_queue+0x1af/0x200
nvme_create_io_queues+0xbd/0xf0
nvme_setup_io_queues+0x246/0x320
? nvme_irq_check+0x30/0x30
nvme_reset_work+0x1c8/0x400
process_one_work+0x1b0/0x350
worker_thread+0x49/0x310
? process_one_work+0x350/0x350
kthread+0x11b/0x140
? __kthread_bind_mask+0x60/0x60
ret_from_fork+0x22/0x30
Modules linked in:
---[ end trace a11715de1eee1873 ]---
Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
Cc: stable(a)vger.kernel.org
Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
[apanyaki: backport to v5.15-stable]
Signed-off-by: Andrew Paniakin <apanyaki(a)amazon.com>
---
Compare to upstream patch this one does not have close_evtchn flag
because there is no need to handle static event channels.
This feature was added only in 58f6259b7a08f ("xen/evtchn: Introduce new
IOCTL to bind static evtchn")
drivers/xen/events/events_base.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index ee691b20d4a3..04ff194fecf4 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -936,8 +936,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -981,8 +981,6 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;
- xen_evtchn_close(evtchn);
-
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -1000,6 +998,7 @@ static void __unbind_from_irq(unsigned int irq)
}
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
xen_free_irq(irq);
--
2.40.1
From: Maximilian Heyne <mheyne(a)amazon.de>
Commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream
shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taking. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.
This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
channel:
CPU0 CPU1
shutdown_pirq {
xen_evtchn_close(e)
__startup_pirq {
EVTCHNOP_bind_pirq
-> returns just freed evtchn e
set_evtchn_to_irq(e, irq)
}
xen_irq_info_cleanup() {
set_evtchn_to_irq(e, -1)
}
}
Assume here event channel e refers here to the same event channel
number.
After this race the evtchn_to_irq mapping for e is invalid (-1).
- __startup_pirq races with __unbind_from_irq in a similar way. Because
__startup_pirq doesn't take irq_mapping_update_lock it can grab the
evtchn that __unbind_from_irq is currently freeing and cleaning up. In
this case even though the event channel is allocated, its mapping can
be unset in evtchn_to_irq.
The fix is to first cleanup the mappings and then close the event
channel. In this way, when an event channel gets allocated it's
potential previous evtchn_to_irq mappings are guaranteed to be unset already.
This is also the reverse order of the allocation where first the event
channel is allocated and then the mappings are setup.
On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device which results in a call to shutdown_pirq.
With many nvme devices it's therefore likely to hit this race during
boot because there will be multiple calls to shutdown_pirq and
startup_pirq are running potentially in parallel.
------------[ cut here ]------------
blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
kernel BUG at drivers/xen/events/events_base.c:499!
invalid opcode: 0000 [#1] SMP PTI
CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
Workqueue: nvme-reset-wq nvme_reset_work
RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
? show_trace_log_lvl+0x1c1/0x2d9
? show_trace_log_lvl+0x1c1/0x2d9
? set_affinity_irq+0xdc/0x1c0
? __die_body.cold+0x8/0xd
? die+0x2b/0x50
? do_trap+0x90/0x110
? bind_evtchn_to_cpu+0xdf/0xf0
? do_error_trap+0x65/0x80
? bind_evtchn_to_cpu+0xdf/0xf0
? exc_invalid_op+0x4e/0x70
? bind_evtchn_to_cpu+0xdf/0xf0
? asm_exc_invalid_op+0x12/0x20
? bind_evtchn_to_cpu+0xdf/0xf0
? bind_evtchn_to_cpu+0xc5/0xf0
set_affinity_irq+0xdc/0x1c0
irq_do_set_affinity+0x1d7/0x1f0
irq_setup_affinity+0xd6/0x1a0
irq_startup+0x8a/0xf0
__setup_irq+0x639/0x6d0
? nvme_suspend+0x150/0x150
request_threaded_irq+0x10c/0x180
? nvme_suspend+0x150/0x150
pci_request_irq+0xa8/0xf0
? __blk_mq_free_request+0x74/0xa0
queue_request_irq+0x6f/0x80
nvme_create_queue+0x1af/0x200
nvme_create_io_queues+0xbd/0xf0
nvme_setup_io_queues+0x246/0x320
? nvme_irq_check+0x30/0x30
nvme_reset_work+0x1c8/0x400
process_one_work+0x1b0/0x350
worker_thread+0x49/0x310
? process_one_work+0x350/0x350
kthread+0x11b/0x140
? __kthread_bind_mask+0x60/0x60
ret_from_fork+0x22/0x30
Modules linked in:
---[ end trace a11715de1eee1873 ]---
Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
Cc: stable(a)vger.kernel.org
Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
---
Compare to upstream patch this one does not have close_evtchn flag
because there is no need to handle static event channels.
This feature was added only in 58f6259b7a08f ("xen/evtchn: Introduce new
IOCTL to bind static evtchn")
drivers/xen/events/events_base.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 00f8e349921d..96b96516c980 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;
- xen_evtchn_close(evtchn);
-
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq)
}
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
xen_free_irq(irq);
--
2.40.1
[BUG]
Currently btrfs can create subvolume with an invalid qgroup inherit
without triggering any error:
# mkfs.btrfs -O quota -f $dev
# mount $dev $mnt
# btrfs subvolume create -i 2/0 $mnt/subv1
# btrfs qgroup show -prce --sync $mnt
Qgroupid Referenced Exclusive Path
-------- ---------- --------- ----
0/5 16.00KiB 16.00KiB <toplevel>
0/256 16.00KiB 16.00KiB subv1
[CAUSE]
We only do a very basic size check for btrfs_qgroup_inherit structure,
but never really verify if the values are correct.
Thus in btrfs_qgroup_inherit() function, we have to skip non-existing
qgroups, and never return any error.
[FIX]
Fix the behavior and introduce extra checks:
- Introduce early check for btrfs_qgroup_inherit structure
Not only the size, but also all the qgroup ids would be verifyed.
And the timing is very early, so we can return error early.
This early check is very important for snapshot creation, as snapshot
is delayed to transaction commit.
- Drop support for btrfs_qgroup_inherit::num_ref_copies and
num_excl_copies
Those two members are used to specify to copy refr/excl numbers from
other qgroups.
This would definitely mark qgroup inconsistent, and btrfs-progs has
dropped the support for them for a long time.
It's time to drop the support for kernel.
- Verify the supported btrfs_qgroup_inherit::flags
Just in case we want to add extra flags for btrfs_qgroup_inherit.
Now above subvolume creation would fail with -ENOENT other than silently
ignore the non-existing qgroup.
CC: stable(a)vger.kernel.org
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/ioctl.c | 16 +++---------
fs/btrfs/qgroup.c | 52 ++++++++++++++++++++++++++++++++++++++
fs/btrfs/qgroup.h | 3 +++
include/uapi/linux/btrfs.h | 1 +
4 files changed, 59 insertions(+), 13 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8b80fbea1e72..c19ce2e292dc 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1382,7 +1382,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
- u64 nums;
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
if (vol_args->size < sizeof(*inherit) ||
vol_args->size > PAGE_SIZE) {
@@ -1395,19 +1395,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
goto free_args;
}
- if (inherit->num_qgroups > PAGE_SIZE ||
- inherit->num_ref_copies > PAGE_SIZE ||
- inherit->num_excl_copies > PAGE_SIZE) {
- ret = -EINVAL;
+ ret = btrfs_qgroup_check_inherit(fs_info, inherit, vol_args->size);
+ if (ret < 0)
goto free_inherit;
- }
-
- nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
- 2 * inherit->num_excl_copies;
- if (vol_args->size != struct_size(inherit, qgroups, nums)) {
- ret = -EINVAL;
- goto free_inherit;
- }
}
ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file),
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4fa83c76b37b..66968092b554 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3046,6 +3046,58 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
return ret;
}
+int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_inherit *inherit,
+ size_t size)
+{
+ if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP)
+ return -EOPNOTSUPP;
+ if (size < sizeof(*inherit) || size > PAGE_SIZE)
+ return -EINVAL;
+
+ /*
+ * In the past we allow btrfs_qgroup_inherit to specify to copy
+ * refr/excl numbers directly from other qgroups.
+ * This behavior has been disable in btrfs-progs for a very long time,
+ * but here we should also disable them for kernel, as this behavior
+ * is known to mark qgroup inconsistent, and a rescan would wipe out the
+ * change anyway.
+ *
+ * So here we just reject any btrfs_qgroup_inherit with num_ref_copies or
+ * num_excl_copies.
+ */
+ if (inherit->num_ref_copies || inherit->num_excl_copies)
+ return -EINVAL;
+
+ if (inherit->num_qgroups > PAGE_SIZE)
+ return -EINVAL;
+
+ if (size != struct_size(inherit, qgroups, inherit->num_qgroups))
+ return -EINVAL;
+
+ /*
+ * Now check all the remaining qgroups, they should all:
+ * - Exist
+ * - Be higher level qgroups.
+ */
+ for (int i = 0; i < inherit->num_qgroups; i++) {
+ struct btrfs_qgroup *qgroup;
+ u64 qgroupid = inherit->qgroups[i];
+
+ if (btrfs_qgroup_level(qgroupid) == 0)
+ return -EINVAL;
+
+ spin_lock(&fs_info->qgroup_lock);
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (!qgroup) {
+ spin_unlock(&fs_info->qgroup_lock);
+ return -ENOENT;
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+ }
+ return 0;
+}
+
static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info,
u64 inode_rootid,
struct btrfs_qgroup_inherit **inherit)
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 1f664261c064..706640be0ec2 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -350,6 +350,9 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
+int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_inherit *inherit,
+ size_t size);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
u64 objectid, u64 inode_rootid,
struct btrfs_qgroup_inherit *inherit);
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index f8bc34a6bcfa..cdf6ad872149 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -92,6 +92,7 @@ struct btrfs_qgroup_limit {
* struct btrfs_qgroup_inherit.flags
*/
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
+#define BTRFS_QGROUP_INHERIT_FLAGS_SUPP (BTRFS_QGROUP_INHERIT_SET_LIMITS)
struct btrfs_qgroup_inherit {
__u64 flags;
--
2.43.2
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x adf1bb78dab55e36d4d557aa2fb446ebcfe9e5ce
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030417-jaws-icky-a0f2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
adf1bb78dab5 ("mptcp: fix snd_wnd initialization for passive socket")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From adf1bb78dab55e36d4d557aa2fb446ebcfe9e5ce Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Fri, 23 Feb 2024 17:14:15 +0100
Subject: [PATCH] mptcp: fix snd_wnd initialization for passive socket
Such value should be inherited from the first subflow, but
passive sockets always used 'rsk_rcv_wnd'.
Fixes: 6f8a612a33e4 ("mptcp: keep track of advertised windows right edge")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-5-16…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 442fa7d9b57a..2c8f931c6d5b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3211,7 +3211,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
On Mon, Mar 4, 2024 at 3:00 PM Xiubo Li <xiubli(a)redhat.com> wrote:
>
>
> On 3/4/24 19:02, Ilya Dryomov wrote:
>
> On Mon, Mar 4, 2024 at 2:02 AM Xiubo Li <xiubli(a)redhat.com> wrote:
>
> On 3/2/24 00:16, Ilya Dryomov wrote:
>
> On Thu, Feb 29, 2024 at 5:22 AM <xiubli(a)redhat.com> wrote:
>
> From: Xiubo Li <xiubli(a)redhat.com>
>
> The osd code has remove cursor initilizing code and this will make
> the sparse read state into a infinite loop. We should initialize
> the cursor just before each sparse-read in messnger v2.
>
> Cc: stable(a)vger.kernel.org
> URL: https://tracker.ceph.com/issues/64607
> Fixes: 8e46a2d068c9 ("libceph: just wait for more data to be available on the socket")
> Reported-by: Luis Henriques <lhenriques(a)suse.de>
> Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
> ---
> net/ceph/messenger_v2.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
> index a0ca5414b333..7ae0f80100f4 100644
> --- a/net/ceph/messenger_v2.c
> +++ b/net/ceph/messenger_v2.c
> @@ -2025,6 +2025,7 @@ static int prepare_sparse_read_cont(struct ceph_connection *con)
> static int prepare_sparse_read_data(struct ceph_connection *con)
> {
> struct ceph_msg *msg = con->in_msg;
> + u64 len = con->in_msg->sparse_read_total ? : data_len(con->in_msg);
>
> Hi Xiubo,
>
> Why is sparse_read_total being tested here? Isn't this function
> supposed to be called only for sparse reads, after the state is set to
> IN_S_PREPARE_SPARSE_DATA based on a similar test:
>
> if (msg->sparse_read_total)
> con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
> else
> con->v2.in_state = IN_S_PREPARE_READ_DATA;
>
> Then the patch could be simplified and just be:
>
> diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
> index a0ca5414b333..ab3ab130a911 100644
> --- a/net/ceph/messenger_v2.c
> +++ b/net/ceph/messenger_v2.c
> @@ -2034,6 +2034,9 @@ static int prepare_sparse_read_data(struct
> ceph_connection *con)
> if (!con_secure(con))
> con->in_data_crc = -1;
>
> + ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
> + con->in_msg->sparse_read_total);
> +
> reset_in_kvecs(con);
> con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
> con->v2.data_len_remain = data_len(msg);
>
> Else where should we do the test like this ?
>
> Hi Xiubo,
>
> I suspect you copied this test from prepare_message_data() in msgr1,
> where that function is called unconditionally for all reads. In msgr2,
> prepare_sparse_read_data() is called conditionally, so the test just
> seems bogus.
>
> That said, CephFS is the only user of sparse read code, so you should
> know better at this point ;)
>
> As we know the 'sparse_read_total' it's a dedicated member and will be set only in sparse-read case.
>
> In msgr1 for all the read cases they will call "prepare_message_data()", so I just did this check in "prepare_message_data()".
Right.
>
> While for msgr2 it's a little different from msg1 and it won't call 'prepare_read_data()' for all the reads, and for sparse-read it has its own dedicated helper, which is "prepare_sparse_read_data()".
Right.
> So I just did this check in 'prepare_sparse_read_data()' instead.
This where I'm getting lost. Why do a "is this a sparse read" check in
a helper that is dedicated to sparse reads and isn't called for anything
else?
> For "prepare_read_data()" it doesn't make any sense to check "sparse_read_total".
Right, so why doesn't the same go for prepare_sparse_read_data()?
Thanks,
Ilya
v2:
- Dropped already backported patch "x86/bugs: Add asm helpers for
executing VERW".
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
- Boot tested with KASLR and KPTI enabled.
- Rebased to v6.7.8
v1: https://lore.kernel.org/r/20240226-delay-verw-backport-6-7-y-v1-0-ab25f6431…
This is the backport of recently upstreamed series that moves VERW
execution to a later point in exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW executions
may end into MDS affected CPU buffers. Moving VERW closer to ring
transition reduces the attack surface.
Patch 2/7: A conflict was resolved for the hunk
swapgs_restore_regs_and_return_to_usermode.
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
Pawan Gupta (4):
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/arch/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/nospec-branch.h | 12 ------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 20 +++++++++++++++----
11 files changed, 75 insertions(+), 45 deletions(-)
---
base-commit: d6d6c49dbf4512f1421f5e42896e2d70dc121f9a
change-id: 20240226-delay-verw-backport-6-7-y-a2cb3f26bb90
Best regards,
--
Thanks,
Pawan
From: Xiubo Li <xiubli(a)redhat.com>
The osd code has remove cursor initilizing code and this will make
the sparse read state into a infinite loop. We should initialize
the cursor just before each sparse-read in messnger v2.
Cc: stable(a)vger.kernel.org
URL: https://tracker.ceph.com/issues/64607
Fixes: 8e46a2d068c9 ("libceph: just wait for more data to be available on the socket")
Reported-by: Luis Henriques <lhenriques(a)suse.de>
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
net/ceph/messenger_v2.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index a0ca5414b333..7ae0f80100f4 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -2025,6 +2025,7 @@ static int prepare_sparse_read_cont(struct ceph_connection *con)
static int prepare_sparse_read_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
+ u64 len = con->in_msg->sparse_read_total ? : data_len(con->in_msg);
dout("%s: starting sparse read\n", __func__);
@@ -2034,6 +2035,8 @@ static int prepare_sparse_read_data(struct ceph_connection *con)
if (!con_secure(con))
con->in_data_crc = -1;
+ ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg, len);
+
reset_in_kvecs(con);
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
con->v2.data_len_remain = data_len(msg);
--
2.43.0
Here are two patches fixing issues in MPTCP diag.sh kselftest:
- Patch 1 makes sure the exit code is '1' in case of error, and not the
test ID, not to return an exit code that would be wrongly interpreted
by the ksefltests framework, e.g. '4' means 'skip'.
- Patch 2 avoids waiting for unnecessary conditions, which can cause
timeouts in some very slow environments.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Geliang Tang (1):
selftests: mptcp: diag: return KSFT_FAIL not test_cnt
Matthieu Baerts (NGI0) (1):
selftests: mptcp: diag: avoid extra waiting
tools/testing/selftests/net/mptcp/diag.sh | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
---
base-commit: 1c61728be22c1cb49c1be88693e72d8c06b1c81e
change-id: 20240301-upstream-net-20240301-selftests-mptcp-diag-exit-timeout-207d7925b7c0
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
when AOP_WRITEPAGE_ACTIVATE is returned (as NFS does when it detects
congestion) it is important that the page is redirtied.
nfs_writepage_locked() doesn't do this, so files can become corrupted as
writes can be lost.
Note that this is not needed in v6.8 as AOP_WRITEPAGE_ACTIVATE cannot be
returned. It is needed for kernels v5.18..v6.7. From 6.3 onward the patch
is different as it needs to mention "folio", not "page".
Reported-and-tested-by: Jacek Tomaka <Jacek.Tomaka(a)poczta.fm>
Fixes: 6df25e58532b ("nfs: remove reliance on bdi congestion")
Signed-off-by: NeilBrown <neilb(a)suse.de>
---
fs/nfs/write.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f41d24b54fd1..6a0606668417 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page,
int err;
if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
+ NFS_SERVER(inode)->write_congested) {
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
+ }
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0,
--
2.43.0
From: Max Krummenacher <max.krummenacher(a)toradex.com>
This reverts commit ef4a40953c8076626875ff91c41e210fcee7a6fd.
The commit was applied to make further commits apply cleanly, but the
commit depends on other commits in the same patchset. I.e. the
controlling DSI host would need a change too. Thus one would need to
backport the full patchset changing the DSI hosts and all downstream
DSI device drivers.
Revert the commit and fix up the conflicts with the backported fixes
to the lt8912b driver.
Signed-off-by: Max Krummenacher <max.krummenacher(a)toradex.com>
Conflicts:
drivers/gpu/drm/bridge/lontium-lt8912b.c
---
drivers/gpu/drm/bridge/lontium-lt8912b.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c
index 6891863ed5104..e16b0fc0cda0f 100644
--- a/drivers/gpu/drm/bridge/lontium-lt8912b.c
+++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c
@@ -571,6 +571,10 @@ static int lt8912_bridge_attach(struct drm_bridge *bridge,
if (ret)
goto error;
+ ret = lt8912_attach_dsi(lt);
+ if (ret)
+ goto error;
+
return 0;
error:
@@ -726,15 +730,8 @@ static int lt8912_probe(struct i2c_client *client,
drm_bridge_add(<->bridge);
- ret = lt8912_attach_dsi(lt);
- if (ret)
- goto err_attach;
-
return 0;
-err_attach:
- drm_bridge_remove(<->bridge);
- lt8912_free_i2c(lt);
err_i2c:
lt8912_put_dt(lt);
err_dt_parse:
--
2.42.0
Please consider the patches below for backporting to v6.1. They should
all apply cleanly in the given order.
These are prerequisites for NX compat support on x86, but the
remaining changes do not apply cleanly and will be sent as a patch
series at a later date.
By themselves, these changes not only constitute a reasonable cleanup,
they are also needed for future support of x86s [0] CPUs that are no
longer able to transition out of long mode.
Documentation/x86/boot.rst | 2 +-
arch/x86/Kconfig | 17 +
arch/x86/boot/compressed/Makefile | 8 +-
arch/x86/boot/compressed/efi_mixed.S | 383 +++++++++++++++++++
arch/x86/boot/compressed/efi_thunk_64.S | 195 ----------
arch/x86/boot/compressed/head_32.S | 25 +-
arch/x86/boot/compressed/head_64.S | 566 ++++++-----------------------
arch/x86/boot/compressed/mem_encrypt.S | 152 +++++++-
arch/x86/boot/compressed/misc.c | 34 +-
arch/x86/boot/compressed/misc.h | 2 -
arch/x86/boot/compressed/pgtable.h | 10 +-
arch/x86/boot/compressed/pgtable_64.c | 87 ++---
arch/x86/boot/header.S | 2 +-
arch/x86/boot/tools/build.c | 2 +
drivers/firmware/efi/efi.c | 22 ++
drivers/firmware/efi/libstub/alignedmem.c | 5 +-
drivers/firmware/efi/libstub/arm64-stub.c | 6 +-
drivers/firmware/efi/libstub/efistub.h | 6 +-
drivers/firmware/efi/libstub/mem.c | 3 +-
drivers/firmware/efi/libstub/randomalloc.c | 5 +-
drivers/firmware/efi/libstub/x86-stub.c | 53 ++-
drivers/firmware/efi/vars.c | 13 +-
include/linux/decompress/mm.h | 2 +-
23 files changed, 805 insertions(+), 795 deletions(-)
[0] https://www.intel.com/content/www/us/en/developer/articles/technical/envisi…
9cf42bca30e9 efi: libstub: use EFI_LOADER_CODE region when moving the
kernel in memory
cb8bda8ad443 x86/boot/compressed: Rename efi_thunk_64.S to efi-mixed.S
e2ab9eab324c x86/boot/compressed: Move 32-bit entrypoint code into .text section
5c3a85f35b58 x86/boot/compressed: Move bootargs parsing out of 32-bit
startup code
91592b5c0c2f x86/boot/compressed: Move efi32_pe_entry into .text section
73a6dec80e2a x86/boot/compressed: Move efi32_entry out of head_64.S
7f22ca396778 x86/boot/compressed: Move efi32_pe_entry() out of head_64.S
4b52016247ae x86/boot/compressed, efi: Merge multiple definitions of
image_offset into one
630f337f0c4f x86/boot/compressed: Simplify IDT/GDT preserve/restore in
the EFI thunk
6aac80a8da46 x86/boot/compressed: Avoid touching ECX in
startup32_set_idt_entry()
d73a257f7f86 x86/boot/compressed: Pull global variable reference into
startup32_load_idt()
c6355995ba47 x86/boot/compressed: Move startup32_load_idt() into .text section
9ea813be3d34 x86/boot/compressed: Move startup32_load_idt() out of head_64.S
b5d854cd4b6a x86/boot/compressed: Move startup32_check_sev_cbit() into .text
9d7eaae6a071 x86/boot/compressed: Move startup32_check_sev_cbit() out
of head_64.S
30c9ca16a527 x86/boot/compressed: Adhere to calling convention in
get_sev_encryption_bit()
61de13df9590 x86/boot/compressed: Only build mem_encrypt.S if AMD_MEM_ENCRYPT=y
bad267f9e18f efi: verify that variable services are supported
0217a40d7ba6 efi: efivars: prevent double registration
cc3fdda2876e x86/efi: Make the deprecated EFI handover protocol optional
7734a0f31e99 x86/boot: Robustify calling startup_{32,64}() from the
decompressor code
d2d7a54f69b6 x86/efistub: Branch straight to kernel entry point from C code
df9215f15206 x86/efistub: Simplify and clean up handover entry code
127920645876 x86/decompressor: Avoid magic offsets for EFI handover entrypoint
d7156b986d4c x86/efistub: Clear BSS in EFI handover protocol entrypoint
8b63cba746f8 x86/decompressor: Store boot_params pointer in callee save register
00c6b0978ec1 x86/decompressor: Assign paging related global variables earlier
e8972a76aa90 x86/decompressor: Call trampoline as a normal function
918a7a04e717 x86/decompressor: Use standard calling convention for trampoline
bd328aa01ff7 x86/decompressor: Avoid the need for a stack in the
32-bit trampoline
64ef578b6b68 x86/decompressor: Call trampoline directly from C code
f97b67a773cd x86/decompressor: Only call the trampoline when changing
paging levels
cb83cece57e1 x86/decompressor: Pass pgtable address to trampoline directly
03dda95137d3 x86/decompressor: Merge trampoline cleanup with switching code
24388292e2d7 x86/decompressor: Move global symbol references to C code
8217ad0a435f decompress: Use 8 byte alignment
An anon THP page is first added to swap cache before reclaiming it.
Initially, each tail page contains the proper swap entry value(stored in
->private field) which is filled from add_to_swap_cache(). After
migrating the THP page sitting on the swap cache, only the swap entry of
the head page is filled(see folio_migrate_mapping()).
Now when this page is tried to split(one case is when this page is again
migrated, see migrate_pages()->try_split_thp()), the tail pages
->private is not stored with proper swap entry values. When this tail
page is now try to be freed, as part of it delete_from_swap_cache() is
called which operates on the wrong swap cache index and eventually
replaces the wrong swap cache index with shadow/NULL value, frees the
page.
This leads to the state with a swap cache containing the freed page.
This issue can manifest in many forms and the most common thing observed
is the rcu stall during the swapin (see mapping_get_entry()).
On the recent kernels, this issues is indirectly getting fixed with the
series[1], to be specific[2].
When tried to back port this series, it is observed many merge
conflicts and also seems dependent on many other changes. As backporting
to LTS branches is not a trivial one, the similar change from [2] is
picked as a fix.
[1] https://lore.kernel.org/all/20230821160849.531668-1-david@redhat.com/
[2] https://lore.kernel.org/all/20230821160849.531668-5-david@redhat.com/
Closes: https://lore.kernel.org/linux-mm/69cb784f-578d-ded1-cd9f-c6db04696336@quici…
Fixes: 3417013e0d18 ("mm/migrate: Add folio_migrate_mapping()")
Cc: <stable(a)vger.kernel.org> # see patch description, applicable to <=6.1
Signed-off-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
---
mm/huge_memory.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5957794..cc5273f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2477,6 +2477,8 @@ static void __split_huge_page_tail(struct page *head, int tail,
if (!folio_test_swapcache(page_folio(head))) {
VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
page_tail->private = 0;
+ } else {
+ set_page_private(page_tail, (unsigned long)head->private + tail);
}
/* Page flags must be visible before we make the page non-compound. */
--
2.7.4
Hi Greg and Sasha,
Please apply upstream commit 5b750b22530f ("drm/amd/display: Increase
frame warning limit with KASAN or KCSAN in dml") to linux-6.1.y, as it
is needed to avoid instances of -Wframe-larger-than in allmodconfig,
which has -Werror enabled. It applies cleanly for me and it is already
in 6.6 and 6.7. The fixes tag is not entirely accurate and commit
e63e35f0164c ("drm/amd/display: Increase frame-larger-than for all
display_mode_vba files"), which was recently applied to that tree,
depends on it (I should have made that clearer in the patch).
If there are any issues, please let me know.
Cheers,
Nathan
On some pinephones the video output sometimes freezes (flips between two
frames) [1]. It seems to be that the reason for this behaviour is that
PLL-MIPI is outside its limits, and the GPU is not running at a fixed
rate.
In this patch series I propose the following changes:
1. sunxi-ng: Adhere to the following constraints given in the
Allwinner A64 Manual regarding PLL-MIPI:
* M/N <= 3
* (PLL_VIDEO0)/M >= 24MHz
* 500MHz <= clockrate <= 1400MHz
2. Remove two operating points from the A64 DTS OPPs, so that the GPU
runs at a fixed rate of 432 MHz.
Note, that when pinning the GPU to 432 MHz the issue [1] completely
disappears for me. I've searched the BSP and could not find any
indication that supports the idea of having the three OPPs. The only
frequency I found in the BPSs for A64 is 432 MHz, which has also proven
stable for me.
Another bigger change compared to the previous version is that I've
removed the patch to adapt the XBD599 panel's timings to Allwinner A64's
PLL-MIPI new constraints from this series. Mainly, because I'm currently
evaluationg other options that may or may not work. (It may work at
least until HDMI support is upstreamed.) I'll probably resend the patch
at a later point in time.
I very much appreciate your feedback!
[1] https://gitlab.com/postmarketOS/pmaports/-/issues/805
Signed-off-by: Frank Oltmanns <frank(a)oltmanns.dev>
---
Changes in v3:
- dts: Pin GPU to 432 MHz.
- nkm and a64: Move minimum and maximum rate handling to the common part
of the sunxi-ng driver.
- Removed st7703 patch from series.
- Link to v2: https://lore.kernel.org/r/20240205-pinephone-pll-fixes-v2-0-96a46a2d8c9b@ol…
Changes in v2:
- dts: Increase minimum GPU frequency to 192 MHz.
- nkm and a64: Add minimum and maximum rate for PLL-MIPI.
- nkm: Use the same approach for skipping invalid rates in
ccu_nkm_find_best() as in ccu_nkm_find_best_with_parent_adj().
- nkm: Improve names for ratio struct members and hence get rid of
describing comments.
- nkm and a64: Correct description in the commit messages: M/N <= 3
- Remove patches for nm as they were not needed.
- st7703: Rework the commit message to cover more background for the
change.
- Link to v1: https://lore.kernel.org/r/20231218-pinephone-pll-fixes-v1-0-e238b6ed6dc1@ol…
---
Frank Oltmanns (5):
clk: sunxi-ng: common: Support minimum and maximum rate
clk: sunxi-ng: a64: Set minimum and maximum rate for PLL-MIPI
clk: sunxi-ng: nkm: Support constraints on m/n ratio and parent rate
clk: sunxi-ng: a64: Add constraints on PLL-MIPI's n/m ratio and parent rate
arm64: dts: allwinner: a64: Run GPU at 432 MHz
arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 8 --------
drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 14 +++++++++-----
drivers/clk/sunxi-ng/ccu_common.c | 15 +++++++++++++++
drivers/clk/sunxi-ng/ccu_common.h | 3 +++
drivers/clk/sunxi-ng/ccu_nkm.c | 21 +++++++++++++++++++++
drivers/clk/sunxi-ng/ccu_nkm.h | 2 ++
6 files changed, 50 insertions(+), 13 deletions(-)
---
base-commit: 216c1282dde38ca87ebdf1ccacee5a0682901574
change-id: 20231218-pinephone-pll-fixes-0ccdfde273e4
Best regards,
--
Frank Oltmanns <frank(a)oltmanns.dev>
Svacer reports NULL-pointer dereference and double free issues in
do_bank_switch() in case sdw_ml_sync_bank_switch() returns an error
not on the first iteration of the list_for_each_entry() loop. These
problems are present in 5.10, 5.15 and 6.1 stable releases. These problems
have been fixed by the following upstream patch that can be cleanly
applied to 5.10, 5.15 and 6.1 branches.
Hi stable maintainers,
The following patch in mainline is listed as a fix for CVE-2023-2176:
8d037973d48c026224ab285e6a06985ccac6f7bf (RDMA/core: Refactor rdma_bind_addr)
And the following is a fix for a regression in the above patch:
0e15863015d97c1ee2cc29d599abcc7fa2dc3e95 (RDMA/core: Update CMA destination address on rdma_resolve_addr)
To my knowledge, at least back to v6.1 is vulnerable to this same bug.
Since these should apply directly to 6.1.y, can these be picked up for that branch?
Regards,
Brennan
v2:
- Runtime patch jmp instead of verw in macro CLEAR_CPU_BUFFERS due to
lack of relative addressing support in relocations in kernels <v6.5.
- Rebased to v6.1.80
- Boot tested with KASLR and KPTI enabled.
- Fixed warning:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction
- Verified VERW being executed with mitigation ON, and not being
executed with mitigation turned OFF.
- Rebased to v6.1.80.
v1: https://lore.kernel.org/r/20240226-delay-verw-backport-6-1-y-v1-0-b3a2c5b9b…
This is the backport of recently upstreamed series that moves VERW
execution to a later point in exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW executions
may end into MDS affected CPU buffers. Moving VERW closer to ring
transition reduces the attack surface.
Patch 1/6 includes a minor fix that is queued for upstream:
https://lore.kernel.org/lkml/170899674562.398.6398007479766564897.tip-bot2@…
Patch 1,2,5 and 6 needed conflict resolution.
I saw a few new warnings:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction
I tried using REACHABLE, but that did not fix the warning.
For the below warning:
vmlinux.o: warning: objtool: .altinstr_replacement+0x17: unsupported relocation in alternatives section
not sure if this is related to this series or a pre-existing warning, I
will check later without this series.
I am not too concerned because the alternative did substitute verw
correctly:
entry_SYSCALL_64:
...
0xffffffff8200013d <+253>: swapgs
0xffffffff82000140 <+256>: verw 0xffffffff82000000
0xffffffff82000148 <+264>: sysretq
0xffffffff8200014b <+267>: int3
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
To: stable(a)vger.kernel.org
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
Pawan Gupta (5):
x86/bugs: Add asm helpers for executing VERW
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry.S | 23 ++++++++++++++++++++++
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/cpufeatures.h | 2 +-
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/nospec-branch.h | 27 +++++++++++++------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 12 ++++++++----
13 files changed, 106 insertions(+), 46 deletions(-)
---
base-commit: a3eb3a74aa8c94e6c8130b55f3b031f29162868c
change-id: 20240226-delay-verw-backport-6-1-y-4b0cec84087c
Best regards,
--
Thanks,
Pawan
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b9cd26f640a308ea314ad23532de9a8592cd09d2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030448-walrus-tribunal-7b38@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
b9cd26f640a3 ("mptcp: push at DSS boundaries")
1094c6fe7280 ("mptcp: fix possible divide by zero")
8ce568ed06ce ("mptcp: drop tx skb cache")
4e14867d5e91 ("mptcp: tune re-injections for csum enabled mode")
2948d0a1e5ae ("mptcp: factor out __mptcp_retrans helper()")
eaeef1ce55ec ("mptcp: fix memory accounting on allocation error")
d489ded1a369 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b9cd26f640a308ea314ad23532de9a8592cd09d2 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Fri, 23 Feb 2024 17:14:14 +0100
Subject: [PATCH] mptcp: push at DSS boundaries
when inserting not contiguous data in the subflow write queue,
the protocol creates a new skb and prevent the TCP stack from
merging it later with already queued skbs by setting the EOR marker.
Still no push flag is explicitly set at the end of previous GSO
packet, making the aggregation on the receiver side sub-optimal -
and packetdrill self-tests less predictable.
Explicitly mark the end of not contiguous DSS with the push flag.
Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-4-16…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 948606a537da..442fa7d9b57a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1260,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
+ tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
Hi there!
Are you interested in increasing your customer base through your company's website?
We're transforming the virtual world into tangible profits by creating functional, responsive websites and profitable online stores, optimized for search engine rankings.
Whether you need a simple website or a complex web application, our team of experts utilizes advanced tools to deliver fast and user-friendly products.
Would you be interested in hearing more about what we can do for you in this regard?
Best regards
Ray Galt
There is an issue with ACPI overlay table removal specifically related
to I2C multiplexers.
Consider an ACPI SSDT Overlay that defines a PCA9548 I2C mux on an
existing I2C bus. When this table is loaded we see the creation of a
device for the overall PCA9548 chip and 8 further devices - one
i2c_adapter each for the mux channels. These are all bound to their
ACPI equivalents via an eventual invocation of acpi_bind_one().
When we unload the SSDT overlay we run into the problem. The ACPI
devices are deleted as normal via acpi_device_del_work_fn() and the
acpi_device_del_list.
However, the following warning and stack trace is output as the
deletion does not go smoothly:
------------[ cut here ]------------
kernfs: can not remove 'physical_node', no directory
WARNING: CPU: 1 PID: 11 at fs/kernfs/dir.c:1674 kernfs_remove_by_name_ns+0xb9/0xc0
Modules linked in:
CPU: 1 PID: 11 Comm: kworker/u128:0 Not tainted 6.8.0-rc6+ #1
Hardware name: congatec AG conga-B7E3/conga-B7E3, BIOS 5.13 05/16/2023
Workqueue: kacpi_hotplug acpi_device_del_work_fn
RIP: 0010:kernfs_remove_by_name_ns+0xb9/0xc0
Code: e4 00 48 89 ef e8 07 71 db ff 5b b8 fe ff ff ff 5d 41 5c 41 5d e9 a7 55 e4 00 0f 0b eb a6 48 c7 c7 f0 38 0d 9d e8 97 0a d5 ff <0f> 0b eb dc 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
RSP: 0018:ffff9f864008fb28 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff8ef90a8d4940 RCX: 0000000000000000
RDX: ffff8f000e267d10 RSI: ffff8f000e25c780 RDI: ffff8f000e25c780
RBP: ffff8ef9186f9870 R08: 0000000000013ffb R09: 00000000ffffbfff
R10: 00000000ffffbfff R11: ffff8f000e0a0000 R12: ffff9f864008fb50
R13: ffff8ef90c93dd60 R14: ffff8ef9010d0958 R15: ffff8ef9186f98c8
FS: 0000000000000000(0000) GS:ffff8f000e240000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f48f5253a08 CR3: 00000003cb82e000 CR4: 00000000003506f0
Call Trace:
<TASK>
? kernfs_remove_by_name_ns+0xb9/0xc0
? __warn+0x7c/0x130
? kernfs_remove_by_name_ns+0xb9/0xc0
? report_bug+0x171/0x1a0
? handle_bug+0x3c/0x70
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? kernfs_remove_by_name_ns+0xb9/0xc0
? kernfs_remove_by_name_ns+0xb9/0xc0
acpi_unbind_one+0x108/0x180
device_del+0x18b/0x490
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
device_unregister+0xd/0x30
i2c_del_adapter.part.0+0x1bf/0x250
i2c_mux_del_adapters+0xa1/0xe0
i2c_device_remove+0x1e/0x80
device_release_driver_internal+0x19a/0x200
bus_remove_device+0xbf/0x100
device_del+0x157/0x490
? __pfx_device_match_fwnode+0x10/0x10
? srso_return_thunk+0x5/0x5f
device_unregister+0xd/0x30
i2c_acpi_notify+0x10f/0x140
notifier_call_chain+0x58/0xd0
blocking_notifier_call_chain+0x3a/0x60
acpi_device_del_work_fn+0x85/0x1d0
process_one_work+0x134/0x2f0
worker_thread+0x2f0/0x410
? __pfx_worker_thread+0x10/0x10
kthread+0xe3/0x110
? __pfx_kthread+0x10/0x10
ret_from_fork+0x2f/0x50
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1b/0x30
</TASK>
---[ end trace 0000000000000000 ]---
...
repeated 7 more times, 1 for each channel of the mux
...
The issue is that the binding of the ACPI devices to their peer I2C
adapters is not correctly cleaned up. Digging deeper into the issue we
see that the deletion order is such that the ACPI devices matching the
mux channel i2c adapters are deleted first during the SSDT overlay
removal. For each of the channels we see a call to i2c_acpi_notify()
with ACPI_RECONFIG_DEVICE_REMOVE but, because these devices are not
actually i2c_clients, nothing is done for them.
Later on, after each of the mux channels has been dealt with, we come
to delete the i2c_client representing the PCA9548 device. This is the
call stack we see above, whereby the kernel cleans up the i2c_client
including destruction of the mux and its channel adapters. At this
point we do attempt to unbind from the ACPI peers but those peers no
longer exist and so we hit the kernfs errors.
The fix is to augment i2c_acpi_notify() to handle i2c_adapters. But,
given that the life cycle of the adapters is linked to the i2c_client,
instead of deleting the i2c_adapters during the i2c_acpi_notify(), we
just trigger unbinding of the ACPI device from the adapter device, and
allow the clean up of the adapter to continue in the way it always has.
Signed-off-by: Hamish Martin <hamish.martin(a)alliedtelesis.co.nz>
Reviewed-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)kernel.org>
Fixes: 525e6fabeae2 ("i2c / ACPI: add support for ACPI reconfigure notifications")
Cc: <stable(a)vger.kernel.org> # v4.8+
---
drivers/i2c/i2c-core-acpi.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index d6037a328669..67fa8deccef6 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -445,6 +445,11 @@ static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
return i2c_find_device_by_fwnode(acpi_fwnode_handle(adev));
}
+static struct i2c_adapter *i2c_acpi_find_adapter_by_adev(struct acpi_device *adev)
+{
+ return i2c_find_adapter_by_fwnode(acpi_fwnode_handle(adev));
+}
+
static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
void *arg)
{
@@ -471,11 +476,17 @@ static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
break;
client = i2c_acpi_find_client_by_adev(adev);
- if (!client)
- break;
+ if (client) {
+ i2c_unregister_device(client);
+ put_device(&client->dev);
+ }
+
+ adapter = i2c_acpi_find_adapter_by_adev(adev);
+ if (adapter) {
+ acpi_device_notify_remove(&adapter->dev);
+ put_device(&adapter->dev);
+ }
- i2c_unregister_device(client);
- put_device(&client->dev);
break;
}
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d5fbeff1ab812b6c473b6924bee8748469462e2c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100421-divisible-bacterium-18b5@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Sat, 16 Sep 2023 12:52:46 +0200
Subject: [PATCH] mptcp: move __mptcp_error_report in protocol.c
This will simplify the next patch ("mptcp: process pending subflow error
on close").
No functional change intended.
Cc: stable(a)vger.kernel.org # v5.12+
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a7fc16f5175d..915860027b1a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
return moved;
}
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err = sock_error(ssk);
+ int ssk_state;
+
+ if (!err)
+ continue;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+ continue;
+
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk_error_report(sk);
+ break;
+ }
+}
+
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9bf3c7bc1762..2f40c23fdb0d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
-void __mptcp_error_report(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow;
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int err = sock_error(ssk);
- int ssk_state;
-
- if (!err)
- continue;
-
- /* only propagate errors on fallen-back sockets or
- * on MPC connect
- */
- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
- continue;
-
- /* We need to propagate only transition to CLOSE state.
- * Orphaned socket will see such state change via
- * subflow_sched_work_if_closed() and that path will properly
- * destroy the msk as needed.
- */
- ssk_state = inet_sk_state_load(ssk);
- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
- WRITE_ONCE(sk->sk_err, -err);
-
- /* This barrier is coupled with smp_rmb() in mptcp_poll() */
- smp_wmb();
- sk_error_report(sk);
- break;
- }
-}
-
static void subflow_error_report(struct sock *ssk)
{
struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
After the 'Fixes' commit mentioned below, which is a partial backport,
the MPTCP worker was no longer marking the first subflow as "UNCONNECTED"
when the socket was transitioning to TCP_CLOSE state.
As a result, in v6.1, it was no longer possible to reconnect to the just
disconnected socket. Continue to do that like before, only for the first
subflow.
A few refactoring have been done around the 'msk->subflow' in later
versions, and it looks like this is not needed to do that there, but
still needed in v6.1. Without that, the 'disconnect' tests from the
mptcp_connect.sh selftest fail: they repeat the transfer 3 times by
reconnecting to the server each time.
Fixes: 7857e35ef10e ("mptcp: get rid of msk->subflow")
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Notes:
- This is specific to the 6.1 version having the partial backport.
---
net/mptcp/protocol.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cdabb00648bd2..125825db642cc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2440,6 +2440,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
__mptcp_subflow_disconnect(ssk, subflow, flags);
+ if (msk->subflow && ssk == msk->subflow->sk)
+ msk->subflow->state = SS_UNCONNECTED;
release_sock(ssk);
goto out;
--
2.43.0
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x cf7c2789822db8b5efa34f5ebcf1621bc0008d48
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030407-headway-blustery-23f1@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
cf7c2789822d ("iommufd: Fix protection fault in iommufd_test_syz_conv_iova")
bd7a282650b8 ("iommufd: Add iommufd_ctx to iommufd_put_object()")
65fe32f7a447 ("iommufd/selftest: Add nested domain allocation for mock domain")
2bdabb8e82f5 ("iommu: Pass in parent domain with user_data to domain_alloc_user op")
b5021cb264e6 ("iommufd: Share iommufd_hwpt_alloc with IOMMUFD_OBJ_HWPT_NESTED")
89db31635c87 ("iommufd: Derive iommufd_hwpt_paging from iommufd_hw_pagetable")
58d84f430dc7 ("iommufd/device: Wrap IOMMUFD_OBJ_HWPT_PAGING-only configurations")
9744a7ab62cc ("iommufd: Rename IOMMUFD_OBJ_HW_PAGETABLE to IOMMUFD_OBJ_HWPT_PAGING")
2ccabf81ddff ("iommufd: Only enforce cache coherency in iommufd_hw_pagetable_alloc")
a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP")
7adf267d66d1 ("iommufd/selftest: Test IOMMU_HWPT_SET_DIRTY_TRACKING")
266ce58989ba ("iommufd/selftest: Test IOMMU_HWPT_ALLOC_DIRTY_TRACKING")
e04b23c8d4ed ("iommufd/selftest: Expand mock_domain with dev_flags")
421a511a293f ("iommu/amd: Access/Dirty bit support in IOPTEs")
134288158a41 ("iommu/amd: Add domain_alloc_user based domain allocation")
b9a60d6f850e ("iommufd: Add IOMMU_HWPT_GET_DIRTY_BITMAP")
e2a4b2947849 ("iommufd: Add IOMMU_HWPT_SET_DIRTY_TRACKING")
5f9bdbf4c658 ("iommufd: Add a flag to enforce dirty tracking on attach")
750e2e902b71 ("iommu: Add iommu_domain ops for dirty tracking")
b5f9e63278d6 ("iommufd: Correct IOMMU_HWPT_ALLOC_NEST_PARENT description")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cf7c2789822db8b5efa34f5ebcf1621bc0008d48 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc(a)nvidia.com>
Date: Thu, 22 Feb 2024 13:23:47 -0800
Subject: [PATCH] iommufd: Fix protection fault in iommufd_test_syz_conv_iova
Syzkaller reported the following bug:
general protection fault, probably for non-canonical address 0xdffffc0000000038: 0000 [#1] SMP KASAN
KASAN: null-ptr-deref in range [0x00000000000001c0-0x00000000000001c7]
Call Trace:
lock_acquire
lock_acquire+0x1ce/0x4f0
down_read+0x93/0x4a0
iommufd_test_syz_conv_iova+0x56/0x1f0
iommufd_test_access_rw.isra.0+0x2ec/0x390
iommufd_test+0x1058/0x1e30
iommufd_fops_ioctl+0x381/0x510
vfs_ioctl
__do_sys_ioctl
__se_sys_ioctl
__x64_sys_ioctl+0x170/0x1e0
do_syscall_x64
do_syscall_64+0x71/0x140
This is because the new iommufd_access_change_ioas() sets access->ioas to
NULL during its process, so the lock might be gone in a concurrent racing
context.
Fix this by doing the same access->ioas sanity as iommufd_access_rw() and
iommufd_access_pin_pages() functions do.
Cc: stable(a)vger.kernel.org
Fixes: 9227da7816dd ("iommufd: Add iommufd_access_change_ioas(_id) helpers")
Link: https://lore.kernel.org/r/3f1932acaf1dd494d404c04364d73ce8f57f3e5e.17086366…
Reported-by: Jason Gunthorpe <jgg(a)nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc(a)nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 2bfe77bd351d..d59e199a8705 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -63,8 +63,8 @@ enum {
* In syzkaller mode the 64 bit IOVA is converted into an nth area and offset
* value. This has a much smaller randomization space and syzkaller can hit it.
*/
-static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
- u64 *iova)
+static unsigned long __iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
+ u64 *iova)
{
struct syz_layout {
__u32 nth_area;
@@ -88,6 +88,21 @@ static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
return 0;
}
+static unsigned long iommufd_test_syz_conv_iova(struct iommufd_access *access,
+ u64 *iova)
+{
+ unsigned long ret;
+
+ mutex_lock(&access->ioas_lock);
+ if (!access->ioas) {
+ mutex_unlock(&access->ioas_lock);
+ return 0;
+ }
+ ret = __iommufd_test_syz_conv_iova(&access->ioas->iopt, iova);
+ mutex_unlock(&access->ioas_lock);
+ return ret;
+}
+
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
unsigned int ioas_id, u64 *iova, u32 *flags)
{
@@ -100,7 +115,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
ioas = iommufd_get_ioas(ucmd->ictx, ioas_id);
if (IS_ERR(ioas))
return;
- *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
+ *iova = __iommufd_test_syz_conv_iova(&ioas->iopt, iova);
iommufd_put_object(ucmd->ictx, &ioas->obj);
}
@@ -1161,7 +1176,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
}
if (flags & MOCK_FLAGS_ACCESS_SYZ)
- iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
+ iova = iommufd_test_syz_conv_iova(staccess->access,
&cmd->access_pages.iova);
npages = (ALIGN(iova + length, PAGE_SIZE) -
@@ -1263,8 +1278,8 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
}
if (flags & MOCK_FLAGS_ACCESS_SYZ)
- iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
- &cmd->access_rw.iova);
+ iova = iommufd_test_syz_conv_iova(staccess->access,
+ &cmd->access_rw.iova);
rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags);
if (rc)
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 4774848fef6041716a4883217eb75f6b10eb183b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030445-rants-grading-f698@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
4774848fef60 ("riscv: Add a custom ISA extension for the [ms]envcfg CSR")
aec3353963b8 ("riscv: add ISA extension parsing for vector crypto")
0d8295ed975b ("riscv: add ISA extension parsing for scalar crypto")
e45f463a9b01 ("riscv: add ISA extension parsing for Zbc")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4774848fef6041716a4883217eb75f6b10eb183b Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland(a)sifive.com>
Date: Tue, 27 Feb 2024 22:55:34 -0800
Subject: [PATCH] riscv: Add a custom ISA extension for the [ms]envcfg CSR
The [ms]envcfg CSR was added in version 1.12 of the RISC-V privileged
ISA (aka S[ms]1p12). However, bits in this CSR are defined by several
other extensions which may be implemented separately from any particular
version of the privileged ISA (for example, some unrelated errata may
prevent an implementation from claiming conformance with Ss1p12). As a
result, Linux cannot simply use the privileged ISA version to determine
if the CSR is present. It must also check if any of these other
extensions are implemented. It also cannot probe the existence of the
CSR at runtime, because Linux does not require Sstrict, so (in the
absence of additional information) it cannot know if a CSR at that
address is [ms]envcfg or part of some non-conforming vendor extension.
Since there are several standard extensions that imply the existence of
the [ms]envcfg CSR, it becomes unwieldy to check for all of them
wherever the CSR is accessed. Instead, define a custom Xlinuxenvcfg ISA
extension bit that is implied by the other extensions and denotes that
the CSR exists as defined in the privileged ISA, containing at least one
of the fields common between menvcfg and senvcfg.
This extension does not need to be parsed from the devicetree or ISA
string because it can only be implemented as a subset of some other
standard extension.
Cc: <stable(a)vger.kernel.org> # v6.7+
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Reviewed-by: Andrew Jones <ajones(a)ventanamicro.com>
Link: https://lore.kernel.org/r/20240228065559.3434837-3-samuel.holland@sifive.com
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5340f818746b..1f2d2599c655 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,8 @@
#define RISCV_ISA_EXT_ZTSO 72
#define RISCV_ISA_EXT_ZACAS 73
+#define RISCV_ISA_EXT_XLINUXENVCFG 127
+
#define RISCV_ISA_EXT_MAX 128
#define RISCV_ISA_EXT_INVALID U32_MAX
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index c5b13f7dd482..dacffef68ce2 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -201,6 +201,16 @@ static const unsigned int riscv_zvbb_exts[] = {
RISCV_ISA_EXT_ZVKB
};
+/*
+ * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V
+ * privileged ISA, the existence of the CSRs is implied by any extension which
+ * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the
+ * existence of the CSR, and treat it as a subset of those other extensions.
+ */
+static const unsigned int riscv_xlinuxenvcfg_exts[] = {
+ RISCV_ISA_EXT_XLINUXENVCFG
+};
+
/*
* The canonical order of ISA extension names in the ISA string is defined in
* chapter 27 of the unprivileged specification.
@@ -250,8 +260,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
__RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
- __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
- __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+ __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts),
+ __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8e9f25a290ae0016353c9ea13314c95fb3207812
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030401-cushy-sterility-1d74@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8e9f25a290ae ("mmc: sdhci-xenon: fix PHY init clock stability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e9f25a290ae0016353c9ea13314c95fb3207812 Mon Sep 17 00:00:00 2001
From: Elad Nachman <enachman(a)marvell.com>
Date: Thu, 22 Feb 2024 22:09:30 +0200
Subject: [PATCH] mmc: sdhci-xenon: fix PHY init clock stability
Each time SD/mmc phy is initialized, at times, in some of
the attempts, phy fails to completes its initialization
which results into timeout error. Per the HW spec, it is
a pre-requisite to ensure a stable SD clock before a phy
initialization is attempted.
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC")
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Elad Nachman <enachman(a)marvell.com>
Link: https://lore.kernel.org/r/20240222200930.1277665-1-enachman@marvell.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 8cf3a375de65..c3096230a969 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/ktime.h>
+#include <linux/iopoll.h>
#include <linux/of_address.h>
#include "sdhci-pltfm.h"
@@ -216,6 +217,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host)
return 0;
}
+static int xenon_check_stability_internal_clk(struct sdhci_host *host)
+{
+ u32 reg;
+ int err;
+
+ err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE,
+ 1100, 20000, false, host, SDHCI_CLOCK_CONTROL);
+ if (err)
+ dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n");
+
+ return err;
+}
+
/*
* eMMC 5.0/5.1 PHY init/re-init.
* eMMC PHY init should be executed after:
@@ -232,6 +246,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+ int ret = xenon_check_stability_internal_clk(host);
+
+ if (ret)
+ return ret;
+
reg = sdhci_readl(host, phy_regs->timing_adj);
reg |= XENON_PHY_INITIALIZAION;
sdhci_writel(host, reg, phy_regs->timing_adj);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 8e9f25a290ae0016353c9ea13314c95fb3207812
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030400-debate-conclude-99e5@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
8e9f25a290ae ("mmc: sdhci-xenon: fix PHY init clock stability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e9f25a290ae0016353c9ea13314c95fb3207812 Mon Sep 17 00:00:00 2001
From: Elad Nachman <enachman(a)marvell.com>
Date: Thu, 22 Feb 2024 22:09:30 +0200
Subject: [PATCH] mmc: sdhci-xenon: fix PHY init clock stability
Each time SD/mmc phy is initialized, at times, in some of
the attempts, phy fails to completes its initialization
which results into timeout error. Per the HW spec, it is
a pre-requisite to ensure a stable SD clock before a phy
initialization is attempted.
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC")
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Elad Nachman <enachman(a)marvell.com>
Link: https://lore.kernel.org/r/20240222200930.1277665-1-enachman@marvell.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 8cf3a375de65..c3096230a969 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/ktime.h>
+#include <linux/iopoll.h>
#include <linux/of_address.h>
#include "sdhci-pltfm.h"
@@ -216,6 +217,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host)
return 0;
}
+static int xenon_check_stability_internal_clk(struct sdhci_host *host)
+{
+ u32 reg;
+ int err;
+
+ err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE,
+ 1100, 20000, false, host, SDHCI_CLOCK_CONTROL);
+ if (err)
+ dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n");
+
+ return err;
+}
+
/*
* eMMC 5.0/5.1 PHY init/re-init.
* eMMC PHY init should be executed after:
@@ -232,6 +246,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+ int ret = xenon_check_stability_internal_clk(host);
+
+ if (ret)
+ return ret;
+
reg = sdhci_readl(host, phy_regs->timing_adj);
reg |= XENON_PHY_INITIALIZAION;
sdhci_writel(host, reg, phy_regs->timing_adj);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030409-purchase-uselessly-906f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
09e23823ae9a ("mmc: sdhci-xenon: add timeout for PHY init complete")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129 Mon Sep 17 00:00:00 2001
From: Elad Nachman <enachman(a)marvell.com>
Date: Thu, 22 Feb 2024 21:17:14 +0200
Subject: [PATCH] mmc: sdhci-xenon: add timeout for PHY init complete
AC5X spec says PHY init complete bit must be polled until zero.
We see cases in which timeout can take longer than the standard
calculation on AC5X, which is expected following the spec comment above.
According to the spec, we must wait as long as it takes for that bit to
toggle on AC5X.
Cap that with 100 delay loops so we won't get stuck forever.
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC")
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Elad Nachman <enachman(a)marvell.com>
Link: https://lore.kernel.org/r/20240222191714.1216470-3-enachman@marvell.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index c3096230a969..cc9d28b75eb9 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -110,6 +110,8 @@
#define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18)
#define XENON_LOGIC_TIMING_VALUE 0x00AA8977
+#define XENON_MAX_PHY_TIMEOUT_LOOPS 100
+
/*
* List offset of PHY registers and some special register values
* in eMMC PHY 5.0 or eMMC PHY 5.1
@@ -278,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
/* get the wait time */
wait /= clock;
wait++;
- /* wait for host eMMC PHY init completes */
- udelay(wait);
- reg = sdhci_readl(host, phy_regs->timing_adj);
- reg &= XENON_PHY_INITIALIZAION;
- if (reg) {
+ /*
+ * AC5X spec says bit must be polled until zero.
+ * We see cases in which timeout can take longer
+ * than the standard calculation on AC5X, which is
+ * expected following the spec comment above.
+ * According to the spec, we must wait as long as
+ * it takes for that bit to toggle on AC5X.
+ * Cap that with 100 delay loops so we won't get
+ * stuck here forever:
+ */
+
+ ret = read_poll_timeout(sdhci_readl, reg,
+ !(reg & XENON_PHY_INITIALIZAION),
+ wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait,
+ false, host, phy_regs->timing_adj);
+ if (ret)
dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n",
- wait);
- return -ETIMEDOUT;
- }
+ wait * XENON_MAX_PHY_TIMEOUT_LOOPS);
- return 0;
+ return ret;
}
#define ARMADA_3700_SOC_PAD_1_8V 0x1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030408-hyphen-moonwalk-5e0e@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
09e23823ae9a ("mmc: sdhci-xenon: add timeout for PHY init complete")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129 Mon Sep 17 00:00:00 2001
From: Elad Nachman <enachman(a)marvell.com>
Date: Thu, 22 Feb 2024 21:17:14 +0200
Subject: [PATCH] mmc: sdhci-xenon: add timeout for PHY init complete
AC5X spec says PHY init complete bit must be polled until zero.
We see cases in which timeout can take longer than the standard
calculation on AC5X, which is expected following the spec comment above.
According to the spec, we must wait as long as it takes for that bit to
toggle on AC5X.
Cap that with 100 delay loops so we won't get stuck forever.
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC")
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Elad Nachman <enachman(a)marvell.com>
Link: https://lore.kernel.org/r/20240222191714.1216470-3-enachman@marvell.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index c3096230a969..cc9d28b75eb9 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -110,6 +110,8 @@
#define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18)
#define XENON_LOGIC_TIMING_VALUE 0x00AA8977
+#define XENON_MAX_PHY_TIMEOUT_LOOPS 100
+
/*
* List offset of PHY registers and some special register values
* in eMMC PHY 5.0 or eMMC PHY 5.1
@@ -278,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
/* get the wait time */
wait /= clock;
wait++;
- /* wait for host eMMC PHY init completes */
- udelay(wait);
- reg = sdhci_readl(host, phy_regs->timing_adj);
- reg &= XENON_PHY_INITIALIZAION;
- if (reg) {
+ /*
+ * AC5X spec says bit must be polled until zero.
+ * We see cases in which timeout can take longer
+ * than the standard calculation on AC5X, which is
+ * expected following the spec comment above.
+ * According to the spec, we must wait as long as
+ * it takes for that bit to toggle on AC5X.
+ * Cap that with 100 delay loops so we won't get
+ * stuck here forever:
+ */
+
+ ret = read_poll_timeout(sdhci_readl, reg,
+ !(reg & XENON_PHY_INITIALIZAION),
+ wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait,
+ false, host, phy_regs->timing_adj);
+ if (ret)
dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n",
- wait);
- return -ETIMEDOUT;
- }
+ wait * XENON_MAX_PHY_TIMEOUT_LOOPS);
- return 0;
+ return ret;
}
#define ARMADA_3700_SOC_PAD_1_8V 0x1
When not configured for wakeup lis3lv02d_i2c_suspend() will call
lis3lv02d_poweroff() even if the device has already been turned off
by the runtime-suspend handler and if configured for wakeup and
the device is runtime-suspended at this point then it is not turned
back on to serve as a wakeup source.
Before commit b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting
of the reg_ctrl callback"), lis3lv02d_poweroff() failed to disable
the regulators which as a side effect made calling poweroff() twice ok.
Now that poweroff() correctly disables the regulators, doing this twice
triggers a WARN() in the regulator core:
unbalanced disables for regulator-dummy
WARNING: CPU: 1 PID: 92 at drivers/regulator/core.c:2999 _regulator_disable
...
Fix lis3lv02d_i2c_suspend() to not call poweroff() a second time if
already runtime-suspended and add a poweron() call when necessary to
make wakeup work.
lis3lv02d_i2c_resume() has similar issues, with an added weirness that
it always powers on the device if it is runtime suspended, after which
the first runtime-resume will call poweron() again, causing the enabled
count for the regulator to increase by 1 every suspend/resume. These
unbalanced regulator_enable() calls cause the regulator to never
be turned off and trigger the following WARN() on driver unbind:
WARNING: CPU: 1 PID: 1724 at drivers/regulator/core.c:2396 _regulator_put
Fix this by making lis3lv02d_i2c_resume() mirror the new suspend().
Fixes: b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting of the reg_ctrl callback")
Reported-by: Paul Menzel <pmenzel(a)molgen.mpg.de>
Closes: https://lore.kernel.org/regressions/5fc6da74-af0a-4aac-b4d5-a000b39a63a5@mo…
Cc: stable(a)vger.kernel.org
Cc: regressions(a)lists.linux.dev
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
index c6eb27d46cb0..15119584473c 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
@@ -198,8 +198,14 @@ static int lis3lv02d_i2c_suspend(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct lis3lv02d *lis3 = i2c_get_clientdata(client);
- if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+ /* Turn on for wakeup if turned off by runtime suspend */
+ if (lis3->pdata && lis3->pdata->wakeup_flags) {
+ if (pm_runtime_suspended(dev))
+ lis3lv02d_poweron(lis3);
+ /* For non wakeup turn off if not already turned off by runtime suspend */
+ } else if (!pm_runtime_suspended(dev))
lis3lv02d_poweroff(lis3);
+
return 0;
}
@@ -208,13 +214,12 @@ static int lis3lv02d_i2c_resume(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct lis3lv02d *lis3 = i2c_get_clientdata(client);
- /*
- * pm_runtime documentation says that devices should always
- * be powered on at resume. Pm_runtime turns them off after system
- * wide resume is complete.
- */
- if (!lis3->pdata || !lis3->pdata->wakeup_flags ||
- pm_runtime_suspended(dev))
+ /* Turn back off if turned on for wakeup and runtime suspended*/
+ if (lis3->pdata && lis3->pdata->wakeup_flags) {
+ if (pm_runtime_suspended(dev))
+ lis3lv02d_poweroff(lis3);
+ /* For non wakeup turn back on if not runtime suspended */
+ } else if (!pm_runtime_suspended(dev))
lis3lv02d_poweron(lis3);
return 0;
--
2.43.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 51d31149a88b5c5a8d2d33f06df93f6187a25b4c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030406-ambiguity-strict-2ed2@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
51d31149a88b ("ceph: switch to corrected encoding of max_xattr_size in mdsmap")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 51d31149a88b5c5a8d2d33f06df93f6187a25b4c Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Mon, 19 Feb 2024 13:14:32 +0800
Subject: [PATCH] ceph: switch to corrected encoding of max_xattr_size in
mdsmap
The addition of bal_rank_mask with encoding version 17 was merged
into ceph.git in Oct 2022 and made it into v18.2.0 release normally.
A few months later, the much delayed addition of max_xattr_size got
merged, also with encoding version 17, placed before bal_rank_mask
in the encoding -- but it didn't make v18.2.0 release.
The way this ended up being resolved on the MDS side is that
bal_rank_mask will continue to be encoded in version 17 while
max_xattr_size is now encoded in version 18. This does mean that
older kernels will misdecode version 17, but this is also true for
v18.2.0 and v18.2.1 clients in userspace.
The best we can do is backport this adjustment -- see ceph.git
commit 78abfeaff27fee343fb664db633de5b221699a73 for details.
[ idryomov: changelog ]
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/64440
Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size")
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Patrick Donnelly <pdonnell(a)ibm.com>
Reviewed-by: Venky Shankar <vshankar(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index fae97c25ce58..8109aba66e02 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -380,10 +380,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p,
ceph_decode_skip_8(p, end, bad_ext);
/* required_client_features */
ceph_decode_skip_set(p, end, 64, bad_ext);
+ /* bal_rank_mask */
+ ceph_decode_skip_string(p, end, bad_ext);
+ }
+ if (mdsmap_ev >= 18) {
ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
- } else {
- /* This forces the usage of the (sync) SETXATTR Op */
- m->m_max_xattr_size = 0;
}
bad_ext:
doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h
index 89f1931f1ba6..1f2171dd01bf 100644
--- a/fs/ceph/mdsmap.h
+++ b/fs/ceph/mdsmap.h
@@ -27,7 +27,11 @@ struct ceph_mdsmap {
u32 m_session_timeout; /* seconds */
u32 m_session_autoclose; /* seconds */
u64 m_max_file_size;
- u64 m_max_xattr_size; /* maximum size for xattrs blob */
+ /*
+ * maximum size for xattrs blob.
+ * Zeroed by default to force the usage of the (sync) SETXATTR Op.
+ */
+ u64 m_max_xattr_size;
u32 m_max_mds; /* expected up:active mds number */
u32 m_num_active_mds; /* actual up:active mds number */
u32 possible_max_rank; /* possible max rank index */
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 51d31149a88b5c5a8d2d33f06df93f6187a25b4c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030405-life-despair-ad2e@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
51d31149a88b ("ceph: switch to corrected encoding of max_xattr_size in mdsmap")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 51d31149a88b5c5a8d2d33f06df93f6187a25b4c Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Mon, 19 Feb 2024 13:14:32 +0800
Subject: [PATCH] ceph: switch to corrected encoding of max_xattr_size in
mdsmap
The addition of bal_rank_mask with encoding version 17 was merged
into ceph.git in Oct 2022 and made it into v18.2.0 release normally.
A few months later, the much delayed addition of max_xattr_size got
merged, also with encoding version 17, placed before bal_rank_mask
in the encoding -- but it didn't make v18.2.0 release.
The way this ended up being resolved on the MDS side is that
bal_rank_mask will continue to be encoded in version 17 while
max_xattr_size is now encoded in version 18. This does mean that
older kernels will misdecode version 17, but this is also true for
v18.2.0 and v18.2.1 clients in userspace.
The best we can do is backport this adjustment -- see ceph.git
commit 78abfeaff27fee343fb664db633de5b221699a73 for details.
[ idryomov: changelog ]
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/64440
Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size")
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Patrick Donnelly <pdonnell(a)ibm.com>
Reviewed-by: Venky Shankar <vshankar(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index fae97c25ce58..8109aba66e02 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -380,10 +380,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p,
ceph_decode_skip_8(p, end, bad_ext);
/* required_client_features */
ceph_decode_skip_set(p, end, 64, bad_ext);
+ /* bal_rank_mask */
+ ceph_decode_skip_string(p, end, bad_ext);
+ }
+ if (mdsmap_ev >= 18) {
ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
- } else {
- /* This forces the usage of the (sync) SETXATTR Op */
- m->m_max_xattr_size = 0;
}
bad_ext:
doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h
index 89f1931f1ba6..1f2171dd01bf 100644
--- a/fs/ceph/mdsmap.h
+++ b/fs/ceph/mdsmap.h
@@ -27,7 +27,11 @@ struct ceph_mdsmap {
u32 m_session_timeout; /* seconds */
u32 m_session_autoclose; /* seconds */
u64 m_max_file_size;
- u64 m_max_xattr_size; /* maximum size for xattrs blob */
+ /*
+ * maximum size for xattrs blob.
+ * Zeroed by default to force the usage of the (sync) SETXATTR Op.
+ */
+ u64 m_max_xattr_size;
u32 m_max_mds; /* expected up:active mds number */
u32 m_num_active_mds; /* actual up:active mds number */
u32 possible_max_rank; /* possible max rank index */
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a79f949a5ce1d45329d63742c2a995f2b47f9852
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030434-underling-helmet-8fbe@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a79f949a5ce1d45329d63742c2a995f2b47f9852 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li(a)nxp.com>
Date: Wed, 7 Feb 2024 14:47:32 -0500
Subject: [PATCH] dmaengine: fsl-edma: correct max_segment_size setting
Correcting the previous setting of 0x3fff to the actual value of 0x7fff.
Introduced new macro 'EDMA_TCD_ITER_MASK' for improved code clarity and
utilization of FIELD_GET to obtain the accurate maximum value.
Cc: stable(a)vger.kernel.org
Fixes: e06748539432 ("dmaengine: fsl-edma: support edma memcpy")
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://lore.kernel.org/r/20240207194733.2112870-1-Frank.Li@nxp.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index bb5221158a77..f5e216b157c7 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -30,8 +30,9 @@
#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8)
#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11)
-#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0))
-#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0))
+#define EDMA_TCD_ITER_MASK GENMASK(14, 0)
+#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK)
+#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK)
#define EDMA_TCD_CSR_START BIT(0)
#define EDMA_TCD_CSR_INT_MAJOR BIT(1)
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 45cc419b1b4a..d36e28b9c767 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -10,6 +10,7 @@
*/
#include <dt-bindings/dma/fsl-edma.h>
+#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/clk.h>
@@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev)
DMAENGINE_ALIGN_32_BYTES;
/* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
- dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+ dma_set_max_seg_size(fsl_edma->dma_dev.dev,
+ FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK));
fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x a79f949a5ce1d45329d63742c2a995f2b47f9852
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030433-nickname-giblet-5b8d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a79f949a5ce1d45329d63742c2a995f2b47f9852 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li(a)nxp.com>
Date: Wed, 7 Feb 2024 14:47:32 -0500
Subject: [PATCH] dmaengine: fsl-edma: correct max_segment_size setting
Correcting the previous setting of 0x3fff to the actual value of 0x7fff.
Introduced new macro 'EDMA_TCD_ITER_MASK' for improved code clarity and
utilization of FIELD_GET to obtain the accurate maximum value.
Cc: stable(a)vger.kernel.org
Fixes: e06748539432 ("dmaengine: fsl-edma: support edma memcpy")
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://lore.kernel.org/r/20240207194733.2112870-1-Frank.Li@nxp.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index bb5221158a77..f5e216b157c7 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -30,8 +30,9 @@
#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8)
#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11)
-#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0))
-#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0))
+#define EDMA_TCD_ITER_MASK GENMASK(14, 0)
+#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK)
+#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK)
#define EDMA_TCD_CSR_START BIT(0)
#define EDMA_TCD_CSR_INT_MAJOR BIT(1)
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 45cc419b1b4a..d36e28b9c767 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -10,6 +10,7 @@
*/
#include <dt-bindings/dma/fsl-edma.h>
+#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/clk.h>
@@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev)
DMAENGINE_ALIGN_32_BYTES;
/* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
- dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+ dma_set_max_seg_size(fsl_edma->dma_dev.dev,
+ FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK));
fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x a79f949a5ce1d45329d63742c2a995f2b47f9852
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030433-wand-unicorn-9af0@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a79f949a5ce1d45329d63742c2a995f2b47f9852 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li(a)nxp.com>
Date: Wed, 7 Feb 2024 14:47:32 -0500
Subject: [PATCH] dmaengine: fsl-edma: correct max_segment_size setting
Correcting the previous setting of 0x3fff to the actual value of 0x7fff.
Introduced new macro 'EDMA_TCD_ITER_MASK' for improved code clarity and
utilization of FIELD_GET to obtain the accurate maximum value.
Cc: stable(a)vger.kernel.org
Fixes: e06748539432 ("dmaengine: fsl-edma: support edma memcpy")
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://lore.kernel.org/r/20240207194733.2112870-1-Frank.Li@nxp.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index bb5221158a77..f5e216b157c7 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -30,8 +30,9 @@
#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8)
#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11)
-#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0))
-#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0))
+#define EDMA_TCD_ITER_MASK GENMASK(14, 0)
+#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK)
+#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK)
#define EDMA_TCD_CSR_START BIT(0)
#define EDMA_TCD_CSR_INT_MAJOR BIT(1)
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 45cc419b1b4a..d36e28b9c767 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -10,6 +10,7 @@
*/
#include <dt-bindings/dma/fsl-edma.h>
+#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/clk.h>
@@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev)
DMAENGINE_ALIGN_32_BYTES;
/* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
- dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+ dma_set_max_seg_size(fsl_edma->dma_dev.dev,
+ FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK));
fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x b979f2d50a099f3402418d7ff5f26c3952fb08bb
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030455-jolly-catcall-c2e8@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
b979f2d50a09 ("soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free")
2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE")
f86955f2b1ff ("soc: qcom: pmic_glink: fix connector type to be DisplayPort")
5692aeea5bcb ("soc: qcom: pmic: Fix resource leaks in a device_for_each_child_node() loop")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b979f2d50a099f3402418d7ff5f26c3952fb08bb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Sat, 17 Feb 2024 16:02:25 +0100
Subject: [PATCH] soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free
A recent DRM series purporting to simplify support for "transparent
bridges" and handling of probe deferrals ironically exposed a
use-after-free issue on pmic_glink_altmode probe deferral.
This has manifested itself as the display subsystem occasionally failing
to initialise and NULL-pointer dereferences during boot of machines like
the Lenovo ThinkPad X13s.
Specifically, the dp-hpd bridge is currently registered before all
resources have been acquired which means that it can also be
deregistered on probe deferrals.
In the meantime there is a race window where the new aux bridge driver
(or PHY driver previously) may have looked up the dp-hpd bridge and
stored a (non-reference-counted) pointer to the bridge which is about to
be deallocated.
When the display controller is later initialised, this triggers a
use-after-free when attaching the bridges:
dp -> aux -> dp-hpd (freed)
which may, for example, result in the freed bridge failing to attach:
[drm:drm_bridge_attach [drm]] *ERROR* failed to attach bridge /soc@0/phy@88eb000 to encoder TMDS-31: -16
or a NULL-pointer dereference:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
...
Call trace:
drm_bridge_attach+0x70/0x1a8 [drm]
drm_aux_bridge_attach+0x24/0x38 [aux_bridge]
drm_bridge_attach+0x80/0x1a8 [drm]
dp_bridge_init+0xa8/0x15c [msm]
msm_dp_modeset_init+0x28/0xc4 [msm]
The DRM bridge implementation is clearly fragile and implicitly built on
the assumption that bridges may never go away. In this case, the fix is
to move the bridge registration in the pmic_glink_altmode driver to
after all resources have been looked up.
Incidentally, with the new dp-hpd bridge implementation, which registers
child devices, this is also a requirement due to a long-standing issue
in driver core that can otherwise lead to a probe deferral loop (see
commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")).
[DB: slightly fixed commit message by adding the word 'commit']
Fixes: 080b4e24852b ("soc: qcom: pmic_glink: Introduce altmode support")
Fixes: 2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE")
Cc: <stable(a)vger.kernel.org> # 6.3
Cc: Bjorn Andersson <andersson(a)kernel.org>
Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Bjorn Andersson <andersson(a)kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-4-johan+l…
diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c
index 5fcd0fdd2faa..b3808fc24c69 100644
--- a/drivers/soc/qcom/pmic_glink_altmode.c
+++ b/drivers/soc/qcom/pmic_glink_altmode.c
@@ -76,7 +76,7 @@ struct pmic_glink_altmode_port {
struct work_struct work;
- struct device *bridge;
+ struct auxiliary_device *bridge;
enum typec_orientation orientation;
u16 svid;
@@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work)
else
pmic_glink_altmode_enable_usb(altmode, alt_port);
- drm_aux_hpd_bridge_notify(alt_port->bridge,
+ drm_aux_hpd_bridge_notify(&alt_port->bridge->dev,
alt_port->hpd_state ?
connector_status_connected :
connector_status_disconnected);
@@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
alt_port->index = port;
INIT_WORK(&alt_port->work, pmic_glink_altmode_worker);
- alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode));
+ alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode));
if (IS_ERR(alt_port->bridge)) {
fwnode_handle_put(fwnode);
return PTR_ERR(alt_port->bridge);
@@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
}
}
+ for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) {
+ alt_port = &altmode->ports[port];
+ if (!alt_port->bridge)
+ continue;
+
+ ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge);
+ if (ret)
+ return ret;
+ }
+
altmode->client = devm_pmic_glink_register_client(dev,
altmode->owner_id,
pmic_glink_altmode_callback,
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x b979f2d50a099f3402418d7ff5f26c3952fb08bb
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030452-unlatch-jailer-3f13@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
b979f2d50a09 ("soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free")
2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b979f2d50a099f3402418d7ff5f26c3952fb08bb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Sat, 17 Feb 2024 16:02:25 +0100
Subject: [PATCH] soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free
A recent DRM series purporting to simplify support for "transparent
bridges" and handling of probe deferrals ironically exposed a
use-after-free issue on pmic_glink_altmode probe deferral.
This has manifested itself as the display subsystem occasionally failing
to initialise and NULL-pointer dereferences during boot of machines like
the Lenovo ThinkPad X13s.
Specifically, the dp-hpd bridge is currently registered before all
resources have been acquired which means that it can also be
deregistered on probe deferrals.
In the meantime there is a race window where the new aux bridge driver
(or PHY driver previously) may have looked up the dp-hpd bridge and
stored a (non-reference-counted) pointer to the bridge which is about to
be deallocated.
When the display controller is later initialised, this triggers a
use-after-free when attaching the bridges:
dp -> aux -> dp-hpd (freed)
which may, for example, result in the freed bridge failing to attach:
[drm:drm_bridge_attach [drm]] *ERROR* failed to attach bridge /soc@0/phy@88eb000 to encoder TMDS-31: -16
or a NULL-pointer dereference:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
...
Call trace:
drm_bridge_attach+0x70/0x1a8 [drm]
drm_aux_bridge_attach+0x24/0x38 [aux_bridge]
drm_bridge_attach+0x80/0x1a8 [drm]
dp_bridge_init+0xa8/0x15c [msm]
msm_dp_modeset_init+0x28/0xc4 [msm]
The DRM bridge implementation is clearly fragile and implicitly built on
the assumption that bridges may never go away. In this case, the fix is
to move the bridge registration in the pmic_glink_altmode driver to
after all resources have been looked up.
Incidentally, with the new dp-hpd bridge implementation, which registers
child devices, this is also a requirement due to a long-standing issue
in driver core that can otherwise lead to a probe deferral loop (see
commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")).
[DB: slightly fixed commit message by adding the word 'commit']
Fixes: 080b4e24852b ("soc: qcom: pmic_glink: Introduce altmode support")
Fixes: 2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE")
Cc: <stable(a)vger.kernel.org> # 6.3
Cc: Bjorn Andersson <andersson(a)kernel.org>
Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Bjorn Andersson <andersson(a)kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-4-johan+l…
diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c
index 5fcd0fdd2faa..b3808fc24c69 100644
--- a/drivers/soc/qcom/pmic_glink_altmode.c
+++ b/drivers/soc/qcom/pmic_glink_altmode.c
@@ -76,7 +76,7 @@ struct pmic_glink_altmode_port {
struct work_struct work;
- struct device *bridge;
+ struct auxiliary_device *bridge;
enum typec_orientation orientation;
u16 svid;
@@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work)
else
pmic_glink_altmode_enable_usb(altmode, alt_port);
- drm_aux_hpd_bridge_notify(alt_port->bridge,
+ drm_aux_hpd_bridge_notify(&alt_port->bridge->dev,
alt_port->hpd_state ?
connector_status_connected :
connector_status_disconnected);
@@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
alt_port->index = port;
INIT_WORK(&alt_port->work, pmic_glink_altmode_worker);
- alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode));
+ alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode));
if (IS_ERR(alt_port->bridge)) {
fwnode_handle_put(fwnode);
return PTR_ERR(alt_port->bridge);
@@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
}
}
+ for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) {
+ alt_port = &altmode->ports[port];
+ if (!alt_port->bridge)
+ continue;
+
+ ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge);
+ if (ret)
+ return ret;
+ }
+
altmode->client = devm_pmic_glink_register_client(dev,
altmode->owner_id,
pmic_glink_altmode_callback,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e2b54eaf28df0c978626c9736b94f003b523b451
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030455-ensure-outward-f8cc@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e2b54eaf28df ("btrfs: fix double free of anonymous device after snapshot creation failure")
e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read")
caae78e03234 ("btrfs: move common inode creation code into btrfs_create_new_inode()")
3538d68dbd97 ("btrfs: reserve correct number of items for inode creation")
5f465bf1f15a ("btrfs: factor out common part of btrfs_{mknod,create,mkdir}()")
a1fd0c35ffe3 ("btrfs: allocate inode outside of btrfs_new_inode()")
305eaac00911 ("btrfs: set inode flags earlier in btrfs_new_inode()")
6437d4583531 ("btrfs: move btrfs_get_free_objectid() call into btrfs_new_inode()")
23c24ef8e418 ("btrfs: don't pass parent objectid to btrfs_new_inode() explicitly")
75b993cf4305 ("btrfs: remove unused mnt_userns parameter from __btrfs_set_acl")
c51fa51190f9 ("btrfs: remove unnecessary set_nlink() in btrfs_create_subvol_root()")
6d831f7ef9f0 ("btrfs: remove unnecessary inode_set_bytes(0) call")
9124e15f2798 ("btrfs: remove unnecessary btrfs_i_size_write(0) calls")
81512e89f2b7 ("btrfs: get rid of btrfs_add_nondir()")
2256e901f5bd ("btrfs: fix anon_dev leak in create_subvol()")
c16218714307 ("btrfs: reserve correct number of items for rename")
1b58ae0e4d3e ("btrfs: skip transaction commit after failure to create subvolume")
33fab972497a ("btrfs: fix double free of anon_dev after failure to create subvolume")
b7ef5f3a6f37 ("btrfs: loop only once over data sizes array when inserting an item batch")
086dcbfa50d3 ("btrfs: insert items in batches when logging a directory when possible")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e2b54eaf28df0c978626c9736b94f003b523b451 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 23 Feb 2024 16:38:43 +0000
Subject: [PATCH] btrfs: fix double free of anonymous device after snapshot
creation failure
When creating a snapshot we may do a double free of an anonymous device
in case there's an error committing the transaction. The second free may
result in freeing an anonymous device number that was allocated by some
other subsystem in the kernel or another btrfs filesystem.
The steps that lead to this:
1) At ioctl.c:create_snapshot() we allocate an anonymous device number
and assign it to pending_snapshot->anon_dev;
2) Then we call btrfs_commit_transaction() and end up at
transaction.c:create_pending_snapshot();
3) There we call btrfs_get_new_fs_root() and pass it the anonymous device
number stored in pending_snapshot->anon_dev;
4) btrfs_get_new_fs_root() frees that anonymous device number because
btrfs_lookup_fs_root() returned a root - someone else did a lookup
of the new root already, which could some task doing backref walking;
5) After that some error happens in the transaction commit path, and at
ioctl.c:create_snapshot() we jump to the 'fail' label, and after
that we free again the same anonymous device number, which in the
meanwhile may have been reallocated somewhere else, because
pending_snapshot->anon_dev still has the same value as in step 1.
Recently syzbot ran into this and reported the following trace:
------------[ cut here ]------------
ida_free called for id=51 which is not allocated.
WARNING: CPU: 1 PID: 31038 at lib/idr.c:525 ida_free+0x370/0x420 lib/idr.c:525
Modules linked in:
CPU: 1 PID: 31038 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00410-gc02197fc9076 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024
RIP: 0010:ida_free+0x370/0x420 lib/idr.c:525
Code: 10 42 80 3c 28 (...)
RSP: 0018:ffffc90015a67300 EFLAGS: 00010246
RAX: be5130472f5dd000 RBX: 0000000000000033 RCX: 0000000000040000
RDX: ffffc90009a7a000 RSI: 000000000003ffff RDI: 0000000000040000
RBP: ffffc90015a673f0 R08: ffffffff81577992 R09: 1ffff92002b4cdb4
R10: dffffc0000000000 R11: fffff52002b4cdb5 R12: 0000000000000246
R13: dffffc0000000000 R14: ffffffff8e256b80 R15: 0000000000000246
FS: 00007fca3f4b46c0(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f167a17b978 CR3: 000000001ed26000 CR4: 0000000000350ef0
Call Trace:
<TASK>
btrfs_get_root_ref+0xa48/0xaf0 fs/btrfs/disk-io.c:1346
create_pending_snapshot+0xff2/0x2bc0 fs/btrfs/transaction.c:1837
create_pending_snapshots+0x195/0x1d0 fs/btrfs/transaction.c:1931
btrfs_commit_transaction+0xf1c/0x3740 fs/btrfs/transaction.c:2404
create_snapshot+0x507/0x880 fs/btrfs/ioctl.c:848
btrfs_mksubvol+0x5d0/0x750 fs/btrfs/ioctl.c:998
btrfs_mksnapshot+0xb5/0xf0 fs/btrfs/ioctl.c:1044
__btrfs_ioctl_snap_create+0x387/0x4b0 fs/btrfs/ioctl.c:1306
btrfs_ioctl_snap_create_v2+0x1ca/0x400 fs/btrfs/ioctl.c:1393
btrfs_ioctl+0xa74/0xd40
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:871 [inline]
__se_sys_ioctl+0xfe/0x170 fs/ioctl.c:857
do_syscall_64+0xfb/0x240
entry_SYSCALL_64_after_hwframe+0x6f/0x77
RIP: 0033:0x7fca3e67dda9
Code: 28 00 00 00 (...)
RSP: 002b:00007fca3f4b40c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00007fca3e7abf80 RCX: 00007fca3e67dda9
RDX: 00000000200005c0 RSI: 0000000050009417 RDI: 0000000000000003
RBP: 00007fca3e6ca47a R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 000000000000000b R14: 00007fca3e7abf80 R15: 00007fff6bf95658
</TASK>
Where we get an explicit message where we attempt to free an anonymous
device number that is not currently allocated. It happens in a different
code path from the example below, at btrfs_get_root_ref(), so this change
may not fix the case triggered by syzbot.
To fix at least the code path from the example above, change
btrfs_get_root_ref() and its callers to receive a dev_t pointer argument
for the anonymous device number, so that in case it frees the number, it
also resets it to 0, so that up in the call chain we don't attempt to do
the double free.
CC: stable(a)vger.kernel.org # 5.10+
Link: https://lore.kernel.org/linux-btrfs/000000000000f673a1061202f630@google.com/
Fixes: e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read")
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e71ef97d0a7c..c843563914ca 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
*
* @objectid: root id
* @anon_dev: preallocated anonymous block device number for new roots,
- * pass 0 for new allocation.
+ * pass NULL for a new allocation.
* @check_ref: whether to check root item references, If true, return -ENOENT
* for orphan roots
*/
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev,
+ u64 objectid, dev_t *anon_dev,
bool check_ref)
{
struct btrfs_root *root;
@@ -1342,9 +1342,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* that common but still possible. In that case, we just need
* to free the anon_dev.
*/
- if (unlikely(anon_dev)) {
- free_anon_bdev(anon_dev);
- anon_dev = 0;
+ if (unlikely(anon_dev && *anon_dev)) {
+ free_anon_bdev(*anon_dev);
+ *anon_dev = 0;
}
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
@@ -1366,7 +1366,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
goto fail;
}
- ret = btrfs_init_fs_root(root, anon_dev);
+ ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
if (ret)
goto fail;
@@ -1402,7 +1402,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
* and once again by our caller.
*/
- if (anon_dev)
+ if (anon_dev && *anon_dev)
root->anon_dev = 0;
btrfs_put_root(root);
return ERR_PTR(ret);
@@ -1418,7 +1418,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
{
- return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+ return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
}
/*
@@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
* the anonymous block device id
*
* @objectid: tree objectid
- * @anon_dev: if zero, allocate a new anonymous block device or use the
- * parameter value
+ * @anon_dev: if NULL, allocate a new anonymous block device or use the
+ * parameter value if not NULL
*/
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev)
+ u64 objectid, dev_t *anon_dev)
{
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 9413726b329b..eb3473d1c1ac 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev);
+ u64 objectid, dev_t *anon_dev);
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 objectid);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fb2323b323bf..b004e3b75311 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
free_extent_buffer(leaf);
leaf = NULL;
- new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
+ new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c52807d97efa..bf8e64c766b6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
- pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
+ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
pending->snap = NULL;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e2b54eaf28df0c978626c9736b94f003b523b451
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030451-curtly-phoney-bfc5@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e2b54eaf28df ("btrfs: fix double free of anonymous device after snapshot creation failure")
e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read")
caae78e03234 ("btrfs: move common inode creation code into btrfs_create_new_inode()")
3538d68dbd97 ("btrfs: reserve correct number of items for inode creation")
5f465bf1f15a ("btrfs: factor out common part of btrfs_{mknod,create,mkdir}()")
a1fd0c35ffe3 ("btrfs: allocate inode outside of btrfs_new_inode()")
305eaac00911 ("btrfs: set inode flags earlier in btrfs_new_inode()")
6437d4583531 ("btrfs: move btrfs_get_free_objectid() call into btrfs_new_inode()")
23c24ef8e418 ("btrfs: don't pass parent objectid to btrfs_new_inode() explicitly")
75b993cf4305 ("btrfs: remove unused mnt_userns parameter from __btrfs_set_acl")
c51fa51190f9 ("btrfs: remove unnecessary set_nlink() in btrfs_create_subvol_root()")
6d831f7ef9f0 ("btrfs: remove unnecessary inode_set_bytes(0) call")
9124e15f2798 ("btrfs: remove unnecessary btrfs_i_size_write(0) calls")
81512e89f2b7 ("btrfs: get rid of btrfs_add_nondir()")
2256e901f5bd ("btrfs: fix anon_dev leak in create_subvol()")
c16218714307 ("btrfs: reserve correct number of items for rename")
1b58ae0e4d3e ("btrfs: skip transaction commit after failure to create subvolume")
33fab972497a ("btrfs: fix double free of anon_dev after failure to create subvolume")
b7ef5f3a6f37 ("btrfs: loop only once over data sizes array when inserting an item batch")
086dcbfa50d3 ("btrfs: insert items in batches when logging a directory when possible")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e2b54eaf28df0c978626c9736b94f003b523b451 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 23 Feb 2024 16:38:43 +0000
Subject: [PATCH] btrfs: fix double free of anonymous device after snapshot
creation failure
When creating a snapshot we may do a double free of an anonymous device
in case there's an error committing the transaction. The second free may
result in freeing an anonymous device number that was allocated by some
other subsystem in the kernel or another btrfs filesystem.
The steps that lead to this:
1) At ioctl.c:create_snapshot() we allocate an anonymous device number
and assign it to pending_snapshot->anon_dev;
2) Then we call btrfs_commit_transaction() and end up at
transaction.c:create_pending_snapshot();
3) There we call btrfs_get_new_fs_root() and pass it the anonymous device
number stored in pending_snapshot->anon_dev;
4) btrfs_get_new_fs_root() frees that anonymous device number because
btrfs_lookup_fs_root() returned a root - someone else did a lookup
of the new root already, which could some task doing backref walking;
5) After that some error happens in the transaction commit path, and at
ioctl.c:create_snapshot() we jump to the 'fail' label, and after
that we free again the same anonymous device number, which in the
meanwhile may have been reallocated somewhere else, because
pending_snapshot->anon_dev still has the same value as in step 1.
Recently syzbot ran into this and reported the following trace:
------------[ cut here ]------------
ida_free called for id=51 which is not allocated.
WARNING: CPU: 1 PID: 31038 at lib/idr.c:525 ida_free+0x370/0x420 lib/idr.c:525
Modules linked in:
CPU: 1 PID: 31038 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00410-gc02197fc9076 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024
RIP: 0010:ida_free+0x370/0x420 lib/idr.c:525
Code: 10 42 80 3c 28 (...)
RSP: 0018:ffffc90015a67300 EFLAGS: 00010246
RAX: be5130472f5dd000 RBX: 0000000000000033 RCX: 0000000000040000
RDX: ffffc90009a7a000 RSI: 000000000003ffff RDI: 0000000000040000
RBP: ffffc90015a673f0 R08: ffffffff81577992 R09: 1ffff92002b4cdb4
R10: dffffc0000000000 R11: fffff52002b4cdb5 R12: 0000000000000246
R13: dffffc0000000000 R14: ffffffff8e256b80 R15: 0000000000000246
FS: 00007fca3f4b46c0(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f167a17b978 CR3: 000000001ed26000 CR4: 0000000000350ef0
Call Trace:
<TASK>
btrfs_get_root_ref+0xa48/0xaf0 fs/btrfs/disk-io.c:1346
create_pending_snapshot+0xff2/0x2bc0 fs/btrfs/transaction.c:1837
create_pending_snapshots+0x195/0x1d0 fs/btrfs/transaction.c:1931
btrfs_commit_transaction+0xf1c/0x3740 fs/btrfs/transaction.c:2404
create_snapshot+0x507/0x880 fs/btrfs/ioctl.c:848
btrfs_mksubvol+0x5d0/0x750 fs/btrfs/ioctl.c:998
btrfs_mksnapshot+0xb5/0xf0 fs/btrfs/ioctl.c:1044
__btrfs_ioctl_snap_create+0x387/0x4b0 fs/btrfs/ioctl.c:1306
btrfs_ioctl_snap_create_v2+0x1ca/0x400 fs/btrfs/ioctl.c:1393
btrfs_ioctl+0xa74/0xd40
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:871 [inline]
__se_sys_ioctl+0xfe/0x170 fs/ioctl.c:857
do_syscall_64+0xfb/0x240
entry_SYSCALL_64_after_hwframe+0x6f/0x77
RIP: 0033:0x7fca3e67dda9
Code: 28 00 00 00 (...)
RSP: 002b:00007fca3f4b40c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00007fca3e7abf80 RCX: 00007fca3e67dda9
RDX: 00000000200005c0 RSI: 0000000050009417 RDI: 0000000000000003
RBP: 00007fca3e6ca47a R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 000000000000000b R14: 00007fca3e7abf80 R15: 00007fff6bf95658
</TASK>
Where we get an explicit message where we attempt to free an anonymous
device number that is not currently allocated. It happens in a different
code path from the example below, at btrfs_get_root_ref(), so this change
may not fix the case triggered by syzbot.
To fix at least the code path from the example above, change
btrfs_get_root_ref() and its callers to receive a dev_t pointer argument
for the anonymous device number, so that in case it frees the number, it
also resets it to 0, so that up in the call chain we don't attempt to do
the double free.
CC: stable(a)vger.kernel.org # 5.10+
Link: https://lore.kernel.org/linux-btrfs/000000000000f673a1061202f630@google.com/
Fixes: e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read")
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e71ef97d0a7c..c843563914ca 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
*
* @objectid: root id
* @anon_dev: preallocated anonymous block device number for new roots,
- * pass 0 for new allocation.
+ * pass NULL for a new allocation.
* @check_ref: whether to check root item references, If true, return -ENOENT
* for orphan roots
*/
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev,
+ u64 objectid, dev_t *anon_dev,
bool check_ref)
{
struct btrfs_root *root;
@@ -1342,9 +1342,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* that common but still possible. In that case, we just need
* to free the anon_dev.
*/
- if (unlikely(anon_dev)) {
- free_anon_bdev(anon_dev);
- anon_dev = 0;
+ if (unlikely(anon_dev && *anon_dev)) {
+ free_anon_bdev(*anon_dev);
+ *anon_dev = 0;
}
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
@@ -1366,7 +1366,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
goto fail;
}
- ret = btrfs_init_fs_root(root, anon_dev);
+ ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
if (ret)
goto fail;
@@ -1402,7 +1402,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
* and once again by our caller.
*/
- if (anon_dev)
+ if (anon_dev && *anon_dev)
root->anon_dev = 0;
btrfs_put_root(root);
return ERR_PTR(ret);
@@ -1418,7 +1418,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
{
- return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+ return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
}
/*
@@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
* the anonymous block device id
*
* @objectid: tree objectid
- * @anon_dev: if zero, allocate a new anonymous block device or use the
- * parameter value
+ * @anon_dev: if NULL, allocate a new anonymous block device or use the
+ * parameter value if not NULL
*/
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev)
+ u64 objectid, dev_t *anon_dev)
{
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 9413726b329b..eb3473d1c1ac 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev);
+ u64 objectid, dev_t *anon_dev);
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 objectid);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fb2323b323bf..b004e3b75311 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
free_extent_buffer(leaf);
leaf = NULL;
- new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
+ new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c52807d97efa..bf8e64c766b6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
- pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
+ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
pending->snap = NULL;
v2:
- Dropped already backported patch "x86/bugs: Add asm helpers for
executing VERW"
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
- Booted fine with KASLR and KPTI enabled.
- Rebased to v6.6.20
v1: https://lore.kernel.org/r/20240226-delay-verw-backport-6-6-y-v1-0-aa17b2922…
This is the backport of recently upstreamed series that moves VERW
execution to a later point in exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW executions
may end into MDS affected CPU buffers. Moving VERW closer to ring
transition reduces the attack surface.
Patch 1/6 includes a minor fix that is queued for upstream:
https://lore.kernel.org/lkml/170899674562.398.6398007479766564897.tip-bot2@…
Patch 2/6 needed a conflict to be resolved for the hunk
swapgs_restore_regs_and_return_to_usermode.
This is only compile and boot tested on qemu.
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
To: stable(a)vger.kernel.org
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
Pawan Gupta (4):
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/arch/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/nospec-branch.h | 12 ------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 20 +++++++++++++++----
11 files changed, 75 insertions(+), 45 deletions(-)
---
base-commit: 9b4a8eac17f0d840729384618b4b1e876233026c
change-id: 20240226-delay-verw-backport-6-6-y-2cda3298e600
Best regards,
--
Thanks,
Pawan
This is a note to let you know that I've just added the patch titled
iio: accel: adxl367: fix I2C FIFO data register
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 11dadb631007324c7a8bcb2650eda88ed2b9eed0 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur(a)gmail.com>
Date: Wed, 7 Feb 2024 05:36:51 +0200
Subject: iio: accel: adxl367: fix I2C FIFO data register
As specified in the datasheet, the I2C FIFO data register is
0x18, not 0x42. 0x42 was used by mistake when adapting the
ADXL372 driver.
Fix this mistake.
Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver")
Signed-off-by: Cosmin Tanislav <demonsingur(a)gmail.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://lore.kernel.org/r/20240207033657.206171-2-demonsingur@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/accel/adxl367_i2c.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c
index b595fe94f3a3..62c74bdc0d77 100644
--- a/drivers/iio/accel/adxl367_i2c.c
+++ b/drivers/iio/accel/adxl367_i2c.c
@@ -11,7 +11,7 @@
#include "adxl367.h"
-#define ADXL367_I2C_FIFO_DATA 0x42
+#define ADXL367_I2C_FIFO_DATA 0x18
struct adxl367_i2c_state {
struct regmap *regmap;
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: accel: adxl367: fix DEVID read after reset
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 1b926914bbe4e30cb32f268893ef7d82a85275b8 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur(a)gmail.com>
Date: Wed, 7 Feb 2024 05:36:50 +0200
Subject: iio: accel: adxl367: fix DEVID read after reset
regmap_read_poll_timeout() will not sleep before reading,
causing the first read to return -ENXIO on I2C, since the
chip does not respond to it while it is being reset.
The datasheet specifies that a soft reset operation has a
latency of 7.5ms.
Add a 15ms sleep between reset and reading the DEVID register,
and switch to a simple regmap_read() call.
Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver")
Signed-off-by: Cosmin Tanislav <demonsingur(a)gmail.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://lore.kernel.org/r/20240207033657.206171-1-demonsingur@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/accel/adxl367.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c
index 90b7ae6d42b7..484fe2e9fb17 100644
--- a/drivers/iio/accel/adxl367.c
+++ b/drivers/iio/accel/adxl367.c
@@ -1429,9 +1429,11 @@ static int adxl367_verify_devid(struct adxl367_state *st)
unsigned int val;
int ret;
- ret = regmap_read_poll_timeout(st->regmap, ADXL367_REG_DEVID, val,
- val == ADXL367_DEVID_AD, 1000, 10000);
+ ret = regmap_read(st->regmap, ADXL367_REG_DEVID, &val);
if (ret)
+ return dev_err_probe(st->dev, ret, "Failed to read dev id\n");
+
+ if (val != ADXL367_DEVID_AD)
return dev_err_probe(st->dev, -ENODEV,
"Invalid dev id 0x%02X, expected 0x%02X\n",
val, ADXL367_DEVID_AD);
@@ -1510,6 +1512,8 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops,
if (ret)
return ret;
+ fsleep(15000);
+
ret = adxl367_verify_devid(st);
if (ret)
return ret;
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: pressure: Fixes BMP38x and BMP390 SPI support
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From a9dd9ba323114f366eb07f1d9630822f8df6cbb2 Mon Sep 17 00:00:00 2001
From: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Date: Mon, 19 Feb 2024 20:13:59 +0100
Subject: iio: pressure: Fixes BMP38x and BMP390 SPI support
According to the datasheet of BMP38x and BMP390 devices, for an SPI
read operation the first byte that is returned needs to be dropped,
and the rest of the bytes are the actual data returned from the
sensor.
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Fixes: 8d329309184d ("iio: pressure: bmp280: Add support for BMP380 sensor family")
Signed-off-by: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Acked-by: Angel Iglesias <ang.iglesiasg(a)gmail.com>
Link: https://lore.kernel.org/r/20240219191359.18367-1-vassilisamir@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/bmp280-spi.c | 50 ++++++++++++++++++++++++++++++-
1 file changed, 49 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c
index e8a5fed07e88..a444d4b2978b 100644
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -4,6 +4,7 @@
*
* Inspired by the older BMP085 driver drivers/misc/bmp085-spi.c
*/
+#include <linux/bits.h>
#include <linux/module.h>
#include <linux/spi/spi.h>
#include <linux/err.h>
@@ -35,6 +36,34 @@ static int bmp280_regmap_spi_read(void *context, const void *reg,
return spi_write_then_read(spi, reg, reg_size, val, val_size);
}
+static int bmp380_regmap_spi_read(void *context, const void *reg,
+ size_t reg_size, void *val, size_t val_size)
+{
+ struct spi_device *spi = to_spi_device(context);
+ u8 rx_buf[4];
+ ssize_t status;
+
+ /*
+ * Maximum number of consecutive bytes read for a temperature or
+ * pressure measurement is 3.
+ */
+ if (val_size > 3)
+ return -EINVAL;
+
+ /*
+ * According to the BMP3xx datasheets, for a basic SPI read opertion,
+ * the first byte needs to be dropped and the rest are the requested
+ * data.
+ */
+ status = spi_write_then_read(spi, reg, 1, rx_buf, val_size + 1);
+ if (status)
+ return status;
+
+ memcpy(val, rx_buf + 1, val_size);
+
+ return 0;
+}
+
static struct regmap_bus bmp280_regmap_bus = {
.write = bmp280_regmap_spi_write,
.read = bmp280_regmap_spi_read,
@@ -42,10 +71,19 @@ static struct regmap_bus bmp280_regmap_bus = {
.val_format_endian_default = REGMAP_ENDIAN_BIG,
};
+static struct regmap_bus bmp380_regmap_bus = {
+ .write = bmp280_regmap_spi_write,
+ .read = bmp380_regmap_spi_read,
+ .read_flag_mask = BIT(7),
+ .reg_format_endian_default = REGMAP_ENDIAN_BIG,
+ .val_format_endian_default = REGMAP_ENDIAN_BIG,
+};
+
static int bmp280_spi_probe(struct spi_device *spi)
{
const struct spi_device_id *id = spi_get_device_id(spi);
const struct bmp280_chip_info *chip_info;
+ struct regmap_bus *bmp_regmap_bus;
struct regmap *regmap;
int ret;
@@ -58,8 +96,18 @@ static int bmp280_spi_probe(struct spi_device *spi)
chip_info = spi_get_device_match_data(spi);
+ switch (chip_info->chip_id[0]) {
+ case BMP380_CHIP_ID:
+ case BMP390_CHIP_ID:
+ bmp_regmap_bus = &bmp380_regmap_bus;
+ break;
+ default:
+ bmp_regmap_bus = &bmp280_regmap_bus;
+ break;
+ }
+
regmap = devm_regmap_init(&spi->dev,
- &bmp280_regmap_bus,
+ bmp_regmap_bus,
&spi->dev,
chip_info->regmap_config);
if (IS_ERR(regmap)) {
--
2.44.0
When userspace opens the console, we call set_termios() passing a
termios with the console's configured baud rate. Currently this causes
dw8250_set_termios() to disable and then re-enable the UART clock at
the same frequency as it was originally. This can cause corruption
of any concurrent console output. Fix it by skipping the reclocking
if we are already at the correct rate.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Link: https://linux-review.googlesource.com/id/I2e3761d239cbf29ed41412e5338f30bff…
Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms")
Cc: stable(a)vger.kernel.org
---
drivers/tty/serial/8250/8250_dw.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 2d1f350a4bea..c1d43f040c43 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -357,9 +357,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
long rate;
int ret;
- clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0) {
+ if (rate > 0 && p->uartclk != rate) {
+ clk_disable_unprepare(d->clk);
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
@@ -367,8 +367,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
+ clk_prepare_enable(d->clk);
}
- clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
--
2.44.0.rc1.240.g4c46232300-goog
This is a note to let you know that I've just added the patch titled
iio: adc: rockchip_saradc: use mask for write_enable bitfield
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From 5b4e4b72034f85f7a0cdd147d3d729c5a22c8764 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Date: Fri, 23 Feb 2024 13:45:22 +0100
Subject: iio: adc: rockchip_saradc: use mask for write_enable bitfield
Some of the registers on the SARADCv2 have bits write protected except
if another bit is set. This is usually done by having the lowest 16 bits
store the data to write and the highest 16 bits specify which of the 16
lowest bits should have their value written to the hardware block.
The write_enable mask for the channel selection was incorrect because it
was just the value shifted by 16 bits, which means it would only ever
write bits and never clear them. So e.g. if someone starts a conversion
on channel 5, the lowest 4 bits would be 0x5, then starts a conversion
on channel 0, it would still be 5.
Instead of shifting the value by 16 as the mask, let's use the OR'ing of
the appropriate masks shifted by 16.
Note that this is not an issue currently because the only SARADCv2
currently supported has a reset defined in its Device Tree, that reset
resets the SARADC controller before starting a conversion on a channel.
However, this reset is handled as optional by the probe function and
thus proper masking should be used in the event an SARADCv2 without a
reset ever makes it upstream.
Fixes: 757953f8ec69 ("iio: adc: rockchip_saradc: Add support for RK3588")
Signed-off-by: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Reviewed-by: Heiko Stuebner <heiko(a)sntech.de>
Link: https://lore.kernel.org/r/20240223-saradcv2-chan-mask-v1-2-84b06a0f623a@the…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rockchip_saradc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index 2da8d6f3241a..1c0042fbbb54 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -102,12 +102,12 @@ static void rockchip_saradc_start_v2(struct rockchip_saradc *info, int chn)
writel_relaxed(0xc, info->regs + SARADC_T_DAS_SOC);
writel_relaxed(0x20, info->regs + SARADC_T_PD_SOC);
val = FIELD_PREP(SARADC2_EN_END_INT, 1);
- val |= val << 16;
+ val |= SARADC2_EN_END_INT << 16;
writel_relaxed(val, info->regs + SARADC2_END_INT_EN);
val = FIELD_PREP(SARADC2_START, 1) |
FIELD_PREP(SARADC2_SINGLE_MODE, 1) |
FIELD_PREP(SARADC2_CONV_CHANNELS, chn);
- val |= val << 16;
+ val |= (SARADC2_START | SARADC2_SINGLE_MODE | SARADC2_CONV_CHANNELS) << 16;
writel(val, info->regs + SARADC2_CONV_CON);
}
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: adc: rockchip_saradc: fix bitmask for channels on SARADCv2
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From b0a4546df24a4f8c59b2d05ae141bd70ceccc386 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Date: Fri, 23 Feb 2024 13:45:21 +0100
Subject: iio: adc: rockchip_saradc: fix bitmask for channels on SARADCv2
The SARADCv2 on RK3588 (the only SoC currently supported that has an
SARADCv2) selects the channel through the channel_sel bitfield which is
the 4 lowest bits, therefore the mask should be GENMASK(3, 0) and not
GENMASK(15, 0).
Fixes: 757953f8ec69 ("iio: adc: rockchip_saradc: Add support for RK3588")
Signed-off-by: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Reviewed-by: Heiko Stuebner <heiko(a)sntech.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Link: https://lore.kernel.org/r/20240223-saradcv2-chan-mask-v1-1-84b06a0f623a@the…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rockchip_saradc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index dd94667a623b..2da8d6f3241a 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -52,7 +52,7 @@
#define SARADC2_START BIT(4)
#define SARADC2_SINGLE_MODE BIT(5)
-#define SARADC2_CONV_CHANNELS GENMASK(15, 0)
+#define SARADC2_CONV_CHANNELS GENMASK(3, 0)
struct rockchip_saradc;
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: adc: rockchip_saradc: use mask for write_enable bitfield
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the char-misc-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
From 5b4e4b72034f85f7a0cdd147d3d729c5a22c8764 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Date: Fri, 23 Feb 2024 13:45:22 +0100
Subject: iio: adc: rockchip_saradc: use mask for write_enable bitfield
Some of the registers on the SARADCv2 have bits write protected except
if another bit is set. This is usually done by having the lowest 16 bits
store the data to write and the highest 16 bits specify which of the 16
lowest bits should have their value written to the hardware block.
The write_enable mask for the channel selection was incorrect because it
was just the value shifted by 16 bits, which means it would only ever
write bits and never clear them. So e.g. if someone starts a conversion
on channel 5, the lowest 4 bits would be 0x5, then starts a conversion
on channel 0, it would still be 5.
Instead of shifting the value by 16 as the mask, let's use the OR'ing of
the appropriate masks shifted by 16.
Note that this is not an issue currently because the only SARADCv2
currently supported has a reset defined in its Device Tree, that reset
resets the SARADC controller before starting a conversion on a channel.
However, this reset is handled as optional by the probe function and
thus proper masking should be used in the event an SARADCv2 without a
reset ever makes it upstream.
Fixes: 757953f8ec69 ("iio: adc: rockchip_saradc: Add support for RK3588")
Signed-off-by: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Reviewed-by: Heiko Stuebner <heiko(a)sntech.de>
Link: https://lore.kernel.org/r/20240223-saradcv2-chan-mask-v1-2-84b06a0f623a@the…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rockchip_saradc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index 2da8d6f3241a..1c0042fbbb54 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -102,12 +102,12 @@ static void rockchip_saradc_start_v2(struct rockchip_saradc *info, int chn)
writel_relaxed(0xc, info->regs + SARADC_T_DAS_SOC);
writel_relaxed(0x20, info->regs + SARADC_T_PD_SOC);
val = FIELD_PREP(SARADC2_EN_END_INT, 1);
- val |= val << 16;
+ val |= SARADC2_EN_END_INT << 16;
writel_relaxed(val, info->regs + SARADC2_END_INT_EN);
val = FIELD_PREP(SARADC2_START, 1) |
FIELD_PREP(SARADC2_SINGLE_MODE, 1) |
FIELD_PREP(SARADC2_CONV_CHANNELS, chn);
- val |= val << 16;
+ val |= (SARADC2_START | SARADC2_SINGLE_MODE | SARADC2_CONV_CHANNELS) << 16;
writel(val, info->regs + SARADC2_CONV_CON);
}
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: adc: rockchip_saradc: fix bitmask for channels on SARADCv2
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the char-misc-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
From b0a4546df24a4f8c59b2d05ae141bd70ceccc386 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Date: Fri, 23 Feb 2024 13:45:21 +0100
Subject: iio: adc: rockchip_saradc: fix bitmask for channels on SARADCv2
The SARADCv2 on RK3588 (the only SoC currently supported that has an
SARADCv2) selects the channel through the channel_sel bitfield which is
the 4 lowest bits, therefore the mask should be GENMASK(3, 0) and not
GENMASK(15, 0).
Fixes: 757953f8ec69 ("iio: adc: rockchip_saradc: Add support for RK3588")
Signed-off-by: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Reviewed-by: Heiko Stuebner <heiko(a)sntech.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Link: https://lore.kernel.org/r/20240223-saradcv2-chan-mask-v1-1-84b06a0f623a@the…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rockchip_saradc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index dd94667a623b..2da8d6f3241a 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -52,7 +52,7 @@
#define SARADC2_START BIT(4)
#define SARADC2_SINGLE_MODE BIT(5)
-#define SARADC2_CONV_CHANNELS GENMASK(15, 0)
+#define SARADC2_CONV_CHANNELS GENMASK(3, 0)
struct rockchip_saradc;
--
2.44.0
The problem seems to be in fs/ntfs3/frecord.c. Original messages were
in German, so here's my translation (original at the end of the post):
[...]
CC lib/zstd/common/zstd_common.o
CC arch/x86/kernel/cpu/feat_ctl.o
fs/ntfs3/frecord.c: In Funktion »ni_read_frame«:
fs/ntfs3/frecord.c:2460:16: Error: variable >>i_size<< is not used"
[-Werror=unused-variable]
2460 | loff_t i_size = i_size_read(&ni->vfs_inode);
| ^~~~~~
AR lib/zstd/built-in.a
[...]
cc1: All warnings are treated as errors
make[4]: *** [scripts/Makefile.build:243: fs/ntfs3/frecord.o] error 1
make[3]: *** [scripts/Makefile.build:480: fs/ntfs3] error 2
make[3]: *** Waiting for not yet finished processes....
[...]
Let me know if you need further information. Thanks.
Rainer
--
Original messages:
[...]
CC lib/zstd/common/zstd_common.o
CC arch/x86/kernel/cpu/feat_ctl.o
fs/ntfs3/frecord.c: In Funktion »ni_read_frame«:
fs/ntfs3/frecord.c:2460:16: Fehler: Variable »i_size« wird nicht
verwendet [-Werror=unused-variable]
2460 | loff_t i_size = i_size_read(&ni->vfs_inode);
| ^~~~~~
AR lib/zstd/built-in.a
CC lib/bug.o
CC fs/udf/inode.o
CC arch/x86/kernel/cpu/intel.o
CC arch/x86/kernel/process_64.o
CC kernel/events/callchain.o
CC lib/buildid.o
cc1: Alle Warnungen werden als Fehler behandelt
make[4]: *** [scripts/Makefile.build:243: fs/ntfs3/frecord.o] Fehler 1
make[3]: *** [scripts/Makefile.build:480: fs/ntfs3] Fehler 2
make[3]: *** Es wird auf noch nicht beendete Prozesse gewartet....
CC kernel/events/hw_breakpoint.o
CC lib/clz_tab.o
CC lib/cmdline.o
CC fs/udf/lowlevel.o
[...]
--
The truth always turns out to be simpler than you thought.
Richard Feynman
Unused USB ports may have bogus location data in ACPI PLD tables.
This causes port peering failures as these unused USB2 and USB3 ports
location may match.
Due to these failures the driver prints a
"usb: port power management may be unreliable" warning, and
unnecessarily blocks port power off during runtime suspend.
This was debugged on a couple DELL systems where the unused ports
all returned zeroes in their location data.
Similar bugreports exist for other systems.
Don't try to peer or match ports that have connect type set to
USB_PORT_NOT_USED.
Fixes: 3bfd659baec8 ("usb: find internal hub tier mismatch via acpi")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218465
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218486
Tested-by: Paul Menzel <pmenzel(a)molgen.mpg.de>
Link: https://lore.kernel.org/linux-usb/5406d361-f5b7-4309-b0e6-8c94408f7d75@molg…
Cc: stable(a)vger.kernel.org # v3.16+
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
v1 -> v2
- Improve commit message
- Add missing Fixes, Closes and Link tags
- send this patch separately for easier picking to usb-linus
drivers/usb/core/port.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index c628c1abc907..4d63496f98b6 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -573,7 +573,7 @@ static int match_location(struct usb_device *peer_hdev, void *p)
struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev);
struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent);
- if (!peer_hub)
+ if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED)
return 0;
hcd = bus_to_hcd(hdev->bus);
@@ -584,7 +584,8 @@ static int match_location(struct usb_device *peer_hdev, void *p)
for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) {
peer = peer_hub->ports[port1 - 1];
- if (peer && peer->location == port_dev->location) {
+ if (peer && peer->connect_type != USB_PORT_NOT_USED &&
+ peer->location == port_dev->location) {
link_peers_report(port_dev, peer);
return 1; /* done */
}
--
2.25.1
From: Christian Gmeiner <cgmeiner(a)igalia.com>
The hwdb selection logic as a feature that allows it to mark some fields
as 'don't care'. If we match with such a field we memcpy(..)
the current etnaviv_chip_identity into ident.
This step can overwrite some id values read from the GPU with the
'don't care' value.
Fix this issue by restoring the affected values after the memcpy(..).
As this is crucial for user space to know when this feature works as
expected increment the minor version too.
Fixes: 4078a1186dd3 ("drm/etnaviv: update hwdb selection logic")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Gmeiner <cgmeiner(a)igalia.com>
---
drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +-
drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 6228ce603248..9a2965741dab 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -494,7 +494,7 @@ static const struct drm_driver etnaviv_drm_driver = {
.desc = "etnaviv DRM",
.date = "20151214",
.major = 1,
- .minor = 3,
+ .minor = 4,
};
/*
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
index 67201242438b..1e38d66702f1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
@@ -265,6 +265,9 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)
{
struct etnaviv_chip_identity *ident = &gpu->identity;
+ const u32 product_id = ident->product_id;
+ const u32 customer_id = ident->customer_id;
+ const u32 eco_id = ident->eco_id;
int i;
for (i = 0; i < ARRAY_SIZE(etnaviv_chip_identities); i++) {
@@ -278,6 +281,17 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)
etnaviv_chip_identities[i].eco_id == ~0U)) {
memcpy(ident, &etnaviv_chip_identities[i],
sizeof(*ident));
+
+ /* Restore some id values if ~0U aka 'don't care' is used. */
+ if (etnaviv_chip_identities[i].product_id == ~0U)
+ ident->product_id = product_id;
+
+ if (etnaviv_chip_identities[i].customer_id == ~0U)
+ ident->customer_id = customer_id;
+
+ if (etnaviv_chip_identities[i].eco_id == ~0U)
+ ident->eco_id = eco_id;
+
return true;
}
}
--
2.44.0
From: Christian Gmeiner <cgmeiner(a)igalia.com>
The hwdb selection logic as a feature that allows it to mark some fields
as 'don't care'. If we match with such a field we memcpy(..)
the current etnaviv_chip_identity into ident.
This step can overwrite some id values read from the GPU with the
'don't care' value.
Fix this issue by restoring the affected values after the memcpy(..).
As this is crucial for user space to know when this feature works as
expected increment the minor version too.
Fixes: 4078a1186dd3 ("drm/etnaviv: update hwdb selection logic")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Gmeiner <cgmeiner(a)igalia.com>
---
V1 -> V2: Fixed patch subject line and removed not needed if clauses.
drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +-
drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 9 +++++++++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 6228ce603248..9a2965741dab 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -494,7 +494,7 @@ static const struct drm_driver etnaviv_drm_driver = {
.desc = "etnaviv DRM",
.date = "20151214",
.major = 1,
- .minor = 3,
+ .minor = 4,
};
/*
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
index 67201242438b..8665f2658d51 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
@@ -265,6 +265,9 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)
{
struct etnaviv_chip_identity *ident = &gpu->identity;
+ const u32 product_id = ident->product_id;
+ const u32 customer_id = ident->customer_id;
+ const u32 eco_id = ident->eco_id;
int i;
for (i = 0; i < ARRAY_SIZE(etnaviv_chip_identities); i++) {
@@ -278,6 +281,12 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)
etnaviv_chip_identities[i].eco_id == ~0U)) {
memcpy(ident, &etnaviv_chip_identities[i],
sizeof(*ident));
+
+ /* Restore some id values as ~0U aka 'don't care' might been used. */
+ ident->product_id = product_id;
+ ident->customer_id = customer_id;
+ ident->eco_id = eco_id;
+
return true;
}
}
--
2.44.0
psci_init_system_suspend() invokes suspend_set_ops() very early during
bootup even before kernel command line for mem_sleep_default is setup.
This leads to kernel command line mem_sleep_default=s2idle not working
as mem_sleep_current gets changed to deep via suspend_set_ops() and never
changes back to s2idle.
Set mem_sleep_current along with mem_sleep_default during kernel command
line setup as default suspend mode.
Fixes: faf7ec4a92c0 ("drivers: firmware: psci: add system suspend support")
CC: stable(a)vger.kernel.org # 5.4+
Signed-off-by: Maulik Shah <quic_mkshah(a)quicinc.com>
---
Changes in v2:
- Set mem_sleep_current during mem_sleep_default kernel command line setup
- Update commit message accordingly
- Retain Fixes: tag
- Link to v1: https://lore.kernel.org/r/20240219-suspend_ops_late_init-v1-1-6330ca9597fa@…
---
kernel/power/suspend.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 742eb26618cc..e3ae93bbcb9b 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -192,6 +192,7 @@ static int __init mem_sleep_default_setup(char *str)
if (mem_sleep_labels[state] &&
!strcmp(str, mem_sleep_labels[state])) {
mem_sleep_default = state;
+ mem_sleep_current = state;
break;
}
---
base-commit: d37e1e4c52bc60578969f391fb81f947c3e83118
change-id: 20240219-suspend_ops_late_init-27fb0b15baee
Best regards,
--
Maulik Shah <quic_mkshah(a)quicinc.com>
In case of a memory allocation failure in the volumes loop we can only
process the already allocated scan_eba and fm_eba array elements on the
error path - others are still uninitialized.
Found by Linux Verification Center (linuxtesting.org).
Fixes: 00abf3041590 ("UBI: Add self_check_eba()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
drivers/mtd/ubi/eba.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 8d1f0e05892c..6f5eadb1598d 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1557,6 +1557,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap,
GFP_KERNEL);
if (!fm_eba[i]) {
ret = -ENOMEM;
+ kfree(scan_eba[i]);
goto out_free;
}
@@ -1592,7 +1593,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap,
}
out_free:
- for (i = 0; i < num_volumes; i++) {
+ while (--i >= 0) {
if (!ubi->volumes[i])
continue;
--
2.43.2
From: Siddh Raman Pant <code(a)siddh.me>
[ Upstream commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 ]
The userspace can configure a loop using an ioctl call, wherein
a configuration of type loop_config is passed (see lo_ioctl()'s
case on line 1550 of drivers/block/loop.c). This proceeds to call
loop_configure() which in turn calls loop_set_status_from_info()
(see line 1050 of loop.c), passing &config->info which is of type
loop_info64*. This function then sets the appropriate values, like
the offset.
loop_device has lo_offset of type loff_t (see line 52 of loop.c),
which is typdef-chained to long long, whereas loop_info64 has
lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h).
The function directly copies offset from info to the device as
follows (See line 980 of loop.c):
lo->lo_offset = info->lo_offset;
This results in an overflow, which triggers a warning in iomap_iter()
due to a call to iomap_iter_done() which has:
WARN_ON_ONCE(iter->iomap.offset > iter->pos);
Thus, check for negative value during loop_set_status_from_info().
Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a2…
Reported-and-tested-by: syzbot+a8e049cd3abd342936b6(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Signed-off-by: Siddh Raman Pant <code(a)siddh.me>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Genjian Zhang <zhanggenjian(a)kylinos.cn>
---
drivers/block/loop.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 0fefd21f0c71..c1caa3e2355f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1271,6 +1271,11 @@ loop_set_status_from_info(struct loop_device *lo,
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
+
+ /* loff_t vars have been assigned __u64 */
+ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
+ return -EOVERFLOW;
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
--
2.25.1
The [ms]envcfg CSR was added in version 1.12 of the RISC-V privileged
ISA (aka S[ms]1p12). However, bits in this CSR are defined by several
other extensions which may be implemented separately from any particular
version of the privileged ISA (for example, some unrelated errata may
prevent an implementation from claiming conformance with Ss1p12). As a
result, Linux cannot simply use the privileged ISA version to determine
if the CSR is present. It must also check if any of these other
extensions are implemented. It also cannot probe the existence of the
CSR at runtime, because Linux does not require Sstrict, so (in the
absence of additional information) it cannot know if a CSR at that
address is [ms]envcfg or part of some non-conforming vendor extension.
Since there are several standard extensions that imply the existence of
the [ms]envcfg CSR, it becomes unwieldy to check for all of them
wherever the CSR is accessed. Instead, define a custom Xlinuxenvcfg ISA
extension bit that is implied by the other extensions and denotes that
the CSR exists as defined in the privileged ISA, containing at least one
of the fields common between menvcfg and senvcfg.
This extension does not need to be parsed from the devicetree or ISA
string because it can only be implemented as a subset of some other
standard extension.
Cc: <stable(a)vger.kernel.org> # v6.7+
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
---
Changes in v4:
- New patch for v4
arch/riscv/include/asm/hwcap.h | 2 ++
arch/riscv/kernel/cpufeature.c | 14 ++++++++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5340f818746b..1f2d2599c655 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,8 @@
#define RISCV_ISA_EXT_ZTSO 72
#define RISCV_ISA_EXT_ZACAS 73
+#define RISCV_ISA_EXT_XLINUXENVCFG 127
+
#define RISCV_ISA_EXT_MAX 128
#define RISCV_ISA_EXT_INVALID U32_MAX
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index c5b13f7dd482..dacffef68ce2 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -201,6 +201,16 @@ static const unsigned int riscv_zvbb_exts[] = {
RISCV_ISA_EXT_ZVKB
};
+/*
+ * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V
+ * privileged ISA, the existence of the CSRs is implied by any extension which
+ * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the
+ * existence of the CSR, and treat it as a subset of those other extensions.
+ */
+static const unsigned int riscv_xlinuxenvcfg_exts[] = {
+ RISCV_ISA_EXT_XLINUXENVCFG
+};
+
/*
* The canonical order of ISA extension names in the ISA string is defined in
* chapter 27 of the unprivileged specification.
@@ -250,8 +260,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
__RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
- __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
- __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+ __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts),
+ __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
--
2.43.1
I'm unsure if this is just an LVM bug, or a BTRFS+LVM interaction bug,
but LVM is definitely involved somehow.
Upgrading from 5.10 to 6.1, I noticed one of my filesystems was
read-only. In dmesg, I found:
BTRFS error (device dm-75): bdev /dev/mapper/lvm-brokenDisk errs: wr
0, rd 0, flush 1, corrupt 0, gen 0
BTRFS warning (device dm-75): chunk 13631488 missing 1 devices, max
tolerance is 0 for writable mount
BTRFS: error (device dm-75) in write_all_supers:4379: errno=-5 IO
failure (errors while submitting device barriers.)
BTRFS info (device dm-75: state E): forced readonly
BTRFS warning (device dm-75: state E): Skipping commit of aborted transaction.
BTRFS: error (device dm-75: state EA) in cleanup_transaction:1992:
errno=-5 IO failure
At first I suspected a btrfs error, but a scrub found no errors, and
it continued to be read-write on 5.10 kernels.
Here is my setup:
/dev/lvm/brokenDisk is a lvm-on-lvm volume. I have /dev/sd{a,b,c,d}
(of varying sizes) in a lower VG, which has three LVs, all raid1
volumes. Two of the volumes are further used as PV's for an upper VGs.
One of the upper VGs has no issues. The non-PV LV has no issue. The
remaining one, /dev/lowerVG/lvmPool, hosting nested LVM, is used as a
PV for VG "lvm", and has 3 volumes inside. Two of those volumes have
no issues (and are btrfs), but the last one is /dev/lvm/brokenDisk.
This volume is the only one that exhibits this behavior, so something
is special.
Or described as layers:
/dev/sd{a,b,c,d} => PV => VG "lowerVG"
/dev/lowerVG/single (RAID1 LV) => BTRFS, works fine
/dev/lowerVG/works (RAID1 LV) => PV => VG "workingUpper"
/dev/workingUpper/{a,b,c} => BTRFS, works fine
/dev/lowerVG/lvmPool (RAID1 LV) => PV => VG "lvm"
/dev/lvm/{a,b} => BTRFS, works fine
/dev/lvm/brokenDisk => BTRFS, Exhibits errors
After some investigation, here is what I've found:
1. This regression was introduced in 5.19. 5.18 and earlier kernels I
can keep this filesystem rw and everything works as expected, while
5.19.0 and later the filesystem is immediately ro on any write
attempt. I couldn't build rc1, but I did confirm rc2 already has this
regression.
2. Passing /dev/lvm/brokenDisk to a KVM VM as /dev/vdb with an
unaffected kernel inside the vm exhibits the ro barrier problem on
unaffected kernels.
3. Passing /dev/lowerVG/lvmPool to a KVM VM as /dev/vdb with an
affected kernel inside the VM and using LVM inside the VM exhibits
correct behavior (I can keep the filesystem rw, no barrier errors on
host or guest)
4. A discussion in IRC with BTRFS folks, and they think the BTRFS
filesystem is fine (btrfs check and btrfs scrub also agree)
5. The dmesg error can be delayed indefinitely by not writing to the
disk, or reading with noatime
6. This affects Debian, Ubuntu, NixOS, and Solus, so I'm fairly
certain it's distro-agnostic, and purely a kernel issue.
7. I can't reproduce this with other LVM-on-LVM setups, so I think the
asymmetric nature of the raid1 volume is potentially contributing
8. There are no new smart errors/failures on any of the disks, disks are healthy
9. I previously had raidintegrity=y and caching enabled. They didn't
affect the issue
#regzbot introduced v5.18..v5.19-rc2
Patrick
From: Yishai Hadas <yishaih(a)nvidia.com>
[ Upstream commit be551ee1574280ef8afbf7c271212ac3e38933ef ]
Relax DEVX access upon modify commands to be UVERBS_ACCESS_READ.
The kernel doesn't need to protect what firmware protects, or what
causes no damage to anyone but the user.
As firmware needs to protect itself from parallel access to the same
object, don't block parallel modify/query commands on the same object in
the kernel side.
This change will allow user space application to run parallel updates to
different entries in the same bulk object.
Tested-by: Tamar Mashiah <tmashiah(a)nvidia.com>
Signed-off-by: Yishai Hadas <yishaih(a)nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur(a)nvidia.com>
Link: https://lore.kernel.org/r/7407d5ed35dc427c1097699e12b49c01e1073406.17064339…
Signed-off-by: Leon Romanovsky <leon(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/mlx5/devx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 26cc7bbcdfe6a..7a3b56c150799 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2811,7 +2811,7 @@ DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
UVERBS_IDR_ANY_OBJECT,
- UVERBS_ACCESS_WRITE,
+ UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
--
2.43.0
The special casing was originally added in pre-git history; reproducing
the commit log here:
> commit a318a92567d77
> Author: Andrew Morton <akpm(a)osdl.org>
> Date: Sun Sep 21 01:42:22 2003 -0700
>
> [PATCH] Speed up direct-io hugetlbpage handling
>
> This patch short-circuits all the direct-io page dirtying logic for
> higher-order pages. Without this, we pointlessly bounce BIOs up to
> keventd all the time.
In the last twenty years, compound pages have become used for more than
just hugetlb. Rewrite these functions to operate on folios instead
of pages and remove the special case for hugetlbfs; I don't think
it's needed any more (and if it is, we can put it back in as a call
to folio_test_hugetlb()).
This was found by inspection; as far as I can tell, this bug can lead
to pages used as the destination of a direct I/O read not being marked
as dirty. If those pages are then reclaimed by the MM without being
dirtied for some other reason, they won't be written out. Then when
they're faulted back in, they will not contain the data they should.
It'll take a pretty unusual setup to produce this problem with several
races all going the wrong way.
This problem predates the folio work; it could for example have been
triggered by mmaping a THP in tmpfs and using that as the target of an
O_DIRECT read.
Fixes: 800d8c63b2e98 ("shmem: add huge pages support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
---
block/bio.c | 46 ++++++++++++++++++++++++----------------------
1 file changed, 24 insertions(+), 22 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 8672179213b9..f46d8ec71fbd 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1171,13 +1171,22 @@ EXPORT_SYMBOL(bio_add_folio);
void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
- struct bvec_iter_all iter_all;
- struct bio_vec *bvec;
+ struct folio_iter fi;
+
+ bio_for_each_folio_all(fi, bio) {
+ struct page *page;
+ size_t done = 0;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (mark_dirty && !PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
- bio_release_page(bio, bvec->bv_page);
+ if (mark_dirty) {
+ folio_lock(fi.folio);
+ folio_mark_dirty(fi.folio);
+ folio_unlock(fi.folio);
+ }
+ page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
+ do {
+ bio_release_page(bio, page++);
+ done += PAGE_SIZE;
+ } while (done < fi.length);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1455,18 +1464,12 @@ EXPORT_SYMBOL(bio_free_pages);
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
*
- * The problem is that we cannot run set_page_dirty() from interrupt context
+ * The problem is that we cannot run folio_mark_dirty() from interrupt context
* because the required locks are not interrupt-safe. So what we can do is to
* mark the pages dirty _before_ performing IO. And in interrupt context,
* check that the pages are still dirty. If so, fine. If not, redirty them
* in process context.
*
- * We special-case compound pages here: normally this means reads into hugetlb
- * pages. The logic in here doesn't really work right for compound pages
- * because the VM does not uniformly chase down the head page in all cases.
- * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
- * handle them at all. So we skip compound pages here at an early stage.
- *
* Note that this code is very hard to test under normal circumstances because
* direct-io pins the pages with get_user_pages(). This makes
* is_page_cache_freeable return false, and the VM will not clean the pages.
@@ -1482,12 +1485,12 @@ EXPORT_SYMBOL(bio_free_pages);
*/
void bio_set_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
+ bio_for_each_folio_all(fi, bio) {
+ folio_lock(fi.folio);
+ folio_mark_dirty(fi.folio);
+ folio_unlock(fi.folio);
}
}
@@ -1530,12 +1533,11 @@ static void bio_dirty_fn(struct work_struct *work)
void bio_check_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
+ struct folio_iter fi;
unsigned long flags;
- struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
+ bio_for_each_folio_all(fi, bio) {
+ if (!folio_test_dirty(fi.folio))
goto defer;
}
--
2.40.1
Resolving a frequency to an efficient one should not transgress policy->max
(which can be set for thermal reason) and policy->min. Currently there is
possibility where scaling_cur_freq can exceed scaling_max_freq when
scaling_max_freq is inefficient frequency. Add additional check to ensure
that resolving a frequency will respect policy->min/max.
Cc: <stable(a)vger.kernel.org>
Fixes: 1f39fa0dccff ("cpufreq: Introducing CPUFREQ_RELATION_E")
Signed-off-by: Shivnandan Kumar <quic_kshivnan(a)quicinc.com>
--
Changes in v2:
-rename function name from cpufreq_table_index_is_in_limits to cpufreq_is_in_limits
-remove redundant outer parenthesis in return statement
-Make comment single line
--
---
include/linux/cpufreq.h | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index afda5f24d3dd..7741244dee6e 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1021,6 +1021,19 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
efficiencies);
}
+static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy,
+ int idx)
+{
+ unsigned int freq;
+
+ if (idx < 0)
+ return false;
+
+ freq = policy->freq_table[idx].frequency;
+
+ return freq == clamp_val(freq, policy->min, policy->max);
+}
+
static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -1054,7 +1067,8 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
return 0;
}
- if (idx < 0 && efficiencies) {
+ /* Limit frequency index to honor policy->min/max */
+ if (!cpufreq_is_in_limits(policy, idx) && efficiencies) {
efficiencies = false;
goto retry;
}
--
2.25.1
On Thu, Feb 29, 2024 at 11:37:28AM +0300, Vasiliy Kovalev wrote:
[...]
> This patch fixes another problem, but a similar one, since the sequence is
> incorrect when registering subsystems.
>
> Initially, the registration sequence in the gtp module was as follows:
>
> 1) rtnl_link_register();
>
> 2) genl_register_family();
>
> 3) register_pernet_subsys();
>
> During debugging of the module, when starting the syzkaller reproducer, it
> turned out that after genl_register_family() (2),
>
> without waiting for register_pernet_subsys()(3), the /.dumpit/ event is
> triggered, in which the data of the unregistered pernet subsystem is
> accessed.
>
> That is, the bug was fixed by the commit
>
> 136cfaca2256 ("gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp()")[1]
>
> and the registration sequence became as follows:
>
> 1) rtnl_link_register();
>
> 2) register_pernet_subsys();
>
> 3) genl_register_family();
>
> However, syzkaller has discovered another problem:
>
> after registering rtnl_link_register, the .newlink event is triggered, in
> which the data of the unregistered pernet subsystem is accessed.
>
> This problem is reproducible on current stable kernels and the latest
> upstream kernel 6.8-rc6, in which the patch 136cfaca2256 [1] is applied.
>
> Therefore, the correct sequence should be as follows:
>
> 1)register_pernet_subsys();
>
> 2) rtnl_link_register();
>
> 3) genl_register_family();
>
> The proposed patch is developed on top of the commit changes [1], does not
> conflict with it and fixes the described bug.
>
> [1] https://lore.kernel.org/lkml/20240220160434.29bcaf43@kernel.org/T/#mb1f72c2…
Thanks for explaining, fix LGTM.
From: Hans de Goede <hdegoede(a)redhat.com>
[ Upstream commit 551539a8606e28cb2a130f8ef3e9834235b456c4 ]
The DMI strings used for the LattePanda board DMI quirks are very generic.
Using the dmidecode database from https://linux-hardware.org/ shows
that the chosen DMI strings also match the following 2 laptops
which also have a rt5645 codec:
Insignia NS-P11W7100 https://linux-hardware.org/?computer=E092FFF8BA04
Insignia NS-P10W8100 https://linux-hardware.org/?computer=AFB6C0BF7934
All 4 hw revisions of the LattePanda board have "S70CR" in their BIOS
version DMI strings:
DF-BI-7-S70CR100-*
DF-BI-7-S70CR110-*
DF-BI-7-S70CR200-*
LP-BS-7-S70CR700-*
See e.g. https://linux-hardware.org/?computer=D98250A817C0
Add a partial (non exact) DMI match on this string to make the LattePanda
board DMI match more precise to avoid false-positive matches.
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://msgid.link/r/20240211212736.179605-1-hdegoede@redhat.com
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
sound/soc/codecs/rt5645.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 37ad3bee66a47..352aefddc7d70 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3796,6 +3796,16 @@ static const struct dmi_system_id dmi_platform_data[] = {
DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"),
+ /*
+ * Above strings are too generic, LattePanda BIOS versions for
+ * all 4 hw revisions are:
+ * DF-BI-7-S70CR100-*
+ * DF-BI-7-S70CR110-*
+ * DF-BI-7-S70CR200-*
+ * LP-BS-7-S70CR700-*
+ * Do a partial match for S70CR to avoid false positive matches.
+ */
+ DMI_MATCH(DMI_BIOS_VERSION, "S70CR"),
},
.driver_data = (void *)&lattepanda_board_platform_data,
},
--
2.43.0
From: Filipe Manana <fdmanana(a)suse.com>
[ Upstream commit 1693d5442c458ae8d5b0d58463b873cd879569ed ]
Add a helper function to determine if a block group is being used and make
use of it at btrfs_delete_unused_bgs(). This helper will also be used in
future code changes.
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/btrfs/block-group.c | 3 +--
fs/btrfs/block-group.h | 7 +++++++
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 4ca6828586af5..a2bac7196d18b 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1330,8 +1330,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
}
spin_lock(&block_group->lock);
- if (block_group->reserved || block_group->pinned ||
- block_group->used || block_group->ro ||
+ if (btrfs_is_block_group_used(block_group) || block_group->ro ||
list_is_singular(&block_group->list)) {
/*
* We want to bail if we made new allocations or have
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index a15868d607a92..f042c1c85a255 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -211,6 +211,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
return (block_group->start + block_group->length);
}
+static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg)
+{
+ lockdep_assert_held(&bg->lock);
+
+ return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0);
+}
+
static inline bool btrfs_is_block_group_data_only(
struct btrfs_block_group *block_group)
{
--
2.43.0
From: Johannes Berg <johannes.berg(a)intel.com>
Ben Greear further reports deadlocks during concurrent debugfs
remove while files are being accessed, even though the code in
question now uses debugfs cancellations. Turns out that despite
all the review on the locking, we missed completely that the
logic is wrong: if the refcount hits zero we can finish (and
need not wait for the completion), but if it doesn't we have
to trigger all the cancellations. As written, we can _never_
get into the loop triggering the cancellations. Fix this, and
explain it better while at it.
Cc: stable(a)vger.kernel.org
Fixes: 8c88a474357e ("debugfs: add API to allow debugfs operations cancellation")
Reported-by: Ben Greear <greearb(a)candelatech.com>
Closes: https://lore.kernel.org/r/1c9fa9e5-09f1-0522-fdbc-dbcef4d255ca@candelatech.…
Tested-by: Madhan Sai <madhan.singaraju(a)candelatech.com>
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
---
fs/debugfs/inode.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 034a617cb1a5..a40da0065433 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -751,13 +751,28 @@ static void __debugfs_file_removed(struct dentry *dentry)
if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
return;
- /* if we hit zero, just wait for all to finish */
- if (!refcount_dec_and_test(&fsd->active_users)) {
- wait_for_completion(&fsd->active_users_drained);
+ /* if this was the last reference, we're done */
+ if (refcount_dec_and_test(&fsd->active_users))
return;
- }
- /* if we didn't hit zero, try to cancel any we can */
+ /*
+ * If there's still a reference, the code that obtained it can
+ * be in different states:
+ * - The common case of not using cancellations, or already
+ * after debugfs_leave_cancellation(), where we just need
+ * to wait for debugfs_file_put() which signals the completion;
+ * - inside a cancellation section, i.e. between
+ * debugfs_enter_cancellation() and debugfs_leave_cancellation(),
+ * in which case we need to trigger the ->cancel() function,
+ * and then wait for debugfs_file_put() just like in the
+ * previous case;
+ * - before debugfs_enter_cancellation() (but obviously after
+ * debugfs_file_get()), in which case we may not see the
+ * cancellation in the list on the first round of the loop,
+ * but debugfs_enter_cancellation() signals the completion
+ * after adding it, so this code gets woken up to call the
+ * ->cancel() function.
+ */
while (refcount_read(&fsd->active_users)) {
struct debugfs_cancellation *c;
--
2.43.2
Currently xhci_map_urb_for_dma() creates a temporary buffer
and copies the SG list to the new linear buffer. But if the
kzalloc_node() fails, then the following sg_pcopy_to_buffer()
can lead to crash since it tries to memcpy to NULL pointer.
So return -ENOMEM if kzalloc returns null pointer.
Cc: <stable(a)vger.kernel.org> # 5.11
Fixes: 2017a1e58472 ("usb: xhci: Use temporary buffer to consolidate SG")
Signed-off-by: Prashanth K <quic_prashk(a)quicinc.com>
---
v2: Updated -EAGAIN to -ENOMEM
drivers/usb/host/xhci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index c057c42c36f4..35e9efdee3b2 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1217,6 +1217,8 @@ static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb)
temp = kzalloc_node(buf_len, GFP_ATOMIC,
dev_to_node(hcd->self.sysdev));
+ if (!temp)
+ return -ENOMEM;
if (usb_urb_dir_out(urb))
sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
--
2.25.1
From: Prashanth K <quic_prashk(a)quicinc.com>
Currently xhci_map_urb_for_dma() creates a temporary buffer and copies
the SG list to the new linear buffer. But if the kzalloc_node() fails,
then the following sg_pcopy_to_buffer() can lead to crash since it
tries to memcpy to NULL pointer.
So return -ENOMEM if kzalloc returns null pointer.
Cc: <stable(a)vger.kernel.org> # 5.11
Fixes: 2017a1e58472 ("usb: xhci: Use temporary buffer to consolidate SG")
Signed-off-by: Prashanth K <quic_prashk(a)quicinc.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 5d70e0176527..8579603edaff 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1219,6 +1219,8 @@ static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb)
temp = kzalloc_node(buf_len, GFP_ATOMIC,
dev_to_node(hcd->self.sysdev));
+ if (!temp)
+ return -ENOMEM;
if (usb_urb_dir_out(urb))
sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
--
2.25.1
The commit 80dd33cf72d1 ("drivers: base: Fix device link removal")
introduces a workqueue to release the consumer and supplier devices used
in the devlink.
In the job queued, devices are release and in turn, when all the
references to these devices are dropped, the release function of the
device itself is called.
Nothing is present to provide some synchronisation with this workqueue
in order to ensure that all ongoing releasing operations are done and
so, some other operations can be started safely.
For instance, in the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are released and related devlinks are removed
(jobs pushed in the workqueue).
During the step 2, OF nodes are destroyed but, without any
synchronisation with devlink removal jobs, of_overlay_remove() can raise
warnings related to missing of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2
Indeed, the missing of_node_put() call is going to be done, too late,
from the workqueue job execution.
Introduce device_link_wait_removal() to offer a way to synchronize
operations waiting for the end of devlink removals (i.e. end of
workqueue jobs).
Also, as a flushing operation is done on the workqueue, the workqueue
used is moved from a system-wide workqueue to a local one.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/base/core.c | 26 +++++++++++++++++++++++---
include/linux/device.h | 1 +
2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d5f4e4aac09b..80d9430856a8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -532,12 +533,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ drain_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4099,9 +4114,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/include/linux/device.h b/include/linux/device.h
index 1795121dee9a..d7d8305a72e8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1249,6 +1249,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
--
2.43.0
This is the start of the stable review cycle for the 5.4.270 release.
There are 84 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 29 Feb 2024 13:15:36 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.270-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.270-rc1
Andrii Nakryiko <andriin(a)fb.com>
scripts/bpf: Fix xdp_md forward declaration typo
Bart Van Assche <bvanassche(a)acm.org>
fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio
Erik Kurzinger <ekurzinger(a)nvidia.com>
drm/syncobj: call drm_syncobj_fence_add_wait when WAIT_AVAILABLE flag is set
Christian König <christian.koenig(a)amd.com>
drm/syncobj: make lockdep complain on WAIT_FOR_SUBMIT v3
Florian Westphal <fw(a)strlen.de>
netfilter: nf_tables: set dormant flag on hook register failure
Sabrina Dubroca <sd(a)queasysnail.net>
tls: stop recv() if initial process_rx_list gave us non-DATA
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: drop pointless else after goto
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: jump to a more appropriate label
Jason Gunthorpe <jgg(a)nvidia.com>
s390: use the correct count for __iowrite64_copy()
Wolfram Sang <wsa+renesas(a)sang-engineering.com>
packet: move from strlcpy with unused retval to strscpy
Vasiliy Kovalev <kovalev(a)altlinux.org>
ipv6: sr: fix possible use-after-free and null-ptr-deref
Daniil Dulov <d.dulov(a)aladdin.ru>
afs: Increase buffer size in afs_update_volume_status()
Eric Dumazet <edumazet(a)google.com>
ipv6: properly combine dev_base_seq and ipv6.dev_addr_genid
Eric Dumazet <edumazet(a)google.com>
ipv4: properly combine dev_base_seq and ipv4.dev_addr_genid
Arnd Bergmann <arnd(a)arndb.de>
nouveau: fix function cast warnings
Randy Dunlap <rdunlap(a)infradead.org>
scsi: jazz_esp: Only build if SCSI core is builtin
Gianmarco Lusvardi <glusvardi(a)posteo.net>
bpf, scripts: Correct GPL license name
Andrii Nakryiko <andriin(a)fb.com>
scripts/bpf: teach bpf_helpers_doc.py to dump BPF helper definitions
Arnd Bergmann <arnd(a)arndb.de>
RDMA/srpt: fix function pointer cast warnings
Bart Van Assche <bvanassche(a)acm.org>
RDMA/srpt: Make debug output more detailed
Kalesh AP <kalesh-anakkur.purayil(a)broadcom.com>
RDMA/bnxt_re: Return error for SRQ resize
Zhipeng Lu <alexious(a)zju.edu.cn>
IB/hfi1: Fix a memleak in init_credit_return
Xu Yang <xu.yang_2(a)nxp.com>
usb: roles: don't get/set_role() when usb_role_switch is unregistered
Krishna Kurapati <quic_kriskura(a)quicinc.com>
usb: gadget: ncm: Avoid dropping datagrams of properly parsed NTBs
Frank Li <Frank.Li(a)nxp.com>
usb: cdns3: fix memory double free when handle zero packet
Frank Li <Frank.Li(a)nxp.com>
usb: cdns3: fixed memory use after free at cdns3_gadget_ep_disable()
Nikita Shubin <nikita.shubin(a)maquefel.me>
ARM: ep93xx: Add terminator to gpiod_lookup_table
Tom Parkin <tparkin(a)katalix.com>
l2tp: pass correct message length to ip6_append_data
Vidya Sagar <vidyas(a)nvidia.com>
PCI/MSI: Prevent MSI hardware interrupt number truncation
Vasiliy Kovalev <kovalev(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp()
Mikulas Patocka <mpatocka(a)redhat.com>
dm-crypt: don't modify the data when using authenticated encryption
Daniel Vacek <neelx(a)redhat.com>
IB/hfi1: Fix sdma.h tx->num_descs off-by-one error
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
PCI: tegra: Fix OF node reference leak
Pali Rohár <pali(a)kernel.org>
PCI: tegra: Fix reporting GPIO error value
Sireesh Kodali <sireeshkodali1(a)gmail.com>
arm64: dts: qcom: msm8916: Fix typo in pronto remoteproc node
Nathan Chancellor <nathan(a)kernel.org>
drm/amdgpu: Fix type of second parameter in trans_msg() callback
Matthew Wilcox (Oracle) <willy(a)infradead.org>
iomap: Set all uptodate bits for an Uptodate page
Mikulas Patocka <mpatocka(a)redhat.com>
dm-integrity: don't modify bio's immutable bio_vec in integrity_metadata()
Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
x86/alternatives: Disable KASAN in apply_alternatives()
Trek <trek00(a)inbox.ru>
drm/amdgpu: Check for valid number of registers to read
Icenowy Zheng <icenowy(a)aosc.io>
Revert "drm/sun4i: dsi: Change the start delay calculation"
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
ALSA: hda/realtek - Enable micmute LED on and HP system
Björn Töpel <bjorn.topel(a)gmail.com>
selftests/bpf: Avoid running unprivileged tests with alignment requirements
Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com>
net: bridge: clear bridge's private skb space on xmit
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
spi: mt7621: Fix an error message in mt7621_spi_probe()
John Stultz <john.stultz(a)linaro.org>
driver core: Set deferred_probe_timeout to a longer default if CONFIG_MODULES is set
Miaoqian Lin <linmq006(a)gmail.com>
pinctrl: rockchip: Fix refcount leak in rockchip_pinctrl_parse_groups
Lee Jones <lee.jones(a)linaro.org>
pinctrl: pinctrl-rockchip: Fix a bunch of kerneldoc misdemeanours
Eric Dumazet <edumazet(a)google.com>
tcp: add annotations around sk->sk_shutdown accesses
Soheil Hassas Yeganeh <soheil(a)google.com>
tcp: return EPOLLOUT from tcp_poll only when notsent_bytes is half the limit
Paolo Abeni <pabeni(a)redhat.com>
tcp: factor out __tcp_close() helper
Geert Uytterhoeven <geert+renesas(a)glider.be>
pmdomain: renesas: r8a77980-sysc: CR7 must be always on
Alexandra Winter <wintera(a)linux.ibm.com>
s390/qeth: Fix potential loss of L3-IP@ in case of network issues
Yi Sun <yi.sun(a)unisoc.com>
virtio-blk: Ensure no requests in virtqueues before deleting vqs.
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
firewire: core: send bus reset promptly on gap count error
Hannes Reinecke <hare(a)suse.de>
scsi: lpfc: Use unsigned type for num_sge
Zhang Rui <rui.zhang(a)intel.com>
hwmon: (coretemp) Enlarge per package core count limit
Daniel Wagner <dwagner(a)suse.de>
nvmet-fc: abort command when there is no binding
Xin Long <lucien.xin(a)gmail.com>
netfilter: conntrack: check SCTP_CID_SHUTDOWN_ACK for vtag setting in sctp_new
Chen-Yu Tsai <wens(a)csie.org>
ASoC: sunxi: sun4i-spdif: Add support for Allwinner H616
Guixin Liu <kanie(a)linux.alibaba.com>
nvmet-tcp: fix nvme tcp ida memory leak
Martin Blumenstingl <martin.blumenstingl(a)googlemail.com>
regulator: pwm-regulator: Add validity checks in continuous .get_voltage
Baokun Li <libaokun1(a)huawei.com>
ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal()
Baokun Li <libaokun1(a)huawei.com>
ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found()
Lennert Buytenhek <kernel(a)wantstofly.org>
ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers
Conrad Kostecki <conikost(a)gentoo.org>
ahci: asm1166: correct count of reported ports
Fullway Wang <fullwaywang(a)outlook.com>
fbdev: sis: Error out if pixclock equals zero
Fullway Wang <fullwaywang(a)outlook.com>
fbdev: savage: Error out if pixclock equals zero
Felix Fietkau <nbd(a)nbd.name>
wifi: mac80211: fix race condition on enabling fast-xmit
Michal Kazior <michal(a)plume.com>
wifi: cfg80211: fix missing interfaces when dumping
Vinod Koul <vkoul(a)kernel.org>
dmaengine: fsl-qdma: increase size of 'irq_name'
Vinod Koul <vkoul(a)kernel.org>
dmaengine: shdma: increase size of 'dev_id'
Dmitry Bogdanov <d.bogdanov(a)yadro.com>
scsi: target: core: Add TMF to tmr_list handling
Cyril Hrubis <chrubis(a)suse.cz>
sched/rt: Disallow writing invalid values to sched_rt_period_us
Cyril Hrubis <chrubis(a)suse.cz>
sched/rt: Fix sysctl_sched_rr_timeslice intial value
Lokesh Gidra <lokeshgidra(a)google.com>
userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: replace WARN_ONs for invalid DAT metadata block requests
GONG, Ruiqi <gongruiqi1(a)huawei.com>
memcg: add refcnt for pcpu stock to avoid UAF problem in drain_all_stock()
Cyril Hrubis <chrubis(a)suse.cz>
sched/rt: sysctl_sched_rr_timeslice show default timeslice after reset
Jamal Hadi Salim <jhs(a)mojatatu.com>
net/sched: Retire dsmark qdisc
Jamal Hadi Salim <jhs(a)mojatatu.com>
net/sched: Retire ATM qdisc
Jamal Hadi Salim <jhs(a)mojatatu.com>
net/sched: Retire CBQ qdisc
Oliver Upton <oliver.upton(a)linux.dev>
KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler
Oliver Upton <oliver.upton(a)linux.dev>
KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table()
-------------
Diffstat:
Makefile | 4 +-
arch/arm/mach-ep93xx/core.c | 1 +
arch/arm64/boot/dts/qcom/msm8916.dtsi | 4 +-
arch/s390/pci/pci.c | 2 +-
arch/x86/kernel/alternative.c | 13 +
drivers/ata/ahci.c | 34 +-
drivers/ata/ahci.h | 1 +
drivers/base/dd.c | 9 +
drivers/block/virtio_blk.c | 7 +-
drivers/dma/fsl-qdma.c | 2 +-
drivers/dma/sh/shdma.h | 2 +-
drivers/firewire/core-card.c | 18 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 5 +-
drivers/gpu/drm/drm_syncobj.c | 16 +-
drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 8 +-
drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c | 3 +-
drivers/hwmon/coretemp.c | 2 +-
drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 +-
drivers/infiniband/hw/hfi1/pio.c | 6 +-
drivers/infiniband/hw/hfi1/sdma.c | 2 +-
drivers/infiniband/ulp/srpt/ib_srpt.c | 18 +-
drivers/md/dm-crypt.c | 6 +
drivers/md/dm-integrity.c | 11 +-
drivers/net/gtp.c | 10 +-
drivers/nvme/target/fc.c | 8 +-
drivers/nvme/target/tcp.c | 1 +
drivers/pci/controller/pci-tegra.c | 17 +-
drivers/pci/msi.c | 2 +-
drivers/pinctrl/pinctrl-rockchip.c | 23 +-
drivers/regulator/pwm-regulator.c | 3 +
drivers/s390/net/qeth_l3_main.c | 9 +-
drivers/scsi/Kconfig | 2 +-
drivers/scsi/lpfc/lpfc_scsi.c | 12 +-
drivers/soc/renesas/r8a77980-sysc.c | 3 +-
drivers/spi/spi-mt7621.c | 8 +-
drivers/target/target_core_device.c | 5 -
drivers/target/target_core_transport.c | 4 +
drivers/usb/cdns3/gadget.c | 8 +-
drivers/usb/gadget/function/f_ncm.c | 10 +-
drivers/usb/roles/class.c | 12 +-
drivers/video/fbdev/savage/savagefb_driver.c | 3 +
drivers/video/fbdev/sis/sis_main.c | 2 +
fs/afs/volume.c | 4 +-
fs/aio.c | 9 +-
fs/ext4/mballoc.c | 13 +-
fs/iomap/buffered-io.c | 3 +
fs/nilfs2/dat.c | 27 +-
include/linux/fs.h | 2 +
include/linux/lockdep.h | 5 +
include/net/tcp.h | 1 +
kernel/sched/rt.c | 10 +-
kernel/sysctl.c | 4 +
mm/memcontrol.c | 6 +
mm/userfaultfd.c | 14 +-
net/bridge/br_device.c | 2 +
net/ipv4/af_inet.c | 2 +-
net/ipv4/devinet.c | 21 +-
net/ipv4/tcp.c | 27 +-
net/ipv4/tcp_input.c | 4 +-
net/ipv6/addrconf.c | 21 +-
net/ipv6/seg6.c | 20 +-
net/l2tp/l2tp_ip6.c | 2 +-
net/mac80211/sta_info.c | 2 +
net/mac80211/tx.c | 2 +-
net/netfilter/nf_conntrack_proto_sctp.c | 2 +-
net/netfilter/nf_tables_api.c | 1 +
net/packet/af_packet.c | 4 +-
net/sched/Kconfig | 42 -
net/sched/Makefile | 3 -
net/sched/sch_atm.c | 710 --------
net/sched/sch_cbq.c | 1818 ---------------------
net/sched/sch_dsmark.c | 523 ------
net/tls/tls_sw.c | 12 +-
net/wireless/nl80211.c | 1 +
scripts/bpf_helpers_doc.py | 157 +-
sound/pci/hda/patch_realtek.c | 6 +-
sound/soc/sunxi/sun4i-spdif.c | 5 +
tools/testing/selftests/bpf/test_verifier.c | 13 +
virt/kvm/arm/vgic/vgic-its.c | 5 +
80 files changed, 572 insertions(+), 3255 deletions(-)
The AMD USB host controller (1022:43f7) does not enter PCI D3 by default
when nothing is connected. This is due to the policy introduced by
'commit a611bf473d1f ("xhci-pci: Set runtime PM as default policy on all
xHC 1.2 or later devices")', which only covers 1.2 or later devices.
Therefore, by default, allow RPM on the AMD USB controller [1022:43f7].
Fixes: 4baf12181509 ("xhci: Loosen RPM as default policy to cover for AMD xHC 1.1")
Link: https://lore.kernel.org/all/12335218.O9o76ZdvQC@natalenko.name/
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: stable(a)vger.kernel.org
Tested-by: Oleksandr Natalenko <oleksandr(a)natalenko.name>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar(a)amd.com>
---
Changes in v2:
- Added Cc: stable(a)vger.kernel.org
drivers/usb/host/xhci-pci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index b534ca9752be..1eb7a41a75d7 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -473,6 +473,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
/* xHC spec requires PCI devices to support D3hot and D3cold */
if (xhci->hci_version >= 0x120)
xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
+ else if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x43f7)
+ xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
if (xhci->quirks & XHCI_RESET_ON_RESUME)
xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
--
2.25.1
In the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are destroyed and devlinks are removed.
During the step 2, OF nodes are destroyed but
__of_changeset_entry_destroy() can raise warnings related to missing
of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2 ...
Indeed, during the devlink removals performed at step 1, the removal
itself releasing the device (and the attached of_node) is done by a job
queued in a workqueue and so, it is done asynchronously with respect to
function calls.
When the warning is present, of_node_put() will be called but wrongly
too late from the workqueue job.
In order to be sure that any ongoing devlink removals are done before
the of_node destruction, synchronize the of_overlay_remove() with the
devlink removals.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/of/overlay.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 2ae7e9d24a64..99659ae9fb28 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -853,6 +853,14 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
{
int i;
+ /*
+ * Wait for any ongoing device link removals before removing some of
+ * nodes. Drop the global lock while waiting
+ */
+ mutex_unlock(&of_mutex);
+ device_link_wait_removal();
+ mutex_lock(&of_mutex);
+
if (ovcs->cset.entries.next)
of_changeset_destroy(&ovcs->cset);
@@ -862,7 +870,6 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
ovcs->id = 0;
}
-
for (i = 0; i < ovcs->count; i++) {
of_node_put(ovcs->fragments[i].target);
of_node_put(ovcs->fragments[i].overlay);
--
2.43.0
In the scenario of entering hibernation with udisk in the system, if the
udisk was gone or resume fail in the thaw phase of hibernation. Its state
will be set to NOTATTACHED. At this point, usb_hub_wq was already freezed
and can't not handle disconnect event. Next, in the poweroff phase of
hibernation, SYNCHRONIZE_CACHE SCSI command will be sent to this udisk
when poweroff this scsi device, which will cause uas_submit_urbs to be
called to submit URB for sense/data/cmd pipe. However, these URBs will
submit fail as device was set to NOTATTACHED state. Then, uas_submit_urbs
will return a value SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will lead
the SCSI layer go into an ugly loop and system fail to go into hibernation.
On the other hand, when we specially check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to SCSI layer will cause device
poweroff fail and system shutdown instead of entering hibernation.
To fix this issue, let uas_submit_urbs function to return a value -ENODEV
when submit URB fail with device in NOTATTACHED state. At the same time,
we need to translate -ENODEV to DID_NOT_CONNECT for the SCSI layer.
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
---
v1->v2
- Modify the description of this patch.
drivers/usb/storage/uas.c | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 9707f53cfda9..967f18db525a 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,16 +541,15 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
@@ -562,9 +561,9 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return (err == -ENODEV) ? -ENODEV : SCSI_MLQUEUE_DEVICE_BUSY;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -582,7 +581,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return (err == -ENODEV) ? -ENODEV : SCSI_MLQUEUE_DEVICE_BUSY;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -602,7 +601,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return (err == -ENODEV) ? -ENODEV : SCSI_MLQUEUE_DEVICE_BUSY;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -621,7 +620,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return (err == -ENODEV) ? -ENODEV : SCSI_MLQUEUE_DEVICE_BUSY;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +697,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
--
2.32.0
This is the backport of recently upstreamed series that moves VERW
execution to a later point in exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW executions
may end into MDS affected CPU buffers. Moving VERW closer to ring
transition reduces the attack surface.
Patch 1/6 includes a minor fix that is queued for upstream:
https://lore.kernel.org/lkml/170899674562.398.6398007479766564897.tip-bot2@…
Patch 1,2,5 and 6 needed conflict resolution.
I saw a few new warnings:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction
I tried using REACHABLE, but that did not fix the warning.
For the below warning:
vmlinux.o: warning: objtool: .altinstr_replacement+0x17: unsupported relocation in alternatives section
not sure if this is related to this series or a pre-existing warning, I
will check later without this series.
I am not too concerned because the alternative did substitute verw
correctly:
entry_SYSCALL_64:
...
0xffffffff8200013d <+253>: swapgs
0xffffffff82000140 <+256>: verw 0xffffffff82000000
0xffffffff82000148 <+264>: sysretq
0xffffffff8200014b <+267>: int3
---
Pawan Gupta (5):
x86/bugs: Add asm helpers for executing VERW
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry.S | 22 +++++++++++++++++++++
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/cpufeatures.h | 2 +-
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/nospec-branch.h | 25 ++++++++++++------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 12 ++++++++----
13 files changed, 103 insertions(+), 46 deletions(-)
---
base-commit: 81e1dc2f70014b9523dd02ca763788e4f81e5bac
change-id: 20240226-delay-verw-backport-6-1-y-4b0cec84087c
The commit 80dd33cf72d1 ("drivers: base: Fix device link removal")
introduces a workqueue to release the consumer and supplier devices used
in the devlink.
In the job queued, devices are release and in turn, when all the
references to these devices are dropped, the release function of the
device itself is called.
Nothing is present to provide some synchronisation with this workqueue
in order to ensure that all ongoing releasing operations are done and
so, some other operations can be started safely.
For instance, in the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are released and related devlinks are removed
(jobs pushed in the workqueue).
During the step 2, OF nodes are destroyed but, without any
synchronisation with devlink removal jobs, of_overlay_remove() can raise
warnings related to missing of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2
Indeed, the missing of_node_put() call is going to be done, too late,
from the workqueue job execution.
Introduce device_link_wait_removal() to offer a way to synchronize
operations waiting for the end of devlink removals (i.e. end of
workqueue jobs).
Also, as a flushing operation is done on the workqueue, the workqueue
used is moved from a system-wide workqueue to a local one.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/base/core.c | 26 +++++++++++++++++++++++---
include/linux/device.h | 1 +
2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d5f4e4aac09b..80d9430856a8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -532,12 +533,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ drain_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4099,9 +4114,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/include/linux/device.h b/include/linux/device.h
index 1795121dee9a..d7d8305a72e8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1249,6 +1249,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
--
2.43.0
On Fri, Feb 23, 2024 at 9:24 PM Saravana Kannan <saravanak(a)google.com> wrote:
>
> Introduced a stupid bug in commit 782bfd03c3ae ("of: property: Improve
> finding the supplier of a remote-endpoint property") due to a last minute
> incorrect edit of "index !=0" into "!index". This patch fixes it to be
> "index > 0" to match the comment right next to it.
Greg, this needs to land in the stable branches once Rob picks it up
for the next 6.8-rc.
-Saravana
>
> Reported-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
> Link: https://lore.kernel.org/lkml/20240223171849.10f9901d@booty/
> Fixes: 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property")
> Signed-off-by: Saravana Kannan <saravanak(a)google.com>
> ---
> Using Link: instead of Closes: because Luca reported two separate issues.
>
> Sorry about introducing a stupid bug in an -rcX Rob.
>
> -Saravana
>
> drivers/of/property.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/of/property.c b/drivers/of/property.c
> index b71267c6667c..fa8cd33be131 100644
> --- a/drivers/of/property.c
> +++ b/drivers/of/property.c
> @@ -1304,7 +1304,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np,
> int index)
> {
> /* Return NULL for index > 0 to signify end of remote-endpoints. */
> - if (!index || strcmp(prop_name, "remote-endpoint"))
> + if (index > 0 || strcmp(prop_name, "remote-endpoint"))
> return NULL;
>
> return of_graph_get_remote_port_parent(np);
> --
> 2.44.0.rc0.258.g7320e95886-goog
>
Commit eaae75754d81 ("docs: turn off "smart quotes" in the HTML build")
disabled conversion of quote marks along with that of dashes.
Despite the short summary, the change affects not only HTML build
but also other build targets including PDF.
However, as "smart quotes" had been enabled for more than half a
decade already, quite a few readers of HTML pages are likely expecting
conversions of "foo" -> “foo” and 'bar' -> ‘bar’.
Furthermore, in LaTeX typesetting convention, it is common to use
distinct marks for opening and closing quote marks.
To satisfy such readers' expectation, restore conversion of quotes
only by setting smartquotes_action [1].
Link: [1] https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-smart…
Cc: stable(a)vger.kernel.org # v6.4
Signed-off-by: Akira Yokosawa <akiyks(a)gmail.com>
---
Documentation/conf.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/Documentation/conf.py b/Documentation/conf.py
index da64c9fb7e07..d148f3e8dd57 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -346,9 +346,9 @@ sys.stderr.write("Using %s theme\n" % html_theme)
html_static_path = ['sphinx-static']
# If true, Docutils "smart quotes" will be used to convert quotes and dashes
-# to typographically correct entities. This will convert "--" to "—",
-# which is not always what we want, so disable it.
-smartquotes = False
+# to typographically correct entities. However, conversion of "--" to "—"
+# is not always what we want, so enable only quotes.
+smartquotes_action = 'q'
# Custom sidebar templates, maps document names to template names.
# Note that the RTD theme ignores this
base-commit: 32ed7930304ca7600a54c2e702098bd2ce7086af
--
2.34.1
Commit 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting
1.9.0 ops") started enabling ASPM unconditionally when the hardware
claims to support it. This triggers Correctable Errors for some PCIe
devices on machines like the Lenovo ThinkPad X13s, which could indicate
an incomplete driver ASPM implementation or that the hardware does in
fact not support L0s.
Add support for disabling ASPM L0s in the devicetree when it is not
supported on a particular machine and controller.
Note that only the 1.9.0 ops enable ASPM currently.
Fixes: 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops")
Cc: stable(a)vger.kernel.org # 6.7
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-qcom.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 09d485df34b9..0fb5dc06d2ef 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -273,6 +273,25 @@ static int qcom_pcie_start_link(struct dw_pcie *pci)
return 0;
}
+static void qcom_pcie_clear_aspm_l0s(struct dw_pcie *pci)
+{
+ u16 offset;
+ u32 val;
+
+ if (!of_property_read_bool(pci->dev->of_node, "aspm-no-l0s"))
+ return;
+
+ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+
+ dw_pcie_dbi_ro_wr_en(pci);
+
+ val = readl(pci->dbi_base + offset + PCI_EXP_LNKCAP);
+ val &= ~PCI_EXP_LNKCAP_ASPM_L0S;
+ writel(val, pci->dbi_base + offset + PCI_EXP_LNKCAP);
+
+ dw_pcie_dbi_ro_wr_dis(pci);
+}
+
static void qcom_pcie_clear_hpc(struct dw_pcie *pci)
{
u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
@@ -962,6 +981,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
{
+ qcom_pcie_clear_aspm_l0s(pcie->pci);
qcom_pcie_clear_hpc(pcie->pci);
return 0;
--
2.43.0
Before 9d8e536 ("ALSA: memalloc: Try dma_alloc_noncontiguous() at first")
the alsa non-contiguous allocator always called the alsa fallback
allocator in the non-iommu case. This allocated non-contig memory
consisting of progressively smaller contiguous chunks. Allocation was
fast due to the OR-ing in of __GFP_NORETRY.
After 9d8e536 ("ALSA: memalloc: Try dma_alloc_noncontiguous() at first")
the code tries the dma non-contig allocator first, then falls back to
the alsa fallback allocator. In the non-iommu case, the former supports
only a single contiguous chunk.
We have observed experimentally that under heavy memory fragmentation,
allocating a large-ish contiguous chunk with __GFP_RETRY_MAYFAIL
triggers an indefinite hang in the dma non-contig allocator. This has
high-impact, as an occurrence will trigger a device reboot, resulting in
loss of user state.
Fix the non-iommu path by letting dma_alloc_noncontiguous() fail quickly
so it does not get stuck looking for that elusive large contiguous chunk,
in which case we will fall back to the alsa fallback allocator.
Note that the iommu dma non-contiguous allocator is not affected. While
assembling an array of pages, it tries consecutively smaller contiguous
allocations, and lets higher-order chunk allocations fail quickly.
Suggested-by: Sven van Ashbrook <svenva(a)chromium.org>
Suggested-by: Brian Geffon <bgeffon(a)google.com>
Fixes: 9d8e536d36e7 ("ALSA: memalloc: Try dma_alloc_noncontiguous() at first")
Cc: stable(a)vger.kernel.org
Cc: Sven van Ashbrook <svenva(a)chromium.org>
Cc: Brian Geffon <bgeffon(a)google.com>
Cc: Curtis Malainey <cujomalainey(a)chromium.org>
Signed-off-by: Karthikeyan Ramasubramanian <kramasub(a)chromium.org>
---
sound/core/memalloc.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index f901504b5afc1..5f6526a0d731c 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -540,13 +540,18 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
{
struct sg_table *sgt;
void *p;
+ gfp_t gfp_flags = DEFAULT_GFP;
#ifdef CONFIG_SND_DMA_SGBUF
if (cpu_feature_enabled(X86_FEATURE_XENPV))
return snd_dma_sg_fallback_alloc(dmab, size);
+
+ /* Non-IOMMU case: prevent allocator from searching forever */
+ if (!get_dma_ops(dmab->dev.dev))
+ gfp_flags |= __GFP_NORETRY;
#endif
sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
- DEFAULT_GFP, 0);
+ gfp_flags, 0);
#ifdef CONFIG_SND_DMA_SGBUF
if (!sgt && !get_dma_ops(dmab->dev.dev))
return snd_dma_sg_fallback_alloc(dmab, size);
--
2.43.0.687.g38aa6559b0-goog
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
The icl+ power well code currently assumes that every AUX power
well maps to an encoder which is using said power well. That is
by no menas guaranteed as we:
- only register encoders for ports declared in the VBT
- combo PHY HDMI-only encoder no longer get an AUX CH since
commit 9856308c94ca ("drm/i915: Only populate aux_ch if really needed")
However we have places such as intel_power_domains_sanitize_state()
that blindly traverse all the possible power wells. So these bits
of code may very well encounbter an aux power well with no associated
encoder.
In this particular case the BIOS seems to have left one AUX power
well enabled even though we're dealing with a HDMI only encoder
on a combo PHY. We then proceed to turn off said power well and
explode when we can't find a matching encoder. As a short term fix
we should be able to just skip the PHY related parts of the power
well programming since we know this situation can only happen with
combo PHYs.
Another option might be to go back to always picking an AUX CH for
all encoders. However I'm a bit wary about that since we might in
theory end up conflicting with the VBT AUX CH assignment. Also
that wouldn't help with encoders not declared in the VBT, should
we ever need to poke the corresponding power wells.
Longer term we need to figure out what the actual relationship
is between the PHY vs. AUX CH vs. AUX power well. Currently this
is entirely unclear.
Cc: stable(a)vger.kernel.org
Fixes: 9856308c94ca ("drm/i915: Only populate aux_ch if really needed")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10184
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
.../drm/i915/display/intel_display_power_well.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c
index 47cd6bb04366..06900ff307b2 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power_well.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c
@@ -246,7 +246,14 @@ static enum phy icl_aux_pw_to_phy(struct drm_i915_private *i915,
enum aux_ch aux_ch = icl_aux_pw_to_ch(power_well);
struct intel_digital_port *dig_port = aux_ch_to_digital_port(i915, aux_ch);
- return intel_port_to_phy(i915, dig_port->base.port);
+ /*
+ * FIXME should we care about the (VBT defined) dig_port->aux_ch
+ * relationship or should this be purely defined by the hardware layout?
+ * Currently if the port doesn't appear in the VBT, or if it's declared
+ * as HDMI-only and routed to a combo PHY, the encoder either won't be
+ * present at all or it will not have an aux_ch assigned.
+ */
+ return dig_port ? intel_port_to_phy(i915, dig_port->base.port) : PHY_NONE;
}
static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv,
@@ -414,7 +421,8 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
intel_de_rmw(dev_priv, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(pw_idx));
- if (DISPLAY_VER(dev_priv) < 12)
+ /* FIXME this is a mess */
+ if (phy != PHY_NONE)
intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy),
0, ICL_LANE_ENABLE_AUX);
@@ -437,7 +445,10 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv,
drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv));
- intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy), ICL_LANE_ENABLE_AUX, 0);
+ /* FIXME this is a mess */
+ if (phy != PHY_NONE)
+ intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy),
+ ICL_LANE_ENABLE_AUX, 0);
intel_de_rmw(dev_priv, regs->driver, HSW_PWR_WELL_CTL_REQ(pw_idx), 0);
--
2.43.0
From: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
[WHY]
We still had an instance of get_idle_state checking the PMFW scratch
register instead of the actual idle allow signal.
[HOW]
Replace it with the SW state check for whether we had allowed idle
through notify_idle.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Duncan Ma <duncan.ma(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index daf6c7fe0906..acd8f1257ade 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -4884,22 +4884,16 @@ void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name)
bool dc_dmub_is_ips_idle_state(struct dc *dc)
{
- uint32_t idle_state = 0;
-
if (dc->debug.disable_idle_power_optimizations)
return false;
if (!dc->caps.ips_support || (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL))
return false;
- if (dc->hwss.get_idle_state)
- idle_state = dc->hwss.get_idle_state(dc);
-
- if (!(idle_state & DMUB_IPS1_ALLOW_MASK) ||
- !(idle_state & DMUB_IPS2_ALLOW_MASK))
- return true;
+ if (!dc->ctx->dmub_srv)
+ return false;
- return false;
+ return dc->ctx->dmub_srv->idle_allowed;
}
/* set min and max memory clock to lowest and highest DPM level, respectively */
--
2.34.1
From: Wenjing Liu <wenjing.liu(a)amd.com>
[WHY]
When committing an update with ODM combine change when the plane is
removing or already removed, we fail to detect odm change in pipe
update flags. This has caused mismatch between new dc state and the
actual hardware state, because we missed odm programming.
[HOW]
- Detect odm change even for otg master pipe without a plane.
- Update odm config before calling program pipes for pipe with planes.
The commit also updates blank pattern programming when odm is changed
without plane. This is because number of OPP is changed when ODM
combine is changed. Blank pattern is per OPP so we will need to
reprogram OPP based on the new pipe topology.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Dillon Varone <dillon.varone(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Wenjing Liu <wenjing.liu(a)amd.com>
---
.../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 41 ++++++++++---------
.../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 7 ++++
2 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index c55d5155ecb9..40098d9f70cb 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1498,6 +1498,11 @@ static void dcn20_detect_pipe_changes(struct dc_state *old_state,
return;
}
+ if (resource_is_pipe_type(new_pipe, OTG_MASTER) &&
+ resource_is_odm_topology_changed(new_pipe, old_pipe))
+ /* Detect odm changes */
+ new_pipe->update_flags.bits.odm = 1;
+
/* Exit on unchanged, unused pipe */
if (!old_pipe->plane_state && !new_pipe->plane_state)
return;
@@ -1551,10 +1556,6 @@ static void dcn20_detect_pipe_changes(struct dc_state *old_state,
/* Detect top pipe only changes */
if (resource_is_pipe_type(new_pipe, OTG_MASTER)) {
- /* Detect odm changes */
- if (resource_is_odm_topology_changed(new_pipe, old_pipe))
- new_pipe->update_flags.bits.odm = 1;
-
/* Detect global sync changes */
if (old_pipe->pipe_dlg_param.vready_offset != new_pipe->pipe_dlg_param.vready_offset
|| old_pipe->pipe_dlg_param.vstartup_start != new_pipe->pipe_dlg_param.vstartup_start
@@ -1999,19 +2000,20 @@ void dcn20_program_front_end_for_ctx(
DC_LOGGER_INIT(dc->ctx->logger);
unsigned int prev_hubp_count = 0;
unsigned int hubp_count = 0;
+ struct pipe_ctx *pipe;
if (resource_is_pipe_topology_changed(dc->current_state, context))
resource_log_pipe_topology_update(dc, context);
if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+ pipe = &context->res_ctx.pipe_ctx[i];
- if (!pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe && pipe_ctx->plane_state) {
- ASSERT(!pipe_ctx->plane_state->triplebuffer_flips);
+ if (!pipe->top_pipe && !pipe->prev_odm_pipe && pipe->plane_state) {
+ ASSERT(!pipe->plane_state->triplebuffer_flips);
/*turn off triple buffer for full update*/
dc->hwss.program_triplebuffer(
- dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
+ dc, pipe, pipe->plane_state->triplebuffer_flips);
}
}
}
@@ -2085,12 +2087,22 @@ void dcn20_program_front_end_for_ctx(
DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
}
+ /* update ODM for blanked OTG master pipes */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+ !resource_is_pipe_type(pipe, DPP_PIPE) &&
+ pipe->update_flags.bits.odm &&
+ hws->funcs.update_odm)
+ hws->funcs.update_odm(dc, context, pipe);
+ }
+
/*
* Program all updated pipes, order matters for mpcc setup. Start with
* top pipe and program all pipes that follow in order
*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ pipe = &context->res_ctx.pipe_ctx[i];
if (pipe->plane_state && !pipe->top_pipe) {
while (pipe) {
@@ -2129,17 +2141,6 @@ void dcn20_program_front_end_for_ctx(
context->stream_status[0].plane_count > 1) {
pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp);
}
-
- /* when dynamic ODM is active, pipes must be reconfigured when all planes are
- * disabled, as some transitions will leave software and hardware state
- * mismatched.
- */
- if (dc->debug.enable_single_display_2to1_odm_policy &&
- pipe->stream &&
- pipe->update_flags.bits.disable &&
- !pipe->prev_odm_pipe &&
- hws->funcs.update_odm)
- hws->funcs.update_odm(dc, context, pipe);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index aa36d7a56ca8..b890db0bfc46 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -1156,6 +1156,13 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
dsc->funcs->dsc_disconnect(dsc);
}
}
+
+ if (!resource_is_pipe_type(pipe_ctx, DPP_PIPE))
+ /*
+ * blank pattern is generated by OPP, reprogram blank pattern
+ * due to OPP count change
+ */
+ dc->hwseq->funcs.blank_pixel_data(dc, pipe_ctx, true);
}
unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div)
--
2.34.1
From: Ryan Lin <tsung-hua.lin(a)amd.com>
[WHY]
Some eDP panels' ext caps don't write initial values. The value of
dpcd_addr (0x317) can be random and the backlight control interface
will be incorrect.
[HOW]
Add new panel patches to remove sink ext caps.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.5.x
Cc: Tsung-hua Lin <tsung-hua.lin(a)amd.com>
Cc: Chris Chi <moukong.chi(a)amd.com>
Reviewed-by: Wayne Lin <wayne.lin(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Ryan Lin <tsung-hua.lin(a)amd.com>
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 85b7f58a7f35..c27063305a13 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -67,6 +67,8 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
/* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+ case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A):
+ case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1):
DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
edid_caps->panel_patch.remove_sink_ext_caps = true;
break;
@@ -120,6 +122,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
edid_caps->edid_hdmi = connector->display_info.is_hdmi;
+ apply_edid_quirks(edid_buf, edid_caps);
+
sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads);
if (sad_count <= 0)
return result;
@@ -146,8 +150,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
else
edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION;
- apply_edid_quirks(edid_buf, edid_caps);
-
kfree(sads);
kfree(sadb);
--
2.34.1
From: Josip Pavic <josip.pavic(a)amd.com>
[WHY]
It's beneficial for ABM to know when new frame data are available.
[HOW]
Add new condition to allow dirty rects to be sent to DMUB when ABM is
active. ABM will use this as a signal that a new frame has arrived.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Anthony Koo <anthony.koo(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Josip Pavic <josip.pavic(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 5211c1c0f3c0..613d09c42f3b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3270,6 +3270,9 @@ static bool dc_dmub_should_send_dirty_rect_cmd(struct dc *dc, struct dc_stream_s
if (stream->link->replay_settings.config.replay_supported)
return true;
+ if (stream->ctx->dce_version >= DCN_VERSION_3_5 && stream->abm_level)
+ return true;
+
return false;
}
--
2.34.1
This is the start of the stable review cycle for the 4.19.308 release.
There are 52 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 29 Feb 2024 13:15:36 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.308-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.308-rc1
Andrii Nakryiko <andriin(a)fb.com>
scripts/bpf: Fix xdp_md forward declaration typo
Bart Van Assche <bvanassche(a)acm.org>
fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio
Oliver Upton <oliver.upton(a)linux.dev>
KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler
Oliver Upton <oliver.upton(a)linux.dev>
KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table()
Vidya Sagar <vidyas(a)nvidia.com>
PCI/MSI: Prevent MSI hardware interrupt number truncation
Jason Gunthorpe <jgg(a)nvidia.com>
s390: use the correct count for __iowrite64_copy()
Wolfram Sang <wsa+renesas(a)sang-engineering.com>
packet: move from strlcpy with unused retval to strscpy
Vasiliy Kovalev <kovalev(a)altlinux.org>
ipv6: sr: fix possible use-after-free and null-ptr-deref
Arnd Bergmann <arnd(a)arndb.de>
nouveau: fix function cast warnings
Randy Dunlap <rdunlap(a)infradead.org>
scsi: jazz_esp: Only build if SCSI core is builtin
Gianmarco Lusvardi <glusvardi(a)posteo.net>
bpf, scripts: Correct GPL license name
Andrii Nakryiko <andriin(a)fb.com>
scripts/bpf: teach bpf_helpers_doc.py to dump BPF helper definitions
Arnd Bergmann <arnd(a)arndb.de>
RDMA/srpt: fix function pointer cast warnings
Bart Van Assche <bvanassche(a)acm.org>
RDMA/srpt: Make debug output more detailed
Jason Gunthorpe <jgg(a)ziepe.ca>
RDMA/ulp: Use dev_name instead of ibdev->name
Bart Van Assche <bvanassche(a)acm.org>
RDMA/srpt: Support specifying the srpt_service_guid parameter
Kalesh AP <kalesh-anakkur.purayil(a)broadcom.com>
RDMA/bnxt_re: Return error for SRQ resize
Zhipeng Lu <alexious(a)zju.edu.cn>
IB/hfi1: Fix a memleak in init_credit_return
Xu Yang <xu.yang_2(a)nxp.com>
usb: roles: don't get/set_role() when usb_role_switch is unregistered
Krishna Kurapati <quic_kriskura(a)quicinc.com>
usb: gadget: ncm: Avoid dropping datagrams of properly parsed NTBs
Nikita Shubin <nikita.shubin(a)maquefel.me>
ARM: ep93xx: Add terminator to gpiod_lookup_table
Tom Parkin <tparkin(a)katalix.com>
l2tp: pass correct message length to ip6_append_data
Vasiliy Kovalev <kovalev(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp()
Mikulas Patocka <mpatocka(a)redhat.com>
dm-crypt: don't modify the data when using authenticated encryption
Roman Gushchin <guro(a)fb.com>
mm: memcontrol: switch to rcu protection in drain_all_stock()
Daniel Vacek <neelx(a)redhat.com>
IB/hfi1: Fix sdma.h tx->num_descs off-by-one error
Geert Uytterhoeven <geert+renesas(a)glider.be>
pmdomain: renesas: r8a77980-sysc: CR7 must be always on
Alexandra Winter <wintera(a)linux.ibm.com>
s390/qeth: Fix potential loss of L3-IP@ in case of network issues
Yi Sun <yi.sun(a)unisoc.com>
virtio-blk: Ensure no requests in virtqueues before deleting vqs.
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
firewire: core: send bus reset promptly on gap count error
Zhang Rui <rui.zhang(a)intel.com>
hwmon: (coretemp) Enlarge per package core count limit
Martin Blumenstingl <martin.blumenstingl(a)googlemail.com>
regulator: pwm-regulator: Add validity checks in continuous .get_voltage
Baokun Li <libaokun1(a)huawei.com>
ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal()
Baokun Li <libaokun1(a)huawei.com>
ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found()
Conrad Kostecki <conikost(a)gentoo.org>
ahci: asm1166: correct count of reported ports
Fullway Wang <fullwaywang(a)outlook.com>
fbdev: sis: Error out if pixclock equals zero
Fullway Wang <fullwaywang(a)outlook.com>
fbdev: savage: Error out if pixclock equals zero
Felix Fietkau <nbd(a)nbd.name>
wifi: mac80211: fix race condition on enabling fast-xmit
Michal Kazior <michal(a)plume.com>
wifi: cfg80211: fix missing interfaces when dumping
Vinod Koul <vkoul(a)kernel.org>
dmaengine: shdma: increase size of 'dev_id'
Dmitry Bogdanov <d.bogdanov(a)yadro.com>
scsi: target: core: Add TMF to tmr_list handling
Cyril Hrubis <chrubis(a)suse.cz>
sched/rt: Disallow writing invalid values to sched_rt_period_us
Cyril Hrubis <chrubis(a)suse.cz>
sched/rt: sysctl_sched_rr_timeslice show default timeslice after reset
Cyril Hrubis <chrubis(a)suse.cz>
sched/rt: Fix sysctl_sched_rr_timeslice intial value
Lokesh Gidra <lokeshgidra(a)google.com>
userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: replace WARN_ONs for invalid DAT metadata block requests
GONG, Ruiqi <gongruiqi1(a)huawei.com>
memcg: add refcnt for pcpu stock to avoid UAF problem in drain_all_stock()
Aaro Koskinen <aaro.koskinen(a)nokia.com>
net: stmmac: fix notifier registration
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
stmmac: no need to check return value of debugfs_create functions
Jamal Hadi Salim <jhs(a)mojatatu.com>
net/sched: Retire dsmark qdisc
Jamal Hadi Salim <jhs(a)mojatatu.com>
net/sched: Retire ATM qdisc
Jamal Hadi Salim <jhs(a)mojatatu.com>
net/sched: Retire CBQ qdisc
-------------
Diffstat:
Makefile | 4 +-
arch/arm/mach-ep93xx/core.c | 1 +
arch/s390/pci/pci.c | 2 +-
drivers/ata/ahci.c | 5 +
drivers/block/virtio_blk.c | 7 +-
drivers/dma/sh/shdma.h | 2 +-
drivers/firewire/core-card.c | 18 +-
drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 8 +-
drivers/hwmon/coretemp.c | 2 +-
drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 +-
drivers/infiniband/hw/hfi1/pio.c | 6 +-
drivers/infiniband/hw/hfi1/sdma.c | 2 +-
drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 +-
drivers/infiniband/ulp/iser/iser_verbs.c | 9 +-
drivers/infiniband/ulp/isert/ib_isert.c | 2 +-
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 3 +-
drivers/infiniband/ulp/srp/ib_srp.c | 10 +-
drivers/infiniband/ulp/srpt/ib_srpt.c | 52 +-
drivers/md/dm-crypt.c | 6 +
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 -
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 65 +-
drivers/net/gtp.c | 10 +-
drivers/pci/msi.c | 2 +-
drivers/regulator/pwm-regulator.c | 3 +
drivers/s390/net/qeth_l3_main.c | 9 +-
drivers/scsi/Kconfig | 2 +-
drivers/soc/renesas/r8a77980-sysc.c | 3 +-
drivers/target/target_core_device.c | 5 -
drivers/target/target_core_transport.c | 4 +
drivers/usb/gadget/function/f_ncm.c | 10 +-
drivers/usb/roles/class.c | 12 +-
drivers/video/fbdev/savage/savagefb_driver.c | 3 +
drivers/video/fbdev/sis/sis_main.c | 2 +
fs/aio.c | 9 +-
fs/ext4/mballoc.c | 13 +-
fs/nilfs2/dat.c | 27 +-
include/linux/fs.h | 2 +
include/rdma/rdma_vt.h | 2 +-
kernel/sched/rt.c | 10 +-
kernel/sysctl.c | 5 +
mm/memcontrol.c | 23 +-
mm/userfaultfd.c | 14 +-
net/ipv6/seg6.c | 20 +-
net/l2tp/l2tp_ip6.c | 2 +-
net/mac80211/sta_info.c | 2 +
net/mac80211/tx.c | 2 +-
net/packet/af_packet.c | 4 +-
net/sched/Kconfig | 42 -
net/sched/Makefile | 3 -
net/sched/sch_atm.c | 708 --------
net/sched/sch_cbq.c | 1823 ---------------------
net/sched/sch_dsmark.c | 519 ------
net/wireless/nl80211.c | 1 +
scripts/bpf_helpers_doc.py | 157 +-
virt/kvm/arm/vgic/vgic-its.c | 5 +
55 files changed, 412 insertions(+), 3259 deletions(-)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 045e9d812868a2d80b7a57b224ce8009444b7bbc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024022602-extended-buffer-5cf7@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
045e9d812868 ("mptcp: fix duplicate subflow creation")
b9d69db87fb7 ("mptcp: let the in-kernel PM use mixed IPv4 and IPv6 addresses")
bedee0b56113 ("mptcp: address lookup improvements")
4638de5aefe5 ("mptcp: handle local addrs announced by userspace PMs")
c682bf536cf4 ("mptcp: add pm_nl_pernet helpers")
4cf86ae84c71 ("mptcp: strict local address ID selection")
d045b9eb95a9 ("mptcp: introduce implicit endpoints")
aaa25a2fa796 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 045e9d812868a2d80b7a57b224ce8009444b7bbc Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Thu, 15 Feb 2024 19:25:33 +0100
Subject: [PATCH] mptcp: fix duplicate subflow creation
Fullmesh endpoints could end-up unexpectedly generating duplicate
subflows - same local and remote addresses - when multiple incoming
ADD_ADDR are processed before the PM creates the subflow for the local
endpoints.
Address the issue explicitly checking for duplicates at subflow
creation time.
To avoid a quadratic computational complexity, track the unavailable
remote address ids in a temporary bitmap and initialize such bitmap
with the remote ids of all the existing subflows matching the local
address currently processed.
The above allows additionally replacing the existing code checking
for duplicate entry in the current set with a simple bit test
operation.
Fixes: 2843ff6f36db ("mptcp: remote addresses fullmesh")
Cc: stable(a)vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/435
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index ed6983af1ab2..58d17d9604e7 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
- const struct mptcp_addr_info *addr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- if (addrs[i].id == addr->id)
- return true;
- }
-
- return false;
-}
-
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
@@ -440,6 +427,16 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
msk->pm.subflows++;
addrs[i++] = remote;
} else {
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ /* Forbid creation of new subflows matching existing
+ * ones, possibly already created by incoming ADD_ADDR
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->local_id) == local->id)
+ __set_bit(subflow->remote_id, unavail_id);
+
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
@@ -447,11 +444,17 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
if (deny_id0 && !addrs[i].id)
continue;
+ if (test_bit(addrs[i].id, unavail_id))
+ continue;
+
if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
continue;
- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
- msk->pm.subflows < subflows_max) {
+ if (msk->pm.subflows < subflows_max) {
+ /* forbid creating multiple address towards
+ * this id
+ */
+ __set_bit(addrs[i].id, unavail_id);
msk->pm.subflows++;
i++;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 8c86fad2cecdc6bf7283ecd298b4d0555bd8b8aa
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021943-wriggle-repair-49dd@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
8c86fad2cecd ("selftests: mptcp: add missing kconfig for NF Filter in v6")
b6e074e171bc ("selftests: mptcp: add infinite map testcase")
39aab88242a8 ("selftests: mptcp: join: list failure at the end")
c7d49c033de0 ("selftests: mptcp: join: alt. to exec specific tests")
ae7bd9ccecc3 ("selftests: mptcp: join: option to execute specific tests")
e59300ce3ff8 ("selftests: mptcp: join: reset failing links")
3afd0280e7d3 ("selftests: mptcp: join: define tests groups once")
3c082695e78b ("selftests: mptcp: drop msg argument of chk_csum_nr")
69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case")
d045b9eb95a9 ("mptcp: introduce implicit endpoints")
6fa0174a7c86 ("mptcp: more careful RM_ADDR generation")
f98c2bca7b2b ("selftests: mptcp: Rename wait function")
826d7bdca833 ("selftests: mptcp: join: allow running -cCi")
7d9bf018f907 ("selftests: mptcp: update output info of chk_rm_nr")
26516e10c433 ("selftests: mptcp: add more arguments for chk_join_nr")
8117dac3e7c3 ("selftests: mptcp: add invert check in check_transfer")
01542c9bf9ab ("selftests: mptcp: add fastclose testcase")
cbfafac4cf8f ("selftests: mptcp: add extra_args in do_transfer")
922fd2b39e5a ("selftests: mptcp: add the MP_RST mibs check")
e8e947ef50f6 ("selftests: mptcp: add the MP_FASTCLOSE mibs check")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8c86fad2cecdc6bf7283ecd298b4d0555bd8b8aa Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Wed, 31 Jan 2024 22:49:48 +0100
Subject: [PATCH] selftests: mptcp: add missing kconfig for NF Filter in v6
Since the commit mentioned below, 'mptcp_join' selftests is using
IPTables to add rules to the Filter table for IPv6.
It is then required to have IP6_NF_FILTER KConfig.
This KConfig is usually enabled by default in many defconfig, but we
recently noticed that some CI were running our selftests without them
enabled.
Fixes: 523514ed0a99 ("selftests: mptcp: add ADD_ADDR IPv6 test cases")
Cc: stable(a)vger.kernel.org
Reviewed-by: Geliang Tang <geliang(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 2a00bf4acdfa..26fe466f803d 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -25,6 +25,7 @@ CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_CLS_ACT=y
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3645c844902bd4e173d6704fc2a37e8746904d67
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021925-lyricism-unshackle-b3ca@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
3645c844902b ("selftests: mptcp: add missing kconfig for NF Filter")
46e967d187ed ("selftests: mptcp: add tests for subflow creation failure")
5fb62e9cd3ad ("selftests: mptcp: add tproxy test case")
b6ab64b074f2 ("selftests: mptcp: more stable simult_flows tests")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3645c844902bd4e173d6704fc2a37e8746904d67 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Wed, 31 Jan 2024 22:49:47 +0100
Subject: [PATCH] selftests: mptcp: add missing kconfig for NF Filter
Since the commit mentioned below, 'mptcp_join' selftests is using
IPTables to add rules to the Filter table.
It is then required to have IP_NF_FILTER KConfig.
This KConfig is usually enabled by default in many defconfig, but we
recently noticed that some CI were running our selftests without them
enabled.
Fixes: 8d014eaa9254 ("selftests: mptcp: add ADD_ADDR timeout test case")
Cc: stable(a)vger.kernel.org
Reviewed-by: Geliang Tang <geliang(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index e317c2e44dae..2a00bf4acdfa 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -22,6 +22,7 @@ CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_ACT_CSUM=m
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 9f1a98813b4b686482e5ef3c9d998581cace0ba6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100447-durable-snowiness-8b36@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9f1a98813b4b686482e5ef3c9d998581cace0ba6 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Sat, 16 Sep 2023 12:52:47 +0200
Subject: [PATCH] mptcp: process pending subflow error on close
On incoming TCP reset, subflow closing could happen before error
propagation. That in turn could cause the socket error being ignored,
and a missing socket state transition, as reported by Daire-Byrne.
Address the issues explicitly checking for subflow socket error at
close time. To avoid code duplication, factor-out of __mptcp_error_report()
a new helper implementing the relevant bits.
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/429
Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 915860027b1a..1c96b8da71df 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -770,40 +770,44 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
return moved;
}
+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+{
+ int err = sock_error(ssk);
+ int ssk_state;
+
+ if (!err)
+ return false;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+ return false;
+
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk_error_report(sk);
+ return true;
+}
+
void __mptcp_error_report(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int err = sock_error(ssk);
- int ssk_state;
-
- if (!err)
- continue;
-
- /* only propagate errors on fallen-back sockets or
- * on MPC connect
- */
- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
- continue;
-
- /* We need to propagate only transition to CLOSE state.
- * Orphaned socket will see such state change via
- * subflow_sched_work_if_closed() and that path will properly
- * destroy the msk as needed.
- */
- ssk_state = inet_sk_state_load(ssk);
- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
- WRITE_ONCE(sk->sk_err, -err);
-
- /* This barrier is coupled with smp_rmb() in mptcp_poll() */
- smp_wmb();
- sk_error_report(sk);
- break;
- }
+ mptcp_for_each_subflow(msk, subflow)
+ if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+ break;
}
/* In most cases we will be able to lock the mptcp socket. If its already
@@ -2428,6 +2432,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}
out_release:
+ __mptcp_subflow_error_report(sk, ssk);
release_sock(ssk);
sock_put(ssk);
Commit 8238b4579866 ("wait_on_bit: add an acquire memory barrier") added
a new bitop, test_bit_acquire(), with proper wrapping in order to try to
optimize it at compile-time, but missed the list of bitops used for
checking their prototypes a bit below.
The functions added have consistent prototypes, so that no more changes
are required and no functional changes take place.
Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier")
Cc: stable(a)vger.kernel.org # 6.0+
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin(a)intel.com>
---
include/linux/bitops.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2ba557e067fe..f7f5a783da2a 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -80,6 +80,7 @@ __check_bitop_pr(__test_and_set_bit);
__check_bitop_pr(__test_and_clear_bit);
__check_bitop_pr(__test_and_change_bit);
__check_bitop_pr(test_bit);
+__check_bitop_pr(test_bit_acquire);
#undef __check_bitop_pr
--
2.43.0
From: Ivan Lipski <ivlipski(a)amd.com>
[WHY]
Some eDP panels's ext caps don't write initial value cause the value of
dpcd_addr(0x317) is random. It means that sometimes the eDP will
clarify it is OLED, miniLED...etc cause the backlight control interface
is incorrect.
[HOW]
Add a new panel patch to remove sink ext caps(HDR,OLED...etc)
Cc: stable(a)vger.kernel.org # 6.5.x
Cc: Hamza Mahfooz <hamza.mahfooz(a)amd.com>
Cc: Tsung-hua Lin <tsung-hua.lin(a)amd.com>
Cc: Chris Chi <moukong.chi(a)amd.com>
Cc: Harry Wentland <Harry.Wentland(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Reviewed-by: Sun peng Li <sunpeng.li(a)amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira(a)amd.com>
Signed-off-by: Ivan Lipski <ivlipski(a)amd.com>
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index d9a482908380..764dc3ffd91b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -63,6 +63,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id);
edid_caps->panel_patch.disable_fams = true;
break;
+ /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
+ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
+ case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+ DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
+ edid_caps->panel_patch.remove_sink_ext_caps = true;
+ break;
default:
return;
}
--
2.43.0
From: Bjorn Andersson <quic_bjorande(a)quicinc.com>
Commit 'e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable
the domain")' aimed to make sure that a power-domain that is being
enabled without any particular performance-state requested will at least
turn the rail on, to avoid filling DeviceTree with otherwise unnecessary
required-opps properties.
But in the event that aggregation happens on a disabled power-domain, with
an enabled peer without performance-state, both the local and peer
corner are 0. The peer's enabled_corner is not considered, with the
result that the underlying (shared) resource is disabled.
One case where this can be observed is when the display stack keeps mmcx
enabled (but without a particular performance-state vote) in order to
access registers and sync_state happens in the rpmhpd driver. As mmcx_ao
is flushed the state of the peer (mmcx) is not considered and mmcx_ao
ends up turning off "mmcx.lvl" underneath mmcx. This has been observed
several times, but has been painted over in DeviceTree by adding an
explicit vote for the lowest non-disabled performance-state.
Fixes: e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain")
Reported-by: Johan Hovold <johan(a)kernel.org>
Closes: https://lore.kernel.org/linux-arm-msm/ZdMwZa98L23mu3u6@hovoldconsulting.com/
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Bjorn Andersson <quic_bjorande(a)quicinc.com>
---
This issue is the root cause of a display regression on SC8280XP boards,
resulting in the system often resetting during boot. It was exposed by
the refactoring of the DisplayPort driver in v6.8-rc1.
---
drivers/pmdomain/qcom/rpmhpd.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c
index 3078896b1300..47df910645f6 100644
--- a/drivers/pmdomain/qcom/rpmhpd.c
+++ b/drivers/pmdomain/qcom/rpmhpd.c
@@ -692,6 +692,7 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
unsigned int active_corner, sleep_corner;
unsigned int this_active_corner = 0, this_sleep_corner = 0;
unsigned int peer_active_corner = 0, peer_sleep_corner = 0;
+ unsigned int peer_enabled_corner;
if (pd->state_synced) {
to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner);
@@ -701,9 +702,11 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
this_sleep_corner = pd->level_count - 1;
}
- if (peer && peer->enabled)
- to_active_sleep(peer, peer->corner, &peer_active_corner,
+ if (peer && peer->enabled) {
+ peer_enabled_corner = max(peer->corner, peer->enable_corner);
+ to_active_sleep(peer, peer_enabled_corner, &peer_active_corner,
&peer_sleep_corner);
+ }
active_corner = max(this_active_corner, peer_active_corner);
---
base-commit: b401b621758e46812da61fa58a67c3fd8d91de0d
change-id: 20240226-rpmhpd-enable-corner-fix-c5e07fe7b986
Best regards,
--
Bjorn Andersson <quic_bjorande(a)quicinc.com>
The value of the [ms]envcfg CSR is lost when entering a nonretentive
idle state, so the CSR must be rewritten when resuming the CPU.
Cc: <stable(a)vger.kernel.org> # v6.7+
Fixes: 43c16d51a19b ("RISC-V: Enable cbo.zero in usermode")
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
---
Changes in v4:
- Check for Xlinuxenvcfg instead of Zicboz
Changes in v3:
- Check for Zicboz instead of the privileged ISA version
Changes in v2:
- Check for privileged ISA v1.12 instead of the specific CSR
- Use riscv_has_extension_likely() instead of new ALTERNATIVE()s
arch/riscv/include/asm/suspend.h | 1 +
arch/riscv/kernel/suspend.c | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
index 02f87867389a..491296a335d0 100644
--- a/arch/riscv/include/asm/suspend.h
+++ b/arch/riscv/include/asm/suspend.h
@@ -14,6 +14,7 @@ struct suspend_context {
struct pt_regs regs;
/* Saved and restored by high-level functions */
unsigned long scratch;
+ unsigned long envcfg;
unsigned long tvec;
unsigned long ie;
#ifdef CONFIG_MMU
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 239509367e42..299795341e8a 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -15,6 +15,8 @@
void suspend_save_csrs(struct suspend_context *context)
{
context->scratch = csr_read(CSR_SCRATCH);
+ if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+ context->envcfg = csr_read(CSR_ENVCFG);
context->tvec = csr_read(CSR_TVEC);
context->ie = csr_read(CSR_IE);
@@ -36,6 +38,8 @@ void suspend_save_csrs(struct suspend_context *context)
void suspend_restore_csrs(struct suspend_context *context)
{
csr_write(CSR_SCRATCH, context->scratch);
+ if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+ csr_write(CSR_ENVCFG, context->envcfg);
csr_write(CSR_TVEC, context->tvec);
csr_write(CSR_IE, context->ie);
--
2.43.1
First several patches target fixing the UFS support on the Qualcomm
MSM8996 / APQ8096 platforms, broken by the commit b4e13e1ae95e ("scsi:
ufs: qcom: Add multiple frequency support for MAX_CORE_CLK_1US_CYCLES").
Last two patches clean up the UFS DT device on that platform to follow
the bindings on other MSM8969 platforms. If such breaking change is
unacceptable, they can be simply ignored, merging fixes only.
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
---
Changes in v3:
- dropped the patch conflicting with Yassine's patch that got accepted
- Cc stable on the UFS change (Manivannan)
- Fixed typos in the commit message (Manivannan)
- Link to v2: https://lore.kernel.org/r/20240213-msm8996-fix-ufs-v2-0-650758c26458@linaro…
Changes in v2:
- Dropped patches adding RX_SYMBOL_1_CLK, MSM8996 uses single lane
(Krzysztof).
- Link to v1: https://lore.kernel.org/r/20240209-msm8996-fix-ufs-v1-0-107b52e57420@linaro…
---
Dmitry Baryshkov (5):
scsi: ufs: qcom: provide default cycles_in_1us value
arm64: dts: qcom: msm8996: specify UFS core_clk frequencies
arm64: dts: qcom: msm8996: set GCC_UFS_ICE_CORE_CLK freq directly
dt-bindings: ufs: qcom,ufs: drop source clock entries
arm64: dts: qcom: msm8996: drop source clock entries from the UFS node
Documentation/devicetree/bindings/ufs/qcom,ufs.yaml | 12 +++++-------
arch/arm64/boot/dts/qcom/msm8996.dtsi | 8 +-------
drivers/ufs/host/ufs-qcom.c | 6 ++++--
3 files changed, 10 insertions(+), 16 deletions(-)
---
base-commit: 0035c3918a74a83f94158fbbd667e163bfd4a0d0
change-id: 20240209-msm8996-fix-ufs-f80ae6d4d8cf
Best regards,
--
Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
While connecting to a Linux host with CDC_NCM_NTB_DEF_SIZE_TX
set to 65536, it has been observed that we receive short packets,
which come at interval of 5-10 seconds sometimes and have block
length zero but still contain 1-2 valid datagrams present.
According to the NCM spec:
"If wBlockLength = 0x0000, the block is terminated by a
short packet. In this case, the USB transfer must still
be shorter than dwNtbInMaxSize or dwNtbOutMaxSize. If
exactly dwNtbInMaxSize or dwNtbOutMaxSize bytes are sent,
and the size is a multiple of wMaxPacketSize for the
given pipe, then no ZLP shall be sent.
wBlockLength= 0x0000 must be used with extreme care, because
of the possibility that the host and device may get out of
sync, and because of test issues.
wBlockLength = 0x0000 allows the sender to reduce latency by
starting to send a very large NTB, and then shortening it when
the sender discovers that there’s not sufficient data to justify
sending a large NTB"
However, there is a potential issue with the current implementation,
as it checks for the occurrence of multiple NTBs in a single
giveback by verifying if the leftover bytes to be processed is zero
or not. If the block length reads zero, we would process the same
NTB infintely because the leftover bytes is never zero and it leads
to a crash. Fix this by bailing out if block length reads zero.
Cc: <stable(a)vger.kernel.org>
Fixes: 427694cfaafa ("usb: gadget: ncm: Handle decoding of multiple NTB's in unwrap call")
Signed-off-by: Krishna Kurapati <quic_kriskura(a)quicinc.com>
---
Changes in v2:
Removed goto label
Link to v1:
https://lore.kernel.org/all/20240226112815.2616719-1-quic_kriskura@quicinc.…
drivers/usb/gadget/function/f_ncm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index e2a059cfda2c..28f4e6552e84 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1346,7 +1346,7 @@ static int ncm_unwrap_ntb(struct gether *port,
if (to_process == 1 &&
(*(unsigned char *)(ntb_ptr + block_len) == 0x00)) {
to_process--;
- } else if (to_process > 0) {
+ } else if ((to_process > 0) && (block_len != 0)) {
ntb_ptr = (unsigned char *)(ntb_ptr + block_len);
goto parse_ntb;
}
--
2.34.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f1796544a0ca0f14386a679d3d05fbc69235015e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024022759-crave-busily-bef7@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f1796544a0ca ("memcg: fix use-after-free in uncharge_batch")
1a3e1f40962c ("mm: memcontrol: decouple reference counting from page accounting")
8d22a9351035 ("mm/memcg: fix refcount error while moving and swapping")
d9eb1ea2bf87 ("mm: memcontrol: delete unused lrucare handling")
4c6355b25e8b ("mm: memcontrol: charge swapin pages on instantiation")
f0e45fb4da29 ("mm: memcontrol: drop unused try/commit/cancel charge API")
9d82c69438d0 ("mm: memcontrol: convert anon and file-thp to new mem_cgroup_charge() API")
468c398233da ("mm: memcontrol: switch to native NR_ANON_THPS counter")
be5d0a74c62d ("mm: memcontrol: switch to native NR_ANON_MAPPED counter")
0d1c20722ab3 ("mm: memcontrol: switch to native NR_FILE_PAGES and NR_SHMEM counters")
49e50d277ba2 ("mm: memcontrol: prepare move_account for removal of private page type counters")
9f762dbe19b9 ("mm: memcontrol: prepare uncharging for removal of private page type counters")
3fea5a499d57 ("mm: memcontrol: convert page cache to a new mem_cgroup_charge() API")
6caa6a0703e0 ("mm: memcontrol: move out cgroup swaprate throttling")
14235ab36019 ("mm: shmem: remove rare optimization when swapin races with hole punching")
3fba69a56e16 ("mm: memcontrol: drop @compound parameter from memcg charging API")
abb242f57196 ("mm: memcontrol: fix stat-corrupting race in charge moving")
f4129ea3591a ("mm: fix NUMA node file count error in replace_page_cache()")
ffe945e633b5 ("khugepaged: do not stop collapse if less than half PTEs are referenced")
396bcc5299c2 ("mm: remove CONFIG_TRANSPARENT_HUGE_PAGECACHE")
85b9f46e8ea4 ("mm, thp: track fallbacks due to failed memcg charges separately")
dcdf11ee1441 ("mm, shmem: add vmstat for hugepage fallback")
9c315e4d7d8c ("mm: memcg/slab: cache page number in memcg_(un)charge_slab()")
92d0510c3585 ("mm: kmem: switch to nr_pages in (__)memcg_kmem_charge_memcg()")
f4b00eab5004 ("mm: kmem: rename memcg_kmem_(un)charge() into memcg_kmem_(un)charge_page()")
50591183fa86 ("mm: kmem: cleanup memcg_kmem_uncharge_memcg() arguments")
10eaec2f63b6 ("mm: kmem: cleanup (__)memcg_kmem_charge_memcg() arguments")
47e29d32afba ("mm/gup: page->hpage_pinned_refcount: exact pin counts for huge pages")
3faa52c03f44 ("mm/gup: track FOLL_PIN pages")
3b78d8347d31 ("mm/gup: pass gup flags to two more routines")
c23a0c99793f ("mm/migrate: clean up some minor coding style")
92855270ff08 ("mm/memcontrol.c: cleanup some useless code")
f1f6a7dd9b53 ("mm, tree-wide: rename put_user_page*() to unpin_user_page*()")
aa4b87fe9ea3 ("powerpc: book3s64: convert to pin_user_pages() and put_user_page()")
19fed0dae94d ("vfio, mm: pin_user_pages (FOLL_PIN) and put_user_page() conversion")
1f815afcfca7 ("media/v4l2-core: pin_user_pages (FOLL_PIN) and put_user_page() conversion")
803e4572d7c5 ("mm/process_vm_access: set FOLL_PIN via pin_user_pages_remote()")
57459435cff5 ("goldish_pipe: convert to pin_user_pages() and put_user_page()")
eddb1c228f79 ("mm/gup: introduce pin_user_pages*() and FOLL_PIN")
3c7470b6f684 ("media/v4l2-core: set pages dirty upon releasing DMA buffers")
f4000fdf435b ("mm/gup: allow FOLL_FORCE for get_user_pages_fast()")
3567813eae5e ("vfio: fix FOLL_LONGTERM use, simplify get_user_pages_remote() call")
c4237f8b1f4f ("mm: fix get_user_pages_remote()'s handling of FOLL_LONGTERM")
a707cdd55f0f ("mm/gup: move try_get_compound_head() to top, fix minor issues")
a43e982082c2 ("mm/gup: factor out duplicate code from four routines")
fac0516b5534 ("mm: thp: don't need care deferred split queue in memcg charge move path")
f1fe80d4ae33 ("mm, thp: do not queue fully unmapped pages for deferred split")
acbfb087e3b1 ("mm/hugetlb: avoid looping to the same hugepage if !pages and !vmas")
867e5e1de14b ("mm: clean up and clarify lruvec lookup procedure")
242c37b459ce ("include/linux/memcontrol.h: fix comments based on per-node memcg")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f1796544a0ca0f14386a679d3d05fbc69235015e Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko(a)suse.com>
Date: Fri, 4 Sep 2020 16:35:24 -0700
Subject: [PATCH] memcg: fix use-after-free in uncharge_batch
syzbot has reported an use-after-free in the uncharge_batch path
BUG: KASAN: use-after-free in instrument_atomic_write include/linux/instrumented.h:71 [inline]
BUG: KASAN: use-after-free in atomic64_sub_return include/asm-generic/atomic-instrumented.h:970 [inline]
BUG: KASAN: use-after-free in atomic_long_sub_return include/asm-generic/atomic-long.h:113 [inline]
BUG: KASAN: use-after-free in page_counter_cancel mm/page_counter.c:54 [inline]
BUG: KASAN: use-after-free in page_counter_uncharge+0x3d/0xc0 mm/page_counter.c:155
Write of size 8 at addr ffff8880371c0148 by task syz-executor.0/9304
CPU: 0 PID: 9304 Comm: syz-executor.0 Not tainted 5.8.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x1f0/0x31e lib/dump_stack.c:118
print_address_description+0x66/0x620 mm/kasan/report.c:383
__kasan_report mm/kasan/report.c:513 [inline]
kasan_report+0x132/0x1d0 mm/kasan/report.c:530
check_memory_region_inline mm/kasan/generic.c:183 [inline]
check_memory_region+0x2b5/0x2f0 mm/kasan/generic.c:192
instrument_atomic_write include/linux/instrumented.h:71 [inline]
atomic64_sub_return include/asm-generic/atomic-instrumented.h:970 [inline]
atomic_long_sub_return include/asm-generic/atomic-long.h:113 [inline]
page_counter_cancel mm/page_counter.c:54 [inline]
page_counter_uncharge+0x3d/0xc0 mm/page_counter.c:155
uncharge_batch+0x6c/0x350 mm/memcontrol.c:6764
uncharge_page+0x115/0x430 mm/memcontrol.c:6796
uncharge_list mm/memcontrol.c:6835 [inline]
mem_cgroup_uncharge_list+0x70/0xe0 mm/memcontrol.c:6877
release_pages+0x13a2/0x1550 mm/swap.c:911
tlb_batch_pages_flush mm/mmu_gather.c:49 [inline]
tlb_flush_mmu_free mm/mmu_gather.c:242 [inline]
tlb_flush_mmu+0x780/0x910 mm/mmu_gather.c:249
tlb_finish_mmu+0xcb/0x200 mm/mmu_gather.c:328
exit_mmap+0x296/0x550 mm/mmap.c:3185
__mmput+0x113/0x370 kernel/fork.c:1076
exit_mm+0x4cd/0x550 kernel/exit.c:483
do_exit+0x576/0x1f20 kernel/exit.c:793
do_group_exit+0x161/0x2d0 kernel/exit.c:903
get_signal+0x139b/0x1d30 kernel/signal.c:2743
arch_do_signal+0x33/0x610 arch/x86/kernel/signal.c:811
exit_to_user_mode_loop kernel/entry/common.c:135 [inline]
exit_to_user_mode_prepare+0x8d/0x1b0 kernel/entry/common.c:166
syscall_exit_to_user_mode+0x5e/0x1a0 kernel/entry/common.c:241
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Commit 1a3e1f40962c ("mm: memcontrol: decouple reference counting from
page accounting") reworked the memcg lifetime to be bound the the struct
page rather than charges. It also removed the css_put_many from
uncharge_batch and that is causing the above splat.
uncharge_batch() is supposed to uncharge accumulated charges for all
pages freed from the same memcg. The queuing is done by uncharge_page
which however drops the memcg reference after it adds charges to the
batch. If the current page happens to be the last one holding the
reference for its memcg then the memcg is OK to go and the next page to
be freed will trigger batched uncharge which needs to access the memcg
which is gone already.
Fix the issue by taking a reference for the memcg in the current batch.
Fixes: 1a3e1f40962c ("mm: memcontrol: decouple reference counting from page accounting")
Reported-by: syzbot+b305848212deec86eabe(a)syzkaller.appspotmail.com
Reported-by: syzbot+b5ea6fb6f139c8b9482b(a)syzkaller.appspotmail.com
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: Shakeel Butt <shakeelb(a)google.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Roman Gushchin <guro(a)fb.com>
Cc: Hugh Dickins <hughd(a)google.com>
Link: https://lkml.kernel.org/r/20200820090341.GC5033@dhcp22.suse.cz
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b807952b4d43..cfa6cbad21d5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6774,6 +6774,9 @@ static void uncharge_batch(const struct uncharge_gather *ug)
__this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_pages);
memcg_check_events(ug->memcg, ug->dummy_page);
local_irq_restore(flags);
+
+ /* drop reference from uncharge_page */
+ css_put(&ug->memcg->css);
}
static void uncharge_page(struct page *page, struct uncharge_gather *ug)
@@ -6797,6 +6800,9 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
uncharge_gather_clear(ug);
}
ug->memcg = page->mem_cgroup;
+
+ /* pairs with css_put in uncharge_batch */
+ css_get(&ug->memcg->css);
}
nr_pages = compound_nr(page);
ODI DFP-34X-2C2 is capable of 2500base-X, but incorrectly report its
capabilities in the EEPROM.
So use sfp_quirk_2500basex for this module to allow 2500Base-X mode.
Signed-off-by: Shengyu Qu <wiagn233(a)outlook.com>
---
drivers/net/phy/sfp.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index f75c9eb3958e..2021cb4ff2f6 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -495,6 +495,13 @@ static const struct sfp_quirk sfp_quirks[] = {
// 2500MBd NRZ in their EEPROM
SFP_QUIRK_M("Lantech", "8330-262D-E", sfp_quirk_2500basex),
+ // ODI DFP-34X-2C2 can operate at 2500base-X, but incorrectly report 1300MBd
+ // NRZ in the EEPROM.
+ // Besides, In early batches, vendor id is set to OEM, but that is fixed in
+ // newer batches.
+ SFP_QUIRK_M("ODI", "DFP-34X-2C2", sfp_quirk_2500basex),
+ SFP_QUIRK_M("OEM", "DFP-34X-2C2", sfp_quirk_2500basex),
+
SFP_QUIRK_M("UBNT", "UF-INSTANT", sfp_quirk_ubnt_uf_instant),
// Walsun HXSX-ATR[CI]-1 don't identify as copper, and use the
--
2.39.2