This is a note to let you know that I've just added the patch titled
serial: 8250: of: Fix mapped region size when using reg-offset
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From d06b1cf28297e27127d3da54753a3a01a2fa2f28 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Wed, 12 Jan 2022 13:42:14 -0600
Subject: serial: 8250: of: Fix mapped region size when using reg-offset
property
8250_of supports a reg-offset property which is intended to handle
cases where the device registers start at an offset inside the region
of memory allocated to the device. The Xilinx 16550 UART, for which this
support was initially added, requires this. However, the code did not
adjust the overall size of the mapped region accordingly, causing the
driver to request an area of memory past the end of the device's
allocation. For example, if the UART was allocated an address of
0xb0130000, size of 0x10000 and reg-offset of 0x1000 in the device
tree, the region of memory reserved was b0131000-b0140fff, which caused
the driver for the region starting at b0140000 to fail to probe.
Fix this by subtracting reg-offset from the mapped region size.
Fixes: b912b5e2cfb3 ([POWERPC] Xilinx: of_serial support for Xilinx uart 16550.)
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Link: https://lore.kernel.org/r/20220112194214.881844-1-robert.hancock@calian.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/serial/8250/8250_of.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c
index bce28729dd7b..be8626234627 100644
--- a/drivers/tty/serial/8250/8250_of.c
+++ b/drivers/tty/serial/8250/8250_of.c
@@ -83,8 +83,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev,
port->mapsize = resource_size(&resource);
/* Check for shifted address mapping */
- if (of_property_read_u32(np, "reg-offset", &prop) == 0)
+ if (of_property_read_u32(np, "reg-offset", &prop) == 0) {
+ if (prop >= port->mapsize) {
+ dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n",
+ prop, &port->mapsize);
+ ret = -EINVAL;
+ goto err_unprepare;
+ }
+
port->mapbase += prop;
+ port->mapsize -= prop;
+ }
port->iotype = UPIO_MEM;
if (of_property_read_u32(np, "reg-io-width", &prop) == 0) {
--
2.35.0
This is a note to let you know that I've just added the patch titled
tty: rpmsg: Fix race condition releasing tty port
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From db7f19c0aa0abcb751ff0ed694a071363f702b1d Mon Sep 17 00:00:00 2001
From: Arnaud Pouliquen <arnaud.pouliquen(a)foss.st.com>
Date: Tue, 4 Jan 2022 17:35:45 +0100
Subject: tty: rpmsg: Fix race condition releasing tty port
The tty_port struct is part of the rpmsg_tty_port structure.
The issue is that the rpmsg_tty_port structure is freed on
rpmsg_tty_remove while it is still referenced in the tty_struct.
Its release is not predictable due to workqueues.
For instance following ftrace shows that rpmsg_tty_close is called after
rpmsg_tty_release_cport:
nr_test.sh-389 [000] ..... 212.093752: rpmsg_tty_remove <-rpmsg_dev_
remove
cat-1191 [001] ..... 212.095697: tty_release <-__fput
nr_test.sh-389 [000] ..... 212.099166: rpmsg_tty_release_cport <-rpm
sg_tty_remove
cat-1191 [001] ..... 212.115352: rpmsg_tty_close <-tty_release
cat-1191 [001] ..... 212.115371: release_tty <-tty_release_str
As consequence, the port must be free only when user has released the TTY
interface.
This path :
- Introduce the .destruct port tty ops function to release the allocated
rpmsg_tty_port structure.
- Introduce the .hangup tty ops function to call tty_port_hangup.
- Manages the tty port refcounting to trig the .destruct port ops,
- Introduces the rpmsg_tty_cleanup function to ensure that the TTY is
removed before decreasing the port refcount.
Fixes: 7c0408d80579 ("tty: add rpmsg driver")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen(a)foss.st.com>
Link: https://lore.kernel.org/r/20220104163545.34710-1-arnaud.pouliquen@foss.st.c…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/rpmsg_tty.c | 40 ++++++++++++++++++++++++++--------------
1 file changed, 26 insertions(+), 14 deletions(-)
diff --git a/drivers/tty/rpmsg_tty.c b/drivers/tty/rpmsg_tty.c
index dae2a4e44f38..29db413bbc03 100644
--- a/drivers/tty/rpmsg_tty.c
+++ b/drivers/tty/rpmsg_tty.c
@@ -50,10 +50,17 @@ static int rpmsg_tty_cb(struct rpmsg_device *rpdev, void *data, int len, void *p
static int rpmsg_tty_install(struct tty_driver *driver, struct tty_struct *tty)
{
struct rpmsg_tty_port *cport = idr_find(&tty_idr, tty->index);
+ struct tty_port *port;
tty->driver_data = cport;
- return tty_port_install(&cport->port, driver, tty);
+ port = tty_port_get(&cport->port);
+ return tty_port_install(port, driver, tty);
+}
+
+static void rpmsg_tty_cleanup(struct tty_struct *tty)
+{
+ tty_port_put(tty->port);
}
static int rpmsg_tty_open(struct tty_struct *tty, struct file *filp)
@@ -106,12 +113,19 @@ static unsigned int rpmsg_tty_write_room(struct tty_struct *tty)
return size;
}
+static void rpmsg_tty_hangup(struct tty_struct *tty)
+{
+ tty_port_hangup(tty->port);
+}
+
static const struct tty_operations rpmsg_tty_ops = {
.install = rpmsg_tty_install,
.open = rpmsg_tty_open,
.close = rpmsg_tty_close,
.write = rpmsg_tty_write,
.write_room = rpmsg_tty_write_room,
+ .hangup = rpmsg_tty_hangup,
+ .cleanup = rpmsg_tty_cleanup,
};
static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void)
@@ -137,8 +151,10 @@ static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void)
return cport;
}
-static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport)
+static void rpmsg_tty_destruct_port(struct tty_port *port)
{
+ struct rpmsg_tty_port *cport = container_of(port, struct rpmsg_tty_port, port);
+
mutex_lock(&idr_lock);
idr_remove(&tty_idr, cport->id);
mutex_unlock(&idr_lock);
@@ -146,7 +162,10 @@ static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport)
kfree(cport);
}
-static const struct tty_port_operations rpmsg_tty_port_ops = { };
+static const struct tty_port_operations rpmsg_tty_port_ops = {
+ .destruct = rpmsg_tty_destruct_port,
+};
+
static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
{
@@ -166,7 +185,8 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
cport->id, dev);
if (IS_ERR(tty_dev)) {
ret = dev_err_probe(dev, PTR_ERR(tty_dev), "Failed to register tty port\n");
- goto err_destroy;
+ tty_port_put(&cport->port);
+ return ret;
}
cport->rpdev = rpdev;
@@ -177,12 +197,6 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
rpdev->src, rpdev->dst, cport->id);
return 0;
-
-err_destroy:
- tty_port_destroy(&cport->port);
- rpmsg_tty_release_cport(cport);
-
- return ret;
}
static void rpmsg_tty_remove(struct rpmsg_device *rpdev)
@@ -192,13 +206,11 @@ static void rpmsg_tty_remove(struct rpmsg_device *rpdev)
dev_dbg(&rpdev->dev, "Removing rpmsg tty device %d\n", cport->id);
/* User hang up to release the tty */
- if (tty_port_initialized(&cport->port))
- tty_port_tty_hangup(&cport->port, false);
+ tty_port_tty_hangup(&cport->port, false);
tty_unregister_device(rpmsg_tty_driver, cport->id);
- tty_port_destroy(&cport->port);
- rpmsg_tty_release_cport(cport);
+ tty_port_put(&cport->port);
}
static struct rpmsg_device_id rpmsg_driver_tty_id_table[] = {
--
2.35.0
The membarrier command MEMBARRIER_CMD_QUERY allows querying the
available membarrier commands. When the membarrier-rseq fence commands
were added, a new MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK was
introduced with the intent to expose them with the MEMBARRIER_CMD_QUERY
command, the but it was never added to MEMBARRIER_CMD_BITMASK.
The membarrier-rseq fence commands are therefore not wired up with the
query command.
Rename MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK to
MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK (the bitmask is not a command
per-se), and change the erroneous
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK (which does not
actually exist) to MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ.
Wire up MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK in
MEMBARRIER_CMD_BITMASK. Fixing this allows discovering availability of
the membarrier-rseq fence feature.
Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Peter Oskolkov <posk(a)google.com>
Cc: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: <stable(a)vger.kernel.org> # 5.10+
---
kernel/sched/membarrier.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index b5add64d9698..3d2825408e3a 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -147,11 +147,11 @@
#endif
#ifdef CONFIG_RSEQ
-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \
+#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \
- | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
+ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
#else
-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
+#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
#endif
#define MEMBARRIER_CMD_BITMASK \
@@ -159,7 +159,8 @@
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
- | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
+ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
+ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
static void ipi_mb(void *info)
{
--
2.17.1
The TCSS_DDI_STATUS register is indexed by tc_port not by the FIA port
index, fix this up. This only caused an issue on TC#3/4 ports in legacy
mode, as in all other cases the two indices either match (on TC#1/2) or
the TCSS_DDI_STATUS_READY flag is set regardless of something being
connected or not (on TC#1/2/3/4 in dp-alt and tbt-alt modes).
Reported-and-tested-by: Chia-Lin Kao (AceLan) <acelan.kao(a)canonical.com>
Fixes: 55ce306c2aa1 ("drm/i915/adl_p: Implement TC sequences")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4698
Cc: José Roberto de Souza <jose.souza(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.14+
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_tc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index 4eefe7b0bb263..3291124a99e5a 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -346,10 +346,11 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port)
static bool adl_tc_phy_status_complete(struct intel_digital_port *dig_port)
{
struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+ enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
struct intel_uncore *uncore = &i915->uncore;
u32 val;
- val = intel_uncore_read(uncore, TCSS_DDI_STATUS(dig_port->tc_phy_fia_idx));
+ val = intel_uncore_read(uncore, TCSS_DDI_STATUS(tc_port));
if (val == 0xffffffff) {
drm_dbg_kms(&i915->drm,
"Port %s: PHY in TCCOLD, assuming not complete\n",
--
2.27.0
This is a note to let you know that I've just added the patch titled
usb: cdnsp: Fix segmentation fault in cdns_lost_power function
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 79aa3e19fe8f5be30e846df8a436bfe306e8b1a6 Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell(a)cadence.com>
Date: Tue, 11 Jan 2022 10:07:37 +0100
Subject: usb: cdnsp: Fix segmentation fault in cdns_lost_power function
CDNSP driver read not initialized cdns->otg_v0_regs
which lead to segmentation fault. Patch fixes this issue.
Fixes: 2cf2581cd229 ("usb: cdns3: add power lost support for system resume")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
Link: https://lore.kernel.org/r/20220111090737.10345-1-pawell@gli-login.cadence.c…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/cdns3/drd.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c
index 55c73b1d8704..d00ff98dffab 100644
--- a/drivers/usb/cdns3/drd.c
+++ b/drivers/usb/cdns3/drd.c
@@ -483,11 +483,11 @@ int cdns_drd_exit(struct cdns *cdns)
/* Indicate the cdns3 core was power lost before */
bool cdns_power_is_lost(struct cdns *cdns)
{
- if (cdns->version == CDNS3_CONTROLLER_V1) {
- if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0)))
+ if (cdns->version == CDNS3_CONTROLLER_V0) {
+ if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0)))
return true;
} else {
- if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0)))
+ if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0)))
return true;
}
return false;
--
2.35.0
This is a note to let you know that I've just added the patch titled
usb: dwc3: xilinx: Fix error handling when getting USB3 PHY
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 2cc9b1c93b1c4caa2d971856c0780fb5f7d04692 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Tue, 25 Jan 2022 18:02:51 -0600
Subject: usb: dwc3: xilinx: Fix error handling when getting USB3 PHY
The code that looked up the USB3 PHY was ignoring all errors other than
EPROBE_DEFER in an attempt to handle the PHY not being present. Fix and
simplify the code by using devm_phy_optional_get and dev_err_probe so
that a missing PHY is not treated as an error and unexpected errors
are handled properly.
Fixes: 84770f028fab ("usb: dwc3: Add driver for Xilinx platforms")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Link: https://lore.kernel.org/r/20220126000253.1586760-3-robert.hancock@calian.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/dwc3/dwc3-xilinx.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c
index 06b591b14b09..e14ac15e24c3 100644
--- a/drivers/usb/dwc3/dwc3-xilinx.c
+++ b/drivers/usb/dwc3/dwc3-xilinx.c
@@ -102,12 +102,12 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
int ret;
u32 reg;
- usb3_phy = devm_phy_get(dev, "usb3-phy");
- if (PTR_ERR(usb3_phy) == -EPROBE_DEFER) {
- ret = -EPROBE_DEFER;
+ usb3_phy = devm_phy_optional_get(dev, "usb3-phy");
+ if (IS_ERR(usb3_phy)) {
+ ret = PTR_ERR(usb3_phy);
+ dev_err_probe(dev, ret,
+ "failed to get USB3 PHY\n");
goto err;
- } else if (IS_ERR(usb3_phy)) {
- usb3_phy = NULL;
}
/*
--
2.35.0
This is a note to let you know that I've just added the patch titled
usb: dwc3: xilinx: Skip resets and USB3 register settings for USB2.0
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 9678f3361afc27a3124cd2824aec0227739986fb Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Tue, 25 Jan 2022 18:02:50 -0600
Subject: usb: dwc3: xilinx: Skip resets and USB3 register settings for USB2.0
mode
It appears that the PIPE clock should not be selected when only USB 2.0
is being used in the design and no USB 3.0 reference clock is used.
Also, the core resets are not required if a USB3 PHY is not in use, and
will break things if USB3 is actually used but the PHY entry is not
listed in the device tree.
Skip core resets and register settings that are only required for
USB3 mode when no USB3 PHY is specified in the device tree.
Fixes: 84770f028fab ("usb: dwc3: Add driver for Xilinx platforms")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Link: https://lore.kernel.org/r/20220126000253.1586760-2-robert.hancock@calian.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/dwc3/dwc3-xilinx.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c
index 9cc3ad701a29..06b591b14b09 100644
--- a/drivers/usb/dwc3/dwc3-xilinx.c
+++ b/drivers/usb/dwc3/dwc3-xilinx.c
@@ -110,6 +110,18 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
usb3_phy = NULL;
}
+ /*
+ * The following core resets are not required unless a USB3 PHY
+ * is used, and the subsequent register settings are not required
+ * unless a core reset is performed (they should be set properly
+ * by the first-stage boot loader, but may be reverted by a core
+ * reset). They may also break the configuration if USB3 is actually
+ * in use but the usb3-phy entry is missing from the device tree.
+ * Therefore, skip these operations in this case.
+ */
+ if (!usb3_phy)
+ goto skip_usb3_phy;
+
crst = devm_reset_control_get_exclusive(dev, "usb_crst");
if (IS_ERR(crst)) {
ret = PTR_ERR(crst);
@@ -188,6 +200,7 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
goto err;
}
+skip_usb3_phy:
/*
* This routes the USB DMA traffic to go through FPD path instead
* of reaching DDR directly. This traffic routing is needed to
--
2.35.0
This is a note to let you know that I've just added the patch titled
usb: xhci-plat: fix crash when suspend if remote wake enable
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 9df478463d9feb90dae24f183383961cf123a0ec Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li(a)nxp.com>
Date: Mon, 10 Jan 2022 11:27:38 -0600
Subject: usb: xhci-plat: fix crash when suspend if remote wake enable
Crashed at i.mx8qm platform when suspend if enable remote wakeup
Internal error: synchronous external abort: 96000210 [#1] PREEMPT SMP
Modules linked in:
CPU: 2 PID: 244 Comm: kworker/u12:6 Not tainted 5.15.5-dirty #12
Hardware name: Freescale i.MX8QM MEK (DT)
Workqueue: events_unbound async_run_entry_fn
pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : xhci_disable_hub_port_wake.isra.62+0x60/0xf8
lr : xhci_disable_hub_port_wake.isra.62+0x34/0xf8
sp : ffff80001394bbf0
x29: ffff80001394bbf0 x28: 0000000000000000 x27: ffff00081193b578
x26: ffff00081193b570 x25: 0000000000000000 x24: 0000000000000000
x23: ffff00081193a29c x22: 0000000000020001 x21: 0000000000000001
x20: 0000000000000000 x19: ffff800014e90490 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
x14: 0000000000000000 x13: 0000000000000002 x12: 0000000000000000
x11: 0000000000000000 x10: 0000000000000960 x9 : ffff80001394baa0
x8 : ffff0008145d1780 x7 : ffff0008f95b8e80 x6 : 000000001853b453
x5 : 0000000000000496 x4 : 0000000000000000 x3 : ffff00081193a29c
x2 : 0000000000000001 x1 : 0000000000000000 x0 : ffff000814591620
Call trace:
xhci_disable_hub_port_wake.isra.62+0x60/0xf8
xhci_suspend+0x58/0x510
xhci_plat_suspend+0x50/0x78
platform_pm_suspend+0x2c/0x78
dpm_run_callback.isra.25+0x50/0xe8
__device_suspend+0x108/0x3c0
The basic flow:
1. run time suspend call xhci_suspend, xhci parent devices gate the clock.
2. echo mem >/sys/power/state, system _device_suspend call xhci_suspend
3. xhci_suspend call xhci_disable_hub_port_wake, which access register,
but clock already gated by run time suspend.
This problem was hidden by power domain driver, which call run time resume before it.
But the below commit remove it and make this issue happen.
commit c1df456d0f06e ("PM: domains: Don't runtime resume devices at genpd_prepare()")
This patch call run time resume before suspend to make sure clock is on
before access register.
Reviewed-by: Peter Chen <peter.chen(a)kernel.org>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Testeb-by: Abel Vesa <abel.vesa(a)nxp.com>
Link: https://lore.kernel.org/r/20220110172738.31686-1-Frank.Li@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/host/xhci-plat.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index c1edcc9b13ce..dc570ce4e831 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -437,6 +437,9 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
int ret;
+ if (pm_runtime_suspended(dev))
+ pm_runtime_resume(dev);
+
ret = xhci_priv_suspend_quirk(hcd);
if (ret)
return ret;
--
2.35.0
This is a note to let you know that I've just added the patch titled
usb: common: ulpi: Fix crash in ulpi_match()
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 2e3dd4a6246945bf84ea6f478365d116e661554c Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh(a)nvidia.com>
Date: Mon, 17 Jan 2022 15:00:39 +0000
Subject: usb: common: ulpi: Fix crash in ulpi_match()
Commit 7495af930835 ("ARM: multi_v7_defconfig: Enable drivers for
DragonBoard 410c") enables the CONFIG_PHY_QCOM_USB_HS for the ARM
multi_v7_defconfig. Enabling this Kconfig is causing the kernel to crash
on the Tegra20 Ventana platform in the ulpi_match() function.
The Qualcomm USB HS PHY driver that is enabled by CONFIG_PHY_QCOM_USB_HS,
registers a ulpi_driver but this driver does not provide an 'id_table',
so when ulpi_match() is called on the Tegra20 Ventana platform, it
crashes when attempting to deference the id_table pointer which is not
valid. The Qualcomm USB HS PHY driver uses device-tree for matching the
ULPI driver with the device and so fix this crash by using device-tree
for matching if the id_table is not valid.
Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Jon Hunter <jonathanh(a)nvidia.com>
Link: https://lore.kernel.org/r/20220117150039.44058-1-jonathanh@nvidia.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/common/ulpi.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index 4169cf40a03b..8f8405b0d608 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver)
struct ulpi *ulpi = to_ulpi_dev(dev);
const struct ulpi_device_id *id;
- /* Some ULPI devices don't have a vendor id so rely on OF match */
- if (ulpi->id.vendor == 0)
+ /*
+ * Some ULPI devices don't have a vendor id
+ * or provide an id_table so rely on OF match.
+ */
+ if (ulpi->id.vendor == 0 || !drv->id_table)
return of_driver_match_device(dev, driver);
for (id = drv->id_table; id->vendor; id++)
--
2.35.0
This is a note to let you know that I've just added the patch titled
usb: gadget: f_sourcesink: Fix isoc transfer for USB_SPEED_SUPER_PLUS
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 904edf8aeb459697129be5fde847e2a502f41fd9 Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <quic_pkondeti(a)quicinc.com>
Date: Sat, 22 Jan 2022 08:33:22 +0530
Subject: usb: gadget: f_sourcesink: Fix isoc transfer for USB_SPEED_SUPER_PLUS
Currently when gadget enumerates in super speed plus, the isoc
endpoint request buffer size is not calculated correctly. Fix
this by checking the gadget speed against USB_SPEED_SUPER_PLUS
and update the request buffer size.
Fixes: 90c4d05780d4 ("usb: fix various gadgets null ptr deref on 10gbps cabling.")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Pavankumar Kondeti <quic_pkondeti(a)quicinc.com>
Link: https://lore.kernel.org/r/1642820602-20619-1-git-send-email-quic_pkondeti@q…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/gadget/function/f_sourcesink.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c
index 1abf08e5164a..6803cd60cc6d 100644
--- a/drivers/usb/gadget/function/f_sourcesink.c
+++ b/drivers/usb/gadget/function/f_sourcesink.c
@@ -584,6 +584,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in,
if (is_iso) {
switch (speed) {
+ case USB_SPEED_SUPER_PLUS:
case USB_SPEED_SUPER:
size = ss->isoc_maxpacket *
(ss->isoc_mult + 1) *
--
2.35.0
hallo Greg
5.16.3-rc2
compiles, boots and runs on my x86_64
(Intel i5-11400, Fedora 35)
Thanks
Tested-by: Ronald Warsow <rwarsow(a)gmx.de>
--
regards
Ronald
Fix a user API regression introduced with commit f76edd8f7ce0 ("tty:
cyclades, remove this orphan"), which removed a part of the API and
caused compilation errors for user programs using said part, such as
GCC 9 in its libsanitizer component[1]:
.../libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc:160:10: fatal error: linux/cyclades.h: No such file or directory
160 | #include <linux/cyclades.h>
| ^~~~~~~~~~~~~~~~~~
compilation terminated.
make[4]: *** [Makefile:664: sanitizer_platform_limits_posix.lo] Error 1
As the absolute minimum required bring `struct cyclades_monitor' and
ioctl numbers back then so as to make the library build again.
References:
[1] GCC PR sanitizer/100379, "cyclades.h is removed from linux kernel
header files", <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100379>
Signed-off-by: Maciej W. Rozycki <macro(a)embecosm.com>
Fixes: f76edd8f7ce0 ("tty: cyclades, remove this orphan")
Cc: stable(a)vger.kernel.org # v5.13+
---
Hi Greg,
Only these ioctl numbers are referred by libsanitizer (quoted with source
line numbers as printed by GCC):
836 | unsigned IOCTL_CYGETDEFTHRESH = CYGETDEFTHRESH;
837 | unsigned IOCTL_CYGETDEFTIMEOUT = CYGETDEFTIMEOUT;
838 | unsigned IOCTL_CYGETMON = CYGETMON;
839 | unsigned IOCTL_CYGETTHRESH = CYGETTHRESH;
840 | unsigned IOCTL_CYGETTIMEOUT = CYGETTIMEOUT;
841 | unsigned IOCTL_CYSETDEFTHRESH = CYSETDEFTHRESH;
842 | unsigned IOCTL_CYSETDEFTIMEOUT = CYSETDEFTIMEOUT;
843 | unsigned IOCTL_CYSETTHRESH = CYSETTHRESH;
844 | unsigned IOCTL_CYSETTIMEOUT = CYSETTIMEOUT;
-- however I don't think it makes sense to bring them back selectively.
Please apply.
Maciej
Changes from v1:
- Adjust heading from "tty: Revert the removal of the Cyclades public API".
- Only revert `struct cyclades_monitor' and ioctl numbers.
- Properly format the change given that it's not a plain revert anymore.
---
include/uapi/linux/cyclades.h | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
linux-uapi-cyclades.diff
Index: linux/include/uapi/linux/cyclades.h
===================================================================
--- /dev/null
+++ linux/include/uapi/linux/cyclades.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_CYCLADES_H
+#define _UAPI_LINUX_CYCLADES_H
+
+struct cyclades_monitor {
+ unsigned long int_count;
+ unsigned long char_count;
+ unsigned long char_max;
+ unsigned long char_last;
+};
+
+#define CYGETMON 0x435901
+#define CYGETTHRESH 0x435902
+#define CYSETTHRESH 0x435903
+#define CYGETDEFTHRESH 0x435904
+#define CYSETDEFTHRESH 0x435905
+#define CYGETTIMEOUT 0x435906
+#define CYSETTIMEOUT 0x435907
+#define CYGETDEFTIMEOUT 0x435908
+#define CYSETDEFTIMEOUT 0x435909
+#define CYSETRFLOW 0x43590a
+#define CYGETRFLOW 0x43590b
+#define CYSETRTSDTR_INV 0x43590c
+#define CYGETRTSDTR_INV 0x43590d
+#define CYZSETPOLLCYCLE 0x43590e
+#define CYZGETPOLLCYCLE 0x43590f
+#define CYGETCD1400VER 0x435910
+#define CYSETWAIT 0x435912
+#define CYGETWAIT 0x435913
+
+#endif /* _UAPI_LINUX_CYCLADES_H */
Good Day,
I am Sankara Mohammed, account Manager with an investment bank here in
Burkina Faso. There is a draft account opened in my firm by a
long-time client of our bank.I have the opportunity of transferring
the left over fund (15.8 Million UsDollars ) Fiftheen Million Eight
Hundred Thousand United States of American Dollars.
I want to invest this funds and introduce you to our bank for this
deal and this will be executed under a legitimate arrangement that
will protect us from any breach of the law.We will share the fund 40%
for you,50% for me while 10% is for establishing of foundation for the
poor children in your country.If you are really interested in my
proposal further details of the fund transfer will be forwarded to
you.
Yours Sincerely,
Sankara Mohammed.
This patch series to fix PTP issue in stmmac related to:
1/ PTP clock source configuration during initialization.
2/ PTP initialization during resume from suspend.
Mohammad Athari Bin Ismail (2):
net: stmmac: configure PTP clock source prior to PTP initialization
net: stmmac: skip only stmmac_ptp_register when resume from suspend
.../net/ethernet/stmicro/stmmac/stmmac_main.c | 25 +++++++++++--------
.../net/ethernet/stmicro/stmmac/stmmac_ptp.c | 3 ---
2 files changed, 14 insertions(+), 14 deletions(-)
--
2.17.1
Dear Friend,
How are you today, Please accept my sincere apologies if my email
does not meet your business or personal ethics, I really like to have
a good relationship with you, and I have a special reason why I
decided to contact you because of the urgency of my situation here.I
came across your e-mail contact prior to a private search while in
need of your assistance.
INTRODUCTION: Am Mr Ali Musa a Banker and in one way or the other was
hoping you will cooperate with me as a partner in a project of
transferring an abandoned fund of a late customer of the bank worth of
$18,000,000 (Eighteen Million Dollars US).
This will be disbursed or shared between the both of us in these
percentages, 55% for me and 45% for you. Contact me immediately if
that is alright with you so that we can enter in agreement before we
start
processing for the transfer of the funds. If you are satisfied with
this proposal, please provide the below details for the Mutual
Confidentiality Agreement:
1. Full Name and Address
2. Occupation and Country of Origin
3. Telephone Number
4. A scan copy of your identification
I wait for your response so that we can commence on this transaction
as soon as possible.
Regards,
Mr Ali Musa.
With some chargers, vbus might momentarily raise above VSAFE5V and fall
back to 0V before tcpm gets to read port->tcpc->get_vbus. This will
report a VBUS off event causing TCPM to transition to SNK_UNATTACHED
where it should be waiting in either SNK_ATTACH_WAIT or SNK_DEBOUNCED
state. This patch makes TCPM avoid vbus off events while in
SNK_ATTACH_WAIT or SNK_DEBOUNCED state.
Stub from the spec:
"4.5.2.2.4.2 Exiting from AttachWait.SNK State
A Sink shall transition to Unattached.SNK when the state of both
the CC1 and CC2 pins is SNK.Open for at least tPDDebounce.
A DRP shall transition to Unattached.SRC when the state of both
the CC1 and CC2 pins is SNK.Open for at least tPDDebounce."
[23.194131] CC1: 0 -> 0, CC2: 0 -> 5 [state SNK_UNATTACHED, polarity 0, connected]
[23.201777] state change SNK_UNATTACHED -> SNK_ATTACH_WAIT [rev3 NONE_AMS]
[23.209949] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS]
[23.300579] VBUS off
[23.300668] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [rev3 NONE_AMS]
[23.301014] VBUS VSAFE0V
[23.301111] Start toggling
Fixes: f0690a25a140b8 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable(a)vger.kernel.org
Signed-off-by: Badhri Jagan Sridharan <badhri(a)google.com>
---
Changes since v1:
- Fix typos stated by Guenter Roeck.
---
drivers/usb/typec/tcpm/tcpm.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 59d4fa2443f2..3bf79f52bd34 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5156,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
case SNK_TRYWAIT_DEBOUNCE:
break;
case SNK_ATTACH_WAIT:
- tcpm_set_state(port, SNK_UNATTACHED, 0);
+ case SNK_DEBOUNCED:
+ /* Do nothing, as TCPM is still waiting for vbus to reach VSAFE5V to connect */
break;
case SNK_NEGOTIATE_CAPABILITIES:
--
2.35.0.rc0.227.g00780c9af4-goog
With some chargers, vbus might momentarily raise above VSAFE5V and fall
back to 0V causing VSAFE0V to be triggered. This will report a VBUS off
event causing TCPM to transition to SNK_UNATTACHED state where it
should be waiting in either SNK_ATTACH_WAIT or SNK_DEBOUNCED state.
This patch makes TCPM avoid VSAFE0V events while in SNK_ATTACH_WAIT
or SNK_DEBOUNCED state.
Stub from the spec:
"4.5.2.2.4.2 Exiting from AttachWait.SNK State
A Sink shall transition to Unattached.SNK when the state of both
the CC1 and CC2 pins is SNK.Open for at least tPDDebounce.
A DRP shall transition to Unattached.SRC when the state of both
the CC1 and CC2 pins is SNK.Open for at least tPDDebounce."
[23.194131] CC1: 0 -> 0, CC2: 0 -> 5 [state SNK_UNATTACHED, polarity 0, connected]
[23.201777] state change SNK_UNATTACHED -> SNK_ATTACH_WAIT [rev3 NONE_AMS]
[23.209949] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS]
[23.300579] VBUS off
[23.300668] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [rev3 NONE_AMS]
[23.301014] VBUS VSAFE0V
[23.301111] Start toggling
Fixes: f0690a25a140b8 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable(a)vger.kernel.org
Signed-off-by: Badhri Jagan Sridharan <badhri(a)google.com>
Acked-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Reviewed-by: Guenter Roeck <linux(a)roeck-us.net>
---
Changes since v1:
- Fix typos stated by Guenter Roeck.
Changes since v2:
- Add reviewed-by/acked-by tags
---
drivers/usb/typec/tcpm/tcpm.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 3bf79f52bd34..0e0985355a14 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5264,6 +5264,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port)
case PR_SWAP_SNK_SRC_SOURCE_ON:
/* Do nothing, vsafe0v is expected during transition */
break;
+ case SNK_ATTACH_WAIT:
+ case SNK_DEBOUNCED:
+ /* Do nothing, still waiting for VSAFE5V to connect */
+ break;
default:
if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled)
tcpm_set_state(port, SNK_UNATTACHED, 0);
--
2.35.0.rc0.227.g00780c9af4-goog
With some chargers, vbus might momentarily raise above VSAFE5V and fall
back to 0V before tcpm gets to read port->tcpc->get_vbus. This will
report a VBUS off event causing TCPM to transition to SNK_UNATTACHED
where it should be waiting in either SNK_ATTACH_WAIT or SNK_DEBOUNCED
state. This patch makes TCPM avoid vbus off events while in
SNK_ATTACH_WAIT or SNK_DEBOUNCED state.
Stub from the spec:
"4.5.2.2.4.2 Exiting from AttachWait.SNK State
A Sink shall transition to Unattached.SNK when the state of both
the CC1 and CC2 pins is SNK.Open for at least tPDDebounce.
A DRP shall transition to Unattached.SRC when the state of both
the CC1 and CC2 pins is SNK.Open for at least tPDDebounce."
[23.194131] CC1: 0 -> 0, CC2: 0 -> 5 [state SNK_UNATTACHED, polarity 0, connected]
[23.201777] state change SNK_UNATTACHED -> SNK_ATTACH_WAIT [rev3 NONE_AMS]
[23.209949] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS]
[23.300579] VBUS off
[23.300668] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [rev3 NONE_AMS]
[23.301014] VBUS VSAFE0V
[23.301111] Start toggling
Fixes: f0690a25a140b8 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable(a)vger.kernel.org
Signed-off-by: Badhri Jagan Sridharan <badhri(a)google.com>
Acked-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Reviewed-by: Guenter Roeck <linux(a)roeck-us.net>
---
Changes since v1:
- Fix typos stated by Guenter Roeck.
Changes since v2:
- Added reviewed/acked-by tags
---
drivers/usb/typec/tcpm/tcpm.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 59d4fa2443f2..3bf79f52bd34 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5156,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
case SNK_TRYWAIT_DEBOUNCE:
break;
case SNK_ATTACH_WAIT:
- tcpm_set_state(port, SNK_UNATTACHED, 0);
+ case SNK_DEBOUNCED:
+ /* Do nothing, as TCPM is still waiting for vbus to reach VSAFE5V to connect */
break;
case SNK_NEGOTIATE_CAPABILITIES:
--
2.35.0.rc0.227.g00780c9af4-goog
Az Ön nevében az Egyesült Nemzetek és az Egészségügyi Világszervezet a
nemzetközi valutaalaphoz kapcsolódva díjat adományoz, amelyben az Ön
e-mail címét és pénzeszközét átadtuk nekünk az Ön átutalása érdekében,
kérjük, erosítse meg adatait az Ön átutalása érdekében.
Azt az utasítást kaptuk, hogy minden függoben lévo tranzakciót vigyünk
át a következo két napon belül, de ha megkapta az alapját, akkor
hagyja figyelmen kívül ezt az üzenetet, ha nem azonnal.
Sürgosen válaszolnia kell erre az üzenetre, ez nem egy olyan
internetes csaló, ez a világjárvány enyhítése.
Syzbot found a GPF in reweight_entity(). This has been bisected to commit
4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group")
There is a race between sched_post_fork() and setpriority(PRIO_PGRP)
within a thread group that causes a null-ptr-deref in reweight_entity()
in CFS. The scenario is that the main process spawns number of new
threads, which then call setpriority(PRIO_PGRP, 0, prio), wait, and exit.
For each of the new threads the copy_process() gets invoked, which adds
the new task_struct to the group, and eventually calls sched_post_fork() for it.
In the above scenario there is a possibility that setpriority(PRIO_PGRP)
and set_one_prio() will be called for a thread in the group that is just
being created by copy_process(), and for which the sched_post_fork() has
not been executed yet. This will trigger a null pointer dereference in
reweight_entity(), as it will try to access the run queue pointer, which
hasn't been set. This results it a crash as shown below:
KASAN: null-ptr-deref in range [0x00000000000000a0-0x00000000000000a7]
CPU: 0 PID: 2392 Comm: reduced_repro Not tainted 5.16.0-11201-gb42c5a161ea3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014
RIP: 0010:reweight_entity+0x15d/0x440
RSP: 0018:ffffc900035dfcf8 EFLAGS: 00010006
Call Trace:
<TASK>
reweight_task+0xde/0x1c0
set_load_weight+0x21c/0x2b0
set_user_nice.part.0+0x2d1/0x519
set_user_nice.cold+0x8/0xd
set_one_prio+0x24f/0x263
__do_sys_setpriority+0x2d3/0x640
__x64_sys_setpriority+0x84/0x8b
do_syscall_64+0x35/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xae
</TASK>
---[ end trace 9dc80a9d378ed00a ]---
Before the mentioned change the cfs_rq pointer for the task has been
set in sched_fork(), which is called much earlier in copy_process(),
before the new task is added to the thread_group.
Now it is done in the sched_post_fork(), which is called after that.
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Juri Lelli <juri.lelli(a)redhat.com>
Cc: Vincent Guittot <vincent.guittot(a)linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann(a)arm.com>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: Ben Segall <bsegall(a)google.com>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Daniel Bristot de Oliveira <bristot(a)redhat.com>
Cc: Zhang Qiao <zhangqiao22(a)huawei.com>
Cc: stable(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Link: https://syzkaller.appspot.com/bug?id=9d9c27adc674e3a7932b22b61c79a02da82cbd…
Fixes: 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group")
Reported-by: syzbot+af7a719bc92395ee41b3(a)syzkaller.appspotmail.com
Signed-off-by: Tadeusz Struk <tadeusz.struk(a)linaro.org>
---
Changes in v2:
- Added a check in set_user_nice(), and return from there if the task
is not fully setup instead of returning from reweight_entity()
---
kernel/sched/core.c | 4 ++++
kernel/sched/sched.h | 11 +++++++++++
2 files changed, 15 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2e4ae00e52d1..c3e74b6d595b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6897,6 +6897,10 @@ void set_user_nice(struct task_struct *p, long nice)
if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
return;
+
+ /* Check if the task's schedule run queue is setup correctly */
+ if (!task_rq_ready(p))
+ return;
/*
* We have to be careful, if called from sys_setpriority(),
* the task might be in the middle of scheduling on another CPU.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index de53be905739..464f629bff5a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1394,6 +1394,12 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
return grp->my_q;
}
+/* returns true if cfs run queue is set for the task */
+static inline bool task_rq_ready(struct task_struct *p)
+{
+ return !!task_cfs_rq(p);
+}
+
#else
static inline struct task_struct *task_of(struct sched_entity *se)
@@ -1419,6 +1425,11 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
return NULL;
}
+
+static inline bool task_rq_ready(struct task_struct *p)
+{
+ return true;
+}
#endif
extern void update_rq_clock(struct rq *rq);
--
2.34.1
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: e464121f2d40eabc7d11823fb26db807ce945df4
Gitweb: https://git.kernel.org/tip/e464121f2d40eabc7d11823fb26db807ce945df4
Author: Tony Luck <tony.luck(a)intel.com>
AuthorDate: Fri, 21 Jan 2022 09:47:38 -08:00
Committer: Borislav Petkov <bp(a)suse.de>
CommitterDate: Tue, 25 Jan 2022 18:40:30 +01:00
x86/cpu: Add Xeon Icelake-D to list of CPUs that support PPIN
Missed adding the Icelake-D CPU to the list. It uses the same MSRs
to control and read the inventory number as all the other models.
Fixes: dc6b025de95b ("x86/mce: Add Xeon Icelake to list of CPUs that support PPIN")
Reported-by: Ailin Xu <ailin.xu(a)intel.com>
Signed-off-by: Tony Luck <tony.luck(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20220121174743.1875294-2-tony.luck@intel.com
---
arch/x86/kernel/cpu/mce/intel.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index bb9a46a..baafbb3 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
With some chargers, vbus might momentarily raise above VSAFE5V and fall
back to 0V before tcpm gets to read port->tcpc->get_vbus. This will
will report a VBUS off event causing TCPM to transition to
SNK_UNATTACHED where it should be waiting in either SNK_ATTACH_WAIT
or SNK_DEBOUNCED state. This patch makes TCPM avoid vbus off events
while in SNK_ATTACH_WAIT or SNK_DEBOUNCED state.
Stub from the spec:
"4.5.2.2.4.2 Exiting from AttachWait.SNK State
A Sink shall transition to Unattached.SNK when the state of both
the CC1 and CC2 pins is SNK.Open for at least tPDDebounce.
A DRP shall transition to Unattached.SRC when the state of both
the CC1 and CC2 pins is SNK.Open for at least tPDDebounce."
[23.194131] CC1: 0 -> 0, CC2: 0 -> 5 [state SNK_UNATTACHED, polarity 0, connected]
[23.201777] state change SNK_UNATTACHED -> SNK_ATTACH_WAIT [rev3 NONE_AMS]
[23.209949] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS]
[23.300579] VBUS off
[23.300668] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [rev3 NONE_AMS]
[23.301014] VBUS VSAFE0V
[23.301111] Start toggling
Fixes: f0690a25a140b8 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable(a)vger.kernel.org
Signed-off-by: Badhri Jagan Sridharan <badhri(a)google.com>
---
drivers/usb/typec/tcpm/tcpm.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 59d4fa2443f2..b8afe3d8c882 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5156,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
case SNK_TRYWAIT_DEBOUNCE:
break;
case SNK_ATTACH_WAIT:
- tcpm_set_state(port, SNK_UNATTACHED, 0);
+ case SNK_DEBOUNCED:
+ /* Do nothing, as TCPM is still waiting for vbus to reaach VSAFE5V to connect */
break;
case SNK_NEGOTIATE_CAPABILITIES:
--
2.35.0.rc0.227.g00780c9af4-goog
This is a note to let you know that I've just added the patch titled
usb-storage: Add unusual-devs entry for VL817 USB-SATA bridge
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 5b67b315037250a61861119683e7fcb509deea25 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Mon, 24 Jan 2022 15:14:40 -0500
Subject: usb-storage: Add unusual-devs entry for VL817 USB-SATA bridge
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Two people have reported (and mentioned numerous other reports on the
web) that VIA's VL817 USB-SATA bridge does not work with the uas
driver. Typical log messages are:
[ 3606.232149] sd 14:0:0:0: [sdg] tag#2 uas_zap_pending 0 uas-tag 1 inflight: CMD
[ 3606.232154] sd 14:0:0:0: [sdg] tag#2 CDB: Write(16) 8a 00 00 00 00 00 18 0c c9 80 00 00 00 80 00 00
[ 3606.306257] usb 4-4.4: reset SuperSpeed Plus Gen 2x1 USB device number 11 using xhci_hcd
[ 3606.328584] scsi host14: uas_eh_device_reset_handler success
Surprisingly, the devices do seem to work okay for some other people.
The cause of the differing behaviors is not known.
In the hope of getting the devices to work for the most users, even at
the possible cost of degraded performance for some, this patch adds an
unusual_devs entry for the VL817 to block it from binding to the uas
driver by default. Users will be able to override this entry by means
of a module parameter, if they want.
CC: <stable(a)vger.kernel.org>
Reported-by: DocMAX <mail(a)vacharakis.de>
Reported-and-tested-by: Thomas Weißschuh <linux(a)weissschuh.net>
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Link: https://lore.kernel.org/r/Ye8IsK2sjlEv1rqU@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/storage/unusual_devs.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 29191d33c0e3..1a05e3dcfec8 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2301,6 +2301,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
US_FL_SCM_MULT_TARG ),
+/*
+ * Reported by DocMAX <mail(a)vacharakis.de>
+ * and Thomas Weißschuh <linux(a)weissschuh.net>
+ */
+UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999,
+ "VIA Labs, Inc.",
+ "VL817 SATA Bridge",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001,
"ST",
"2A",
--
2.35.0
This is a note to let you know that I've just added the patch titled
usb: typec: tcpci: don't touch CC line if it's Vconn source
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 5638b0dfb6921f69943c705383ff40fb64b987f2 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2(a)nxp.com>
Date: Thu, 13 Jan 2022 17:29:43 +0800
Subject: usb: typec: tcpci: don't touch CC line if it's Vconn source
With the AMS and Collision Avoidance, tcpm often needs to change the CC's
termination. When one CC line is sourcing Vconn, if we still change its
termination, the voltage of the another CC line is likely to be fluctuant
and unstable.
Therefore, we should verify whether a CC line is sourcing Vconn before
changing its termination and only change the termination that is not
a Vconn line. This can be done by reading the Vconn Present bit of
POWER_ STATUS register. To determine the polarity, we can read the
Plug Orientation bit of TCPC_CONTROL register. Since Vconn can only be
sourced if Plug Orientation is set.
Fixes: 0908c5aca31e ("usb: typec: tcpm: AMS and Collision Avoidance")
cc: <stable(a)vger.kernel.org>
Reviewed-by: Guenter Roeck <linux(a)roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Signed-off-by: Xu Yang <xu.yang_2(a)nxp.com>
Link: https://lore.kernel.org/r/20220113092943.752372-1-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/typec/tcpm/tcpci.c | 26 ++++++++++++++++++++++++++
drivers/usb/typec/tcpm/tcpci.h | 1 +
2 files changed, 27 insertions(+)
diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index 35a1307349a2..e07d26a3cd8e 100644
--- a/drivers/usb/typec/tcpm/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
@@ -75,9 +75,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val)
static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
{
struct tcpci *tcpci = tcpc_to_tcpci(tcpc);
+ bool vconn_pres;
+ enum typec_cc_polarity polarity = TYPEC_POLARITY_CC1;
unsigned int reg;
int ret;
+ ret = regmap_read(tcpci->regmap, TCPC_POWER_STATUS, ®);
+ if (ret < 0)
+ return ret;
+
+ vconn_pres = !!(reg & TCPC_POWER_STATUS_VCONN_PRES);
+ if (vconn_pres) {
+ ret = regmap_read(tcpci->regmap, TCPC_TCPC_CTRL, ®);
+ if (ret < 0)
+ return ret;
+
+ if (reg & TCPC_TCPC_CTRL_ORIENTATION)
+ polarity = TYPEC_POLARITY_CC2;
+ }
+
switch (cc) {
case TYPEC_CC_RA:
reg = (TCPC_ROLE_CTRL_CC_RA << TCPC_ROLE_CTRL_CC1_SHIFT) |
@@ -112,6 +128,16 @@ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
break;
}
+ if (vconn_pres) {
+ if (polarity == TYPEC_POLARITY_CC2) {
+ reg &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT);
+ reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT);
+ } else {
+ reg &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT);
+ reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT);
+ }
+ }
+
ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg);
if (ret < 0)
return ret;
diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h
index 2be7a77d400e..b2edd45f13c6 100644
--- a/drivers/usb/typec/tcpm/tcpci.h
+++ b/drivers/usb/typec/tcpm/tcpci.h
@@ -98,6 +98,7 @@
#define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4)
#define TCPC_POWER_STATUS_VBUS_DET BIT(3)
#define TCPC_POWER_STATUS_VBUS_PRES BIT(2)
+#define TCPC_POWER_STATUS_VCONN_PRES BIT(1)
#define TCPC_POWER_STATUS_SINKING_VBUS BIT(0)
#define TCPC_FAULT_STATUS 0x1f
--
2.35.0
Hi Greg,
Regression found on
stable-rc 5.4 queue riscv tinyconfig build failed.
Not sure which patch is causing build failures.
We will bisect and get back to you.
make --silent --keep-going --jobs=8
O=/home/tuxbuild/.cache/tuxmake/builds/current LLVM_IAS=1 ARCH=riscv
CROSS_COMPILE=riscv64-linux-gnu- HOSTCC=clang CC=clang
In file included from /builds/linux/kernel/dma/mapping.c:8:
In file included from /builds/linux/include/linux/memblock.h:13:
In file included from /builds/linux/include/linux/mm.h:10:
In file included from /builds/linux/include/linux/gfp.h:6:
In file included from /builds/linux/include/linux/mmzone.h:8:
In file included from /builds/linux/include/linux/spinlock.h:51:
In file included from /builds/linux/include/linux/preempt.h:78:
In file included from ./arch/riscv/include/generated/asm/preempt.h:1:
In file included from /builds/linux/include/asm-generic/preempt.h:5:
In file included from /builds/linux/include/linux/thread_info.h:22:
/builds/linux/arch/riscv/include/asm/current.h:30:9: warning: variable
'tp' is uninitialized when used here [-Wuninitialized]
return tp;
^~
/builds/linux/arch/riscv/include/asm/current.h:29:33: note: initialize
the variable 'tp' to silence this warning
register struct task_struct *tp __asm__("tp");
^
= NULL
clang: warning: argument unused during compilation: '-no-pie'
[-Wunused-command-line-argument]
In file included from /builds/linux/arch/riscv/kernel/cpu.c:7:
In file included from /builds/linux/include/linux/seq_file.h:8:
In file included from /builds/linux/include/linux/mutex.h:14:
/builds/linux/arch/riscv/include/asm/current.h:30:9: warning: variable
'tp' is uninitialized when used here [-Wuninitialized]
return tp;
^~
/builds/linux/arch/riscv/include/asm/current.h:29:33: note: initialize
the variable 'tp' to silence this warning
register struct task_struct *tp __asm__("tp");
^
= NULL
1 warning generated.
1 warning generated.
1 warning generated.
1 warning generated.
<instantiation>:1:1: error: unrecognized instruction mnemonic
LOCAL _restore_kernel_tpsp
^
/builds/linux/arch/riscv/kernel/entry.S:163:2: note: while in macro
instantiation
SAVE_ALL
^
<instantiation>:2:2: error: unrecognized instruction mnemonic
LOCAL _save_context
^
Reported-by: Linux Kernel Functional Testing <lkft(a)linaro.org>
--
Linaro LKFT
https://lkft.linaro.org
Hello There, I hope this message finds you in good spirits
especially during this challenging period. Anyway, I am D C
Johnston, a private broker formerly with a Fund Management
company in South Africa. I am privately contacting you because
one of my high profile clients is interested in investing abroad
and has asked me to look for individuals and companies with
interesting business ideas that he can invest in. He has an
interest in investing a very substantial amount abroad. I got
your email contact through an online business directory and I
thought I'd contact you to see if you are interested in this
opportunity. Do you or your company have new or existing projects
that require funding?
Please indicate your interest by replying back to this email.
Once I get your response, I will give you more details and we can
plan a strategy that will be beneficial to all parties. Please
also indicate your direct mobile and whatsapp numbers for an
easier communication. Please keep safe and I wish you a great new
year.
Best Regards,
Dean.
May the peace of God be with You.
I am contacting you through this means because I need your urgent
assistance and also help me to carry a charity project in your
country. I found your email address as a true child of God for past
few days now that I have been praying to know if you are really the
chosen one for this great charity project, according to God's
direction, after all prayers I am convinced, and I have decided to
contact you. Please, i want you use the funds for the Lord's work,
with confidence, read and respond now.
My name is Mrs. Emman Nadia F, a widow, but currently based in West
Africa since my life with my late husband, who was a businessman in
this country before dying some years ago. We were married to many
years without a child. He died after a brief illness that lasted only
six days and I myself have been suffering from an ovarian cancer
disease. At this moment I am about to finish the race in this way
because the disease has reached a very bad stage, without any family
member and without children. I hope you do not expose or betray this
trust and I am sure that I am about to trust you for the mutual
benefit of orphans and the less privileged. I have some funds that I
inherited from my late husband, the total sum of ($ 12,500,000.00)
deposited at a bank here in Burkina Faso. After knowing my current
state of health, I decided to trust you with this fund, believing that
you will use it in the way I will instruct here.
you will use this $12.5 Million for public benefit as follows;
1. Establish An Orphanage Home To Help The Orphanages Children.
2. Build A Hospital To Help The Poor.
3. Build A Nursing Home For Elderly People Need Care & Meal.
You will named them after my late husband.Therefore, I need you to
help me and claim this money and use it for charities, for orphanages
and provide justice and help to the poor, needy and to promote the
words of God and the effort to maintain the house of God, according to
the bible in the book of. Jeremiah 22: 15-16, without minding our
different religions.
It will be a pleasure to compensate with 40% percent of the total
money for your effort in handling the transaction, while 60% of the
money will go to charity project.
All I need from you is sincerity and ability to complete the task of
God without any failure. It will be my pleasure to see that the bank
has finally released and transferred the fund to your bank account in
the country, even before I die here in the hospital, due to my current
state of health, everything must be processed as soon as possible.
I am waiting for your immediate response, if you are only interested
in obtaining more details about the transaction and execution of this
humanitarian project for the glory and honor of God.
Sorry if you received this letter in your spam, is due to recent
connection/network error here in the country.
Please I am waiting for your urgent reply now.
May God Bless you,
Mrs. EEmman Nadia F.
Hot-unplug all firmware-framebuffer devices as part of removing
them via remove_conflicting_framebuffers() et al. Releases all
memory regions to be acquired by native drivers.
Firmware, such as EFI, install a framebuffer while posting the
computer. After removing the firmware-framebuffer device from fbdev,
a native driver takes over the hardware and the firmware framebuffer
becomes invalid.
Firmware-framebuffer drivers, specifically simplefb, don't release
their device from Linux' device hierarchy. It still owns the firmware
framebuffer and blocks the native drivers from loading. This has been
observed in the vmwgfx driver. [1]
Initiating a device removal (i.e., hot unplug) as part of
remove_conflicting_framebuffers() removes the underlying device and
returns the memory range to the system.
[1] https://lore.kernel.org/dri-devel/20220117180359.18114-1-zack@kde.org/
v2:
* rename variable 'dev' to 'device' (Javier)
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Reported-by: Zack Rusin <zackr(a)vmware.com>
Reviewed-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Zack Rusin <zackr(a)vmware.com>
CC: stable(a)vger.kernel.org # v5.11+
---
drivers/video/fbdev/core/fbmem.c | 29 ++++++++++++++++++++++++++---
include/linux/fb.h | 1 +
2 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 0fa7ede94fa6..b585339509b0 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -25,6 +25,7 @@
#include <linux/init.h>
#include <linux/linux_logo.h>
#include <linux/proc_fs.h>
+#include <linux/platform_device.h>
#include <linux/seq_file.h>
#include <linux/console.h>
#include <linux/kmod.h>
@@ -1557,18 +1558,36 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
/* check all firmware fbs and kick off if the base addr overlaps */
for_each_registered_fb(i) {
struct apertures_struct *gen_aper;
+ struct device *device;
if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE))
continue;
gen_aper = registered_fb[i]->apertures;
+ device = registered_fb[i]->device;
if (fb_do_apertures_overlap(gen_aper, a) ||
(primary && gen_aper && gen_aper->count &&
gen_aper->ranges[0].base == VGA_FB_PHYS)) {
printk(KERN_INFO "fb%d: switching to %s from %s\n",
i, name, registered_fb[i]->fix.id);
- do_unregister_framebuffer(registered_fb[i]);
+
+ /*
+ * If we kick-out a firmware driver, we also want to remove
+ * the underlying platform device, such as simple-framebuffer,
+ * VESA, EFI, etc. A native driver will then be able to
+ * allocate the memory range.
+ *
+ * If it's not a platform device, at least print a warning. A
+ * fix would add code to remove the device from the system.
+ */
+ if (dev_is_platform(device)) {
+ registered_fb[i]->forced_out = true;
+ platform_device_unregister(to_platform_device(device));
+ } else {
+ pr_warn("fb%d: cannot remove device\n", i);
+ do_unregister_framebuffer(registered_fb[i]);
+ }
}
}
}
@@ -1898,9 +1917,13 @@ EXPORT_SYMBOL(register_framebuffer);
void
unregister_framebuffer(struct fb_info *fb_info)
{
- mutex_lock(®istration_lock);
+ bool forced_out = fb_info->forced_out;
+
+ if (!forced_out)
+ mutex_lock(®istration_lock);
do_unregister_framebuffer(fb_info);
- mutex_unlock(®istration_lock);
+ if (!forced_out)
+ mutex_unlock(®istration_lock);
}
EXPORT_SYMBOL(unregister_framebuffer);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 3da95842b207..9a14f3f8a329 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -502,6 +502,7 @@ struct fb_info {
} *apertures;
bool skip_vt_switch; /* no VT switch on suspend/resume required */
+ bool forced_out; /* set when being removed by another driver */
};
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
--
2.34.1
The patch titled
Subject: mm, kasan: use compare-exchange operation to set KASAN page tag
has been added to the -mm tree. Its filename is
mm-use-compare-exchange-operation-to-set-kasan-page-tag.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-use-compare-exchange-operation…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-use-compare-exchange-operation…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Peter Collingbourne <pcc(a)google.com>
Subject: mm, kasan: use compare-exchange operation to set KASAN page tag
It has been reported that the tag setting operation on newly-allocated
pages can cause the page flags to be corrupted when performed concurrently
with other flag updates as a result of the use of non-atomic operations.
Fix the problem by using a compare-exchange loop to update the tag.
Link: https://lkml.kernel.org/r/20220120020148.1632253-1-pcc@google.com
Link: https://linux-review.googlesource.com/id/I456b24a2b9067d93968d43b4bb3351c0c…
Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc")
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/mm.h | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
--- a/include/linux/mm.h~mm-use-compare-exchange-operation-to-set-kasan-page-tag
+++ a/include/linux/mm.h
@@ -1506,11 +1506,18 @@ static inline u8 page_kasan_tag(const st
static inline void page_kasan_tag_set(struct page *page, u8 tag)
{
- if (kasan_enabled()) {
- tag ^= 0xff;
- page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
- page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
- }
+ unsigned long old_flags, flags;
+
+ if (!kasan_enabled())
+ return;
+
+ tag ^= 0xff;
+ old_flags = READ_ONCE(page->flags);
+ do {
+ flags = old_flags;
+ flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
+ flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
+ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags)));
}
static inline void page_kasan_tag_reset(struct page *page)
_
Patches currently in -mm which might be from pcc(a)google.com are
mm-use-compare-exchange-operation-to-set-kasan-page-tag.patch
The patch titled
Subject: mm/gup.c: fix invalid page pointer returned with FOLL_PIN gups
has been added to the -mm tree. Its filename is
mm-fix-invalid-page-pointer-returned-with-foll_pin-gups.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-fix-invalid-page-pointer-retur…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-fix-invalid-page-pointer-retur…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/gup.c: fix invalid page pointer returned with FOLL_PIN gups
Alex reported invalid page pointer returned with pin_user_pages_remote()
from vfio after upstream commit 4b6c33b32296 ("vfio/type1: Prepare for
batched pinning with struct vfio_batch"). This problem breaks NVIDIA vfio
mdev.
It turns out that it's not the fault of the vfio commit; however after
vfio switches to a full page buffer to store the page pointers it starts
to expose the problem easier.
The problem is for VM_PFNMAP vmas we should normally fail with an -EFAULT
then vfio will carry on to handle the MMIO regions. However when the bug
triggered, follow_page_mask() returned -EEXIST for such a page, which will
jump over the current page, leaving that entry in **pages untouched.
However the caller is not aware of it, hence the caller will reference the
page as usual even if the pointer data can be anything.
We had that -EEXIST logic since commit 1027e4436b6a ("mm: make GUP handle
pfn mapping unless FOLL_GET is requested") which seems very reasonable.
It could be that when we reworked GUP with FOLL_PIN we could have
overlooked that special path in commit 3faa52c03f44 ("mm/gup: track
FOLL_PIN pages"), even if that commit rightfully touched up
follow_devmap_pud() on checking FOLL_PIN when it needs to return an
-EEXIST.
Since at it, add another WARN_ON_ONCE() at the -EEXIST handling to make
sure we mustn't have **pages set when reaching there, because otherwise it
means the caller will try to read a garbage right after __get_user_pages()
returns.
Attaching the Fixes to the FOLL_PIN rework commit, as it happened later
than 1027e4436b6a.
Link: https://lkml.kernel.org/r/20220125033700.69705-1-peterx@redhat.com
Fixes: 3faa52c03f44 ("mm/gup: track FOLL_PIN pages")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reported-by: Alex Williamson <alex.williamson(a)redhat.com>
Debugged-by: Alex Williamson <alex.williamson(a)redhat.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jérôme Glisse <jglisse(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/gup.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
--- a/mm/gup.c~mm-fix-invalid-page-pointer-returned-with-foll_pin-gups
+++ a/mm/gup.c
@@ -440,7 +440,7 @@ static int follow_pfn_pte(struct vm_area
pte_t *pte, unsigned int flags)
{
/* No page to get reference */
- if (flags & FOLL_GET)
+ if (flags & (FOLL_GET | FOLL_PIN))
return -EFAULT;
if (flags & FOLL_TOUCH) {
@@ -1181,7 +1181,13 @@ retry:
/*
* Proper page table entry exists, but no corresponding
* struct page.
+ *
+ * Warn if we jumped over even with a valid **pages.
+ * It shouldn't trigger in practise, but when there's
+ * buggy returns on -EEXIST we'll warn before returning
+ * an invalid page pointer in the array.
*/
+ WARN_ON_ONCE(pages);
goto next_page;
} else if (IS_ERR(page)) {
ret = PTR_ERR(page);
_
Patches currently in -mm which might be from peterx(a)redhat.com are
mm-fix-invalid-page-pointer-returned-with-foll_pin-gups.patch
My husband wish, can I trust you?
I am Mrs. Maya Oliver, from Norway
. Firstly, I am married to Mr. Patrick Oliver, A diamond and gold
merchant who owns a small gold Mine in Burkina Faso and Egypt Cairo;
He died of Cardiovascular Disease in mid-March 2011. During his
lifetime he deposited the sum of € 18.5 Million Euro) Eighteen
million, Five hundred thousand Euros in a bank in Ouagadougou the
capital city of Burkina Faso. The deposited money was from the sale of
the shares, death benefits payment and entitlements of my deceased
husband by his company.
Since his death I decided not to remarry, When my late husband was
Alive he deposited the sum of € 18.5 Million Euro) Eight million, Five hundred
thousand Euro) in a bank in Burkina Faso, Presently this money is
still in bank. And My Doctor told me that I don't have much time to
live because of the cancer problem,
Having known my condition I decided to hand you over this fund to take
care of the less-privileged people, you will utilize this money the
way I am going to instruct herein. I want you to take 35% Percent of
the total money for your personal use While 65% of the money will go
to charity" people and helping the orphanage.
I don't want my husband's efforts to be used by the Government. I grew
up as an Orphan and I don't have anybody as my family member,
Meanwhile the total funds is currently with the RIA transfer company
under the guiding of my bank director and they have been instructed to
transfer the funds to you through the mention options bellow
1, Money Gram
2, ATM card,
3 RIA
4, Online Transfer
that mention above method of transfer is 100% guarantee for you to
received the funds without much delaying, once you are in contact with
them, base on the urgency required for you to handle the project, as
my doctors has confirmed that I don’t have much time to live, bellow
is the contact of the RIA transfer manager who will proceed the
transfer to you once you are in contact with them.
BELOW HERE IS THEIR CONTACT INFORMATION
OFFICE NAME: RIA MONEY TRANSFER SERVICE BURKINA FASO
CONTACT PERSON: Mr. Mohamed SIMPORE - Directeur général
CONTACT FIRST EMAIL: transferriamoney0(a)gmail.com
CONTACT SECOND EMAIL: servicemoneygram8(a)gmail.com
Phone Numbers: (+226) 25 49 24 0470
Please note you will be required to send them your information as below
A scan copy of your passport or ID card
Your telephone number……………………………………
Your occupation ……………………………………………
So as to commence the transfer to you without delaying
Regards,
Mrs. Maya Oliver,
The patch titled
Subject: ocfs2: fix a deadlock when commit trans
has been added to the -mm tree. Its filename is
ocfs2-fix-a-deadlock-when-commit-trans.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/ocfs2-fix-a-deadlock-when-commit-…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/ocfs2-fix-a-deadlock-when-commit-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Subject: ocfs2: fix a deadlock when commit trans
commit 6f1b228529ae introduces a regression which can deadlock as follows:
Task1: Task2:
jbd2_journal_commit_transaction ocfs2_test_bg_bit_allocatable
spin_lock(&jh->b_state_lock) jbd_lock_bh_journal_head
__jbd2_journal_remove_checkpoint spin_lock(&jh->b_state_lock)
jbd2_journal_put_journal_head
jbd_lock_bh_journal_head
Task1 and Task2 lock bh->b_state and jh->b_state_lock in different
order, which finally result in a deadlock.
So use jbd2_journal_[grab|put]_journal_head instead in
ocfs2_test_bg_bit_allocatable() to fix it.
Link: https://lkml.kernel.org/r/20220121071205.100648-3-joseph.qi@linux.alibaba.c…
Fixes: 6f1b228529ae ("ocfs2: fix race between searching chunks and release journal_head from buffer_head")
Signed-off-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Reported-by: Saeed Mirzamohammadi <saeed.mirzamohammadi(a)oracle.com>
Cc: "Theodore Ts'o" <tytso(a)mit.edu>
Cc: Andreas Dilger <adilger.kernel(a)dilger.ca>
Cc: <stable(a)vger.kernel.org>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/suballoc.c | 25 +++++++++++--------------
1 file changed, 11 insertions(+), 14 deletions(-)
--- a/fs/ocfs2/suballoc.c~ocfs2-fix-a-deadlock-when-commit-trans
+++ a/fs/ocfs2/suballoc.c
@@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct journal_head *jh;
- int ret = 1;
+ int ret;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
return 0;
- if (!buffer_jbd(bg_bh))
+ jh = jbd2_journal_grab_journal_head(bg_bh);
+ if (!jh)
return 1;
- jbd_lock_bh_journal_head(bg_bh);
- if (buffer_jbd(bg_bh)) {
- jh = bh2jh(bg_bh);
- spin_lock(&jh->b_state_lock);
- bg = (struct ocfs2_group_desc *) jh->b_committed_data;
- if (bg)
- ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
- else
- ret = 1;
- spin_unlock(&jh->b_state_lock);
- }
- jbd_unlock_bh_journal_head(bg_bh);
+ spin_lock(&jh->b_state_lock);
+ bg = (struct ocfs2_group_desc *) jh->b_committed_data;
+ if (bg)
+ ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+ else
+ ret = 1;
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_put_journal_head(jh);
return ret;
}
_
Patches currently in -mm which might be from joseph.qi(a)linux.alibaba.com are
jbd2-export-jbd2_journal__journal_head.patch
ocfs2-fix-a-deadlock-when-commit-trans.patch
The patch titled
Subject: jbd2: export jbd2_journal_[grab|put]_journal_head
has been added to the -mm tree. Its filename is
jbd2-export-jbd2_journal__journal_head.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/jbd2-export-jbd2_journal__journal…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/jbd2-export-jbd2_journal__journal…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Subject: jbd2: export jbd2_journal_[grab|put]_journal_head
Patch series "ocfs2: fix a deadlock case".
This fixes a deadlock case in ocfs2. We firstly export jbd2 symbols
jbd2_journal_[grab|put]_journal_head as preparation and later use them in
ocfs2 insread of jbd_[lock|unlock]_bh_journal_head to fix the deadlock.
This patch (of 2):
This exports symbols jbd2_journal_[grab|put]_journal_head, which will be
used outside modules, e.g. ocfs2.
Link: https://lkml.kernel.org/r/20220121071205.100648-2-joseph.qi@linux.alibaba.c…
Signed-off-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Andreas Dilger <adilger.kernel(a)dilger.ca>
Cc: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Cc: Saeed Mirzamohammadi <saeed.mirzamohammadi(a)oracle.com>
Cc: "Theodore Ts'o" <tytso(a)mit.edu>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/jbd2/journal.c | 2 ++
1 file changed, 2 insertions(+)
--- a/fs/jbd2/journal.c~jbd2-export-jbd2_journal__journal_head
+++ a/fs/jbd2/journal.c
@@ -2972,6 +2972,7 @@ struct journal_head *jbd2_journal_grab_j
jbd_unlock_bh_journal_head(bh);
return jh;
}
+EXPORT_SYMBOL(jbd2_journal_grab_journal_head);
static void __journal_remove_journal_head(struct buffer_head *bh)
{
@@ -3024,6 +3025,7 @@ void jbd2_journal_put_journal_head(struc
jbd_unlock_bh_journal_head(bh);
}
}
+EXPORT_SYMBOL(jbd2_journal_put_journal_head);
/*
* Initialize jbd inode head
_
Patches currently in -mm which might be from joseph.qi(a)linux.alibaba.com are
jbd2-export-jbd2_journal__journal_head.patch
ocfs2-fix-a-deadlock-when-commit-trans.patch
Sir/Madam,
Good day to you.
I am Dr.Gertjan Vlieghe personal Secretary to Andrew Bailey who double as the Governor, Bank of England (https://en.wikipedia.org/wiki/Andrew_Bailey_%28banker%29). We have an inheritance of a deceased client, who bear the same name with your surname. kindly contact Andrew Bailey through his personal email ( andbaill228(a)mail2world.com ) with your details for more information.
Thank you.
Dr.Gertjan Vlieghe
Dearest Friend,
In the name of God, Most Gracious, Most Merciful.
Peace be upon you and mercy be upon you and blessings be upon you.
I have the sum of $27.5 million USD for investment, I am interested in
you for investment project assistance in your country. My name is
Aisha Gaddafi and presently living in Oman, I am a Widow and single
Mother with three Children, the only biological Daughter of late
Libyan President (Late Colonel Muammar Gaddafi) and presently I am
under political asylum protection by the Omani Government.
Kindly reply urgently for more details.
my email address below: ayishagddafio(a)mail.ru
Thanks
Yours Truly Aisha
hallo Greg
5.16.3-rc1
compiles, boots and runs on my x86_64
(Intel i5-11400, Fedora 35)
Thanks
Tested-by: Ronald Warsow <rwarsow(a)gmx.de>
--
regards
Ronald
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d386f7ef9f410266bc1f364ad6a11cb28dae09a8 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello(a)amd.com>
Date: Fri, 10 Dec 2021 08:35:29 -0600
Subject: [PATCH] platform/x86: amd-pmc: only use callbacks for suspend
This driver is intended to be used exclusively for suspend to idle
so callbacks to send OS_HINT during hibernate and S5 will set OS_HINT
at the wrong time leading to an undefined behavior.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://lore.kernel.org/r/20211210143529.10594-1-mario.limonciello@amd.com
Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index c709ff993e8b..f794343d6aaa 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -585,7 +585,8 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
}
static const struct dev_pm_ops amd_pmc_pm_ops = {
- SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(amd_pmc_suspend, amd_pmc_resume)
+ .suspend_noirq = amd_pmc_suspend,
+ .resume_noirq = amd_pmc_resume,
};
static const struct pci_device_id pmc_pci_ids[] = {
Greetings!
I'm Nasser Rashid, a business financial specialist and investment
expert. consultant experienced in financial funding services. I
have a
I have a serious business investment opportunity to discuss with
you. Century Financial Dubai is the home of discerning investors.
We
We offer independent financial advice and assist our clients in
making sound investment decisions from a variety of investment
options.
Opportunities are available.
Our company is structured to provide personalized services to As
a result, capital security and adequate funding are ensured.
returns on investment. Our investors are ready to provide funding
for your business expansion, such as debt and equity.
financing. If you require funding, we would be able to partner
with you. We look forward to your response.
Thank you and stay safe,
Nasser Rashid, CFA,
Century Financial
In order to optimize FIFO access, especially on m_can cores attached
to slow busses like SPI, in patch
| e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors")
bulk read/write support has been added to the m_can_fifo_{read,write}
functions.
That change leads to the tcan driver to call
regmap_bulk_{read,write}() with a length of 0 (for CAN frames with 0
data length). regmap treats this as an error:
| tcan4x5x spi1.0 tcan4x5x0: FIFO write returned -22
This patch fixes the problem by not calling the
cdev->ops->{read,write)_fifo() in case of a 0 length read/write.
Fixes: e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors")
Link: https://lore.kernel.org/all/20220114155751.2651888-1-mkl@pengutronix.de
Cc: stable(a)vger.kernel.org
Cc: Matt Kline <matt(a)bitbashing.io>
Cc: Chandrasekar Ramakrishnan <rcsekar(a)samsung.com>
Reported-by: Michael Anochin <anochin(a)photo-meter.com>
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/m_can/m_can.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 5b47cd867783..1a4b56f6fa8c 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -336,6 +336,9 @@ m_can_fifo_read(struct m_can_classdev *cdev,
u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE +
offset;
+ if (val_count == 0)
+ return 0;
+
return cdev->ops->read_fifo(cdev, addr_offset, val, val_count);
}
@@ -346,6 +349,9 @@ m_can_fifo_write(struct m_can_classdev *cdev,
u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE +
offset;
+ if (val_count == 0)
+ return 0;
+
return cdev->ops->write_fifo(cdev, addr_offset, val, val_count);
}
--
2.34.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7c8a4742c4abe205ec9daf416c9d42fd6b406e8e Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack(a)google.com>
Date: Thu, 13 Jan 2022 23:30:17 +0000
Subject: [PATCH] KVM: x86/mmu: Fix write-protection of PTs mapped by the TDP
MMU
When the TDP MMU is write-protection GFNs for page table protection (as
opposed to for dirty logging, or due to the HVA not being writable), it
checks if the SPTE is already write-protected and if so skips modifying
the SPTE and the TLB flush.
This behavior is incorrect because it fails to check if the SPTE
is write-protected for page table protection, i.e. fails to check
that MMU-writable is '0'. If the SPTE was write-protected for dirty
logging but not page table protection, the SPTE could locklessly be made
writable, and vCPUs could still be running with writable mappings cached
in their TLB.
Fix this by only skipping setting the SPTE if the SPTE is already
write-protected *and* MMU-writable is already clear. Technically,
checking only MMU-writable would suffice; a SPTE cannot be writable
without MMU-writable being set. But check both to be paranoid and
because it arguably yields more readable code.
Fixes: 46044f72c382 ("kvm: x86/mmu: Support write protection for nesting in tdp MMU")
Cc: stable(a)vger.kernel.org
Signed-off-by: David Matlack <dmatlack(a)google.com>
Message-Id: <20220113233020.3986005-2-dmatlack(a)google.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7b1bc816b7c3..bc9e3553fba2 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1442,12 +1442,12 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
!is_last_spte(iter.old_spte, iter.level))
continue;
- if (!is_writable_pte(iter.old_spte))
- break;
-
new_spte = iter.old_spte &
~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
+ if (new_spte == iter.old_spte)
+ break;
+
tdp_mmu_set_spte(kvm, &iter, new_spte);
spte_set = true;
}
It has been reported that the tag setting operation on newly-allocated
pages can cause the page flags to be corrupted when performed
concurrently with other flag updates as a result of the use of
non-atomic operations. Fix the problem by using a compare-exchange
loop to update the tag.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Link: https://linux-review.googlesource.com/id/I456b24a2b9067d93968d43b4bb3351c0c…
Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc")
Cc: stable(a)vger.kernel.org
---
v3:
- use try_cmpxchg() as suggested by Peter Zijlstra on another
patch
v2:
- use READ_ONCE()
include/linux/mm.h | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c768a7c81b0b..87473fe52c3f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1531,11 +1531,18 @@ static inline u8 page_kasan_tag(const struct page *page)
static inline void page_kasan_tag_set(struct page *page, u8 tag)
{
- if (kasan_enabled()) {
- tag ^= 0xff;
- page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
- page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
- }
+ unsigned long old_flags, flags;
+
+ if (!kasan_enabled())
+ return;
+
+ tag ^= 0xff;
+ old_flags = READ_ONCE(page->flags);
+ do {
+ flags = old_flags;
+ flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
+ flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
+ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags)));
}
static inline void page_kasan_tag_reset(struct page *page)
--
2.34.1.703.g22d0c6ccf7-goog
The membarrier command MEMBARRIER_CMD_QUERY allows querying the
available membarrier commands. When the membarrier-rseq fence commands
were added, a new MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK was
introduced with the intent to expose them with the MEMBARRIER_CMD_QUERY
command, the but it was never added to MEMBARRIER_CMD_BITMASK.
The membarrier-rseq fence commands are therefore not wired up with the
query command.
Rename MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK to
MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK (the bitmask is not a command
per-se), and change the erroneous
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK (which does not
actually exist) to MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ.
Wire up MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK in
MEMBARRIER_CMD_BITMASK. Fixing this allows discovering availability of
the membarrier-rseq fence feature.
Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Peter Oskolkov <posk(a)google.com>
Cc: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: <stable(a)vger.kernel.org> # 5.10+
---
kernel/sched/membarrier.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index b5add64d9698..3d2825408e3a 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -147,11 +147,11 @@
#endif
#ifdef CONFIG_RSEQ
-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \
+#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \
- | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
+ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
#else
-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
+#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
#endif
#define MEMBARRIER_CMD_BITMASK \
@@ -159,7 +159,8 @@
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
- | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
+ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
+ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
static void ipi_mb(void *info)
{
--
2.17.1
In Linux 4.14 and 4.19 these architectures still have their own
implementations of get_user_pages_fast(). These also need to force
the write flag on when taking the fast path.
Fixes: 407faed92b4a ("gup: document and work around "COW can break either way" issue")
Fixes: 5e24029791e8 ("gup: document and work around "COW can break either way" issue")
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
---
arch/mips/mm/gup.c | 9 ++++++++-
arch/s390/mm/gup.c | 9 ++++++++-
arch/sh/mm/gup.c | 9 ++++++++-
arch/sparc/mm/gup.c | 9 ++++++++-
4 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 5a4875cac1ec..2e7a0d201c09 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -274,7 +274,14 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ /*
+ * The FAST_GUP case requires FOLL_WRITE even for pure reads,
+ * because get_user_pages() may need to cause an early COW in
+ * order to avoid confusing the normal COW routines. So only
+ * targets that are already writable are safe to do by just
+ * looking at the page tables.
+ */
+ if (!gup_pud_range(pgd, addr, next, 1, pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 9b5b866d8adf..5389bf5bc828 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -287,7 +287,14 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
might_sleep();
start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ /*
+ * The FAST_GUP case requires FOLL_WRITE even for pure reads,
+ * because get_user_pages() may need to cause an early COW in
+ * order to avoid confusing the normal COW routines. So only
+ * targets that are already writable are safe to do by just
+ * looking at the page tables.
+ */
+ nr = __get_user_pages_fast(start, nr_pages, 1, pages);
if (nr == nr_pages)
return nr;
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
index 56c86ca98ecf..23fa2fc8aabc 100644
--- a/arch/sh/mm/gup.c
+++ b/arch/sh/mm/gup.c
@@ -242,7 +242,14 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ /*
+ * The FAST_GUP case requires FOLL_WRITE even for pure reads,
+ * because get_user_pages() may need to cause an early COW in
+ * order to avoid confusing the normal COW routines. So only
+ * targets that are already writable are safe to do by just
+ * looking at the page tables.
+ */
+ if (!gup_pud_range(pgd, addr, next, 1, pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index aee6dba83d0e..f291d34a1cd5 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -303,7 +303,14 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ /*
+ * The FAST_GUP case requires FOLL_WRITE even for pure reads,
+ * because get_user_pages() may need to cause an early COW in
+ * order to avoid confusing the normal COW routines. So only
+ * targets that are already writable are safe to do by just
+ * looking at the page tables.
+ */
+ if (!gup_pud_range(pgd, addr, next, 1, pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
From: Ross Zwisler <ross.zwisler(a)linux.intel.com>
commit 097963959594c5eccaba42510f7033f703211bda upstream.
Patch series "Write protect DAX PMDs in *sync path".
Currently dax_mapping_entry_mkclean() fails to clean and write protect
the pmd_t of a DAX PMD entry during an *sync operation. This can result
in data loss, as detailed in patch 2.
This series is based on Dan's "libnvdimm-pending" branch, which is the
current home for Jan's "dax: Page invalidation fixes" series. You can
find a working tree here:
https://git.kernel.org/cgit/linux/kernel/git/zwisler/linux.git/log/?h=dax_p…
This patch (of 2):
Similar to follow_pte(), follow_pte_pmd() allows either a PTE leaf or a
huge page PMD leaf to be found and returned.
Link: http://lkml.kernel.org/r/1482272586-21177-2-git-send-email-ross.zwisler@lin…
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Suggested-by: Dave Hansen <dave.hansen(a)intel.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Matthew Wilcox <mawilcox(a)microsoft.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
[bwh: Backported to 4.9: adjust context]
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
---
include/linux/mm.h | 2 ++
mm/memory.c | 37 ++++++++++++++++++++++++++++++-------
2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7a4c035b187f..81ee5d0b2642 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1269,6 +1269,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma);
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
+int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
unsigned long *pfn);
int follow_phys(struct vm_area_struct *vma, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index c2890dc104d9..2b2cc69ddcce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3780,8 +3780,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
}
#endif /* __PAGETABLE_PMD_FOLDED */
-static int __follow_pte(struct mm_struct *mm, unsigned long address,
- pte_t **ptepp, spinlock_t **ptlp)
+static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
{
pgd_t *pgd;
pud_t *pud;
@@ -3798,11 +3798,20 @@ static int __follow_pte(struct mm_struct *mm, unsigned long address,
pmd = pmd_offset(pud, address);
VM_BUG_ON(pmd_trans_huge(*pmd));
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- goto out;
- /* We cannot handle huge page PFN maps. Luckily they don't exist. */
- if (pmd_huge(*pmd))
+ if (pmd_huge(*pmd)) {
+ if (!pmdpp)
+ goto out;
+
+ *ptlp = pmd_lock(mm, pmd);
+ if (pmd_huge(*pmd)) {
+ *pmdpp = pmd;
+ return 0;
+ }
+ spin_unlock(*ptlp);
+ }
+
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
goto out;
ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
@@ -3825,9 +3834,23 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
/* (void) is needed to make gcc happy */
(void) __cond_lock(*ptlp,
- !(res = __follow_pte(mm, address, ptepp, ptlp)));
+ !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
+ ptlp)));
+ return res;
+}
+
+int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+{
+ int res;
+
+ /* (void) is needed to make gcc happy */
+ (void) __cond_lock(*ptlp,
+ !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
+ ptlp)));
return res;
}
+EXPORT_SYMBOL(follow_pte_pmd);
/**
* follow_pfn - look up PFN at a user virtual address
From: Davidlohr Bueso <dave(a)stgolabs.net>
commit cd9e61ed1eebbcd5dfad59475d41ec58d9b64b6a upstream.
Patch series "rbtree: Cache leftmost node internally", v4.
A series to extending rbtrees to internally cache the leftmost node such
that we can have fast overlap check optimization for all interval tree
users[1]. The benefits of this series are that:
(i) Unify users that do internal leftmost node caching.
(ii) Optimize all interval tree users.
(iii) Convert at least two new users (epoll and procfs) to the new interface.
This patch (of 16):
Red-black tree semantics imply that nodes with smaller or greater (or
equal for duplicates) keys always be to the left and right,
respectively. For the kernel this is extremely evident when considering
our rb_first() semantics. Enabling lookups for the smallest node in the
tree in O(1) can save a good chunk of cycles in not having to walk down
the tree each time. To this end there are a few core users that
explicitly do this, such as the scheduler and rtmutexes. There is also
the desire for interval trees to have this optimization allowing faster
overlap checking.
This patch introduces a new 'struct rb_root_cached' which is just the
root with a cached pointer to the leftmost node. The reason why the
regular rb_root was not extended instead of adding a new structure was
that this allows the user to have the choice between memory footprint
and actual tree performance. The new wrappers on top of the regular
rb_root calls are:
- rb_first_cached(cached_root) -- which is a fast replacement
for rb_first.
- rb_insert_color_cached(node, cached_root, new)
- rb_erase_cached(node, cached_root)
In addition, augmented cached interfaces are also added for basic
insertion and deletion operations; which becomes important for the
interval tree changes.
With the exception of the inserts, which adds a bool for updating the
new leftmost, the interfaces are kept the same. To this end, porting rb
users to the cached version becomes really trivial, and keeping current
rbtree semantics for users that don't care about the optimization
requires zero overhead.
Link: http://lkml.kernel.org/r/20170719014603.19029-2-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso(a)suse.de>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
---
Documentation/rbtree.txt | 33 +++++++++++++++++++++++++++++++
include/linux/rbtree.h | 21 ++++++++++++++++++++
include/linux/rbtree_augmented.h | 33 ++++++++++++++++++++++++++++---
lib/rbtree.c | 34 +++++++++++++++++++++++++++-----
4 files changed, 113 insertions(+), 8 deletions(-)
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index b9d9cc57be18..9fedfedfd85f 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -190,6 +190,39 @@ rb_entry(node, type, member).
for (node = rb_first(&mytree); node; node = rb_next(node))
printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
+Cached rbtrees
+--------------
+
+Computing the leftmost (smallest) node is quite a common task for binary
+search trees, such as for traversals or users relying on a the particular
+order for their own logic. To this end, users can use 'struct rb_root_cached'
+to optimize O(logN) rb_first() calls to a simple pointer fetch avoiding
+potentially expensive tree iterations. This is done at negligible runtime
+overhead for maintanence; albeit larger memory footprint.
+
+Similar to the rb_root structure, cached rbtrees are initialized to be
+empty via:
+
+ struct rb_root_cached mytree = RB_ROOT_CACHED;
+
+Cached rbtree is simply a regular rb_root with an extra pointer to cache the
+leftmost node. This allows rb_root_cached to exist wherever rb_root does,
+which permits augmented trees to be supported as well as only a few extra
+interfaces:
+
+ struct rb_node *rb_first_cached(struct rb_root_cached *tree);
+ void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool);
+ void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
+
+Both insert and erase calls have their respective counterpart of augmented
+trees:
+
+ void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *,
+ bool, struct rb_augment_callbacks *);
+ void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *,
+ struct rb_augment_callbacks *);
+
+
Support for Augmented rbtrees
-----------------------------
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index e585018498d5..d574361943ea 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -44,10 +44,25 @@ struct rb_root {
struct rb_node *rb_node;
};
+/*
+ * Leftmost-cached rbtrees.
+ *
+ * We do not cache the rightmost node based on footprint
+ * size vs number of potential users that could benefit
+ * from O(1) rb_last(). Just not worth it, users that want
+ * this feature can always implement the logic explicitly.
+ * Furthermore, users that want to cache both pointers may
+ * find it a bit asymmetric, but that's ok.
+ */
+struct rb_root_cached {
+ struct rb_root rb_root;
+ struct rb_node *rb_leftmost;
+};
#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
#define RB_ROOT (struct rb_root) { NULL, }
+#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL)
@@ -69,6 +84,12 @@ extern struct rb_node *rb_prev(const struct rb_node *);
extern struct rb_node *rb_first(const struct rb_root *);
extern struct rb_node *rb_last(const struct rb_root *);
+extern void rb_insert_color_cached(struct rb_node *,
+ struct rb_root_cached *, bool);
+extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
+/* Same as rb_first(), but O(1) */
+#define rb_first_cached(root) (root)->rb_leftmost
+
/* Postorder iteration - always visit the parent after its children */
extern struct rb_node *rb_first_postorder(const struct rb_root *);
extern struct rb_node *rb_next_postorder(const struct rb_node *);
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index d076183e49be..023d64657e95 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -41,7 +41,9 @@ struct rb_augment_callbacks {
void (*rotate)(struct rb_node *old, struct rb_node *new);
};
-extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+extern void __rb_insert_augmented(struct rb_node *node,
+ struct rb_root *root,
+ bool newleft, struct rb_node **leftmost,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
/*
* Fixup the rbtree and update the augmented information when rebalancing.
@@ -57,7 +59,16 @@ static inline void
rb_insert_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- __rb_insert_augmented(node, root, augment->rotate);
+ __rb_insert_augmented(node, root, false, NULL, augment->rotate);
+}
+
+static inline void
+rb_insert_augmented_cached(struct rb_node *node,
+ struct rb_root_cached *root, bool newleft,
+ const struct rb_augment_callbacks *augment)
+{
+ __rb_insert_augmented(node, &root->rb_root,
+ newleft, &root->rb_leftmost, augment->rotate);
}
#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \
@@ -148,6 +159,7 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
static __always_inline struct rb_node *
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ struct rb_node **leftmost,
const struct rb_augment_callbacks *augment)
{
struct rb_node *child = node->rb_right;
@@ -155,6 +167,9 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
struct rb_node *parent, *rebalance;
unsigned long pc;
+ if (leftmost && node == *leftmost)
+ *leftmost = rb_next(node);
+
if (!tmp) {
/*
* Case 1: node to erase has no more than 1 child (easy!)
@@ -254,9 +269,21 @@ static __always_inline void
rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
+ struct rb_node *rebalance = __rb_erase_augmented(node, root,
+ NULL, augment);
if (rebalance)
__rb_erase_color(rebalance, root, augment->rotate);
}
+static __always_inline void
+rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root,
+ &root->rb_leftmost,
+ augment);
+ if (rebalance)
+ __rb_erase_color(rebalance, &root->rb_root, augment->rotate);
+}
+
#endif /* _LINUX_RBTREE_AUGMENTED_H */
diff --git a/lib/rbtree.c b/lib/rbtree.c
index eb8a19fee110..53746be42903 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -95,10 +95,14 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
static __always_inline void
__rb_insert(struct rb_node *node, struct rb_root *root,
+ bool newleft, struct rb_node **leftmost,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
+ if (newleft)
+ *leftmost = node;
+
while (true) {
/*
* Loop invariant: node is red
@@ -417,19 +421,38 @@ static const struct rb_augment_callbacks dummy_callbacks = {
void rb_insert_color(struct rb_node *node, struct rb_root *root)
{
- __rb_insert(node, root, dummy_rotate);
+ __rb_insert(node, root, false, NULL, dummy_rotate);
}
EXPORT_SYMBOL(rb_insert_color);
void rb_erase(struct rb_node *node, struct rb_root *root)
{
struct rb_node *rebalance;
- rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+ rebalance = __rb_erase_augmented(node, root,
+ NULL, &dummy_callbacks);
if (rebalance)
____rb_erase_color(rebalance, root, dummy_rotate);
}
EXPORT_SYMBOL(rb_erase);
+void rb_insert_color_cached(struct rb_node *node,
+ struct rb_root_cached *root, bool leftmost)
+{
+ __rb_insert(node, &root->rb_root, leftmost,
+ &root->rb_leftmost, dummy_rotate);
+}
+EXPORT_SYMBOL(rb_insert_color_cached);
+
+void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root)
+{
+ struct rb_node *rebalance;
+ rebalance = __rb_erase_augmented(node, &root->rb_root,
+ &root->rb_leftmost, &dummy_callbacks);
+ if (rebalance)
+ ____rb_erase_color(rebalance, &root->rb_root, dummy_rotate);
+}
+EXPORT_SYMBOL(rb_erase_cached);
+
/*
* Augmented rbtree manipulation functions.
*
@@ -438,9 +461,10 @@ EXPORT_SYMBOL(rb_erase);
*/
void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ bool newleft, struct rb_node **leftmost,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
- __rb_insert(node, root, augment_rotate);
+ __rb_insert(node, root, newleft, leftmost, augment_rotate);
}
EXPORT_SYMBOL(__rb_insert_augmented);
@@ -485,7 +509,7 @@ struct rb_node *rb_next(const struct rb_node *node)
* as we can.
*/
if (node->rb_right) {
- node = node->rb_right;
+ node = node->rb_right;
while (node->rb_left)
node=node->rb_left;
return (struct rb_node *)node;
@@ -517,7 +541,7 @@ struct rb_node *rb_prev(const struct rb_node *node)
* as we can.
*/
if (node->rb_left) {
- node = node->rb_left;
+ node = node->rb_left;
while (node->rb_right)
node=node->rb_right;
return (struct rb_node *)node;
From: Paul Moore <paul(a)paul-moore.com>
commit ad5d07f4a9cd671233ae20983848874731102c08 upstream.
The current CIPSO and CALIPSO refcounting scheme for the DOI
definitions is a bit flawed in that we:
1. Don't correctly match gets/puts in netlbl_cipsov4_list().
2. Decrement the refcount on each attempt to remove the DOI from the
DOI list, only removing it from the list once the refcount drops
to zero.
This patch fixes these problems by adding the missing "puts" to
netlbl_cipsov4_list() and introduces a more conventional, i.e.
not-buggy, refcounting mechanism to the DOI definitions. Upon the
addition of a DOI to the DOI list, it is initialized with a refcount
of one, removing a DOI from the list removes it from the list and
drops the refcount by one; "gets" and "puts" behave as expected with
respect to refcounts, increasing and decreasing the DOI's refcount by
one.
Fixes: b1edeb102397 ("netlabel: Replace protocol/NetLabel linking with refrerence counts")
Fixes: d7cce01504a0 ("netlabel: Add support for removing a CALIPSO DOI.")
Reported-by: syzbot+9ec037722d2603a9f52e(a)syzkaller.appspotmail.com
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
[bwh: Backported to 4.9: adjust context]
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
---
net/ipv4/cipso_ipv4.c | 11 +----------
net/ipv6/calipso.c | 14 +++++---------
net/netlabel/netlabel_cipso_v4.c | 3 +++
3 files changed, 9 insertions(+), 19 deletions(-)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 553cda6f887a..b7dc20a65b64 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -534,16 +534,10 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
ret_val = -ENOENT;
goto doi_remove_return;
}
- if (!atomic_dec_and_test(&doi_def->refcount)) {
- spin_unlock(&cipso_v4_doi_list_lock);
- ret_val = -EBUSY;
- goto doi_remove_return;
- }
list_del_rcu(&doi_def->list);
spin_unlock(&cipso_v4_doi_list_lock);
- cipso_v4_cache_invalidate();
- call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+ cipso_v4_doi_putdef(doi_def);
ret_val = 0;
doi_remove_return:
@@ -600,9 +594,6 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
if (!atomic_dec_and_test(&doi_def->refcount))
return;
- spin_lock(&cipso_v4_doi_list_lock);
- list_del_rcu(&doi_def->list);
- spin_unlock(&cipso_v4_doi_list_lock);
cipso_v4_cache_invalidate();
call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index b206415bbde7..7628963ddacc 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -97,6 +97,9 @@ struct calipso_map_cache_entry {
static struct calipso_map_cache_bkt *calipso_cache;
+static void calipso_cache_invalidate(void);
+static void calipso_doi_putdef(struct calipso_doi *doi_def);
+
/* Label Mapping Cache Functions
*/
@@ -458,15 +461,10 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info)
ret_val = -ENOENT;
goto doi_remove_return;
}
- if (!atomic_dec_and_test(&doi_def->refcount)) {
- spin_unlock(&calipso_doi_list_lock);
- ret_val = -EBUSY;
- goto doi_remove_return;
- }
list_del_rcu(&doi_def->list);
spin_unlock(&calipso_doi_list_lock);
- call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
+ calipso_doi_putdef(doi_def);
ret_val = 0;
doi_remove_return:
@@ -522,10 +520,8 @@ static void calipso_doi_putdef(struct calipso_doi *doi_def)
if (!atomic_dec_and_test(&doi_def->refcount))
return;
- spin_lock(&calipso_doi_list_lock);
- list_del_rcu(&doi_def->list);
- spin_unlock(&calipso_doi_list_lock);
+ calipso_cache_invalidate();
call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
}
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 422fac2a4a3c..9a256d0fb957 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -587,6 +587,7 @@ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
break;
}
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
genlmsg_end(ans_skb, data);
@@ -595,12 +596,14 @@ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
list_retry:
/* XXX - this limit is a guesstimate */
if (nlsze_mult < 4) {
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
kfree_skb(ans_skb);
nlsze_mult *= 2;
goto list_start;
}
list_failure_lock:
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
list_failure:
kfree_skb(ans_skb);
Proszę o uwagę!
Jak się masz? Mam nadzieję, że jesteś zdrowy i zdrowy? Informuję, że
udało mi się zakończyć transakcję z pomocą nowego partnera z Indii i
teraz środki zostały przelane do Indii na konto bankowe nowego
partnera.
W międzyczasie zdecydowałem się zrekompensować ci sumę 500 000 $
(tylko pięćset tysięcy dolarów amerykańskich) z powodu twoich
wcześniejszych wysiłków, chociaż mnie rozczarowałeś. Niemniej jednak
bardzo się cieszę z pomyślnego zakończenia transakcji bez żadnego
problemu i dlatego postanowiłem zrekompensować Ci kwotę 500 000 $,
abyś podzielił się ze mną radością.
Radzę skontaktować się z moją sekretarką w sprawie karty bankomatowej
o wartości 500 000 $, którą zachowałem dla Ciebie. Skontaktuj się z
nią teraz bez zwłoki.
Imię: Linda Koffi
E-mail: koffilinda785(a)gmail.com
Uprzejmie potwierdź jej następujące informacje:
Twoje pełne imię:........
Twój adres:..........
Twój kraj:..........
Twój wiek:.........
Twój zawód:..........
Twój numer telefonu komórkowego:..........
Twój paszport lub prawo jazdy:........
Pamiętaj, że jeśli nie prześlesz jej powyższych informacji
kompletnych, nie wyda ci karty bankomatowej, ponieważ musi się
upewnić, że to ty. Poproś ją, aby przesłała Ci całkowitą sumę (500 000
USD) karty bankomatowej, którą dla Ciebie zachowałem.
Z wyrazami szacunku,
Pan Abraham Morrison
Please pick the following commits for 4.9 and 4.14. They should apply
cleanly.
commit d903ec77118c09f93a610b384d83a6df33a64fe6
Author: Andy Spencer <aspencer(a)spacex.com>
Date: Thu Feb 22 11:05:33 2018 -0800
gianfar: simplify FCS handling and fix memory leak
commit d8861bab48b6c1fc3cdbcab8ff9d1eaea43afe7f
Author: Michael Braun <michael-dev(a)fami-braun.de>
Date: Thu Mar 4 20:52:52 2021 +0100
gianfar: fix jumbo packets+napi+rx overrun crash
Ben.
--
Ben Hutchings
Teamwork is essential - it allows you to blame someone else.
Hot-unplug all firmware-framebuffer devices as part of removing
them via remove_conflicting_framebuffers() et al. Releases all
memory regions to be acquired by native drivers.
Firmware, such as EFI, install a framebuffer while posting the
computer. After removing the firmware-framebuffer device from fbdev,
a native driver takes over the hardware and the firmware framebuffer
becomes invalid.
Firmware-framebuffer drivers, specifically simplefb, don't release
their device from Linux' device hierarchy. It still owns the firmware
framebuffer and blocks the native drivers from loading. This has been
observed in the vmwgfx driver. [1]
Initiating a device removal (i.e., hot unplug) as part of
remove_conflicting_framebuffers() removes the underlying device and
returns the memory range to the system.
[1] https://lore.kernel.org/dri-devel/20220117180359.18114-1-zack@kde.org/
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
CC: stable(a)vger.kernel.org # v5.11+
---
drivers/video/fbdev/core/fbmem.c | 29 ++++++++++++++++++++++++++---
include/linux/fb.h | 1 +
2 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 0fa7ede94fa6..f73f8415b8cb 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -25,6 +25,7 @@
#include <linux/init.h>
#include <linux/linux_logo.h>
#include <linux/proc_fs.h>
+#include <linux/platform_device.h>
#include <linux/seq_file.h>
#include <linux/console.h>
#include <linux/kmod.h>
@@ -1557,18 +1558,36 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
/* check all firmware fbs and kick off if the base addr overlaps */
for_each_registered_fb(i) {
struct apertures_struct *gen_aper;
+ struct device *dev;
if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE))
continue;
gen_aper = registered_fb[i]->apertures;
+ dev = registered_fb[i]->device;
if (fb_do_apertures_overlap(gen_aper, a) ||
(primary && gen_aper && gen_aper->count &&
gen_aper->ranges[0].base == VGA_FB_PHYS)) {
printk(KERN_INFO "fb%d: switching to %s from %s\n",
i, name, registered_fb[i]->fix.id);
- do_unregister_framebuffer(registered_fb[i]);
+
+ /*
+ * If we kick-out a firmware driver, we also want to remove
+ * the underlying platform device, such as simple-framebuffer,
+ * VESA, EFI, etc. A native driver will then be able to
+ * allocate the memory range.
+ *
+ * If it's not a platform device, at least print a warning. A
+ * fix would add code to remove the device from the system.
+ */
+ if (dev_is_platform(dev)) {
+ registered_fb[i]->forced_out = true;
+ platform_device_unregister(to_platform_device(dev));
+ } else {
+ pr_warn("fb%d: cannot remove device\n", i);
+ do_unregister_framebuffer(registered_fb[i]);
+ }
}
}
}
@@ -1898,9 +1917,13 @@ EXPORT_SYMBOL(register_framebuffer);
void
unregister_framebuffer(struct fb_info *fb_info)
{
- mutex_lock(®istration_lock);
+ bool forced_out = fb_info->forced_out;
+
+ if (!forced_out)
+ mutex_lock(®istration_lock);
do_unregister_framebuffer(fb_info);
- mutex_unlock(®istration_lock);
+ if (!forced_out)
+ mutex_unlock(®istration_lock);
}
EXPORT_SYMBOL(unregister_framebuffer);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 3da95842b207..9a14f3f8a329 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -502,6 +502,7 @@ struct fb_info {
} *apertures;
bool skip_vt_switch; /* no VT switch on suspend/resume required */
+ bool forced_out; /* set when being removed by another driver */
};
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
--
2.34.1
From: Dave Airlie <airlied(a)redhat.com>
commit 5de5b6ecf97a021f29403aa272cb4e03318ef586 upstream.
This is confusing, and from my reading of all the drivers only
nouveau got this right.
Just make the API act under driver control of it's own allocation
failing, and don't call destroy, if the page table fails to
create there is nothing to cleanup here.
(I'm willing to believe I've missed something here, so please
review deeply).
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Dave Airlie <airlied(a)redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200728041736.20689-1-airlie…
[bwh: Backported to 4.14:
- Drop change in ttm_sg_tt_init()
- Adjust context]
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
---
drivers/gpu/drm/nouveau/nouveau_sgdma.c | 9 +++------
drivers/gpu/drm/ttm/ttm_tt.c | 2 --
2 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index fde11ce466e4..495c4043467e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -106,12 +106,9 @@ nouveau_sgdma_create_ttm(struct ttm_bo_device *bdev,
else
nvbe->ttm.ttm.func = &nv50_sgdma_backend;
- if (ttm_dma_tt_init(&nvbe->ttm, bdev, size, page_flags, dummy_read_page))
- /*
- * A failing ttm_dma_tt_init() will call ttm_tt_destroy()
- * and thus our nouveau_sgdma_destroy() hook, so we don't need
- * to free nvbe here.
- */
+ if (ttm_dma_tt_init(&nvbe->ttm, bdev, size, page_flags, dummy_read_page)) {
+ kfree(nvbe);
return NULL;
+ }
return &nvbe->ttm.ttm;
}
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 8ebc8d3560c3..fc8bdcc1541b 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -199,7 +199,6 @@ int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
ttm_tt_alloc_page_directory(ttm);
if (!ttm->pages) {
- ttm_tt_destroy(ttm);
pr_err("Failed allocating page table\n");
return -ENOMEM;
}
@@ -232,7 +231,6 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev,
INIT_LIST_HEAD(&ttm_dma->pages_list);
ttm_dma_tt_alloc_page_directory(ttm_dma);
if (!ttm->pages) {
- ttm_tt_destroy(ttm);
pr_err("Failed allocating page table\n");
return -ENOMEM;
}
We don't currently validate that the values being set are within the range
we advertised to userspace as being valid, do so and reject any values
that are out of range.
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
sound/soc/soc-ops.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index c31e63b27193..dc0e7c8d31f3 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -879,6 +879,8 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol,
long val = ucontrol->value.integer.value[0];
unsigned int i;
+ if (val < mc->min || val > mc->max)
+ return -EINVAL;
if (invert)
val = max - val;
val &= mask;
--
2.30.2
We don't currently validate that the values being set are within the range
we advertised to userspace as being valid, do so and reject any values
that are out of range.
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
sound/soc/soc-ops.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index fbe5d326b0f2..c31e63b27193 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -423,8 +423,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
int err = 0;
unsigned int val, val_mask;
+ val = ucontrol->value.integer.value[0];
+ if (mc->platform_max && val > mc->platform_max)
+ return -EINVAL;
+ if (val > max - min)
+ return -EINVAL;
+ if (val < 0)
+ return -EINVAL;
val_mask = mask << shift;
- val = (ucontrol->value.integer.value[0] + min) & mask;
+ val = (val + min) & mask;
val = val << shift;
err = snd_soc_component_update_bits(component, reg, val_mask, val);
--
2.30.2
We don't currently validate that the values being set are within the range
we advertised to userspace as being valid, do so and reject any values
that are out of range.
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
sound/soc/soc-ops.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index 08eaa9ddf191..fbe5d326b0f2 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -316,13 +316,27 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
if (sign_bit)
mask = BIT(sign_bit + 1) - 1;
- val = ((ucontrol->value.integer.value[0] + min) & mask);
+ val = ucontrol->value.integer.value[0];
+ if (mc->platform_max && val > mc->platform_max)
+ return -EINVAL;
+ if (val > max - min)
+ return -EINVAL;
+ if (val < 0)
+ return -EINVAL;
+ val = (val + min) & mask;
if (invert)
val = max - val;
val_mask = mask << shift;
val = val << shift;
if (snd_soc_volsw_is_stereo(mc)) {
- val2 = ((ucontrol->value.integer.value[1] + min) & mask);
+ val2 = ucontrol->value.integer.value[1];
+ if (mc->platform_max && val2 > mc->platform_max)
+ return -EINVAL;
+ if (val2 > max - min)
+ return -EINVAL;
+ if (val2 < 0)
+ return -EINVAL;
+ val2 = (val2 + min) & mask;
if (invert)
val2 = max - val2;
if (reg == reg2) {
--
2.30.2
Mark the start_backtrace() as notrace and NOKPROBE_SYMBOL
because this function is called from ftrace and lockdep to
get the caller address via return_address(). The lockdep
is used in kprobes, it should also be NOKPROBE_SYMBOL.
Fixes: b07f3499661c ("arm64: stacktrace: Move start_backtrace() out of the header")
Cc: stable(a)vger.kernel.org
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
---
arch/arm64/kernel/stacktrace.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 94f83cd44e50..b0f21677764d 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -33,7 +33,7 @@
*/
-void start_backtrace(struct stackframe *frame, unsigned long fp,
+void notrace start_backtrace(struct stackframe *frame, unsigned long fp,
unsigned long pc)
{
frame->fp = fp;
@@ -55,6 +55,7 @@ void start_backtrace(struct stackframe *frame, unsigned long fp,
frame->prev_fp = 0;
frame->prev_type = STACK_TYPE_UNKNOWN;
}
+NOKPROBE_SYMBOL(start_backtrace);
/*
* Unwind from one frame record (A) to the next frame record (B).
This reverts commit 77fa5e15c933a1ec812de61ad709c00aa51e96ae.
Since the upstream commit e792ff804f49720ce003b3e4c618b5d996256a18
depends on the generic kretprobe trampoline handler, which was
introduced by commit 66ada2ccae4e ("kprobes: Add generic kretprobe
trampoline handler") but that is not ported to the stable kernel
because it is not a bugfix series.
So revert this commit to fix a build error.
NOTE: I keep commit a7fe2378454c ("ia64: kprobes: Fix to pass
correct trampoline address to the handler") on the tree, that seems
just a cleanup without the original reverted commit, but it would
be better to use dereference_function_descriptor() macro instead
of accessing descriptor's field directly.
Fixes: 77fa5e15c933 ("ia64: kprobes: Use generic kretprobe trampoline handler")
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
---
Changes in v2:
- fix the lack of type casting for dereference_function_descriptor().
---
arch/ia64/kernel/kprobes.c | 78 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 75 insertions(+), 3 deletions(-)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 8a223d0e4918..fa10d51f6217 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -396,10 +396,83 @@ static void kretprobe_trampoline(void)
{
}
+/*
+ * At this point the target function has been tricked into
+ * returning into our trampoline. Lookup the associated instance
+ * and then:
+ * - call the handler function
+ * - cleanup by marking the instance as unused
+ * - long jump back to the original return address
+ */
int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- regs->cr_iip = __kretprobe_trampoline_handler(regs,
- dereference_function_descriptor(kretprobe_trampoline), NULL);
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =
+ (unsigned long)dereference_function_descriptor(kretprobe_trampoline);
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more than one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ regs->cr_iip = orig_ret_address;
+
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ kretprobe_hash_unlock(current, &flags);
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
@@ -412,7 +485,6 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *)regs->b0;
- ri->fp = NULL;
/* Replace the return addr with trampoline addr */
regs->b0 = (unsigned long)dereference_function_descriptor(kretprobe_trampoline);
Dear friend.
This being a wide world in which it can be difficult to make new
acquaintances and because it is virtually impossible to know who is
trustworthy and who can be believed, I have decided to repose
confidence in you after much fasting and prayer. It is only because of
this that I have decided to confide in you and to share with you this
confidential business.
overdue and unclaimed sum of $15.5m, (Fifteen Million Five Hundred
Thousand Dollars Only) when the account holder suddenly passed on, he
left no beneficiary who would be entitled to the receipt of this fund.
For this reason, I have found it expedient to transfer this fund to a
trustworthy individual with capacity to act as foreign business
partner.
Yours Faithful,
Mr.Sal Kavar.
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 96da174024b9c63bd5d3358668d0bc12677be877 Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan(a)linux.intel.com>
Date: Tue, 23 Nov 2021 19:16:06 +0200
Subject: [PATCH] ASoC: SOF: handle paused streams during system suspend
During system suspend, paused streams do not get suspended.
Therefore, we need to explicitly free these PCMs in the DSP
and free the associated DAPM widgets so that they can be set
up again during resume.
Fixes: 5fcdbb2d45df ("ASoC: SOF: Add support for dynamic pipelines")
Signed-off-by: Ranjani Sridharan <ranjani.sridharan(a)linux.intel.com>
Reviewed-by: Paul Olaru <paul.olaru(a)oss.nxp.com>
Reviewed-by: Bard Liao <bard.liao(a)intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20211123171606.129350-3-kai.vehmanen@linux.intel.…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c
index 31dd79b794f1..0ceb1a9cbf73 100644
--- a/sound/soc/sof/pcm.c
+++ b/sound/soc/sof/pcm.c
@@ -100,9 +100,8 @@ void snd_sof_pcm_period_elapsed(struct snd_pcm_substream *substream)
}
EXPORT_SYMBOL(snd_sof_pcm_period_elapsed);
-static int sof_pcm_dsp_pcm_free(struct snd_pcm_substream *substream,
- struct snd_sof_dev *sdev,
- struct snd_sof_pcm *spcm)
+int sof_pcm_dsp_pcm_free(struct snd_pcm_substream *substream, struct snd_sof_dev *sdev,
+ struct snd_sof_pcm *spcm)
{
struct sof_ipc_stream stream;
struct sof_ipc_reply reply;
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index f4e142ec0fbd..e00ce275052f 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -129,6 +129,14 @@ int sof_widget_free(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget)
case snd_soc_dapm_buffer:
ipc_free.hdr.cmd |= SOF_IPC_TPLG_BUFFER_FREE;
break;
+ case snd_soc_dapm_dai_in:
+ case snd_soc_dapm_dai_out:
+ {
+ struct snd_sof_dai *dai = swidget->private;
+
+ dai->configured = false;
+ fallthrough;
+ }
default:
ipc_free.hdr.cmd |= SOF_IPC_TPLG_COMP_FREE;
break;
@@ -720,6 +728,55 @@ int sof_set_up_pipelines(struct snd_sof_dev *sdev, bool verify)
return 0;
}
+/*
+ * Free the PCM, its associated widgets and set the prepared flag to false for all PCMs that
+ * did not get suspended(ex: paused streams) so the widgets can be set up again during resume.
+ */
+static int sof_tear_down_left_over_pipelines(struct snd_sof_dev *sdev)
+{
+ struct snd_sof_widget *swidget;
+ struct snd_sof_pcm *spcm;
+ int dir, ret;
+
+ /*
+ * free all PCMs and their associated DAPM widgets if their connected DAPM widget
+ * list is not NULL. This should only be true for paused streams at this point.
+ * This is equivalent to the handling of FE DAI suspend trigger for running streams.
+ */
+ list_for_each_entry(spcm, &sdev->pcm_list, list)
+ for_each_pcm_streams(dir) {
+ struct snd_pcm_substream *substream = spcm->stream[dir].substream;
+
+ if (!substream || !substream->runtime)
+ continue;
+
+ if (spcm->stream[dir].list) {
+ ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm);
+ if (ret < 0)
+ return ret;
+
+ ret = sof_widget_list_free(sdev, spcm, dir);
+ if (ret < 0) {
+ dev_err(sdev->dev, "failed to free widgets during suspend\n");
+ return ret;
+ }
+ }
+ }
+
+ /*
+ * free any left over DAI widgets. This is equivalent to the handling of suspend trigger
+ * for the BE DAI for running streams.
+ */
+ list_for_each_entry(swidget, &sdev->widget_list, list)
+ if (WIDGET_IS_DAI(swidget->id) && swidget->use_count == 1) {
+ ret = sof_widget_free(sdev, swidget);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
/*
* For older firmware, this function doesn't free widgets for static pipelines during suspend.
* It only resets use_count for all widgets.
@@ -734,8 +791,8 @@ int sof_tear_down_pipelines(struct snd_sof_dev *sdev, bool verify)
/*
* This function is called during suspend and for one-time topology verification during
* first boot. In both cases, there is no need to protect swidget->use_count and
- * sroute->setup because during suspend all streams are suspended and during topology
- * loading the sound card unavailable to open PCMs.
+ * sroute->setup because during suspend all running streams are suspended and during
+ * topology loading the sound card unavailable to open PCMs.
*/
list_for_each_entry(swidget, &sdev->widget_list, list) {
if (swidget->dynamic_pipeline_widget)
@@ -754,6 +811,19 @@ int sof_tear_down_pipelines(struct snd_sof_dev *sdev, bool verify)
return ret;
}
+ /*
+ * Tear down all pipelines associated with PCMs that did not get suspended
+ * and unset the prepare flag so that they can be set up again during resume.
+ * Skip this step for older firmware.
+ */
+ if (!verify && v->abi_version >= SOF_ABI_VER(3, 19, 0)) {
+ ret = sof_tear_down_left_over_pipelines(sdev);
+ if (ret < 0) {
+ dev_err(sdev->dev, "failed to tear down paused pipelines\n");
+ return ret;
+ }
+ }
+
list_for_each_entry(sroute, &sdev->route_list, list)
sroute->setup = false;
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index 389d56ac3aba..1c4f59d34717 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -265,4 +265,6 @@ int sof_widget_free(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget);
/* PCM */
int sof_widget_list_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm, int dir);
int sof_widget_list_free(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm, int dir);
+int sof_pcm_dsp_pcm_free(struct snd_pcm_substream *substream, struct snd_sof_dev *sdev,
+ struct snd_sof_pcm *spcm);
#endif
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 01429183f479c54c1b5d15453a8ce574ea43e525 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
Date: Tue, 23 Nov 2021 19:16:04 +0200
Subject: [PATCH] ASoC: SOF: sof-audio: setup sched widgets during pipeline
complete step
Older firmware prior to ABI 3.19 has a dependency where the scheduler
widgets need to be setup last. Moving the call to sof_widget_setup()
before the pipeline_complete() call also helps remove the need for the
'reverse' direction when walking through the widget list - this was
only working because of the topology macros but the topology does not
require any order.
Fixes: 5fcdbb2d45df ("ASoC: SOF: Add support for dynamic pipelines")
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan(a)linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20211123171606.129350-1-kai.vehmanen@linux.intel.…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index 0f2566f7c094..f4e142ec0fbd 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -637,16 +637,25 @@ const struct sof_ipc_pipe_new *snd_sof_pipeline_find(struct snd_sof_dev *sdev,
int sof_set_up_pipelines(struct snd_sof_dev *sdev, bool verify)
{
+ struct sof_ipc_fw_version *v = &sdev->fw_ready.version;
struct snd_sof_widget *swidget;
struct snd_sof_route *sroute;
int ret;
/* restore pipeline components */
- list_for_each_entry_reverse(swidget, &sdev->widget_list, list) {
+ list_for_each_entry(swidget, &sdev->widget_list, list) {
/* only set up the widgets belonging to static pipelines */
if (!verify && swidget->dynamic_pipeline_widget)
continue;
+ /*
+ * For older firmware, skip scheduler widgets in this loop,
+ * sof_widget_setup() will be called in the 'complete pipeline' loop
+ */
+ if (v->abi_version < SOF_ABI_VER(3, 19, 0) &&
+ swidget->id == snd_soc_dapm_scheduler)
+ continue;
+
/* update DAI config. The IPC will be sent in sof_widget_setup() */
if (WIDGET_IS_DAI(swidget->id)) {
struct snd_sof_dai *dai = swidget->private;
@@ -694,6 +703,12 @@ int sof_set_up_pipelines(struct snd_sof_dev *sdev, bool verify)
if (!verify && swidget->dynamic_pipeline_widget)
continue;
+ if (v->abi_version < SOF_ABI_VER(3, 19, 0)) {
+ ret = sof_widget_setup(sdev, swidget);
+ if (ret < 0)
+ return ret;
+ }
+
swidget->complete =
snd_sof_complete_pipeline(sdev, swidget);
break;
@@ -722,7 +737,7 @@ int sof_tear_down_pipelines(struct snd_sof_dev *sdev, bool verify)
* sroute->setup because during suspend all streams are suspended and during topology
* loading the sound card unavailable to open PCMs.
*/
- list_for_each_entry_reverse(swidget, &sdev->widget_list, list) {
+ list_for_each_entry(swidget, &sdev->widget_list, list) {
if (swidget->dynamic_pipeline_widget)
continue;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a674e48c5443d12a8a43c3ac42367aa39505d506 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe(a)redhat.com>
Date: Fri, 14 Jan 2022 14:07:41 -0800
Subject: [PATCH] dma/pool: create dma atomic pool only if dma zone has managed
pages
Currently three dma atomic pools are initialized as long as the relevant
kernel codes are built in. While in kdump kernel of x86_64, this is not
right when trying to create atomic_pool_dma, because there's no managed
pages in DMA zone. In the case, DMA zone only has low 1M memory
presented and locked down by memblock allocator. So no pages are added
into buddy of DMA zone. Please check commit f1d4d47c5851 ("x86/setup:
Always reserve the first 1M of RAM").
Then in kdump kernel of x86_64, it always prints below failure message:
DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations
swapper/0: page allocation failure: order:5, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-0.rc5.20210611git929d931f2b40.42.fc35.x86_64 #1
Hardware name: Dell Inc. PowerEdge R910/0P658H, BIOS 2.12.0 06/04/2018
Call Trace:
dump_stack+0x7f/0xa1
warn_alloc.cold+0x72/0xd6
__alloc_pages_slowpath.constprop.0+0xf29/0xf50
__alloc_pages+0x24d/0x2c0
alloc_page_interleave+0x13/0xb0
atomic_pool_expand+0x118/0x210
__dma_atomic_pool_init+0x45/0x93
dma_atomic_pool_init+0xdb/0x176
do_one_initcall+0x67/0x320
kernel_init_freeable+0x290/0x2dc
kernel_init+0xa/0x111
ret_from_fork+0x22/0x30
Mem-Info:
......
DMA: failed to allocate 128 KiB GFP_KERNEL|GFP_DMA pool for atomic allocation
DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA32 pool for atomic allocations
Here, let's check if DMA zone has managed pages, then create
atomic_pool_dma if yes. Otherwise just skip it.
Link: https://lkml.kernel.org/r/20211223094435.248523-3-bhe@redhat.com
Fixes: 6f599d84231f ("x86/kdump: Always reserve the low 1M when the crashkernel option is specified")
Signed-off-by: Baoquan He <bhe(a)redhat.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Acked-by: John Donnelly <john.p.donnelly(a)oracle.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: Marek Szyprowski <m.szyprowski(a)samsung.com>
Cc: Robin Murphy <robin.murphy(a)arm.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: David Laight <David.Laight(a)ACULAB.COM>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Hyeonggon Yoo <42.hyeyoo(a)gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 5f84e6cdb78e..4d40dcce7604 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -203,7 +203,7 @@ static int __init dma_atomic_pool_init(void)
GFP_KERNEL);
if (!atomic_pool_kernel)
ret = -ENOMEM;
- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+ if (has_managed_dma()) {
atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL | GFP_DMA);
if (!atomic_pool_dma)
@@ -226,7 +226,7 @@ static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
if (prev == NULL) {
if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
return atomic_pool_dma32;
- if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
+ if (atomic_pool_dma && (gfp & GFP_DMA))
return atomic_pool_dma;
return atomic_pool_kernel;
}
This reverts commit 9bbd42e79720122334226afad9ddcac1c3e6d373, which
was commit 17839856fd588f4ab6b789f482ed3ffd7c403e1f upstream. The
backport was incorrect and incomplete:
* It forced the write flag on in the generic __get_user_pages_fast(),
whereas only get_user_pages_fast() was supposed to do that.
* It only fixed the generic RCU-based implementation used by arm,
arm64, and powerpc. Before Linux 4.13, several other architectures
had their own implementations: mips, s390, sparc, sh, and x86.
This will be followed by a (hopefully) correct backport.
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: stable(a)vger.kernel.org
---
mm/gup.c | 48 ++++++++----------------------------------------
mm/huge_memory.c | 7 ++++---
2 files changed, 12 insertions(+), 43 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index 301dd96ef176..6bb7a8eb7f82 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -61,22 +61,13 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
}
/*
- * FOLL_FORCE or a forced COW break can write even to unwritable pte's,
- * but only after we've gone through a COW cycle and they are dirty.
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
*/
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
- return pte_write(pte) || ((flags & FOLL_COW) && pte_dirty(pte));
-}
-
-/*
- * A (separate) COW fault might break the page the other way and
- * get_user_pages() would return the page from what is now the wrong
- * VM. So we need to force a COW break at GUP time even for reads.
- */
-static inline bool should_force_cow_break(struct vm_area_struct *vma, unsigned int flags)
-{
- return is_cow_mapping(vma->vm_flags) && (flags & FOLL_GET);
+ return pte_write(pte) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -586,18 +577,12 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (!vma || check_vma_flags(vma, gup_flags))
return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
- if (should_force_cow_break(vma, foll_flags))
- foll_flags |= FOLL_WRITE;
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
- foll_flags);
+ gup_flags);
continue;
}
}
-
- if (should_force_cow_break(vma, foll_flags))
- foll_flags |= FOLL_WRITE;
-
retry:
/*
* If we have a pending SIGKILL, don't keep faulting pages and
@@ -1518,10 +1503,6 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.
- *
- * Careful, careful! COW breaking can go either way, so a non-write
- * access can get ambiguous page results. If you call this function without
- * 'write' set, you'd better be sure that you're ok with that ambiguity.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
@@ -1551,12 +1532,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
*
* We do not adopt an rcu_read_lock(.) here as we also want to
* block IPIs that come from THPs splitting.
- *
- * NOTE! We allow read-only gup_fast() here, but you'd better be
- * careful about possible COW pages. You'll get _a_ COW page, but
- * not necessarily the one you intended to get depending on what
- * COW event happens after this. COW may break the page copy in a
- * random direction.
*/
local_irq_save(flags);
@@ -1567,22 +1542,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
- /*
- * The FAST_GUP case requires FOLL_WRITE even for pure reads,
- * because get_user_pages() may need to cause an early COW in
- * order to avoid confusing the normal COW routines. So only
- * targets that are already writable are safe to do by just
- * looking at the page tables.
- */
if (unlikely(pgd_huge(pgd))) {
- if (!gup_huge_pgd(pgd, pgdp, addr, next, 1,
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
pages, &nr))
break;
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, 1, pages, &nr))
+ PGDIR_SHIFT, next, write, pages, &nr))
break;
- } else if (!gup_pud_range(pgd, addr, next, 1, pages, &nr))
+ } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3f3a86cc62b6..91f33bb43f17 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1135,12 +1135,13 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
}
/*
- * FOLL_FORCE or a forced COW break can write even to unwritable pmd's,
- * but only after we've gone through a COW cycle and they are dirty.
+ * FOLL_FORCE can write to even unwritable pmd's, but only
+ * after we've gone through a COW cycle and they are dirty.
*/
static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
{
- return pmd_write(pmd) || ((flags & FOLL_COW) && pmd_dirty(pmd));
+ return pmd_write(pmd) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
}
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
This is a backport of the recently merged "[PATCH v3 0/4] KVM: x86:
Partially allow KVM_SET_CPUID{,2} after KVM_RUN for CPU hotplug"
(https://lore.kernel.org/kvm/20220118141801.2219924-1-vkuznets@redhat.com/)
Original description:
Recently, KVM made it illegal to change CPUID after KVM_RUN but
unfortunately this change is not fully compatible with existing VMMs.
In particular, QEMU reuses vCPU fds for CPU hotplug after unplug and it
calls KVM_SET_CPUID2. Relax the requirement by implementing an allowing
KVM_SET_CPUID{,2} with the exact same data.
Vitaly Kuznetsov (4):
KVM: x86: Do runtime CPUID update before updating
vcpu->arch.cpuid_entries
KVM: x86: Partially allow KVM_SET_CPUID{,2} after KVM_RUN
KVM: selftests: Rename 'get_cpuid_test' to 'cpuid_test'
KVM: selftests: Test KVM_SET_CPUID2 after KVM_RUN
arch/x86/kvm/cpuid.c | 90 ++++++++++++++-----
arch/x86/kvm/x86.c | 19 ----
tools/testing/selftests/kvm/.gitignore | 2 +-
tools/testing/selftests/kvm/Makefile | 4 +-
.../selftests/kvm/include/x86_64/processor.h | 7 ++
.../selftests/kvm/lib/x86_64/processor.c | 33 ++++++-
.../x86_64/{get_cpuid_test.c => cpuid_test.c} | 30 +++++++
7 files changed, 139 insertions(+), 46 deletions(-)
rename tools/testing/selftests/kvm/x86_64/{get_cpuid_test.c => cpuid_test.c} (83%)
--
2.34.1
Hi stable maintainers,
On 06.04.21 03:47, Yoshio Furuyama wrote:
> From: "Doyle, Patrick" <pdoyle(a)irobot.com>
>
> In the unlikely event that both blocks 10 and 11 are marked as bad (on a
> 32 bit machine), then the process of marking block 10 as bad stomps on
> cached entry for block 11. There are (of course) other examples.
>
> Signed-off-by: Patrick Doyle <pdoyle(a)irobot.com>
> Reviewed-by: Richard Weinberger <richard(a)nod.at>
We have systems on which this patch fixes real failures. Could you
please add the upstream patch fd0d8d85f723 ("mtd: nand: bbt: Fix corner
case in bad block table handling") to the stable queues for 4.19, 5.4, 5.10?
Thanks!
Cc: stable(a)vger.kernel.org
Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with
NAND devices")
> ---
> drivers/mtd/nand/bbt.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/mtd/nand/bbt.c b/drivers/mtd/nand/bbt.c
> index 044adf913854..64af6898131d 100644
> --- a/drivers/mtd/nand/bbt.c
> +++ b/drivers/mtd/nand/bbt.c
> @@ -123,7 +123,7 @@ int nanddev_bbt_set_block_status(struct nand_device *nand, unsigned int entry,
> unsigned int rbits = bits_per_block + offs - BITS_PER_LONG;
>
> pos[1] &= ~GENMASK(rbits - 1, 0);
> - pos[1] |= val >> rbits;
> + pos[1] |= val >> (bits_per_block - rbits);
> }
>
> return 0;
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 802d4d207e75d7208ff75adb712b556c1e91cf1c Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc(a)marvell.com>
Date: Fri, 17 Dec 2021 08:55:52 -0800
Subject: [PATCH] bnx2x: Invalidate fastpath HSI version for VFs
Commit 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.")
added validation for fastpath HSI versions for different
client init which was not meant for SR-IOV VF clients, which
resulted in firmware asserts when running VF clients with
different fastpath HSI version.
This patch along with the new firmware support in patch #1
fixes this behavior in order to not validate fastpath HSI
version for the VFs.
Fixes: 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.")
Signed-off-by: Manish Chopra <manishc(a)marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha(a)marvell.com>
Signed-off-by: Alok Prasad <palok(a)marvell.com>
Signed-off-by: Ariel Elior <aelior(a)marvell.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 74a8931ce1d1..11d15cd03600 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
{
+ u16 abs_fid;
+
+ abs_fid = FW_VF_HANDLE(abs_vfid);
+
/* set the VF-PF association in the FW */
- storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
- storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
+ storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp));
+ storm_memset_func_en(bp, abs_fid, 1);
+
+ /* Invalidate fp_hsi version for vfs */
+ if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI)
+ REG_WR8(bp, BAR_XSTRORM_INTMEM +
+ XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0);
/* clear vf errors*/
bnx2x_vf_semi_clear_err(bp, abs_vfid);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 802d4d207e75d7208ff75adb712b556c1e91cf1c Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc(a)marvell.com>
Date: Fri, 17 Dec 2021 08:55:52 -0800
Subject: [PATCH] bnx2x: Invalidate fastpath HSI version for VFs
Commit 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.")
added validation for fastpath HSI versions for different
client init which was not meant for SR-IOV VF clients, which
resulted in firmware asserts when running VF clients with
different fastpath HSI version.
This patch along with the new firmware support in patch #1
fixes this behavior in order to not validate fastpath HSI
version for the VFs.
Fixes: 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.")
Signed-off-by: Manish Chopra <manishc(a)marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha(a)marvell.com>
Signed-off-by: Alok Prasad <palok(a)marvell.com>
Signed-off-by: Ariel Elior <aelior(a)marvell.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 74a8931ce1d1..11d15cd03600 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
{
+ u16 abs_fid;
+
+ abs_fid = FW_VF_HANDLE(abs_vfid);
+
/* set the VF-PF association in the FW */
- storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
- storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
+ storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp));
+ storm_memset_func_en(bp, abs_fid, 1);
+
+ /* Invalidate fp_hsi version for vfs */
+ if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI)
+ REG_WR8(bp, BAR_XSTRORM_INTMEM +
+ XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0);
/* clear vf errors*/
bnx2x_vf_semi_clear_err(bp, abs_vfid);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 87c01d57fa23de82fff593a7d070933d08755801 Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple(a)nvidia.com>
Date: Fri, 14 Jan 2022 14:09:31 -0800
Subject: [PATCH] mm/hmm.c: allow VM_MIXEDMAP to work with hmm_range_fault
hmm_range_fault() can be used instead of get_user_pages() for devices
which allow faulting however unlike get_user_pages() it will return an
error when used on a VM_MIXEDMAP range.
To make hmm_range_fault() more closely match get_user_pages() remove
this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
case in hmm_vma_handle_pte(). Rather than replicating the logic of
vm_normal_page() call it directly and do a check for the zero pfn
similar to what get_user_pages() currently does.
Also add a test to hmm selftest to verify functionality.
Link: https://lkml.kernel.org/r/20211104012001.2555676-1-apopple@nvidia.com
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Alistair Popple <apopple(a)nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Felix Kuehling <Felix.Kuehling(a)amd.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e2ce8f9b7605..767538089a62 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1086,9 +1086,33 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
return 0;
}
+static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long addr;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ struct page *page;
+ int ret;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+
+ ret = vm_insert_page(vma, addr, page);
+ if (ret) {
+ __free_page(page);
+ return ret;
+ }
+ put_page(page);
+ }
+
+ return 0;
+}
+
static const struct file_operations dmirror_fops = {
.open = dmirror_fops_open,
.release = dmirror_fops_release,
+ .mmap = dmirror_fops_mmap,
.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
.llseek = default_llseek,
.owner = THIS_MODULE,
diff --git a/mm/hmm.c b/mm/hmm.c
index 842e26599238..bd56641c79d4 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -300,7 +300,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* Since each architecture defines a struct page for the zero page, just
* fall through and treat it like a normal page.
*/
- if (pte_special(pte) && !pte_devmap(pte) &&
+ if (!vm_normal_page(walk->vma, addr, pte) &&
+ !pte_devmap(pte) &&
!is_zero_pfn(pte_pfn(pte))) {
if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
pte_unmap(ptep);
@@ -518,7 +519,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
vma->vm_flags & VM_READ)
return 0;
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..203323967b50 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1248,6 +1248,48 @@ TEST_F(hmm, anon_teardown)
}
}
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 87c01d57fa23de82fff593a7d070933d08755801 Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple(a)nvidia.com>
Date: Fri, 14 Jan 2022 14:09:31 -0800
Subject: [PATCH] mm/hmm.c: allow VM_MIXEDMAP to work with hmm_range_fault
hmm_range_fault() can be used instead of get_user_pages() for devices
which allow faulting however unlike get_user_pages() it will return an
error when used on a VM_MIXEDMAP range.
To make hmm_range_fault() more closely match get_user_pages() remove
this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
case in hmm_vma_handle_pte(). Rather than replicating the logic of
vm_normal_page() call it directly and do a check for the zero pfn
similar to what get_user_pages() currently does.
Also add a test to hmm selftest to verify functionality.
Link: https://lkml.kernel.org/r/20211104012001.2555676-1-apopple@nvidia.com
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Alistair Popple <apopple(a)nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Felix Kuehling <Felix.Kuehling(a)amd.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e2ce8f9b7605..767538089a62 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1086,9 +1086,33 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
return 0;
}
+static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long addr;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ struct page *page;
+ int ret;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+
+ ret = vm_insert_page(vma, addr, page);
+ if (ret) {
+ __free_page(page);
+ return ret;
+ }
+ put_page(page);
+ }
+
+ return 0;
+}
+
static const struct file_operations dmirror_fops = {
.open = dmirror_fops_open,
.release = dmirror_fops_release,
+ .mmap = dmirror_fops_mmap,
.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
.llseek = default_llseek,
.owner = THIS_MODULE,
diff --git a/mm/hmm.c b/mm/hmm.c
index 842e26599238..bd56641c79d4 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -300,7 +300,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* Since each architecture defines a struct page for the zero page, just
* fall through and treat it like a normal page.
*/
- if (pte_special(pte) && !pte_devmap(pte) &&
+ if (!vm_normal_page(walk->vma, addr, pte) &&
+ !pte_devmap(pte) &&
!is_zero_pfn(pte_pfn(pte))) {
if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
pte_unmap(ptep);
@@ -518,7 +519,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
vma->vm_flags & VM_READ)
return 0;
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..203323967b50 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1248,6 +1248,48 @@ TEST_F(hmm, anon_teardown)
}
}
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 87c01d57fa23de82fff593a7d070933d08755801 Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple(a)nvidia.com>
Date: Fri, 14 Jan 2022 14:09:31 -0800
Subject: [PATCH] mm/hmm.c: allow VM_MIXEDMAP to work with hmm_range_fault
hmm_range_fault() can be used instead of get_user_pages() for devices
which allow faulting however unlike get_user_pages() it will return an
error when used on a VM_MIXEDMAP range.
To make hmm_range_fault() more closely match get_user_pages() remove
this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
case in hmm_vma_handle_pte(). Rather than replicating the logic of
vm_normal_page() call it directly and do a check for the zero pfn
similar to what get_user_pages() currently does.
Also add a test to hmm selftest to verify functionality.
Link: https://lkml.kernel.org/r/20211104012001.2555676-1-apopple@nvidia.com
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Alistair Popple <apopple(a)nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Felix Kuehling <Felix.Kuehling(a)amd.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e2ce8f9b7605..767538089a62 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1086,9 +1086,33 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
return 0;
}
+static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long addr;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ struct page *page;
+ int ret;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+
+ ret = vm_insert_page(vma, addr, page);
+ if (ret) {
+ __free_page(page);
+ return ret;
+ }
+ put_page(page);
+ }
+
+ return 0;
+}
+
static const struct file_operations dmirror_fops = {
.open = dmirror_fops_open,
.release = dmirror_fops_release,
+ .mmap = dmirror_fops_mmap,
.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
.llseek = default_llseek,
.owner = THIS_MODULE,
diff --git a/mm/hmm.c b/mm/hmm.c
index 842e26599238..bd56641c79d4 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -300,7 +300,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* Since each architecture defines a struct page for the zero page, just
* fall through and treat it like a normal page.
*/
- if (pte_special(pte) && !pte_devmap(pte) &&
+ if (!vm_normal_page(walk->vma, addr, pte) &&
+ !pte_devmap(pte) &&
!is_zero_pfn(pte_pfn(pte))) {
if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
pte_unmap(ptep);
@@ -518,7 +519,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
vma->vm_flags & VM_READ)
return 0;
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..203323967b50 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1248,6 +1248,48 @@ TEST_F(hmm, anon_teardown)
}
}
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 180dccb0dba4f5e84a4a70c1be1d34cbb6528b32 Mon Sep 17 00:00:00 2001
From: Laibin Qiu <qiulaibin(a)huawei.com>
Date: Thu, 13 Jan 2022 10:55:36 +0800
Subject: [PATCH] blk-mq: fix tag_get wait task can't be awakened
In case of shared tags, there might be more than one hctx which
allocates from the same tags, and each hctx is limited to allocate at
most:
hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);
tag idle detection is lazy, and may be delayed for 30sec, so there
could be just one real active hctx(queue) but all others are actually
idle and still accounted as active because of the lazy idle detection.
Then if wake_batch is > hctx_max_depth, driver tag allocation may wait
forever on this real active hctx.
Fix this by recalculating wake_batch when inc or dec active_queues.
Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps")
Suggested-by: Ming Lei <ming.lei(a)redhat.com>
Suggested-by: John Garry <john.garry(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20220113025536.1479653-1-qiulaibin@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e55a6834c9a6..845f74e8dd7b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -16,6 +16,21 @@
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
+/*
+ * Recalculate wakeup batch when tag is shared by hctx.
+ */
+static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
+ unsigned int users)
+{
+ if (!users)
+ return;
+
+ sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
+ users);
+ sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
+ users);
+}
+
/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
@@ -24,18 +39,26 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
+ unsigned int users;
+
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
- if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
- !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
+ return true;
+ }
} else {
- if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
- !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+ test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
+ return true;
+ }
}
+ users = atomic_inc_return(&hctx->tags->active_queues);
+
+ blk_mq_update_wake_batch(hctx->tags, users);
+
return true;
}
@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_tags *tags = hctx->tags;
+ unsigned int users;
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
return;
}
- atomic_dec(&tags->active_queues);
+ users = atomic_dec_return(&tags->active_queues);
+
+ blk_mq_update_wake_batch(tags, users);
blk_mq_tag_wakeup_all(tags, false);
}
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index fc0357a6e19b..95df357ec009 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
sbitmap_free(&sbq->sb);
}
+/**
+ * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
+ * @sbq: Bitmap queue to recalculate wake batch.
+ * @users: Number of shares.
+ *
+ * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
+ * by depth. This interface is for HCTX shared tags or queue shared tags.
+ */
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users);
+
/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2709ab825499..6220fa67fb7e 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
- unsigned int depth)
+static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int wake_batch)
{
- unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
@@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
}
}
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
+{
+ unsigned int wake_batch;
+
+ wake_batch = sbq_calc_wake_batch(sbq, depth);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users)
+{
+ unsigned int wake_batch;
+
+ wake_batch = clamp_val((sbq->sb.depth + users - 1) /
+ users, 4, SBQ_WAKE_BATCH);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
+
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
sbitmap_queue_update_wake_batch(sbq, depth);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 180dccb0dba4f5e84a4a70c1be1d34cbb6528b32 Mon Sep 17 00:00:00 2001
From: Laibin Qiu <qiulaibin(a)huawei.com>
Date: Thu, 13 Jan 2022 10:55:36 +0800
Subject: [PATCH] blk-mq: fix tag_get wait task can't be awakened
In case of shared tags, there might be more than one hctx which
allocates from the same tags, and each hctx is limited to allocate at
most:
hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);
tag idle detection is lazy, and may be delayed for 30sec, so there
could be just one real active hctx(queue) but all others are actually
idle and still accounted as active because of the lazy idle detection.
Then if wake_batch is > hctx_max_depth, driver tag allocation may wait
forever on this real active hctx.
Fix this by recalculating wake_batch when inc or dec active_queues.
Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps")
Suggested-by: Ming Lei <ming.lei(a)redhat.com>
Suggested-by: John Garry <john.garry(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20220113025536.1479653-1-qiulaibin@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e55a6834c9a6..845f74e8dd7b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -16,6 +16,21 @@
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
+/*
+ * Recalculate wakeup batch when tag is shared by hctx.
+ */
+static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
+ unsigned int users)
+{
+ if (!users)
+ return;
+
+ sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
+ users);
+ sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
+ users);
+}
+
/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
@@ -24,18 +39,26 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
+ unsigned int users;
+
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
- if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
- !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
+ return true;
+ }
} else {
- if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
- !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+ test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
+ return true;
+ }
}
+ users = atomic_inc_return(&hctx->tags->active_queues);
+
+ blk_mq_update_wake_batch(hctx->tags, users);
+
return true;
}
@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_tags *tags = hctx->tags;
+ unsigned int users;
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
return;
}
- atomic_dec(&tags->active_queues);
+ users = atomic_dec_return(&tags->active_queues);
+
+ blk_mq_update_wake_batch(tags, users);
blk_mq_tag_wakeup_all(tags, false);
}
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index fc0357a6e19b..95df357ec009 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
sbitmap_free(&sbq->sb);
}
+/**
+ * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
+ * @sbq: Bitmap queue to recalculate wake batch.
+ * @users: Number of shares.
+ *
+ * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
+ * by depth. This interface is for HCTX shared tags or queue shared tags.
+ */
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users);
+
/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2709ab825499..6220fa67fb7e 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
- unsigned int depth)
+static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int wake_batch)
{
- unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
@@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
}
}
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
+{
+ unsigned int wake_batch;
+
+ wake_batch = sbq_calc_wake_batch(sbq, depth);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users)
+{
+ unsigned int wake_batch;
+
+ wake_batch = clamp_val((sbq->sb.depth + users - 1) /
+ users, 4, SBQ_WAKE_BATCH);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
+
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
sbitmap_queue_update_wake_batch(sbq, depth);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 180dccb0dba4f5e84a4a70c1be1d34cbb6528b32 Mon Sep 17 00:00:00 2001
From: Laibin Qiu <qiulaibin(a)huawei.com>
Date: Thu, 13 Jan 2022 10:55:36 +0800
Subject: [PATCH] blk-mq: fix tag_get wait task can't be awakened
In case of shared tags, there might be more than one hctx which
allocates from the same tags, and each hctx is limited to allocate at
most:
hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);
tag idle detection is lazy, and may be delayed for 30sec, so there
could be just one real active hctx(queue) but all others are actually
idle and still accounted as active because of the lazy idle detection.
Then if wake_batch is > hctx_max_depth, driver tag allocation may wait
forever on this real active hctx.
Fix this by recalculating wake_batch when inc or dec active_queues.
Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps")
Suggested-by: Ming Lei <ming.lei(a)redhat.com>
Suggested-by: John Garry <john.garry(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20220113025536.1479653-1-qiulaibin@huawei.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e55a6834c9a6..845f74e8dd7b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -16,6 +16,21 @@
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
+/*
+ * Recalculate wakeup batch when tag is shared by hctx.
+ */
+static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
+ unsigned int users)
+{
+ if (!users)
+ return;
+
+ sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
+ users);
+ sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
+ users);
+}
+
/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
@@ -24,18 +39,26 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
+ unsigned int users;
+
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
- if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
- !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
+ return true;
+ }
} else {
- if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
- !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+ test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
+ return true;
+ }
}
+ users = atomic_inc_return(&hctx->tags->active_queues);
+
+ blk_mq_update_wake_batch(hctx->tags, users);
+
return true;
}
@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_tags *tags = hctx->tags;
+ unsigned int users;
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
return;
}
- atomic_dec(&tags->active_queues);
+ users = atomic_dec_return(&tags->active_queues);
+
+ blk_mq_update_wake_batch(tags, users);
blk_mq_tag_wakeup_all(tags, false);
}
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index fc0357a6e19b..95df357ec009 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
sbitmap_free(&sbq->sb);
}
+/**
+ * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
+ * @sbq: Bitmap queue to recalculate wake batch.
+ * @users: Number of shares.
+ *
+ * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
+ * by depth. This interface is for HCTX shared tags or queue shared tags.
+ */
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users);
+
/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2709ab825499..6220fa67fb7e 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
- unsigned int depth)
+static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int wake_batch)
{
- unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
@@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
}
}
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
+{
+ unsigned int wake_batch;
+
+ wake_batch = sbq_calc_wake_batch(sbq, depth);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users)
+{
+ unsigned int wake_batch;
+
+ wake_batch = clamp_val((sbq->sb.depth + users - 1) /
+ users, 4, SBQ_WAKE_BATCH);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
+
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
sbitmap_queue_update_wake_batch(sbq, depth);