From: Aaro Koskinen aaro.koskinen@iki.fi
[ Upstream commit 8443e4843e1c2594bf5664e1d993a1be71d1befb ]
Commit a758f50f10cf ("mtd: onenand: omap2: Configure driver from DT") started using DT-specified timings for the GPMC, and as a result OneNAND stopped working on the N950/N9 because we had wrong values in the DT. Fix this by updating the values to the bootloader timings, which have been tested to work on both the Nokia N950 and N9.
Fixes: a758f50f10cf ("mtd: onenand: omap2: Configure driver from DT") Signed-off-by: Aaro Koskinen aaro.koskinen@iki.fi Signed-off-by: Tony Lindgren tony@atomide.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/omap3-n950-n9.dtsi | 42 ++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 0d9b85317529b..e142e6c70a59f 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -370,6 +370,19 @@ compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */
+ /* + * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported + * bootloader set values when booted with v4.19 using both N950 + * and N9 devices (OneNAND Manufacturer: Samsung): + * + * gpmc cs0 before gpmc_cs_program_settings: + * cs0 GPMC_CS_CONFIG1: 0xfd001202 + * cs0 GPMC_CS_CONFIG2: 0x00181800 + * cs0 GPMC_CS_CONFIG3: 0x00030300 + * cs0 GPMC_CS_CONFIG4: 0x18001804 + * cs0 GPMC_CS_CONFIG5: 0x03171d1d + * cs0 GPMC_CS_CONFIG6: 0x97080000 + */ gpmc,sync-read; gpmc,sync-write; gpmc,burst-length = <16>; @@ -379,26 +392,27 @@ gpmc,device-width = <2>; gpmc,mux-add-data = <2>; gpmc,cs-on-ns = <0>; - gpmc,cs-rd-off-ns = <87>; - gpmc,cs-wr-off-ns = <87>; + gpmc,cs-rd-off-ns = <122>; + gpmc,cs-wr-off-ns = <122>; gpmc,adv-on-ns = <0>; - gpmc,adv-rd-off-ns = <10>; - gpmc,adv-wr-off-ns = <10>; - gpmc,oe-on-ns = <15>; - gpmc,oe-off-ns = <87>; + gpmc,adv-rd-off-ns = <15>; + gpmc,adv-wr-off-ns = <15>; + gpmc,oe-on-ns = <20>; + gpmc,oe-off-ns = <122>; gpmc,we-on-ns = <0>; - gpmc,we-off-ns = <87>; - gpmc,rd-cycle-ns = <112>; - gpmc,wr-cycle-ns = <112>; - gpmc,access-ns = <81>; + gpmc,we-off-ns = <122>; + gpmc,rd-cycle-ns = <148>; + gpmc,wr-cycle-ns = <148>; + gpmc,access-ns = <117>; gpmc,page-burst-access-ns = <15>; gpmc,bus-turnaround-ns = <0>; gpmc,cycle2cycle-delay-ns = <0>; gpmc,wait-monitoring-ns = <0>; - gpmc,clk-activation-ns = <5>; - gpmc,wr-data-mux-bus-ns = <30>; - gpmc,wr-access-ns = <81>; - gpmc,sync-clk-ps = <15000>; + gpmc,clk-activation-ns = <10>; + gpmc,wr-data-mux-bus-ns = <40>; + gpmc,wr-access-ns = <117>; + + gpmc,sync-clk-ps = <15000>; /* TBC; Where this value came? */
/* * MTD partition table corresponding to Nokia's MeeGo 1.2
From: Tony Lindgren tony@atomide.com
[ Upstream commit ef4a55b9197a8f844ea0663138e902dcce3e2f36 ]
We're now getting the following error:
genirq: Setting trigger mode 1 for irq 230 failed (regmap_irq_set_type+0x0/0x15c) cpcap-usb-phy cpcap-usb-phy.0: could not get irq dp: -524
Cc: Sebastian Reichel sre@kernel.org Reported-by: Pavel Machek pavel@ucw.cz Tested-by: Pavel Machek pavel@ucw.cz Signed-off-by: Tony Lindgren tony@atomide.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index ddc7a7bb33c0d..f57acf8f66b95 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -105,7 +105,7 @@ interrupts-extended = < &cpcap 15 0 &cpcap 14 0 &cpcap 28 0 &cpcap 19 0 &cpcap 18 0 &cpcap 17 0 &cpcap 16 0 &cpcap 49 0 - &cpcap 48 1 + &cpcap 48 0 >; interrupt-names = "id_ground", "id_float", "se0conn", "vbusvld",
From: Jernej Skrabec jernej.skrabec@siol.net
[ Upstream commit cc4bddade114b696ab27c1a77cfc7040151306da ]
Because the "ethernet0" alias is missing, U-Boot doesn't generate a board-specific MAC address. The effect is a random MAC address on every boot, and thus a new IP address being assigned to the board.
Fix this by adding the alias.
Fixes: 7389172fc3ed ("ARM: dts: sun8i: h3: Enable dwmac-sun8i on the Beelink X2") Signed-off-by: Jernej Skrabec jernej.skrabec@siol.net [Maxime: Removed unneeded comment] Signed-off-by: Maxime Ripard maxime.ripard@bootlin.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/sun8i-h3-beelink-x2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts index 5d23667dc2d2e..25540b7694d59 100644 --- a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts +++ b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts @@ -53,7 +53,7 @@
aliases { serial0 = &uart0; - /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */ + ethernet0 = &emac; ethernet1 = &sdiowifi; };
From: Carlo Caione ccaione@baylibre.com
[ Upstream commit e35e26b26e955c53e61c154ba26b9bb15da6b858 ]
A long-running stress test on a custom board shipping an AXG SoC and a Realtek RTL8211F PHY revealed that after a few hours the connection speed would drop drastically, from ~1000Mbps to ~3Mbps. At the same time the 'macirq' (eth0) IRQ would stop being triggered at all and, as a consequence, the GMAC IRQs were never ACKed.
After a painful investigation, the problem seemed to be due to a wrongly defined IRQ type for the GMAC IRQ, which should be LEVEL_HIGH instead of EDGE_RISING.
The change in the macirq IRQ type also solved another long-standing issue affecting this SoC/PHY, where EEE was causing the network connection to die after stressing it with iperf3 (although much sooner). It's now possible to remove the 'eee-broken-1000t' quirk as well.
Fixes: 9c15795a4f96 ("ARM: dts: meson8b-odroidc1: ethernet support") Signed-off-by: Carlo Caione ccaione@baylibre.com Reviewed-by: Martin Blumenstingl martin.blumenstingl@googlemail.com Tested-by: Martin Blumenstingl martin.blumenstingl@googlemail.com Signed-off-by: Kevin Hilman khilman@baylibre.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/meson.dtsi | 2 +- arch/arm/boot/dts/meson8b-odroidc1.dts | 1 - 2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index 0d9faf1a51eac..a86b890863347 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -263,7 +263,7 @@ compatible = "amlogic,meson6-dwmac", "snps,dwmac"; reg = <0xc9410000 0x10000 0xc1108108 0x4>; - interrupts = <GIC_SPI 8 IRQ_TYPE_EDGE_RISING>; + interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index ef3177d3da3dc..9e48936e2d8d7 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -125,7 +125,6 @@ /* Realtek RTL8211F (0x001cc916) */ eth_phy: ethernet-phy@0 { reg = <0>; - eee-broken-1000t; interrupt-parent = <&gpio_intc>; /* GPIOH_3 */ interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
From: Martin Blumenstingl martin.blumenstingl@googlemail.com
[ Upstream commit 3fb348e030319f20ebbde082a449d4bf8a96f2fd ]
After commit 89a5e15bcba87d ("gpio/mmc/of: Respect polarity in the device tree") SD cards are not detected anymore.
The CD GPIO is "active low" on Odroid-C1. The MMC dt-bindings specify: "[...] using the "cd-inverted" property means, that the CD line is active high, i.e. it is high, when a card is inserted".
Fix the description of the SD card by marking it as GPIO_ACTIVE_LOW and drop the "cd-inverted" property. This makes the definition consistent with the existing dt-bindings and fixes the check whether an SD card is inserted.
Fixes: e03efbce6bebf5 ("ARM: dts: meson8b-odroidc1: add microSD support") Signed-off-by: Martin Blumenstingl martin.blumenstingl@googlemail.com Reviewed-by: Linus Walleij linus.walleij@linaro.org Tested-by: Anand Moon linux.amoon@gmail.com Signed-off-by: Kevin Hilman khilman@baylibre.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/meson8b-odroidc1.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index 9e48936e2d8d7..8fdeeffecbdbc 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -171,8 +171,7 @@ cap-sd-highspeed; disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&tflash_vdd>; vqmmc-supply = <&tf_io>;
From: Martin Blumenstingl martin.blumenstingl@googlemail.com
[ Upstream commit 8615f5596335db0978cea593dcd0070dc5f8b116 ]
After commit 89a5e15bcba87d ("gpio/mmc/of: Respect polarity in the device tree") SD cards are not detected anymore.
The CD GPIO is "active low" on the MXIII-Plus. The MMC dt-bindings specify: "[...] using the "cd-inverted" property means, that the CD line is active high, i.e. it is high, when a card is inserted".
Fix the description of the SD card by marking it as GPIO_ACTIVE_LOW and drop the "cd-inverted" property. This makes the definition consistent with the existing dt-bindings and fixes the check whether an SD card is inserted.
Fixes: 35ee52bea66c74 ("ARM: dts: meson8m2: add support for the Tronsmart MXIII Plus") Signed-off-by: Martin Blumenstingl martin.blumenstingl@googlemail.com Reviewed-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Kevin Hilman khilman@baylibre.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/meson8m2-mxiii-plus.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts index f5853610b20b8..6ac02beb5fa72 100644 --- a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts +++ b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts @@ -206,8 +206,7 @@ cap-sd-highspeed; disable-wp;
- cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vcc_3v3>; };
From: Anson Huang anson.huang@nxp.com
[ Upstream commit ba0f4560526ba19300c07ed5a3c1df7592815dc6 ]
i.MX6SX has the same GPT type as i.MX6DL. The GPT driver uses the TIMER_OF_DECLARE below, so the backward-compatible string should be "fsl,imx6dl-gpt"; correct it.
TIMER_OF_DECLARE(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt);
Signed-off-by: Anson Huang Anson.Huang@nxp.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6sx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 844caa39364ff..50083cecc6c97 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -462,7 +462,7 @@ };
gpt: gpt@2098000 { - compatible = "fsl,imx6sx-gpt", "fsl,imx31-gpt"; + compatible = "fsl,imx6sx-gpt", "fsl,imx6dl-gpt"; reg = <0x02098000 0x4000>; interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SX_CLK_GPT_BUS>,
From: Geert Uytterhoeven geert+renesas@glider.be
[ Upstream commit 97f26702bc95b5c3a72671d5c6675e4d6ee0a2f4 ]
SCIF2 on R-Car M3-W can be used with both DMAC1 and DMAC2.
Fixes: dbcae5ea4bd27409 ("arm64: dts: r8a7796: Enable SCIF DMA") Signed-off-by: Geert Uytterhoeven geert+renesas@glider.be Signed-off-by: Simon Horman horms+renesas@verge.net.au Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/renesas/r8a7796.dtsi | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi index cbd35c00b4af6..33cb0281c39c8 100644 --- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi @@ -1161,6 +1161,9 @@ <&cpg CPG_CORE R8A7796_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A7796_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled";
From: Geert Uytterhoeven geert+renesas@glider.be
[ Upstream commit 05c8478abd485507c25aa565afab604af8d8fe46 ]
SCIF2 on R-Car M3-N can be used with both DMAC1 and DMAC2.
Fixes: 0ea5b2fd38db56aa ("arm64: dts: renesas: r8a77965: Add SCIF device nodes") Signed-off-by: Geert Uytterhoeven geert+renesas@glider.be Signed-off-by: Simon Horman horms+renesas@verge.net.au Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/renesas/r8a77965.dtsi | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 0cd44461a0bd2..f60f08ba1a6f9 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -951,6 +951,9 @@ <&cpg CPG_CORE R8A77965_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A77965_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled";
From: Madalin Bucur madalin.bucur@nxp.com
[ Upstream commit 89857a8a5c89a406b967ab2be7bd2ccdbe75e73d ]
By clearing all interrupt sources, not only those that already occurred, the existing code may mistakenly acknowledge interrupts that occurred after the code checked for them.
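For illustration, here is a minimal sketch of the ack-only-what-was-observed pattern the patch moves to; it is not the QMan code itself, and the register accessor and names are generic placeholders. The handler acknowledges exactly the status bits it read and handled, so a source that asserts after the read stays pending and re-raises the interrupt instead of being silently dropped.

#include <linux/interrupt.h>
#include <linux/io.h>

/* Hypothetical ISR body: ack only the bits that were actually read and handled. */
static irqreturn_t example_isr(void __iomem *isr_reg, u32 enabled_sources)
{
        u32 pending = readl(isr_reg) & enabled_sources;

        if (!pending)
                return IRQ_NONE;

        /* ... handle each bit set in 'pending' ... */

        /*
         * Writing back only 'pending' leaves any interrupt that asserted
         * after the readl() above un-acknowledged, so it is not lost.
         */
        writel(pending, isr_reg);
        return IRQ_HANDLED;
}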
Signed-off-by: Madalin Bucur madalin.bucur@nxp.com Signed-off-by: Roy Pledge roy.pledge@nxp.com Signed-off-by: Li Yang leoyang.li@nxp.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/soc/fsl/qbman/qman.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 8cc0151830433..a4ac6073c555d 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1081,18 +1081,19 @@ static void qm_mr_process_task(struct work_struct *work); static irqreturn_t portal_isr(int irq, void *ptr) { struct qman_portal *p = ptr; - - u32 clear = QM_DQAVAIL_MASK | p->irq_sources; u32 is = qm_in(&p->p, QM_REG_ISR) & p->irq_sources; + u32 clear = 0;
if (unlikely(!is)) return IRQ_NONE;
/* DQRR-handling if it's interrupt-driven */ - if (is & QM_PIRQ_DQRI) + if (is & QM_PIRQ_DQRI) { __poll_portal_fast(p, QMAN_POLL_LIMIT); + clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI; + } /* Handling of anything else that's interrupt-driven */ - clear |= __poll_portal_slow(p, is); + clear |= __poll_portal_slow(p, is) & QM_PIRQ_SLOW; qm_out(&p->p, QM_REG_ISR, clear); return IRQ_HANDLED; }
From: Jason Kridner jkridner@gmail.com
[ Upstream commit f165988b77ef849eb0c1aebd94fe778024f88314 ]
Fixes an issue created by commit 9b3e4207661e67f04c72af15e29f74cd944f5964.
It wasn't possible for one_regmap_config to be non-NULL at the point it was tested for mcp23s18 devices.
Applied the same pattern of allocating one_regmap_config using devm_kmemdup() and then initializing the local regmap structure from that.
Signed-off-by: Jason Kridner jdk@ti.com Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pinctrl/pinctrl-mcp23s08.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index cf73a403d22df..cecbce21d01f7 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -832,8 +832,13 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, break;
case MCP_TYPE_S18: + one_regmap_config = + devm_kmemdup(dev, &mcp23x17_regmap, + sizeof(struct regmap_config), GFP_KERNEL); + if (!one_regmap_config) + return -ENOMEM; mcp->regmap = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp, - &mcp23x17_regmap); + one_regmap_config); mcp->reg_shift = 1; mcp->chip.ngpio = 16; mcp->chip.label = "mcp23s18";
From: Ulf Hansson ulf.hansson@linaro.org
[ Upstream commit 13e62626c578d9889ebbda7c521be5adff9bef8e ]
During "wlan-up", we are programming the FW into the WiFi-chip. However, re-programming the FW doesn't work, unless a power cycle of the WiFi-chip is made in-between the programmings.
To conform to this requirement and to fix the regression in a simple way, let's start by allowing the SDIO card (WiFi-chip) to stay powered on (runtime resumed) when wl12xx_sdio_power_off() returns. The intent of the current code is to treat this scenario as an error, but unfortunately this doesn't work as expected, so let's fix it.
The other part is to guarantee that a power cycle of the SDIO card has been completed when wl12xx_sdio_power_on() returns, so as to allow the FW programming to succeed. However, relying solely on runtime PM to deal with this isn't sufficient. For example, userspace may prevent runtime suspend via sysfs for the device that represents the SDIO card, which means the mmc core also keeps it powered on. For this reason, let's instead do a brute force power cycle in wl12xx_sdio_power_on().
Fixes: 728a9dc61f13 ("wlcore: sdio: Fix flakey SDIO runtime PM handling") Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Tested-by: Tony Lindgren tony@atomide.com Tested-by: Anders Roxell anders.roxell@linaro.org Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/wireless/ti/wlcore/sdio.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index 750bea3574ee4..627df164b7b6d 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -164,6 +164,12 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue) }
sdio_claim_host(func); + /* + * To guarantee that the SDIO card is power cycled, as required to make + * the FW programming to succeed, let's do a brute force HW reset. + */ + mmc_hw_reset(card->host); + sdio_enable_func(func); sdio_release_host(func);
@@ -174,20 +180,13 @@ static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue) { struct sdio_func *func = dev_to_sdio_func(glue->dev); struct mmc_card *card = func->card; - int error;
sdio_claim_host(func); sdio_disable_func(func); sdio_release_host(func);
/* Let runtime PM know the card is powered off */ - error = pm_runtime_put(&card->dev); - if (error < 0 && error != -EBUSY) { - dev_err(&card->dev, "%s failed: %i\n", __func__, error); - - return error; - } - + pm_runtime_put(&card->dev); return 0; }
From: Jiri Olsa jolsa@kernel.org
[ Upstream commit 752bcf80f5549c9901b2e8bc77b2138de55b1026 ]
Lance reported an issue with bpftool not being able to dump a program when more programs are loaded and you want to dump any program but the first, like:
# bpftool prog 28: kprobe name trace_req_start tag 1dfc28ba8b3dd597 gpl loaded_at 2019-01-18T17:02:40+1100 uid 0 xlated 112B jited 109B memlock 4096B map_ids 13 29: kprobe name trace_req_compl tag 5b6a5ecc6030a683 gpl loaded_at 2019-01-18T17:02:40+1100 uid 0 xlated 928B jited 575B memlock 4096B map_ids 13,14 # bpftool prog dum jited tag 1dfc28ba8b3dd597 0: push %rbp 1: mov %rsp,%rbp ...
# bpftool prog dum jited tag 5b6a5ecc6030a683 Error: can't get prog info (29): Bad address
The problem is that the prog_fd_by_tag function does not clean the struct bpf_prog_info before another request, so the previous program's length is still in there and the kernel assumes it needs to dump the program, which fails because there's no user pointer set.
Move the struct bpf_prog_info declaration into the loop, so it gets cleared before each query.
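As a condensed illustration (not bpftool's exact code; the function name is hypothetical and error handling is trimmed), the corrected lookup loop re-initializes the info struct and its length on every iteration, so a previous program's buffer lengths cannot leak into the next query:

#include <string.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int example_prog_fd_by_tag(const unsigned char *tag)
{
        unsigned int id = 0;

        while (!bpf_prog_get_next_id(id, &id)) {
                struct bpf_prog_info info = {}; /* zeroed for every program */
                __u32 len = sizeof(info);
                int fd = bpf_prog_get_fd_by_id(id);

                if (fd < 0)
                        continue;
                if (!bpf_obj_get_info_by_fd(fd, &info, &len) &&
                    !memcmp(info.tag, tag, BPF_TAG_SIZE))
                        return fd;      /* caller owns the fd */
                close(fd);
        }
        return -1;
}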
Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Reported-by: Lance Digby ldigby@redhat.com Signed-off-by: Jiri Olsa jolsa@kernel.org Reviewed-by: Quentin Monnet quentin.monnet@netronome.com Acked-by: Jakub Kicinski jakub.kicinski@netronome.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- tools/bpf/bpftool/prog.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 0de024a6cc2be..bbba0d61570fe 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -109,13 +109,14 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
static int prog_fd_by_tag(unsigned char *tag) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); unsigned int id = 0; int err; int fd;
while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + err = bpf_prog_get_next_id(id, &id); if (err) { p_err("%s", strerror(errno));
From: Paolo Abeni pabeni@redhat.com
[ Upstream commit b0ca5ecb8e2279d706261f525f1bd0ba9e3fe800 ]
When updating a percpu map, bpftool currently copies the provided value only into the first per-CPU copy of the specified value; all other instances are left zeroed.
This change explicitly copies the user-provided bytes to all the per-CPU instances, keeping the sub-command syntax unchanged.
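As a sketch of the layout this relies on (the helper name and sizes here are illustrative assumptions, not taken from the patch): per-CPU map values are passed to the kernel as one slot per possible CPU, each slot rounded up to 8 bytes, so a uniform update has to replicate the parsed bytes into every slot, which is what the new fill_per_cpu_value() helper does.

#include <string.h>

/* Replicate one value across 8-byte-aligned per-CPU slots (illustrative). */
static void replicate_percpu_value(void *buf, const void *val,
                                   unsigned int value_size, unsigned int ncpus)
{
        unsigned int step = (value_size + 7) & ~7U; /* round_up(value_size, 8) */
        unsigned int i;

        for (i = 0; i < ncpus; i++)
                memcpy((unsigned char *)buf + i * step, val, value_size);
}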
v2 -> v3:
 - drop unused argument, as per Quentin's suggestion
v1 -> v2:
 - rename the helper, as per Quentin's suggestion
Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Paolo Abeni pabeni@redhat.com Reviewed-by: Quentin Monnet quentin.monnet@netronome.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- tools/bpf/bpftool/map.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index b455930a3eaf7..ec73d83d0d310 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -370,6 +370,20 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val, return argv + i; }
+/* on per cpu maps we must copy the provided value on all value instances */ +static void fill_per_cpu_value(struct bpf_map_info *info, void *value) +{ + unsigned int i, n, step; + + if (!map_is_per_cpu(info->type)) + return; + + n = get_possible_cpus(); + step = round_up(info->value_size, 8); + for (i = 1; i < n; i++) + memcpy(value + i * step, value, info->value_size); +} + static int parse_elem(char **argv, struct bpf_map_info *info, void *key, void *value, __u32 key_size, __u32 value_size, __u32 *flags, __u32 **value_fd) @@ -449,6 +463,8 @@ static int parse_elem(char **argv, struct bpf_map_info *info, argv = parse_bytes(argv, "value", value, value_size); if (!argv) return -1; + + fill_per_cpu_value(info, value); }
return parse_elem(argv, info, key, NULL, key_size, value_size,
From: Yafang Shao laoar.shao@gmail.com
[ Upstream commit c9e4576743eeda8d24dedc164d65b78877f9a98c ]
When the sock recvbuff is set by bpf_setsockopt(), the value must be limited by rmem_max. The same applies to sendbuff.
Fixes: 8c4b4c7e9ff0 ("bpf: Add setsockopt helper function to bpf") Signed-off-by: Yafang Shao laoar.shao@gmail.com Acked-by: Martin KaFai Lau kafai@fb.com Acked-by: Lawrence Brakmo brakmo@fb.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/net/core/filter.c b/net/core/filter.c index fb0080e84bd43..bed9061102f43 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3909,10 +3909,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break;
From: Peng Hao peng.hao2@zte.com.cn
[ Upstream commit ba16adeb346387eb2d1ada69003588be96f098fa ]
devm_ allocated data will be automatically freed, so explicitly freeing devm_ allocated data is invalid.
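A minimal sketch of the devm_ contract this fix relies on (the driver and struct names are hypothetical): memory obtained with devm_kzalloc() is released automatically when the device is unbound, so the remove path must not kfree() it, otherwise a double free results.

#include <linux/errno.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct example_state {
        int id;
};

static int example_probe(struct platform_device *pdev)
{
        struct example_state *st;

        st = devm_kzalloc(&pdev->dev, sizeof(*st), GFP_KERNEL);
        if (!st)
                return -ENOMEM;

        platform_set_drvdata(pdev, st);
        return 0;
}

static int example_remove(struct platform_device *pdev)
{
        /* No kfree() of the drvdata here: devres frees it on unbind. */
        return 0;
}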
Fixes: 1c459de1e645 ("ARM: pxa: ssp: use devm_ functions") Signed-off-by: Peng Hao peng.hao2@zte.com.cn [title's prefix changed] Signed-off-by: Robert Jarzmik robert.jarzmik@free.fr Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/plat-pxa/ssp.c | 3 --- 1 file changed, 3 deletions(-)
diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index ed36dcab80f1e..f519199741837 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -190,8 +190,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) if (ssp == NULL) return -ENODEV;
- iounmap(ssp->mmio_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res));
@@ -201,7 +199,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) list_del(&ssp->node); mutex_unlock(&ssp_lock);
- kfree(ssp); return 0; }
From: Srinivas Kandagatla srinivas.kandagatla@linaro.org
[ Upstream commit 2a81efb0de0e33f2d2c83154af0bd3ce389b3269 ]
Add a compatible to the GICv3 node to enable the quirk required to restrict writing to the GICR_WAKER register, which is restricted on the MSM8996 SoC by the hypervisor.
With this quirk, MSM8996 can at least boot mainline, which can help the community work with boards based on MSM8996.
Without this patch, the Qualcomm DB820c board reboots on mainline.
Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Signed-off-by: Andy Gross andy.gross@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index cd3865e7a270b..8c86c41a0d25f 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -399,7 +399,7 @@ };
intc: interrupt-controller@9bc0000 { - compatible = "arm,gic-v3"; + compatible = "qcom,msm8996-gic-v3", "arm,gic-v3"; #interrupt-cells = <3>; interrupt-controller; #redistributor-regions = <1>;
From: Felix Fietkau nbd@nbd.name
[ Upstream commit 7d652669b61d702c6e62a39579d17f6881670ab6 ]
With the addition of TXQ stats in the per-tid statistics the struct station_info grew significantly. This resulted in stack size warnings due to the structure itself being above the limit for the warnings.
To work around this, the TID array was allocated dynamically. Also a function to free this content was introduced with commit 7ea3e110f2f8 ("cfg80211: release station info tidstats where needed") but the necessary changes were not provided for batman-adv's B.A.T.M.A.N. V implementation.
Signed-off-by: Felix Fietkau nbd@nbd.name Fixes: 8689c051a201 ("cfg80211: dynamically allocate per-tid stats for station info") [sven@narfation.org: add commit message] Signed-off-by: Sven Eckelmann sven@narfation.org Signed-off-by: Simon Wunderlich sw@simonwunderlich.de Signed-off-by: Sasha Levin sashal@kernel.org --- net/batman-adv/bat_v_elp.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e8090f099eb80..ef0dec20c7d87 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
+ /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + dev_put(real_netdev); if (ret == -ENOENT) { /* Node is not associated anymore! It would be
From: Zhou Yanjie zhouyanjie@cduestc.edu.cn
[ Upstream commit 1ca1c87f91d9dc50d6a38e2177b2032996e7901c ]
According to the schematic, the CI20 hardware routes uart3, not uart2; uart2 was written by mistake in the original code.
Signed-off-by: Zhou Yanjie zhouyanjie@cduestc.edu.cn Signed-off-by: Paul Burton paul.burton@mips.com Cc: linux-mips linux-mips@vger.kernel.org Cc: linux-kernel linux-kernel@vger.kernel.org Cc: devicetree@vger.kernel.org Cc: robh+dt@kernel.org Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: mark.rutland@arm.com Cc: malat@debian.org Cc: ezequiel@collabora.co.uk Cc: ulf.hansson@linaro.org Cc: syq syq@debian.org Cc: jiaxun.yang jiaxun.yang@flygoat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/boot/dts/ingenic/ci20.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 50cff3cbcc6de..4f7b1fa31cf53 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -76,7 +76,7 @@ status = "okay";
pinctrl-names = "default"; - pinctrl-0 = <&pins_uart2>; + pinctrl-0 = <&pins_uart3>; };
&uart4 { @@ -196,9 +196,9 @@ bias-disable; };
- pins_uart2: uart2 { - function = "uart2"; - groups = "uart2-data", "uart2-hwflow"; + pins_uart3: uart3 { + function = "uart3"; + groups = "uart3-data", "uart3-hwflow"; bias-disable; };
From: Anders Roxell anders.roxell@linaro.org
[ Upstream commit f2105d42597f4d10e431b195d69e96dccaf9b012 ]
Fix link errors seen when CONFIG_FSL_USB2_OTG is enabled and USB_OTG_FSM is set to be a module; in that case the following link errors occur:
aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_ioctl': drivers/usb/phy/phy-fsl-usb.c:1083: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:1083:(.text+0x574): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_srp': drivers/usb/phy/phy-fsl-usb.c:674: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:674:(.text+0x61c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_host': drivers/usb/phy/phy-fsl-usb.c:593: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:593:(.text+0x7a4): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_hnp': drivers/usb/phy/phy-fsl-usb.c:695: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:695:(.text+0x858): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `a_wait_enum': drivers/usb/phy/phy-fsl-usb.c:274: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:274:(.text+0x16f0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o:drivers/usb/phy/phy-fsl-usb.c:619: more undefined references to `otg_statemachine' follow aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_peripheral': drivers/usb/phy/phy-fsl-usb.c:619:(.text+0x1fa0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' make[1]: *** [Makefile:1020: vmlinux] Error 1 make[1]: Target 'Image' not remade because of errors. make: *** [Makefile:152: sub-make] Error 2 make: Target 'Image' not remade because of errors.
Rework so that FSL_USB2_OTG depends on USB_OTG_FSM being built-in.
Signed-off-by: Anders Roxell anders.roxell@linaro.org Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index d7312eed60882..91ea3083e7ad9 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -21,7 +21,7 @@ config AB8500_USB
config FSL_USB2_OTG bool "Freescale USB OTG Transceiver Driver" - depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM + depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' select USB_PHY help
From: Heyi Guo guoheyi@huawei.com
[ Upstream commit 6479450f72c1391c03f08affe0d0110f41ae7ca0 ]
1. In the current implementation, every VLPI is temporarily mapped to the first CPU in the system (normally CPU0) and then moved to the actually scheduled CPU later.
2. So there is a time window in which a VLPI may be sent to CPU0 instead of the actually scheduled vCPU, in a multi-CPU virtual machine.
3. However, CPU0 may not have been scheduled as a virtual CPU after the system boots up, so the value of its GICR_VPROPBASER is unknown at that moment.
4. If the INTID of the VLPI is larger than 2^(GICR_VPROPBASER.IDbits+1), while IDbits is also in an unknown state, the GIC will behave as if the VLPI is out of range and simply drop it, which results in missing interrupts in the guest.
As no code will clear GICR_VPROPBASER at runtime, we can safely initialize the IDbits field at boot time for each CPU to get rid of this issue.
We also clear the Valid bit of GICR_VPENDBASER in case any ancient programming was left in and could cause memory corruption. A new function, its_clear_vpend_valid(), is added to reuse the code in its_vpe_deschedule().
Fixes: e643d8034036 ("irqchip/gic-v3-its: Add VPE scheduling") Signed-off-by: Heyi Guo guoheyi@huawei.com Signed-off-by: Heyi Guo heyi.guo@linaro.org Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 66 ++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 17 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 4c2246fe5dbe9..d9a8801083156 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1951,6 +1951,29 @@ static void its_free_pending_table(struct page *pt) get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K))); }
+static u64 its_clear_vpend_valid(void __iomem *vlpi_base) +{ + u32 count = 1000000; /* 1s! */ + bool clean; + u64 val; + + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + val &= ~GICR_VPENDBASER_Valid; + gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + do { + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + clean = !(val & GICR_VPENDBASER_Dirty); + if (!clean) { + count--; + cpu_relax(); + udelay(1); + } + } while (!clean && count); + + return val; +} + static void its_cpu_init_lpis(void) { void __iomem *rbase = gic_data_rdist_rd_base(); @@ -2024,6 +2047,30 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR);
+ if (gic_rdists->has_vlpis) { + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + + /* + * It's possible for CPU to receive VLPIs before it is + * sheduled as a vPE, especially for the first CPU, and the + * VLPI with INTID larger than 2^(IDbits+1) will be considered + * as out of range and dropped by GIC. + * So we initialize IDbits to known value to avoid VLPI drop. + */ + val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; + pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n", + smp_processor_id(), val); + gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + + /* + * Also clear Valid bit of GICR_VPENDBASER, in case some + * ancient programming gets left in and has possibility of + * corrupting memory. + */ + val = its_clear_vpend_valid(vlpi_base); + WARN_ON(val & GICR_VPENDBASER_Dirty); + } + /* Make sure the GIC has seen the above */ dsb(sy); } @@ -2644,26 +2691,11 @@ static void its_vpe_schedule(struct its_vpe *vpe) static void its_vpe_deschedule(struct its_vpe *vpe) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); - u32 count = 1000000; /* 1s! */ - bool clean; u64 val;
- /* We're being scheduled out */ - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - val &= ~GICR_VPENDBASER_Valid; - gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - do { - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - clean = !(val & GICR_VPENDBASER_Dirty); - if (!clean) { - count--; - cpu_relax(); - udelay(1); - } - } while (!clean && count); + val = its_clear_vpend_valid(vlpi_base);
- if (unlikely(!clean && !count)) { + if (unlikely(val & GICR_VPENDBASER_Dirty)) { pr_err_ratelimited("ITS virtual pending table not cleaning\n"); vpe->idai = false; vpe->pending_last = true;
From: Marc Zyngier marc.zyngier@arm.com
[ Upstream commit 45725e0fc3e7fe52fedb94f59806ec50e9618682 ]
In the unlikely event that we cannot find any available LPI in the system, we should gracefully return an error instead of carrying on with no LPI allocated at all.
Fixes: 38dd7c494cf6 ("irqchip/gic-v3-its: Drop chunk allocation compatibility") Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d9a8801083156..15579cba1a882 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1581,6 +1581,9 @@ static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids) nr_irqs /= 2; } while (nr_irqs > 0);
+ if (!nr_irqs) + err = -ENOSPC; + if (err) goto out;
From: Lubomir Rintel lkundrak@v3.sk
[ Upstream commit 2380a22b60ce6f995eac806e69c66e397b59d045 ]
Resetting bit 4 disables interrupt delivery to the "secure processor" core. This breaks the keyboard on an OLPC XO 1.75 laptop, where the firmware running on the "secure processor" bit-bangs the PS/2 protocol over the GPIO lines.
It is not clear what the rest of the bits are and Marvell was unhelpful when asked for documentation. Aside from the SP bit, there are probably priority bits.
Leaving the unknown bits as the firmware set them up seems to be a wiser course of action compared to just turning them off.
Signed-off-by: Lubomir Rintel lkundrak@v3.sk Acked-by: Pavel Machek pavel@ucw.cz [maz: fixed-up subject and commit message] Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/irqchip/irq-mmp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 25f32e1d77647..3496b61a312ae 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -34,6 +34,9 @@ #define SEL_INT_PENDING (1 << 6) #define SEL_INT_NUM_MASK 0x3f
+#define MMP2_ICU_INT_ROUTE_PJ4_IRQ (1 << 5) +#define MMP2_ICU_INT_ROUTE_PJ4_FIQ (1 << 6) + struct icu_chip_data { int nr_irqs; unsigned int virq_base; @@ -190,7 +193,8 @@ static const struct mmp_intc_conf mmp_conf = { static const struct mmp_intc_conf mmp2_conf = { .conf_enable = 0x20, .conf_disable = 0x0, - .conf_mask = 0x7f, + .conf_mask = MMP2_ICU_INT_ROUTE_PJ4_IRQ | + MMP2_ICU_INT_ROUTE_PJ4_FIQ, };
static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs)
From: Alex Deucher alexander.deucher@amd.com
[ Upstream commit dc14eb12f6bb3e779c5461429c1889a339c67aab ]
Add the missing power_average attribute to the visibility check for power attributes on APUs. It was missed before.
Reviewed-by: Evan Quan evan.quan@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 7b4e657a95c70..c3df75a9f65d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1443,7 +1443,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR;
if ((adev->flags & AMD_IS_APU) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + (attr == &sensor_dev_attr_power1_average.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) return 0;
From: Alex Deucher alexander.deucher@amd.com
[ Upstream commit afeff4c16edaa6275b903f82b0561406259aa3a3 ]
Check whether the device sits directly on the root bus before attempting to see what speeds the upstream PCIe port supports. Fixes a crash with PCI passthrough in a VM.
Bug: https://bugs.freedesktop.org/show_bug.cgi?id=109366 Reviewed-by: Evan Quan evan.quan@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/radeon/ci_dpm.c | 5 +++-- drivers/gpu/drm/radeon/si_dpm.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index d587779a80b4d..a97294ac96d59 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5676,7 +5676,7 @@ int ci_dpm_init(struct radeon_device *rdev) u16 data_offset, size; u8 frev, crev; struct ci_power_info *pi; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret;
@@ -5685,7 +5685,8 @@ int ci_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi;
- speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { pi->sys_pcie_mask = 0; } else { diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 8fb60b3af0158..0a785ef0ab660 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -6899,7 +6899,7 @@ int si_dpm_init(struct radeon_device *rdev) struct ni_power_info *ni_pi; struct si_power_info *si_pi; struct atom_clock_dividers dividers; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret;
@@ -6911,7 +6911,8 @@ int si_dpm_init(struct radeon_device *rdev) eg_pi = &ni_pi->eg; pi = &eg_pi->rv7xx;
- speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { si_pi->sys_pcie_mask = 0; } else {
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
[ Upstream commit ff9fb72bc07705c00795ca48631f7fffe24d2c6b ]
When an error happens, debugfs should return an error pointer value, not NULL. This will prevent the totally theoretical error where a debugfs call fails due to lack of memory, returning NULL, and that dentry value is then passed to another debugfs call, which would end up succeeding, creating a file at the root of the debugfs tree, but would then be impossible to remove (because you can not remove the directory NULL).
So, to make everyone happy, always return errors; this makes the users of debugfs much simpler (they never have to check the return value), and everyone can rest easy.
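A minimal sketch of the resulting caller pattern (the names are hypothetical, not from this patch): per the reasoning above, callers can chain debugfs calls without checking the returned dentry, and debugfs_remove_recursive() already ignores NULL and error-pointer dentries on teardown.

#include <linux/debugfs.h>
#include <linux/fs.h>

static struct dentry *example_dir;
static const struct file_operations example_fops;       /* placeholder ops */

static void example_debugfs_init(void)
{
        example_dir = debugfs_create_dir("example", NULL);
        /* No NULL/IS_ERR() check needed before creating children. */
        debugfs_create_file("state", 0444, example_dir, NULL, &example_fops);
}

static void example_debugfs_exit(void)
{
        debugfs_remove_recursive(example_dir);  /* safe on error pointers too */
}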
Reported-by: Gary R Hook ghook@amd.com Reported-by: Heiko Carstens heiko.carstens@de.ibm.com Reported-by: Masami Hiramatsu mhiramat@kernel.org Reported-by: Michal Hocko mhocko@kernel.org Reported-by: Sebastian Andrzej Siewior bigeasy@linutronix.de Reported-by: Ulf Hansson ulf.hansson@linaro.org Reviewed-by: Masami Hiramatsu mhiramat@kernel.org Reviewed-by: Sebastian Andrzej Siewior bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/debugfs/inode.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 41ef452c1fcfb..b16f8035b1af7 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -254,8 +254,8 @@ MODULE_ALIAS_FS("debugfs"); * @parent: a pointer to the parent dentry of the file. * * This function will return a pointer to a dentry if it succeeds. If the file - * doesn't exist or an error occurs, %NULL will be returned. The returned - * dentry must be passed to dput() when it is no longer needed. + * doesn't exist or an error occurs, %ERR_PTR(-ERROR) will be returned. The + * returned dentry must be passed to dput() when it is no longer needed. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -265,17 +265,17 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) struct dentry *dentry;
if (IS_ERR(parent)) - return NULL; + return parent;
if (!parent) parent = debugfs_mount->mnt_root;
dentry = lookup_one_len_unlocked(name, parent, strlen(name)); if (IS_ERR(dentry)) - return NULL; + return dentry; if (!d_really_is_positive(dentry)) { dput(dentry); - return NULL; + return ERR_PTR(-EINVAL); } return dentry; } @@ -324,7 +324,7 @@ static struct dentry *failed_creating(struct dentry *dentry) inode_unlock(d_inode(dentry->d_parent)); dput(dentry); simple_release_fs(&debugfs_mount, &debugfs_mount_count); - return NULL; + return ERR_PTR(-ENOMEM); }
static struct dentry *end_creating(struct dentry *dentry) @@ -347,7 +347,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, dentry = start_creating(name, parent);
if (IS_ERR(dentry)) - return NULL; + return dentry;
inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -386,7 +386,8 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -464,7 +465,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -495,7 +497,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_size); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -506,7 +509,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) struct inode *inode;
if (IS_ERR(dentry)) - return NULL; + return dentry;
inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -545,7 +548,7 @@ struct dentry *debugfs_create_automount(const char *name, struct inode *inode;
if (IS_ERR(dentry)) - return NULL; + return dentry;
inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -581,8 +584,8 @@ EXPORT_SYMBOL(debugfs_create_automount); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the symbolic * link is to be removed (no automatic cleanup happens if your module is - * unloaded, you are responsible here.) If an error occurs, %NULL will be - * returned. + * unloaded, you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) + * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -594,12 +597,12 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, struct inode *inode; char *link = kstrdup(target, GFP_KERNEL); if (!link) - return NULL; + return ERR_PTR(-ENOMEM);
dentry = start_creating(name, parent); if (IS_ERR(dentry)) { kfree(link); - return NULL; + return dentry; }
inode = debugfs_get_inode(dentry->d_sb); @@ -827,7 +830,9 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, if (dentry && !IS_ERR(dentry)) dput(dentry); unlock_rename(new_dir, old_dir); - return NULL; + if (IS_ERR(dentry)) + return dentry; + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(debugfs_rename);
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
[ Upstream commit 37ea7b630ae5cdea4e8ff381d9d23abfef5939e6 ]
Lots of callers of debugfs_lookup() were just checking NULL to see if the file/directory was found or not. By changing this in ff9fb72bc077 ("debugfs: return error values, not NULL") we caused some subsystems to easily crash.
Fixes: ff9fb72bc077 ("debugfs: return error values, not NULL") Reported-by: syzbot+b382ba6a802a3d242790@syzkaller.appspotmail.com Reported-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Cc: Omar Sandoval osandov@fb.com Cc: Jens Axboe axboe@fb.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/debugfs/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b16f8035b1af7..29c68c5d44d5f 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -254,8 +254,8 @@ MODULE_ALIAS_FS("debugfs"); * @parent: a pointer to the parent dentry of the file. * * This function will return a pointer to a dentry if it succeeds. If the file - * doesn't exist or an error occurs, %ERR_PTR(-ERROR) will be returned. The - * returned dentry must be passed to dput() when it is no longer needed. + * doesn't exist or an error occurs, %NULL will be returned. The returned + * dentry must be passed to dput() when it is no longer needed. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -265,17 +265,17 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) struct dentry *dentry;
if (IS_ERR(parent)) - return parent; + return NULL;
if (!parent) parent = debugfs_mount->mnt_root;
dentry = lookup_one_len_unlocked(name, parent, strlen(name)); if (IS_ERR(dentry)) - return dentry; + return NULL; if (!d_really_is_positive(dentry)) { dput(dentry); - return ERR_PTR(-EINVAL); + return NULL; } return dentry; }
From: Chris Wilson chris@chris-wilson.co.uk
[ Upstream commit 6e11ea9de9576a644045ffdc2067c09bc2012eda ]
amdgpu only uses shared fences internally, but dmabuf importers rely on implicit write-hazard tracking via reservation_object.fence_excl. For example, an importer uses the write hazard to time a page flip so that it only occurs after the exporter has finished flushing its write into the surface. As such, on exporting a dmabuf, we must either flush all outstanding fences (for we do not know which are writes and should have been exclusive) or alternatively create a new exclusive fence that is the composite of all the existing shared fences, and so will only be signaled when all earlier fences are signaled (ensuring that we cannot be signaled before the completion of any earlier write).
v2: reservation_object is already locked by amdgpu_bo_reserve()
v3: Replace looping with get_fences_rcu and special case the promotion of a single shared fence directly to an exclusive fence, bypassing the fence array.
v4: Drop the fence array ref after assigning to reservation_object
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107341 Testcase: igt/amd_prime/amd-to-i915 References: 8e94a46c1770 ("drm/amdgpu: Attach exclusive fence to prime exported bo's. (v5)") Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Alex Deucher alexander.deucher@amd.com Cc: "Christian König" christian.koenig@amd.com Reviewed-by: "Christian König" christian.koenig@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 59 ++++++++++++++++++++--- 1 file changed, 51 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 1c5d97f4b4dde..8dcf6227ab990 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -37,6 +37,7 @@ #include "amdgpu_display.h" #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> +#include <linux/dma-fence-array.h>
static const struct dma_buf_ops amdgpu_dmabuf_ops;
@@ -188,6 +189,48 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, return ERR_PTR(ret); }
+static int +__reservation_object_make_exclusive(struct reservation_object *obj) +{ + struct dma_fence **fences; + unsigned int count; + int r; + + if (!reservation_object_get_list(obj)) /* no shared fences to convert */ + return 0; + + r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); + if (r) + return r; + + if (count == 0) { + /* Now that was unexpected. */ + } else if (count == 1) { + reservation_object_add_excl_fence(obj, fences[0]); + dma_fence_put(fences[0]); + kfree(fences); + } else { + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, + false); + if (!array) + goto err_fences_put; + + reservation_object_add_excl_fence(obj, &array->base); + dma_fence_put(&array->base); + } + + return 0; + +err_fences_put: + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; +} + /** * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation * @dma_buf: shared DMA buffer @@ -219,16 +262,16 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
if (attach->dev->driver != adev->dev->driver) { /* - * Wait for all shared fences to complete before we switch to future - * use of exclusive fence on this prime shared bo. + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. */ - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, - true, false, - MAX_SCHEDULE_TIMEOUT); - if (unlikely(r < 0)) { - DRM_DEBUG_PRIME("Fence wait failed: %li\n", r); + r = __reservation_object_make_exclusive(bo->tbo.resv); + if (r) goto error_unreserve; - } }
/* pin buffer into GTT */
From: Jose Abreu jose.abreu@synopsys.com
[ Upstream commit 4ec5302fa906ec9d86597b236f62315bacdb9622 ]
If we don't have DT then stmmac_clk will not be available. Let's add a new platform data field so that we can specify the refclk by this means.
This way we can still use the coalesce command in PCI-based setups.
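As a sketch of how a DT-less setup might use the new field (the function name and the 62.5 MHz rate are assumptions for illustration, not taken from this patch), a PCI glue driver would fill in clk_ref_rate in its platform data so the usec/riwt conversions in stmmac_ethtool.c have a clock rate to fall back on:

#include <linux/stmmac.h>

/* Hypothetical PCI glue setup: provide the reference clock rate directly. */
static void example_pci_default_data(struct plat_stmmacenet_data *plat)
{
        plat->clk_ref_rate = 62500000;  /* Hz; example value only */
}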
Signed-off-by: Jose Abreu joabreu@synopsys.com Cc: Joao Pinto jpinto@synopsys.com Cc: David S. Miller davem@davemloft.net Cc: Giuseppe Cavallaro peppe.cavallaro@st.com Cc: Alexandre Torgue alexandre.torgue@st.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 14 ++++++++++---- include/linux/stmmac.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 9caf79ba5ef16..4d5fb4b51cc4f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -719,8 +719,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk);
- if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + }
return (usec * (clk / 1000000)) / 256; } @@ -729,8 +732,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk);
- if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + }
return (riwt * 256) / (clk / 1000000); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7ddfc65586b04..4335bd771ce57 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -184,6 +184,7 @@ struct plat_stmmacenet_data { struct clk *pclk; struct clk *clk_ptp_ref; unsigned int clk_ptp_rate; + unsigned int clk_ref_rate; struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4;
From: Jose Abreu jose.abreu@synopsys.com
[ Upstream commit c5acdbee22a1b200dde07effd26fd1f649e9ab8a ]
The number of TSO-enabled channels in HW can be different from the total number of channels. There is no way to determine, at runtime, the number of TSO-capable channels, and it's safe to assume that if TSO is enabled then at least channel 0 will be TSO capable.
Let's always send TSO packets from Queue 0.
Signed-off-by: Jose Abreu joabreu@synopsys.com Cc: Joao Pinto jpinto@synopsys.com Cc: David S. Miller davem@davemloft.net Cc: Giuseppe Cavallaro peppe.cavallaro@st.com Cc: Alexandre Torgue alexandre.torgue@st.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 123b74e25ed81..57c56e2957058 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3030,8 +3030,17 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
/* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + skb_set_queue_mapping(skb, 0); + return stmmac_tso_xmit(skb, dev); + } }
if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
From: Jose Abreu jose.abreu@synopsys.com
[ Upstream commit e2cd682deb231ba6f80524bb84e57e7138261149 ]
In the stmmac xmit callback we use a different flow for TSO packets, but the TSO xmit callback does not disable EEE mode.
Fix this by disabling EEE mode earlier, i.e. before calling the TSO xmit callback.
Signed-off-by: Jose Abreu joabreu@synopsys.com Cc: Joao Pinto jpinto@synopsys.com Cc: David S. Miller davem@davemloft.net Cc: Giuseppe Cavallaro peppe.cavallaro@st.com Cc: Alexandre Torgue alexandre.torgue@st.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 57c56e2957058..43ab9e905bed9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3028,6 +3028,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q = &priv->tx_queue[queue];
+ if (priv->tx_path_in_lpi_mode) + stmmac_disable_eee_mode(priv); + /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { @@ -3055,9 +3058,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; }
- if (priv->tx_path_in_lpi_mode) - stmmac_disable_eee_mode(priv); - entry = tx_q->cur_tx; first_entry = entry; WARN_ON(tx_q->tx_skbuff[first_entry]);
From: Zenghui Yu yuzenghui@huawei.com
[ Upstream commit 56841070ccc87b463ac037d2d1f2beb8e5e35f0c ]
According to ARM IHI 0069C (ID070116), we should use GITS_TYPER's bits [7:4] as ITT_entry_size instead of [8:4]. Although this is pretty annoying, it only results in a potential over-allocation of memory, and nothing bad happens.
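To make the over-allocation concrete, here is a small hedged userspace example; the GITS_TYPER value is made up, with only the ITT_entry_size field (bits [7:4]) and the lowest IDbits bit (bit 8, per GITS_TYPER_IDBITS_SHIFT in the hunk below) set:

	#include <stdio.h>

	#define ITT_ENTRY_SIZE_SHIFT	4

	static unsigned int itt_entry_size(unsigned long long typer, unsigned int mask)
	{
		return ((typer >> ITT_ENTRY_SIZE_SHIFT) & mask) + 1;
	}

	int main(void)
	{
		/* ITT_entry_size field = 3 (4 bytes), plus a neighbouring IDbits bit */
		unsigned long long typer = (1ULL << 8) | (3ULL << 4);

		printf("mask 0x1f: %u bytes per entry\n", itt_entry_size(typer, 0x1f)); /* 20 */
		printf("mask 0x0f: %u bytes per entry\n", itt_entry_size(typer, 0x0f)); /*  4 */
		return 0;
	}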
Fixes: 3dfa576bfb45 ("irqchip/gic-v3-its: Add probing for VLPI properties") Signed-off-by: Zenghui Yu yuzenghui@huawei.com [maz: massaged subject and commit message] Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 8bdbb5f29494b..3188c0bef3e79 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -319,7 +319,7 @@ #define GITS_TYPER_PLPIS (1UL << 0) #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
[ Upstream commit 2c1cf00eeacb784781cf1c9896b8af001246d339 ]
If create_buf_file() returns an error, don't try to reference it later as a valid dentry pointer.
This problem was exposed when debugfs started to return errors instead of just NULL for some calls when they do not succeed properly.
Also, the check for WARN_ON(dentry) was just wrong :)
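For context, here is a minimal userspace model of the ERR_PTR convention this fix (and the blk-mq-debugfs fix below) relies on. These are not the kernel's real macros (those live in include/linux/err.h), just a sketch of why a plain NULL or WARN_ON(dentry) check is not a validity test:

	#include <errno.h>
	#include <stdio.h>

	#define MAX_ERRNO	4095

	static void *ERR_PTR(long error) { return (void *)error; }
	static long PTR_ERR(const void *ptr) { return (long)ptr; }
	static int IS_ERR(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}
	static int IS_ERR_OR_NULL(const void *ptr) { return !ptr || IS_ERR(ptr); }

	int main(void)
	{
		void *dentry = ERR_PTR(-ENODEV);	/* what debugfs may now hand back */

		if (IS_ERR(dentry)) {
			printf("create_buf_file failed: %ld\n", PTR_ERR(dentry));
			dentry = NULL;			/* normalize, as relay now does */
		}
		printf("usable dentry: %s\n", IS_ERR_OR_NULL(dentry) ? "no" : "yes");
		return 0;
	}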
Reported-by: Kees Cook keescook@chromium.org Reported-and-tested-by: syzbot+16c3a70e1e9b29346c43@syzkaller.appspotmail.com Reported-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Cc: Andrew Morton akpm@linux-foundation.org Cc: David Rientjes rientjes@google.com Fixes: ff9fb72bc077 ("debugfs: return error values, not NULL") Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/relay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/relay.c b/kernel/relay.c index 04f248644e065..9e0f52375487d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan, dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, buf, &chan->is_global); + if (IS_ERR(dentry)) + dentry = NULL;
kfree(tmpname);
@@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) dentry = chan->cb->create_buf_file(NULL, NULL, S_IRUSR, buf, &chan->is_global); - if (WARN_ON(dentry)) + if (IS_ERR_OR_NULL(dentry)) goto free_buf; }
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
[ Upstream commit 36991ca68db9dd43bac7f3519f080ee3939263ef ]
If debugfs were to return a non-NULL error for a debugfs call, using that pointer later in debugfs_create_files() would crash.
Fix that by properly checking the pointer before referencing it.
Reported-by: Michal Hocko mhocko@kernel.org Reported-and-tested-by: syzbot+b382ba6a802a3d242790@syzkaller.appspotmail.com Reported-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- block/blk-mq-debugfs.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index cb1e6cf7ac48f..9dc3a0896462f 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -806,6 +806,9 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { static bool debugfs_create_files(struct dentry *parent, void *data, const struct blk_mq_debugfs_attr *attr) { + if (IS_ERR_OR_NULL(parent)) + return false; + d_inode(parent)->i_private = data;
for (; attr->name; attr++) {
From: Martynas Pumputis m@lambda.lt
[ Upstream commit 1bb54c4071f585ebef56ce8fdfe6026fa2cbcddd ]
Previously, bpf_num_possible_cpus() had a bug when calculating the number of possible CPUs in the case of sparse CPU allocations, as it was considering only the first range or element of /sys/devices/system/cpu/possible.
E.g. in the case of "0,2-3" (CPU 1 is not available), the function returned 1 instead of 3.
This patch fixes the function by making it parse all CPU ranges and elements.
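A hedged, standalone rendering of the new parsing loop on a fixed string (error handling trimmed; the real helper reads /sys/devices/system/cpu/possible and exits on parse failures):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buff[] = "0,2-3";		/* CPU 1 is not possible */
		unsigned int start = 0, end = 0, possible_cpus = 0;
		int len = strlen(buff), n, i, j = 0;

		for (i = 0; i <= len; i++) {
			if (buff[i] == ',' || buff[i] == '\0') {
				buff[i] = '\0';
				n = sscanf(&buff[j], "%u-%u", &start, &end);
				if (n == 1)		/* single element, e.g. "0" */
					end = start;
				possible_cpus += end - start + 1;
				j = i + 1;
			}
		}
		printf("%u\n", possible_cpus);	/* 3, not 1 as before the fix */
		return 0;
	}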
Signed-off-by: Martynas Pumputis m@lambda.lt Acked-by: Yonghong Song yhs@fb.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/bpf/bpf_util.h | 30 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 315a44fa32af3..84fd6f1bf33e7 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void) unsigned int start, end, possible_cpus = 0; char buff[128]; FILE *fp; - int n; + int len, n, i, j = 0;
fp = fopen(fcpu, "r"); if (!fp) { @@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void) exit(1); }
- while (fgets(buff, sizeof(buff), fp)) { - n = sscanf(buff, "%u-%u", &start, &end); - if (n == 0) { - printf("Failed to retrieve # possible CPUs!\n"); - exit(1); - } else if (n == 1) { - end = start; + if (!fgets(buff, sizeof(buff), fp)) { + printf("Failed to read %s!\n", fcpu); + exit(1); + } + + len = strlen(buff); + for (i = 0; i <= len; i++) { + if (buff[i] == ',' || buff[i] == '\0') { + buff[i] = '\0'; + n = sscanf(&buff[j], "%u-%u", &start, &end); + if (n <= 0) { + printf("Failed to retrieve # possible CPUs!\n"); + exit(1); + } else if (n == 1) { + end = start; + } + possible_cpus += end - start + 1; + j = i + 1; } - possible_cpus = start == 0 ? end + 1 : 0; - break; } + fclose(fp);
return possible_cpus;
From: Alexei Starovoitov ast@kernel.org
[ Upstream commit a89fac57b5d080771efd4d71feaae19877cf68f0 ]
Lockdep warns about false positive: [ 12.492084] 00000000e6b28347 (&head->lock){+...}, at: pcpu_freelist_push+0x2a/0x40 [ 12.492696] but this lock was taken by another, HARDIRQ-safe lock in the past: [ 12.493275] (&rq->lock){-.-.} [ 12.493276] [ 12.493276] [ 12.493276] and interrupts could create inverse lock ordering between them. [ 12.493276] [ 12.494435] [ 12.494435] other info that might help us debug this: [ 12.494979] Possible interrupt unsafe locking scenario: [ 12.494979] [ 12.495518] CPU0 CPU1 [ 12.495879] ---- ---- [ 12.496243] lock(&head->lock); [ 12.496502] local_irq_disable(); [ 12.496969] lock(&rq->lock); [ 12.497431] lock(&head->lock); [ 12.497890] <Interrupt> [ 12.498104] lock(&rq->lock); [ 12.498368] [ 12.498368] *** DEADLOCK *** [ 12.498368] [ 12.498837] 1 lock held by dd/276: [ 12.499110] #0: 00000000c58cb2ee (rcu_read_lock){....}, at: trace_call_bpf+0x5e/0x240 [ 12.499747] [ 12.499747] the shortest dependencies between 2nd lock and 1st lock: [ 12.500389] -> (&rq->lock){-.-.} { [ 12.500669] IN-HARDIRQ-W at: [ 12.500934] _raw_spin_lock+0x2f/0x40 [ 12.501373] scheduler_tick+0x4c/0xf0 [ 12.501812] update_process_times+0x40/0x50 [ 12.502294] tick_periodic+0x27/0xb0 [ 12.502723] tick_handle_periodic+0x1f/0x60 [ 12.503203] timer_interrupt+0x11/0x20 [ 12.503651] __handle_irq_event_percpu+0x43/0x2c0 [ 12.504167] handle_irq_event_percpu+0x20/0x50 [ 12.504674] handle_irq_event+0x37/0x60 [ 12.505139] handle_level_irq+0xa7/0x120 [ 12.505601] handle_irq+0xa1/0x150 [ 12.506018] do_IRQ+0x77/0x140 [ 12.506411] ret_from_intr+0x0/0x1d [ 12.506834] _raw_spin_unlock_irqrestore+0x53/0x60 [ 12.507362] __setup_irq+0x481/0x730 [ 12.507789] setup_irq+0x49/0x80 [ 12.508195] hpet_time_init+0x21/0x32 [ 12.508644] x86_late_time_init+0xb/0x16 [ 12.509106] start_kernel+0x390/0x42a [ 12.509554] secondary_startup_64+0xa4/0xb0 [ 12.510034] IN-SOFTIRQ-W at: [ 12.510305] _raw_spin_lock+0x2f/0x40 [ 12.510772] try_to_wake_up+0x1c7/0x4e0 [ 12.511220] swake_up_locked+0x20/0x40 [ 12.511657] swake_up_one+0x1a/0x30 [ 12.512070] rcu_process_callbacks+0xc5/0x650 [ 12.512553] __do_softirq+0xe6/0x47b [ 12.512978] irq_exit+0xc3/0xd0 [ 12.513372] smp_apic_timer_interrupt+0xa9/0x250 [ 12.513876] apic_timer_interrupt+0xf/0x20 [ 12.514343] default_idle+0x1c/0x170 [ 12.514765] do_idle+0x199/0x240 [ 12.515159] cpu_startup_entry+0x19/0x20 [ 12.515614] start_kernel+0x422/0x42a [ 12.516045] secondary_startup_64+0xa4/0xb0 [ 12.516521] INITIAL USE at: [ 12.516774] _raw_spin_lock_irqsave+0x38/0x50 [ 12.517258] rq_attach_root+0x16/0xd0 [ 12.517685] sched_init+0x2f2/0x3eb [ 12.518096] start_kernel+0x1fb/0x42a [ 12.518525] secondary_startup_64+0xa4/0xb0 [ 12.518986] } [ 12.519132] ... key at: [<ffffffff82b7bc28>] __key.71384+0x0/0x8 [ 12.519649] ... acquired at: [ 12.519892] pcpu_freelist_pop+0x7b/0xd0 [ 12.520221] bpf_get_stackid+0x1d2/0x4d0 [ 12.520563] ___bpf_prog_run+0x8b4/0x11a0 [ 12.520887] [ 12.521008] -> (&head->lock){+...} { [ 12.521292] HARDIRQ-ON-W at: [ 12.521539] _raw_spin_lock+0x2f/0x40 [ 12.521950] pcpu_freelist_push+0x2a/0x40 [ 12.522396] bpf_get_stackid+0x494/0x4d0 [ 12.522828] ___bpf_prog_run+0x8b4/0x11a0 [ 12.523296] INITIAL USE at: [ 12.523537] _raw_spin_lock+0x2f/0x40 [ 12.523944] pcpu_freelist_populate+0xc0/0x120 [ 12.524417] htab_map_alloc+0x405/0x500 [ 12.524835] __do_sys_bpf+0x1a3/0x1a90 [ 12.525253] do_syscall_64+0x4a/0x180 [ 12.525659] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 12.526167] } [ 12.526311] ... key at: [<ffffffff838f7668>] __key.13130+0x0/0x8 [ 12.526812] ... 
acquired at: [ 12.527047] __lock_acquire+0x521/0x1350 [ 12.527371] lock_acquire+0x98/0x190 [ 12.527680] _raw_spin_lock+0x2f/0x40 [ 12.527994] pcpu_freelist_push+0x2a/0x40 [ 12.528325] bpf_get_stackid+0x494/0x4d0 [ 12.528645] ___bpf_prog_run+0x8b4/0x11a0 [ 12.528970] [ 12.529092] [ 12.529092] stack backtrace: [ 12.529444] CPU: 0 PID: 276 Comm: dd Not tainted 5.0.0-rc3-00018-g2fa53f892422 #475 [ 12.530043] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 [ 12.530750] Call Trace: [ 12.530948] dump_stack+0x5f/0x8b [ 12.531248] check_usage_backwards+0x10c/0x120 [ 12.531598] ? ___bpf_prog_run+0x8b4/0x11a0 [ 12.531935] ? mark_lock+0x382/0x560 [ 12.532229] mark_lock+0x382/0x560 [ 12.532496] ? print_shortest_lock_dependencies+0x180/0x180 [ 12.532928] __lock_acquire+0x521/0x1350 [ 12.533271] ? find_get_entry+0x17f/0x2e0 [ 12.533586] ? find_get_entry+0x19c/0x2e0 [ 12.533902] ? lock_acquire+0x98/0x190 [ 12.534196] lock_acquire+0x98/0x190 [ 12.534482] ? pcpu_freelist_push+0x2a/0x40 [ 12.534810] _raw_spin_lock+0x2f/0x40 [ 12.535099] ? pcpu_freelist_push+0x2a/0x40 [ 12.535432] pcpu_freelist_push+0x2a/0x40 [ 12.535750] bpf_get_stackid+0x494/0x4d0 [ 12.536062] ___bpf_prog_run+0x8b4/0x11a0
It has been explained that this is a false positive here: https://lkml.org/lkml/2018/7/25/756
Recap:
- stackmap uses pcpu_freelist
- The lock in pcpu_freelist is a percpu lock
- stackmap is only used by tracing bpf_prog
- A tracing bpf_prog cannot be run if another bpf_prog has already been running (ensured by the percpu bpf_prog_active counter).
Eric pointed out that this lockdep splats stops other legit lockdep splats in selftests/bpf/test_progs.c.
Fix this by calling local_irq_save/restore for stackmap.
Another false positive had also been worked around by calling local_irq_save in commit 89ad2fa3f043 ("bpf: fix lockdep splat"). That commit added unnecessary irq_save/restore to the fast path of the bpf hash map. irqs are already disabled at that point, since htab is holding a per-bucket spin_lock with irqsave.
Let's reduce the overhead for htab by introducing __pcpu_freelist_push/pop functions without irqsave, and convert pcpu_freelist_push/pop into irqsave variants to be used elsewhere (right now only in stackmap). This stops the lockdep false positive in stackmap with a bit of acceptable overhead.
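For readability, a hedged restatement of the resulting two-level API (the shape matches the diff below; the comments, and which caller uses which variant, are editorial):

	/* Lock-only variants: caller must already have IRQs disabled, as the
	 * htab fast path does under its per-bucket spin_lock_irqsave. */
	void __pcpu_freelist_push(struct pcpu_freelist *s, struct pcpu_freelist_node *node);
	struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s);

	/* IRQ-saving wrappers for everyone else (currently only stackmap). */
	void pcpu_freelist_push(struct pcpu_freelist *s, struct pcpu_freelist_node *node)
	{
		unsigned long flags;

		local_irq_save(flags);
		__pcpu_freelist_push(s, node);
		local_irq_restore(flags);
	}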
Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Reported-by: Naresh Kamboju naresh.kamboju@linaro.org Reported-by: Eric Dumazet eric.dumazet@gmail.com Acked-by: Martin KaFai Lau kafai@fb.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/bpf/hashtab.c | 4 ++-- kernel/bpf/percpu_freelist.c | 41 +++++++++++++++++++++++++----------- kernel/bpf/percpu_freelist.h | 4 ++++ 3 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 03cc59ee9c953..cebadd6af4d9d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -677,7 +677,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) }
if (htab_is_prealloc(htab)) { - pcpu_freelist_push(&htab->freelist, &l->fnode); + __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); l->htab = htab; @@ -739,7 +739,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } else { struct pcpu_freelist_node *l;
- l = pcpu_freelist_pop(&htab->freelist); + l = __pcpu_freelist_pop(&htab->freelist); if (!l) return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 673fa6fe2d73c..0c1b4ba9e90e7 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s) free_percpu(s->freelist); }
-static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, - struct pcpu_freelist_node *node) +static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, + struct pcpu_freelist_node *node) { raw_spin_lock(&head->lock); node->next = head->first; @@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, raw_spin_unlock(&head->lock); }
-void pcpu_freelist_push(struct pcpu_freelist *s, +void __pcpu_freelist_push(struct pcpu_freelist *s, struct pcpu_freelist_node *node) { struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
- __pcpu_freelist_push(head, node); + ___pcpu_freelist_push(head, node); +} + +void pcpu_freelist_push(struct pcpu_freelist *s, + struct pcpu_freelist_node *node) +{ + unsigned long flags; + + local_irq_save(flags); + __pcpu_freelist_push(s, node); + local_irq_restore(flags); }
void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, @@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, for_each_possible_cpu(cpu) { again: head = per_cpu_ptr(s->freelist, cpu); - __pcpu_freelist_push(head, buf); + ___pcpu_freelist_push(head, buf); i++; buf += elem_size; if (i == nr_elems) @@ -74,14 +84,12 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, local_irq_restore(flags); }
-struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; - unsigned long flags; int orig_cpu, cpu;
- local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) { - local_irq_restore(flags); + if (cpu == orig_cpu) return NULL; - } } } + +struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +{ + struct pcpu_freelist_node *ret; + unsigned long flags; + + local_irq_save(flags); + ret = __pcpu_freelist_pop(s); + local_irq_restore(flags); + return ret; +} diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h index 3049aae8ea1e2..c3960118e6178 100644 --- a/kernel/bpf/percpu_freelist.h +++ b/kernel/bpf/percpu_freelist.h @@ -22,8 +22,12 @@ struct pcpu_freelist_node { struct pcpu_freelist_node *next; };
+/* pcpu_freelist_* do spin_lock_irqsave. */ void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); +/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ +void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems); int pcpu_freelist_init(struct pcpu_freelist *);
From: Alexei Starovoitov ast@kernel.org
[ Upstream commit e16ec34039c701594d55d08a5aa49ee3e1abc821 ]
Lockdep found a potential deadlock between cpu_hotplug_lock, bpf_event_mutex, and cpuctx_mutex: [ 13.007000] WARNING: possible circular locking dependency detected [ 13.007587] 5.0.0-rc3-00018-g2fa53f892422-dirty #477 Not tainted [ 13.008124] ------------------------------------------------------ [ 13.008624] test_progs/246 is trying to acquire lock: [ 13.009030] 0000000094160d1d (tracepoints_mutex){+.+.}, at: tracepoint_probe_register_prio+0x2d/0x300 [ 13.009770] [ 13.009770] but task is already holding lock: [ 13.010239] 00000000d663ef86 (bpf_event_mutex){+.+.}, at: bpf_probe_register+0x1d/0x60 [ 13.010877] [ 13.010877] which lock already depends on the new lock. [ 13.010877] [ 13.011532] [ 13.011532] the existing dependency chain (in reverse order) is: [ 13.012129] [ 13.012129] -> #4 (bpf_event_mutex){+.+.}: [ 13.012582] perf_event_query_prog_array+0x9b/0x130 [ 13.013016] _perf_ioctl+0x3aa/0x830 [ 13.013354] perf_ioctl+0x2e/0x50 [ 13.013668] do_vfs_ioctl+0x8f/0x6a0 [ 13.014003] ksys_ioctl+0x70/0x80 [ 13.014320] __x64_sys_ioctl+0x16/0x20 [ 13.014668] do_syscall_64+0x4a/0x180 [ 13.015007] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.015469] [ 13.015469] -> #3 (&cpuctx_mutex){+.+.}: [ 13.015910] perf_event_init_cpu+0x5a/0x90 [ 13.016291] perf_event_init+0x1b2/0x1de [ 13.016654] start_kernel+0x2b8/0x42a [ 13.016995] secondary_startup_64+0xa4/0xb0 [ 13.017382] [ 13.017382] -> #2 (pmus_lock){+.+.}: [ 13.017794] perf_event_init_cpu+0x21/0x90 [ 13.018172] cpuhp_invoke_callback+0xb3/0x960 [ 13.018573] _cpu_up+0xa7/0x140 [ 13.018871] do_cpu_up+0xa4/0xc0 [ 13.019178] smp_init+0xcd/0xd2 [ 13.019483] kernel_init_freeable+0x123/0x24f [ 13.019878] kernel_init+0xa/0x110 [ 13.020201] ret_from_fork+0x24/0x30 [ 13.020541] [ 13.020541] -> #1 (cpu_hotplug_lock.rw_sem){++++}: [ 13.021051] static_key_slow_inc+0xe/0x20 [ 13.021424] tracepoint_probe_register_prio+0x28c/0x300 [ 13.021891] perf_trace_event_init+0x11f/0x250 [ 13.022297] perf_trace_init+0x6b/0xa0 [ 13.022644] perf_tp_event_init+0x25/0x40 [ 13.023011] perf_try_init_event+0x6b/0x90 [ 13.023386] perf_event_alloc+0x9a8/0xc40 [ 13.023754] __do_sys_perf_event_open+0x1dd/0xd30 [ 13.024173] do_syscall_64+0x4a/0x180 [ 13.024519] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.024968] [ 13.024968] -> #0 (tracepoints_mutex){+.+.}: [ 13.025434] __mutex_lock+0x86/0x970 [ 13.025764] tracepoint_probe_register_prio+0x2d/0x300 [ 13.026215] bpf_probe_register+0x40/0x60 [ 13.026584] bpf_raw_tracepoint_open.isra.34+0xa4/0x130 [ 13.027042] __do_sys_bpf+0x94f/0x1a90 [ 13.027389] do_syscall_64+0x4a/0x180 [ 13.027727] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.028171] [ 13.028171] other info that might help us debug this: [ 13.028171] [ 13.028807] Chain exists of: [ 13.028807] tracepoints_mutex --> &cpuctx_mutex --> bpf_event_mutex [ 13.028807] [ 13.029666] Possible unsafe locking scenario: [ 13.029666] [ 13.030140] CPU0 CPU1 [ 13.030510] ---- ---- [ 13.030875] lock(bpf_event_mutex); [ 13.031166] lock(&cpuctx_mutex); [ 13.031645] lock(bpf_event_mutex); [ 13.032135] lock(tracepoints_mutex); [ 13.032441] [ 13.032441] *** DEADLOCK *** [ 13.032441] [ 13.032911] 1 lock held by test_progs/246: [ 13.033239] #0: 00000000d663ef86 (bpf_event_mutex){+.+.}, at: bpf_probe_register+0x1d/0x60 [ 13.033909] [ 13.033909] stack backtrace: [ 13.034258] CPU: 1 PID: 246 Comm: test_progs Not tainted 5.0.0-rc3-00018-g2fa53f892422-dirty #477 [ 13.034964] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 [ 13.035657] Call Trace: [ 13.035859] dump_stack+0x5f/0x8b 
[ 13.036130] print_circular_bug.isra.37+0x1ce/0x1db [ 13.036526] __lock_acquire+0x1158/0x1350 [ 13.036852] ? lock_acquire+0x98/0x190 [ 13.037154] lock_acquire+0x98/0x190 [ 13.037447] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.037876] __mutex_lock+0x86/0x970 [ 13.038167] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.038600] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.039028] ? __mutex_lock+0x86/0x970 [ 13.039337] ? __mutex_lock+0x24a/0x970 [ 13.039649] ? bpf_probe_register+0x1d/0x60 [ 13.039992] ? __bpf_trace_sched_wake_idle_without_ipi+0x10/0x10 [ 13.040478] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.040906] tracepoint_probe_register_prio+0x2d/0x300 [ 13.041325] bpf_probe_register+0x40/0x60 [ 13.041649] bpf_raw_tracepoint_open.isra.34+0xa4/0x130 [ 13.042068] ? __might_fault+0x3e/0x90 [ 13.042374] __do_sys_bpf+0x94f/0x1a90 [ 13.042678] do_syscall_64+0x4a/0x180 [ 13.042975] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.043382] RIP: 0033:0x7f23b10a07f9 [ 13.045155] RSP: 002b:00007ffdef42fdd8 EFLAGS: 00000202 ORIG_RAX: 0000000000000141 [ 13.045759] RAX: ffffffffffffffda RBX: 00007ffdef42ff70 RCX: 00007f23b10a07f9 [ 13.046326] RDX: 0000000000000070 RSI: 00007ffdef42fe10 RDI: 0000000000000011 [ 13.046893] RBP: 00007ffdef42fdf0 R08: 0000000000000038 R09: 00007ffdef42fe10 [ 13.047462] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000 [ 13.048029] R13: 0000000000000016 R14: 00007f23b1db4690 R15: 0000000000000000
Since tracepoints_mutex will be taken in tracepoint_probe_register/unregister(), there is no need to take bpf_event_mutex too. bpf_event_mutex protects modifications to the prog array used in kprobe/perf bpf progs. bpf_raw_tracepoints don't need to take this mutex.
Fixes: c4f6699dfcb8 ("bpf: introduce BPF_RAW_TRACEPOINT") Acked-by: Martin KaFai Lau kafai@fb.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/trace/bpf_trace.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9864a35c8bb57..6c28d519447d1 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1158,22 +1158,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = __bpf_probe_register(btp, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return __bpf_probe_register(btp, prog); }
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); }
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
From: Martin KaFai Lau kafai@fb.com
[ Upstream commit 7c4cd051add3d00bbff008a133c936c515eaa8fe ]
The map_lookup_elem syscall used to not acquire any spinlock in order to optimize the reader.
That was true until commit 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation"). The syscall's map_lookup_elem(stackmap) calls bpf_stackmap_copy(). bpf_stackmap_copy() may find that the elem is no longer needed after the copy is done. If that is the case, pcpu_freelist_push() saves this elem for reuse later. This push requires a spinlock.
If a tracing bpf_prog gets run in the middle of the syscall's map_lookup_elem(stackmap), and this tracing bpf_prog calls bpf_get_stackid(stackmap) which also requires the same pcpu_freelist's spinlock, it may end up in a deadlock situation, as reported by Eric Dumazet in https://patchwork.ozlabs.org/patch/1030266/
The situation is the same as the syscall's map_update_elem() which needs to acquire the pcpu_freelist's spinlock and could race with tracing bpf_prog. Hence, this patch fixes it by protecting bpf_stackmap_copy() with this_cpu_inc(bpf_prog_active) to prevent tracing bpf_prog from running.
A later commit, f1a2e44a3aec ("bpf: add queue and stack maps"), also acquires a spinlock in the syscall's map_lookup_elem path and races with tracing bpf_prog similarly. Hence, this patch is forward looking and protects the majority of the map lookups. bpf_map_offload_lookup_elem() is the exception since it is for network bpf_prog only (i.e. never called by tracing bpf_prog).
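To show why bumping bpf_prog_active is enough, here is a hedged single-threaded userspace model of the gate; the counter is per-CPU in the kernel and the check mirrors the recursion check in trace_call_bpf(), but everything else is illustrative:

	#include <stdio.h>

	static int bpf_prog_active;	/* per-CPU in the kernel; a plain int here */

	static void tracing_prog(void)
	{
		if (++bpf_prog_active != 1)	/* trace_call_bpf()-style recursion check */
			printf("tracing prog skipped\n");
		else
			printf("tracing prog runs (may touch the pcpu_freelist)\n");
		bpf_prog_active--;
	}

	int main(void)
	{
		bpf_prog_active++;	/* syscall-side map_lookup_elem bracket */
		tracing_prog();		/* a probe firing mid-copy is skipped */
		bpf_prog_active--;

		tracing_prog();		/* outside the bracket it runs normally */
		return 0;
	}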
Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Reported-by: Eric Dumazet eric.dumazet@gmail.com Acked-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Martin KaFai Lau kafai@fb.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/bpf/syscall.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 382c09dddf93b..cc40b8be1171d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -701,8 +701,13 @@ static int map_lookup_elem(union bpf_attr *attr)
if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + goto done; + } + + preempt_disable(); + this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); @@ -722,7 +727,10 @@ static int map_lookup_elem(union bpf_attr *attr) rcu_read_unlock(); err = ptr ? 0 : -ENOENT; } + this_cpu_dec(bpf_prog_active); + preempt_enable();
+done: if (err) goto free_value;
From: Paul Kocialkowski paul.kocialkowski@bootlin.com
[ Upstream commit b14e945bda8ae227d1bf2b1837c0c4a61721cd1a ]
When initializing clocks, a reference to the TCON channel 0 clock is obtained. However, the clock is never prepared and enabled afterwards. Because of that, switching from simplefb to DRM actually disables the clock (which was usually configured by U-Boot).
On the V3s, this results in a hang when writing to some mixer registers when switching over to DRM from simplefb.
Fix this by preparing and enabling the clock when initializing other clocks. Waiting for sun4i_tcon_channel_enable to enable the clock is apparently too late and results in the same mixer register access hang.
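A hedged sketch of the pattern being applied; the "tcon-ch0" clock name and the added return-value check are assumptions for illustration (the actual hunk below enables the already-obtained tcon->sclk0 and does not check the result):

	/* Getting a clock only returns a handle; it must also be prepared and
	 * enabled before hardware fed by it is touched, and balanced on teardown. */
	tcon->sclk0 = devm_clk_get(dev, "tcon-ch0");
	if (IS_ERR(tcon->sclk0))
		return PTR_ERR(tcon->sclk0);

	ret = clk_prepare_enable(tcon->sclk0);	/* keeps the clock U-Boot left on enabled */
	if (ret)
		return ret;

	/* ... and later, in sun4i_tcon_free_clocks(): */
	clk_disable_unprepare(tcon->sclk0);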
Signed-off-by: Paul Kocialkowski paul.kocialkowski@bootlin.com Signed-off-by: Maxime Ripard maxime.ripard@bootlin.com Link: https://patchwork.freedesktop.org/patch/msgid/20190131132550.26355-1-paul.ko... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 3fb084f802e29..8c31c9ab06f8b 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -672,6 +672,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, return PTR_ERR(tcon->sclk0); } } + clk_prepare_enable(tcon->sclk0);
if (tcon->quirks->has_channel_1) { tcon->sclk1 = devm_clk_get(dev, "tcon-ch1"); @@ -686,6 +687,7 @@ static int sun4i_tcon_init_clocks(struct device *dev,
static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon) { + clk_disable_unprepare(tcon->sclk0); clk_disable_unprepare(tcon->clk); }
From: Codrin Ciubotariu codrin.ciubotariu@microchip.com
[ Upstream commit dc3f595b6617ebc0307e0ce151e8f2f2b2489b95 ]
The atchan->status variable is used to store two different kinds of information:
- the channel interrupt status, passed from the interrupt handler to the tasklet;
- channel state, such as whether it is cyclic or paused.
This causes a bug when device_terminate_all() is called (which clears AT_XDMAC_CHAN_IS_CYCLIC in atchan->status) and then a late End of Block interrupt (AT_XDMAC_CIS_BIS) arrives and sets bit 0 of atchan->status. Bit 0 is also used for AT_XDMAC_CHAN_IS_CYCLIC, so when a new descriptor for a cyclic transfer is created, the driver reports the channel as in use:
	if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) {
		dev_err(chan2dev(chan), "channel currently used\n");
		return NULL;
	}
This patch fixes the bug by adding a separate struct member to keep the interrupt status apart from the channel status bits.
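A tiny hedged userspace illustration of the collision described above; the bit positions are taken from the description (both AT_XDMAC_CIS_BIS and AT_XDMAC_CHAN_IS_CYCLIC land on bit 0), not from the driver header:

	#include <stdio.h>

	#define AT_XDMAC_CIS_BIS	(1UL << 0)	/* end-of-block interrupt status */
	#define AT_XDMAC_CHAN_IS_CYCLIC	0		/* flag bit number used by the driver */

	int main(void)
	{
		unsigned long status = 0;	/* after device_terminate_all() */

		status |= AT_XDMAC_CIS_BIS;	/* late irq status stored into the same word */

		/* The next cyclic descriptor prep now wrongly sees the channel as busy */
		printf("channel currently used: %s\n",
		       (status >> AT_XDMAC_CHAN_IS_CYCLIC) & 1 ? "yes" : "no");
		return 0;
	}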
Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Codrin Ciubotariu codrin.ciubotariu@microchip.com Acked-by: Ludovic Desroches ludovic.desroches@microchip.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/dma/at_xdmac.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 4bf72561667c7..a75b95fac3bd5 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -203,6 +203,7 @@ struct at_xdmac_chan { u32 save_cim; u32 save_cnda; u32 save_cndc; + u32 irq_status; unsigned long status; struct tasklet_struct tasklet; struct dma_slave_config sconfig; @@ -1580,8 +1581,8 @@ static void at_xdmac_tasklet(unsigned long data) struct at_xdmac_desc *desc; u32 error_mask;
- dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n", - __func__, atchan->status); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status);
error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS @@ -1589,15 +1590,15 @@ static void at_xdmac_tasklet(unsigned long data)
if (at_xdmac_chan_is_cyclic(atchan)) { at_xdmac_handle_cyclic(atchan); - } else if ((atchan->status & AT_XDMAC_CIS_LIS) - || (atchan->status & error_mask)) { + } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) + || (atchan->irq_status & error_mask)) { struct dma_async_tx_descriptor *txd;
- if (atchan->status & AT_XDMAC_CIS_RBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_RBEIS) dev_err(chan2dev(&atchan->chan), "read bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_WBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_WBEIS) dev_err(chan2dev(&atchan->chan), "write bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_ROIS) + if (atchan->irq_status & AT_XDMAC_CIS_ROIS) dev_err(chan2dev(&atchan->chan), "request overflow error!!!");
spin_lock_bh(&atchan->lock); @@ -1652,7 +1653,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) atchan = &atxdmac->chan[i]; chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS); - atchan->status = chan_status & chan_imr; + atchan->irq_status = chan_status & chan_imr; dev_vdbg(atxdmac->dma.dev, "%s: chan%d: imr=0x%x, status=0x%x\n", __func__, i, chan_imr, chan_status); @@ -1666,7 +1667,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) at_xdmac_chan_read(atchan, AT_XDMAC_CDA), at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
- if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) + if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
tasklet_schedule(&atchan->tasklet);
From: Stefano Garzarella sgarzare@redhat.com
[ Upstream commit 22b5c0b63f32568e130fa2df4ba23efce3eb495b ]
virtio_vsock_remove() invokes vsock_core_exit() even if there are open sockets for the AF_VSOCK protocol family. In this way the vsock "transport" pointer is set to NULL, triggering a kernel panic at the first socket activity.
This patch moves vsock_core_init()/vsock_core_exit() in virtio_vsock to the module_init and module_exit functions respectively, which cannot be invoked while there are open sockets.
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1609699 Reported-by: Yan Fu yafu@redhat.com Signed-off-by: Stefano Garzarella sgarzare@redhat.com Acked-by: Stefan Hajnoczi stefanha@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/vmw_vsock/virtio_transport.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5d3cce9e8744d..9dae54698737c 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get();
+ if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; }
@@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
virtio_vsock_update_guest_cid(vsock);
- ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0;
-out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -669,7 +666,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex);
vdev->config->del_vqs(vdev); @@ -702,14 +698,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + }
static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); }
From: Stefano Garzarella sgarzare@redhat.com
[ Upstream commit 85965487abc540368393a15491e6e7fcd230039d ]
When the virtio transport device disappears, we should reset all connected sockets in order to inform the users.
Signed-off-by: Stefano Garzarella sgarzare@redhat.com Reviewed-by: Stefan Hajnoczi stefanha@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/vmw_vsock/virtio_transport.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 9dae54698737c..15eb5d3d47509 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -634,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work);
+ /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev);
mutex_lock(&vsock->rx_lock);
From: Andy Shevchenko andriy.shevchenko@linux.intel.com
[ Upstream commit 6454368a804c4955ccd116236037536f81e5b1f1 ]
In case of a mapping error the DMA addresses are invalid and continuing will corrupt system memory or potentially something else.
[ 222.480310] dmatest: dma0chan7-copy0: summary 1 tests, 3 failures 6 iops 349 KB/s (0) ... [ 240.912725] check: Corrupted low memory at 00000000c7c75ac9 (2940 phys) = 5656000000000000 [ 240.921998] check: Corrupted low memory at 000000005715a1cd (2948 phys) = 279f2aca5595ab2b [ 240.931280] check: Corrupted low memory at 000000002f4024c0 (2950 phys) = 5e5624f349e793cf ...
Abort any test if mapping failed.
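The control-flow change boils down to funnelling every failure inside the test loop through one label that releases the mapping and counts the failure. A hedged, generic userspace analogue of that shape (malloc/free stand in for the DMA mapping):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		int failed = 0;

		for (int i = 0; i < 4; i++) {
			int *mapping = malloc(sizeof(*mapping));

			if (i == 2)		/* simulate a mapping/prep/submit error */
				goto error_unmap_continue;

			free(mapping);		/* success path releases it as before */
			continue;

	error_unmap_continue:
			free(mapping);		/* single shared cleanup + accounting */
			failed++;
		}
		printf("failed: %d\n", failed);	/* 1 */
		return 0;
	}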
Fixes: 4076e755dbec ("dmatest: convert to dmaengine_unmap_data") Cc: Dan Williams dan.j.williams@intel.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/dma/dmatest.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-)
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index aa1712beb0cc3..7b7fba0c92532 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -642,11 +642,9 @@ static int dmatest_func(void *data) srcs[i] = um->addr[i] + src_off; ret = dma_mapping_error(dev->dev, um->addr[i]); if (ret) { - dmaengine_unmap_put(um); result("src mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->to_cnt++; } @@ -661,11 +659,9 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); ret = dma_mapping_error(dev->dev, dsts[i]); if (ret) { - dmaengine_unmap_put(um); result("dst mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->bidi_cnt++; } @@ -693,12 +689,10 @@ static int dmatest_func(void *data) }
if (!tx) { - dmaengine_unmap_put(um); result("prep error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; }
done->done = false; @@ -707,12 +701,10 @@ static int dmatest_func(void *data) cookie = tx->tx_submit(tx);
if (dma_submit_error(cookie)) { - dmaengine_unmap_put(um); result("submit error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } dma_async_issue_pending(chan);
@@ -725,16 +717,14 @@ static int dmatest_func(void *data) dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); - failed_tests++; - continue; + goto error_unmap_continue; } else if (status != DMA_COMPLETE) { dmaengine_unmap_put(um); result(status == DMA_ERROR ? "completion error status" : "completion busy status", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; }
dmaengine_unmap_put(um); @@ -779,6 +769,12 @@ static int dmatest_func(void *data) verbose_result("test passed", total_tests, src_off, dst_off, len, 0); } + + continue; + +error_unmap_continue: + dmaengine_unmap_put(um); + failed_tests++; } ktime = ktime_sub(ktime_get(), ktime); ktime = ktime_sub(ktime, comparetime);
From: Naresh Kamboju naresh.kamboju@linaro.org
[ Upstream commit 952b72f89ae23b316da8c1021b18d0c388ad6cc4 ]
In selftests the config fragment for netfilter was added as NF_TABLES_INET=y; this patch corrects it to CONFIG_NF_TABLES_INET=y.
Signed-off-by: Naresh Kamboju naresh.kamboju@linaro.org Acked-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/netfilter/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 1017313e41a85..59caa8f71cd80 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,2 @@ CONFIG_NET_NS=y -NF_TABLES_INET=y +CONFIG_NF_TABLES_INET=y
From: Florian Westphal fw@strlen.de
[ Upstream commit 98bfc3414bda335dbd7fec58bde6266f991801d7 ]
Check basic nat/redirect/masquerade for ipv4 and ipv6.
Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/nft_nat.sh | 762 +++++++++++++++++++ 2 files changed, 763 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/nft_nat.sh
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 47ed6cef93fb8..c9ff2b47bd1ca 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh
include ../lib.mk diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 0000000000000..8ec76681605cc --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in "packets 1 bytes 84" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out "packets 1 bytes 84" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in6 "$expect" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out6 "$expect" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in6 "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out "packets 0 bytes 0" + lret=1 + fi + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out6 "packets 0 bytes 0" + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir "$expect" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + if [ $? 
-ne 0 ]; then + bad_counter ns0 $ns$dir6 "$expect" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in 0 1 2;do + ip netns exec ns$i nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain output { + type nat hook output priority 0; policy accept; + ip6 daddr dead:1::99 dnat to dead:2::99 + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add ip6 dnat hook" + return $ksft_skip + fi + + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + ip netns exec ns0 nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain output { + type nat hook output priority 0; policy accept; + ip daddr 10.0.1.99 dnat to 10.0.2.99 + } +} +EOF + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + + ip netns exec ns0 nft flush chain ip nat output + + reset_counters + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? 
-ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns1$dir "$expect" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0$dir "$expect" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns2$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + + return $lret +} + + +test_masquerade6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 via ipv6" + return 1 + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip6 nat postrouting + if [ $? 
-ne 0 ]; then + echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + + return $lret +} + +test_masquerade() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: canot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + + return $lret +} + +test_redirect6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? 
-ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip6 nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete ip6 nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + + return $lret +} + +test_redirect() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip nat + if [ $? 
-ne 0 ]; then + echo "ERROR: Could not delete nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP redirection for ns2" + + return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - <<EOF +table inet filter { + counter ns0in {} + counter ns1in {} + counter ns2in {} + + counter ns0out {} + counter ns1out {} + counter ns2out {} + + counter ns0in6 {} + counter ns1in6 {} + counter ns2in6 {} + + counter ns0out6 {} + counter ns1out6 {} + counter ns2out6 {} + + map nsincounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0in", + 10.0.2.1 : "ns0in", + 10.0.1.99 : "ns1in", + 10.0.2.99 : "ns2in" } + } + + map nsincounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0in6", + dead:2::1 : "ns0in6", + dead:1::99 : "ns1in6", + dead:2::99 : "ns2in6" } + } + + map nsoutcounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0out", + 10.0.2.1 : "ns0out", + 10.0.1.99: "ns1out", + 10.0.2.99: "ns2out" } + } + + map nsoutcounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0out6", + dead:2::1 : "ns0out6", + dead:1::99 : "ns1out6", + dead:2::99 : "ns2out6" } + } + + chain input { + type filter hook input priority 0; policy accept; + counter name ip saddr map @nsincounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6 + } + chain output { + type filter hook output priority 0; policy accept; + counter name ip daddr map @nsoutcounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6 + } +} +EOF +done + +sleep 3 +# test basic connectivity +for i in 1 2; do + ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi + check_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + + check_ns0_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + reset_counters +done + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret
From: Martynas Pumputis martynas@weave.works
[ Upstream commit 4e35c1cb9460240e983a01745b5f29fe3a4d8e39 ]
It is possible that two concurrent packets originating from the same socket of a connection-less protocol (e.g. UDP) can end up having different IP_CT_DIR_REPLY tuples which results in one of the packets being dropped.
To illustrate this, consider the following simplified scenario:
1. Packets A and B are sent at the same time from two different threads by the same UDP socket. No matching conntrack entry exists yet. Both packets cause allocation of a new conntrack entry.

2. get_unique_tuple gets called for A. No clashing entry is found. The conntrack entry for A is added to the main conntrack table.

3. get_unique_tuple is called for B and finds that the reply tuple of B is already taken by A. It allocates a new UDP source port for B to resolve the clash.

4. The conntrack entry for B cannot be added to the main conntrack table because its ORIGINAL direction clashes with A and the REPLY directions of A and B are no longer the same due to the UDP source port reallocation done in step 3.
This patch modifies nf_conntrack_tuple_taken so it doesn't consider colliding reply tuples if the IP_CT_DIR_ORIGINAL tuples are equal.
[ Florian: simplify patch to not use .allow_clash setting and always ignore identical flows ]
Signed-off-by: Martynas Pumputis martynas@weave.works Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/nf_conntrack_core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 277d02a8cac8c..895171a2e1f18 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1007,6 +1007,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, }
if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1;
From: Will Deacon will.deacon@arm.com
[ Upstream commit d23c808c6fc6132e812690648e14c0d6b0cbe273 ]
When 52-bit virtual addressing is enabled for userspace (CONFIG_ARM64_USER_VA_BITS_52=y), the kernel continues to utilise 48-bit virtual addressing in TTBR1. Consequently, PTRS_PER_PGD reflects the larger page table size for userspace and the pgd pointer for kernel page tables is offset before being written to TTBR1.
This means that we can't use PTRS_PER_PGD to iterate over kernel page tables unless we apply the same offset, which is fiddly to get right and leads to some non-idiomatic walking code. Instead, just follow the usual pattern when walking page tables by using a while loop driven by pXd_offset() and pXd_addr_end().
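As a rough illustration of that usual pattern (this is only a sketch of the idiom, not the dump.c code itself; the hunks below apply the same shape at every level):

/* Hedged sketch of the pXd_offset()/pXd_addr_end() idiom at the PMD level:
 * visit every entry covering [start, end), descending where a table exists,
 * so the walk is bounded by the virtual range rather than by PTRS_PER_PMD. */
static void sketch_walk_pmd(struct pg_state *st, pud_t *pudp,
			    unsigned long start, unsigned long end)
{
	unsigned long next, addr = start;
	pmd_t *pmdp = pmd_offset(pudp, start);

	do {
		pmd_t pmd = READ_ONCE(*pmdp);

		next = pmd_addr_end(addr, end);	/* next PMD boundary, clamped to end */
		if (pmd_none(pmd) || pmd_sect(pmd))
			note_page(st, addr, 3, pmd_val(pmd));	/* hole or block mapping */
		else
			walk_pte(st, pmdp, addr, next);		/* descend to PTEs */
	} while (pmdp++, addr = next, addr != end);
}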
Reported-by: Qian Cai cai@lca.pw Tested-by: Qian Cai cai@lca.pw Acked-by: Steve Capper steve.capper@arm.com Tested-by: Steve Capper steve.capper@arm.com Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/mm/dump.c | 59 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 30 deletions(-)
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 65dfc8571bf83..39ac622a2d72c 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -286,74 +286,73 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
}
-static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start) +static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start, + unsigned long end) { - pte_t *ptep = pte_offset_kernel(pmdp, 0UL); - unsigned long addr; - unsigned i; + unsigned long addr = start; + pte_t *ptep = pte_offset_kernel(pmdp, start);
- for (i = 0; i < PTRS_PER_PTE; i++, ptep++) { - addr = start + i * PAGE_SIZE; + do { note_page(st, addr, 4, READ_ONCE(pte_val(*ptep))); - } + } while (ptep++, addr += PAGE_SIZE, addr != end); }
-static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start) +static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start, + unsigned long end) { - pmd_t *pmdp = pmd_offset(pudp, 0UL); - unsigned long addr; - unsigned i; + unsigned long next, addr = start; + pmd_t *pmdp = pmd_offset(pudp, start);
- for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) { + do { pmd_t pmd = READ_ONCE(*pmdp); + next = pmd_addr_end(addr, end);
- addr = start + i * PMD_SIZE; if (pmd_none(pmd) || pmd_sect(pmd)) { note_page(st, addr, 3, pmd_val(pmd)); } else { BUG_ON(pmd_bad(pmd)); - walk_pte(st, pmdp, addr); + walk_pte(st, pmdp, addr, next); } - } + } while (pmdp++, addr = next, addr != end); }
-static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start) +static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start, + unsigned long end) { - pud_t *pudp = pud_offset(pgdp, 0UL); - unsigned long addr; - unsigned i; + unsigned long next, addr = start; + pud_t *pudp = pud_offset(pgdp, start);
- for (i = 0; i < PTRS_PER_PUD; i++, pudp++) { + do { pud_t pud = READ_ONCE(*pudp); + next = pud_addr_end(addr, end);
- addr = start + i * PUD_SIZE; if (pud_none(pud) || pud_sect(pud)) { note_page(st, addr, 2, pud_val(pud)); } else { BUG_ON(pud_bad(pud)); - walk_pmd(st, pudp, addr); + walk_pmd(st, pudp, addr, next); } - } + } while (pudp++, addr = next, addr != end); }
static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) { - pgd_t *pgdp = pgd_offset(mm, 0UL); - unsigned i; - unsigned long addr; + unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0; + unsigned long next, addr = start; + pgd_t *pgdp = pgd_offset(mm, start);
- for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) { + do { pgd_t pgd = READ_ONCE(*pgdp); + next = pgd_addr_end(addr, end);
- addr = start + i * PGDIR_SIZE; if (pgd_none(pgd)) { note_page(st, addr, 1, pgd_val(pgd)); } else { BUG_ON(pgd_bad(pgd)); - walk_pud(st, pgdp, addr); + walk_pud(st, pgdp, addr, next); } - } + } while (pgdp++, addr = next, addr != end); }
void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
On Thu, Feb 28, 2019 at 10:10:48AM -0500, Sasha Levin wrote:
From: Will Deacon will.deacon@arm.com
[ Upstream commit d23c808c6fc6132e812690648e14c0d6b0cbe273 ]
When 52-bit virtual addressing is enabled for userspace (CONFIG_ARM64_USER_VA_BITS_52=y), the kernel continues to utilise 48-bit virtual addressing in TTBR1. Consequently, PTRS_PER_PGD reflects the larger page table size for userspace and the pgd pointer for kernel page tables is offset before being written to TTBR1.
This means that we can't use PTRS_PER_PGD to iterate over kernel page tables unless we apply the same offset, which is fiddly to get right and leads to some non-idiomatic walking code. Instead, just follow the usual pattern when walking page tables by using a while loop driven by pXd_offset() and pXd_addr_end().
Reported-by: Qian Cai cai@lca.pw Tested-by: Qian Cai cai@lca.pw Acked-by: Steve Capper steve.capper@arm.com Tested-by: Steve Capper steve.capper@arm.com Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org
arch/arm64/mm/dump.c | 59 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 30 deletions(-)
This isn't needed for any released kernels. In future, is there a tag I can use to prevent a patch from being picked up by the AUTOSEL bot? That way you can distinguish "I forgot to cc stable" from "I deliberately didn't cc stable".
Will
On Thu, Feb 28, 2019 at 03:18:22PM +0000, Will Deacon wrote:
This isn't needed for any released kernels. In future, is there a tag I can use to prevent a patch from being picked up by the AUTOSEL bot? That way you can distinguish "I forgot to cc stable" from "I deliberately didn't cc stable".
I've dropped it, thanks!
Generally the bot will look at the Fixes: tag, and if the commit it points to is not in the tree then the patch won't be picked up.
-- Thanks, Sasha
From: Julian Wiedmann jwi@linux.ibm.com
[ Upstream commit 5065b2dd3e5f9247a6c9d67974bc0472bf561b9d ]
Whenever we fail before/while starting an IO, make sure to release the IO buffer. Usually qeth_irq() would do this for us, but if the IO doesn't even start we obviously won't get an interrupt for it either.
Signed-off-by: Julian Wiedmann jwi@linux.ibm.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/net/qeth_core_main.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index b03515d43745d..4b3b9c4e46d2a 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -565,6 +565,7 @@ static int __qeth_issue_next_read(struct qeth_card *card) QETH_DBF_MESSAGE(2, "%s error in starting next read ccw! " "rc=%i\n", dev_name(&card->gdev->dev), rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); card->read_or_write_problem = 1; qeth_schedule_recovery(card); wake_up(&card->wait_q); @@ -1187,6 +1188,8 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(cdev, irb); if (rc) { card->read_or_write_problem = 1; + if (iob) + qeth_release_buffer(iob->channel, iob); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; @@ -1852,6 +1855,7 @@ static int qeth_idx_activate_get_answer(struct qeth_channel *channel, QETH_DBF_MESSAGE(2, "Error2 in activating channel rc=%d\n", rc); QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@ -1923,6 +1927,7 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel, rc); QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@ -2110,6 +2115,7 @@ int qeth_send_control_data(struct qeth_card *card, int len, } reply = qeth_alloc_reply(card); if (!reply) { + qeth_release_buffer(channel, iob); return -ENOMEM; } reply->callback = reply_cb;
From: Julian Wiedmann jwi@linux.ibm.com
[ Upstream commit afa0c5904ba16d59b0454f7ee4c807dae350f432 ]
The error path in qeth_alloc_qdio_buffers() that takes care of cleaning up the Output Queues is buggy. It first frees the queue, but then calls qeth_clear_outq_buffers() with that very queue struct.
Make the call to qeth_clear_outq_buffers() part of the free action (in the correct order), and while at it fix the naming of the helper.
Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann jwi@linux.ibm.com Reviewed-by: Alexandra Winter wintera@linux.ibm.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/net/qeth_core_main.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4b3b9c4e46d2a..56aacf32f71b0 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2454,11 +2454,12 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) return 0; }
-static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +static void qeth_free_output_queue(struct qeth_qdio_out_q *q) { if (!q) return;
+ qeth_clear_outq_buffers(q, 1); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@ -2532,10 +2533,8 @@ static int qeth_alloc_qdio_buffers(struct qeth_card *card) card->qdio.out_qs[i]->bufs[j] = NULL; } out_freeoutq: - while (i > 0) { - qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - } + while (i > 0) + qeth_free_output_queue(card->qdio.out_qs[--i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; out_freepool: @@ -2568,10 +2567,8 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) qeth_free_buffer_pool(card); /* free outbound qdio_qs */ if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - qeth_free_qdio_out_buf(card->qdio.out_qs[i]); - } + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_free_output_queue(card->qdio.out_qs[i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; }
From: Julian Wiedmann jwi@linux.ibm.com
[ Upstream commit c2780c1a3fb724560b1d44f7976e0de17bf153c7 ]
A card's close_dev work is scheduled on a driver-wide workqueue. If the card is removed and freed while the work is still active, this causes a use-after-free. So make sure that the work is completed before freeing the card.
Fixes: 0f54761d167f ("qeth: Support VEPA mode") Signed-off-by: Julian Wiedmann jwi@linux.ibm.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_l2_main.c | 2 ++ drivers/s390/net/qeth_l3_main.c | 1 + 3 files changed, 4 insertions(+)
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 970654fcc48d2..2d1f6a583641b 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -22,6 +22,7 @@ #include <linux/hashtable.h> #include <linux/ip.h> #include <linux/refcount.h> +#include <linux/workqueue.h>
#include <net/ipv6.h> #include <net/if_inet6.h> diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 76b2fba5fba22..b7513c5848cf9 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -854,6 +854,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
if (cgdev->state == CCWGROUP_ONLINE) qeth_l2_set_offline(cgdev); + + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index b7f6a8384543c..7f71ca0d08e7f 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2611,6 +2611,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l3_set_offline(cgdev);
+ cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); qeth_l3_clear_ip_htable(card, 0);
From: Jiri Olsa jolsa@redhat.com
[ Upstream commit 59a17706915fe5ea6f711e1f92d4fb706bce07fe ]
When perf is built with the annobin plugin (RHEL8 build) extra symbols are added to its binary:
# nm perf | grep annobin | head -10 0000000000241100 t .annobin_annotate.c 0000000000326490 t .annobin_annotate.c 0000000000249255 t .annobin_annotate.c_end 00000000003283a8 t .annobin_annotate.c_end 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bc3e2 t .annobin_annotate.c_end.unlikely 00000000001bc400 t .annobin_annotate.c_end.unlikely 00000000001bce18 t .annobin_annotate.c.hot 00000000001bce18 t .annobin_annotate.c.hot ...
Those symbols have no use for report or annotation and should be skipped. Moreover they interfere with the DWARF unwind test on the PPC arch, where they are mixed with checked symbols and then the test fails:
# perf test dwarf -v 59: Test dwarf unwind : --- start --- test child forked, pid 8515 unwind: .annobin_dwarf_unwind.c:ip = 0x10dba40dc (0x2740dc) ... got: .annobin_dwarf_unwind.c 0x10dba40dc, expecting test__arch_unwind_sample unwind: failed with 'no error'
The annobin symbols are defined as NOTYPE/LOCAL/HIDDEN:
# readelf -s ./perf | grep annobin | head -1 40: 00000000001bce4f 0 NOTYPE LOCAL HIDDEN 13 .annobin_init.c
They can still pass the check for label symbols. Add a check for HIDDEN and INTERNAL visibility (as suggested by Nick below) and filter out such symbols.
Just to be awkward, if you are going to ignore STV_HIDDEN symbols then you should probably also ignore STV_INTERNAL ones as well... Annobin does not generate them, but you never know, one day some other tool might create some.
Signed-off-by: Jiri Olsa jolsa@kernel.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Masami Hiramatsu mhiramat@kernel.org Cc: Michael Petlan mpetlan@redhat.com Cc: Namhyung Kim namhyung@kernel.org Cc: Nick Clifton nickc@redhat.com Cc: Peter Zijlstra peterz@infradead.org Link: http://lkml.kernel.org/r/20190128133526.GD15461@krava Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/symbol-elf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6e70cc00c1618..a701a8a48f005 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -87,6 +87,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); }
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym) +{ + return GELF_ST_VISIBILITY(sym->st_other); +} + #ifndef STT_GNU_IFUNC #define STT_GNU_IFUNC 10 #endif @@ -111,7 +116,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) return elf_sym__type(sym) == STT_NOTYPE && sym->st_name != 0 && sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; + sym->st_shndx != SHN_ABS && + elf_sym__visibility(sym) != STV_HIDDEN && + elf_sym__visibility(sym) != STV_INTERNAL; }
static bool elf_sym__filter(GElf_Sym *sym)
From: Arnaldo Carvalho de Melo acme@redhat.com
[ Upstream commit 6ab3bc240ade47a0f52bc16d97edd9accbe0024e ]
With a suitably defined "probe:vfs_getname" probe, 'perf trace' can "beautify" its output, so syscalls like open() or openat() can print the "filename" argument instead of just its hex address, like:
$ perf trace -e open -- touch /dev/null [...] 0.590 ( 0.014 ms): touch/18063 open(filename: /dev/null, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 [...]
The output without such a beautifier looks like:
0.529 ( 0.011 ms): touch/18075 open(filename: 0xc78cf288, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3
However, when the vfs_getname probe expands to multiple probes and it is not the first one that is hit, the beautifier fails, as follows:
0.326 ( 0.010 ms): touch/18072 open(filename: , flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3
Fix it by hooking into all the expanded probes (inlines), now, for instance:
[root@quaco ~]# perf probe -l probe:vfs_getname (on getname_flags:73@fs/namei.c with pathname) probe:vfs_getname_1 (on getname_flags:73@fs/namei.c with pathname) [root@quaco ~]# perf trace -e open* sleep 1 0.010 ( 0.005 ms): sleep/5588 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: RDONLY|CLOEXEC) = 3 0.029 ( 0.006 ms): sleep/5588 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: RDONLY|CLOEXEC) = 3 0.194 ( 0.008 ms): sleep/5588 openat(dfd: CWD, filename: /usr/lib/locale/locale-archive, flags: RDONLY|CLOEXEC) = 3 [root@quaco ~]#
Works, further verified with:
[root@quaco ~]# perf test vfs 65: Use vfs_getname probe to get syscall args filenames : Ok 66: Add vfs_getname probe to get syscall args filenames : Ok 67: Check open filename arg using perf trace + vfs_getname: Ok [root@quaco ~]#
Reported-by: Michael Petlan mpetlan@redhat.com Tested-by: Michael Petlan mpetlan@redhat.com Cc: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@redhat.com Cc: Namhyung Kim namhyung@kernel.org Link: https://lkml.kernel.org/n/tip-mv8kolk17xla1smvmp3qabv1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/builtin-trace.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 22ab8e67c7600..3f43aedb384d4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2263,19 +2263,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { - struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); + bool found = false; + struct perf_evsel *evsel, *tmp; + struct parse_events_error err = { .idx = 0, }; + int ret = parse_events(evlist, "probe:vfs_getname*", &err);
- if (IS_ERR(evsel)) + if (ret) return false;
- if (perf_evsel__field(evsel, "pathname") == NULL) { + evlist__for_each_entry_safe(evlist, evsel, tmp) { + if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + continue; + + if (perf_evsel__field(evsel, "pathname")) { + evsel->handler = trace__vfs_getname; + found = true; + continue; + } + + list_del_init(&evsel->node); + evsel->evlist = NULL; perf_evsel__delete(evsel); - return false; }
- evsel->handler = trace__vfs_getname; - perf_evlist__add(evlist, evsel); - return true; + return found; }
static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
From: Huacai Chen chenhc@lemote.com
[ Upstream commit e02e07e3127d8aec1f4bcdfb2fc52a2d99b4859e ]
On the Loongson-2G/2H/3A/3B there is a hardware flaw: ll/sc and lld/scd have very weak ordering. We should add sync instructions "before each ll/lld" and "at the branch-target between ll/sc" to work around it. Otherwise, this flaw will occasionally cause deadlocks (e.g. when doing heavy load testing with LTP).
Below is the explanation from the CPU designer:
"For Loongson 3 family, when a memory access instruction (load, store, or prefetch)'s executing occurs between the execution of LL and SC, the success or failure of SC is not predictable. Although programmer would not insert memory access instructions between LL and SC, the memory instructions before LL in program-order, may dynamically executed between the execution of LL/SC, so a memory fence (SYNC) is needed before LL/LLD to avoid this situation.
Since Loongson-3A R2 (3A2000), we have improved our hardware design to handle this case. But we later deduced a rare circumstance in which some memory instructions speculatively executed between LL/SC, due to branch misprediction, still fall into the above case, so a memory fence (SYNC) at the branch-target (if the target is not between LL/SC) is needed for Loongson 3A1000, 3B1500, 3A2000 and 3A3000.
Our processor is continually evolving and we aim to remove all these workaround-SYNCs around LL/SC for newer processors."
Here is an example:
Both cpu1 and cpu2 simultaneously run atomic_add by 1 on the same atomic variable. This bug can cause the 'sc' run by both cpus (in atomic_add) to succeed at the same time ('sc' returns 1), yet the variable is sometimes only *added by 1*, which is wrong and unacceptable (it should be added by 2).
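As a rough, simplified illustration of where the workaround barrier goes (this is not the exact atomic.h code from the hunks below; constraints and .set directives are omitted): loongson_llsc_mb() expands to a sync on affected CPUs and to nothing otherwise, and it sits immediately before the ll so earlier memory accesses cannot slip into the ll/sc window.

/* Hedged sketch only, not the kernel implementation. */
static inline void example_atomic_add(int i, atomic_t *v)
{
	int temp;

	loongson_llsc_mb();	/* sync: keep earlier accesses out of the ll/sc window */
	__asm__ __volatile__(
	"1:	ll	%0, %1		# load-linked the counter	\n"
	"	addu	%0, %2		# temp += i			\n"
	"	sc	%0, %1		# store-conditional		\n"
	"	beqz	%0, 1b		# %0 == 0 means sc failed: retry\n"
	: "=&r" (temp), "+m" (v->counter)
	: "Ir" (i));
}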
Why disable fix-loongson3-llsc in the compiler? Because the compiler fix would cause problems in the kernel's __ex_table section.
This patch fixes all the cases in the kernel, but:
+. the fix at the end of futex_atomic_cmpxchg_inatomic is for the branch-target of 'bne'; there are other cases in which smp_mb__before_llsc() and smp_llsc_mb() coincidentally fix both the ll and the branch-target, such as atomic_sub_if_positive/cmpxchg/xchg, just like this one.
+. Loongson 3 does not support CONFIG_EDAC_ATOMIC_SCRUB, so there is no need to touch edac.h
+. local_ops and cmpxchg_local should not be affected by this bug since only the owner can write.
+. mips_atomic_set for syscall.c is deprecated and rarely used, so just let it go
Signed-off-by: Huacai Chen chenhc@lemote.com Signed-off-by: Huang Pei huangpei@loongson.cn [paul.burton@mips.com: - Simplify the addition of -mno-fix-loongson3-llsc to cflags, and add a comment describing why it's there. - Make loongson_llsc_mb() a no-op when CONFIG_CPU_LOONGSON3_WORKAROUNDS=n, rather than a compiler memory barrier. - Add a comment describing the bug & how loongson_llsc_mb() helps in asm/barrier.h.] Signed-off-by: Paul Burton paul.burton@mips.com Cc: Ralf Baechle ralf@linux-mips.org Cc: ambrosehua@gmail.com Cc: Steven J . Hill Steven.Hill@cavium.com Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang zhangfx@lemote.com Cc: Zhangjin Wu wuzhangjin@gmail.com Cc: Li Xuefeng lixuefeng@loongson.cn Cc: Xu Chenghua xuchenghua@loongson.cn Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/Kconfig | 15 ++++++++++++++ arch/mips/include/asm/atomic.h | 6 ++++++ arch/mips/include/asm/barrier.h | 36 +++++++++++++++++++++++++++++++++ arch/mips/include/asm/bitops.h | 5 +++++ arch/mips/include/asm/futex.h | 3 +++ arch/mips/include/asm/pgtable.h | 2 ++ arch/mips/loongson64/Platform | 23 +++++++++++++++++++++ arch/mips/mm/tlbex.c | 10 +++++++++ 8 files changed, 100 insertions(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 201caf226b47b..2f1b20543ce74 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1397,6 +1397,21 @@ config LOONGSON3_ENHANCEMENT please say 'N' here. If you want a high-performance kernel to run on new Loongson 3 machines only, please say 'Y' here.
+config CPU_LOONGSON3_WORKAROUNDS + bool "Old Loongson 3 LLSC Workarounds" + default y if SMP + depends on CPU_LOONGSON3 + help + Loongson 3 processors have the llsc issues which require workarounds. + Without workarounds the system may hang unexpectedly. + + Newer Loongson 3 will fix these issues and no workarounds are needed. + The workarounds have no significant side effect on them but may + decrease the performance of the system so this option should be + disabled unless the kernel is intended to be run on old systems. + + If unsure, please say Y. + config CPU_LOONGSON2E bool "Loongson 2E" depends on SYS_HAS_CPU_LOONGSON2E diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 9e805317847d8..1fc6f04e85a1c 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set "MIPS_ISA_LEVEL" \n" \ "1: ll %0, %1 # atomic_" #op " \n" \ @@ -84,6 +85,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set "MIPS_ISA_LEVEL" \n" \ "1: ll %1, %2 # atomic_" #op "_return \n" \ @@ -116,6 +118,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set "MIPS_ISA_LEVEL" \n" \ "1: ll %1, %2 # atomic_fetch_" #op " \n" \ @@ -251,6 +254,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set "MIPS_ISA_LEVEL" \n" \ "1: lld %0, %1 # atomic64_" #op " \n" \ @@ -277,6 +281,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set "MIPS_ISA_LEVEL" \n" \ "1: lld %1, %2 # atomic64_" #op "_return\n" \ @@ -309,6 +314,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set "MIPS_ISA_LEVEL" \n" \ "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index a5eb1bb199a7f..b7f6ac5e513c9 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -222,6 +222,42 @@ #define __smp_mb__before_atomic() __smp_mb__before_llsc() #define __smp_mb__after_atomic() smp_llsc_mb()
+/* + * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load, + * store or pref) in between an ll & sc can cause the sc instruction to + * erroneously succeed, breaking atomicity. Whilst it's unusual to write code + * containing such sequences, this bug bites harder than we might otherwise + * expect due to reordering & speculation: + * + * 1) A memory access appearing prior to the ll in program order may actually + * be executed after the ll - this is the reordering case. + * + * In order to avoid this we need to place a memory barrier (ie. a sync + * instruction) prior to every ll instruction, in between it & any earlier + * memory access instructions. Many of these cases are already covered by + * smp_mb__before_llsc() but for the remaining cases, typically ones in + * which multiple CPUs may operate on a memory location but ordering is not + * usually guaranteed, we use loongson_llsc_mb() below. + * + * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later. + * + * 2) If a conditional branch exists between an ll & sc with a target outside + * of the ll-sc loop, for example an exit upon value mismatch in cmpxchg() + * or similar, then misprediction of the branch may allow speculative + * execution of memory accesses from outside of the ll-sc loop. + * + * In order to avoid this we need a memory barrier (ie. a sync instruction) + * at each affected branch target, for which we also use loongson_llsc_mb() + * defined below. + * + * This case affects all current Loongson 3 CPUs. + */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */ +#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") +#else +#define loongson_llsc_mb() do { } while (0) +#endif + #include <asm-generic/barrier.h>
#endif /* __ASM_BARRIER_H */ diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index da1b8718861e0..2a40ecd69ac49 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -68,6 +68,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m)); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + loongson_llsc_mb(); do { __asm__ __volatile__( " " __LL "%0, %1 # set_bit \n" @@ -78,6 +79,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ } else if (kernel_uses_llsc) { + loongson_llsc_mb(); do { __asm__ __volatile__( " .set "MIPS_ISA_ARCH_LEVEL" \n" @@ -120,6 +122,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (~(1UL << bit))); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + loongson_llsc_mb(); do { __asm__ __volatile__( " " __LL "%0, %1 # clear_bit \n" @@ -130,6 +133,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ } else if (kernel_uses_llsc) { + loongson_llsc_mb(); do { __asm__ __volatile__( " .set "MIPS_ISA_ARCH_LEVEL" \n" @@ -188,6 +192,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp;
+ loongson_llsc_mb(); do { __asm__ __volatile__( " .set "MIPS_ISA_ARCH_LEVEL" \n" diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index a9e61ea54ca96..0a62a91b592d6 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -50,6 +50,7 @@ "i" (-EFAULT) \ : "memory"); \ } else if (cpu_has_llsc) { \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ @@ -162,6 +163,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { + loongson_llsc_mb(); __asm__ __volatile__( "# futex_atomic_cmpxchg_inatomic \n" " .set push \n" @@ -190,6 +192,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); + loongson_llsc_mb(); } else return -ENOSYS;
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 129e0328367f2..6a35bbf46b93c 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -229,6 +229,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); } else if (kernel_uses_llsc) { + loongson_llsc_mb(); __asm__ __volatile__ ( " .set "MIPS_ISA_ARCH_LEVEL" \n" " .set push \n" @@ -244,6 +245,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) " .set mips0 \n" : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); + loongson_llsc_mb(); } #else /* !CONFIG_SMP */ if (pte_none(*buddy)) diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 0fce4608aa886..c1a4d4dc46655 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -23,6 +23,29 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS endif
cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap + +# +# Some versions of binutils, not currently mainline as of 2019/02/04, support +# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction +# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a +# description). +# +# We disable this in order to prevent the assembler meddling with the +# instruction that labels refer to, ie. if we label an ll instruction: +# +# 1: ll v0, 0(a0) +# +# ...then with the assembler fix applied the label may actually point at a sync +# instruction inserted by the assembler, and if we were using the label in an +# exception table the table would no longer contain the address of the ll +# instruction. +# +# Avoid this by explicitly disabling that assembler behaviour. If upstream +# binutils does not merge support for the flag then we can revisit & remove +# this later - for now it ensures vendor toolchains don't cause problems. +# +cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) + # # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a # as MIPS64 R2; older versions as just R1. This leaves the possibility open diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 0677142916431..64db0400a8be1 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -931,6 +931,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, * to mimic that here by taking a load/istream page * fault. */ + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); uasm_i_jr(p, ptr);
@@ -1645,6 +1647,8 @@ static void iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) { #ifdef CONFIG_SMP + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); # ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_lld(p, pte, 0, ptr); @@ -2258,6 +2262,8 @@ static void build_r4000_tlb_load_handler(void) #endif
uasm_l_nopage_tlbl(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_0 & 1) { @@ -2312,6 +2318,8 @@ static void build_r4000_tlb_store_handler(void) #endif
uasm_l_nopage_tlbs(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) { @@ -2367,6 +2375,8 @@ static void build_r4000_tlb_modify_handler(void) #endif
uasm_l_nopage_tlbm(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) {
From: Jun-Ru Chang jrjang@realtek.com
[ Upstream commit 2b424cfc69728224fcb5fad138ea7260728e0901 ]
Patch (b6c7a324df37b "MIPS: Fix get_frame_info() handling of microMIPS function size.") introduced an additional function size check for microMIPS by only checking insns between ip and ip + func_size. However, func_size in get_frame_info() is always 0 if KALLSYMS is not enabled. This causes get_frame_info() to return immediately without calculating the correct frame_size, which in turn causes "Can't analyze schedule() prologue" warning messages at boot time.
This patch removes the func_size check and lets the frame_size check run for up to 128 insns for both MIPS and microMIPS.
Signed-off-by: Jun-Ru Chang jrjang@realtek.com Signed-off-by: Tony Wu tonywu@realtek.com Signed-off-by: Paul Burton paul.burton@mips.com Fixes: b6c7a324df37b ("MIPS: Fix get_frame_info() handling of microMIPS function size.") Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: macro@mips.com Cc: yamada.masahiro@socionext.com Cc: peterz@infradead.org Cc: mingo@kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/kernel/process.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index d4f7fd4550e10..85522c137f19f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -371,7 +371,7 @@ static inline int is_sp_move_ins(union mips_instruction *ip, int *frame_size) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip, *ip_end; + union mips_instruction insn, *ip; const unsigned int max_insns = 128; unsigned int last_insn_size = 0; unsigned int i; @@ -384,10 +384,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err;
- ip_end = (void *)ip + info->func_size; - - for (i = 0; i < max_insns && ip < ip_end; i++) { + for (i = 0; i < max_insns; i++) { ip = (void *)ip + last_insn_size; + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.word = ip->halfword[0] << 16; last_insn_size = 2;
From: Ross Lagerwall ross.lagerwall@citrix.com
[ Upstream commit d8f6382a7d026989029e2e50c515df954488459b ]
This reverts commit bbc0f8bd88abefb0f27998f40a073634a3a2db89.
It added a warning whose intent was to check whether the rport was still linked into the peer list. It doesn't work as intended and gives false positive warnings for two reasons:
1) If the rport is never linked into the peer list it will not be considered empty since the list_head is never initialized.
2) If the rport is deleted from the peer list using list_del_rcu(), then the list_head is left in an undefined state and it is never considered empty (see the sketch below).
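As a minimal sketch of why list_empty() cannot answer the question here (struct and helper names are hypothetical, for illustration only):

#include <linux/list.h>
#include <linux/rculist.h>

struct demo_rport {
	struct list_head peers;
};

/* Only meaningful if 'peers' was INIT_LIST_HEAD()'ed and removal used
 * list_del_init().  For the rport peer list neither holds:
 *  - an rport that was never added has an uninitialized list_head, so this
 *    check reads garbage rather than "empty";
 *  - list_del_rcu() leaves ->next pointing at the old neighbour (and poisons
 *    ->prev), so the entry is never considered empty afterwards.
 * Either way WARN_ON(!list_empty(...)) can fire for a correctly torn-down
 * rport, which is why the warning is reverted. */
static bool demo_rport_looks_unlinked(struct demo_rport *rdata)
{
	return list_empty(&rdata->peers);
}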
Signed-off-by: Ross Lagerwall ross.lagerwall@citrix.com Reviewed-by: Hannes Reinecke hare@suse.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/libfc/fc_rport.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 372387a450dff..1797e47fab386 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -184,7 +184,6 @@ void fc_rport_destroy(struct kref *kref) struct fc_rport_priv *rdata;
rdata = container_of(kref, struct fc_rport_priv, kref); - WARN_ON(!list_empty(&rdata->peers)); kfree_rcu(rdata, rcu); } EXPORT_SYMBOL(fc_rport_destroy);
From: Tony Lindgren tony@atomide.com
[ Upstream commit c6e2bd956936d925748581e4d0294f10f1d92f2c ]
We currently get the following error with pixcir_ts driver during a suspend resume cycle:
omap_i2c 4802a000.i2c: controller timed out pixcir_ts 1-005c: pixcir_int_enable: can't read reg 0x34 : -110 pixcir_ts 1-005c: Failed to disable interrupt generation: -110 pixcir_ts 1-005c: Failed to stop dpm_run_callback(): pixcir_i2c_ts_resume+0x0/0x98 [pixcir_i2c_ts] returns -110 PM: Device 1-005c failed to resume: error -110
And at least am437x based devices with pixcir_ts will fail to resume to a touchscreen that is configured as the wakeup-source in device tree for these devices.
This is because pixcir_ts tries to reconfigure its registers during noirq suspend, which fails. This also leaves i2c-omap in an enabled state over suspend.
Let's fix the pixcir_ts issue and make sure i2c-omap is suspended by adding SET_NOIRQ_SYSTEM_SLEEP_PM_OPS.
Let's also get rid of some ifdefs while at it and replace them with __maybe_unused as SET_RUNTIME_PM_OPS and SET_NOIRQ_SYSTEM_SLEEP_PM_OPS already deal with the various PM Kconfig options.
Reported-by: Keerthy j-keerthy@ti.com Signed-off-by: Tony Lindgren tony@atomide.com Acked-by: Vignesh R vigneshr@ti.com Signed-off-by: Wolfram Sang wsa@the-dreams.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-omap.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 65d06a8193074..2ac86096ddd95 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1498,8 +1498,7 @@ static int omap_i2c_remove(struct platform_device *pdev) return 0; }
-#ifdef CONFIG_PM -static int omap_i2c_runtime_suspend(struct device *dev) +static int __maybe_unused omap_i2c_runtime_suspend(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev);
@@ -1525,7 +1524,7 @@ static int omap_i2c_runtime_suspend(struct device *dev) return 0; }
-static int omap_i2c_runtime_resume(struct device *dev) +static int __maybe_unused omap_i2c_runtime_resume(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev);
@@ -1540,20 +1539,18 @@ static int omap_i2c_runtime_resume(struct device *dev) }
static const struct dev_pm_ops omap_i2c_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(omap_i2c_runtime_suspend, omap_i2c_runtime_resume, NULL) }; -#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops) -#else -#define OMAP_I2C_PM_OPS NULL -#endif /* CONFIG_PM */
static struct platform_driver omap_i2c_driver = { .probe = omap_i2c_probe, .remove = omap_i2c_remove, .driver = { .name = "omap_i2c", - .pm = OMAP_I2C_PM_OPS, + .pm = &omap_i2c_pm_ops, .of_match_table = of_match_ptr(omap_i2c_of_match), }, };
From: Philip Yang Philip.Yang@amd.com
[ Upstream commit 0a5f49cbf9d6ad3721c16f8a6d823363ea7a160f ]
amdgpu_vm_get_task_info() is called from the interrupt handler and the sched timeout workqueue, so we should use the irqsave version of spin_lock to avoid deadlock.
Signed-off-by: Philip Yang Philip.Yang@amd.com Reviewed-by: Christian König christian.koenig@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6a84526e20e09..49fe5084c53dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3011,14 +3011,15 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info) { struct amdgpu_vm *vm; + unsigned long flags;
- spin_lock(&adev->vm_manager.pasid_lock); + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
vm = idr_find(&adev->vm_manager.pasid_idr, pasid); if (vm) *task_info = vm->task_info;
- spin_unlock(&adev->vm_manager.pasid_lock); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); }
/**
From: Keith Busch keith.busch@intel.com
[ Upstream commit e7ad43c3eda6a1690c4c3c341f95dc1c6898da83 ]
If a controller supports the NS Change Notification, the namespace scan_work is automatically triggered after attaching a new namespace.
Occasionally the namespace scan_work may append the new namespace to the list before the admin command effects handling is completed. The effects handling unfreezes namespaces, but if it unfreezes the newly attached namespace, its request_queue freeze depth will be off and we'll hit the warning in blk_mq_unfreeze_queue().
On the next namespace add, we will fail to freeze that queue due to the previous bad accounting and deadlock waiting for frozen.
Fix that by preventing scan work from altering the namespace list while command effects handling needs to pair freeze with unfreeze.
Reported-by: Wen Xiong wenxiong@us.ibm.com Tested-by: Wen Xiong wenxiong@us.ibm.com Signed-off-by: Keith Busch keith.busch@intel.com Reviewed-by: Chaitanya Kulkarni chaitanya.kulkarni@wdc.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/core.c | 8 +++++++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e0d2b74739018..2cdb3032ca0fc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1182,6 +1182,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, * effects say only one namespace is affected. */ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + mutex_lock(&ctrl->scan_lock); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); } @@ -1210,8 +1211,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) */ if (effects & NVME_CMD_EFFECTS_LBCC) nvme_update_formats(ctrl); - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { nvme_unfreeze(ctrl); + mutex_unlock(&ctrl->scan_lock); + } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) @@ -3292,6 +3295,7 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_identify_ctrl(ctrl, &id)) return;
+ mutex_lock(&ctrl->scan_lock); nn = le32_to_cpu(id->nn); if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { @@ -3300,6 +3304,7 @@ static void nvme_scan_work(struct work_struct *work) } nvme_scan_ns_sequential(ctrl, nn); out_free_id: + mutex_unlock(&ctrl->scan_lock); kfree(id); down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); @@ -3535,6 +3540,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->state = NVME_CTRL_NEW; spin_lock_init(&ctrl->lock); + mutex_init(&ctrl->scan_lock); INIT_LIST_HEAD(&ctrl->namespaces); init_rwsem(&ctrl->namespaces_rwsem); ctrl->dev = dev; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 60220de2db52b..e82cdaec81c9c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -148,6 +148,7 @@ struct nvme_ctrl { enum nvme_ctrl_state state; bool identified; spinlock_t lock; + struct mutex scan_lock; const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; struct request_queue *connect_q;
From: Keith Busch keith.busch@intel.com
[ Upstream commit 5c959d73dba6495ec01d04c206ee679d61ccb2b0 ]
A surprise removal may fail to tear down request queues if it is racing with the initial asynchronous probe. If that happens, the remove path won't see the queue resources to tear down, and the controller reset path may create a new request queue on a removed device, but will not be able to make forward progress, deadlocking the pci removal.
Protect setting up non-blocking resources from a shutdown by holding the same mutex, and transition to the CONNECTING state after these resources are initialized so the probe path may see the dead controller state before dispatching new IO.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202081 Reported-by: Alex Gagniuc Alex_Gagniuc@Dellteam.com Signed-off-by: Keith Busch keith.busch@intel.com Tested-by: Alex Gagniuc mr.nuke.me@gmail.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/pci.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f46313f441ece..6398ffbce6dea 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2260,16 +2260,7 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false);
- /* - * Introduce CONNECTING state from nvme-fc/rdma transports to mark the - * initializing procedure here. - */ - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { - dev_warn(dev->ctrl.device, - "failed to mark controller CONNECTING\n"); - goto out; - } - + mutex_lock(&dev->shutdown_lock); result = nvme_pci_enable(dev); if (result) goto out; @@ -2288,6 +2279,17 @@ static void nvme_reset_work(struct work_struct *work) */ dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; dev->ctrl.max_segments = NVME_MAX_SEGS; + mutex_unlock(&dev->shutdown_lock); + + /* + * Introduce CONNECTING state from nvme-fc/rdma transports to mark the + * initializing procedure here. + */ + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { + dev_warn(dev->ctrl.device, + "failed to mark controller CONNECTING\n"); + goto out; + }
result = nvme_init_identify(&dev->ctrl); if (result)
On Thu, Feb 28, 2019 at 10:11:00AM -0500, Sasha Levin wrote:
From: Keith Busch keith.busch@intel.com
[ Upstream commit 5c959d73dba6495ec01d04c206ee679d61ccb2b0 ]
This patch was broken, so please make sure to include the one that fixes this:
Upstream commit 4726bcf30fad37cc555cd9dcd6c73f2b2668c879 ("nvme-pci: add missing unlock for reset error")
On Thu, Feb 28, 2019 at 08:16:43AM -0700, Keith Busch wrote:
This patch was broken, so please make sure to include the one that fixes this:
Upstream commit 4726bcf30fad37cc555cd9dcd6c73f2b2668c879 ("nvme-pci: add missing unlock for reset error")
I've queued this one as well, thank you.
-- Thanks, Sasha
From: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp
[ Upstream commit 43636c804df0126da669c261fc820fb22f62bfc2 ]
When something lets __find_get_block_slow() hit the all_mapped path, it calls printk() 100+ times per second. But there is no need to print the same message with such high frequency; it just asks for a stall warning, or at least bloats the log files.
[ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.873324][T15342] b_state=0x00000029, b_size=512 [ 399.878403][T15342] device loop0 blocksize: 4096 [ 399.883296][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.890400][T15342] b_state=0x00000029, b_size=512 [ 399.895595][T15342] device loop0 blocksize: 4096 [ 399.900556][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.907471][T15342] b_state=0x00000029, b_size=512 [ 399.912506][T15342] device loop0 blocksize: 4096
This patch reduces the frequency to at most once per second, in addition to concatenating the three lines into one.
[ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8, b_state=0x00000029, b_size=512, device loop0 blocksize: 4096
Signed-off-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Reviewed-by: Jan Kara jack@suse.cz Cc: Dmitry Vyukov dvyukov@google.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- fs/buffer.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c index 6f1ae3ac97896..c083c4b3c1e7c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) struct buffer_head *head; struct page *page; int all_mapped = 1; + static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); @@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ - if (all_mapped) { - printk("__find_get_block_slow() failed. " - "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, - (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%zu\n", - bh->b_state, bh->b_size); - printk("device %pg blocksize: %d\n", bdev, - 1 << bd_inode->i_blkbits); + ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); + if (all_mapped && __ratelimit(&last_warned)) { + printk("__find_get_block_slow() failed. block=%llu, " + "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " + "device %pg blocksize: %d\n", + (unsigned long long)block, + (unsigned long long)bh->b_blocknr, + bh->b_state, bh->b_size, bdev, + 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->private_lock);
From: Manish Chopra manishc@marvell.com
[ Upstream commit 660492bcf4a7561b5fdc13be0ae0b0c0a8c120be ]
When slowpath messages are sent at a high rate, the resulting events can lead to a FW assert if they are not handled fast enough (Event Queue Full assert). Attempt to send queued slowpath messages only after the newly evacuated entries in the EQ ring have been indicated to the FW.
Signed-off-by: Manish Chopra manishc@marvell.com Signed-off-by: Ariel Elior aelior@marvell.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qed/qed_sp.h | 1 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 3157c0d994417..dae2896e1d8e4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -380,6 +380,7 @@ void qed_consq_setup(struct qed_hwfn *p_hwfn); * @param p_hwfn */ void qed_consq_free(struct qed_hwfn *p_hwfn); +int qed_spq_pend_post(struct qed_hwfn *p_hwfn);
/** * @file diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 7106ad17afe2e..a0ee847f379bb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -402,6 +402,11 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie)
qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain));
+ /* Attempt to post pending requests */ + spin_lock_bh(&p_hwfn->p_spq->lock); + rc = qed_spq_pend_post(p_hwfn); + spin_unlock_bh(&p_hwfn->p_spq->lock); + return rc; }
@@ -745,7 +750,7 @@ static int qed_spq_post_list(struct qed_hwfn *p_hwfn, return 0; }
-static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) +int qed_spq_pend_post(struct qed_hwfn *p_hwfn) { struct qed_spq *p_spq = p_hwfn->p_spq; struct qed_spq_entry *p_ent = NULL; @@ -883,7 +888,6 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent = NULL; struct qed_spq_entry *tmp; struct qed_spq_entry *found = NULL; - int rc;
if (!p_hwfn) return -EINVAL; @@ -941,12 +945,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, */ qed_spq_return_entry(p_hwfn, found);
- /* Attempt to post pending requests */ - spin_lock_bh(&p_spq->lock); - rc = qed_spq_pend_post(p_hwfn); - spin_unlock_bh(&p_spq->lock); - - return rc; + return 0; }
int qed_consq_alloc(struct qed_hwfn *p_hwfn)
From: Sudarsana Reddy Kalluru skalluru@marvell.com
[ Upstream commit fb1faab74ddef9ec2d841d54e5d0912a097b3abe ]
The maximum number of supported queues is derived incorrectly in the multi-CoS case. The TCs need to be considered while calculating num_queues for the PF.
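To make the arithmetic concrete, a small sketch (the helper and the example numbers are illustrative only): each queue needs one rx and one xdp context plus one tx context per traffic class, so the available connections are divided by (2 + num_tc) rather than by a fixed 3, as in the hunk below.

#include <linux/kernel.h>
#include <linux/types.h>

/* Hypothetical helper mirroring the new calculation; num_tc would come
 * from info->num_tc in qed_fill_eth_dev_info().
 */
static u16 pf_queue_estimate(u16 num_cons, u8 num_tc, u16 l2_queues)
{
        u16 cids = num_cons / (2 + num_tc);

        return min_t(u16, l2_queues, cids);
}

/* Example: num_cons = 192, l2_queues = 64, num_tc = 4
 * old formula: min(64, 192 / 3)       = 64 queues (too many)
 * new formula: min(64, 192 / (2 + 4)) = 32 queues
 */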
Signed-off-by: Sudarsana Reddy Kalluru skalluru@marvell.com Signed-off-by: Ariel Elior aelior@marvell.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 67c02ea939062..bde6f5f3bf8f7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2207,7 +2207,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 num_queues = 0;
/* Since the feature controls only queue-zones, - * make sure we have the contexts [rx, tx, xdp] to + * make sure we have the contexts [rx, xdp, tcs] to * match. */ for_each_hwfn(cdev, i) { @@ -2217,7 +2217,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 cids;
cids = hwfn->pf_params.eth_pf_params.num_cons; - num_queues += min_t(u16, l2_queues, cids / 3); + cids /= (2 + info->num_tc); + num_queues += min_t(u16, l2_queues, cids); }
/* queues might theoretically be >256, but interrupts'
From: Sudarsana Reddy Kalluru skalluru@marvell.com
[ Upstream commit 0aa4febb420d91df5b56b1864a2465765da85f4b ]
Under heavy traffic load, when the number of channels is changed via ethtool (ethtool -L), which causes the interface to be reloaded, it was observed that some packets get transmitted on an old TX channel/queue id that no longer exists after the channel reconfiguration, leading to a system crash.
Add a safeguard in the driver by validating the queue id in ndo_select_queue(), which is called before ndo_start_xmit().
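The general shape of such a safeguard, independent of qede internals (hypothetical function name; the 4.19-era ndo_select_queue signature with the fallback helper is assumed; the actual qede implementation is in the hunk below):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical example, not the qede code: clamp whatever queue the
 * stack's fallback picks to the number of TX queues that exist right
 * now.  qede uses QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc instead
 * of real_num_tx_queues.
 */
static u16 clamped_select_queue(struct net_device *dev, struct sk_buff *skb,
                                struct net_device *sb_dev,
                                select_queue_fallback_t fallback)
{
        u16 total_txq = dev->real_num_tx_queues;

        return total_txq ? fallback(dev, skb, NULL) % total_txq : 0;
}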
Signed-off-by: Sudarsana Reddy Kalluru skalluru@marvell.com Signed-off-by: Ariel Elior aelior@marvell.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qede/qede.h | 3 +++ drivers/net/ethernet/qlogic/qede/qede_fp.c | 13 +++++++++++++ drivers/net/ethernet/qlogic/qede/qede_main.c | 3 +++ 3 files changed, 19 insertions(+)
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 6a4d266fb8e21..d242a57240691 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -489,6 +489,9 @@ struct qede_reload_args {
/* Datapath functions definition */ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev); +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback); netdev_features_t qede_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 1a78027de071f..a96da16f34049 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1695,6 +1695,19 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; }
+u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback) +{ + struct qede_dev *edev = netdev_priv(dev); + int total_txq; + + total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc; + + return QEDE_TSS_COUNT(edev) ? + fallback(dev, skb, NULL) % total_txq : 0; +} + /* 8B udp header + 8B base tunnel header + 32B option length */ #define QEDE_MAX_TUN_HDR_LEN 48
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 46d0f2eaa0c09..f3d9c40c41159 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -631,6 +631,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -666,6 +667,7 @@ static const struct net_device_ops qede_netdev_vf_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -684,6 +686,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr,
From: Liu Bo bo.liu@linux.alibaba.com
[ Upstream commit 8c772a9bfc7c07c76f4a58b58910452fbb20843b ]
Our test reported the following stack, and the vmcore showed that the ->inflight counter was -1.
[ffffc9003fcc38d0] __schedule at ffffffff8173d95d
[ffffc9003fcc3958] schedule at ffffffff8173de26
[ffffc9003fcc3970] io_schedule at ffffffff810bb6b6
[ffffc9003fcc3988] blkcg_iolatency_throttle at ffffffff813911cb
[ffffc9003fcc3a20] rq_qos_throttle at ffffffff813847f3
[ffffc9003fcc3a48] blk_mq_make_request at ffffffff8137468a
[ffffc9003fcc3b08] generic_make_request at ffffffff81368b49
[ffffc9003fcc3b68] submit_bio at ffffffff81368d7d
[ffffc9003fcc3bb8] ext4_io_submit at ffffffffa031be00 [ext4]
[ffffc9003fcc3c00] ext4_writepages at ffffffffa03163de [ext4]
[ffffc9003fcc3d68] do_writepages at ffffffff811c49ae
[ffffc9003fcc3d78] __filemap_fdatawrite_range at ffffffff811b6188
[ffffc9003fcc3e30] filemap_write_and_wait_range at ffffffff811b6301
[ffffc9003fcc3e60] ext4_sync_file at ffffffffa030cee8 [ext4]
[ffffc9003fcc3ea8] vfs_fsync_range at ffffffff8128594b
[ffffc9003fcc3ee8] do_fsync at ffffffff81285abd
[ffffc9003fcc3f18] sys_fsync at ffffffff81285d50
[ffffc9003fcc3f28] do_syscall_64 at ffffffff81003c04
[ffffc9003fcc3f50] entry_SYSCALL_64_after_swapgs at ffffffff81742b8e
The ->inflight counter may be negative (-1) if
1) blk-iolatency was disabled when the IO was issued,
2) blk-iolatency was enabled before this IO reached its endio,
3) the ->inflight counter is decreased from 0 to -1 in endio()
In fact, the hang can easily be reproduced with the script below:
H=/sys/fs/cgroup/unified/
P=/sys/fs/cgroup/unified/test

echo "+io" > $H/cgroup.subtree_control
mkdir -p $P
echo $$ > $P/cgroup.procs
xfs_io -f -d -c "pwrite 0 4k" /dev/sdg
echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency
xfs_io -f -d -c "pwrite 0 4k" /dev/sdg
This fixes the problem by freezing the queue, so that no in-flight rq is running while iolatency is being enabled or disabled.
Note that quiesce_queue is not needed, as this only updates the iolatency configuration, which the request_queue's dispatch path does not care about.
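A condensed sketch of that part of the change, assuming the includes and local types of block/blk-iolatency.c; the helper name is hypothetical and the reference counting and config-write path are elided:

/* enable is +1/-1 as returned by iolatency_set_min_lat_nsec() after
 * this patch; the enabled counter is only flipped while the queue is
 * frozen, so no bio can be in flight across the transition.
 */
static void iolatency_toggle_sketch(struct blkcg_gq *blkg,
                                    struct blk_iolatency *blkiolat,
                                    int enable)
{
        blk_mq_freeze_queue(blkg->q);

        if (enable == 1)
                atomic_inc(&blkiolat->enabled);
        else if (enable == -1)
                atomic_dec(&blkiolat->enabled);

        blk_mq_unfreeze_queue(blkg->q);
}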
Signed-off-by: Liu Bo bo.liu@linux.alibaba.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/blk-iolatency.c | 52 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 7 deletions(-)
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 19923f8a029dd..b154e057ca67c 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -72,6 +72,7 @@ #include <linux/sched/loadavg.h> #include <linux/sched/signal.h> #include <trace/events/block.h> +#include <linux/blk-mq.h> #include "blk-rq-qos.h" #include "blk-stat.h"
@@ -568,6 +569,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) return;
enabled = blk_iolatency_enabled(iolat->blkiolat); + if (!enabled) + return; + while (blkg && blkg->parent) { iolat = blkg_to_lat(blkg); if (!iolat) { @@ -577,7 +581,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) rqw = &iolat->rq_wait;
atomic_dec(&rqw->inflight); - if (!enabled || iolat->min_lat_nsec == 0) + if (iolat->min_lat_nsec == 0) goto next; iolatency_record_time(iolat, &bio->bi_issue, now, issue_as_root); @@ -721,10 +725,13 @@ int blk_iolatency_init(struct request_queue *q) return 0; }
-static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +/* + * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise + * return 0. + */ +static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec;
iolat->min_lat_nsec = val; @@ -733,9 +740,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) BLKIOLATENCY_MAX_WIN_SIZE);
if (!oldval && val) - atomic_inc(&blkiolat->enabled); + return 1; if (oldval && !val) - atomic_dec(&blkiolat->enabled); + return -1; + return 0; }
static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -768,6 +776,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; + int enable = 0;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -803,7 +812,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec;
- iolatency_set_min_lat_nsec(blkg, lat_val); + enable = iolatency_set_min_lat_nsec(blkg, lat_val); + if (enable) { + WARN_ON_ONCE(!blk_get_queue(blkg->q)); + blkg_get(blkg); + } + if (oldval != iolat->min_lat_nsec) { iolatency_clear_scaling(blkg); } @@ -811,6 +825,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, ret = 0; out: blkg_conf_finish(&ctx); + if (ret == 0 && enable) { + struct iolatency_grp *tmp = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = tmp->blkiolat; + + blk_mq_freeze_queue(blkg->q); + + if (enable == 1) + atomic_inc(&blkiolat->enabled); + else if (enable == -1) + atomic_dec(&blkiolat->enabled); + else + WARN_ON_ONCE(1); + + blk_mq_unfreeze_queue(blkg->q); + + blkg_put(blkg); + blk_put_queue(blkg->q); + } return ret ?: nbytes; }
@@ -910,8 +942,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); + struct blk_iolatency *blkiolat = iolat->blkiolat; + int ret;
- iolatency_set_min_lat_nsec(blkg, 0); + ret = iolatency_set_min_lat_nsec(blkg, 0); + if (ret == 1) + atomic_inc(&blkiolat->enabled); + if (ret == -1) + atomic_dec(&blkiolat->enabled); iolatency_clear_scaling(blkg); }