From: Tzung-Bi Shih tzungbi@google.com
[ Upstream commit eb5a558705c7f63d06b4ddd072898b1ca894e053 ]
RT1015's output widget name is "SPO" instead of "Speaker". Fixes it to use the correct names.
Signed-off-by: Tzung-Bi Shih tzungbi@google.com Link: https://lore.kernel.org/r/20201019044724.1601476-1-tzungbi@google.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- .../mediatek/mt8183/mt8183-da7219-max98357.c | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-)
diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index a6c690c5308d3..58b76e985f7f3 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -624,15 +624,34 @@ static struct snd_soc_codec_conf mt8183_da7219_rt1015_codec_conf[] = { }, };
+static const struct snd_kcontrol_new mt8183_da7219_rt1015_snd_controls[] = { + SOC_DAPM_PIN_SWITCH("Left Spk"), + SOC_DAPM_PIN_SWITCH("Right Spk"), +}; + +static const +struct snd_soc_dapm_widget mt8183_da7219_rt1015_dapm_widgets[] = { + SND_SOC_DAPM_SPK("Left Spk", NULL), + SND_SOC_DAPM_SPK("Right Spk", NULL), + SND_SOC_DAPM_PINCTRL("TDM_OUT_PINCTRL", + "aud_tdm_out_on", "aud_tdm_out_off"), +}; + +static const struct snd_soc_dapm_route mt8183_da7219_rt1015_dapm_routes[] = { + {"Left Spk", NULL, "Left SPO"}, + {"Right Spk", NULL, "Right SPO"}, + {"I2S Playback", NULL, "TDM_OUT_PINCTRL"}, +}; + static struct snd_soc_card mt8183_da7219_rt1015_card = { .name = "mt8183_da7219_rt1015", .owner = THIS_MODULE, - .controls = mt8183_da7219_max98357_snd_controls, - .num_controls = ARRAY_SIZE(mt8183_da7219_max98357_snd_controls), - .dapm_widgets = mt8183_da7219_max98357_dapm_widgets, - .num_dapm_widgets = ARRAY_SIZE(mt8183_da7219_max98357_dapm_widgets), - .dapm_routes = mt8183_da7219_max98357_dapm_routes, - .num_dapm_routes = ARRAY_SIZE(mt8183_da7219_max98357_dapm_routes), + .controls = mt8183_da7219_rt1015_snd_controls, + .num_controls = ARRAY_SIZE(mt8183_da7219_rt1015_snd_controls), + .dapm_widgets = mt8183_da7219_rt1015_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(mt8183_da7219_rt1015_dapm_widgets), + .dapm_routes = mt8183_da7219_rt1015_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(mt8183_da7219_rt1015_dapm_routes), .dai_link = mt8183_da7219_dai_links, .num_links = ARRAY_SIZE(mt8183_da7219_dai_links), .aux_dev = &mt8183_da7219_max98357_headset_dev,
From: Srinivas Kandagatla srinivas.kandagatla@linaro.org
[ Upstream commit 3f48b6eba15ea342ef4cb420b580f5ed6605669f ]
With the current state of code, we would endup with something like below in /proc/asound/cards for 2 machines based on this driver.
Machine 1: 0 [DB845c ]: DB845c - DB845c DB845c Machine 2: 0 [LenovoYOGAC6301]: Lenovo-YOGA-C63 - Lenovo-YOGA-C630-13Q50 LENOVO-81JL-LenovoYOGAC630_13Q50-LNVNB161216
This is not very UCM friendly both w.r.t to common up configs and card identification, and UCM2 became totally not usefull with just one ucm sdm845.conf for two machines which have different setups w.r.t HDMI and other dais.
Reasons for such thing is partly because Qualcomm machine drivers never cared to set driver_name.
This patch sets up driver name for the this driver to sort out the UCM integration issues!
after this patch contents of /proc/asound/cards:
Machine 1: 0 [DB845c ]: sdm845 - DB845c DB845c Machine 2: 0 [LenovoYOGAC6301]: sdm845 - Lenovo-YOGA-C630-13Q50 LENOVO-81JL-LenovoYOGAC630_13Q50-LNVNB161216
with this its possible to align with what UCM2 expects and we can have sdm845/DB845.conf sdm845/LENOVO-81JL-LenovoYOGAC630_13Q50-LNVNB161216.conf ... for board variants. This should scale much better!
Signed-off-by: Srinivas Kandagatla srinivas.kandagatla@linaro.org Link: https://lore.kernel.org/r/20201023095849.22894-1-srinivas.kandagatla@linaro.... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/qcom/sdm845.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c index ab1bf23c21a68..6c2760e27ea6f 100644 --- a/sound/soc/qcom/sdm845.c +++ b/sound/soc/qcom/sdm845.c @@ -17,6 +17,7 @@ #include "qdsp6/q6afe.h" #include "../codecs/rt5663.h"
+#define DRIVER_NAME "sdm845" #define DEFAULT_SAMPLE_RATE_48K 48000 #define DEFAULT_MCLK_RATE 24576000 #define TDM_BCLK_RATE 6144000 @@ -552,6 +553,7 @@ static int sdm845_snd_platform_probe(struct platform_device *pdev) if (!data) return -ENOMEM;
+ card->driver_name = DRIVER_NAME; card->dapm_widgets = sdm845_snd_widgets; card->num_dapm_widgets = ARRAY_SIZE(sdm845_snd_widgets); card->dev = dev;
From: Olivier Moysan olivier.moysan@st.com
[ Upstream commit 20afe581c9b980848ad097c4d54dde9bec7593ef ]
A delay must be introduced before the shutdown down of the mclk, as stated in CS42L51 datasheet. Otherwise the codec may produce some noise after the end of DAPM power down sequence. The delay between DAC and CLOCK_SUPPLY widgets is too short. Add a delay in mclk shutdown request to manage the shutdown delay explicitly. From experiments, at least 10ms delay is necessary. Set delay to 20ms as recommended in Documentation/timers/timers-howto.rst when using msleep().
Signed-off-by: Olivier Moysan olivier.moysan@st.com Link: https://lore.kernel.org/r/20201020150109.482-1-olivier.moysan@st.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/codecs/cs42l51.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 764f2ef8f59df..2b617993b0adb 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -245,8 +245,28 @@ static const struct snd_soc_dapm_widget cs42l51_dapm_widgets[] = { &cs42l51_adcr_mux_controls), };
+static int mclk_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *comp = snd_soc_dapm_to_component(w->dapm); + struct cs42l51_private *cs42l51 = snd_soc_component_get_drvdata(comp); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + return clk_prepare_enable(cs42l51->mclk_handle); + case SND_SOC_DAPM_POST_PMD: + /* Delay mclk shutdown to fulfill power-down sequence requirements */ + msleep(20); + clk_disable_unprepare(cs42l51->mclk_handle); + break; + } + + return 0; +} + static const struct snd_soc_dapm_widget cs42l51_dapm_mclk_widgets[] = { - SND_SOC_DAPM_CLOCK_SUPPLY("MCLK") + SND_SOC_DAPM_SUPPLY("MCLK", SND_SOC_NOPM, 0, 0, mclk_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), };
static const struct snd_soc_dapm_route cs42l51_routes[] = {
From: Bard Liao yung-chuan.liao@linux.intel.com
[ Upstream commit 6e5329c6e6032cd997400b43b8299f607a61883e ]
Do not emit a warning for extended firmware header fields that are not used by kernel. This creates unnecessary noise to kernel logs like:
sof-audio-pci 0000:00:1f.3: warning: unknown ext header type 3 size 0x1c sof-audio-pci 0000:00:1f.3: warning: unknown ext header type 4 size 0x10
Signed-off-by: Bard Liao yung-chuan.liao@linux.intel.com Reviewed-by: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com Reviewed-by: Ranjani Sridharan ranjani.sridharan@linux.intel.com Reviewed-by: Guennadi Liakhovetski guennadi.liakhovetski@linux.intel.com Signed-off-by: Kai Vehmanen kai.vehmanen@linux.intel.com Link: https://lore.kernel.org/r/20201021182419.1160391-1-kai.vehmanen@linux.intel.... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/sof/loader.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index b94fa5f5d4808..c90c3f3a3b3ee 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -118,6 +118,11 @@ int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 bar, u32 offset) case SOF_IPC_EXT_CC_INFO: ret = get_cc_info(sdev, ext_hdr); break; + case SOF_IPC_EXT_UNUSED: + case SOF_IPC_EXT_PROBE_INFO: + case SOF_IPC_EXT_USER_ABI_INFO: + /* They are supported but we don't do anything here */ + break; default: dev_warn(sdev->dev, "warning: unknown ext header type %d size 0x%x\n", ext_hdr->type, ext_hdr->hdr.size);
From: Heikki Krogerus heikki.krogerus@linux.intel.com
[ Upstream commit 1384ab4fee12c4c4f8bd37bc9f8686881587b286 ]
This patch adds the necessary PCI ID for Intel Alder Lake-S devices.
Signed-off-by: Heikki Krogerus heikki.krogerus@linux.intel.com Signed-off-by: Felipe Balbi balbi@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 242b6210380a4..bae6a70664c80 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -40,6 +40,7 @@ #define PCI_DEVICE_ID_INTEL_TGPLP 0xa0ee #define PCI_DEVICE_ID_INTEL_TGPH 0x43ee #define PCI_DEVICE_ID_INTEL_JSP 0x4dee +#define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1
#define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" #define PCI_INTEL_BXT_FUNC_PMU_PWR 4 @@ -367,6 +368,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_JSP), (kernel_ulong_t) &dwc3_pci_intel_properties, },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS), + (kernel_ulong_t) &dwc3_pci_intel_properties, }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NL_USB), (kernel_ulong_t) &dwc3_pci_amd_properties, }, { } /* Terminating Entry */
From: Viresh Kumar viresh.kumar@linaro.org
[ Upstream commit e0df59de670b48a923246fae1f972317b84b2764 ]
There is a lot of stuff here which can be done outside of the big opp_table_lock, do that. This helps avoiding few circular dependency lockdeps around debugfs and interconnects.
Reported-by: Rob Clark robdclark@gmail.com Reported-by: Dmitry Osipenko digetx@gmail.com Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/opp/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 1a95ad40795be..a963df7bd2749 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1160,6 +1160,10 @@ static void _opp_table_kref_release(struct kref *kref) struct opp_device *opp_dev, *temp; int i;
+ /* Drop the lock as soon as we can */ + list_del(&opp_table->node); + mutex_unlock(&opp_table_lock); + _of_clear_opp_table(opp_table);
/* Release clk */ @@ -1187,10 +1191,7 @@ static void _opp_table_kref_release(struct kref *kref)
mutex_destroy(&opp_table->genpd_virt_dev_lock); mutex_destroy(&opp_table->lock); - list_del(&opp_table->node); kfree(opp_table); - - mutex_unlock(&opp_table_lock); }
void dev_pm_opp_put_opp_table(struct opp_table *opp_table)
From: Evgeny Novikov novikov@ispras.ru
[ Upstream commit 0d66e04875c5aae876cf3d4f4be7978fa2b00523 ]
goku_probe() goes to error label "err" and invokes goku_remove() in case of failures of pci_enable_device(), pci_resource_start() and ioremap(). goku_remove() gets a device from pci_get_drvdata(pdev) and works with it without any checks, in particular it dereferences a corresponding pointer. But goku_probe() did not set this device yet. So, one can expect various crashes. The patch moves setting the device just after allocation of memory for it.
Found by Linux Driver Verification project (linuxtesting.org).
Reported-by: Pavel Andrianov andrianov@ispras.ru Signed-off-by: Evgeny Novikov novikov@ispras.ru Signed-off-by: Felipe Balbi balbi@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/gadget/udc/goku_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c index 25c1d6ab5adb4..3e1267d38774f 100644 --- a/drivers/usb/gadget/udc/goku_udc.c +++ b/drivers/usb/gadget/udc/goku_udc.c @@ -1760,6 +1760,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; }
+ pci_set_drvdata(pdev, dev); spin_lock_init(&dev->lock); dev->pdev = pdev; dev->gadget.ops = &goku_ops; @@ -1793,7 +1794,6 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) } dev->regs = (struct goku_udc_regs __iomem *) base;
- pci_set_drvdata(pdev, dev); INFO(dev, "%s\n", driver_desc); INFO(dev, "version: " DRIVER_VERSION " %s\n", dmastr()); INFO(dev, "irq %d, pci mem %p\n", pdev->irq, base);
From: Zqiang qiang.zhang@windriver.com
[ Upstream commit 129aa9734559a17990ee933351c7b6956f1dba62 ]
When fetch 'event' from event queue, after copy its address space content to user space, the 'event' the memory space pointed to by the 'event' pointer need be freed.
BUG: memory leak unreferenced object 0xffff888110622660 (size 32): comm "softirq", pid 0, jiffies 4294941981 (age 12.480s) hex dump (first 32 bytes): 02 00 00 00 08 00 00 00 80 06 00 01 00 00 40 00 ..............@. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000efd29abd>] kmalloc include/linux/slab.h:554 [inline] [<00000000efd29abd>] raw_event_queue_add drivers/usb/gadget/legacy/raw_gadget.c:66 [inline] [<00000000efd29abd>] raw_queue_event drivers/usb/gadget/legacy/raw_gadget.c:225 [inline] [<00000000efd29abd>] gadget_setup+0xf6/0x220 drivers/usb/gadget/legacy/raw_gadget.c:343 [<00000000952c4a46>] dummy_timer+0xb9f/0x14c0 drivers/usb/gadget/udc/dummy_hcd.c:1899 [<0000000074ac2c54>] call_timer_fn+0x38/0x200 kernel/time/timer.c:1415 [<00000000560a3a79>] expire_timers kernel/time/timer.c:1460 [inline] [<00000000560a3a79>] __run_timers.part.0+0x319/0x400 kernel/time/timer.c:1757 [<000000009d9503d0>] __run_timers kernel/time/timer.c:1738 [inline] [<000000009d9503d0>] run_timer_softirq+0x3d/0x80 kernel/time/timer.c:1770 [<000000009df27c89>] __do_softirq+0xcc/0x2c2 kernel/softirq.c:298 [<000000007a3f1a47>] asm_call_irq_on_stack+0xf/0x20 [<000000004a62cc2e>] __run_on_irqstack arch/x86/include/asm/irq_stack.h:26 [inline] [<000000004a62cc2e>] run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:77 [inline] [<000000004a62cc2e>] do_softirq_own_stack+0x32/0x40 arch/x86/kernel/irq_64.c:77 [<00000000b0086800>] invoke_softirq kernel/softirq.c:393 [inline] [<00000000b0086800>] __irq_exit_rcu kernel/softirq.c:423 [inline] [<00000000b0086800>] irq_exit_rcu+0x91/0xc0 kernel/softirq.c:435 [<00000000175f9523>] sysvec_apic_timer_interrupt+0x36/0x80 arch/x86/kernel/apic/apic.c:1091 [<00000000a348e847>] asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:631 [<0000000060661100>] native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline] [<0000000060661100>] arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline] [<0000000060661100>] acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline] [<0000000060661100>] acpi_idle_do_entry+0xc3/0xd0 drivers/acpi/processor_idle.c:517 [<000000003f413b99>] acpi_idle_enter+0x128/0x1f0 drivers/acpi/processor_idle.c:648 [<00000000f5e5afb8>] cpuidle_enter_state+0xc9/0x650 drivers/cpuidle/cpuidle.c:237 [<00000000d50d51fc>] cpuidle_enter+0x29/0x40 drivers/cpuidle/cpuidle.c:351 [<00000000d674baed>] call_cpuidle kernel/sched/idle.c:132 [inline] [<00000000d674baed>] cpuidle_idle_call kernel/sched/idle.c:213 [inline] [<00000000d674baed>] do_idle+0x1c8/0x250 kernel/sched/idle.c:273
Reported-by: syzbot+bd38200f53df6259e6bf@syzkaller.appspotmail.com Signed-off-by: Zqiang qiang.zhang@windriver.com Signed-off-by: Felipe Balbi balbi@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/gadget/legacy/raw_gadget.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index e01e366d89cd5..062dfac303996 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -564,9 +564,12 @@ static int raw_ioctl_event_fetch(struct raw_dev *dev, unsigned long value) return -ENODEV; } length = min(arg.length, event->length); - if (copy_to_user((void __user *)value, event, sizeof(*event) + length)) + if (copy_to_user((void __user *)value, event, sizeof(*event) + length)) { + kfree(event); return -EFAULT; + }
+ kfree(event); return 0; }
From: Colin Ian King colin.king@canonical.com
[ Upstream commit e3e40312567087fbe6880f316cb2b0e1f3d8a82c ]
More recent libc implementations are now using openat/openat2 system calls so also add do_sys_openat2 to the tracing so that the test passes on these systems because do_sys_open may not be called.
Thanks to Masami Hiramatsu for the help on getting this fix to work correctly.
Signed-off-by: Colin Ian King colin.king@canonical.com Acked-by: Masami Hiramatsu mhiramat@kernel.org Acked-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- .../selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index a30a9c07290d0..d25d01a197781 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '[u]<offset>' || exit_unsupported :;: "user-memory access syntax and ustring working on user memory";: echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \ > kprobe_events +echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \ + >> kprobe_events
grep myevent kprobe_events | \ grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string' echo 1 > events/kprobes/myevent/enable +echo 1 > events/kprobes/myevent2/enable echo > /dev/null echo 0 > events/kprobes/myevent/enable +echo 0 > events/kprobes/myevent2/enable
grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"'
From: Tommi Rantala tommi.t.rantala@nokia.com
[ Upstream commit 1948172fdba5ad643529ddcd00a601c0caa913ed ]
Drop unneeded <linux/wait.h> header inclusion to fix pidfd compilation errors seen in Fedora 32:
In file included from pidfd_open_test.c:9: ../../../../usr/include/linux/wait.h:17:16: error: expected identifier before numeric constant 17 | #define P_ALL 0 | ^
Signed-off-by: Tommi Rantala tommi.t.rantala@nokia.com Reviewed-by: Kees Cook keescook@chromium.org Acked-by: Christian Brauner christian.brauner@ubuntu.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/pidfd/pidfd_open_test.c | 1 - tools/testing/selftests/pidfd/pidfd_poll_test.c | 1 - 2 files changed, 2 deletions(-)
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index b9fe75fc3e517..8a59438ccc78b 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -6,7 +6,6 @@ #include <inttypes.h> #include <limits.h> #include <linux/types.h> -#include <linux/wait.h> #include <sched.h> #include <signal.h> #include <stdbool.h> diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 4b115444dfe90..6108112753573 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #include <errno.h> #include <linux/types.h> -#include <linux/wait.h> #include <poll.h> #include <signal.h> #include <stdbool.h>
From: Kai-Heng Feng kai.heng.feng@canonical.com
[ Upstream commit f5dac54d9d93826a776dffc848df76746f7135bb ]
Both pm_runtime_force_suspend() and pm_runtime_force_resume() have some implicit checks, so it can make code flow more straightforward if we separate runtime and system suspend callbacks.
High Definition Audio Specification, 4.5.9.3 Codec Wake From System S3 states that codec can wake the system up from S3 if WAKEEN is toggled. Since HDA controller has different wakeup settings for runtime and system susend, we also need to explicitly disable direct-complete which can be enabled automatically by PCI core. In addition to that, avoid waking up codec if runtime resume is for system suspend, to not break direct-complete for codecs.
While at it, also remove AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP, as the original bug commit a6630529aecb ("ALSA: hda: Workaround for spurious wakeups on some Intel platforms") solves doesn't happen with this patch.
Signed-off-by: Kai-Heng Feng kai.heng.feng@canonical.com Link: https://lore.kernel.org/r/20201027130038.16463-3-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/pci/hda/hda_controller.h | 3 +- sound/pci/hda/hda_intel.c | 62 +++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 29 deletions(-)
diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index be63ead8161f8..68f9668788ea2 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -41,7 +41,7 @@ /* 24 unused */ #define AZX_DCAPS_COUNT_LPIB_DELAY (1 << 25) /* Take LPIB as delay */ #define AZX_DCAPS_PM_RUNTIME (1 << 26) /* runtime PM support */ -#define AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP (1 << 27) /* Workaround for spurious wakeups after suspend */ +/* 27 unused */ #define AZX_DCAPS_CORBRP_SELF_CLEAR (1 << 28) /* CORBRP clears itself after reset */ #define AZX_DCAPS_NO_MSI64 (1 << 29) /* Stick to 32-bit MSIs */ #define AZX_DCAPS_SEPARATE_STREAM_TAG (1 << 30) /* capture and playback use separate stream tag */ @@ -143,6 +143,7 @@ struct azx { unsigned int align_buffer_size:1; unsigned int region_requested:1; unsigned int disabled:1; /* disabled by vga_switcheroo */ + unsigned int pm_prepared:1;
/* GTS present */ unsigned int gts_present:1; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 476a8b871daa1..268e9ead9795f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -297,8 +297,7 @@ enum { /* PCH for HSW/BDW; with runtime PM */ /* no i915 binding for this as HSW/BDW has another controller for HDMI */ #define AZX_DCAPS_INTEL_PCH \ - (AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME |\ - AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP) + (AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME)
/* HSW HDMI */ #define AZX_DCAPS_INTEL_HASWELL \ @@ -984,7 +983,7 @@ static void __azx_runtime_suspend(struct azx *chip) display_power(chip, false); }
-static void __azx_runtime_resume(struct azx *chip, bool from_rt) +static void __azx_runtime_resume(struct azx *chip) { struct hda_intel *hda = container_of(chip, struct hda_intel, chip); struct hdac_bus *bus = azx_bus(chip); @@ -1001,7 +1000,8 @@ static void __azx_runtime_resume(struct azx *chip, bool from_rt) azx_init_pci(chip); hda_intel_init_chip(chip, true);
- if (from_rt) { + /* Avoid codec resume if runtime resume is for system suspend */ + if (!chip->pm_prepared) { list_for_each_codec(codec, &chip->bus) { if (codec->relaxed_resume) continue; @@ -1017,6 +1017,29 @@ static void __azx_runtime_resume(struct azx *chip, bool from_rt) }
#ifdef CONFIG_PM_SLEEP +static int azx_prepare(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip; + + chip = card->private_data; + chip->pm_prepared = 1; + + /* HDA controller always requires different WAKEEN for runtime suspend + * and system suspend, so don't use direct-complete here. + */ + return 0; +} + +static void azx_complete(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip; + + chip = card->private_data; + chip->pm_prepared = 0; +} + static int azx_suspend(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); @@ -1028,15 +1051,7 @@ static int azx_suspend(struct device *dev)
chip = card->private_data; bus = azx_bus(chip); - snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); - /* An ugly workaround: direct call of __azx_runtime_suspend() and - * __azx_runtime_resume() for old Intel platforms that suffer from - * spurious wakeups after S3 suspend - */ - if (chip->driver_caps & AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP) - __azx_runtime_suspend(chip); - else - pm_runtime_force_suspend(dev); + __azx_runtime_suspend(chip); if (bus->irq >= 0) { free_irq(bus->irq, chip); bus->irq = -1; @@ -1065,11 +1080,7 @@ static int azx_resume(struct device *dev) if (azx_acquire_irq(chip, 1) < 0) return -EIO;
- if (chip->driver_caps & AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP) - __azx_runtime_resume(chip, false); - else - pm_runtime_force_resume(dev); - snd_power_change_state(card, SNDRV_CTL_POWER_D0); + __azx_runtime_resume(chip);
trace_azx_resume(chip); return 0; @@ -1117,10 +1128,7 @@ static int azx_runtime_suspend(struct device *dev) chip = card->private_data;
/* enable controller wake up event */ - if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0) { - azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) | - STATESTS_INT_MASK); - } + azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) | STATESTS_INT_MASK);
__azx_runtime_suspend(chip); trace_azx_runtime_suspend(chip); @@ -1131,18 +1139,14 @@ static int azx_runtime_resume(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); struct azx *chip; - bool from_rt = snd_power_get_state(card) == SNDRV_CTL_POWER_D0;
if (!azx_is_pm_ready(card)) return 0; chip = card->private_data; - __azx_runtime_resume(chip, from_rt); + __azx_runtime_resume(chip);
/* disable controller Wake Up event*/ - if (from_rt) { - azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) & - ~STATESTS_INT_MASK); - } + azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) & ~STATESTS_INT_MASK);
trace_azx_runtime_resume(chip); return 0; @@ -1176,6 +1180,8 @@ static int azx_runtime_idle(struct device *dev) static const struct dev_pm_ops azx_pm = { SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume) #ifdef CONFIG_PM_SLEEP + .prepare = azx_prepare, + .complete = azx_complete, .freeze_noirq = azx_freeze_noirq, .thaw_noirq = azx_thaw_noirq, #endif
From: Kai-Heng Feng kai.heng.feng@canonical.com
[ Upstream commit 9fc149c3bce7bdbb94948a8e6bd025e3b3538603 ]
The broken jack detection should be fixed by commit a6e7d0a4bdb0 ("ALSA: hda: fix jack detection with Realtek codecs when in D3"), let's try enabling runtime PM by default again.
Signed-off-by: Kai-Heng Feng kai.heng.feng@canonical.com Link: https://lore.kernel.org/r/20201027130038.16463-4-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 268e9ead9795f..0ae0290eb2bfd 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2361,6 +2361,7 @@ static int azx_probe_continue(struct azx *chip)
if (azx_has_pm_runtime(chip)) { pm_runtime_use_autosuspend(&pci->dev); + pm_runtime_allow(&pci->dev); pm_runtime_put_autosuspend(&pci->dev); }
From: Joerg Roedel jroedel@suse.de
[ Upstream commit 3ad84246a4097010f3ae3d6944120c0be00e9e7a ]
Introduce sev_status and initialize it together with sme_me_mask to have an indicator which SEV features are enabled.
Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Borislav Petkov bp@suse.de Reviewed-by: Tom Lendacky thomas.lendacky@amd.com Link: https://lkml.kernel.org/r/20201028164659.27002-2-joro@8bytes.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/boot/compressed/mem_encrypt.S | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index dd07e7b41b115..3092ae173f94e 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -81,6 +81,19 @@ SYM_FUNC_START(set_sev_encryption_mask)
bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
+ /* + * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in + * get_sev_encryption_bit() because this function is 32-bit code and + * shared between 64-bit and 32-bit boot path. + */ + movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */ + rdmsr + + /* Store MSR value in sev_status */ + shlq $32, %rdx + orq %rdx, %rax + movq %rax, sev_status(%rip) + .Lno_sev_mask: movq %rbp, %rsp /* Restore original stack pointer */
@@ -96,5 +109,6 @@ SYM_FUNC_END(set_sev_encryption_mask)
#ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 -SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sev_status, .quad 0) #endif
From: Bob Peterson rpeterso@redhat.com
[ Upstream commit d0f17d3883f1e3f085d38572c2ea8edbd5150172 ]
Function gfs2_clear_rgrpd calls kfree(rgd->rd_bits) before calling return_all_reservations, but return_all_reservations still dereferences rgd->rd_bits in __rs_deltree. Fix that by moving the call to kfree below the call to return_all_reservations.
Signed-off-by: Bob Peterson rpeterso@redhat.com Signed-off-by: Andreas Gruenbacher agruenba@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 1bba5a9d45fa3..1d65db1b3914a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -719,9 +719,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) }
gfs2_free_clones(rgd); + return_all_reservations(rgd); kfree(rgd->rd_bits); rgd->rd_bits = NULL; - return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } }
From: Bob Peterson rpeterso@redhat.com
[ Upstream commit a9dd945ccef07a904e412f208f8de708a3d7159e ]
Gfs2 creates an address space for its rgrps called sd_aspace, but it never called truncate_inode_pages_final on it. This confused vfs greatly which tried to reference the address space after gfs2 had freed the superblock that contained it.
This patch adds a call to truncate_inode_pages_final for sd_aspace, thus avoiding the use-after-free.
Signed-off-by: Bob Peterson rpeterso@redhat.com Signed-off-by: Andreas Gruenbacher agruenba@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/gfs2/super.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 32ae1a7cdaed8..831f6e31d6821 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -732,6 +732,7 @@ static void gfs2_put_super(struct super_block *sb) gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); + truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp);
From: Bob Peterson rpeterso@redhat.com
[ Upstream commit c5c68724696e7d2f8db58a5fce3673208d35c485 ]
Before this patch, gfs2_fitrim was not properly checking for a "live" file system. If the file system had something to trim and the file system was read-only (or spectator) it would start the trim, but when it starts the transaction, gfs2_trans_begin returns -EROFS (read-only file system) and it errors out. However, if the file system was already trimmed so there's no work to do, it never called gfs2_trans_begin. That code is bypassed so it never returns the error. Instead, it returns a good return code with 0 work. All this makes for inconsistent behavior: The same fstrim command can return -EROFS in one case and 0 in another. This tripped up xfstests generic/537 which reports the error as:
+fstrim with unrecovered metadata just ate your filesystem
This patch adds a check for a "live" (iow, active journal, iow, RW) file system, and if not, returns the error properly.
Signed-off-by: Bob Peterson rpeterso@redhat.com Signed-off-by: Andreas Gruenbacher agruenba@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/gfs2/rgrp.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 1d65db1b3914a..ac306895bbbcc 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1374,6 +1374,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!capable(CAP_SYS_ADMIN)) return -EPERM;
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + if (!blk_queue_discard(q)) return -EOPNOTSUPP;
From: Keita Suzuki keitasuzuki.park@sslab.ics.keio.ac.jp
[ Upstream commit af61bc1e33d2c0ec22612b46050f5b58ac56a962 ]
When hpsa_scsi_add_host() fails, h->lastlogicals is leaked since it is missing a free() in the error handler.
Fix this by adding free() when hpsa_scsi_add_host() fails.
Link: https://lore.kernel.org/r/20201027073125.14229-1-keitasuzuki.park@sslab.ics.... Tested-by: Don Brace don.brace@microchip.com Acked-by: Don Brace don.brace@microchip.com Signed-off-by: Keita Suzuki keitasuzuki.park@sslab.ics.keio.ac.jp Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/hpsa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 48d5da59262b4..aed59ec20ad9e 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -8854,7 +8854,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* hook into SCSI subsystem */ rc = hpsa_scsi_add_host(h); if (rc) - goto clean7; /* perf, sg, cmd, irq, shost, pci, lu, aer/h */ + goto clean8; /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */
/* Monitor the controller for firmware lockups */ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL; @@ -8869,6 +8869,8 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) HPSA_EVENT_MONITOR_INTERVAL); return 0;
+clean8: /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */ + kfree(h->lastlogicals); clean7: /* perf, sg, cmd, irq, shost, pci, lu, aer/h */ hpsa_free_performant_mode(h); h->access.set_intr_mask(h, HPSA_INTR_OFF);
From: Evan Quan evan.quan@amd.com
[ Upstream commit 253475c455eb5f8da34faa1af92709e7bb414624 ]
This can address the random SDMA hang after pci config reset seen on Hawaii.
Signed-off-by: Evan Quan evan.quan@amd.com Tested-by: Sandeep Raghuraman sandy.8925@gmail.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 20f108818b2b9..a3c3fe96515f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1071,22 +1071,19 @@ static int cik_sdma_soft_reset(void *handle) { u32 srbm_soft_reset = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 tmp = RREG32(mmSRBM_STATUS2); + u32 tmp;
- if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) { - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; - } - if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) { - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; - } + /* sdma0 */ + tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); + tmp |= SDMA0_F32_CNTL__HALT_MASK; + WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); + srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; + + /* sdma1 */ + tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); + tmp |= SDMA0_F32_CNTL__HALT_MASK; + WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); + srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
if (srbm_soft_reset) { tmp = RREG32(mmSRBM_SOFT_RESET);
From: Evan Quan evan.quan@amd.com
[ Upstream commit c108725ef589af462be6b957f63c7925e38213eb ]
Correct some registers bitmasks and add mmBIOS_SCRATCH_7 reset.
Signed-off-by: Evan Quan evan.quan@amd.com Tested-by: Sandeep Raghuraman sandy.8925@gmail.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.c index 3be40114e63d2..45f608838f6eb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ci_baco.c @@ -142,12 +142,12 @@ static const struct baco_cmd_entry exit_baco_tbl[] = { CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_BCLK_OFF_MASK, BACO_CNTL__BACO_BCLK_OFF__SHIFT, 0, 0x00 }, { CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0x00 }, { CMD_DELAY_MS, 0, 0, 0, 20, 0 }, - { CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_BF_MASK, 0, 0xffffffff, 0x20 }, + { CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_BF_MASK, 0, 0xffffffff, 0x200 }, { CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ISO_DIS_MASK, BACO_CNTL__BACO_ISO_DIS__SHIFT, 0, 0x01 }, - { CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_MASK, 0, 5, 0x1c }, + { CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__PWRGOOD_MASK, 0, 5, 0x1c00 }, { CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_ANA_ISO_DIS_MASK, BACO_CNTL__BACO_ANA_ISO_DIS__SHIFT, 0, 0x01 }, { CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_RESET_EN_MASK, BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0x00 }, - { CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__RCU_BIF_CONFIG_DONE_MASK, 0, 5, 0x10 }, + { CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__RCU_BIF_CONFIG_DONE_MASK, 0, 5, 0x100 }, { CMD_READMODIFYWRITE, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0x00 }, { CMD_WAITFOR, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0x00 } }; @@ -155,6 +155,7 @@ static const struct baco_cmd_entry exit_baco_tbl[] = static const struct baco_cmd_entry clean_baco_tbl[] = { { CMD_WRITE, mmBIOS_SCRATCH_6, 0, 0, 0, 0 }, + { CMD_WRITE, mmBIOS_SCRATCH_7, 0, 0, 0, 0 }, { CMD_WRITE, mmCP_PFP_UCODE_ADDR, 0, 0, 0, 0 } };
From: Evan Quan evan.quan@amd.com
[ Upstream commit 277b080f98803cb73a83fb234f0be83a10e63958 ]
So that the succeeding resume can be performed based on a clean state.
Signed-off-by: Evan Quan evan.quan@amd.com Tested-by: Sandeep Raghuraman sandy.8925@gmail.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 4 ++++ drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 + drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 2 ++ .../gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 24 +++++++++++++++++++ drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 8 +++++++ 5 files changed, 39 insertions(+)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index fc63d9e32e1f8..c8ee931075e52 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1541,6 +1541,10 @@ static int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to reset to default!", result = tmp_result);
+ tmp_result = smum_stop_smc(hwmgr); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to stop smc!", result = tmp_result); + tmp_result = smu7_force_switch_to_arbf0(hwmgr); PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to force to switch arbf0!", result = tmp_result); diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 15ed6cbdf3660..91cdc53472f01 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -229,6 +229,7 @@ struct pp_smumgr_func { bool (*is_hw_avfs_present)(struct pp_hwmgr *hwmgr); int (*update_dpm_settings)(struct pp_hwmgr *hwmgr, void *profile_setting); int (*smc_table_manager)(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t table_id, bool rw); /*rw: true for read, false for write */ + int (*stop_smc)(struct pp_hwmgr *hwmgr); };
struct pp_hwmgr_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index ad100b533d049..5f46f1a4f38ef 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -113,4 +113,6 @@ extern int smum_update_dpm_settings(struct pp_hwmgr *hwmgr, void *profile_settin
extern int smum_smc_table_manager(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t table_id, bool rw);
+extern int smum_stop_smc(struct pp_hwmgr *hwmgr); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index e4d1f3d66ef48..09128122b4932 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2939,6 +2939,29 @@ static int ci_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) return 0; }
+static void ci_reset_smc(struct pp_hwmgr *hwmgr) +{ + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, + rst_reg, 1); +} + + +static void ci_stop_smc_clock(struct pp_hwmgr *hwmgr) +{ + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, + ck_disable, 1); +} + +static int ci_stop_smc(struct pp_hwmgr *hwmgr) +{ + ci_reset_smc(hwmgr); + ci_stop_smc_clock(hwmgr); + + return 0; +} + const struct pp_smumgr_func ci_smu_funcs = { .name = "ci_smu", .smu_init = ci_smu_init, @@ -2964,4 +2987,5 @@ const struct pp_smumgr_func ci_smu_funcs = { .is_dpm_running = ci_is_dpm_running, .update_dpm_settings = ci_update_dpm_settings, .update_smc_table = ci_update_smc_table, + .stop_smc = ci_stop_smc, }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index b6fb480668416..b6921db3c1305 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -245,3 +245,11 @@ int smum_smc_table_manager(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t tabl
return -EINVAL; } + +int smum_stop_smc(struct pp_hwmgr *hwmgr) +{ + if (hwmgr->smumgr_funcs->stop_smc) + return hwmgr->smumgr_funcs->stop_smc(hwmgr); + + return 0; +}
From: Evan Quan evan.quan@amd.com
[ Upstream commit 786436b453001dafe81025389f96bf9dac1e9690 ]
This reverts commit f87812284172a9809820d10143b573d833cd3f75 ("drm/amdgpu: Fix bug where DPM is not enabled after hibernate and resume"). It was intended to fix Hawaii S4(hibernation) issue but break S3. As ixFEATURE_STATUS is filled with garbage data on resume which can be only cleared by reloading smc firmware(but that will involve many changes). So, we will revert this S4 fix and seek a new way.
Signed-off-by: Evan Quan evan.quan@amd.com Tested-by: Sandeep Raghuraman sandy.8925@gmail.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 09128122b4932..329bf4d44bbce 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2726,10 +2726,7 @@ static int ci_initialize_mc_reg_table(struct pp_hwmgr *hwmgr)
static bool ci_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, - VOLTAGE_CONTROLLER_ON)) - ? true : false; + return ci_is_smc_ram_running(hwmgr); }
static int ci_smu_init(struct pp_hwmgr *hwmgr)
From: Johannes Berg johannes.berg@intel.com
[ Upstream commit 14f46c1e5108696ec1e5a129e838ecedf108c7bf ]
When ieee80211_skb_resize() is called from ieee80211_build_hdr() the skb has no 802.11 header yet, in fact it consist only of the payload as the ethernet frame is removed. As such, we're using the payload data for ieee80211_is_mgmt(), which is of course completely wrong. This didn't really hurt us because these are always data frames, so we could only have added more tailroom than we needed if we determined it was a management frame and sdata->crypto_tx_tailroom_needed_cnt was false.
However, syzbot found that of course there need not be any payload, so we're using at best uninitialized memory for the check.
Fix this to pass explicitly the kind of frame that we have instead of checking there, by replacing the "bool may_encrypt" argument with an argument that can carry the three possible states - it's not going to be encrypted, it's a management frame, or it's a data frame (and then we check sdata->crypto_tx_tailroom_needed_cnt).
Reported-by: syzbot+32fd1a1bfe355e93f1e2@syzkaller.appspotmail.com Signed-off-by: Johannes Berg johannes.berg@intel.com Link: https://lore.kernel.org/r/20201009132538.e1fd7f802947.I799b288466ea2815f9d4c... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/tx.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 282b0bc201eeb..b82d6cfd6aaf4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1938,19 +1938,24 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
/* device xmit handlers */
+enum ieee80211_encrypt { + ENCRYPT_NO, + ENCRYPT_MGMT, + ENCRYPT_DATA, +}; + static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - int head_need, bool may_encrypt) + int head_need, + enum ieee80211_encrypt encrypt) { struct ieee80211_local *local = sdata->local; - struct ieee80211_hdr *hdr; bool enc_tailroom; int tail_need = 0;
- hdr = (struct ieee80211_hdr *) skb->data; - enc_tailroom = may_encrypt && - (sdata->crypto_tx_tailroom_needed_cnt || - ieee80211_is_mgmt(hdr->frame_control)); + enc_tailroom = encrypt == ENCRYPT_MGMT || + (encrypt == ENCRYPT_DATA && + sdata->crypto_tx_tailroom_needed_cnt);
if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; @@ -1981,23 +1986,29 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; int headroom; - bool may_encrypt; + enum ieee80211_encrypt encrypt;
- may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT); + if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT) + encrypt = ENCRYPT_NO; + else if (ieee80211_is_mgmt(hdr->frame_control)) + encrypt = ENCRYPT_MGMT; + else + encrypt = ENCRYPT_DATA;
headroom = local->tx_headroom; - if (may_encrypt) + if (encrypt != ENCRYPT_NO) headroom += sdata->encrypt_headroom; headroom -= skb_headroom(skb); headroom = max_t(int, 0, headroom);
- if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) { + if (ieee80211_skb_resize(sdata, skb, headroom, encrypt)) { ieee80211_free_txskb(&local->hw, skb); return; }
+ /* reload after potential resize */ hdr = (struct ieee80211_hdr *) skb->data; info->control.vif = &sdata->vif;
@@ -2822,7 +2833,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, head_need += sdata->encrypt_headroom; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); - if (ieee80211_skb_resize(sdata, skb, head_need, true)) { + if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) { ieee80211_free_txskb(&local->hw, skb); skb = NULL; return ERR_PTR(-ENOMEM); @@ -3496,7 +3507,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (unlikely(ieee80211_skb_resize(sdata, skb, max_t(int, extra_head + hw_headroom - skb_headroom(skb), 0), - false))) { + ENCRYPT_NO))) { kfree_skb(skb); return true; }
From: Johannes Berg johannes.berg@intel.com
[ Upstream commit 9bdaf3b91efd229dd272b228e13df10310c80d19 ]
There's a race condition in the netdev registration in that NETDEV_REGISTER actually happens after the netdev is available, and so if we initialize things only there, we might get called with an uninitialized wdev through nl80211 - not using a wdev but using a netdev interface index.
I found this while looking into a syzbot report, but it doesn't really seem to be related, and unfortunately there's no repro for it (yet). I can't (yet) explain how it managed to get into cfg80211_release_pmsr() from nl80211_netlink_notify() without the wdev having been initialized, as the latter only iterates the wdevs that are linked into the rdev, which even without the change here happened after init.
However, looking at this, it seems fairly clear that the init needs to be done earlier, otherwise we might even re-init on a netns move, when data might still be pending.
Signed-off-by: Johannes Berg johannes.berg@intel.com Link: https://lore.kernel.org/r/20201009135821.fdcbba3aad65.Ie9201d91dbcb7da323188... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/wireless/core.c | 57 +++++++++++++++++++++++------------------- net/wireless/core.h | 5 ++-- net/wireless/nl80211.c | 3 ++- 3 files changed, 36 insertions(+), 29 deletions(-)
diff --git a/net/wireless/core.c b/net/wireless/core.c index 354b0ccbdc240..e025493171262 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1248,8 +1248,7 @@ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, } EXPORT_SYMBOL(cfg80211_stop_iface);
-void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void cfg80211_init_wdev(struct wireless_dev *wdev) { mutex_init(&wdev->mtx); INIT_LIST_HEAD(&wdev->event_list); @@ -1260,6 +1259,30 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
+#ifdef CONFIG_CFG80211_WEXT + wdev->wext.default_key = -1; + wdev->wext.default_mgmt_key = -1; + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; +#endif + + if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) + wdev->ps = true; + else + wdev->ps = false; + /* allow mac80211 to determine the timeout */ + wdev->ps_timeout = -1; + + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || + wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) + wdev->netdev->priv_flags |= IFF_DONT_BRIDGE; + + INIT_WORK(&wdev->disconnect_wk, cfg80211_autodisconnect_wk); +} + +void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ /* * We get here also when the interface changes network namespaces, * as it's registered into the new one, but we don't want it to @@ -1293,6 +1316,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, switch (state) { case NETDEV_POST_INIT: SET_NETDEV_DEVTYPE(dev, &wiphy_type); + wdev->netdev = dev; + /* can only change netns with wiphy */ + dev->features |= NETIF_F_NETNS_LOCAL; + + cfg80211_init_wdev(wdev); break; case NETDEV_REGISTER: /* @@ -1300,35 +1328,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * called within code protected by it when interfaces * are added with nl80211. */ - /* can only change netns with wiphy */ - dev->features |= NETIF_F_NETNS_LOCAL; - if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, "phy80211")) { pr_err("failed to add phy80211 symlink to netdev!\n"); } - wdev->netdev = dev; -#ifdef CONFIG_CFG80211_WEXT - wdev->wext.default_key = -1; - wdev->wext.default_mgmt_key = -1; - wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; -#endif - - if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) - wdev->ps = true; - else - wdev->ps = false; - /* allow mac80211 to determine the timeout */ - wdev->ps_timeout = -1; - - if ((wdev->iftype == NL80211_IFTYPE_STATION || - wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || - wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) - dev->priv_flags |= IFF_DONT_BRIDGE; - - INIT_WORK(&wdev->disconnect_wk, cfg80211_autodisconnect_wk);
- cfg80211_init_wdev(rdev, wdev); + cfg80211_register_wdev(rdev, wdev); break; case NETDEV_GOING_DOWN: cfg80211_leave(rdev, wdev); diff --git a/net/wireless/core.h b/net/wireless/core.h index 67b0389fca4dc..8cd4a9793298e 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -208,8 +208,9 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net);
-void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev); +void cfg80211_init_wdev(struct wireless_dev *wdev); +void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev);
static inline void wdev_lock(struct wireless_dev *wdev) __acquires(wdev) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e14307f2bddcc..8eb43c47e582a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3801,7 +3801,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) * P2P Device and NAN do not have a netdev, so don't go * through the netdev notifier and must be added here */ - cfg80211_init_wdev(rdev, wdev); + cfg80211_init_wdev(wdev); + cfg80211_register_wdev(rdev, wdev); break; default: break;
From: Johannes Berg johannes.berg@intel.com
[ Upstream commit dcd479e10a0510522a5d88b29b8f79ea3467d501 ]
When (for example) an IBSS station is pre-moved to AUTHORIZED before it's inserted, and then the insertion fails, we don't clean up the fast RX/TX states that might already have been created, since we don't go through all the state transitions again on the way down.
Do that, if it hasn't been done already, when the station is freed. I considered only freeing the fast TX/RX state there, but we might add more state so it's more robust to wind down the state properly.
Note that we warn if the station was ever inserted, it should have been properly cleaned up in that case, and the driver will probably not like things happening out of order.
Reported-by: syzbot+2e293dbd67de2836ba42@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20201009141710.7223b322a955.I95bd08b9ad0e039c03492... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/sta_info.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index fb4f2b9b294f0..4fe284ff1ea3d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -258,6 +258,24 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { + /* + * If we had used sta_info_pre_move_state() then we might not + * have gone through the state transitions down again, so do + * it here now (and warn if it's inserted). + * + * This will clear state such as fast TX/RX that may have been + * allocated during state transitions. + */ + while (sta->sta_state > IEEE80211_STA_NONE) { + int ret; + + WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); + + ret = sta_info_move_state(sta, sta->sta_state - 1); + if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret)) + break; + } + if (sta->rate_ctrl) rate_control_free_sta(sta);
From: Ye Bin yebin10@huawei.com
[ Upstream commit db18d20d1cb0fde16d518fb5ccd38679f174bc04 ]
Fix follow warning: [net/wireless/reg.c:3619]: (warning) %d in format string (no. 2) requires 'int' but the argument type is 'unsigned int'.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Ye Bin yebin10@huawei.com Link: https://lore.kernel.org/r/20201009070215.63695-1-yebin10@huawei.com Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d8a90d3974235..763a45655ac21 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3411,7 +3411,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) power_rule = ®_rule->power_rule;
if (reg_rule->flags & NL80211_RRF_AUTO_BW) - snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO", + snprintf(bw, sizeof(bw), "%d KHz, %u KHz AUTO", freq_range->max_bandwidth_khz, reg_get_max_bandwidth(rd, reg_rule)); else
From: "Jason A. Donenfeld" Jason@zx2c4.com
[ Upstream commit af8afcf1fdd5f365f70e2386c2d8c7a1abd853d7 ]
If netfilter changes the packet mark, the packet is rerouted. The ip_route_me_harder family of functions fails to use the right sk, opting to instead use skb->sk, resulting in a routing loop when used with tunnels. With the next change fixing this issue in netfilter, test for the relevant condition inside our test suite, since wireguard was where the bug was discovered.
Reported-by: Chen Minqiang ptpt52@gmail.com Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/wireguard/netns.sh | 8 ++++++++ tools/testing/selftests/wireguard/qemu/kernel.config | 2 ++ 2 files changed, 10 insertions(+)
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index d77f4829f1e07..74c69b75f6f5a 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -316,6 +316,14 @@ pp sleep 3 n2 ping -W 1 -c 1 192.168.241.1 n1 wg set wg0 peer "$pub2" persistent-keepalive 0
+# Test that sk_bound_dev_if works +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 +# What about when the mark changes and the packet must be rerouted? +n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1 +n1 ping -c 1 -W 1 192.168.241.2 # First the boring case +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case +n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1 + # Test that onion routing works, even when it loops n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 ip1 addr add 192.168.242.1/24 dev wg0 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index d531de13c95b0..4eecb432a66c1 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -18,10 +18,12 @@ CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MARK=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_NAT=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y
Note that this requires https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... And that commit should be backported to every kernel ever, since the bug is so old.
On Tue, Nov 10, 2020 at 01:29:41PM +0100, Jason A. Donenfeld wrote:
Note that this requires https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... And that commit should be backported to every kernel ever, since the bug is so old.
Sasha queued this up to 5.4.y and 5.9.y, but it looks like it doesn't easily apply to older kernels. If you think that it's needed beyond that, I'll gladly take backported patches.
thanks,
greg k-h
On Tue, Nov 10, 2020 at 6:20 PM Greg KH greg@kroah.com wrote:
On Tue, Nov 10, 2020 at 01:29:41PM +0100, Jason A. Donenfeld wrote:
Note that this requires https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... And that commit should be backported to every kernel ever, since the bug is so old.
Sasha queued this up to 5.4.y and 5.9.y, but it looks like it doesn't easily apply to older kernels. If you think that it's needed beyond that, I'll gladly take backported patches.
Backport to older kernels coming your way shortly.
[ Upstream commit 46d6c5ae953cc0be38efd0e469284df7c4328cf8 ]
If netfilter changes the packet mark when mangling, the packet is rerouted using the route_me_harder set of functions. Prior to this commit, there's one big difference between route_me_harder and the ordinary initial routing functions, described in the comment above __ip_queue_xmit():
/* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
That function goes on to correctly make use of sk->sk_bound_dev_if, rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a tunnel will receive a packet in ndo_start_xmit with an initial skb->sk. It will make some transformations to that packet, and then it will send the encapsulated packet out of a *new* socket. That new socket will basically always have a different sk_bound_dev_if (otherwise there'd be a routing loop). So for the purposes of routing the encapsulated packet, the routing information as it pertains to the socket should come from that socket's sk, rather than the packet's original skb->sk. For that reason __ip_queue_xmit() and related functions all do the right thing.
One might argue that all tunnels should just call skb_orphan(skb) before transmitting the encapsulated packet into the new socket. But tunnels do *not* do this -- and this is wisely avoided in skb_scrub_packet() too -- because features like TSQ rely on skb->destructor() being called when that buffer space is truely available again. Calling skb_orphan(skb) too early would result in buffers filling up unnecessarily and accounting info being all wrong. Instead, additional routing must take into account the new sk, just as __ip_queue_xmit() notes.
So, this commit addresses the problem by fishing the correct sk out of state->sk -- it's already set properly in the call to nf_hook() in __ip_local_out(), which receives the sk as part of its normal functionality. So we make sure to plumb state->sk through the various route_me_harder functions, and then make correct use of it following the example of __ip_queue_xmit().
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com Reviewed-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org [Jason: backported to 4.19 from Sasha's 5.4 backport] Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com --- include/linux/netfilter_ipv4.h | 2 +- include/linux/netfilter_ipv6.h | 2 +- net/ipv4/netfilter.c | 12 +++++++----- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_reject_ipv4.c | 2 +- net/ipv4/netfilter/nft_chain_route_ipv4.c | 2 +- net/ipv6/netfilter.c | 6 +++--- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 2 +- net/ipv6/netfilter/nft_chain_route_ipv6.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 4 ++-- 13 files changed, 22 insertions(+), 20 deletions(-)
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 95ab5cc64422..45ff1330b339 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -16,7 +16,7 @@ struct ip_rt_info { u_int32_t mark; };
-int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type);
struct nf_queue_entry;
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index c0dc4dd78887..47a2de582f57 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -36,7 +36,7 @@ struct nf_ipv6_ops { };
#ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 8d2e5dc9a827..3d670d5aea34 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,17 +17,19 @@ #include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type) +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - const struct sock *sk = skb_to_full_sk(skb); - __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0; + __u8 flags; struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len;
+ sk = sk_to_full_sk(sk); + flags = sk ? inet_sk_flowi_flags(sk) : 0; + if (addr_type == RTN_UNSPEC) addr_type = inet_addr_type_dev_table(net, dev, saddr); if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) @@ -91,8 +93,8 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, skb, - RTN_UNSPEC); + return ip_route_me_harder(entry->state.net, entry->state.sk, + skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 690b17ef6a44..d64b1ef43c10 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -54,7 +54,7 @@ synproxy_send_tcp(struct net *net,
skb_dst_set_noref(nskb, skb_dst(skb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb;
if (nfct) { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index dea138ca8925..0829f46ddfdd 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -65,7 +65,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 6115bf1ff6f0..6a27766b7d0f 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -329,7 +329,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 5cd06ba3535d..4996db1f64a1 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -129,7 +129,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
- if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb;
niph = ip_hdr(nskb); diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 7d82934c46f4..61003768e52b 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -50,7 +50,7 @@ static unsigned int nf_route_table_hook(void *priv, iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6d0b1f3e927b..5679fa3f696a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -17,10 +17,10 @@ #include <net/xfrm.h> #include <net/netfilter/nf_queue.h>
-int ip6_route_me_harder(struct net *net, struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk = sk_to_full_sk(skb->sk); + struct sock *sk = sk_to_full_sk(sk_partial); unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & @@ -81,7 +81,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(entry->state.net, skb); + return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index b0524b18c4fb..acba3757ff60 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -60,7 +60,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index ca6d38698b1a..2b6a3b27f670 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -352,7 +352,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index da3f1f8cb325..afe79cb46e63 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -52,7 +52,7 @@ static unsigned int nf_route_table_hook(void *priv, skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d5e4329579e2..acaeeaf81441 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -725,12 +725,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, struct dst_entry *dst = skb_dst(skb);
if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && - ip6_route_me_harder(ipvs->net, skb) != 0) + ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0) + ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0) return 1;
return 0;
On Fri, Nov 13, 2020 at 11:49:36PM +0100, Jason A. Donenfeld wrote:
[ Upstream commit 46d6c5ae953cc0be38efd0e469284df7c4328cf8 ]
If netfilter changes the packet mark when mangling, the packet is rerouted using the route_me_harder set of functions. Prior to this commit, there's one big difference between route_me_harder and the ordinary initial routing functions, described in the comment above __ip_queue_xmit():
/* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
That function goes on to correctly make use of sk->sk_bound_dev_if, rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a tunnel will receive a packet in ndo_start_xmit with an initial skb->sk. It will make some transformations to that packet, and then it will send the encapsulated packet out of a *new* socket. That new socket will basically always have a different sk_bound_dev_if (otherwise there'd be a routing loop). So for the purposes of routing the encapsulated packet, the routing information as it pertains to the socket should come from that socket's sk, rather than the packet's original skb->sk. For that reason __ip_queue_xmit() and related functions all do the right thing.
One might argue that all tunnels should just call skb_orphan(skb) before transmitting the encapsulated packet into the new socket. But tunnels do *not* do this -- and this is wisely avoided in skb_scrub_packet() too -- because features like TSQ rely on skb->destructor() being called when that buffer space is truely available again. Calling skb_orphan(skb) too early would result in buffers filling up unnecessarily and accounting info being all wrong. Instead, additional routing must take into account the new sk, just as __ip_queue_xmit() notes.
So, this commit addresses the problem by fishing the correct sk out of state->sk -- it's already set properly in the call to nf_hook() in __ip_local_out(), which receives the sk as part of its normal functionality. So we make sure to plumb state->sk through the various route_me_harder functions, and then make correct use of it following the example of __ip_queue_xmit().
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com Reviewed-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org [Jason: backported to 4.19 from Sasha's 5.4 backport] Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com
Now queued up, thanks!
greg k-h
From: Qiujun Huang hqjagain@gmail.com
[ Upstream commit 906695e59324635c62b5ae59df111151a546ca66 ]
The array size is FTRACE_KSTACK_NESTING, so the index FTRACE_KSTACK_NESTING is illegal too. And fix two typos by the way.
Link: https://lkml.kernel.org/r/20201031085714.2147-1-hqjagain@gmail.com
Signed-off-by: Qiujun Huang hqjagain@gmail.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d3e5de717df2f..410fedfbe5d6e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2611,7 +2611,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, /* * If tracing is off, but we have triggers enabled * we still need to look at the event data. Use the temp_buffer - * to store the trace event for the tigger to use. It's recusive + * to store the trace event for the trigger to use. It's recursive * safe and will not be recorded anywhere. */ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { @@ -2934,7 +2934,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
/* This should never happen. If it does, yell once and skip */ - if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING)) + if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) goto out;
/*
From: Keith Busch kbusch@kernel.org
[ Upstream commit 38210800bf66d7302da1bb5b624ad68638da1562 ]
Multiple CPUs may be mapped to the same hctx, allowing mulitple submission contexts to attempt commit_rqs(). We need to verify we're not writing the same doorbell value multiple times since that's a spec violation.
Revert commit 54b2fcee1db041a83b52b51752dade6090cf952f.
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1878596 Reported-by: "B.L. Jones" brandon.gustav@googlemail.com Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/pci.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8984796db0c80..a6af96aaa0eb7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -198,6 +198,7 @@ struct nvme_queue { u32 q_depth; u16 cq_vector; u16 sq_tail; + u16 last_sq_tail; u16 cq_head; u16 qid; u8 cq_phase; @@ -455,11 +456,24 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; }
-static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) +/* + * Write sq tail if we are asked to, or if the next command would wrap. + */ +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) { + if (!write_sq) { + u16 next_tail = nvmeq->sq_tail + 1; + + if (next_tail == nvmeq->q_depth) + next_tail = 0; + if (next_tail != nvmeq->last_sq_tail) + return; + } + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) writel(nvmeq->sq_tail, nvmeq->q_db); + nvmeq->last_sq_tail = nvmeq->sq_tail; }
/** @@ -476,8 +490,7 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, cmd, sizeof(*cmd)); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - if (write_sq) - nvme_write_sq_db(nvmeq); + nvme_write_sq_db(nvmeq, write_sq); spin_unlock(&nvmeq->sq_lock); }
@@ -486,7 +499,8 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) struct nvme_queue *nvmeq = hctx->driver_data;
spin_lock(&nvmeq->sq_lock); - nvme_write_sq_db(nvmeq); + if (nvmeq->sq_tail != nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq, true); spin_unlock(&nvmeq->sq_lock); }
@@ -1496,6 +1510,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) struct nvme_dev *dev = nvmeq->dev;
nvmeq->sq_tail = 0; + nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
From: Vineet Gupta vgupta@synopsys.com
[ Upstream commit 3b57533b460c8dc22a432684b7e8d22571f34d2e ]
ARC HSDK platform stopped booting on released v5.10-rc1, getting stuck in startup of non master SMP cores.
This was bisected to upstream commit 7fef431be9c9ac25 "(mm/page_alloc: place pages to tail in __free_pages_core())" That commit itself is harmless, it just exposed a subtle assumption in our platform code (hence CC'ing linux-mm just as FYI in case some other arches / platforms trip on it).
The upstream commit is semantically disruptive as it reverses the order of page allocations (actually it can be good test for hardware verification to exercise different memory patterns altogether). For ARC HSDK platform that meant a remapped memory region (pertaining to unused Closely Coupled Memory) started getting used early for dynamice allocations, while not effectively remapped on all the cores, triggering memory error exception on those cores.
The fix is to move the CCM remapping from early platform code to to early core boot code. And while it is undesirable to riddle common boot code with platform quirks, there is no other way to do this since the faltering code involves setting up stack itself so even function calls are not allowed at that point.
If anyone is interested, all the gory details can be found at Link below.
Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/32 Cc: David Hildenbrand david@redhat.com Cc: linux-mm@kvack.org Signed-off-by: Vineet Gupta vgupta@synopsys.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arc/kernel/head.S | 17 ++++++++++++++++- arch/arc/plat-hsdk/platform.c | 17 ----------------- 2 files changed, 16 insertions(+), 18 deletions(-)
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 17fd1ed700cca..9152782444b55 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -67,7 +67,22 @@ sr r5, [ARC_REG_LPB_CTRL] 1: #endif /* CONFIG_ARC_LPB_DISABLE */ -#endif + + /* On HSDK, CCMs need to remapped super early */ +#ifdef CONFIG_ARC_SOC_HSDK + mov r6, 0x60000000 + lr r5, [ARC_REG_ICCM_BUILD] + breq r5, 0, 1f + sr r6, [ARC_REG_AUX_ICCM] +1: + lr r5, [ARC_REG_DCCM_BUILD] + breq r5, 0, 2f + sr r6, [ARC_REG_AUX_DCCM] +2: +#endif /* CONFIG_ARC_SOC_HSDK */ + +#endif /* CONFIG_ISA_ARCV2 */ + ; Config DSP_CTRL properly, so kernel may use integer multiply, ; multiply-accumulate, and divide operations DSP_EARLY_INIT diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 0b961a2a10b8e..22c9e2c9c0283 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -17,22 +17,6 @@ int arc_hsdk_axi_dmac_coherent __section(.data) = 0;
#define ARC_CCM_UNUSED_ADDR 0x60000000
-static void __init hsdk_init_per_cpu(unsigned int cpu) -{ - /* - * By default ICCM is mapped to 0x7z while this area is used for - * kernel virtual mappings, so move it to currently unused area. - */ - if (cpuinfo_arc700[cpu].iccm.sz) - write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR); - - /* - * By default DCCM is mapped to 0x8z while this area is used by kernel, - * so move it to currently unused area. - */ - if (cpuinfo_arc700[cpu].dccm.sz) - write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR); -}
#define ARC_PERIPHERAL_BASE 0xf0000000 #define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) @@ -339,5 +323,4 @@ static const char *hsdk_compat[] __initconst = { MACHINE_START(SIMULATION, "hsdk") .dt_compat = hsdk_compat, .init_early = hsdk_init_early, - .init_per_cpu = hsdk_init_per_cpu, MACHINE_END
From: Hannes Reinecke hare@suse.de
[ Upstream commit 5faf50e9e9fdc2117c61ff7e20da49cd6a29e0ca ]
alua_bus_detach() might be running concurrently with alua_rtpg_work(), so we might trip over h->sdev == NULL and call BUG_ON(). The correct way of handling it is to not set h->sdev to NULL in alua_bus_detach(), and call rcu_synchronize() before the final delete to ensure that all concurrent threads have left the critical section. Then we can get rid of the BUG_ON() and replace it with a simple if condition.
Link: https://lore.kernel.org/r/1600167537-12509-1-git-send-email-jitendra.khasdev... Link: https://lore.kernel.org/r/20200924104559.26753-1-hare@suse.de Cc: Brian Bunker brian@purestorage.com Acked-by: Brian Bunker brian@purestorage.com Tested-by: Jitendra Khasdev jitendra.khasdev@oracle.com Reviewed-by: Jitendra Khasdev jitendra.khasdev@oracle.com Signed-off-by: Hannes Reinecke hare@suse.de Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/device_handler/scsi_dh_alua.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index f32da0ca529e0..308bda2e9c000 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -658,8 +658,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) rcu_read_lock(); list_for_each_entry_rcu(h, &tmp_pg->dh_list, node) { - /* h->sdev should always be valid */ - BUG_ON(!h->sdev); + if (!h->sdev) + continue; h->sdev->access_state = desc[0]; } rcu_read_unlock(); @@ -705,7 +705,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) pg->expiry = 0; rcu_read_lock(); list_for_each_entry_rcu(h, &pg->dh_list, node) { - BUG_ON(!h->sdev); + if (!h->sdev) + continue; h->sdev->access_state = (pg->state & SCSI_ACCESS_STATE_MASK); if (pg->pref) @@ -1147,7 +1148,6 @@ static void alua_bus_detach(struct scsi_device *sdev) spin_lock(&h->pg_lock); pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); rcu_assign_pointer(h->pg, NULL); - h->sdev = NULL; spin_unlock(&h->pg_lock); if (pg) { spin_lock_irq(&pg->lock); @@ -1156,6 +1156,7 @@ static void alua_bus_detach(struct scsi_device *sdev) kref_put(&pg->kref, release_port_group); } sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); }
From: Sreekanth Reddy sreekanth.reddy@broadcom.com
[ Upstream commit 5feed64f9199ff90c4239971733f23f30aeb2484 ]
While reenabling the IRQ after irq poll there may be small time window where HBA firmware has posted some replies and raise the interrupts but driver has not received the interrupts. So we may observe I/O timeouts as the driver has not processed the replies as interrupts got missed while reenabling the IRQ.
To fix this issue the driver has to go for one more round of processing the reply descriptors from reply descriptor post queue after enabling the IRQ.
Link: https://lore.kernel.org/r/20201102072746.27410-1-sreekanth.reddy@broadcom.co... Reported-by: Tomas Henzl thenzl@redhat.com Reviewed-by: Tomas Henzl thenzl@redhat.com Signed-off-by: Sreekanth Reddy sreekanth.reddy@broadcom.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/mpt3sas/mpt3sas_base.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index e86682dc34eca..87d05c1950870 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1742,6 +1742,13 @@ _base_irqpoll(struct irq_poll *irqpoll, int budget) reply_q->irq_poll_scheduled = false; reply_q->irq_line_enable = true; enable_irq(reply_q->os_irq); + /* + * Go for one more round of processing the + * reply descriptor post queue incase if HBA + * Firmware has posted some reply descriptors + * while reenabling the IRQ. + */ + _base_process_reply_queue(reply_q); }
return num_entries;
From: Chao Leng lengchao@huawei.com
[ Upstream commit 04800fbff4764ab7b32c49d19628605a5d4cb85c ]
Introduce sync io queues for some scenarios which just only need sync io queues not sync all queues.
Signed-off-by: Chao Leng lengchao@huawei.com Reviewed-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/core.c | 8 ++++++-- drivers/nvme/host/nvme.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 893e29624c16b..59040bab5d6fa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4641,8 +4641,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_start_queues);
- -void nvme_sync_queues(struct nvme_ctrl *ctrl) +void nvme_sync_io_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns;
@@ -4650,7 +4649,12 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl) list_for_each_entry(ns, &ctrl->namespaces, list) blk_sync_queue(ns->queue); up_read(&ctrl->namespaces_rwsem); +} +EXPORT_SYMBOL_GPL(nvme_sync_io_queues);
+void nvme_sync_queues(struct nvme_ctrl *ctrl) +{ + nvme_sync_io_queues(ctrl); if (ctrl->admin_q) blk_sync_queue(ctrl->admin_q); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2aaedfa43ed86..97fbd61191b33 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -602,6 +602,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); void nvme_sync_queues(struct nvme_ctrl *ctrl); +void nvme_sync_io_queues(struct nvme_ctrl *ctrl); void nvme_unfreeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze(struct nvme_ctrl *ctrl); int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
From: Chao Leng lengchao@huawei.com
[ Upstream commit 3017013dcc82a4862bd1e140f8b762cfc594008d ]
Now use teardown_lock to serialize for time out and tear down. This may cause abnormal: first cancel all request in tear down, then time out may complete the request again, but the request may already be freed or restarted.
To avoid race between time out and tear down, in tear down process, first we quiesce the queue, and then delete the timer and cancel the time out work for the queue. At the same time we need to delete teardown_lock.
Signed-off-by: Chao Leng lengchao@huawei.com Reviewed-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/rdma.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 116902b1b2c34..18321c4517549 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -122,7 +122,6 @@ struct nvme_rdma_ctrl { struct sockaddr_storage src_addr;
struct nvme_ctrl ctrl; - struct mutex teardown_lock; bool use_inline_data; u32 io_queues[HCTX_MAX_TYPES]; }; @@ -1010,8 +1009,8 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - mutex_lock(&ctrl->teardown_lock); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); if (ctrl->ctrl.admin_tagset) { blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset, @@ -1021,16 +1020,15 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_destroy_admin_queue(ctrl, remove); - mutex_unlock(&ctrl->teardown_lock); }
static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove) { - mutex_lock(&ctrl->teardown_lock); if (ctrl->ctrl.queue_count > 1) { nvme_start_freeze(&ctrl->ctrl); nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); if (ctrl->ctrl.tagset) { blk_mq_tagset_busy_iter(ctrl->ctrl.tagset, @@ -1041,7 +1039,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_start_queues(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, remove); } - mutex_unlock(&ctrl->teardown_lock); }
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) @@ -1967,16 +1964,12 @@ static void nvme_rdma_complete_timed_out(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; - struct nvme_rdma_ctrl *ctrl = queue->ctrl;
- /* fence other contexts that may complete the command */ - mutex_lock(&ctrl->teardown_lock); nvme_rdma_stop_queue(queue); if (!blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); } - mutex_unlock(&ctrl->teardown_lock); }
static enum blk_eh_timer_return @@ -2311,7 +2304,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return ERR_PTR(-ENOMEM); ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - mutex_init(&ctrl->teardown_lock);
if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid =
From: Chao Leng lengchao@huawei.com
[ Upstream commit d6f66210f4b1aa2f5944f0e34e0f8db44f499f92 ]
Now use teardown_lock to serialize for time out and tear down. This may cause abnormal: first cancel all request in tear down, then time out may complete the request again, but the request may already be freed or restarted.
To avoid race between time out and tear down, in tear down process, first we quiesce the queue, and then delete the timer and cancel the time out work for the queue. At the same time we need to delete teardown_lock.
Signed-off-by: Chao Leng lengchao@huawei.com Reviewed-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/tcp.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d6a3e14873542..19f86ea547bbc 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -124,7 +124,6 @@ struct nvme_tcp_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl;
- struct mutex teardown_lock; struct work_struct err_work; struct delayed_work connect_work; struct nvme_tcp_request async_req; @@ -1886,8 +1885,8 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove) { - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); blk_mq_quiesce_queue(ctrl->admin_q); + blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); if (ctrl->admin_tagset) { blk_mq_tagset_busy_iter(ctrl->admin_tagset, @@ -1897,18 +1896,17 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->admin_q); nvme_tcp_destroy_admin_queue(ctrl, remove); - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); }
static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, bool remove) { - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); if (ctrl->queue_count <= 1) - goto out; + return; blk_mq_quiesce_queue(ctrl->admin_q); nvme_start_freeze(ctrl); nvme_stop_queues(ctrl); + nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); if (ctrl->tagset) { blk_mq_tagset_busy_iter(ctrl->tagset, @@ -1918,8 +1916,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, if (remove) nvme_start_queues(ctrl); nvme_tcp_destroy_io_queues(ctrl, remove); -out: - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); }
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) @@ -2171,14 +2167,11 @@ static void nvme_tcp_complete_timed_out(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
- /* fence other contexts that may complete the command */ - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); if (!blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); } - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); }
static enum blk_eh_timer_return @@ -2455,7 +2448,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, nvme_tcp_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); - mutex_init(&ctrl->teardown_lock);
if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid =
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit fdf58e02adecbef4c7cbb2073d8ea225e6fd5f26 ]
The request may be executed asynchronously, and rq->state may be changed to IDLE. To avoid repeated request completion, only MQ_RQ_COMPLETE of rq->state is checked in nvme_rdma_complete_timed_out. It is not safe, so need adding check IDLE for rq->state.
Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Chao Leng lengchao@huawei.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 18321c4517549..4361476031c40 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1966,7 +1966,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq) struct nvme_rdma_queue *queue = req->queue;
nvme_rdma_stop_queue(queue); - if (!blk_mq_request_completed(rq)) { + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); }
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit 0a8a2c85b83589a5c10bc5564b796836bf4b4984 ]
The request may be executed asynchronously, and rq->state may be changed to IDLE. To avoid repeated request completion, only MQ_RQ_COMPLETE of rq->state is checked in nvme_tcp_complete_timed_out. It is not safe, so need adding check IDLE for rq->state.
Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Chao Leng lengchao@huawei.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 19f86ea547bbc..c0c33320fe659 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2168,7 +2168,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq) struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); - if (!blk_mq_request_completed(rq)) { + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); }
From: Suravee Suthikulpanit suravee.suthikulpanit@amd.com
[ Upstream commit 73db2fc595f358460ce32bcaa3be1f0cce4a2db1 ]
Certain device drivers allocate IO queues on a per-cpu basis. On AMD EPYC platform, which can support up-to 256 cpu threads, this can exceed the current MAX_IRQ_PER_TABLE limit of 256, and result in the error message:
AMD-Vi: Failed to allocate IRTE
This has been observed with certain NVME devices.
AMD IOMMU hardware can actually support upto 512 interrupt remapping table entries. Therefore, update the driver to match the hardware limit.
Please note that this also increases the size of interrupt remapping table to 8KB per device when using the 128-bit IRTE format.
Signed-off-by: Suravee Suthikulpanit suravee.suthikulpanit@amd.com Link: https://lore.kernel.org/r/20201015025002.87997-1-suravee.suthikulpanit@amd.c... Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iommu/amd/amd_iommu_types.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 30a5d412255a4..427484c455891 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -406,7 +406,11 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup;
-#define MAX_IRQS_PER_TABLE 256 +/* + * AMD IOMMU hardware only support 512 IRTEs despite + * the architectural limitation of 2048 entries. + */ +#define MAX_IRQS_PER_TABLE 512 #define IRQ_TABLE_ALIGNMENT 128
struct irq_remap_table {
From: Qian Cai cai@redhat.com
[ Upstream commit de5d9dae150ca1c1b5c7676711a9ca139d1a8dec ]
The call to rcu_cpu_starting() in smp_init_secondary() is not early enough in the CPU-hotplug onlining process, which results in lockdep splats as follows:
WARNING: suspicious RCU usage ----------------------------- kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
other info that might help us debug this:
RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0.
Call Trace: show_stack+0x158/0x1f0 dump_stack+0x1f2/0x238 __lock_acquire+0x2640/0x4dd0 lock_acquire+0x3a8/0xd08 _raw_spin_lock_irqsave+0xc0/0xf0 clockevents_register_device+0xa8/0x528 init_cpu_timer+0x33e/0x468 smp_init_secondary+0x11a/0x328 smp_start_secondary+0x82/0x88
This is avoided by moving the call to rcu_cpu_starting up near the beginning of the smp_init_secondary() function. Note that the raw_smp_processor_id() is required in order to avoid calling into lockdep before RCU has declared the CPU to be watched for readers.
Link: https://lore.kernel.org/lkml/160223032121.7002.1269740091547117869.tip-bot2@... Signed-off-by: Qian Cai cai@redhat.com Acked-by: Paul E. McKenney paulmck@kernel.org Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 85700bd85f98d..3b4c3140c18e7 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -855,13 +855,14 @@ void __init smp_detect_cpus(void)
static void smp_init_secondary(void) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id();
S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); set_cpu_flag(CIF_ASCE_PRIMARY); set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); + rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init();
From: Zhang Qilong zhangqilong3@huawei.com
[ Upstream commit bb742ad01961a3b9d1f9d19375487b879668b6b2 ]
pm_runtime_get_sync() will increment pm usage counter even it failed. Forgetting to call pm_runtime_put will result in reference leak in vfio_platform_open, so we should fix it.
Signed-off-by: Zhang Qilong zhangqilong3@huawei.com Acked-by: Eric Auger eric.auger@redhat.com Signed-off-by: Alex Williamson alex.williamson@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/vfio/platform/vfio_platform_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index c0771a9567fb5..fb4b385191f28 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -267,7 +267,7 @@ static int vfio_platform_open(void *device_data)
ret = pm_runtime_get_sync(vdev->device); if (ret < 0) - goto err_pm; + goto err_rst;
ret = vfio_platform_call_reset(vdev, &extra_dbg); if (ret && vdev->reset_required) { @@ -284,7 +284,6 @@ static int vfio_platform_open(void *device_data)
err_rst: pm_runtime_put(vdev->device); -err_pm: vfio_platform_irq_cleanup(vdev); err_irq: vfio_platform_regions_cleanup(vdev);
From: Fred Gao fred.gao@intel.com
[ Upstream commit e4eccb853664de7bcf9518fb658f35e748bf1f68 ]
Bypass the IGD initialization when -ENODEV returns, that should be the case if opregion is not available for IGD or within discrete graphics device's option ROM, or host/lpc bridge is not found.
Then use of -ENODEV here means no special device resources found which needs special care for VFIO, but we still allow other normal device resource access.
Cc: Zhenyu Wang zhenyuw@linux.intel.com Cc: Xiong Zhang xiong.y.zhang@intel.com Cc: Hang Yuan hang.yuan@linux.intel.com Cc: Stuart Summers stuart.summers@intel.com Signed-off-by: Fred Gao fred.gao@intel.com Signed-off-by: Alex Williamson alex.williamson@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/vfio/pci/vfio_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1ab1f5cda4ac2..bfdc010a6b043 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -385,7 +385,7 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) pdev->vendor == PCI_VENDOR_ID_INTEL && IS_ENABLED(CONFIG_VFIO_PCI_IGD)) { ret = vfio_pci_igd_init(vdev); - if (ret) { + if (ret && ret != -ENODEV) { pci_warn(pdev, "Failed to setup Intel IGD regions\n"); goto disable_exit; }
From: Qii Wang qii.wang@mediatek.com
[ Upstream commit aafced673c06b7c77040c1df42e2e965be5d0376 ]
The i2c driver default do dma reset after i2c reset, but sometimes i2c reset will trigger dma tx2rx, then apdma write data to dram which has been i2c_put_dma_safe_msg_buf(kfree). Move dma reset before i2c reset in mtk_i2c_init_hw to fix it.
Signed-off-by: Qii Wang qii.wang@mediatek.com Signed-off-by: Wolfram Sang wsa@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-mt65xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 0cbdfbe605b55..33de99b7bc20c 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -475,6 +475,10 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg;
+ writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); + udelay(50); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET);
/* Set ioconfig */ @@ -529,10 +533,6 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c)
mtk_i2c_writew(i2c, control_reg, OFFSET_CONTROL); mtk_i2c_writew(i2c, I2C_DELAY_LEN, OFFSET_DELAY_LEN); - - writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); - udelay(50); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); }
static const struct i2c_spec_values *mtk_i2c_get_spec(unsigned int speed)
From: Daniele Palmas dnlplm@gmail.com
[ Upstream commit 5fd8477ed8ca77e64b93d44a6dae4aa70c191396 ]
Add support for Telit LE910Cx 0x1230 composition:
0x1230: tty, adb, rmnet, audio, tty, tty, tty, tty
Signed-off-by: Daniele Palmas dnlplm@gmail.com Acked-by: Bjørn Mork bjorn@mork.no Link: https://lore.kernel.org/r/20201102110108.17244-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 5ca1356b8656f..3db5b5d104798 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1331,6 +1331,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1230, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */
From: Veerabadhran Gopalakrishnan veerabadhran.gopalakrishnan@amd.com
[ Upstream commit c6d2b0fbb893d5c7dda405aa0e7bcbecf1c75f98 ]
Concurrent operation of VCN and JPEG decoder in DPG mode is causing ring timeout due to power state.
Signed-off-by: Veerabadhran Gopalakrishnan veerabadhran.gopalakrishnan@amd.com Reviewed-by: Leo Liu leo.liu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c28ebf41530aa..254ab2ada70a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1220,8 +1220,7 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_MMHUB | - AMD_PG_SUPPORT_VCN | - AMD_PG_SUPPORT_VCN_DPG; + AMD_PG_SUPPORT_VCN; } else { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS |
From: "Steven Rostedt (VMware)" rostedt@goodmis.org
[ Upstream commit 645f224e7ba2f4200bf163153d384ceb0de5462e ]
Since the kprobe handlers have protection that prohibits other handlers from executing in other contexts (like if an NMI comes in while processing a kprobe, and executes the same kprobe, it will get fail with a "busy" return). Lockdep is unaware of this protection. Use lockdep's nesting api to differentiate between locks taken in INT3 context and other context to suppress the false warnings.
Link: https://lore.kernel.org/r/20201102160234.fa0ae70915ad9e2b21c08b85@kernel.org
Cc: Peter Zijlstra peterz@infradead.org Acked-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/kprobes.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e995541d277d4..5cfe6c26d6ea7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1250,7 +1250,13 @@ __acquires(hlist_lock)
*head = &kretprobe_inst_table[hash]; hlist_lock = kretprobe_table_lock_ptr(hash); - raw_spin_lock_irqsave(hlist_lock, *flags); + /* + * Nested is a workaround that will soon not be needed. + * There's other protections that make sure the same lock + * is not taken on the same CPU that lockdep is unaware of. + * Differentiate when it is taken in NMI context. + */ + raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi()); } NOKPROBE_SYMBOL(kretprobe_hash_lock);
@@ -1259,7 +1265,13 @@ static void kretprobe_table_lock(unsigned long hash, __acquires(hlist_lock) { raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); - raw_spin_lock_irqsave(hlist_lock, *flags); + /* + * Nested is a workaround that will soon not be needed. + * There's other protections that make sure the same lock + * is not taken on the same CPU that lockdep is unaware of. + * Differentiate when it is taken in NMI context. + */ + raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi()); } NOKPROBE_SYMBOL(kretprobe_table_lock);
@@ -1950,7 +1962,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
/* TODO: consider to only swap the RA after the last pre_handler fired */ hash = hash_ptr(current, KPROBE_HASH_BITS); - raw_spin_lock_irqsave(&rp->lock, flags); + /* + * Nested is a workaround that will soon not be needed. + * There's other protections that make sure the same lock + * is not taken on the same CPU that lockdep is unaware of. + */ + raw_spin_lock_irqsave_nested(&rp->lock, flags, 1); if (!hlist_empty(&rp->free_instances)) { ri = hlist_entry(rp->free_instances.first, struct kretprobe_instance, hlist); @@ -1961,7 +1978,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) ri->task = current;
if (rp->entry_handler && rp->entry_handler(ri, regs)) { - raw_spin_lock_irqsave(&rp->lock, flags); + raw_spin_lock_irqsave_nested(&rp->lock, flags, 1); hlist_add_head(&ri->hlist, &rp->free_instances); raw_spin_unlock_irqrestore(&rp->lock, flags); return 0;
From: Brian Foster bfoster@redhat.com
[ Upstream commit 50e7d6c7a5210063b9a6f0d8799d9d1440907fcf ]
The iomap writepage error handling logic is a mash of old and slightly broken XFS writepage logic. When keepwrite writeback state tracking was introduced in XFS in commit 0d085a529b42 ("xfs: ensure WB_SYNC_ALL writeback handles partial pages correctly"), XFS had an additional cluster writeback context that scanned ahead of ->writepage() to process dirty pages over the current ->writepage() extent mapping. This context expected a dirty page and required retention of the TOWRITE tag on partial page processing so the higher level writeback context would revisit the page (in contrast to ->writepage(), which passes a page with the dirty bit already cleared).
The cluster writeback mechanism was eventually removed and some of the error handling logic folded into the primary writeback path in commit 150d5be09ce4 ("xfs: remove xfs_cancel_ioend"). This patch accidentally conflated the two contexts by using the keepwrite logic in ->writepage() without accounting for the fact that the page is not dirty. Further, the keepwrite logic has no practical effect on the core ->writepage() caller (write_cache_pages()) because it never revisits a page in the current function invocation.
Technically, the page should be redirtied for the keepwrite logic to have any effect. Otherwise, write_cache_pages() may find the tagged page but will skip it since it is clean. Even if the page was redirtied, however, there is still no practical effect to keepwrite since write_cache_pages() does not wrap around within a single invocation of the function. Therefore, the dirty page would simply end up retagged on the next writeback sequence over the associated range.
All that being said, none of this really matters because redirtying a partially processed page introduces a potential infinite redirty -> writeback failure loop that deviates from the current design principle of clearing the dirty state on writepage failure to avoid building up too much dirty, unreclaimable memory on the system. Therefore, drop the spurious keepwrite usage and dirty state clearing logic from iomap_writepage_map(), treat the partially processed page the same as a fully processed page, and let the imminent ioend failure clean up the writeback state.
Signed-off-by: Brian Foster bfoster@redhat.com Reviewed-by: Darrick J. Wong darrick.wong@oracle.com Signed-off-by: Darrick J. Wong darrick.wong@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/iomap/buffered-io.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index b115e7d47fcec..238613443bec2 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1395,6 +1395,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list)); WARN_ON_ONCE(!PageLocked(page)); WARN_ON_ONCE(PageWriteback(page)); + WARN_ON_ONCE(PageDirty(page));
/* * We cannot cancel the ioend directly here on error. We may have @@ -1415,21 +1416,9 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, unlock_page(page); goto done; } - - /* - * If the page was not fully cleaned, we need to ensure that the - * higher layers come back to it correctly. That means we need - * to keep the page dirty, and for WB_SYNC_ALL writeback we need - * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed - * so another attempt to write this page in this writeback sweep - * will be made. - */ - set_page_writeback_keepwrite(page); - } else { - clear_page_dirty_for_io(page); - set_page_writeback(page); }
+ set_page_writeback(page); unlock_page(page);
/*
From: Tommi Rantala tommi.t.rantala@nokia.com
[ Upstream commit f3ae6c6e8a3ea49076d826c64e63ea78fbf9db43 ]
Makefile already contains -D_GNU_SOURCE, so we can remove it from the *.c files.
Signed-off-by: Tommi Rantala tommi.t.rantala@nokia.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/proc/proc-loadavg-001.c | 1 - tools/testing/selftests/proc/proc-self-syscall.c | 1 - tools/testing/selftests/proc/proc-uptime-002.c | 1 - 3 files changed, 3 deletions(-)
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index 471e2aa280776..fb4fe9188806e 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include <errno.h> #include <sched.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 9f6d000c02455..8511dcfe67c75 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b78490898..e7ceabed7f51f 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include <assert.h> #include <unistd.h>
From: Benjamin Gwin bgwin@google.com
[ Upstream commit 108aa503657ee2fe8aa071dc620d96372c252ecd ]
It's possible that the first region picked for the new kernel will make it impossible to fit the other segments in the required 32GB window, especially if we have a very large initrd.
Instead of giving up, we can keep testing other regions for the kernel until we find one that works.
Suggested-by: Ryan O'Leary ryanoleary@google.com Signed-off-by: Benjamin Gwin bgwin@google.com Link: https://lore.kernel.org/r/20201103201106.2397844-1-bgwin@google.com Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/kernel/kexec_image.c | 41 +++++++++++++++++++------- arch/arm64/kernel/machine_kexec_file.c | 9 +++++- 2 files changed, 39 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index af9987c154cab..66adee8b5fc81 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -43,7 +43,7 @@ static void *image_load(struct kimage *image, u64 flags, value; bool be_image, be_kernel; struct kexec_buf kbuf; - unsigned long text_offset; + unsigned long text_offset, kernel_segment_number; struct kexec_segment *kernel_segment; int ret;
@@ -88,11 +88,37 @@ static void *image_load(struct kimage *image, /* Adjust kernel segment with TEXT_OFFSET */ kbuf.memsz += text_offset;
- ret = kexec_add_buffer(&kbuf); - if (ret) + kernel_segment_number = image->nr_segments; + + /* + * The location of the kernel segment may make it impossible to satisfy + * the other segment requirements, so we try repeatedly to find a + * location that will work. + */ + while ((ret = kexec_add_buffer(&kbuf)) == 0) { + /* Try to load additional data */ + kernel_segment = &image->segment[kernel_segment_number]; + ret = load_other_segments(image, kernel_segment->mem, + kernel_segment->memsz, initrd, + initrd_len, cmdline); + if (!ret) + break; + + /* + * We couldn't find space for the other segments; erase the + * kernel segment and try the next available hole. + */ + image->nr_segments -= 1; + kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + } + + if (ret) { + pr_err("Could not find any suitable kernel location!"); return ERR_PTR(ret); + }
- kernel_segment = &image->segment[image->nr_segments - 1]; + kernel_segment = &image->segment[kernel_segment_number]; kernel_segment->mem += text_offset; kernel_segment->memsz -= text_offset; image->start = kernel_segment->mem; @@ -101,12 +127,7 @@ static void *image_load(struct kimage *image, kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz);
- /* Load additional data */ - ret = load_other_segments(image, - kernel_segment->mem, kernel_segment->memsz, - initrd, initrd_len, cmdline); - - return ERR_PTR(ret); + return 0; }
#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 361a1143e09ee..e443df8569881 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -242,6 +242,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) return ret; }
+/* + * Tries to add the initrd and DTB to the image. If it is not possible to find + * valid locations, this function will undo changes to the image and return non + * zero. + */ int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, @@ -250,7 +255,8 @@ int load_other_segments(struct kimage *image, { struct kexec_buf kbuf; void *headers, *dtb = NULL; - unsigned long headers_sz, initrd_load_addr = 0, dtb_len; + unsigned long headers_sz, initrd_load_addr = 0, dtb_len, + orig_segments = image->nr_segments; int ret = 0;
kbuf.image = image; @@ -336,6 +342,7 @@ int load_other_segments(struct kimage *image, return 0;
out_err: + image->nr_segments = orig_segments; vfree(dtb); return ret; }
From: Sean Anderson seanga2@gmail.com
[ Upstream commit 79605f1394261995c2b955c906a5a20fb27cdc84 ]
M-Mode Linux is loaded at the start of RAM, not 2MB later. Perhaps this should be calculated based on PAGE_OFFSET somehow? Even better would be to deprecate text_offset and instead introduce something absolute.
Signed-off-by: Sean Anderson seanga2@gmail.com Signed-off-by: Palmer Dabbelt palmerdabbelt@google.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/riscv/kernel/head.S | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 0a4e81b8dc795..5a0ae2eaf5e2f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -27,12 +27,17 @@ ENTRY(_start) /* reserved */ .word 0 .balign 8 +#ifdef CONFIG_RISCV_M_MODE + /* Image load offset (0MB) from start of RAM for M-mode */ + .dword 0 +#else #if __riscv_xlen == 64 /* Image load offset(2MB) from start of RAM */ .dword 0x200000 #else /* Image load offset(4MB) from start of RAM */ .dword 0x400000 +#endif #endif /* Effective size of kernel image */ .dword _end - _start
From: Ulrich Hecht uli+renesas@fpond.eu
[ Upstream commit a49cc1fe9d64a2dc4e19b599204f403e5d25f44b ]
Implements atomic transfers to fix reboot/shutdown on r8a7790 Lager and similar boards.
Signed-off-by: Ulrich Hecht uli+renesas@fpond.eu Tested-by: Wolfram Sang wsa+renesas@sang-engineering.com Tested-by: Geert Uytterhoeven geert+renesas@glider.be [wsa: some whitespace fixing] Signed-off-by: Wolfram Sang wsa@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-sh_mobile.c | 86 +++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 20 deletions(-)
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index cab7255599991..bdd60770779ad 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -129,6 +129,7 @@ struct sh_mobile_i2c_data { int sr; bool send_stop; bool stop_after_dma; + bool atomic_xfer;
struct resource *res; struct dma_chan *dma_tx; @@ -330,13 +331,15 @@ static unsigned char i2c_op(struct sh_mobile_i2c_data *pd, enum sh_mobile_i2c_op ret = iic_rd(pd, ICDR); break; case OP_RX_STOP: /* enable DTE interrupt, issue stop */ - iic_wr(pd, ICIC, - ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); + if (!pd->atomic_xfer) + iic_wr(pd, ICIC, + ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); iic_wr(pd, ICCR, ICCR_ICE | ICCR_RACK); break; case OP_RX_STOP_DATA: /* enable DTE interrupt, read data, issue stop */ - iic_wr(pd, ICIC, - ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); + if (!pd->atomic_xfer) + iic_wr(pd, ICIC, + ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); ret = iic_rd(pd, ICDR); iic_wr(pd, ICCR, ICCR_ICE | ICCR_RACK); break; @@ -429,7 +432,8 @@ static irqreturn_t sh_mobile_i2c_isr(int irq, void *dev_id)
if (wakeup) { pd->sr |= SW_DONE; - wake_up(&pd->wait); + if (!pd->atomic_xfer) + wake_up(&pd->wait); }
/* defeat write posting to avoid spurious WAIT interrupts */ @@ -581,6 +585,9 @@ static void start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg, pd->pos = -1; pd->sr = 0;
+ if (pd->atomic_xfer) + return; + pd->dma_buf = i2c_get_dma_safe_msg_buf(pd->msg, 8); if (pd->dma_buf) sh_mobile_i2c_xfer_dma(pd); @@ -637,15 +644,13 @@ static int poll_busy(struct sh_mobile_i2c_data *pd) return i ? 0 : -ETIMEDOUT; }
-static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, - struct i2c_msg *msgs, - int num) +static int sh_mobile_xfer(struct sh_mobile_i2c_data *pd, + struct i2c_msg *msgs, int num) { - struct sh_mobile_i2c_data *pd = i2c_get_adapdata(adapter); struct i2c_msg *msg; int err = 0; int i; - long timeout; + long time_left;
/* Wake up device and enable clock */ pm_runtime_get_sync(pd->dev); @@ -662,15 +667,35 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, if (do_start) i2c_op(pd, OP_START);
- /* The interrupt handler takes care of the rest... */ - timeout = wait_event_timeout(pd->wait, - pd->sr & (ICSR_TACK | SW_DONE), - adapter->timeout); - - /* 'stop_after_dma' tells if DMA transfer was complete */ - i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, pd->stop_after_dma); + if (pd->atomic_xfer) { + unsigned long j = jiffies + pd->adap.timeout; + + time_left = time_before_eq(jiffies, j); + while (time_left && + !(pd->sr & (ICSR_TACK | SW_DONE))) { + unsigned char sr = iic_rd(pd, ICSR); + + if (sr & (ICSR_AL | ICSR_TACK | + ICSR_WAIT | ICSR_DTE)) { + sh_mobile_i2c_isr(0, pd); + udelay(150); + } else { + cpu_relax(); + } + time_left = time_before_eq(jiffies, j); + } + } else { + /* The interrupt handler takes care of the rest... */ + time_left = wait_event_timeout(pd->wait, + pd->sr & (ICSR_TACK | SW_DONE), + pd->adap.timeout); + + /* 'stop_after_dma' tells if DMA xfer was complete */ + i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, + pd->stop_after_dma); + }
- if (!timeout) { + if (!time_left) { dev_err(pd->dev, "Transfer request timed out\n"); if (pd->dma_direction != DMA_NONE) sh_mobile_i2c_cleanup_dma(pd); @@ -696,14 +721,35 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, return err ?: num; }
+static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, + struct i2c_msg *msgs, + int num) +{ + struct sh_mobile_i2c_data *pd = i2c_get_adapdata(adapter); + + pd->atomic_xfer = false; + return sh_mobile_xfer(pd, msgs, num); +} + +static int sh_mobile_i2c_xfer_atomic(struct i2c_adapter *adapter, + struct i2c_msg *msgs, + int num) +{ + struct sh_mobile_i2c_data *pd = i2c_get_adapdata(adapter); + + pd->atomic_xfer = true; + return sh_mobile_xfer(pd, msgs, num); +} + static u32 sh_mobile_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_PROTOCOL_MANGLING; }
static const struct i2c_algorithm sh_mobile_i2c_algorithm = { - .functionality = sh_mobile_i2c_func, - .master_xfer = sh_mobile_i2c_xfer, + .functionality = sh_mobile_i2c_func, + .master_xfer = sh_mobile_i2c_xfer, + .master_xfer_atomic = sh_mobile_i2c_xfer_atomic, };
static const struct i2c_adapter_quirks sh_mobile_i2c_quirks = {
From: Michael Wu michael.wu@vatics.com
[ Upstream commit 66b92313e2ca9208b5f3ebf5d86e9a818299d8fa ]
If some bits were cleared by i2c_dw_read_clear_intrbits_slave() in i2c_dw_isr_slave() and not handled immediately, those cleared bits would not be shown again by later i2c_dw_read_clear_intrbits_slave(). They therefore were forgotten to be handled.
i2c_dw_read_clear_intrbits_slave() should be called once in an ISR and take its returned state for all later handlings.
Signed-off-by: Michael Wu michael.wu@vatics.com Acked-by: Jarkko Nikula jarkko.nikula@linux.intel.com Signed-off-by: Wolfram Sang wsa@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-designware-slave.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 44974b53a6268..13de01a0f75f0 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -159,7 +159,6 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) u32 raw_stat, stat, enabled, tmp; u8 val = 0, slave_activity;
- regmap_read(dev->map, DW_IC_INTR_STAT, &stat); regmap_read(dev->map, DW_IC_ENABLE, &enabled); regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &raw_stat); regmap_read(dev->map, DW_IC_STATUS, &tmp); @@ -168,6 +167,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY) || !dev->slave) return 0;
+ stat = i2c_dw_read_clear_intrbits_slave(dev); dev_dbg(dev->dev, "%#x STATUS SLAVE_ACTIVITY=%#x : RAW_INTR_STAT=%#x : INTR_STAT=%#x\n", enabled, slave_activity, raw_stat, stat); @@ -188,11 +188,9 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) val); } regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); - stat = i2c_dw_read_clear_intrbits_slave(dev); } else { regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); regmap_read(dev->map, DW_IC_CLR_RX_UNDER, &tmp); - stat = i2c_dw_read_clear_intrbits_slave(dev); } if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_REQUESTED, @@ -207,7 +205,6 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) regmap_read(dev->map, DW_IC_CLR_RX_DONE, &tmp);
i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); - stat = i2c_dw_read_clear_intrbits_slave(dev); return 1; }
@@ -219,7 +216,6 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) dev_vdbg(dev->dev, "Byte %X acked!", val); } else { i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); - stat = i2c_dw_read_clear_intrbits_slave(dev); }
return 1; @@ -230,7 +226,6 @@ static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id) struct dw_i2c_dev *dev = dev_id; int ret;
- i2c_dw_read_clear_intrbits_slave(dev); ret = i2c_dw_irq_handler_slave(dev); if (ret > 0) complete(&dev->cmd_complete);
From: Michael Wu michael.wu@vatics.com
[ Upstream commit 3b5f7f10ff6e6b66f553e12cc50d9bb751ce60ad ]
Sometimes we would get the following flow when doing an i2cset:
0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 I2C_SLAVE_WRITE_RECEIVED 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x714 : INTR_STAT=0x204 I2C_SLAVE_WRITE_REQUESTED I2C_SLAVE_WRITE_RECEIVED
Documentation/i2c/slave-interface.rst says that I2C_SLAVE_WRITE_REQUESTED, which is mandatory, should be sent while the data did not arrive yet. It means in a write-request I2C_SLAVE_WRITE_REQUESTED should be reported before any I2C_SLAVE_WRITE_RECEIVED.
By the way, I2C_SLAVE_STOP didn't be reported in the above case because DW_IC_INTR_STAT was not 0x200.
dev->status can be used to record the current state, especially Designware I2C controller has no interrupts to identify a write-request. This patch makes not only I2C_SLAVE_WRITE_REQUESTED been reported first when IC_INTR_RX_FULL is rising and dev->status isn't STATUS_WRITE_IN_PROGRESS but also I2C_SLAVE_STOP been reported when a STOP condition is received.
Signed-off-by: Michael Wu michael.wu@vatics.com Acked-by: Jarkko Nikula jarkko.nikula@linux.intel.com Signed-off-by: Wolfram Sang wsa@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-designware-slave.c | 45 +++++++++-------------- 1 file changed, 18 insertions(+), 27 deletions(-)
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 13de01a0f75f0..0d15f4c1e9f7e 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -172,26 +172,25 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) "%#x STATUS SLAVE_ACTIVITY=%#x : RAW_INTR_STAT=%#x : INTR_STAT=%#x\n", enabled, slave_activity, raw_stat, stat);
- if ((stat & DW_IC_INTR_RX_FULL) && (stat & DW_IC_INTR_STOP_DET)) - i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED, &val); + if (stat & DW_IC_INTR_RX_FULL) { + if (dev->status != STATUS_WRITE_IN_PROGRESS) { + dev->status = STATUS_WRITE_IN_PROGRESS; + i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED, + &val); + } + + regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); + val = tmp; + if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, + &val)) + dev_vdbg(dev->dev, "Byte %X acked!", val); + }
if (stat & DW_IC_INTR_RD_REQ) { if (slave_activity) { - if (stat & DW_IC_INTR_RX_FULL) { - regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); - val = tmp; - - if (!i2c_slave_event(dev->slave, - I2C_SLAVE_WRITE_RECEIVED, - &val)) { - dev_vdbg(dev->dev, "Byte %X acked!", - val); - } - regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); - } else { - regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); - regmap_read(dev->map, DW_IC_CLR_RX_UNDER, &tmp); - } + regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); + + dev->status = STATUS_READ_IN_PROGRESS; if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_REQUESTED, &val)) @@ -203,18 +202,10 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_PROCESSED, &val)) regmap_read(dev->map, DW_IC_CLR_RX_DONE, &tmp); - - i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); - return 1; }
- if (stat & DW_IC_INTR_RX_FULL) { - regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); - val = tmp; - if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, - &val)) - dev_vdbg(dev->dev, "Byte %X acked!", val); - } else { + if (stat & DW_IC_INTR_STOP_DET) { + dev->status = STATUS_IDLE; i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); }
From: Jerry Snitselaar jsnitsel@redhat.com
[ Upstream commit b154ce11ead925de6a94feb3b0317fafeefa0ebc ]
There is a misconfiguration in the bios of the gpio pin used for the interrupt in the T490s. When interrupts are enabled in the tpm_tis driver code this results in an interrupt storm. This was initially reported when we attempted to enable the interrupt code in the tpm_tis driver, which previously wasn't setting a flag to enable it. Due to the reports of the interrupt storm that code was reverted and we went back to polling instead of using interrupts. Now that we know the T490s problem is a firmware issue, add code to check if the system is a T490s and disable interrupts if that is the case. This will allow us to enable interrupts for everyone else. If the user has a fixed bios they can force the enabling of interrupts with tpm_tis.interrupts=1 on the kernel command line.
Cc: Peter Huewe peterhuewe@gmx.de Cc: Jason Gunthorpe jgg@ziepe.ca Cc: Hans de Goede hdegoede@redhat.com Signed-off-by: Jerry Snitselaar jsnitsel@redhat.com Reviewed-by: James Bottomley James.Bottomley@HansenPartnership.com Reviewed-by: Hans de Goede hdegoede@redhat.com Reviewed-by: Jarkko Sakkinen jarkko@kernel.org Signed-off-by: Jarkko Sakkinen jarkko@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/char/tpm/tpm_tis.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 0b214963539de..4ed6e660273a4 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -27,6 +27,7 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/kernel.h> +#include <linux/dmi.h> #include "tpm.h" #include "tpm_tis_core.h"
@@ -49,8 +50,8 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da return container_of(data, struct tpm_tis_tcg_phy, priv); }
-static bool interrupts = true; -module_param(interrupts, bool, 0444); +static int interrupts = -1; +module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts");
static bool itpm; @@ -63,6 +64,28 @@ module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Force device probe rather than using ACPI entry"); #endif
+static int tpm_tis_disable_irq(const struct dmi_system_id *d) +{ + if (interrupts == -1) { + pr_notice("tpm_tis: %s detected: disabling interrupts.\n", d->ident); + interrupts = 0; + } + + return 0; +} + +static const struct dmi_system_id tpm_tis_dmi_table[] = { + { + .callback = tpm_tis_disable_irq, + .ident = "ThinkPad T490s", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T490s"), + }, + }, + {} +}; + #if defined(CONFIG_PNP) && defined(CONFIG_ACPI) static int has_hid(struct acpi_device *dev, const char *hid) { @@ -192,6 +215,8 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info) int irq = -1; int rc;
+ dmi_check_system(tpm_tis_dmi_table); + rc = check_acpi_tpm2(dev); if (rc) return rc;
From: Christoph Hellwig hch@lst.de
[ Upstream commit d4d50710a8b46082224376ef119a4dbb75b25c56 ]
iov_iter based variant for reading a seq_file. seq_read is reimplemented on top of the iter variant.
Signed-off-by: Christoph Hellwig hch@lst.de Tested-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/seq_file.c | 45 ++++++++++++++++++++++++++++------------ include/linux/seq_file.h | 1 + 2 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/fs/seq_file.c b/fs/seq_file.c index 31219c1db17de..3b20e21604e74 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -18,6 +18,7 @@ #include <linux/mm.h> #include <linux/printk.h> #include <linux/string_helpers.h> +#include <linux/uio.h>
#include <linux/uaccess.h> #include <asm/page.h> @@ -146,7 +147,28 @@ static int traverse(struct seq_file *m, loff_t offset) */ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { - struct seq_file *m = file->private_data; + struct iovec iov = { .iov_base = buf, .iov_len = size}; + struct kiocb kiocb; + struct iov_iter iter; + ssize_t ret; + + init_sync_kiocb(&kiocb, file); + iov_iter_init(&iter, READ, &iov, 1, size); + + kiocb.ki_pos = *ppos; + ret = seq_read_iter(&kiocb, &iter); + *ppos = kiocb.ki_pos; + return ret; +} +EXPORT_SYMBOL(seq_read); + +/* + * Ready-made ->f_op->read_iter() + */ +ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct seq_file *m = iocb->ki_filp->private_data; + size_t size = iov_iter_count(iter); size_t copied = 0; size_t n; void *p; @@ -158,14 +180,14 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) * if request is to read from zero offset, reset iterator to first * record as it might have been already advanced by previous requests */ - if (*ppos == 0) { + if (iocb->ki_pos == 0) { m->index = 0; m->count = 0; }
- /* Don't assume *ppos is where we left it */ - if (unlikely(*ppos != m->read_pos)) { - while ((err = traverse(m, *ppos)) == -EAGAIN) + /* Don't assume ki_pos is where we left it */ + if (unlikely(iocb->ki_pos != m->read_pos)) { + while ((err = traverse(m, iocb->ki_pos)) == -EAGAIN) ; if (err) { /* With prejudice... */ @@ -174,7 +196,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) m->count = 0; goto Done; } else { - m->read_pos = *ppos; + m->read_pos = iocb->ki_pos; } }
@@ -187,13 +209,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) /* if not empty - flush it first */ if (m->count) { n = min(m->count, size); - err = copy_to_user(buf, m->buf + m->from, n); - if (err) + if (copy_to_iter(m->buf + m->from, n, iter) != n) goto Efault; m->count -= n; m->from += n; size -= n; - buf += n; copied += n; if (!size) goto Done; @@ -254,8 +274,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) } m->op->stop(m, p); n = min(m->count, size); - err = copy_to_user(buf, m->buf, n); - if (err) + if (copy_to_iter(m->buf, n, iter) != n) goto Efault; copied += n; m->count -= n; @@ -264,7 +283,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (!copied) copied = err; else { - *ppos += copied; + iocb->ki_pos += copied; m->read_pos += copied; } mutex_unlock(&m->lock); @@ -276,7 +295,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) err = -EFAULT; goto Done; } -EXPORT_SYMBOL(seq_read); +EXPORT_SYMBOL(seq_read_iter);
/** * seq_lseek - ->llseek() method for sequential files. diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 813614d4b71fb..b83b3ae3c877f 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -107,6 +107,7 @@ void seq_pad(struct seq_file *m, char c); char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); +ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); int seq_write(struct seq_file *seq, const void *data, size_t len);
On Mon, Nov 09, 2020 at 10:53:16PM -0500, Sasha Levin wrote:
From: Christoph Hellwig hch@lst.de
[ Upstream commit d4d50710a8b46082224376ef119a4dbb75b25c56 ]
iov_iter based variant for reading a seq_file. seq_read is reimplemented on top of the iter variant.
Signed-off-by: Christoph Hellwig hch@lst.de Tested-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org
fs/seq_file.c | 45 ++++++++++++++++++++++++++++------------ include/linux/seq_file.h | 1 + 2 files changed, 33 insertions(+), 13 deletions(-)
This is not needed for anything older than 5.10, please drop.
thanks,
greg k-h
Should not be needed in stable in any form.
From: Daniel Vetter daniel.vetter@ffwll.ch
[ Upstream commit 3c4e0dff2095c579b142d5a0693257f1c58b4804 ]
It's buggy:
On Fri, Nov 06, 2020 at 10:30:08PM +0800, Minh Yuan wrote:
We recently discovered a slab-out-of-bounds read in fbcon in the latest kernel ( v5.10-rc2 for now ). The root cause of this vulnerability is that "fbcon_do_set_font" did not handle "vc->vc_font.data" and "vc->vc_font.height" correctly, and the patch https://lkml.org/lkml/2020/9/27/223 for VT_RESIZEX can't handle this issue.
Specifically, we use KD_FONT_OP_SET to set a small font.data for tty6, and use KD_FONT_OP_SET again to set a large font.height for tty1. After that, we use KD_FONT_OP_COPY to assign tty6's vc_font.data to tty1's vc_font.data in "fbcon_do_set_font", while tty1 retains the original larger height. Obviously, this will cause an out-of-bounds read, because we can access a smaller vc_font.data with a larger vc_font.height.
Further there was only one user ever. - Android's loadfont, busybox and console-tools only ever use OP_GET and OP_SET - fbset documentation only mentions the kernel cmdline font: option, not anything else. - systemd used OP_COPY before release 232 published in Nov 2016
Now unfortunately the crucial report seems to have gone down with gmane, and the commit message doesn't say much. But the pull request hints at OP_COPY being broken
https://github.com/systemd/systemd/pull/3651
So in other words, this never worked, and the only project which foolishly every tried to use it, realized that rather quickly too.
Instead of trying to fix security issues here on dead code by adding missing checks, fix the entire thing by removing the functionality.
Note that systemd code using the OP_COPY function ignored the return value, so it doesn't matter what we're doing here really - just in case a lone server somewhere happens to be extremely unlucky and running an affected old version of systemd. The relevant code from font_copy_to_all_vcs() in systemd was:
/* copy font from active VT, where the font was uploaded to */ cfo.op = KD_FONT_OP_COPY; cfo.height = vcs.v_active-1; /* tty1 == index 0 */ (void) ioctl(vcfd, KDFONTOP, &cfo);
Note this just disables the ioctl, garbage collecting the now unused callbacks is left for -next.
v2: Tetsuo found the old mail, which allowed me to find it on another archive. Add the link too.
Acked-by: Peilin Ye yepeilin.cs@gmail.com Reported-by: Minh Yuan yuanmingbuaa@gmail.com References: https://lists.freedesktop.org/archives/systemd-devel/2016-June/036935.html References: https://github.com/systemd/systemd/pull/3651 Cc: Greg KH greg@kroah.com Cc: Peilin Ye yepeilin.cs@gmail.com Cc: Tetsuo Handa penguin-kernel@i-love.sakura.ne.jp Signed-off-by: Daniel Vetter daniel.vetter@intel.com Link: https://lore.kernel.org/r/20201108153806.3140315-1-daniel.vetter@ffwll.ch Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/tty/vt/vt.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 19cd4a4b19399..e7ef807bcaaaf 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4700,27 +4700,6 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op) return rc; }
-static int con_font_copy(struct vc_data *vc, struct console_font_op *op) -{ - int con = op->height; - int rc; - - - console_lock(); - if (vc->vc_mode != KD_TEXT) - rc = -EINVAL; - else if (!vc->vc_sw->con_font_copy) - rc = -ENOSYS; - else if (con < 0 || !vc_cons_allocated(con)) - rc = -ENOTTY; - else if (con == vc->vc_num) /* nothing to do */ - rc = 0; - else - rc = vc->vc_sw->con_font_copy(vc, con); - console_unlock(); - return rc; -} - int con_font_op(struct vc_data *vc, struct console_font_op *op) { switch (op->op) { @@ -4731,7 +4710,8 @@ int con_font_op(struct vc_data *vc, struct console_font_op *op) case KD_FONT_OP_SET_DEFAULT: return con_font_default(vc, op); case KD_FONT_OP_COPY: - return con_font_copy(vc, op); + /* was buggy and never really used */ + return -EINVAL; } return -ENOSYS; }
From: Eddy Wu itseddy0402@gmail.com
[ Upstream commit b4e00444cab4c3f3fec876dc0cccc8cbb0d1a948 ]
current->group_leader->exit_signal may change during copy_process() if current->real_parent exits.
Move the assignment inside tasklist_lock to avoid the race.
Signed-off-by: Eddy Wu eddy_wu@trendmicro.com Acked-by: Oleg Nesterov oleg@redhat.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/fork.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c index 8934886d16549..5fe09d4e6d6a0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2167,14 +2167,9 @@ static __latent_entropy struct task_struct *copy_process( /* ok, now we should be set up.. */ p->pid = pid_nr(pid); if (clone_flags & CLONE_THREAD) { - p->exit_signal = -1; p->group_leader = current->group_leader; p->tgid = current->tgid; } else { - if (clone_flags & CLONE_PARENT) - p->exit_signal = current->group_leader->exit_signal; - else - p->exit_signal = args->exit_signal; p->group_leader = p; p->tgid = p->pid; } @@ -2218,9 +2213,14 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; + if (clone_flags & CLONE_THREAD) + p->exit_signal = -1; + else + p->exit_signal = current->group_leader->exit_signal; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; + p->exit_signal = args->exit_signal; }
klp_copy_process(p);
linux-stable-mirror@lists.linaro.org