From: Stefano Brivio sbrivio@redhat.com
[ Upstream commit 8cc4ccf58379935f3ad456cc34e61c4e4c921d0e ]
There doesn't seem to be any reason to restrict MAC address matching to source MAC addresses in set types bitmap:ipmac, hash:ipmac and hash:mac. With this patch, and this setup:
ip netns add A ip link add veth1 type veth peer name veth2 netns A ip addr add 192.0.2.1/24 dev veth1 ip -net A addr add 192.0.2.2/24 dev veth2 ip link set veth1 up ip -net A link set veth2 up
ip netns exec A ipset create test hash:mac dst=$(ip netns exec A cat /sys/class/net/veth2/address) ip netns exec A ipset add test ${dst} ip netns exec A iptables -P INPUT DROP ip netns exec A iptables -I INPUT -m set --match-set test dst -j ACCEPT
ipset will match packets based on destination MAC address:
# ping -c1 192.0.2.2 >/dev/null # echo $? 0
Reported-by: Yi Chen yiche@redhat.com Signed-off-by: Stefano Brivio sbrivio@redhat.com Signed-off-by: Jozsef Kadlecsik kadlec@blackhole.kfki.hu Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +++++----- net/netfilter/ipset/ip_set_hash_ipmac.c | 16 ++++++++++------ net/netfilter/ipset/ip_set_hash_mac.c | 10 +++++----- 3 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index c00b6a2e8e3c..13ade5782847 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -219,10 +219,6 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip;
- /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_TWO_SRC)) - return 0; - ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -233,7 +229,11 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, return -EINVAL;
e.id = ip_to_id(map, ip); - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index 1ab5ed2f6839..fd87de3ed55b 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -103,7 +103,11 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL;
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + if (ether_addr_equal(e.ether, invalid_ether)) return -EINVAL;
@@ -211,15 +215,15 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_TWO_SRC)) - return 0; - if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL;
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + if (ether_addr_equal(e.ether, invalid_ether)) return -EINVAL;
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index f9d5a2a1e3d0..4fe5f243d0a3 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -81,15 +81,15 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_ONE_SRC)) - return 0; - if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL;
- ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + if (is_zero_ether_addr(e.ether)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
From: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org
[ Upstream commit ed8dce4c6f726b7f3c6bf40859b92a9e32f189c1 ]
Keeping the irq_chip definition static will make it shared with multiple giochips in the system. This practice is considered to be bad and now we will get the below warning from gpiolib core:
"detected irqchip that is shared with multiple gpiochips: please fix the driver."
Hence, move the irq_chip definition from static to `struct pl061` for using a unique irq_chip for each gpiochip.
Signed-off-by: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpio/gpio-pl061.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index 2afd9de84a0d..dc42571e6fdc 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -54,6 +54,7 @@ struct pl061 {
void __iomem *base; struct gpio_chip gc; + struct irq_chip irq_chip; int parent_irq;
#ifdef CONFIG_PM @@ -281,15 +282,6 @@ static int pl061_irq_set_wake(struct irq_data *d, unsigned int state) return irq_set_irq_wake(pl061->parent_irq, state); }
-static struct irq_chip pl061_irqchip = { - .name = "pl061", - .irq_ack = pl061_irq_ack, - .irq_mask = pl061_irq_mask, - .irq_unmask = pl061_irq_unmask, - .irq_set_type = pl061_irq_type, - .irq_set_wake = pl061_irq_set_wake, -}; - static int pl061_probe(struct amba_device *adev, const struct amba_id *id) { struct device *dev = &adev->dev; @@ -328,6 +320,13 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id) /* * irq_chip support */ + pl061->irq_chip.name = dev_name(dev); + pl061->irq_chip.irq_ack = pl061_irq_ack; + pl061->irq_chip.irq_mask = pl061_irq_mask; + pl061->irq_chip.irq_unmask = pl061_irq_unmask; + pl061->irq_chip.irq_set_type = pl061_irq_type; + pl061->irq_chip.irq_set_wake = pl061_irq_set_wake; + writeb(0, pl061->base + GPIOIE); /* disable irqs */ irq = adev->irq[0]; if (irq < 0) { @@ -336,14 +335,14 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id) } pl061->parent_irq = irq;
- ret = gpiochip_irqchip_add(&pl061->gc, &pl061_irqchip, + ret = gpiochip_irqchip_add(&pl061->gc, &pl061->irq_chip, 0, handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_info(&adev->dev, "could not add irqchip\n"); return ret; } - gpiochip_set_chained_irqchip(&pl061->gc, &pl061_irqchip, + gpiochip_set_chained_irqchip(&pl061->gc, &pl061->irq_chip, irq, pl061_irq_handler);
amba_set_drvdata(adev, pl061);
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit 576ce4075bfa0f03e0e91a89eecc539b3b828b08 ]
gcc notices that without either the ac97 bus or the pdata, we never initialize the regmap pointer, which leads to an uninitialized variable access:
sound/soc/codecs/wm9712.c: In function 'wm9712_soc_probe': sound/soc/codecs/wm9712.c:666:2: error: 'regmap' may be used uninitialized in this function [-Werror=maybe-uninitialized]
Since that configuration is invalid, it's better to return an error here. I tried to avoid adding complexity to the conditions, and turned the #ifdef into a regular if(IS_ENABLED()) check for readability. This in turn requires moving some header file declarations out of an #ifdef.
The same code is used in three drivers, all of which I'm changing the same way.
Fixes: 2ed1a8e0ce8d ("ASoC: wm9712: add ac97 new bus support") Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/sound/soc.h | 2 +- sound/soc/codecs/wm9705.c | 10 ++++------ sound/soc/codecs/wm9712.c | 10 ++++------ sound/soc/codecs/wm9713.c | 10 ++++------ 4 files changed, 13 insertions(+), 19 deletions(-)
diff --git a/include/sound/soc.h b/include/sound/soc.h index 70c10a8f3e90..3e0ac310a3df 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -553,12 +553,12 @@ static inline void snd_soc_jack_free_gpios(struct snd_soc_jack *jack, int count, } #endif
-#ifdef CONFIG_SND_SOC_AC97_BUS struct snd_ac97 *snd_soc_alloc_ac97_component(struct snd_soc_component *component); struct snd_ac97 *snd_soc_new_ac97_component(struct snd_soc_component *component, unsigned int id, unsigned int id_mask); void snd_soc_free_ac97_component(struct snd_ac97 *ac97);
+#ifdef CONFIG_SND_SOC_AC97_BUS int snd_soc_set_ac97_ops(struct snd_ac97_bus_ops *ops); int snd_soc_set_ac97_ops_of_reset(struct snd_ac97_bus_ops *ops, struct platform_device *pdev); diff --git a/sound/soc/codecs/wm9705.c b/sound/soc/codecs/wm9705.c index ccdf088461b7..54c306707c02 100644 --- a/sound/soc/codecs/wm9705.c +++ b/sound/soc/codecs/wm9705.c @@ -325,8 +325,7 @@ static int wm9705_soc_probe(struct snd_soc_component *component) if (wm9705->mfd_pdata) { wm9705->ac97 = wm9705->mfd_pdata->ac97; regmap = wm9705->mfd_pdata->regmap; - } else { -#ifdef CONFIG_SND_SOC_AC97_BUS + } else if (IS_ENABLED(CONFIG_SND_SOC_AC97_BUS)) { wm9705->ac97 = snd_soc_new_ac97_component(component, WM9705_VENDOR_ID, WM9705_VENDOR_ID_MASK); if (IS_ERR(wm9705->ac97)) { @@ -339,7 +338,8 @@ static int wm9705_soc_probe(struct snd_soc_component *component) snd_soc_free_ac97_component(wm9705->ac97); return PTR_ERR(regmap); } -#endif + } else { + return -ENXIO; }
snd_soc_component_set_drvdata(component, wm9705->ac97); @@ -350,14 +350,12 @@ static int wm9705_soc_probe(struct snd_soc_component *component)
static void wm9705_soc_remove(struct snd_soc_component *component) { -#ifdef CONFIG_SND_SOC_AC97_BUS struct wm9705_priv *wm9705 = snd_soc_component_get_drvdata(component);
- if (!wm9705->mfd_pdata) { + if (IS_ENABLED(CONFIG_SND_SOC_AC97_BUS) && !wm9705->mfd_pdata) { snd_soc_component_exit_regmap(component); snd_soc_free_ac97_component(wm9705->ac97); } -#endif }
static const struct snd_soc_component_driver soc_component_dev_wm9705 = { diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index e873baa9e778..01949eaba4fd 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -642,8 +642,7 @@ static int wm9712_soc_probe(struct snd_soc_component *component) if (wm9712->mfd_pdata) { wm9712->ac97 = wm9712->mfd_pdata->ac97; regmap = wm9712->mfd_pdata->regmap; - } else { -#ifdef CONFIG_SND_SOC_AC97_BUS + } else if (IS_ENABLED(CONFIG_SND_SOC_AC97_BUS)) { int ret;
wm9712->ac97 = snd_soc_new_ac97_component(component, WM9712_VENDOR_ID, @@ -660,7 +659,8 @@ static int wm9712_soc_probe(struct snd_soc_component *component) snd_soc_free_ac97_component(wm9712->ac97); return PTR_ERR(regmap); } -#endif + } else { + return -ENXIO; }
snd_soc_component_init_regmap(component, regmap); @@ -673,14 +673,12 @@ static int wm9712_soc_probe(struct snd_soc_component *component)
static void wm9712_soc_remove(struct snd_soc_component *component) { -#ifdef CONFIG_SND_SOC_AC97_BUS struct wm9712_priv *wm9712 = snd_soc_component_get_drvdata(component);
- if (!wm9712->mfd_pdata) { + if (IS_ENABLED(CONFIG_SND_SOC_AC97_BUS) && !wm9712->mfd_pdata) { snd_soc_component_exit_regmap(component); snd_soc_free_ac97_component(wm9712->ac97); } -#endif }
static const struct snd_soc_component_driver soc_component_dev_wm9712 = { diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 643863bb32e0..5a2fdf4f69bf 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -1214,8 +1214,7 @@ static int wm9713_soc_probe(struct snd_soc_component *component) if (wm9713->mfd_pdata) { wm9713->ac97 = wm9713->mfd_pdata->ac97; regmap = wm9713->mfd_pdata->regmap; - } else { -#ifdef CONFIG_SND_SOC_AC97_BUS + } else if (IS_ENABLED(CONFIG_SND_SOC_AC97_BUS)) { wm9713->ac97 = snd_soc_new_ac97_component(component, WM9713_VENDOR_ID, WM9713_VENDOR_ID_MASK); if (IS_ERR(wm9713->ac97)) @@ -1225,7 +1224,8 @@ static int wm9713_soc_probe(struct snd_soc_component *component) snd_soc_free_ac97_component(wm9713->ac97); return PTR_ERR(regmap); } -#endif + } else { + return -ENXIO; }
snd_soc_component_init_regmap(component, regmap); @@ -1238,14 +1238,12 @@ static int wm9713_soc_probe(struct snd_soc_component *component)
static void wm9713_soc_remove(struct snd_soc_component *component) { -#ifdef CONFIG_SND_SOC_AC97_BUS struct wm9713_priv *wm9713 = snd_soc_component_get_drvdata(component);
- if (!wm9713->mfd_pdata) { + if (IS_ENABLED(CONFIG_SND_SOC_AC97_BUS) && !wm9713->mfd_pdata) { snd_soc_component_exit_regmap(component); snd_soc_free_ac97_component(wm9713->ac97); } -#endif }
static const struct snd_soc_component_driver soc_component_dev_wm9713 = {
From: Nicholas Kazlauskas nicholas.kazlauskas@amd.com
[ Upstream commit f41a895026b8cb6f765190de7d2e7bc3ccbbd183 ]
[Why]
The igt@kms_plane@pixel-format-pipe tests can create a sequence where stream_state is NULL during amdgpu_dm_crtc_set_crc_source which results in a null pointer dereference.
[How]
Guard against stream_state being NULL before accessing its fields. This doesn't fix the root cause of the issue so a DRM_ERROR is generated to still fail the tests.
Signed-off-by: Nicholas Kazlauskas nicholas.kazlauskas@amd.com Reviewed-by: David Francis David.Francis@amd.com Acked-by: Leo Li sunpeng.li@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 01fc5717b657..f088ac585978 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -75,6 +75,11 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) return -EINVAL; }
+ if (!stream_state) { + DRM_ERROR("No stream state for CRTC%d\n", crtc->index); + return -EINVAL; + } + /* When enabling CRC, we should also disable dithering. */ if (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO) { if (dc_stream_configure_crc(stream_state->ctx->dc,
From: Christian König christian.koenig@amd.com
[ Upstream commit 2383a767c0ca06f96534456d8313909017c6c8d0 ]
Vega10 has multiple interrupt rings, so this can be called from multiple calles at the same time resulting in:
[ 71.779334] ================================ [ 71.779406] WARNING: inconsistent lock state [ 71.779478] 4.19.0-rc1+ #44 Tainted: G W [ 71.779565] -------------------------------- [ 71.779637] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ 71.779740] kworker/6:1/120 [HC0[0]:SC0[0]:HE1:SE1] takes: [ 71.779832] 00000000ad761971 (&(&kfd->interrupt_lock)->rlock){?...}, at: kgd2kfd_interrupt+0x75/0x100 [amdgpu] [ 71.780058] {IN-HARDIRQ-W} state was registered at: [ 71.780115] _raw_spin_lock+0x2c/0x40 [ 71.780180] kgd2kfd_interrupt+0x75/0x100 [amdgpu] [ 71.780248] amdgpu_irq_callback+0x6c/0x150 [amdgpu] [ 71.780315] amdgpu_ih_process+0x88/0x100 [amdgpu] [ 71.780380] amdgpu_irq_handler+0x20/0x40 [amdgpu] [ 71.780409] __handle_irq_event_percpu+0x49/0x2a0 [ 71.780436] handle_irq_event_percpu+0x30/0x70 [ 71.780461] handle_irq_event+0x37/0x60 [ 71.780484] handle_edge_irq+0x83/0x1b0 [ 71.780506] handle_irq+0x1f/0x30 [ 71.780526] do_IRQ+0x53/0x110 [ 71.780544] ret_from_intr+0x0/0x22 [ 71.780566] cpuidle_enter_state+0xaa/0x330 [ 71.780591] do_idle+0x203/0x280 [ 71.780610] cpu_startup_entry+0x6f/0x80 [ 71.780634] start_secondary+0x1b0/0x200 [ 71.780657] secondary_startup_64+0xa4/0xb0
Fix this by always using irq save spin locks.
Signed-off-by: Christian König christian.koenig@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index e4ded890b1cb..6edaf11d69aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -688,6 +688,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; bool is_patched = false; + unsigned long flags;
if (!kfd->init_complete) return; @@ -697,7 +698,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) return; }
- spin_lock(&kfd->interrupt_lock); + spin_lock_irqsave(&kfd->interrupt_lock, flags);
if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry, @@ -706,7 +707,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) is_patched ? patched_ihre : ih_ring_entry)) queue_work(kfd->ih_wq, &kfd->interrupt_work);
- spin_unlock(&kfd->interrupt_lock); + spin_unlock_irqrestore(&kfd->interrupt_lock, flags); }
int kgd2kfd_quiesce_mm(struct mm_struct *mm)
From: Sergey Matyukevich sergey.matyukevich.os@quantenna.com
[ Upstream commit 1066bd193d681bda0fbacda9df351241a5ee04d9 ]
This patch fixes the following warnings:
- smatch drivers/net/wireless/quantenna/qtnfmac/commands.c:132 qtnf_cmd_send_with_reply() warn: variable dereferenced before check 'resp' (see line 117) drivers/net/wireless/quantenna/qtnfmac/commands.c:716 qtnf_cmd_get_sta_info() error: uninitialized symbol 'var_resp_len'. drivers/net/wireless/quantenna/qtnfmac/commands.c:1668 qtnf_cmd_get_mac_info() error: uninitialized symbol 'var_data_len'. drivers/net/wireless/quantenna/qtnfmac/commands.c:1697 qtnf_cmd_get_hw_info() error: uninitialized symbol 'info_len'. drivers/net/wireless/quantenna/qtnfmac/commands.c:1753 qtnf_cmd_band_info_get() error: uninitialized symbol 'info_len'. drivers/net/wireless/quantenna/qtnfmac/commands.c:1782 qtnf_cmd_send_get_phy_params() error: uninitialized symbol 'response_size'. drivers/net/wireless/quantenna/qtnfmac/commands.c:2438 qtnf_cmd_get_chan_stats() error: uninitialized symbol 'var_data_len'.
- gcc-8.2.1 drivers/net/wireless/quantenna/qtnfmac/commands.c: In function 'qtnf_cmd_send_with_reply': drivers/net/wireless/quantenna/qtnfmac/commands.c:133:54: error: 'resp' may be used uninitialized in this function [-Werror=maybe-uninitialized]
Reported-by: Dan Carpenter dan.carpenter@oracle.com Reported-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Sergey Matyukevich sergey.matyukevich.os@quantenna.com Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Sasha Levin sashal@kernel.org --- .../net/wireless/quantenna/qtnfmac/commands.c | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index bfdc1ad30c13..659e7649fe22 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -84,7 +84,7 @@ static int qtnf_cmd_send_with_reply(struct qtnf_bus *bus, size_t *var_resp_size) { struct qlink_cmd *cmd; - const struct qlink_resp *resp; + struct qlink_resp *resp = NULL; struct sk_buff *resp_skb = NULL; u16 cmd_id; u8 mac_id; @@ -113,7 +113,12 @@ static int qtnf_cmd_send_with_reply(struct qtnf_bus *bus, if (ret) goto out;
- resp = (const struct qlink_resp *)resp_skb->data; + if (WARN_ON(!resp_skb || !resp_skb->data)) { + ret = -EFAULT; + goto out; + } + + resp = (struct qlink_resp *)resp_skb->data; ret = qtnf_cmd_check_reply_header(resp, cmd_id, mac_id, vif_id, const_resp_size); if (ret) @@ -686,7 +691,7 @@ int qtnf_cmd_get_sta_info(struct qtnf_vif *vif, const u8 *sta_mac, struct sk_buff *cmd_skb, *resp_skb = NULL; struct qlink_cmd_get_sta_info *cmd; const struct qlink_resp_get_sta_info *resp; - size_t var_resp_len; + size_t var_resp_len = 0; int ret = 0;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid, @@ -1650,7 +1655,7 @@ int qtnf_cmd_get_mac_info(struct qtnf_wmac *mac) { struct sk_buff *cmd_skb, *resp_skb = NULL; const struct qlink_resp_get_mac_info *resp; - size_t var_data_len; + size_t var_data_len = 0; int ret = 0;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, QLINK_VIFID_RSVD, @@ -1680,8 +1685,8 @@ int qtnf_cmd_get_hw_info(struct qtnf_bus *bus) { struct sk_buff *cmd_skb, *resp_skb = NULL; const struct qlink_resp_get_hw_info *resp; + size_t info_len = 0; int ret = 0; - size_t info_len;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(QLINK_MACID_RSVD, QLINK_VIFID_RSVD, QLINK_CMD_GET_HW_INFO, @@ -1709,9 +1714,9 @@ int qtnf_cmd_band_info_get(struct qtnf_wmac *mac, struct ieee80211_supported_band *band) { struct sk_buff *cmd_skb, *resp_skb = NULL; - size_t info_len; struct qlink_cmd_band_info_get *cmd; struct qlink_resp_band_info_get *resp; + size_t info_len = 0; int ret = 0; u8 qband;
@@ -1764,8 +1769,8 @@ int qtnf_cmd_band_info_get(struct qtnf_wmac *mac, int qtnf_cmd_send_get_phy_params(struct qtnf_wmac *mac) { struct sk_buff *cmd_skb, *resp_skb = NULL; - size_t response_size; struct qlink_resp_phy_params *resp; + size_t response_size = 0; int ret = 0;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, 0, @@ -2431,7 +2436,7 @@ int qtnf_cmd_get_chan_stats(struct qtnf_wmac *mac, u16 channel, struct sk_buff *cmd_skb, *resp_skb = NULL; struct qlink_cmd_get_chan_stats *cmd; struct qlink_resp_get_chan_stats *resp; - size_t var_data_len; + size_t var_data_len = 0; int ret = 0;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, QLINK_VIFID_RSVD,
From: Shannon Nelson shannon.nelson@oracle.com
[ Upstream commit 7fa57ca443cffe81ce8416b57966bfb0370678a1 ]
When it's possible that the PF might end up trying to send a packet to one of its own VFs, we have to forbid IPsec offload because the device drops the packets into a black hole. See commit 47b6f50077e6 ("ixgbe: disallow IPsec Tx offload when in SR-IOV mode") for more info.
This really is only necessary when the device is in the default VEB mode. If instead the device is running in VEPA mode, the packets will go through the encryption engine and out the MAC/PHY as normal, and get "hairpinned" as needed by the switch.
So let's not block IPsec offload when in VEPA mode. To get there with the ixgbe device, use the handy 'bridge' command: bridge link set dev eth1 hwmode vepa
Signed-off-by: Shannon Nelson shannon.nelson@oracle.com Tested-by: Andrew Bowers andrewx.bowers@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirsher@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index fd1b0546fd67..4d77f42e035c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -4,6 +4,7 @@ #include "ixgbe.h" #include <net/xfrm.h> #include <crypto/aead.h> +#include <linux/if_bridge.h>
#define IXGBE_IPSEC_KEY_BITS 160 static const char aes_gcm_name[] = "rfc4106(gcm(aes))"; @@ -693,7 +694,8 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs) } else { struct tx_sa tsa;
- if (adapter->num_vfs) + if (adapter->num_vfs && + adapter->bridge_mode != BRIDGE_MODE_VEPA) return -EOPNOTSUPP;
/* find the first unused index */
From: João Paulo Rechi Vita jprvita@gmail.com
[ Upstream commit 78f3ac76d9e5219589718b9e4733bee21627b3f5 ]
In the past, Asus firmwares would change the panel backlight directly through the EC when the display off hotkey (Fn+F7) was pressed, and only notify the OS of such change, with 0x33 when the LCD was ON and 0x34 when the LCD was OFF. These are currently mapped to KEY_DISPLAYTOGGLE and KEY_DISPLAY_OFF, respectively.
Most recently the EC on Asus most machines lost ability to toggle the LCD backlight directly, but unless the OS informs the firmware it is going to handle the display toggle hotkey events, the firmware still tries change the brightness through the EC, to no effect. The end result is a long list (at Endless we counted 11) of Asus laptop models where the display toggle hotkey does not perform any action. Our firmware engineers contacts at Asus were surprised that there were still machines out there with the old behavior.
Calling WMNB(ASUS_WMI_DEVID_BACKLIGHT==0x00050011, 2) on the _WDG device tells the firmware that it should let the OS handle the display toggle event, in which case it will simply notify the OS of a key press with 0x35, as shown by the DSDT excerpts bellow.
Scope (_SB) { (...)
Device (ATKD) { (...)
Name (_WDG, Buffer (0x28) { /* 0000 */ 0xD0, 0x5E, 0x84, 0x97, 0x6D, 0x4E, 0xDE, 0x11, /* 0008 */ 0x8A, 0x39, 0x08, 0x00, 0x20, 0x0C, 0x9A, 0x66, /* 0010 */ 0x4E, 0x42, 0x01, 0x02, 0x35, 0xBB, 0x3C, 0x0B, /* 0018 */ 0xC2, 0xE3, 0xED, 0x45, 0x91, 0xC2, 0x4C, 0x5A, /* 0020 */ 0x6D, 0x19, 0x5D, 0x1C, 0xFF, 0x00, 0x01, 0x08 }) Method (WMNB, 3, Serialized) { CreateDWordField (Arg2, Zero, IIA0) CreateDWordField (Arg2, 0x04, IIA1) Local0 = (Arg1 & 0xFFFFFFFF)
(...)
If ((Local0 == 0x53564544)) { (...)
If ((IIA0 == 0x00050011)) { If ((IIA1 == 0x02)) { ^^PCI0.SBRG.EC0.SPIN (0x72, One) ^^PCI0.SBRG.EC0.BLCT = One }
Return (One) } } (...) } (...) } (...) } (...)
Scope (_SB.PCI0.SBRG.EC0) { (...)
Name (BLCT, Zero)
(...)
Method (_Q10, 0, NotSerialized) // _Qxx: EC Query { If ((BLCT == Zero)) { Local0 = One Local0 = RPIN (0x72) Local0 ^= One SPIN (0x72, Local0) If (ATKP) { Local0 = (0x34 - Local0) ^^^^ATKD.IANE (Local0) } } ElseIf ((BLCT == One)) { If (ATKP) { ^^^^ATKD.IANE (0x35) } } } (...) }
Signed-off-by: João Paulo Rechi Vita jprvita@endlessm.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/asus-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index c285a16675ee..37b5de541270 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2131,7 +2131,8 @@ static int asus_wmi_add(struct platform_device *pdev) err = asus_wmi_backlight_init(asus); if (err && err != -ENODEV) goto fail_backlight; - } + } else + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL);
status = wmi_install_notify_handler(asus->driver->event_guid, asus_wmi_notify, asus);
Hello Sasha,
On Tue, Jan 8, 2019 at 11:26 AM Sasha Levin sashal@kernel.org wrote:
From: João Paulo Rechi Vita jprvita@gmail.com
[ Upstream commit 78f3ac76d9e5219589718b9e4733bee21627b3f5 ]
In the past, Asus firmwares would change the panel backlight directly through the EC when the display off hotkey (Fn+F7) was pressed, and only notify the OS of such change, with 0x33 when the LCD was ON and 0x34 when the LCD was OFF. These are currently mapped to KEY_DISPLAYTOGGLE and KEY_DISPLAY_OFF, respectively.
Most recently the EC on Asus most machines lost ability to toggle the LCD backlight directly, but unless the OS informs the firmware it is going to handle the display toggle hotkey events, the firmware still tries change the brightness through the EC, to no effect. The end result is a long list (at Endless we counted 11) of Asus laptop models where the display toggle hotkey does not perform any action. Our firmware engineers contacts at Asus were surprised that there were still machines out there with the old behavior.
Calling WMNB(ASUS_WMI_DEVID_BACKLIGHT==0x00050011, 2) on the _WDG device tells the firmware that it should let the OS handle the display toggle event, in which case it will simply notify the OS of a key press with 0x35, as shown by the DSDT excerpts bellow.
Scope (_SB) { (...)
Device (ATKD) { (...) Name (_WDG, Buffer (0x28) { /* 0000 */ 0xD0, 0x5E, 0x84, 0x97, 0x6D, 0x4E, 0xDE, 0x11, /* 0008 */ 0x8A, 0x39, 0x08, 0x00, 0x20, 0x0C, 0x9A, 0x66, /* 0010 */ 0x4E, 0x42, 0x01, 0x02, 0x35, 0xBB, 0x3C, 0x0B, /* 0018 */ 0xC2, 0xE3, 0xED, 0x45, 0x91, 0xC2, 0x4C, 0x5A, /* 0020 */ 0x6D, 0x19, 0x5D, 0x1C, 0xFF, 0x00, 0x01, 0x08 }) Method (WMNB, 3, Serialized) { CreateDWordField (Arg2, Zero, IIA0) CreateDWordField (Arg2, 0x04, IIA1) Local0 = (Arg1 & 0xFFFFFFFF) (...) If ((Local0 == 0x53564544)) { (...) If ((IIA0 == 0x00050011)) { If ((IIA1 == 0x02)) { ^^PCI0.SBRG.EC0.SPIN (0x72, One) ^^PCI0.SBRG.EC0.BLCT = One } Return (One) } } (...) } (...) } (...)
} (...)
Scope (_SB.PCI0.SBRG.EC0) { (...)
Name (BLCT, Zero) (...) Method (_Q10, 0, NotSerialized) // _Qxx: EC Query { If ((BLCT == Zero)) { Local0 = One Local0 = RPIN (0x72) Local0 ^= One SPIN (0x72, Local0) If (ATKP) { Local0 = (0x34 - Local0) ^^^^ATKD.IANE (Local0) } } ElseIf ((BLCT == One)) { If (ATKP) { ^^^^ATKD.IANE (0x35) } } } (...)
}
Signed-off-by: João Paulo Rechi Vita jprvita@endlessm.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org
I am not entirely sure this is linux-stable material. This patch makes the "turn off the display backlight" hotkey work on some Asus machines where, without this patch, the key would simply do nothing. It seems to me this is more of a new feature support than a bug fix. That said, if you or Andy think this should go to stable after this short explanation, I'll not object it.
Thanks and best regards,
-- João Paulo Rechi Vita http://about.me/jprvita
On Tue, Jan 08, 2019 at 04:49:18PM -0800, João Paulo Rechi Vita wrote:
Hello Sasha,
On Tue, Jan 8, 2019 at 11:26 AM Sasha Levin sashal@kernel.org wrote:
From: João Paulo Rechi Vita jprvita@gmail.com
[ Upstream commit 78f3ac76d9e5219589718b9e4733bee21627b3f5 ]
In the past, Asus firmwares would change the panel backlight directly through the EC when the display off hotkey (Fn+F7) was pressed, and only notify the OS of such change, with 0x33 when the LCD was ON and 0x34 when the LCD was OFF. These are currently mapped to KEY_DISPLAYTOGGLE and KEY_DISPLAY_OFF, respectively.
Most recently the EC on Asus most machines lost ability to toggle the LCD backlight directly, but unless the OS informs the firmware it is going to handle the display toggle hotkey events, the firmware still tries change the brightness through the EC, to no effect. The end result is a long list (at Endless we counted 11) of Asus laptop models where the display toggle hotkey does not perform any action. Our firmware engineers contacts at Asus were surprised that there were still machines out there with the old behavior.
Calling WMNB(ASUS_WMI_DEVID_BACKLIGHT==0x00050011, 2) on the _WDG device tells the firmware that it should let the OS handle the display toggle event, in which case it will simply notify the OS of a key press with 0x35, as shown by the DSDT excerpts bellow.
Scope (_SB) { (...)
Device (ATKD) { (...) Name (_WDG, Buffer (0x28) { /* 0000 */ 0xD0, 0x5E, 0x84, 0x97, 0x6D, 0x4E, 0xDE, 0x11, /* 0008 */ 0x8A, 0x39, 0x08, 0x00, 0x20, 0x0C, 0x9A, 0x66, /* 0010 */ 0x4E, 0x42, 0x01, 0x02, 0x35, 0xBB, 0x3C, 0x0B, /* 0018 */ 0xC2, 0xE3, 0xED, 0x45, 0x91, 0xC2, 0x4C, 0x5A, /* 0020 */ 0x6D, 0x19, 0x5D, 0x1C, 0xFF, 0x00, 0x01, 0x08 }) Method (WMNB, 3, Serialized) { CreateDWordField (Arg2, Zero, IIA0) CreateDWordField (Arg2, 0x04, IIA1) Local0 = (Arg1 & 0xFFFFFFFF) (...) If ((Local0 == 0x53564544)) { (...) If ((IIA0 == 0x00050011)) { If ((IIA1 == 0x02)) { ^^PCI0.SBRG.EC0.SPIN (0x72, One) ^^PCI0.SBRG.EC0.BLCT = One } Return (One) } } (...) } (...) } (...)
} (...)
Scope (_SB.PCI0.SBRG.EC0) { (...)
Name (BLCT, Zero) (...) Method (_Q10, 0, NotSerialized) // _Qxx: EC Query { If ((BLCT == Zero)) { Local0 = One Local0 = RPIN (0x72) Local0 ^= One SPIN (0x72, Local0) If (ATKP) { Local0 = (0x34 - Local0) ^^^^ATKD.IANE (Local0) } } ElseIf ((BLCT == One)) { If (ATKP) { ^^^^ATKD.IANE (0x35) } } } (...)
}
Signed-off-by: João Paulo Rechi Vita jprvita@endlessm.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org
I am not entirely sure this is linux-stable material. This patch makes the "turn off the display backlight" hotkey work on some Asus machines where, without this patch, the key would simply do nothing. It seems to me this is more of a new feature support than a bug fix. That said, if you or Andy think this should go to stable after this short explanation, I'll not object it.
I think I'll keep this in. It looks a lot like a quirk to make this device work.
-- Thanks, Sasha
From: Miroslav Lichvar mlichvar@redhat.com
[ Upstream commit e1f65b0d70e9e5c80e15105cd96fa00174d7c436 ]
It seems with some NICs supported by the e1000e driver a SYSTIM reading may occasionally be few microseconds before the previous reading and if enabled also pass e1000e_sanitize_systim() without reaching the maximum number of rereads, even if the function is modified to check three consecutive readings (i.e. it doesn't look like a double read error). This causes an underflow in the timecounter and the PHC time jumps hours ahead.
This was observed on 82574, I217 and I219. The fastest way to reproduce it is to run a program that continuously calls the PTP_SYS_OFFSET ioctl on the PHC.
Modify e1000e_phc_gettime() to use timecounter_cyc2time() instead of timecounter_read() in order to allow non-monotonic SYSTIM readings and prevent the PHC from jumping.
Cc: Richard Cochran richardcochran@gmail.com Signed-off-by: Miroslav Lichvar mlichvar@redhat.com Acked-by: Jacob Keller jacob.e.keller@intel.com Tested-by: Aaron Brown aaron.f.brown@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirsher@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/intel/e1000e/ptp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 37c76945ad9b..e1f821edbc21 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -173,10 +173,14 @@ static int e1000e_phc_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, ptp_clock_info); unsigned long flags; - u64 ns; + u64 cycles, ns;
spin_lock_irqsave(&adapter->systim_lock, flags); - ns = timecounter_read(&adapter->tc); + + /* Use timecounter_cyc2time() to allow non-monotonic SYSTIM readings */ + cycles = adapter->cc.read(&adapter->cc); + ns = timecounter_cyc2time(&adapter->tc, cycles); + spin_unlock_irqrestore(&adapter->systim_lock, flags);
*ts = ns_to_timespec64(ns); @@ -232,9 +236,12 @@ static void e1000e_systim_overflow_work(struct work_struct *work) systim_overflow_work.work); struct e1000_hw *hw = &adapter->hw; struct timespec64 ts; + u64 ns;
- adapter->ptp_clock_info.gettime64(&adapter->ptp_clock_info, &ts); + /* Update the timecounter */ + ns = timecounter_read(&adapter->tc);
+ ts = ns_to_timespec64(ns); e_dbg("SYSTIM overflow check at %lld.%09lu\n", (long long) ts.tv_sec, ts.tv_nsec);
From: Badhri Jagan Sridharan badhri@google.com
[ Upstream commit 23b5f73266e59a598c1e5dd435d87651b5a7626b ]
During HARD_RESET the data link is disconnected. For self powered device, the spec is advising against doing that.
From USB_PD_R3_0
7.1.5 Response to Hard Resets Device operation during and after a Hard Reset is defined as follows: Self-powered devices Should Not disconnect from USB during a Hard Reset (see Section 9.1.2). Bus powered devices will disconnect from USB during a Hard Reset due to the loss of their power source.
Tackle this by letting TCPM know whether the device is self or bus powered.
This overcomes unnecessary port disconnections from hard reset. Also, speeds up the enumeration time when connected to Type-A ports.
Signed-off-by: Badhri Jagan Sridharan badhri@google.com Reviewed-by: Heikki Krogerus heikki.krogerus@linux.intel.com --------- Version history: V3: Rebase on top of usb-next
V2: Based on feedback from heikki.krogerus@linux.intel.com - self_powered added to the struct tcpm_port which is populated from a. "connector" node of the device tree in tcpm_fw_get_caps() b. "self_powered" node of the tcpc_config in tcpm_copy_caps
Based on feedbase from linux@roeck-us.net - Code was refactored - SRC_HARD_RESET_VBUS_OFF sets the link state to false based on self_powered flag
V1 located here: https://lkml.org/lkml/2018/9/13/94 Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/typec/tcpm/tcpm.c | 12 ++++++++++-- include/linux/usb/tcpm.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index dbbd71f754d0..ba6e5cdaed2c 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -317,6 +317,9 @@ struct tcpm_port { /* Deadline in jiffies to exit src_try_wait state */ unsigned long max_wait;
+ /* port belongs to a self powered device */ + bool self_powered; + #ifdef CONFIG_DEBUG_FS struct dentry *dentry; struct mutex logbuffer_lock; /* log buffer access lock */ @@ -3254,7 +3257,8 @@ static void run_state_machine(struct tcpm_port *port) case SRC_HARD_RESET_VBUS_OFF: tcpm_set_vconn(port, true); tcpm_set_vbus(port, false); - tcpm_set_roles(port, false, TYPEC_SOURCE, TYPEC_HOST); + tcpm_set_roles(port, port->self_powered, TYPEC_SOURCE, + TYPEC_HOST); tcpm_set_state(port, SRC_HARD_RESET_VBUS_ON, PD_T_SRC_RECOVER); break; case SRC_HARD_RESET_VBUS_ON: @@ -3267,7 +3271,8 @@ static void run_state_machine(struct tcpm_port *port) memset(&port->pps_data, 0, sizeof(port->pps_data)); tcpm_set_vconn(port, false); tcpm_set_charge(port, false); - tcpm_set_roles(port, false, TYPEC_SINK, TYPEC_DEVICE); + tcpm_set_roles(port, port->self_powered, TYPEC_SINK, + TYPEC_DEVICE); /* * VBUS may or may not toggle, depending on the adapter. * If it doesn't toggle, transition to SNK_HARD_RESET_SINK_ON @@ -4412,6 +4417,8 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, return -EINVAL; port->operating_snk_mw = mw / 1000;
+ port->self_powered = fwnode_property_read_bool(fwnode, "self-powered"); + return 0; }
@@ -4720,6 +4727,7 @@ static int tcpm_copy_caps(struct tcpm_port *port, port->typec_caps.prefer_role = tcfg->default_role; port->typec_caps.type = tcfg->type; port->typec_caps.data = tcfg->data; + port->self_powered = port->tcpc->config->self_powered;
return 0; } diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 7e7fbfb84e8e..50c74a77db55 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -89,6 +89,7 @@ struct tcpc_config { enum typec_port_data data; enum typec_role default_role; bool try_role_hw; /* try.{src,snk} implemented in hardware */ + bool self_powered; /* port belongs to a self powered device */
const struct typec_altmode_desc *alt_modes; };
From: Quentin Monnet quentin.monnet@netronome.com
[ Upstream commit f96afa767baffba7645f5e10998f5178948bb9aa ]
libbpf is now able to load successfully test_l4lb_noinline.o and samples/bpf/tracex3_kern.o.
For the test_l4lb_noinline, uncomment related tests from test_libbpf.c and remove the associated "TODO".
For tracex3_kern.o, instead of loading a program from samples/bpf/ that might not have been compiled at this stage, try loading a program from BPF selftests. Since this test case is about loading a program compiled without the "-target bpf" flag, change the Makefile to compile one program accordingly (instead of passing the flag for compiling all programs).
Regarding test_xdp_noinline.o: in its current shape the program fails to load because it provides no version section, but the loader needs one. The test was added to make sure that libbpf could load XDP programs even if they do not provide a version number in a dedicated section. But libbpf is already capable of doing that: in our case loading fails because the loader does not know that this is an XDP program (it does not need to, since it does not attach the program). So trying to load test_xdp_noinline.o does not bring much here: just delete this subtest.
For the record, the error message obtained with tracex3_kern.o was fixed by commit e3d91b0ca523 ("tools/libbpf: handle issues with bpf ELF objects containing .eh_frames")
I have not been abled to reproduce the "libbpf: incorrect bpf_call opcode" error for test_l4lb_noinline.o, even with the version of libbpf present at the time when test_libbpf.sh and test_libbpf_open.c were created.
RFC -> v1: - Compile test_xdp without the "-target bpf" flag, and try to load it instead of ../../samples/bpf/tracex3_kern.o. - Delete test_xdp_noinline.o subtest.
Cc: Jesper Dangaard Brouer brouer@redhat.com Signed-off-by: Quentin Monnet quentin.monnet@netronome.com Acked-by: Jakub Kicinski jakub.kicinski@netronome.com Acked-by: Jesper Dangaard Brouer brouer@redhat.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/bpf/Makefile | 10 ++++++++++ tools/testing/selftests/bpf/test_libbpf.sh | 14 ++++---------- 2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e39dfb4e7970..ecd79b7fb107 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -135,6 +135,16 @@ endif endif endif
+# Have one program compiled without "-target bpf" to test whether libbpf loads +# it successfully +$(OUTPUT)/test_xdp.o: test_xdp.c + $(CLANG) $(CLANG_FLAGS) \ + -O2 -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif + $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh index 156d89f1edcc..2989b2e2d856 100755 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -33,17 +33,11 @@ trap exit_handler 0 2 3 6 9
libbpf_open_file test_l4lb.o
-# TODO: fix libbpf to load noinline functions -# [warning] libbpf: incorrect bpf_call opcode -#libbpf_open_file test_l4lb_noinline.o +# Load a program with BPF-to-BPF calls +libbpf_open_file test_l4lb_noinline.o
-# TODO: fix test_xdp_meta.c to load with libbpf -# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version -#libbpf_open_file test_xdp_meta.o - -# TODO: fix libbpf to handle .eh_frame -# [warning] libbpf: relocation failed: no section(10) -#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o +# Load a program compiled without the "-target bpf" flag +libbpf_open_file test_xdp.o
# Success exit 0
From: Frank Rowand frank.rowand@sony.com
[ Upstream commit 7c528e457d53c75107d5aa56892316d265c778de ]
The refcount of a newly added overlay node decrements to one (instead of zero) when the overlay changeset is destroyed. This change will cause the final decrement be to zero.
After applying this patch, new validation warnings will be reported from the devicetree unittest during boot due to a pre-existing devicetree bug. The warnings will be similar to:
OF: ERROR: memory leak before free overlay changeset, /testcase-data/overlay-node/test-bus/test-unittest4
This pre-existing devicetree bug will also trigger a WARN_ONCE() from refcount_sub_and_test_checked() when an overlay changeset is destroyed without having first been applied. This scenario occurs when an error in the overlay is detected during the overlay changeset creation:
WARNING: CPU: 0 PID: 1 at lib/refcount.c:187 refcount_sub_and_test_checked+0xa8/0xbc refcount_t: underflow; use-after-free.
(unwind_backtrace) from (show_stack+0x10/0x14) (show_stack) from (dump_stack+0x6c/0x8c) (dump_stack) from (__warn+0xdc/0x104) (__warn) from (warn_slowpath_fmt+0x44/0x6c) (warn_slowpath_fmt) from (refcount_sub_and_test_checked+0xa8/0xbc) (refcount_sub_and_test_checked) from (kobject_put+0x24/0x208) (kobject_put) from (of_changeset_destroy+0x2c/0xb4) (of_changeset_destroy) from (free_overlay_changeset+0x1c/0x9c) (free_overlay_changeset) from (of_overlay_remove+0x284/0x2cc) (of_overlay_remove) from (of_unittest_apply_revert_overlay_check.constprop.4+0xf8/0x1e8) (of_unittest_apply_revert_overlay_check.constprop.4) from (of_unittest_overlay+0x960/0xed8) (of_unittest_overlay) from (of_unittest+0x1cc4/0x2138) (of_unittest) from (do_one_initcall+0x4c/0x28c) (do_one_initcall) from (kernel_init_freeable+0x29c/0x378) (kernel_init_freeable) from (kernel_init+0x8/0x110) (kernel_init) from (ret_from_fork+0x14/0x2c)
Tested-by: Alan Tull atull@kernel.org Signed-off-by: Frank Rowand frank.rowand@sony.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/of/overlay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 42b1f73ac5f6..1e058196f23f 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -378,7 +378,9 @@ static int add_changeset_node(struct overlay_changeset *ovcs, if (ret) return ret;
- return build_changeset_next_level(ovcs, tchild, node); + ret = build_changeset_next_level(ovcs, tchild, node); + of_node_put(tchild); + return ret; }
if (node->phandle && tchild->phandle)
From: Anders Roxell anders.roxell@linaro.org
[ Upstream commit 347a28b586802d09604a149c1a1f6de5dccbe6fa ]
This happened while running in qemu-system-aarch64, the AMBA PL011 UART driver when enabling CONFIG_DEBUG_TEST_DRIVER_REMOVE. arch_initcall(pl011_init) came before subsys_initcall(default_bdi_init), devtmpfs' handle_remove() crashes because the reference count is a NULL pointer only because wb->bdi hasn't been initialized yet.
Rework so that wb_put have an extra check if wb->bdi before decrement wb->refcnt and also add a WARN_ON_ONCE to get a warning if it happens again in other drivers.
Fixes: 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") Co-developed-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Anders Roxell anders.roxell@linaro.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/backing-dev-defs.h | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 9a6bc0951cfa..c31157135598 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -258,6 +258,14 @@ static inline void wb_get(struct bdi_writeback *wb) */ static inline void wb_put(struct bdi_writeback *wb) { + if (WARN_ON_ONCE(!wb->bdi)) { + /* + * A driver bug might cause a file to be removed before bdi was + * initialized. + */ + return; + } + if (wb != &wb->bdi->wb) percpu_ref_put(&wb->refcnt); }
From: Anders Roxell anders.roxell@linaro.org
[ Upstream commit 646097940ad35aa2c1f2012af932d55976a9f255 ]
When the test 'CONFIG_DEBUG_TEST_DRIVER_REMOVE=y' is enabled, arch_initcall(pl011_init) came before subsys_initcall(default_bdi_init). devtmpfs gets killed because we try to remove a file and decrement the wb reference count before the noop_backing_device_info gets initialized.
[ 0.332075] Serial: AMBA PL011 UART driver [ 0.485276] 9000000.pl011: ttyAMA0 at MMIO 0x9000000 (irq = 39, base_baud = 0) is a PL011 rev1 [ 0.502382] console [ttyAMA0] enabled [ 0.515710] Unable to handle kernel paging request at virtual address 0000800074c12000 [ 0.516053] Mem abort info: [ 0.516222] ESR = 0x96000004 [ 0.516417] Exception class = DABT (current EL), IL = 32 bits [ 0.516641] SET = 0, FnV = 0 [ 0.516826] EA = 0, S1PTW = 0 [ 0.516984] Data abort info: [ 0.517149] ISV = 0, ISS = 0x00000004 [ 0.517339] CM = 0, WnR = 0 [ 0.517553] [0000800074c12000] user address but active_mm is swapper [ 0.517928] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 0.518305] Modules linked in: [ 0.518839] CPU: 0 PID: 13 Comm: kdevtmpfs Not tainted 4.19.0-rc5-next-20180928-00002-g2ba39ab0cd01-dirty #82 [ 0.519307] Hardware name: linux,dummy-virt (DT) [ 0.519681] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 0.519959] pc : __destroy_inode+0x94/0x2a8 [ 0.520212] lr : __destroy_inode+0x78/0x2a8 [ 0.520401] sp : ffff0000098c3b20 [ 0.520590] x29: ffff0000098c3b20 x28: 00000000087a3714 [ 0.520904] x27: 0000000000002000 x26: 0000000000002000 [ 0.521179] x25: ffff000009583000 x24: 0000000000000000 [ 0.521467] x23: ffff80007bb52000 x22: ffff80007bbaa7c0 [ 0.521737] x21: ffff0000093f9338 x20: 0000000000000000 [ 0.522033] x19: ffff80007bbb05d8 x18: 0000000000000400 [ 0.522376] x17: 0000000000000000 x16: 0000000000000000 [ 0.522727] x15: 0000000000000400 x14: 0000000000000400 [ 0.523068] x13: 0000000000000001 x12: 0000000000000001 [ 0.523421] x11: 0000000000000000 x10: 0000000000000970 [ 0.523749] x9 : ffff0000098c3a60 x8 : ffff80007bbab190 [ 0.524017] x7 : ffff80007bbaa880 x6 : 0000000000000c88 [ 0.524305] x5 : ffff0000093d96c8 x4 : 61c8864680b583eb [ 0.524567] x3 : ffff0000093d6180 x2 : ffffffffffffffff [ 0.524872] x1 : 0000800074c12000 x0 : 0000800074c12000 [ 0.525207] Process kdevtmpfs (pid: 13, stack limit = 0x(____ptrval____)) [ 0.525529] Call trace: [ 0.525806] __destroy_inode+0x94/0x2a8 [ 0.526108] destroy_inode+0x34/0x88 [ 0.526370] evict+0x144/0x1c8 [ 0.526636] iput+0x184/0x230 [ 0.526871] dentry_unlink_inode+0x118/0x130 [ 0.527152] d_delete+0xd8/0xe0 [ 0.527420] vfs_unlink+0x240/0x270 [ 0.527665] handle_remove+0x1d8/0x330 [ 0.527875] devtmpfsd+0x138/0x1c8 [ 0.528085] kthread+0x14c/0x158 [ 0.528291] ret_from_fork+0x10/0x18 [ 0.528720] Code: 92800002 aa1403e0 d538d081 8b010000 (c85f7c04) [ 0.529367] ---[ end trace 5a3dee47727f877c ]---
Rework to set suppress_bind_attrs flag to avoid removing the device when CONFIG_DEBUG_TEST_DRIVER_REMOVE=y. This applies for pic32_uart and xilinx_uartps as well.
Co-developed-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Anders Roxell anders.roxell@linaro.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/tty/serial/amba-pl011.c | 2 ++ drivers/tty/serial/pic32_uart.c | 1 + drivers/tty/serial/xilinx_uartps.c | 1 + 3 files changed, 4 insertions(+)
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index ebd33c0232e6..89ade213a1a9 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2780,6 +2780,7 @@ static struct platform_driver arm_sbsa_uart_platform_driver = { .name = "sbsa-uart", .of_match_table = of_match_ptr(sbsa_uart_of_match), .acpi_match_table = ACPI_PTR(sbsa_uart_acpi_match), + .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), }, };
@@ -2808,6 +2809,7 @@ static struct amba_driver pl011_driver = { .drv = { .name = "uart-pl011", .pm = &pl011_dev_pm_ops, + .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), }, .id_table = pl011_ids, .probe = pl011_probe, diff --git a/drivers/tty/serial/pic32_uart.c b/drivers/tty/serial/pic32_uart.c index fd80d999308d..0bdf1687983f 100644 --- a/drivers/tty/serial/pic32_uart.c +++ b/drivers/tty/serial/pic32_uart.c @@ -919,6 +919,7 @@ static struct platform_driver pic32_uart_platform_driver = { .driver = { .name = PIC32_DEV_NAME, .of_match_table = of_match_ptr(pic32_serial_dt_ids), + .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_PIC32), }, };
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 57c66d2c3471..379242b96790 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1719,6 +1719,7 @@ static struct platform_driver cdns_uart_platform_driver = { .name = CDNS_UART_NAME, .of_match_table = cdns_uart_of_match, .pm = &cdns_uart_dev_pm_ops, + .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_XILINX_PS_UART), }, };
From: Andrey Ignatov rdna@fb.com
[ Upstream commit 46f53a65d2de3e1591636c22b626b09d8684fd71 ]
Currently BPF verifier allows narrow loads for a context field only with offset zero. E.g. if there is a __u32 field then only the following loads are permitted: * off=0, size=1 (narrow); * off=0, size=2 (narrow); * off=0, size=4 (full).
On the other hand LLVM can generate a load with offset different than zero that make sense from program logic point of view, but verifier doesn't accept it.
E.g. tools/testing/selftests/bpf/sendmsg4_prog.c has code:
#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */ ... if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
where ctx is struct bpf_sock_addr.
Some versions of LLVM can produce the following byte code for it:
8: 71 12 07 00 00 00 00 00 r2 = *(u8 *)(r1 + 7) 9: 67 02 00 00 18 00 00 00 r2 <<= 24 10: 18 03 00 00 00 00 00 fe 00 00 00 00 00 00 00 00 r3 = 4261412864 ll 12: 5d 32 07 00 00 00 00 00 if r2 != r3 goto +7 <LBB0_6>
where `*(u8 *)(r1 + 7)` means narrow load for ctx->user_ip4 with size=1 and offset=3 (7 - sizeof(ctx->user_family) = 3). This load is currently rejected by verifier.
Verifier code that rejects such loads is in bpf_ctx_narrow_access_ok() what means any is_valid_access implementation, that uses the function, works this way, e.g. bpf_skb_is_valid_access() for __sk_buff or sock_addr_is_valid_access() for bpf_sock_addr.
The patch makes such loads supported. Offset can be in [0; size_default) but has to be multiple of load size. E.g. for __u32 field the following loads are supported now: * off=0, size=1 (narrow); * off=1, size=1 (narrow); * off=2, size=1 (narrow); * off=3, size=1 (narrow); * off=0, size=2 (narrow); * off=2, size=2 (narrow); * off=0, size=4 (full).
Reported-by: Yonghong Song yhs@fb.com Signed-off-by: Andrey Ignatov rdna@fb.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/filter.h | 16 +--------------- kernel/bpf/verifier.c | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h index a8b9d90a8042..25a556589ae8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -675,24 +675,10 @@ static inline u32 bpf_ctx_off_adjust_machine(u32 size) return size; }
-static inline bool bpf_ctx_narrow_align_ok(u32 off, u32 size_access, - u32 size_default) -{ - size_default = bpf_ctx_off_adjust_machine(size_default); - size_access = bpf_ctx_off_adjust_machine(size_access); - -#ifdef __LITTLE_ENDIAN - return (off & (size_default - 1)) == 0; -#else - return (off & (size_default - 1)) + size_access == size_default; -#endif -} - static inline bool bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) { - return bpf_ctx_narrow_align_ok(off, size, size_default) && - size <= size_default && (size & (size - 1)) == 0; + return size <= size_default && (size & (size - 1)) == 0; }
#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 51ba84d4d34a..a81f52b2c92e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5789,10 +5789,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; + u32 target_size, size_default, off; struct bpf_prog *new_prog; enum bpf_access_type type; bool is_narrower_load; - u32 target_size;
if (ops->gen_prologue || env->seen_direct_write) { if (!ops->gen_prologue) { @@ -5885,9 +5885,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) * we will apply proper mask to the result. */ is_narrower_load = size < ctx_field_size; + size_default = bpf_ctx_off_adjust_machine(ctx_field_size); + off = insn->off; if (is_narrower_load) { - u32 size_default = bpf_ctx_off_adjust_machine(ctx_field_size); - u32 off = insn->off; u8 size_code;
if (type == BPF_WRITE) { @@ -5915,12 +5915,23 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) }
if (is_narrower_load && size < target_size) { - if (ctx_field_size <= 4) + u8 shift = (off & (size_default - 1)) * 8; + + if (ctx_field_size <= 4) { + if (shift) + insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, + insn->dst_reg, + shift); insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, (1 << size * 8) - 1); - else + } else { + if (shift) + insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, + insn->dst_reg, + shift); insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, (1 << size * 8) - 1); + } }
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
From: Matthew Bobrowski mbobrowski@mbobrowski.org
[ Upstream commit 2d10b23082a7eb8be508b3789f2e7250a88a5ddb ]
Modify fanotify_should_send_event() so that it now returns a mask for an event that contains ONLY flags for the event types that have been specifically requested by the user. Flags that may have been included within the event mask, but have not been explicitly requested by the user will not be present in the returned value.
As an example, given the situation where a user requests events of type FAN_OPEN. Traditionally, the event mask returned within an event that occurred on a filesystem object that has been marked for monitoring and is opened, will only ever have the FAN_OPEN bit set. With the introduction of the new flags like FAN_OPEN_EXEC, and perhaps any other future event flags, there is a possibility of the returned event mask containing more than a single bit set, despite having only requested the single event type. Prior to these modifications performed to fanotify_should_send_event(), a user would have received a bundled event mask containing flags FAN_OPEN and FAN_OPEN_EXEC in the instance that a file was opened for execution via execve(), for example. This means that a user would receive event types in the returned event mask that have not been requested. This runs the possibility of breaking existing systems and causing other unforeseen issues.
To mitigate this possibility, fanotify_should_send_event() has been modified to return the event mask containing ONLY event types explicitly requested by the user. This means that we will NOT report events that the user did no set a mask for, and we will NOT report events that the user has set an ignore mask for.
The function name fanotify_should_send_event() has also been updated so that it's more relevant to what it has been designed to do.
Signed-off-by: Matthew Bobrowski mbobrowski@mbobrowski.org Reviewed-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org --- fs/notify/fanotify/fanotify.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e08a6647267b..f4f8359bc597 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -89,7 +89,13 @@ static int fanotify_get_response(struct fsnotify_group *group, return ret; }
-static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, +/* + * This function returns a mask for an event that only contains the flags + * that have been specifically requested by the user. Flags that may have + * been included within the event mask, but have not been explicitly + * requested by the user, will not be present in the returned mask. + */ +static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, u32 event_mask, const void *data, int data_type) { @@ -101,14 +107,14 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", __func__, iter_info->report_mask, event_mask, data, data_type);
- /* if we don't have enough info to send an event to userspace say no */ + /* If we don't have enough info to send an event to userspace say no */ if (data_type != FSNOTIFY_EVENT_PATH) - return false; + return 0;
- /* sorry, fanotify only gives a damn about files and dirs */ + /* Sorry, fanotify only gives a damn about files and dirs */ if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry)) - return false; + return 0;
fsnotify_foreach_obj_type(type) { if (!fsnotify_iter_should_report_type(iter_info, type)) @@ -129,13 +135,10 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
if (d_is_dir(path->dentry) && !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) - return false; - - if (event_mask & FANOTIFY_OUTGOING_EVENTS & - marks_mask & ~marks_ignored_mask) - return true; + return 0;
- return false; + return event_mask & FANOTIFY_OUTGOING_EVENTS & marks_mask & + ~marks_ignored_mask; }
struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, @@ -210,7 +213,8 @@ static int fanotify_handle_event(struct fsnotify_group *group,
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10);
- if (!fanotify_should_send_event(iter_info, mask, data, data_type)) + mask = fanotify_group_event_mask(iter_info, mask, data, data_type); + if (!mask) return 0;
pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
On Tue, Jan 8, 2019 at 10:11 PM Sasha Levin sashal@kernel.org wrote:
From: Matthew Bobrowski mbobrowski@mbobrowski.org
[ Upstream commit 2d10b23082a7eb8be508b3789f2e7250a88a5ddb ]
Modify fanotify_should_send_event() so that it now returns a mask for an event that contains ONLY flags for the event types that have been specifically requested by the user. Flags that may have been included within the event mask, but have not been explicitly requested by the user will not be present in the returned value.
As an example, given the situation where a user requests events of type FAN_OPEN. Traditionally, the event mask returned within an event that occurred on a filesystem object that has been marked for monitoring and is opened, will only ever have the FAN_OPEN bit set. With the introduction of the new flags like FAN_OPEN_EXEC, and perhaps any other future event flags, there is a possibility of the returned event mask containing more than a single bit set, despite having only requested the single event type. Prior to these modifications performed to fanotify_should_send_event(), a user would have received a bundled event mask containing flags FAN_OPEN and FAN_OPEN_EXEC in the instance that a file was opened for execution via execve(), for example. This means that a user would receive event types in the returned event mask that have not been requested. This runs the possibility of breaking existing systems and causing other unforeseen issues.
To mitigate this possibility, fanotify_should_send_event() has been modified to return the event mask containing ONLY event types explicitly requested by the user. This means that we will NOT report events that the user did no set a mask for, and we will NOT report events that the user has set an ignore mask for.
The function name fanotify_should_send_event() has also been updated so that it's more relevant to what it has been designed to do.
Signed-off-by: Matthew Bobrowski mbobrowski@mbobrowski.org Reviewed-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org
Sasha,
I have no objection to applying this patch to 4.20, but FYI, it does not fix anything. Before introducing FAN_OPEN_EXEC in 5.0-rc1, this patch has no visible effect.
I don't mind if you apply it. It will make stable code closer to mainline, which is always a good thing IMO. And FWIW, I think that patch is quite trivial and low risk.
Thanks, Amir.
On Wed 09-01-19 08:50:33, Amir Goldstein wrote:
On Tue, Jan 8, 2019 at 10:11 PM Sasha Levin sashal@kernel.org wrote:
From: Matthew Bobrowski mbobrowski@mbobrowski.org
[ Upstream commit 2d10b23082a7eb8be508b3789f2e7250a88a5ddb ]
Modify fanotify_should_send_event() so that it now returns a mask for an event that contains ONLY flags for the event types that have been specifically requested by the user. Flags that may have been included within the event mask, but have not been explicitly requested by the user will not be present in the returned value.
As an example, given the situation where a user requests events of type FAN_OPEN. Traditionally, the event mask returned within an event that occurred on a filesystem object that has been marked for monitoring and is opened, will only ever have the FAN_OPEN bit set. With the introduction of the new flags like FAN_OPEN_EXEC, and perhaps any other future event flags, there is a possibility of the returned event mask containing more than a single bit set, despite having only requested the single event type. Prior to these modifications performed to fanotify_should_send_event(), a user would have received a bundled event mask containing flags FAN_OPEN and FAN_OPEN_EXEC in the instance that a file was opened for execution via execve(), for example. This means that a user would receive event types in the returned event mask that have not been requested. This runs the possibility of breaking existing systems and causing other unforeseen issues.
To mitigate this possibility, fanotify_should_send_event() has been modified to return the event mask containing ONLY event types explicitly requested by the user. This means that we will NOT report events that the user did no set a mask for, and we will NOT report events that the user has set an ignore mask for.
The function name fanotify_should_send_event() has also been updated so that it's more relevant to what it has been designed to do.
Signed-off-by: Matthew Bobrowski mbobrowski@mbobrowski.org Reviewed-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org
I have no objection to applying this patch to 4.20, but FYI, it does not fix anything. Before introducing FAN_OPEN_EXEC in 5.0-rc1, this patch has no visible effect.
Yes, the patch is just a code refactoring useful for the FAN_OPEN_EXEC feature.
I don't mind if you apply it. It will make stable code closer to mainline, which is always a good thing IMO. And FWIW, I think that patch is quite trivial and low risk.
I don't think applying code refactoring to stable is a good idea. Every change has a risk of regression and this particular one brings users no benefit. So I'd prefer to drop this patch from stable queue.
Honza
On Wed, Jan 09, 2019 at 12:32:42PM +0100, Jan Kara wrote:
On Wed 09-01-19 08:50:33, Amir Goldstein wrote:
On Tue, Jan 8, 2019 at 10:11 PM Sasha Levin sashal@kernel.org wrote:
From: Matthew Bobrowski mbobrowski@mbobrowski.org
[ Upstream commit 2d10b23082a7eb8be508b3789f2e7250a88a5ddb ]
Modify fanotify_should_send_event() so that it now returns a mask for an event that contains ONLY flags for the event types that have been specifically requested by the user. Flags that may have been included within the event mask, but have not been explicitly requested by the user will not be present in the returned value.
As an example, given the situation where a user requests events of type FAN_OPEN. Traditionally, the event mask returned within an event that occurred on a filesystem object that has been marked for monitoring and is opened, will only ever have the FAN_OPEN bit set. With the introduction of the new flags like FAN_OPEN_EXEC, and perhaps any other future event flags, there is a possibility of the returned event mask containing more than a single bit set, despite having only requested the single event type. Prior to these modifications performed to fanotify_should_send_event(), a user would have received a bundled event mask containing flags FAN_OPEN and FAN_OPEN_EXEC in the instance that a file was opened for execution via execve(), for example. This means that a user would receive event types in the returned event mask that have not been requested. This runs the possibility of breaking existing systems and causing other unforeseen issues.
To mitigate this possibility, fanotify_should_send_event() has been modified to return the event mask containing ONLY event types explicitly requested by the user. This means that we will NOT report events that the user did no set a mask for, and we will NOT report events that the user has set an ignore mask for.
The function name fanotify_should_send_event() has also been updated so that it's more relevant to what it has been designed to do.
Signed-off-by: Matthew Bobrowski mbobrowski@mbobrowski.org Reviewed-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org
I have no objection to applying this patch to 4.20, but FYI, it does not fix anything. Before introducing FAN_OPEN_EXEC in 5.0-rc1, this patch has no visible effect.
Yes, the patch is just a code refactoring useful for the FAN_OPEN_EXEC feature.
I don't mind if you apply it. It will make stable code closer to mainline, which is always a good thing IMO. And FWIW, I think that patch is quite trivial and low risk.
I don't think applying code refactoring to stable is a good idea. Every change has a risk of regression and this particular one brings users no benefit. So I'd prefer to drop this patch from stable queue.
No objections there, dropping it. Thank you.
-- Thanks, Sasha
From: Takashi Sakamoto o-takashi@sakamocchi.jp
[ Upstream commit fba43f454cdf9caa3185219d116bd2a6e6354552 ]
This commit adds support for APOGEE duet FireWire, launched 2007, already discontinued. This model uses Oxford Semiconductor FW971 as its communication engine. Below is information on Configuration ROM of this unit. The unit supports some AV/C commands defined by Audio subunit specification and vendor dependent commands.
$ ./hinawa-config-rom-printer /dev/fw1 { 'bus-info': { 'adj': False, 'bmc': False, 'chip_ID': 42949742248, 'cmc': False, 'cyc_clk_acc': 255, 'generation': 0, 'imc': False, 'isc': True, 'link_spd': 3, 'max_ROM': 0, 'max_rec': 64, 'name': '1394', 'node_vendor_ID': 987, 'pmc': False}, 'root-directory': [ ['VENDOR', 987], ['DESCRIPTOR', 'Apogee Electronics'], ['MODEL', 122333], ['DESCRIPTOR', 'Duet'], [ 'NODE_CAPABILITIES', { 'addressing': {'64': True, 'fix': True, 'prv': False}, 'misc': {'int': False, 'ms': False, 'spt': True}, 'state': { 'atn': False, 'ded': False, 'drq': True, 'elo': False, 'init': False, 'lst': True, 'off': False}, 'testing': {'bas': False, 'ext': False}}], [ 'UNIT', [ ['SPECIFIER_ID', 41005], ['VERSION', 65537], ['MODEL', 122333], ['DESCRIPTOR', 'Duet']]]]}
Signed-off-by: Takashi Sakamoto o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/firewire/Kconfig | 1 + sound/firewire/oxfw/oxfw.c | 8 ++++++++ 2 files changed, 9 insertions(+)
diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig index 8a146b039276..44cedb65bb88 100644 --- a/sound/firewire/Kconfig +++ b/sound/firewire/Kconfig @@ -41,6 +41,7 @@ config SND_OXFW * Mackie(Loud) U.420/U.420d * TASCAM FireOne * Stanton Controllers & Systems 1 Deck/Mixer + * APOGEE duet FireWire
To compile this driver as a module, choose M here: the module will be called snd-oxfw. diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index afb78d90384b..3d27f3378d5d 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -20,6 +20,7 @@ #define VENDOR_LACIE 0x00d04b #define VENDOR_TASCAM 0x00022e #define OUI_STANTON 0x001260 +#define OUI_APOGEE 0x0003db
#define MODEL_SATELLITE 0x00200f
@@ -397,6 +398,13 @@ static const struct ieee1394_device_id oxfw_id_table[] = { .vendor_id = OUI_STANTON, .model_id = 0x002000, }, + // APOGEE, duet FireWire + { + .match_flags = IEEE1394_MATCH_VENDOR_ID | + IEEE1394_MATCH_MODEL_ID, + .vendor_id = OUI_APOGEE, + .model_id = 0x01dddd, + }, { } }; MODULE_DEVICE_TABLE(ieee1394, oxfw_id_table);
From: Borislav Petkov bp@suse.de
[ Upstream commit 68b5e4326e4b8ac9080835005d8254fed0fb3c56 ]
Add the proper includes and make smca_get_name() static.
Fix an actual bug too which the warning triggered:
arch/x86/kernel/cpu/mcheck/therm_throt.c:395:39: error: conflicting \ types for ‘smp_thermal_interrupt’ asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *r) ^~~~~~~~~~~~~~~~~~~~~ In file included from arch/x86/kernel/cpu/mcheck/therm_throt.c:29: ./arch/x86/include/asm/traps.h:107:17: note: previous declaration of \ ‘smp_thermal_interrupt’ was here asmlinkage void smp_thermal_interrupt(void);
Signed-off-by: Borislav Petkov bp@suse.de Cc: Yi Wang wang.yi59@zte.com.cn Cc: Michael Matz matz@suse.de Cc: x86@kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811081633160.1549@nanos.tec.linut... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/include/asm/traps.h | 6 +++--- arch/x86/kernel/cpu/mcheck/mce_amd.c | 5 +++-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 ++- arch/x86/kernel/cpu/mcheck/threshold.c | 3 ++- 4 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 3de69330e6c5..afbc87206886 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -104,9 +104,9 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 -asmlinkage void smp_thermal_interrupt(void); -asmlinkage void smp_threshold_interrupt(void); -asmlinkage void smp_deferred_error_interrupt(void); +asmlinkage void smp_thermal_interrupt(struct pt_regs *regs); +asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); +asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); #endif
extern void ist_enter(struct pt_regs *regs); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e12454e21b8a..9f915a8791cc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -23,6 +23,7 @@ #include <linux/string.h>
#include <asm/amd_nb.h> +#include <asm/traps.h> #include <asm/apic.h> #include <asm/mce.h> #include <asm/msr.h> @@ -99,7 +100,7 @@ static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 } };
-const char *smca_get_name(enum smca_bank_types t) +static const char *smca_get_name(enum smca_bank_types t) { if (t >= N_SMCA_BANK_TYPES) return NULL; @@ -824,7 +825,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) mce_log(&m); }
-asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs) { entering_irq(); trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 2da67b70ba98..ee229ceee745 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -25,6 +25,7 @@ #include <linux/cpu.h>
#include <asm/processor.h> +#include <asm/traps.h> #include <asm/apic.h> #include <asm/mce.h> #include <asm/msr.h> @@ -390,7 +391,7 @@ static void unexpected_thermal_interrupt(void)
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
-asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *r) +asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *regs) { entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 2b584b319eff..c21e0a1efd0f 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -6,6 +6,7 @@ #include <linux/kernel.h>
#include <asm/irq_vectors.h> +#include <asm/traps.h> #include <asm/apic.h> #include <asm/mce.h> #include <asm/trace/irq_vectors.h> @@ -18,7 +19,7 @@ static void default_threshold_interrupt(void)
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-asmlinkage __visible void __irq_entry smp_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_threshold_interrupt(struct pt_regs *regs) { entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
From: "Maciej W. Rozycki" macro@linux-mips.org
[ Upstream commit e4849aff1e169b86c561738daf8ff020e9de1011 ]
The Broadcom SiByte BCM1250, BCM1125, and BCM1125H SOCs have an onchip DRAM controller that supports memory amounts of up to 16GiB, and due to how the address decoder has been wired in the SOC any memory beyond 1GiB is actually mapped starting from 4GiB physical up, that is beyond the 32-bit addressable limit[1]. Consequently if the maximum amount of memory has been installed, then it will span up to 19GiB.
Many of the evaluation boards we support that are based on one of these SOCs have their memory soldered and the amount present fits in the 32-bit address range. The BCM91250A SWARM board however has actual DIMM slots and accepts, depending on the peripherals revision of the SOC, up to 4GiB or 8GiB of memory in commercially available JEDEC modules[2]. I believe this is also the case with the BCM91250C2 LittleSur board. This means that up to either 3GiB or 7GiB of memory requires 64-bit addressing to access.
I believe the BCM91480B BigSur board, which has the BCM1480 SOC instead, accepts at least as much memory, although I have no documentation or actual hardware available to verify that.
Both systems have PCI slots installed for use by any PCI option boards, including ones that only support 32-bit addressing (additionally the 32-bit PCI host bridge of the BCM1250, BCM1125, and BCM1125H SOCs limits addressing to 32-bits), and there is no IOMMU available. Therefore for PCI DMA to work in the presence of memory beyond enable swiotlb for the affected systems.
All the other SOC onchip DMA devices use 40-bit addressing and therefore can address the whole memory, so only enable swiotlb if PCI support and support for DMA beyond 4GiB have been both enabled in the configuration of the kernel.
This shows up as follows:
Broadcom SiByte BCM1250 B2 @ 800 MHz (SB1 rev 2) Board type: SiByte BCM91250A (SWARM) Determined physical RAM map: memory: 000000000fe7fe00 @ 0000000000000000 (usable) memory: 000000001ffffe00 @ 0000000080000000 (usable) memory: 000000000ffffe00 @ 00000000c0000000 (usable) memory: 0000000087fffe00 @ 0000000100000000 (usable) software IO TLB: mapped [mem 0xcbffc000-0xcfffc000] (64MB)
in the bootstrap log and removes failures like these:
defxx 0000:02:00.0: dma_direct_map_page: overflow 0x0000000185bc6080+4608 of device mask ffffffff bus mask 0 fddi0: Receive buffer allocation failed fddi0: Adapter open failed! IP-Config: Failed to open fddi0 defxx 0000:09:08.0: dma_direct_map_page: overflow 0x0000000185bc6080+4608 of device mask ffffffff bus mask 0 fddi1: Receive buffer allocation failed fddi1: Adapter open failed! IP-Config: Failed to open fddi1
when memory beyond 4GiB is handed out to devices that can only do 32-bit addressing.
This updates commit cce335ae47e2 ("[MIPS] 64-bit Sibyte kernels need DMA32.").
References:
[1] "BCM1250/BCM1125/BCM1125H User Manual", Revision 1250_1125-UM100-R, Broadcom Corporation, 21 Oct 2002, Section 3: "System Overview", "Memory Map", pp. 34-38
[2] "BCM91250A User Manual", Revision 91250A-UM100-R, Broadcom Corporation, 18 May 2004, Section 3: "Physical Description", "Supported DRAM", p. 23
Signed-off-by: Maciej W. Rozycki macro@linux-mips.org [paul.burton@mips.com: Remove GPL text from dma.c; SPDX tag covers it] Signed-off-by: Paul Burton paul.burton@mips.com Reviewed-by: Christoph Hellwig hch@lst.de Patchwork: https://patchwork.linux-mips.org/patch/21108/ References: cce335ae47e2 ("[MIPS] 64-bit Sibyte kernels need DMA32.") Cc: Ralf Baechle ralf@linux-mips.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/Kconfig | 3 +++ arch/mips/sibyte/common/Makefile | 1 + arch/mips/sibyte/common/dma.c | 14 ++++++++++++++ 3 files changed, 18 insertions(+) create mode 100644 arch/mips/sibyte/common/dma.c
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8272ea4c7264..a19c9fd05886 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -794,6 +794,7 @@ config SIBYTE_SWARM select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI
config SIBYTE_LITTLESUR bool "Sibyte BCM91250C2-LittleSur" @@ -814,6 +815,7 @@ config SIBYTE_SENTOSA select SYS_HAS_CPU_SB1 select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI
config SIBYTE_BIGSUR bool "Sibyte BCM91480B-BigSur" @@ -826,6 +828,7 @@ config SIBYTE_BIGSUR select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI
config SNI_RM bool "SNI RM200/300/400" diff --git a/arch/mips/sibyte/common/Makefile b/arch/mips/sibyte/common/Makefile index b3d6bf23a662..3ef3fb658136 100644 --- a/arch/mips/sibyte/common/Makefile +++ b/arch/mips/sibyte/common/Makefile @@ -1,4 +1,5 @@ obj-y := cfe.o +obj-$(CONFIG_SWIOTLB) += dma.o obj-$(CONFIG_SIBYTE_BUS_WATCHER) += bus_watcher.o obj-$(CONFIG_SIBYTE_CFE_CONSOLE) += cfe_console.o obj-$(CONFIG_SIBYTE_TBPROF) += sb_tbprof.o diff --git a/arch/mips/sibyte/common/dma.c b/arch/mips/sibyte/common/dma.c new file mode 100644 index 000000000000..eb47a94f3583 --- /dev/null +++ b/arch/mips/sibyte/common/dma.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * DMA support for Broadcom SiByte platforms. + * + * Copyright (c) 2018 Maciej W. Rozycki + */ + +#include <linux/swiotlb.h> +#include <asm/bootinfo.h> + +void __init plat_swiotlb_setup(void) +{ + swiotlb_init(1); +}
From: Vitaly Chikunov vt@altlinux.org
[ Upstream commit 3da2c1dfdb802b184eea0653d1e589515b52d74b ]
ecc_point_mult is supposed to be used with a regularized scalar, otherwise, it's possible to deduce the position of the top bit of the scalar with timing attack. This is important when the scalar is a private key.
ecc_point_mult is already using a regular algorithm (i.e. having an operation flow independent of the input scalar) but regularization step is not implemented.
Arrange scalar to always have fixed top bit by adding a multiple of the curve order (n).
References: The constant time regularization step is based on micro-ecc by Kenneth MacKay and also referenced in the literature (Bernstein, D. J., & Lange, T. (2017). Montgomery curves and the Montgomery ladder. (Cryptology ePrint Archive; Vol. 2017/293). s.l.: IACR. Chapter 4.6.2.)
Signed-off-by: Vitaly Chikunov vt@altlinux.org Cc: kernel-hardening@lists.openwall.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- crypto/ecc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/crypto/ecc.c b/crypto/ecc.c index 8facafd67802..adcce310f646 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -842,15 +842,23 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
static void ecc_point_mult(struct ecc_point *result, const struct ecc_point *point, const u64 *scalar, - u64 *initial_z, u64 *curve_prime, + u64 *initial_z, const struct ecc_curve *curve, unsigned int ndigits) { /* R0 and R1 */ u64 rx[2][ECC_MAX_DIGITS]; u64 ry[2][ECC_MAX_DIGITS]; u64 z[ECC_MAX_DIGITS]; + u64 sk[2][ECC_MAX_DIGITS]; + u64 *curve_prime = curve->p; int i, nb; - int num_bits = vli_num_bits(scalar, ndigits); + int num_bits; + int carry; + + carry = vli_add(sk[0], scalar, curve->n, ndigits); + vli_add(sk[1], sk[0], curve->n, ndigits); + scalar = sk[!carry]; + num_bits = sizeof(u64) * ndigits * 8 + 1;
vli_set(rx[1], point->x, ndigits); vli_set(ry[1], point->y, ndigits); @@ -1004,7 +1012,7 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits, goto out; }
- ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits); + ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits); if (ecc_point_is_zero(pk)) { ret = -EAGAIN; goto err_free_point; @@ -1090,7 +1098,7 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, goto err_alloc_product; }
- ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits); + ecc_point_mult(product, pk, priv, rand_z, curve, ndigits);
ecc_swap_digits(product->x, secret, ndigits);
From: Trigger Huang Trigger.Huang@amd.com
[ Upstream commit 85744e9c100696d3f210e80b85fd56dd19767c81 ]
A bad job is the one triggered TDR(In the current amdgpu's implementation, actually all the jobs in the current joq-queue will be treated as bad jobs). In the recovery process, its fence will be fake signaled and as a result, the work behind will be scheduled to delete it from the mirror list, but if the TDR process is invoked before the work's execution, then this bad job might be processed again and the call dma_fence_set_error to its fence in TDR process will lead to kernel warning trace:
[ 143.033605] WARNING: CPU: 2 PID: 53 at ./include/linux/dma-fence.h:437 amddrm_sched_job_recovery+0x1af/0x1c0 [amd_sched] kernel: [ 143.033606] Modules linked in: amdgpu(OE) amdchash(OE) amdttm(OE) amd_sched(OE) amdkcl(OE) amd_iommu_v2 drm_kms_helper drm i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 snd_hda_codec_generic crypto_simd glue_helper cryptd snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq joydev snd_seq_device snd_timer snd soundcore binfmt_misc input_leds mac_hid serio_raw nfsd auth_rpcgss nfs_acl lockd grace sunrpc sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 8139too floppy psmouse 8139cp mii i2c_piix4 pata_acpi [ 143.033649] CPU: 2 PID: 53 Comm: kworker/2:1 Tainted: G OE 4.15.0-20-generic #21-Ubuntu [ 143.033650] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 143.033653] Workqueue: events drm_sched_job_timedout [amd_sched] [ 143.033656] RIP: 0010:amddrm_sched_job_recovery+0x1af/0x1c0 [amd_sched] [ 143.033657] RSP: 0018:ffffa9f880fe7d48 EFLAGS: 00010202 [ 143.033659] RAX: 0000000000000007 RBX: ffff9b98f2b24c00 RCX: ffff9b98efef4f08 [ 143.033660] RDX: ffff9b98f2b27400 RSI: ffff9b98f2b24c50 RDI: ffff9b98efef4f18 [ 143.033660] RBP: ffffa9f880fe7d98 R08: 0000000000000001 R09: 00000000000002b6 [ 143.033661] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9b98efef3430 [ 143.033662] R13: ffff9b98efef4d80 R14: ffff9b98efef4e98 R15: ffff9b98eaf91c00 [ 143.033663] FS: 0000000000000000(0000) GS:ffff9b98ffd00000(0000) knlGS:0000000000000000 [ 143.033664] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 143.033665] CR2: 00007fc49c96d470 CR3: 000000001400a005 CR4: 00000000003606e0 [ 143.033669] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 143.033669] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 143.033670] Call Trace: [ 143.033744] amdgpu_device_gpu_recover+0x144/0x820 [amdgpu] [ 143.033788] amdgpu_job_timedout+0x9b/0xa0 [amdgpu] [ 143.033791] drm_sched_job_timedout+0xcc/0x150 [amd_sched] [ 143.033795] process_one_work+0x1de/0x410 [ 143.033797] worker_thread+0x32/0x410 [ 143.033799] kthread+0x121/0x140 [ 143.033801] ? process_one_work+0x410/0x410 [ 143.033803] ? kthread_create_worker_on_cpu+0x70/0x70 [ 143.033806] ret_from_fork+0x35/0x40
So just delete the bad job from mirror list directly
Changes in v3: - Add a helper function to delete the bad jobs from mirror list and call it directly *before* the job's fence is signaled
Changes in v2: - delete the useless list node check - also delete bad jobs in drm_sched_main because: kthread_unpark(ring->sched.thread) will be invoked very early before amdgpu_device_gpu_recover's return, then drm_sched_main will have chance to pick up a new job from the job queue. This new job will be added into the mirror list and processed by amdgpu_job_run, but may not be deleted from the mirror list on time due to the same reason. And finally re-processed by drm_sched_job_recovery
Signed-off-by: Trigger Huang Trigger.Huang@amd.com Reviewed-by: Christian König chrstian.koenig@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/scheduler/sched_main.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 44fe587aaef9..c5bbbd7cb2de 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -60,6 +60,8 @@
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
+static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job); + /** * drm_sched_rq_init - initialize a given run queue struct * @@ -215,7 +217,7 @@ static void drm_sched_job_finish(struct work_struct *work)
spin_lock(&sched->job_list_lock); /* remove job from ring_mirror_list */ - list_del(&s_job->node); + list_del_init(&s_job->node); /* queue TDR for next job */ drm_sched_start_timeout(sched); spin_unlock(&sched->job_list_lock); @@ -378,6 +380,8 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) r); dma_fence_put(fence); } else { + if (s_fence->finished.error < 0) + drm_sched_expel_job_unlocked(s_job); drm_sched_process_job(NULL, &s_fence->cb); } spin_lock(&sched->job_list_lock); @@ -567,6 +571,8 @@ static int drm_sched_main(void *param) r); dma_fence_put(fence); } else { + if (s_fence->finished.error < 0) + drm_sched_expel_job_unlocked(sched_job); drm_sched_process_job(NULL, &s_fence->cb); }
@@ -575,6 +581,15 @@ static int drm_sched_main(void *param) return 0; }
+static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job) +{ + struct drm_gpu_scheduler *sched = s_job->sched; + + spin_lock(&sched->job_list_lock); + list_del_init(&s_job->node); + spin_unlock(&sched->job_list_lock); +} + /** * drm_sched_init - Init a gpu scheduler instance *
From: Anders Roxell anders.roxell@linaro.org
[ Upstream commit 81e9fa8bab381f8b6eb04df7cdf0f71994099bd4 ]
The armv8_pmuv3 driver doesn't have a remove function, and when the test 'CONFIG_DEBUG_TEST_DRIVER_REMOVE=y' is enabled, the following Call trace can be seen.
[ 1.424287] Failed to register pmu: armv8_pmuv3, reason -17 [ 1.424870] WARNING: CPU: 0 PID: 1 at ../kernel/events/core.c:11771 perf_event_sysfs_init+0x98/0xdc [ 1.425220] Modules linked in: [ 1.425531] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 4.19.0-rc7-next-20181012-00003-ge7a97b1ad77b-dirty #35 [ 1.425951] Hardware name: linux,dummy-virt (DT) [ 1.426212] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 1.426458] pc : perf_event_sysfs_init+0x98/0xdc [ 1.426720] lr : perf_event_sysfs_init+0x98/0xdc [ 1.426908] sp : ffff00000804bd50 [ 1.427077] x29: ffff00000804bd50 x28: ffff00000934e078 [ 1.427429] x27: ffff000009546000 x26: 0000000000000007 [ 1.427757] x25: ffff000009280710 x24: 00000000ffffffef [ 1.428086] x23: ffff000009408000 x22: 0000000000000000 [ 1.428415] x21: ffff000009136008 x20: ffff000009408730 [ 1.428744] x19: ffff80007b20b400 x18: 000000000000000a [ 1.429075] x17: 0000000000000000 x16: 0000000000000000 [ 1.429418] x15: 0000000000000400 x14: 2e79726f74636572 [ 1.429748] x13: 696420656d617320 x12: 656874206e692065 [ 1.430060] x11: 6d616e20656d6173 x10: 2065687420687469 [ 1.430335] x9 : ffff00000804bd50 x8 : 206e6f7361657220 [ 1.430610] x7 : 2c3376756d705f38 x6 : ffff00000954d7ce [ 1.430880] x5 : 0000000000000000 x4 : 0000000000000000 [ 1.431226] x3 : 0000000000000000 x2 : ffffffffffffffff [ 1.431554] x1 : 4d151327adc50b00 x0 : 0000000000000000 [ 1.431868] Call trace: [ 1.432102] perf_event_sysfs_init+0x98/0xdc [ 1.432382] do_one_initcall+0x6c/0x1a8 [ 1.432637] kernel_init_freeable+0x1bc/0x280 [ 1.432905] kernel_init+0x18/0x160 [ 1.433115] ret_from_fork+0x10/0x18 [ 1.433297] ---[ end trace 27fd415390eb9883 ]---
Rework to set suppress_bind_attrs flag to avoid removing the device when CONFIG_DEBUG_TEST_DRIVER_REMOVE=y, since there's no real reason to remove the armv8_pmuv3 driver.
Cc: Arnd Bergmann arnd@arndb.de Co-developed-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Anders Roxell anders.roxell@linaro.org Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/kernel/perf_event.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index e213f8e867f6..8a91ac067d44 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1274,6 +1274,7 @@ static struct platform_driver armv8_pmu_driver = { .driver = { .name = ARMV8_PMU_PDEV_NAME, .of_match_table = armv8_pmu_of_device_ids, + .suppress_bind_attrs = true, }, .probe = armv8_pmu_device_probe, };
From: Huacai Chen chenhc@lemote.com
[ Upstream commit f3ade253615ae6d83aeb72d1c8a96f62a4b4b29b ]
Loongson-3A R2.1 is the bugfix revision of Loongson-3A R2.
All Loongson-3 CPU family:
Code-name Brand-name PRId Loongson-3A R1 Loongson-3A1000 0x6305 Loongson-3A R2 Loongson-3A2000 0x6308 Loongson-3A R2.1 Loongson-3A2000 0x630c Loongson-3A R3 Loongson-3A3000 0x6309 Loongson-3A R3.1 Loongson-3A3000 0x630d Loongson-3B R1 Loongson-3B1000 0x6306 Loongson-3B R2 Loongson-3B1500 0x6307
Signed-off-by: Huacai Chen chenhc@lemote.com Signed-off-by: Paul Burton paul.burton@mips.com Patchwork: https://patchwork.linux-mips.org/patch/21128/ Cc: Ralf Baechle ralf@linux-mips.org Cc: James Hogan james.hogan@mips.com Cc: Steven J . Hill Steven.Hill@cavium.com Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang zhangfx@lemote.com Cc: Zhangjin Wu wuzhangjin@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/include/asm/cpu.h | 3 ++- arch/mips/include/asm/mach-loongson64/kernel-entry-init.h | 4 ++-- arch/mips/kernel/cpu-probe.c | 3 ++- arch/mips/kernel/idle.c | 2 +- arch/mips/loongson64/common/env.c | 3 ++- arch/mips/loongson64/loongson-3/smp.c | 3 ++- arch/mips/mm/c-r4k.c | 2 +- drivers/platform/mips/cpu_hwmon.c | 3 ++- 8 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index dacbdb84516a..532b49b1dbb3 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -248,8 +248,9 @@ #define PRID_REV_LOONGSON3A_R1 0x0005 #define PRID_REV_LOONGSON3B_R1 0x0006 #define PRID_REV_LOONGSON3B_R2 0x0007 -#define PRID_REV_LOONGSON3A_R2 0x0008 +#define PRID_REV_LOONGSON3A_R2_0 0x0008 #define PRID_REV_LOONGSON3A_R3_0 0x0009 +#define PRID_REV_LOONGSON3A_R2_1 0x000c #define PRID_REV_LOONGSON3A_R3_1 0x000d
/* diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index cbac603ced19..b5e288a12dfe 100644 --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -31,7 +31,7 @@ /* Enable STFill Buffer */ mfc0 t0, CP0_PRID andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2) + slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0) bnez t0, 1f mfc0 t0, CP0_CONFIG6 or t0, 0x100 @@ -60,7 +60,7 @@ /* Enable STFill Buffer */ mfc0 t0, CP0_PRID andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2) + slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0) bnez t0, 1f mfc0 t0, CP0_CONFIG6 or t0, 0x100 diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index d535fc706a8b..f70cf6447cfb 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1843,7 +1843,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_LOONGSON_64: /* Loongson-2/3 */ switch (c->processor_id & PRID_REV_MASK) { - case PRID_REV_LOONGSON3A_R2: + case PRID_REV_LOONGSON3A_R2_0: + case PRID_REV_LOONGSON3A_R2_1: c->cputype = CPU_LOONGSON3; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 046846999efd..909b7a87c89c 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -183,7 +183,7 @@ void __init check_wait(void) cpu_wait = r4k_wait; break; case CPU_LOONGSON3: - if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2) + if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0) cpu_wait = r4k_wait; break;
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index 8f68ee02a8c2..72e5f8fb2b35 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -197,7 +197,8 @@ void __init prom_init_env(void) cpu_clock_freq = 797000000; break; case PRID_REV_LOONGSON3A_R1: - case PRID_REV_LOONGSON3A_R2: + case PRID_REV_LOONGSON3A_R2_0: + case PRID_REV_LOONGSON3A_R2_1: case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: cpu_clock_freq = 900000000; diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index b5c1e0aa955e..8fba0aa48bf4 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -682,7 +682,8 @@ void play_dead(void) play_dead_at_ckseg1 = (void *)CKSEG1ADDR((unsigned long)loongson3a_r1_play_dead); break; - case PRID_REV_LOONGSON3A_R2: + case PRID_REV_LOONGSON3A_R2_0: + case PRID_REV_LOONGSON3A_R2_1: case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: play_dead_at_ckseg1 = diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 05bd77727fb9..7e430b4d8778 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1352,7 +1352,7 @@ static void probe_pcache(void) c->dcache.ways * c->dcache.linesz; c->dcache.waybit = 0; - if ((prid & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2) + if ((prid & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0) c->options |= MIPS_CPU_PREFETCH; break;
diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c index f66521c7f846..42efcb850722 100644 --- a/drivers/platform/mips/cpu_hwmon.c +++ b/drivers/platform/mips/cpu_hwmon.c @@ -25,9 +25,10 @@ int loongson3_cpu_temp(int cpu) case PRID_REV_LOONGSON3A_R1: reg = (reg >> 8) & 0xff; break; - case PRID_REV_LOONGSON3A_R2: case PRID_REV_LOONGSON3B_R1: case PRID_REV_LOONGSON3B_R2: + case PRID_REV_LOONGSON3A_R2_0: + case PRID_REV_LOONGSON3A_R2_1: reg = ((reg >> 8) & 0xff) - 100; break; case PRID_REV_LOONGSON3A_R3_0:
From: Andy Lutomirski luto@kernel.org
[ Upstream commit ebb53e2597e2dc7637ab213df006e99681b6ee25 ]
This avoids a situation in which we attempt to apply various fixups that are not intended to handle implicit supervisor accesses from user mode if we screw up in a way that causes this type of fault.
Signed-off-by: Andy Lutomirski luto@kernel.org Cc: Borislav Petkov bp@alien8.de Cc: Dave Hansen dave.hansen@linux.intel.com Cc: H. Peter Anvin hpa@zytor.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rik van Riel riel@surriel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Yu-cheng Yu yu-cheng.yu@intel.com Link: http://lkml.kernel.org/r/9999f151d72ff352265f3274c5ab3a4105090f49.1542841400... Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/mm/fault.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 71d4b9d4d43f..26388576a599 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -639,6 +639,15 @@ no_context(struct pt_regs *regs, unsigned long error_code, unsigned long flags; int sig;
+ if (user_mode(regs)) { + /* + * This is an implicit supervisor-mode access from user + * mode. Bypass all the kernel-mode recovery code and just + * OOPS. + */ + goto oops; + } + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { /* @@ -726,6 +735,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (IS_ENABLED(CONFIG_EFI)) efi_recover_from_page_fault(address);
+oops: /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice:
From: Nikolaj Fogh nikolajfogh@gmail.com
[ Upstream commit 6abd837104a3a8e1cda64fc4d7675f6c3ece9d8b ]
Improve baud-rate generation by using rounding-to-closest instead of truncation in divisor calculation.
Results have been verified by logic analyzer on an FT232RT (232BM) chip. The following table shows the wanted baud rate, the baud rate obtained with the old method (truncation), with the new method (rounding) and the baud rate generated by the windows 10 driver. The numbers in parentheses is the error.
+- Wanted --+------ Old -------+------ New -------+------ Win -------+ | 9600 | 9600 (0.00%) | 9604 (0.05%) | 9605 (0.05%) | | 19200 | 19200 (0.00%) | 19199 (0.01%) | 19198 (0.01%) | | 38400 | 38395 (0.01%) | 38431 (0.08%) | 38394 (0.02%) | | 57600 | 57725 (0.22%) | 57540 (0.10%) | 57673 (0.13%) | | 115200 | 115307 (0.09%) | 115330 (0.11%) | 115320 (0.10%) | | 921600 | 919963 (0.18%) | 920386 (0.13%) | 920810 (0.09%) | | 961200 | 996512 (3.67%) | 956480 (0.49%) | 956937 (0.44%) | +-----------+------------------+------------------+------------------+
The error due to noise in the measurements is in the order of a few tenths of a %. As can be seen, the baud rate is significantly improved for some rates (e.g. 961200), and corresponds to the output given by the windows driver.
The theoretical baud rate has been calculated for all baud rates from 1 to 3M, and as expected, the error is centered around 0, with a triangle shape instead of a sawtooth, so the maximum error is decreased to half.
Signed-off-by: Nikolaj Fogh nikolajfogh@gmail.com [ johan: edit commit message slightly ] Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/serial/ftdi_sio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 609198d9594c..1ab2a6191013 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1130,7 +1130,7 @@ static unsigned short int ftdi_232am_baud_base_to_divisor(int baud, int base) { unsigned short int divisor; /* divisor shifted 3 bits to the left */ - int divisor3 = base / 2 / baud; + int divisor3 = DIV_ROUND_CLOSEST(base, 2 * baud); if ((divisor3 & 0x7) == 7) divisor3++; /* round x.7/8 up to x+1 */ divisor = divisor3 >> 3; @@ -1156,7 +1156,7 @@ static u32 ftdi_232bm_baud_base_to_divisor(int baud, int base) static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 }; u32 divisor; /* divisor shifted 3 bits to the left */ - int divisor3 = base / 2 / baud; + int divisor3 = DIV_ROUND_CLOSEST(base, 2 * baud); divisor = divisor3 >> 3; divisor |= (u32)divfrac[divisor3 & 0x7] << 14; /* Deal with special cases for highest baud rates. */ @@ -1179,7 +1179,7 @@ static u32 ftdi_2232h_baud_base_to_divisor(int baud, int base) int divisor3;
/* hi-speed baud rate is 10-bit sampling instead of 16-bit */ - divisor3 = base * 8 / (baud * 10); + divisor3 = DIV_ROUND_CLOSEST(8 * base, 10 * baud);
divisor = divisor3 >> 3; divisor |= (u32)divfrac[divisor3 & 0x7] << 14;
On Tue, Jan 08, 2019 at 02:24:53PM -0500, Sasha Levin wrote:
From: Nikolaj Fogh nikolajfogh@gmail.com
[ Upstream commit 6abd837104a3a8e1cda64fc4d7675f6c3ece9d8b ]
Improve baud-rate generation by using rounding-to-closest instead of truncation in divisor calculation.
Results have been verified by logic analyzer on an FT232RT (232BM) chip. The following table shows the wanted baud rate, the baud rate obtained with the old method (truncation), with the new method (rounding) and the baud rate generated by the windows 10 driver. The numbers in parentheses is the error.
+- Wanted --+------ Old -------+------ New -------+------ Win -------+ | 9600 | 9600 (0.00%) | 9604 (0.05%) | 9605 (0.05%) | | 19200 | 19200 (0.00%) | 19199 (0.01%) | 19198 (0.01%) | | 38400 | 38395 (0.01%) | 38431 (0.08%) | 38394 (0.02%) | | 57600 | 57725 (0.22%) | 57540 (0.10%) | 57673 (0.13%) | | 115200 | 115307 (0.09%) | 115330 (0.11%) | 115320 (0.10%) | | 921600 | 919963 (0.18%) | 920386 (0.13%) | 920810 (0.09%) | | 961200 | 996512 (3.67%) | 956480 (0.49%) | 956937 (0.44%) | +-----------+------------------+------------------+------------------+
The error due to noise in the measurements is in the order of a few tenths of a %. As can be seen, the baud rate is significantly improved for some rates (e.g. 961200), and corresponds to the output given by the windows driver.
The theoretical baud rate has been calculated for all baud rates from 1 to 3M, and as expected, the error is centered around 0, with a triangle shape instead of a sawtooth, so the maximum error is decreased to half.
Signed-off-by: Nikolaj Fogh nikolajfogh@gmail.com [ johan: edit commit message slightly ] Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org
The stable tag was left out on purpose for this one to avoid breaking working setups which may have compensated and adapted to this driver behaviour which has been there since the driver was merged ages ago.
Johan
On Wed, Jan 09, 2019 at 09:24:37AM +0100, Johan Hovold wrote:
On Tue, Jan 08, 2019 at 02:24:53PM -0500, Sasha Levin wrote:
From: Nikolaj Fogh nikolajfogh@gmail.com
[ Upstream commit 6abd837104a3a8e1cda64fc4d7675f6c3ece9d8b ]
Improve baud-rate generation by using rounding-to-closest instead of truncation in divisor calculation.
Results have been verified by logic analyzer on an FT232RT (232BM) chip. The following table shows the wanted baud rate, the baud rate obtained with the old method (truncation), with the new method (rounding) and the baud rate generated by the windows 10 driver. The numbers in parentheses is the error.
+- Wanted --+------ Old -------+------ New -------+------ Win -------+ | 9600 | 9600 (0.00%) | 9604 (0.05%) | 9605 (0.05%) | | 19200 | 19200 (0.00%) | 19199 (0.01%) | 19198 (0.01%) | | 38400 | 38395 (0.01%) | 38431 (0.08%) | 38394 (0.02%) | | 57600 | 57725 (0.22%) | 57540 (0.10%) | 57673 (0.13%) | | 115200 | 115307 (0.09%) | 115330 (0.11%) | 115320 (0.10%) | | 921600 | 919963 (0.18%) | 920386 (0.13%) | 920810 (0.09%) | | 961200 | 996512 (3.67%) | 956480 (0.49%) | 956937 (0.44%) | +-----------+------------------+------------------+------------------+
The error due to noise in the measurements is in the order of a few tenths of a %. As can be seen, the baud rate is significantly improved for some rates (e.g. 961200), and corresponds to the output given by the windows driver.
The theoretical baud rate has been calculated for all baud rates from 1 to 3M, and as expected, the error is centered around 0, with a triangle shape instead of a sawtooth, so the maximum error is decreased to half.
Signed-off-by: Nikolaj Fogh nikolajfogh@gmail.com [ johan: edit commit message slightly ] Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org
The stable tag was left out on purpose for this one to avoid breaking working setups which may have compensated and adapted to this driver behaviour which has been there since the driver was merged ages ago.
I'll drop it, thank you.
-- Thanks, Sasha
From: Ville Syrjälä ville.syrjala@linux.intel.com
[ Upstream commit 2de42f79bb21a412f40ade8831eb6fc445cb78a4 ]
Consider the following scenario: 1. nonblocking enable crtc 2. wait for the event 3. nonblocking disable crtc
On i915 this can lead to a spurious -EBUSY from step 3 on account of non-enabled planes getting the fake_commit in step 1 and we don't complete the fake_commit-> flip_done until drm_atomic_helper_commit_hw_done() which can happen a long time after the flip event was sent out.
This will become somewhat easy to hit on SKL+ once we start to add all the planes for the crtc to every modeset commit for the purposes of forcing a watermark register programming [1].
To make the race a little less pronounced let's complete fake_commit->flip_done after drm_atomic_helper_wait_for_flip_done(). For the single crtc case this should make the race quite theoretical, assuming drm_atomic_helper_wait_for_flip_done() actually has to wait for the real commit flip_done. In case the real commit flip_done gets completed singificantly before drm_atomic_helper_wait_for_flip_done(), or we are dealing with multiple crtcs whose vblanks don't line up nicely the race still exists.
[1] https://patchwork.freedesktop.org/patch/262670/
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Fixes: 080de2e5be2d ("drm/atomic: Check for busy planes/connectors before setting the commit") Testcase: igt/kms_cursor_legacy/*nonblocking-modeset-vs-cursor-atomic Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20181122143412.11655-1-ville.s... Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/drm_atomic_helper.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index d8b526b7932c..b4e292a56046 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1445,6 +1445,9 @@ void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev, DRM_ERROR("[CRTC:%d:%s] flip_done timed out\n", crtc->base.id, crtc->name); } + + if (old_state->fake_commit) + complete_all(&old_state->fake_commit->flip_done); } EXPORT_SYMBOL(drm_atomic_helper_wait_for_flip_done);
From: Martin Blumenstingl martin.blumenstingl@googlemail.com
[ Upstream commit e36c7e9898f2ba34becf37bda37b70e984b0bf4e ]
The sys_pll on the EC-100 board is configured to 1584MHz at boot (either by u-boot, firmware or chip defaults). This is achieved by using M = 66, N = 1 (24MHz * 66 / 1). At boot the CPU clock is running off sys_pll divided by 2 which results in 792MHz. Thus M = 66 is considered to be a "safe" value for Meson8b.
To achieve 1608MHz (one of the CPU OPPs on Meson8 and Meson8m2) we need M = 67, N = 1. I ran "stress --cpu 4" while infinitely cycling through all available frequencies on my Meson8m2 board and could not spot any issues with this setting (after ~12 hours of running this).
On Meson8, Meson8b and Meson8m2 we also want to be able to use 408MHz and 816MHz CPU frequencies. These can be achieved by dividing sys_pll by 4 (for 408MHz) or 2 (for 816MHz). That means that sys_pll has to run at 1632MHz which can be generated using M = 68, N = 1. Similarily we also want to be able to use 1008MHz as CPU frequency. This means that sys_pll has to run either at 1008MHz or 2016MHz. The former would result in an M value of 42, which is lower than the smallest value used by the 3.10 GPL kernel sources from Amlogic (50 is the lower limit there). Thus we need to run sys_pll at 2016MHz which can ge generated using M = 84, N = 1. I tested M = 68 and M = 84 on my Meson8b Odroid-C1 and my Meson8m2 board by running "stress --cpu 4" while infinitely cycling thorugh all available frequencies. I could not spot any issues after ~12 hours of running this.
Amlogic's 3.10 GPL kernel sources have more M/N combinations. I did not add them yet because M = 74 (to achieve close to 1800MHz on Meson8) and M = 82 (to achieve close to 1992MHz on Meson8 as well) caused my Meson8m2 board to hang randomly. It's not clear why this is (for example because the board's voltage regulator design is bad, some missing bits for these values in our clk-pll driver, etc.). Thus the following M values from the Amlogic 3.10 GPL kernel sources are skipped as of now: 69, 70, 71, 72, 73, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98
Signed-off-by: Martin Blumenstingl martin.blumenstingl@googlemail.com Acked-by: Jerome Brunet jbrunet@baylibre.com Signed-off-by: Neil Armstrong narmstrong@baylibre.com Link: https://lkml.kernel.org/r/20181115224048.13511-5-martin.blumenstingl@googlem... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/meson/meson8b.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c index 346b9e165b7a..a059db63907c 100644 --- a/drivers/clk/meson/meson8b.c +++ b/drivers/clk/meson/meson8b.c @@ -42,6 +42,11 @@ static const struct pll_params_table sys_pll_params_table[] = { PLL_PARAMS(62, 1), PLL_PARAMS(63, 1), PLL_PARAMS(64, 1), + PLL_PARAMS(65, 1), + PLL_PARAMS(66, 1), + PLL_PARAMS(67, 1), + PLL_PARAMS(68, 1), + PLL_PARAMS(84, 1), { /* sentinel */ }, };
From: Martin Blumenstingl martin.blumenstingl@googlemail.com
[ Upstream commit ad9b2b8e53af61375322e3c7d624acf3a3ef53b0 ]
The public S805 datasheet only mentions that HHI_SYS_CPU_CLK_CNTL1[20:29] contains a divider called "cpu_scale_div". Unfortunately it does not mention how to use the register contents.
The Amlogic 3.10 GPL kernel sources are using the following code to calculate the CPU clock based on that register (taken from arch/arm/mach-meson8/clock.c in the 3.10 Amlogic kernel, shortened to make it easier to read): N = (aml_read_reg32(P_HHI_SYS_CPU_CLK_CNTL1) >> 20) & 0x3FF; if (sel == 3) /* use cpu_scale_div */ div = 2 * N; else div = ... /* not relevant for this example */ cpu_clk = parent_clk / div;
This suggests that the formula is: parent_rate / 2 * register_value However, running perf (which can measure the CPU clock rate thanks to the ARM PMU) shows that this formula is not correct. This can be reproduced with the following steps: 1. boot into u-boot 2. let the CPU clock run off the XTAL clock: mw.l 0xC110419C 0x30 1 3. set the cpu_scale_div register: to value 0x1: mw.l 0xC110415C 0x801016A2 1 to value 0x2: mw.l 0xC110415C 0x802016A2 1 to value 0x5: mw.l 0xC110415C 0x805016A2 1 4. let the CPU clock run off cpu_scale_div: mw.l 0xC110419C 0xbd 1 5. boot Linux 6. run: perf stat -aB stress --cpu 4 --timeout 10 7. check the "cycles" value
I get the following results depending on the cpu_scale_div value: - (cpu_in_sel - this is the input clock for cpu_scale_div - runs at 1.2GHz) - 0x1 = 300MHz - 0x2 = 200MHz - 0x5 = 100MHz
This means that the actual formula to calculate the output of the cpu_scale_div clock is: parent_rate / 2 * (register value + 1).
The register value 0x0 is reserved. When letting the CPU clock run off the cpu_scale_div while the value is 0x0 the whole board hangs (even in u-boot).
I also verified this with the TWD timer: when adding this to the .dts without specifying it's clock it will auto-detect the PERIPH (which is the input clock of the TWD) clock rate (and the result is shown in the kernel log). On Meson8, Meson8b and Meson8m2 the PERIPH clock is CPUCLK divided by 4. This also matched for all three test-cases from above (in all cases the TWD timer clock rate was approx. one fourth of the CPU clock rate).
A small note regarding the "fixes" tag: the original issue seems to exist virtually since forever. Even commit 28b9fcd016126e ("clk: meson8b: Add support for Meson8b clocks") seems to handle this wrong. I still decided to use commit 251b6fd38bcb9c ("clk: meson: rework meson8b cpu clock") because this is the first commit which gets the CPU hiearchy correct and thus it's the first commit where the cpu_scale_div register is used correctly (apart from the bug in the cpu_scale_table).
Fixes: 251b6fd38bcb9c ("clk: meson: rework meson8b cpu clock") Signed-off-by: Martin Blumenstingl martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong narmstrong@baylibre.com Link: https://lkml.kernel.org/r/20180927085921.24627-2-martin.blumenstingl@googlem... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/meson/meson8b.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c index a059db63907c..1d39273d7a04 100644 --- a/drivers/clk/meson/meson8b.c +++ b/drivers/clk/meson/meson8b.c @@ -584,13 +584,14 @@ static struct clk_fixed_factor meson8b_cpu_div3 = { };
static const struct clk_div_table cpu_scale_table[] = { - { .val = 2, .div = 4 }, - { .val = 3, .div = 6 }, - { .val = 4, .div = 8 }, - { .val = 5, .div = 10 }, - { .val = 6, .div = 12 }, - { .val = 7, .div = 14 }, - { .val = 8, .div = 16 }, + { .val = 1, .div = 4 }, + { .val = 2, .div = 6 }, + { .val = 3, .div = 8 }, + { .val = 4, .div = 10 }, + { .val = 5, .div = 12 }, + { .val = 6, .div = 14 }, + { .val = 7, .div = 16 }, + { .val = 8, .div = 18 }, { /* sentinel */ }, };
From: "Daniel T. Lee" danieltimlee@gmail.com
[ Upstream commit 5a863813216ce79e16a8c1503b2543c528b778b6 ]
Currently, kprobe_events failure won't be handled properly. Due to calling system() indirectly to write to kprobe_events, it can't be identified whether an error is derived from kprobe or system.
// buf = "echo '%c:%s %s' >> /s/k/d/t/kprobe_events" err = system(buf); if (err < 0) { printf("failed to create kprobe .."); return -1; }
For example, running ./tracex7 sample in ext4 partition, "echo p:open_ctree open_ctree >> /s/k/d/t/kprobe_events" gets 256 error code system() failure. => The error comes from kprobe, but it's not handled correctly.
According to man of system(3), it's return value just passes the termination status of the child shell rather than treating the error as -1. (don't care success)
Which means, currently it's not working as desired. (According to the upper code snippet)
ex) running ./tracex7 with ext4 env. # Current Output sh: echo: I/O error failed to open event open_ctree
# Desired Output failed to create kprobe 'open_ctree' error 'No such file or directory'
The problem is, error can't be verified whether from child ps or system. But using write() directly can verify the command failure, and it will treat all error as -1. So I suggest using write() directly to 'kprobe_events' rather than calling system().
Signed-off-by: Daniel T. Lee danieltimlee@gmail.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Sasha Levin sashal@kernel.org --- samples/bpf/bpf_load.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-)
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index e6d7e0fe155b..96783207de4a 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -54,6 +54,23 @@ static int populate_prog_array(const char *event, int prog_fd) return 0; }
+static int write_kprobe_events(const char *val) +{ + int fd, ret, flags; + + if ((val != NULL) && (val[0] == '\0')) + flags = O_WRONLY | O_TRUNC; + else + flags = O_WRONLY | O_APPEND; + + fd = open("/sys/kernel/debug/tracing/kprobe_events", flags); + + ret = write(fd, val, strlen(val)); + close(fd); + + return ret; +} + static int load_and_attach(const char *event, struct bpf_insn *prog, int size) { bool is_socket = strncmp(event, "socket", 6) == 0; @@ -165,10 +182,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
#ifdef __x86_64__ if (strncmp(event, "sys_", 4) == 0) { - snprintf(buf, sizeof(buf), - "echo '%c:__x64_%s __x64_%s' >> /sys/kernel/debug/tracing/kprobe_events", - is_kprobe ? 'p' : 'r', event, event); - err = system(buf); + snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s", + is_kprobe ? 'p' : 'r', event, event); + err = write_kprobe_events(buf); if (err >= 0) { need_normal_check = false; event_prefix = "__x64_"; @@ -176,10 +192,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) } #endif if (need_normal_check) { - snprintf(buf, sizeof(buf), - "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", - is_kprobe ? 'p' : 'r', event, event); - err = system(buf); + snprintf(buf, sizeof(buf), "%c:%s %s", + is_kprobe ? 'p' : 'r', event, event); + err = write_kprobe_events(buf); if (err < 0) { printf("failed to create kprobe '%s' error '%s'\n", event, strerror(errno)); @@ -519,7 +534,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) return 1;
/* clear all kprobes */ - i = system("echo "" > /sys/kernel/debug/tracing/kprobe_events"); + i = write_kprobe_events("");
/* scan over all elf sections to get license and map info */ for (i = 1; i < ehdr.e_shnum; i++) {
From: Yoshihiro Shimoda yoshihiro.shimoda.uh@renesas.com
[ Upstream commit ceb94bc52c437463f0903e61060a94a2226fb672 ]
This patch adds a safety connection way for "forced_b_device" with "workaround_for_vbus" like below:
< Example for R-Car E3 Ebisu > # modprobe <any usb gadget driver> # echo 1 > /sys/kernel/debug/ee020000.usb/b_device (connect a usb cable to host side.) # echo 2 > /sys/kernel/debug/ee020000.usb/b_device
Previous code should have connected a usb cable before the "b_device" is set to 1 on the Ebisu board. However, if xHCI driver on the board is probed, it causes some troubles: - Conflicts USB VBUS/signals between the board and another host. - "Cannot enable. Maybe the USB cable is bad?" might happen on both the board and another host with a usb hub. - Cannot enumerate a usb gadget correctly because an interruption of VBUS change happens unexpectedly.
Reported-by: Kazuya Mizuguchi kazuya.mizuguchi.ks@renesas.com Signed-off-by: Yoshihiro Shimoda yoshihiro.shimoda.uh@renesas.com Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/gadget/udc/renesas_usb3.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index cdffbd1e0316..6e34f9594159 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -358,6 +358,7 @@ struct renesas_usb3 { bool extcon_host; /* check id and set EXTCON_USB_HOST */ bool extcon_usb; /* check vbus and set EXTCON_USB */ bool forced_b_device; + bool start_to_connect; };
#define gadget_to_renesas_usb3(_gadget) \ @@ -476,7 +477,8 @@ static void usb3_init_axi_bridge(struct renesas_usb3 *usb3) static void usb3_init_epc_registers(struct renesas_usb3 *usb3) { usb3_write(usb3, ~0, USB3_USB_INT_STA_1); - usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG); + if (!usb3->workaround_for_vbus) + usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG); }
static bool usb3_wakeup_usb2_phy(struct renesas_usb3 *usb3) @@ -700,8 +702,7 @@ static void usb3_mode_config(struct renesas_usb3 *usb3, bool host, bool a_dev) usb3_set_mode_by_role_sw(usb3, host); usb3_vbus_out(usb3, a_dev); /* for A-Peripheral or forced B-device mode */ - if ((!host && a_dev) || - (usb3->workaround_for_vbus && usb3->forced_b_device)) + if ((!host && a_dev) || usb3->start_to_connect) usb3_connect(usb3); spin_unlock_irqrestore(&usb3->lock, flags); } @@ -2432,7 +2433,11 @@ static ssize_t renesas_usb3_b_device_write(struct file *file, if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT;
- if (!strncmp(buf, "1", 1)) + usb3->start_to_connect = false; + if (usb3->workaround_for_vbus && usb3->forced_b_device && + !strncmp(buf, "2", 1)) + usb3->start_to_connect = true; + else if (!strncmp(buf, "1", 1)) usb3->forced_b_device = true; else usb3->forced_b_device = false; @@ -2440,7 +2445,7 @@ static ssize_t renesas_usb3_b_device_write(struct file *file, if (usb3->workaround_for_vbus) usb3_disconnect(usb3);
- /* Let this driver call usb3_connect() anyway */ + /* Let this driver call usb3_connect() if needed */ usb3_check_id(usb3);
return count;
From: Anatolij Gustschin agust@denx.de
[ Upstream commit 30522a951f9d02f261d0697c35cb42205b1fae17 ]
Currently registering CvP managers works only for first probed CvP device, for all other devices it is refused due to duplicated chkcfg sysfs entry:
fpga_manager fpga3: Altera CvP FPGA Manager @0000:0c:00.0 registered sysfs: cannot create duplicate filename '/bus/pci/drivers/altera-cvp/chkcfg' CPU: 0 PID: 3808 Comm: bash Tainted: G O 4.19.0-custom+ #5 Call Trace: dump_stack+0x46/0x5b sysfs_warn_dup+0x53/0x60 sysfs_add_file_mode_ns+0x16d/0x180 sysfs_create_file_ns+0x51/0x60 altera_cvp_probe+0x16f/0x2a0 [altera_cvp] local_pci_probe+0x3f/0xa0 ? pci_match_device+0xb1/0xf0 pci_device_probe+0x116/0x170 really_probe+0x21b/0x2c0 driver_probe_device+0x4b/0xe0 bind_store+0xcb/0x130 kernfs_fop_write+0xfd/0x180 __vfs_write+0x21/0x150 ? selinux_file_permission+0xdc/0x130 vfs_write+0xa8/0x1a0 ? find_vma+0xd/0x60 ksys_write+0x3d/0x90 do_syscall_64+0x44/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... altera-cvp 0000:0c:00.0: Can't create sysfs chkcfg file fpga_manager fpga3: fpga_mgr_unregister Altera CvP FPGA Manager @0000:0c:00.0
Move chkcfg creation to module init as suggested by Alan.
Signed-off-by: Anatolij Gustschin agust@denx.de Acked-by: Alan Tull atull@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/fpga/altera-cvp.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-)
diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c index 610a1558e0ed..d9fa7d4bf11f 100644 --- a/drivers/fpga/altera-cvp.c +++ b/drivers/fpga/altera-cvp.c @@ -466,14 +466,6 @@ static int altera_cvp_probe(struct pci_dev *pdev, if (ret) goto err_unmap;
- ret = driver_create_file(&altera_cvp_driver.driver, - &driver_attr_chkcfg); - if (ret) { - dev_err(&pdev->dev, "Can't create sysfs chkcfg file\n"); - fpga_mgr_unregister(mgr); - goto err_unmap; - } - return 0;
err_unmap: @@ -491,7 +483,6 @@ static void altera_cvp_remove(struct pci_dev *pdev) struct altera_cvp_conf *conf = mgr->priv; u16 cmd;
- driver_remove_file(&altera_cvp_driver.driver, &driver_attr_chkcfg); fpga_mgr_unregister(mgr); pci_iounmap(pdev, conf->map); pci_release_region(pdev, CVP_BAR); @@ -500,7 +491,30 @@ static void altera_cvp_remove(struct pci_dev *pdev) pci_write_config_word(pdev, PCI_COMMAND, cmd); }
-module_pci_driver(altera_cvp_driver); +static int __init altera_cvp_init(void) +{ + int ret; + + ret = pci_register_driver(&altera_cvp_driver); + if (ret) + return ret; + + ret = driver_create_file(&altera_cvp_driver.driver, + &driver_attr_chkcfg); + if (ret) + pr_warn("Can't create sysfs chkcfg file\n"); + + return 0; +} + +static void __exit altera_cvp_exit(void) +{ + driver_remove_file(&altera_cvp_driver.driver, &driver_attr_chkcfg); + pci_unregister_driver(&altera_cvp_driver); +} + +module_init(altera_cvp_init); +module_exit(altera_cvp_exit);
MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Anatolij Gustschin agust@denx.de");
From: Ondrej Mosnacek omosnace@redhat.com
[ Upstream commit 2cbdcb882f97a45f7475c67ac6257bbc16277dfe ]
If a superblock has the MS_SUBMOUNT flag set, we should always allow mounting it. These mounts are done automatically by the kernel either as part of mounting some parent mount (e.g. debugfs always mounts tracefs under "tracing" for compatibility) or they are mounted automatically as needed on subdirectory accesses (e.g. NFS crossmnt mounts). Since such automounts are either an implicit consequence of the parent mount (which is already checked) or they can happen during regular accesses (where it doesn't make sense to check against the current task's context), the mount permission check should be skipped for them.
Without this patch, attempts to access contents of an automounted directory can cause unexpected SELinux denials.
In the current kernel tree, the MS_SUBMOUNT flag is set only via vfs_submount(), which is called only from the following places: - AFS, when automounting special "symlinks" referencing other cells - CIFS, when automounting "referrals" - NFS, when automounting subtrees - debugfs, when automounting tracefs
In all cases the submounts are meant to be transparent to the user and it makes sense that if mounting the master is allowed, then so should be the automounts. Note that CAP_SYS_ADMIN capability checking is already skipped for (SB_KERNMOUNT|SB_SUBMOUNT) in: - sget_userns() in fs/super.c: if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !(type->fs_flags & FS_USERNS_MOUNT) && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - sget() in fs/super.c: /* Ensure the requestor has permissions over the target filesystem */ if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM);
Verified internally on patched RHEL 7.6 with a reproducer using NFS+httpd and selinux-tesuite.
Fixes: 93faccbbfa95 ("fs: Better permission checking for submounts") Signed-off-by: Ondrej Mosnacek omosnace@redhat.com Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Sasha Levin sashal@kernel.org --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a67459eb62d5..0f27db6d94a9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2934,7 +2934,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) return rc;
/* Allow all mounts performed by the kernel */ - if (flags & MS_KERNMOUNT) + if (flags & (MS_KERNMOUNT | MS_SUBMOUNT)) return 0;
ad.type = LSM_AUDIT_DATA_DENTRY;
From: James Morse james.morse@arm.com
[ Upstream commit d8797b125711f23d83f5a71e908d34dfcd1fc3e9 ]
__install_bp_hardening_cb() is called via stop_machine() as part of the cpu_enable callback. To force each CPU to take its turn when allocating slots, they take a spinlock.
With the RT patches applied, the spinlock becomes a mutex, and we get warnings about sleeping while in stop_machine(): | [ 0.319176] CPU features: detected: RAS Extension Support | [ 0.319950] BUG: scheduling while atomic: migration/3/36/0x00000002 | [ 0.319955] Modules linked in: | [ 0.319958] Preemption disabled at: | [ 0.319969] [<ffff000008181ae4>] cpu_stopper_thread+0x7c/0x108 | [ 0.319973] CPU: 3 PID: 36 Comm: migration/3 Not tainted 4.19.1-rt3-00250-g330fc2c2a880 #2 | [ 0.319975] Hardware name: linux,dummy-virt (DT) | [ 0.319976] Call trace: | [ 0.319981] dump_backtrace+0x0/0x148 | [ 0.319983] show_stack+0x14/0x20 | [ 0.319987] dump_stack+0x80/0xa4 | [ 0.319989] __schedule_bug+0x94/0xb0 | [ 0.319991] __schedule+0x510/0x560 | [ 0.319992] schedule+0x38/0xe8 | [ 0.319994] rt_spin_lock_slowlock_locked+0xf0/0x278 | [ 0.319996] rt_spin_lock_slowlock+0x5c/0x90 | [ 0.319998] rt_spin_lock+0x54/0x58 | [ 0.320000] enable_smccc_arch_workaround_1+0xdc/0x260 | [ 0.320001] __enable_cpu_capability+0x10/0x20 | [ 0.320003] multi_cpu_stop+0x84/0x108 | [ 0.320004] cpu_stopper_thread+0x84/0x108 | [ 0.320008] smpboot_thread_fn+0x1e8/0x2b0 | [ 0.320009] kthread+0x124/0x128 | [ 0.320010] ret_from_fork+0x10/0x18
Switch this to a raw spinlock, as we know this is only called with IRQs masked.
Signed-off-by: James Morse james.morse@arm.com Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/kernel/cpu_errata.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6ad715d67df8..99622e5ad21b 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -135,7 +135,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { - static DEFINE_SPINLOCK(bp_lock); + static DEFINE_RAW_SPINLOCK(bp_lock); int cpu, slot = -1;
/* @@ -147,7 +147,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, return; }
- spin_lock(&bp_lock); + raw_spin_lock(&bp_lock); for_each_possible_cpu(cpu) { if (per_cpu(bp_hardening_data.fn, cpu) == fn) { slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); @@ -163,7 +163,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
__this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); __this_cpu_write(bp_hardening_data.fn, fn); - spin_unlock(&bp_lock); + raw_spin_unlock(&bp_lock); } #else #define __smccc_workaround_1_smc_start NULL
From: Nicholas Kazlauskas nicholas.kazlauskas@amd.com
[ Upstream commit 520f08df45fbe300ed650da786a74093d658b7e1 ]
When variable refresh rate is active the hardware counter can return a position >= vtotal. This results in a vpos being returned from amdgpu_display_get_crtc_scanoutpos that's a positive value. The positive value indicates to the caller that the display is currently in scanout when the display is actually still in vblank.
This is because the vfront porch duration is unknown with variable refresh active and will end when either a page flip occurs or the timeout specified by the driver/display is reached.
The behavior of the amdgpu_display_get_crtc_scanoutpos remains the same when the position is below vtotal. When the position is above vtotal the function will return a value that is effectively -vbl_end, the size of the vback porch.
The only caller affected by this change is the DRM helper for calculating vblank timestamps. This change corrects behavior for calculating the page flip timestamp from being the previous timestamp to the calculation to the next timestamp when position >= vtotal.
Signed-off-by: Nicholas Kazlauskas nicholas.kazlauskas@amd.com Reviewed-by: Harry Wentland harry.wentland@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 686a26de50f9..9c940bbed608 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -857,7 +857,12 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, /* Inside "upper part" of vblank area? Apply corrective offset if so: */ if (in_vbl && (*vpos >= vbl_start)) { vtotal = mode->crtc_vtotal; - *vpos = *vpos - vtotal; + + /* With variable refresh rate displays the vpos can exceed + * the vtotal value. Clamp to 0 to return -vbl_end instead + * of guessing the remaining number of lines until scanout. + */ + *vpos = (*vpos < vtotal) ? (*vpos - vtotal) : 0; }
/* Correct for shifted end of vbl at vbl_end. */
On 2019-01-08 8:25 p.m., Sasha Levin wrote:
From: Nicholas Kazlauskas nicholas.kazlauskas@amd.com
[ Upstream commit 520f08df45fbe300ed650da786a74093d658b7e1 ]
When variable refresh rate is active [...]
Variable refresh rate (FreeSync) support is only landing in 5.0, therefore this fix isn't needed in older kernels.
On Wed, Jan 09, 2019 at 10:20:49AM +0100, Michel Dänzer wrote:
On 2019-01-08 8:25 p.m., Sasha Levin wrote:
From: Nicholas Kazlauskas nicholas.kazlauskas@amd.com
[ Upstream commit 520f08df45fbe300ed650da786a74093d658b7e1 ]
When variable refresh rate is active [...]
Variable refresh rate (FreeSync) support is only landing in 5.0, therefore this fix isn't needed in older kernels.
I'll drop it, thank you.
-- Thanks, Sasha
From: Yufen Yu yuyufen@huawei.com
[ Upstream commit 94a2c3a32b62e868dc1e3d854326745a7f1b8c7a ]
We recently got a stack by syzkaller like this:
BUG: sleeping function called from invalid context at mm/slab.h:361 in_atomic(): 1, irqs_disabled(): 0, pid: 6644, name: blkid INFO: lockdep is turned off. CPU: 1 PID: 6644 Comm: blkid Not tainted 4.4.163-514.55.6.9.x86_64+ #76 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 0000000000000000 5ba6a6b879e50c00 ffff8801f6b07b10 ffffffff81cb2194 0000000041b58ab3 ffffffff833c7745 ffffffff81cb2080 5ba6a6b879e50c00 0000000000000000 0000000000000001 0000000000000004 0000000000000000 Call Trace: <IRQ> [<ffffffff81cb2194>] __dump_stack lib/dump_stack.c:15 [inline] <IRQ> [<ffffffff81cb2194>] dump_stack+0x114/0x1a0 lib/dump_stack.c:51 [<ffffffff8129a981>] ___might_sleep+0x291/0x490 kernel/sched/core.c:7675 [<ffffffff8129ac33>] __might_sleep+0xb3/0x270 kernel/sched/core.c:7637 [<ffffffff81794c13>] slab_pre_alloc_hook mm/slab.h:361 [inline] [<ffffffff81794c13>] slab_alloc_node mm/slub.c:2610 [inline] [<ffffffff81794c13>] slab_alloc mm/slub.c:2692 [inline] [<ffffffff81794c13>] kmem_cache_alloc_trace+0x2c3/0x5c0 mm/slub.c:2709 [<ffffffff81cbe9a7>] kmalloc include/linux/slab.h:479 [inline] [<ffffffff81cbe9a7>] kzalloc include/linux/slab.h:623 [inline] [<ffffffff81cbe9a7>] kobject_uevent_env+0x2c7/0x1150 lib/kobject_uevent.c:227 [<ffffffff81cbf84f>] kobject_uevent+0x1f/0x30 lib/kobject_uevent.c:374 [<ffffffff81cbb5b9>] kobject_cleanup lib/kobject.c:633 [inline] [<ffffffff81cbb5b9>] kobject_release+0x229/0x440 lib/kobject.c:675 [<ffffffff81cbb0a2>] kref_sub include/linux/kref.h:73 [inline] [<ffffffff81cbb0a2>] kref_put include/linux/kref.h:98 [inline] [<ffffffff81cbb0a2>] kobject_put+0x72/0xd0 lib/kobject.c:692 [<ffffffff8216f095>] put_device+0x25/0x30 drivers/base/core.c:1237 [<ffffffff81c4cc34>] delete_partition_rcu_cb+0x1d4/0x2f0 block/partition-generic.c:232 [<ffffffff813c08bc>] __rcu_reclaim kernel/rcu/rcu.h:118 [inline] [<ffffffff813c08bc>] rcu_do_batch kernel/rcu/tree.c:2705 [inline] [<ffffffff813c08bc>] invoke_rcu_callbacks kernel/rcu/tree.c:2973 [inline] [<ffffffff813c08bc>] __rcu_process_callbacks kernel/rcu/tree.c:2940 [inline] [<ffffffff813c08bc>] rcu_process_callbacks+0x59c/0x1c70 kernel/rcu/tree.c:2957 [<ffffffff8120f509>] __do_softirq+0x299/0xe20 kernel/softirq.c:273 [<ffffffff81210496>] invoke_softirq kernel/softirq.c:350 [inline] [<ffffffff81210496>] irq_exit+0x216/0x2c0 kernel/softirq.c:391 [<ffffffff82c2cd7b>] exiting_irq arch/x86/include/asm/apic.h:652 [inline] [<ffffffff82c2cd7b>] smp_apic_timer_interrupt+0x8b/0xc0 arch/x86/kernel/apic/apic.c:926 [<ffffffff82c2bc25>] apic_timer_interrupt+0xa5/0xb0 arch/x86/entry/entry_64.S:746 <EOI> [<ffffffff814cbf40>] ? audit_kill_trees+0x180/0x180 [<ffffffff8187d2f7>] fd_install+0x57/0x80 fs/file.c:626 [<ffffffff8180989e>] do_sys_open+0x45e/0x550 fs/open.c:1043 [<ffffffff818099c2>] SYSC_open fs/open.c:1055 [inline] [<ffffffff818099c2>] SyS_open+0x32/0x40 fs/open.c:1050 [<ffffffff82c299e1>] entry_SYSCALL_64_fastpath+0x1e/0x9a
In softirq context, we call rcu callback function delete_partition_rcu_cb(), which may allocate memory by kzalloc with GFP_KERNEL flag. If the allocation cannot be satisfied, it may sleep. However, That is not allowed in softirq contex.
Although we found this problem on linux 4.4, the latest kernel version seems to have this problem as well. And it is very similar to the previous one: https://lkml.org/lkml/2018/7/9/391
Fix it by using RCU workqueue, which allows sleep.
Reviewed-by: Paul E. McKenney paulmck@linux.ibm.com Signed-off-by: Yufen Yu yuyufen@huawei.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- block/partition-generic.c | 8 +++++--- include/linux/genhd.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/block/partition-generic.c b/block/partition-generic.c index d3d14e81fb12..5f8db5c5140f 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -249,9 +249,10 @@ struct device_type part_type = { .uevent = part_uevent, };
-static void delete_partition_rcu_cb(struct rcu_head *head) +static void delete_partition_work_fn(struct work_struct *work) { - struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); + struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct, + rcu_work);
part->start_sect = 0; part->nr_sects = 0; @@ -262,7 +263,8 @@ static void delete_partition_rcu_cb(struct rcu_head *head) void __delete_partition(struct percpu_ref *ref) { struct hd_struct *part = container_of(ref, struct hd_struct, ref); - call_rcu(&part->rcu_head, delete_partition_rcu_cb); + INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn); + queue_rcu_work(system_wq, &part->rcu_work); }
/* diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 70fc838e6773..0c5ee17b4d88 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -129,7 +129,7 @@ struct hd_struct { struct disk_stats dkstats; #endif struct percpu_ref ref; - struct rcu_head rcu_head; + struct rcu_work rcu_work; };
#define GENHD_FL_REMOVABLE 1
From: Jiada Wang jiada_wang@mentor.com
[ Upstream commit 489db5d941500249583ec6b49fa70e006bd8f632 ]
pcm3168 codec support runtime_[resume|suspend], whenever it is not active, it enters suspend mode, and it's clock and regulators will be disabled. so there is no need to disable them again in remove callback. Otherwise we got following kernel warnings, when unload pcm3168a driver
[ 222.257514] unbalanced disables for amp-en-regulator [ 222.262526] ------------[ cut here ]------------ [ 222.267158] WARNING: CPU: 0 PID: 2423 at drivers/regulator/core.c:2264 _regulator_disable+0x28/0x108 [ 222.276291] Modules linked in: [ 222.279343] snd_soc_pcm3168a_i2c(-) [ 222.282916] snd_aloop [ 222.285272] arc4 [ 222.287194] wl18xx [ 222.289289] wlcore [ 222.291385] mac80211 [ 222.293654] cfg80211 [ 222.295923] aes_ce_blk [ 222.298366] crypto_simd [ 222.300896] cryptd [ 222.302992] aes_ce_cipher [ 222.305696] crc32_ce [ 222.307965] ghash_ce [ 222.310234] aes_arm64 [ 222.312590] gf128mul [ 222.314860] snd_soc_rcar [ 222.317476] sha2_ce [ 222.319658] xhci_plat_hcd [ 222.322362] sha256_arm64 [ 222.324978] xhci_hcd [ 222.327247] sha1_ce [ 222.329430] renesas_usbhs [ 222.332133] evdev [ 222.334142] sha1_generic [ 222.336758] rcar_gen3_thermal [ 222.339810] cpufreq_dt [ 222.342253] ravb_streaming(C) [ 222.345304] wlcore_sdio [ 222.347834] thermal_sys [ 222.350363] udc_core [ 222.352632] mch_core(C) [ 222.355161] usb_dmac [ 222.357430] snd_soc_pcm3168a [ 222.360394] snd_soc_ak4613 [ 222.363184] gpio_keys [ 222.365540] virt_dma [ 222.367809] nfsd [ 222.369730] ipv6 [ 222.371652] autofs4 [ 222.373834] [last unloaded: snd_soc_pcm3168a_i2c] [ 222.378629] CPU: 0 PID: 2423 Comm: rmmod Tainted: G WC 4.14.63-04798-gd456126e4a42-dirty #457 [ 222.388196] Hardware name: Renesas H3ULCB Kingfisher board based on r8a7795 ES2.0+ (DT) [ 222.396199] task: ffff8006fa8c6200 task.stack: ffff00000a0a0000 [ 222.402117] PC is at _regulator_disable+0x28/0x108 [ 222.406906] LR is at _regulator_disable+0x28/0x108 [ 222.411695] pc : [<ffff0000083bd89c>] lr : [<ffff0000083bd89c>] pstate: 00000145 [ 222.419089] sp : ffff00000a0a3c80 [ 222.422401] x29: ffff00000a0a3c80 [ 222.425799] x28: ffff8006fa8c6200 [ 222.429199] x27: ffff0000086f1000 [ 222.432597] x26: 000000000000006a [ 222.435997] x25: 0000000000000124 [ 222.439395] x24: 0000000000000018 [ 222.442795] x23: 0000000000000006 [ 222.446193] x22: ffff8006f925d490 [ 222.449592] x21: ffff8006f9ac2068 [ 222.452991] x20: ffff8006f9ac2000 [ 222.456390] x19: 0000000000000005 [ 222.459787] x18: 000000000000000a [ 222.463186] x17: 0000000000000000 [ 222.466584] x16: 0000000000000000 [ 222.469984] x15: 000000000d3f616a [ 222.473382] x14: 0720072007200720 [ 222.476781] x13: 0720072007200720 [ 222.480179] x12: 0720072007200720 [ 222.483578] x11: 0720072007200720 [ 222.486975] x10: 0720072007200720 [ 222.490375] x9 : 0720072007200720 [ 222.493773] x8 : 07200772076f0774 [ 222.497172] x7 : 0000000000000000 [ 222.500570] x6 : 0000000000000007 [ 222.503969] x5 : 0000000000000000 [ 222.507367] x4 : 0000000000000000 [ 222.510766] x3 : 0000000000000000 [ 222.514164] x2 : c790b852091e2600 [ 222.517563] x1 : 0000000000000000 [ 222.520961] x0 : 0000000000000028 [ 222.524361] Call trace: [ 222.526805] Exception stack(0xffff00000a0a3b40 to 0xffff00000a0a3c80) [ 222.533245] 3b40: 0000000000000028 0000000000000000 c790b852091e2600 0000000000000000 [ 222.541075] 3b60: 0000000000000000 0000000000000000 0000000000000007 0000000000000000 [ 222.548905] 3b80: 07200772076f0774 0720072007200720 0720072007200720 0720072007200720 [ 222.556735] 3ba0: 0720072007200720 0720072007200720 0720072007200720 000000000d3f616a [ 222.564564] 3bc0: 0000000000000000 0000000000000000 000000000000000a 0000000000000005 [ 222.572394] 3be0: ffff8006f9ac2000 ffff8006f9ac2068 ffff8006f925d490 0000000000000006 [ 222.580224] 3c00: 0000000000000018 0000000000000124 000000000000006a ffff0000086f1000 [ 222.588053] 3c20: ffff8006fa8c6200 ffff00000a0a3c80 ffff0000083bd89c ffff00000a0a3c80 [ 222.595883] 3c40: ffff0000083bd89c 0000000000000145 0000000000000000 0000000000000000 [ 222.603713] 3c60: 0000ffffffffffff ffff00000a0a3c30 ffff00000a0a3c80 ffff0000083bd89c [ 222.611543] [<ffff0000083bd89c>] _regulator_disable+0x28/0x108 [ 222.617375] [<ffff0000083bd9c4>] regulator_disable+0x48/0x68 [ 222.623033] [<ffff0000083be8e4>] regulator_bulk_disable+0x58/0xc0 [ 222.629134] [<ffff0000007d831c>] pcm3168a_remove+0x30/0x50 [snd_soc_pcm3168a] [ 222.636270] [<ffff0000007e5010>] pcm3168a_i2c_remove+0x10/0x1c [snd_soc_pcm3168a_i2c] [ 222.644106] [<ffff0000084b9d9c>] i2c_device_remove+0x38/0x70 [ 222.649766] [<ffff00000843cd5c>] device_release_driver_internal+0xd0/0x1c0 [ 222.656640] [<ffff00000843ced8>] driver_detach+0x70/0x7c [ 222.661951] [<ffff00000843bf68>] bus_remove_driver+0x74/0xa0 [ 222.667609] [<ffff00000843d7e4>] driver_unregister+0x48/0x4c [ 222.673268] [<ffff0000084ba8dc>] i2c_del_driver+0x24/0x30 [ 222.678666] [<ffff0000007e5078>] pcm3168a_i2c_driver_exit+0x10/0xf98 [snd_soc_pcm3168a_i2c] [ 222.687019] [<ffff00000811bd28>] SyS_delete_module+0x198/0x1d4 [ 222.692850] Exception stack(0xffff00000a0a3ec0 to 0xffff00000a0a4000) [ 222.699289] 3ec0: 0000aaaafeb4b268 0000000000000800 14453f6470497100 0000fffffaa520d8 [ 222.707119] 3ee0: 0000fffffaa520d9 000000000000000a 1999999999999999 0000000000000000 [ 222.714948] 3f00: 000000000000006a 0000ffffa8f7d1d8 000000000000000a 0000000000000005 [ 222.722778] 3f20: 0000000000000000 0000000000000000 000000000000002d 0000000000000000 [ 222.730607] 3f40: 0000aaaae19b9f68 0000ffffa8f411f0 0000000000000000 0000aaaae19b9000 [ 222.738436] 3f60: 0000fffffaa533b8 0000fffffaa531f0 0000000000000000 0000000000000001 [ 222.746266] 3f80: 0000fffffaa53ec6 0000000000000000 0000aaaafeb4b200 0000aaaafeb4a010 [ 222.754096] 3fa0: 0000000000000000 0000fffffaa53130 0000aaaae199f36c 0000fffffaa53130 [ 222.761926] 3fc0: 0000ffffa8f411f8 0000000000000000 0000aaaafeb4b268 000000000000006a [ 222.769755] 3fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 222.777589] [<ffff0000080832c0>] el0_svc_naked+0x34/0x38 [ 222.782899] ---[ end trace eaf8939a3698b1a8 ]--- [ 222.787609] Failed to disable VCCDA2: -5 [ 222.791649] ------------[ cut here ]------------ [ 222.796283] WARNING: CPU: 0 PID: 2423 at drivers/clk/clk.c:595 clk_core_disable+0xc/0x1d8 [ 222.804460] Modules linked in: [ 222.807511] snd_soc_pcm3168a_i2c(-) [ 222.811083] snd_aloop [ 222.813439] arc4 [ 222.815360] wl18xx [ 222.817456] wlcore [ 222.819551] mac80211 [ 222.821820] cfg80211 [ 222.824088] aes_ce_blk [ 222.826531] crypto_simd [ 222.829060] cryptd [ 222.831155] aes_ce_cipher [ 222.833859] crc32_ce [ 222.836127] ghash_ce [ 222.838396] aes_arm64 [ 222.840752] gf128mul [ 222.843020] snd_soc_rcar [ 222.845637] sha2_ce [ 222.847818] xhci_plat_hcd [ 222.850522] sha256_arm64 [ 222.853138] xhci_hcd [ 222.855407] sha1_ce [ 222.857589] renesas_usbhs [ 222.860292] evdev [ 222.862300] sha1_generic [ 222.864917] rcar_gen3_thermal [ 222.867968] cpufreq_dt [ 222.870410] ravb_streaming(C) [ 222.873461] wlcore_sdio [ 222.875991] thermal_sys [ 222.878520] udc_core [ 222.880789] mch_core(C) [ 222.883318] usb_dmac [ 222.885587] snd_soc_pcm3168a [ 222.888551] snd_soc_ak4613 [ 222.891341] gpio_keys [ 222.893696] virt_dma [ 222.895965] nfsd [ 222.897886] ipv6 [ 222.899808] autofs4 [ 222.901990] [last unloaded: snd_soc_pcm3168a_i2c] [ 222.906783] CPU: 0 PID: 2423 Comm: rmmod Tainted: G WC 4.14.63-04798-gd456126e4a42-dirty #457 [ 222.916349] Hardware name: Renesas H3ULCB Kingfisher board based on r8a7795 ES2.0+ (DT) [ 222.924351] task: ffff8006fa8c6200 task.stack: ffff00000a0a0000 [ 222.930270] PC is at clk_core_disable+0xc/0x1d8 [ 222.934799] LR is at clk_core_disable_lock+0x20/0x34 [ 222.939761] pc : [<ffff0000083ab9b8>] lr : [<ffff0000083acd28>] pstate: 800001c5 [ 222.947154] sp : ffff00000a0a3cf0 [ 222.950466] x29: ffff00000a0a3cf0 [ 222.953864] x28: ffff8006fa8c6200 [ 222.957263] x27: ffff0000086f1000 [ 222.960661] x26: 000000000000006a [ 222.964061] x25: 0000000000000124 [ 222.967458] x24: 0000000000000015 [ 222.970858] x23: ffff8006f9ffa8d0 [ 222.974256] x22: ffff8006faf16480 [ 222.977655] x21: ffff0000007e7040 [ 222.981053] x20: ffff8006faadd100 [ 222.984452] x19: 0000000000000140 [ 222.987850] x18: 000000000000000a [ 222.991249] x17: 0000000000000000 [ 222.994647] x16: 0000000000000000 [ 222.998046] x15: 000000000d477819 [ 223.001444] x14: 0720072007200720 [ 223.004843] x13: 0720072007200720 [ 223.008242] x12: 0720072007200720 [ 223.011641] x11: 0720072007200720 [ 223.015039] x10: 0720072007200720 [ 223.018438] x9 : 0720072007200720 [ 223.021837] x8 : 0720072007200720 [ 223.025236] x7 : 0000000000000000 [ 223.028634] x6 : 0000000000000007 [ 223.032034] x5 : 0000000000000000 [ 223.035432] x4 : 0000000000000000 [ 223.038831] x3 : 0000000000000000 [ 223.042229] x2 : 0000000004720471 [ 223.045628] x1 : 0000000000000000 [ 223.049026] x0 : ffff8006faadd100 [ 223.052426] Call trace: [ 223.054870] Exception stack(0xffff00000a0a3bb0 to 0xffff00000a0a3cf0) [ 223.061309] 3ba0: ffff8006faadd100 0000000000000000 [ 223.069139] 3bc0: 0000000004720471 0000000000000000 0000000000000000 0000000000000000 [ 223.076969] 3be0: 0000000000000007 0000000000000000 0720072007200720 0720072007200720 [ 223.084798] 3c00: 0720072007200720 0720072007200720 0720072007200720 0720072007200720 [ 223.092628] 3c20: 0720072007200720 000000000d477819 0000000000000000 0000000000000000 [ 223.100458] 3c40: 000000000000000a 0000000000000140 ffff8006faadd100 ffff0000007e7040 [ 223.108287] 3c60: ffff8006faf16480 ffff8006f9ffa8d0 0000000000000015 0000000000000124 [ 223.116117] 3c80: 000000000000006a ffff0000086f1000 ffff8006fa8c6200 ffff00000a0a3cf0 [ 223.123947] 3ca0: ffff0000083acd28 ffff00000a0a3cf0 ffff0000083ab9b8 00000000800001c5 [ 223.131777] 3cc0: ffff00000a0a3cf0 ffff0000083acd1c 0000ffffffffffff ffff8006faadd100 [ 223.139606] 3ce0: ffff00000a0a3cf0 ffff0000083ab9b8 [ 223.144483] [<ffff0000083ab9b8>] clk_core_disable+0xc/0x1d8 [ 223.150054] [<ffff0000083acd58>] clk_disable+0x1c/0x28 [ 223.155198] [<ffff0000007d8328>] pcm3168a_remove+0x3c/0x50 [snd_soc_pcm3168a] [ 223.162334] [<ffff0000007e5010>] pcm3168a_i2c_remove+0x10/0x1c [snd_soc_pcm3168a_i2c] [ 223.170167] [<ffff0000084b9d9c>] i2c_device_remove+0x38/0x70 [ 223.175826] [<ffff00000843cd5c>] device_release_driver_internal+0xd0/0x1c0 [ 223.182700] [<ffff00000843ced8>] driver_detach+0x70/0x7c [ 223.188012] [<ffff00000843bf68>] bus_remove_driver+0x74/0xa0 [ 223.193669] [<ffff00000843d7e4>] driver_unregister+0x48/0x4c [ 223.199329] [<ffff0000084ba8dc>] i2c_del_driver+0x24/0x30 [ 223.204726] [<ffff0000007e5078>] pcm3168a_i2c_driver_exit+0x10/0xf98 [snd_soc_pcm3168a_i2c] [ 223.213079] [<ffff00000811bd28>] SyS_delete_module+0x198/0x1d4 [ 223.218909] Exception stack(0xffff00000a0a3ec0 to 0xffff00000a0a4000) [ 223.225349] 3ec0: 0000aaaafeb4b268 0000000000000800 14453f6470497100 0000fffffaa520d8 [ 223.233179] 3ee0: 0000fffffaa520d9 000000000000000a 1999999999999999 0000000000000000 [ 223.241008] 3f00: 000000000000006a 0000ffffa8f7d1d8 000000000000000a 0000000000000005 [ 223.248838] 3f20: 0000000000000000 0000000000000000 000000000000002d 0000000000000000 [ 223.256668] 3f40: 0000aaaae19b9f68 0000ffffa8f411f0 0000000000000000 0000aaaae19b9000 [ 223.264497] 3f60: 0000fffffaa533b8 0000fffffaa531f0 0000000000000000 0000000000000001 [ 223.272327] 3f80: 0000fffffaa53ec6 0000000000000000 0000aaaafeb4b200 0000aaaafeb4a010 [ 223.280157] 3fa0: 0000000000000000 0000fffffaa53130 0000aaaae199f36c 0000fffffaa53130 [ 223.287986] 3fc0: 0000ffffa8f411f8 0000000000000000 0000aaaafeb4b268 000000000000006a [ 223.295816] 3fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 223.303648] [<ffff0000080832c0>] el0_svc_naked+0x34/0x38 [ 223.308958] ---[ end trace eaf8939a3698b1a9 ]--- [ 223.313752] ------------[ cut here ]------------ [ 223.318383] WARNING: CPU: 0 PID: 2423 at drivers/clk/clk.c:477 clk_core_unprepare+0xc/0x1ac [ 223.326733] Modules linked in: [ 223.329784] snd_soc_pcm3168a_i2c(-) [ 223.333356] snd_aloop [ 223.335712] arc4 [ 223.337633] wl18xx [ 223.339728] wlcore [ 223.341823] mac80211 [ 223.344092] cfg80211 [ 223.346360] aes_ce_blk [ 223.348803] crypto_simd [ 223.351332] cryptd [ 223.353428] aes_ce_cipher [ 223.356131] crc32_ce [ 223.358400] ghash_ce [ 223.360668] aes_arm64 [ 223.363024] gf128mul [ 223.365293] snd_soc_rcar [ 223.367909] sha2_ce [ 223.370091] xhci_plat_hcd [ 223.372794] sha256_arm64 [ 223.375410] xhci_hcd [ 223.377679] sha1_ce [ 223.379861] renesas_usbhs [ 223.382564] evdev [ 223.384572] sha1_generic [ 223.387188] rcar_gen3_thermal [ 223.390239] cpufreq_dt [ 223.392682] ravb_streaming(C) [ 223.395732] wlcore_sdio [ 223.398261] thermal_sys [ 223.400790] udc_core [ 223.403059] mch_core(C) [ 223.405588] usb_dmac [ 223.407856] snd_soc_pcm3168a [ 223.410820] snd_soc_ak4613 [ 223.413609] gpio_keys [ 223.415965] virt_dma [ 223.418234] nfsd [ 223.420155] ipv6 [ 223.422076] autofs4 [ 223.424258] [last unloaded: snd_soc_pcm3168a_i2c] [ 223.429050] CPU: 0 PID: 2423 Comm: rmmod Tainted: G WC 4.14.63-04798-gd456126e4a42-dirty #457 [ 223.438616] Hardware name: Renesas H3ULCB Kingfisher board based on r8a7795 ES2.0+ (DT) [ 223.446618] task: ffff8006fa8c6200 task.stack: ffff00000a0a0000 [ 223.452536] PC is at clk_core_unprepare+0xc/0x1ac [ 223.457239] LR is at clk_unprepare+0x28/0x3c [ 223.461506] pc : [<ffff0000083ab5a4>] lr : [<ffff0000083ace4c>] pstate: 60000145 [ 223.468900] sp : ffff00000a0a3d00 [ 223.472211] x29: ffff00000a0a3d00 [ 223.475609] x28: ffff8006fa8c6200 [ 223.479009] x27: ffff0000086f1000 [ 223.482407] x26: 000000000000006a [ 223.485807] x25: 0000000000000124 [ 223.489205] x24: 0000000000000015 [ 223.492604] x23: ffff8006f9ffa8d0 [ 223.496003] x22: ffff8006faf16480 [ 223.499402] x21: ffff0000007e7040 [ 223.502800] x20: ffff8006faf16420 [ 223.506199] x19: ffff8006faadd100 [ 223.509597] x18: 000000000000000a [ 223.512997] x17: 0000000000000000 [ 223.516395] x16: 0000000000000000 [ 223.519794] x15: 0000000000000000 [ 223.523192] x14: 00000033fe89076c [ 223.526591] x13: 0000000000000400 [ 223.529989] x12: 0000000000000400 [ 223.533388] x11: 0000000000000000 [ 223.536786] x10: 00000000000009e0 [ 223.540185] x9 : ffff00000a0a3be0 [ 223.543583] x8 : ffff8006fa8c6c40 [ 223.546982] x7 : ffff8006fa8c6400 [ 223.550380] x6 : 0000000000000001 [ 223.553780] x5 : 0000000000000000 [ 223.557178] x4 : ffff8006fa8c6200 [ 223.560577] x3 : 0000000000000000 [ 223.563975] x2 : ffff8006fa8c6200 [ 223.567374] x1 : 0000000000000000 [ 223.570772] x0 : ffff8006faadd100 [ 223.574170] Call trace: [ 223.576615] Exception stack(0xffff00000a0a3bc0 to 0xffff00000a0a3d00) [ 223.583054] 3bc0: ffff8006faadd100 0000000000000000 ffff8006fa8c6200 0000000000000000 [ 223.590884] 3be0: ffff8006fa8c6200 0000000000000000 0000000000000001 ffff8006fa8c6400 [ 223.598714] 3c00: ffff8006fa8c6c40 ffff00000a0a3be0 00000000000009e0 0000000000000000 [ 223.606544] 3c20: 0000000000000400 0000000000000400 00000033fe89076c 0000000000000000 [ 223.614374] 3c40: 0000000000000000 0000000000000000 000000000000000a ffff8006faadd100 [ 223.622204] 3c60: ffff8006faf16420 ffff0000007e7040 ffff8006faf16480 ffff8006f9ffa8d0 [ 223.630033] 3c80: 0000000000000015 0000000000000124 000000000000006a ffff0000086f1000 [ 223.637863] 3ca0: ffff8006fa8c6200 ffff00000a0a3d00 ffff0000083ace4c ffff00000a0a3d00 [ 223.645693] 3cc0: ffff0000083ab5a4 0000000060000145 0000000000000140 ffff8006faadd100 [ 223.653523] 3ce0: 0000ffffffffffff ffff0000083ace44 ffff00000a0a3d00 ffff0000083ab5a4 [ 223.661353] [<ffff0000083ab5a4>] clk_core_unprepare+0xc/0x1ac [ 223.667103] [<ffff0000007d8330>] pcm3168a_remove+0x44/0x50 [snd_soc_pcm3168a] [ 223.674239] [<ffff0000007e5010>] pcm3168a_i2c_remove+0x10/0x1c [snd_soc_pcm3168a_i2c] [ 223.682070] [<ffff0000084b9d9c>] i2c_device_remove+0x38/0x70 [ 223.687731] [<ffff00000843cd5c>] device_release_driver_internal+0xd0/0x1c0 [ 223.694604] [<ffff00000843ced8>] driver_detach+0x70/0x7c [ 223.699915] [<ffff00000843bf68>] bus_remove_driver+0x74/0xa0 [ 223.705572] [<ffff00000843d7e4>] driver_unregister+0x48/0x4c [ 223.711230] [<ffff0000084ba8dc>] i2c_del_driver+0x24/0x30 [ 223.716628] [<ffff0000007e5078>] pcm3168a_i2c_driver_exit+0x10/0xf98 [snd_soc_pcm3168a_i2c] [ 223.724980] [<ffff00000811bd28>] SyS_delete_module+0x198/0x1d4 [ 223.730811] Exception stack(0xffff00000a0a3ec0 to 0xffff00000a0a4000) [ 223.737250] 3ec0: 0000aaaafeb4b268 0000000000000800 14453f6470497100 0000fffffaa520d8 [ 223.745079] 3ee0: 0000fffffaa520d9 000000000000000a 1999999999999999 0000000000000000 [ 223.752909] 3f00: 000000000000006a 0000ffffa8f7d1d8 000000000000000a 0000000000000005 [ 223.760739] 3f20: 0000000000000000 0000000000000000 000000000000002d 0000000000000000 [ 223.768568] 3f40: 0000aaaae19b9f68 0000ffffa8f411f0 0000000000000000 0000aaaae19b9000 [ 223.776398] 3f60: 0000fffffaa533b8 0000fffffaa531f0 0000000000000000 0000000000000001 [ 223.784227] 3f80: 0000fffffaa53ec6 0000000000000000 0000aaaafeb4b200 0000aaaafeb4a010 [ 223.792057] 3fa0: 0000000000000000 0000fffffaa53130 0000aaaae199f36c 0000fffffaa53130 [ 223.799886] 3fc0: 0000ffffa8f411f8 0000000000000000 0000aaaafeb4b268 000000000000006a [ 223.807715] 3fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 223.815546] [<ffff0000080832c0>] el0_svc_naked+0x34/0x38 [ 223.820855] ---[ end trace eaf8939a3698b1aa ]---
Fix this issue by only disable clock and regulators in remove callback when CONFIG_PM isn't defined
Signed-off-by: Jiada Wang jiada_wang@mentor.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/codecs/pcm3168a.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/sound/soc/codecs/pcm3168a.c b/sound/soc/codecs/pcm3168a.c index 52cc950c9fd1..445d025e1409 100644 --- a/sound/soc/codecs/pcm3168a.c +++ b/sound/soc/codecs/pcm3168a.c @@ -770,15 +770,22 @@ int pcm3168a_probe(struct device *dev, struct regmap *regmap) } EXPORT_SYMBOL_GPL(pcm3168a_probe);
-void pcm3168a_remove(struct device *dev) +static void pcm3168a_disable(struct device *dev) { struct pcm3168a_priv *pcm3168a = dev_get_drvdata(dev);
- pm_runtime_disable(dev); regulator_bulk_disable(ARRAY_SIZE(pcm3168a->supplies), - pcm3168a->supplies); + pcm3168a->supplies); clk_disable_unprepare(pcm3168a->scki); } + +void pcm3168a_remove(struct device *dev) +{ + pm_runtime_disable(dev); +#ifndef CONFIG_PM + pcm3168a_disable(dev); +#endif +} EXPORT_SYMBOL_GPL(pcm3168a_remove);
#ifdef CONFIG_PM @@ -833,10 +840,7 @@ static int pcm3168a_rt_suspend(struct device *dev)
regcache_cache_only(pcm3168a->regmap, true);
- regulator_bulk_disable(ARRAY_SIZE(pcm3168a->supplies), - pcm3168a->supplies); - - clk_disable_unprepare(pcm3168a->scki); + pcm3168a_disable(dev);
return 0; }
From: Manish Rangankar manish.rangankar@cavium.com
[ Upstream commit d5632b11f0a17efa6356311e535ae135d178438d ]
The kernel panic was observed after switch side perturbation,
BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<ffffffff8132b5a0>] strcmp+0x20/0x40 PGD 0 Oops: 0000 [#1] SMP CPU: 8 PID: 647 Comm: kworker/8:1 Tainted: G W OE ------------ 3.10.0-693.el7.x86_64 #1 Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 06/20/2018 Workqueue: slowpath-13:00. qed_slowpath_task [qed] task: ffff880429eb8fd0 ti: ffff880429190000 task.ti: ffff880429190000 RIP: 0010:[<ffffffff8132b5a0>] [<ffffffff8132b5a0>] strcmp+0x20/0x40 RSP: 0018:ffff880429193c68 EFLAGS: 00010202 RAX: 000000000000000a RBX: 0000000000000002 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff88042bda7a41 RBP: ffff880429193c68 R08: 000000000000ffff R09: 000000000000ffff R10: 0000000000000007 R11: ffff88042b3af338 R12: ffff880420b007a0 R13: ffff88081aa56af8 R14: 0000000000000001 R15: ffff88081aa50410 FS: 0000000000000000(0000) GS:ffff88042fe00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000019f2000 CR4: 00000000003407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: ffff880429193d20 ffffffffc02a0c90 ffffc90004b32000 ffff8803fd3ec600 ffff88042bda7800 ffff88042bda7a00 ffff88042bda7840 ffff88042bda7a40 0000000129193d10 2e3836312e323931 ff000a342e363232 ffffffffc01ad99d Call Trace: [<ffffffffc02a0c90>] qedi_get_protocol_tlv_data+0x270/0x470 [qedi] [<ffffffffc01ad99d>] ? qed_mfw_process_tlv_req+0x24d/0xbf0 [qed] [<ffffffffc01653ae>] qed_mfw_fill_tlv_data+0x5e/0xd0 [qed] [<ffffffffc01ad9b9>] qed_mfw_process_tlv_req+0x269/0xbf0 [qed]
Fix kernel NULL pointer deref by checking for session is online before getting iSCSI TLV data.
Signed-off-by: Manish Rangankar manish.rangankar@cavium.com Reviewed-by: Lee Duncan lduncan@suse.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/qedi/qedi_main.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 105b0e4d7818..5d7d018dad6e 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -952,6 +952,9 @@ static int qedi_find_boot_info(struct qedi_ctx *qedi, cls_sess = iscsi_conn_to_session(cls_conn); sess = cls_sess->dd_data;
+ if (!iscsi_is_session_online(cls_sess)) + continue; + if (pri_ctrl_flags) { if (!strcmp(pri_tgt->iscsi_name, sess->targetname) && !strcmp(pri_tgt->ip_addr, ep_ip_addr)) {
From: Kunihiko Hayashi hayashi.kunihiko@socionext.com
[ Upstream commit 7200f2e3c9e267d29e2bfa075794339032e0b98e ]
If wol state of phy hardware is enabled after reset, phy_ethtool_get_wol() returns that wol.wolopts is true.
However, since net_device.wol_enabled is zero and this doesn't apply wol state until calling ethtool_set_wol(), so mdio_bus_phy_may_suspend() returns true, that is, it's in a state where phy can suspend even though wol state is enabled.
In this inconsistency, phy_suspend() returns -EBUSY, and at last, suspend sequence fails with the following message:
dpm_run_callback(): mdio_bus_phy_suspend+0x0/0x58 returns -16 PM: Device 65000000.ethernet-ffffffff:01 failed to suspend: error -16 PM: Some devices failed to suspend, or early wake event detected
In order to fix the above issue, this patch forces to set initial wol state to disabled as default.
Signed-off-by: Kunihiko Hayashi hayashi.kunihiko@socionext.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/socionext/sni_ave.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 7c7cd9d94bcc..3d0114ba2bfe 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1210,9 +1210,13 @@ static int ave_init(struct net_device *ndev)
priv->phydev = phydev;
- phy_ethtool_get_wol(phydev, &wol); + ave_ethtool_get_wol(ndev, &wol); device_set_wakeup_capable(&ndev->dev, !!wol.supported);
+ /* set wol initial state disabled */ + wol.wolopts = 0; + ave_ethtool_set_wol(ndev, &wol); + if (!phy_interface_is_rgmii(phydev)) phy_set_max_speed(phydev, SPEED_100);
From: Chris Wilson chris@chris-wilson.co.uk
[ Upstream commit 3b34c14fd50c302db091f020f26dd00ede902c80 ]
As amd_uvd_resume() accesses the uvd ring, it must be initialised first or else we trigger errors like:
[ 5.595963] [drm] Found UVD firmware Version: 1.87 Family ID: 17 [ 5.595969] [drm] PSP loading UVD firmware [ 5.596266] ------------[ cut here ]------------ [ 5.596268] ODEBUG: assert_init not available (active state 0) object type: timer_list hint: (null) [ 5.596285] WARNING: CPU: 0 PID: 507 at lib/debugobjects.c:329 debug_print_object+0x6a/0x80 [ 5.596286] Modules linked in: amdgpu(+) hid_logitech_hidpp(+) chash gpu_sched amd_iommu_v2 ttm drm_kms_helper crc32c_intel drm hid_sony ff_memless igb hid_logitech_dj nvme dca i2c_algo_bit nvme_core wmi pinctrl_amd uas usb_storage [ 5.596299] CPU: 0 PID: 507 Comm: systemd-udevd Tainted: G W 4.20.0-0.rc1.git4.1.fc30.x86_64 #1 [ 5.596301] Hardware name: System manufacturer System Product Name/ROG STRIX X470-I GAMING, BIOS 0901 07/23/2018 [ 5.596303] RIP: 0010:debug_print_object+0x6a/0x80 [ 5.596305] Code: 8b 43 10 83 c2 01 8b 4b 14 4c 89 e6 89 15 e6 82 b0 02 4c 8b 45 00 48 c7 c7 60 fd 34 a6 48 8b 14 c5 a0 da 08 a6 e8 6a 6a b8 ff <0f> 0b 5b 83 05 d0 45 3e 01 01 5d 41 5c c3 83 05 c5 45 3e 01 01 c3 [ 5.596306] RSP: 0018:ffffa02ac863f8c0 EFLAGS: 00010282 [ 5.596307] RAX: 0000000000000000 RBX: ffffa02ac863f8e0 RCX: 0000000000000006 [ 5.596308] RDX: 0000000000000007 RSI: ffff9160e9a7bfe8 RDI: ffff9160f91d6c60 [ 5.596310] RBP: ffffffffa6742740 R08: 0000000000000002 R09: 0000000000000000 [ 5.596311] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa634ff69 [ 5.596312] R13: 00000000000b79d0 R14: ffffffffa80f76d8 R15: 0000000000266000 [ 5.596313] FS: 00007f762abf7940(0000) GS:ffff9160f9000000(0000) knlGS:0000000000000000 [ 5.596314] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5.596315] CR2: 000055fdc593f000 CR3: 00000007e999c000 CR4: 00000000003406f0 [ 5.596317] Call Trace: [ 5.596321] debug_object_assert_init+0x14a/0x180 [ 5.596327] del_timer+0x2e/0x90 [ 5.596383] amdgpu_fence_process+0x47/0x100 [amdgpu] [ 5.596430] amdgpu_uvd_resume+0xf6/0x120 [amdgpu] [ 5.596475] uvd_v7_0_sw_init+0xe0/0x280 [amdgpu] [ 5.596523] amdgpu_device_init.cold.30+0xf97/0x14b6 [amdgpu] [ 5.596563] ? amdgpu_driver_load_kms+0x53/0x330 [amdgpu] [ 5.596604] amdgpu_driver_load_kms+0x86/0x330 [amdgpu] [ 5.596614] drm_dev_register+0x115/0x150 [drm] [ 5.596654] amdgpu_pci_probe+0xbd/0x120 [amdgpu] [ 5.596658] local_pci_probe+0x41/0x90 [ 5.596661] pci_device_probe+0x188/0x1a0 [ 5.596666] really_probe+0xf8/0x3b0 [ 5.596669] driver_probe_device+0xb3/0xf0 [ 5.596672] __driver_attach+0xe1/0x110 [ 5.596674] ? driver_probe_device+0xf0/0xf0 [ 5.596676] bus_for_each_dev+0x79/0xc0 [ 5.596679] bus_add_driver+0x155/0x230 [ 5.596681] ? 0xffffffffc07d9000 [ 5.596683] driver_register+0x6b/0xb0 [ 5.596685] ? 0xffffffffc07d9000 [ 5.596688] do_one_initcall+0x5d/0x2be [ 5.596691] ? rcu_read_lock_sched_held+0x79/0x80 [ 5.596693] ? kmem_cache_alloc_trace+0x264/0x290 [ 5.596695] ? do_init_module+0x22/0x210 [ 5.596698] do_init_module+0x5a/0x210 [ 5.596701] load_module+0x2137/0x2430 [ 5.596703] ? lockdep_hardirqs_on+0xed/0x180 [ 5.596714] ? __do_sys_init_module+0x150/0x1a0 [ 5.596715] __do_sys_init_module+0x150/0x1a0 [ 5.596722] do_syscall_64+0x60/0x1f0 [ 5.596725] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 5.596726] RIP: 0033:0x7f762b877dee [ 5.596728] Code: 48 8b 0d 9d 20 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 6a 20 0c 00 f7 d8 64 89 01 48 [ 5.596729] RSP: 002b:00007ffc777b8558 EFLAGS: 00000246 ORIG_RAX: 00000000000000af [ 5.596730] RAX: ffffffffffffffda RBX: 000055fdc48da320 RCX: 00007f762b877dee [ 5.596731] RDX: 00007f762b9f284d RSI: 00000000006c5fc6 RDI: 000055fdc527a060 [ 5.596732] RBP: 00007f762b9f284d R08: 0000000000000003 R09: 0000000000000002 [ 5.596733] R10: 000055fdc48ad010 R11: 0000000000000246 R12: 000055fdc527a060 [ 5.596734] R13: 000055fdc48dca20 R14: 0000000000020000 R15: 0000000000000000 [ 5.596740] irq event stamp: 134618 [ 5.596743] hardirqs last enabled at (134617): [<ffffffffa513d52e>] console_unlock+0x45e/0x610 [ 5.596744] hardirqs last disabled at (134618): [<ffffffffa50037e8>] trace_hardirqs_off_thunk+0x1a/0x1c [ 5.596746] softirqs last enabled at (133146): [<ffffffffa5e00365>] __do_softirq+0x365/0x47c [ 5.596748] softirqs last disabled at (133139): [<ffffffffa50c64f9>] irq_exit+0x119/0x120 [ 5.596749] ---[ end trace eaee508abfebccdc ]---
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108709 Reviewed-by: Christian König christian.koenig@amd.com Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Alex Deucher alexdeucher@gmail.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 1fc17bf39fed..44ca41837187 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -116,16 +116,16 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r;
- r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r;
+ r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev);
return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index fde6ad5ac9ab..6bb05ae232b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -113,16 +113,16 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r;
- r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r;
+ r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev);
return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 7a5b40275e8e..07fd96df4321 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -416,16 +416,16 @@ static int uvd_v6_0_sw_init(void *handle) DRM_INFO("UVD ENC is disabled\n"); }
- r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r;
+ r = amdgpu_uvd_resume(adev); + if (r) + return r; + if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst->ring_enc[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 58b39afcfb86..1ef023a7b8ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -447,10 +447,6 @@ static int uvd_v7_0_sw_init(void *handle) DRM_INFO("PSP loading UVD firmware\n"); }
- r = amdgpu_uvd_resume(adev); - if (r) - return r; - for (j = 0; j < adev->uvd.num_uvd_inst; j++) { if (adev->uvd.harvest_config & (1 << j)) continue; @@ -482,6 +478,10 @@ static int uvd_v7_0_sw_init(void *handle) } }
+ r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev); if (r) return r;
From: Chuck Lever chuck.lever@oracle.com
[ Upstream commit b024dd0eba6e6d568f69d63c5e3153aba94c23e3 ]
FRWR memory registration is done with a series of calls and WRs. 1. ULP invokes ib_dma_map_sg() 2. ULP invokes ib_map_mr_sg() 3. ULP posts an IB_WR_REG_MR on the Send queue
Step 2 generates an iova. It is permissible for ULPs to change this iova (with certain restrictions) between steps 2 and 3.
rxe_map_mr_sg captures the MR's iova but later when rxe processes the REG_MR WR, it ignores the MR's iova field. If a ULP alters the MR's iova after step 2 but before step 3, rxe never captures that change.
When the remote sends an RDMA Read targeting that MR, rxe looks up the R_key, but the altered iova does not match the iova stored in the MR, causing the RDMA Read request to fail.
Reported-by: Anna Schumaker schumaker.anna@gmail.com Signed-off-by: Chuck Lever chuck.lever@oracle.com Reviewed-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/sw/rxe/rxe_req.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 6c361d70d7cd..46f62f71cd28 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -643,6 +643,7 @@ int rxe_requester(void *arg) rmr->access = wqe->wr.wr.reg.access; rmr->lkey = wqe->wr.wr.reg.key; rmr->rkey = wqe->wr.wr.reg.key; + rmr->iova = wqe->wr.wr.reg.mr->iova; wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; } else {
From: Nathan Chancellor natechancellor@gmail.com
[ Upstream commit 3db5e0ba8b8f4aee631d7ee04b7a11c56cfdc213 ]
When building the kernel with Clang, some disabled warnings appear because this Makefile overrides KBUILD_CFLAGS for x86{,_64}. Add them to this list so that the build is clean again.
-Wpointer-sign was disabled for the whole kernel before the beginning of Git history.
-Waddress-of-packed-member was disabled for the whole kernel and for the early boot code in these commits:
bfb38988c51e ("kbuild: clang: Disable 'address-of-packed-member' warning") 20c6c1890455 ("x86/boot: Disable the address-of-packed-member compiler warning").
-Wgnu was disabled for the whole kernel and for the early boot code in these commits:
61163efae020 ("kbuild: LLVMLinux: Add Kbuild support for building kernel with Clang") 6c3b56b19730 ("x86/boot: Disable Clang warnings about GNU extensions").
[ mingo: Made the changelog more readable. ]
Tested-by: Sedat Dilek sedat.dilek@gmail.com Signed-off-by: Nathan Chancellor natechancellor@gmail.com Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Reviewed-by: Sedat Dilek sedat.dilek@gmail.com Cc: Andy Lutomirski luto@kernel.org Cc: Arend van Spriel arend.vanspriel@broadcom.com Cc: Bhupesh Sharma bhsharma@redhat.com Cc: Borislav Petkov bp@alien8.de Cc: Dave Hansen dave.hansen@intel.com Cc: Eric Snowberg eric.snowberg@oracle.com Cc: Hans de Goede hdegoede@redhat.com Cc: Joe Perches joe@perches.com Cc: Jon Hunter jonathanh@nvidia.com Cc: Julien Thierry julien.thierry@arm.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Marc Zyngier marc.zyngier@arm.com Cc: Matt Fleming matt@codeblueprint.co.uk Cc: Peter Zijlstra peterz@infradead.org Cc: Sai Praneeth Prakhya sai.praneeth.prakhya@intel.com Cc: Thomas Gleixner tglx@linutronix.de Cc: YiFei Zhu zhuyifei1999@gmail.com Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-8-ard.biesheuvel@linaro.org Link: https://github.com/ClangBuiltLinux/linux/issues/112 Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/firmware/efi/libstub/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index c51627660dbb..d9845099635e 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -9,7 +9,10 @@ cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -O2 \ -fPIC -fno-strict-aliasing -mno-red-zone \ - -mno-mmx -mno-sse -fshort-wchar + -mno-mmx -mno-sse -fshort-wchar \ + -Wno-pointer-sign \ + $(call cc-disable-warning, address-of-packed-member) \ + $(call cc-disable-warning, gnu)
# arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly # disable the stackleak plugin
From: Daniel Santos daniel.santos@pobox.com
[ Upstream commit a788c5272769ddbcdbab297cf386413eeac04463 ]
jffs2_sync_fs makes the assumption that if CONFIG_JFFS2_FS_WRITEBUFFER is defined then a write buffer is available and has been initialized. However, this does is not the case when the mtd device has no out-of-band buffer:
int jffs2_nand_flash_setup(struct jffs2_sb_info *c) { if (!c->mtd->oobsize) return 0; ...
The resulting call to cancel_delayed_work_sync passing a uninitialized (but zeroed) delayed_work struct forces lockdep to become disabled.
[ 90.050639] overlayfs: upper fs does not support tmpfile. [ 90.652264] INFO: trying to register non-static key. [ 90.662171] the code is fine but needs lockdep annotation. [ 90.673090] turning off the locking correctness validator. [ 90.684021] CPU: 0 PID: 1762 Comm: mount_root Not tainted 4.14.63 #0 [ 90.696672] Stack : 00000000 00000000 80d8f6a2 00000038 805f0000 80444600 8fe364f4 805dfbe7 [ 90.713349] 80563a30 000006e2 8068370c 00000001 00000000 00000001 8e2fdc48 ffffffff [ 90.730020] 00000000 00000000 80d90000 00000000 00000106 00000000 6465746e 312e3420 [ 90.746690] 6b636f6c 03bf0000 f8000000 20676e69 00000000 80000000 00000000 8e2c2a90 [ 90.763362] 80d90000 00000001 00000000 8e2c2a90 00000003 80260dc0 08052098 80680000 [ 90.780033] ... [ 90.784902] Call Trace: [ 90.789793] [<8000f0d8>] show_stack+0xb8/0x148 [ 90.798659] [<8005a000>] register_lock_class+0x270/0x55c [ 90.809247] [<8005cb64>] __lock_acquire+0x13c/0xf7c [ 90.818964] [<8005e314>] lock_acquire+0x194/0x1dc [ 90.828345] [<8003f27c>] flush_work+0x200/0x24c [ 90.837374] [<80041dfc>] __cancel_work_timer+0x158/0x210 [ 90.847958] [<801a8770>] jffs2_sync_fs+0x20/0x54 [ 90.857173] [<80125cf4>] iterate_supers+0xf4/0x120 [ 90.866729] [<80158fc4>] sys_sync+0x44/0x9c [ 90.875067] [<80014424>] syscall_common+0x34/0x58
Signed-off-by: Daniel Santos daniel.santos@pobox.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Boris Brezillon boris.brezillon@bootlin.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/jffs2/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 902a7dd10e5c..bb6ae387469f 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -101,7 +101,8 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER - cancel_delayed_work_sync(&c->wbuf_dwork); + if (jffs2_is_writebuffered(c)) + cancel_delayed_work_sync(&c->wbuf_dwork); #endif
mutex_lock(&c->alloc_sem);
From: Masahiro Yamada yamada.masahiro@socionext.com
[ Upstream commit 392885ee82d35d515ba2af7b72c5e357c3002113 ]
Currently, fixdep writes dependencies to .*.tmp, which is renamed to .*.cmd after everything succeeds. This is a very safe way to avoid corrupted .*.cmd files. The if_changed_dep has carried this safety mechanism since it was added in 2002.
If fixdep fails for some reasons or a user terminates the build while fixdep is running, the incomplete output from the fixdep could be troublesome.
This is my insight about some bad scenarios:
[1] If the compiler succeeds to generate *.o file, but fixdep fails to write necessary dependencies to .*.cmd file, Make will miss to rebuild the object when headers or CONFIG options are changed. In this case, fixdep should not generate .*.cmd file at all so that 'arg-check' will surely trigger the rebuild of the object.
[2] A partially constructed .*.cmd file may not be a syntactically correct makefile. The next time Make runs, it would include it, then fail to parse it. Once this happens, 'make clean' is be the only way to fix it.
In fact, [1] is no longer a problem since commit 9c2af1c7377a ("kbuild: add .DELETE_ON_ERROR special target"). Make deletes a target file on any failure in its recipe. Because fixdep is a part of the recipe of *.o target, if it fails, the *.o is deleted anyway. However, I am a bit worried about the slight possibility of [2].
So, here is a solution. Let fixdep directly write to a .*.cmd file, but allow makefiles to include it only when its corresponding target exists.
This effectively reverts commit 2982c953570b ("kbuild: remove redundant $(wildcard ...) for cmd_files calculation"), and commit 00d78ab2ba75 ("kbuild: remove dead code in cmd_files calculation in top Makefile") because now we must check the presence of targets.
Signed-off-by: Masahiro Yamada yamada.masahiro@socionext.com Signed-off-by: Sasha Levin sashal@kernel.org --- Makefile | 13 +++++++------ scripts/Kbuild.include | 10 ++++------ scripts/Makefile.build | 12 +++++------- 3 files changed, 16 insertions(+), 19 deletions(-)
diff --git a/Makefile b/Makefile index 7a2a9a175756..28b87f551470 100644 --- a/Makefile +++ b/Makefile @@ -1034,6 +1034,8 @@ ifdef CONFIG_GDB_SCRIPTS endif +$(call if_changed,link-vmlinux)
+targets := vmlinux + # Build samples along the rest of the kernel. This needs headers_install. ifdef CONFIG_SAMPLES vmlinux-dirs += samples @@ -1758,13 +1760,12 @@ quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR) \ $(if $(KBUILD_MODULES),; rm -f $(MODVERDIR)/*)
-# read all saved command lines -cmd_files := $(wildcard .*.cmd) +# read saved command lines for existing targets +existing-targets := $(wildcard $(sort $(targets)))
-ifneq ($(cmd_files),) - $(cmd_files): ; # Do not try to update included dependency files - include $(cmd_files) -endif +cmd_files := $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) +$(cmd_files): ; # Do not try to update included dependency files +-include $(cmd_files)
endif # ifeq ($(config-targets),1) endif # ifeq ($(mixed-targets),1) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 3d09844405c9..b8c866193ae6 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -262,9 +262,8 @@ ifndef CONFIG_TRIM_UNUSED_KSYMS
cmd_and_fixdep = \ $(echo-cmd) $(cmd_$(1)); \ - scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp;\ - rm -f $(depfile); \ - mv -f $(dot-target).tmp $(dot-target).cmd; + scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).cmd;\ + rm -f $(depfile);
else
@@ -287,9 +286,8 @@ cmd_and_fixdep = \ $(echo-cmd) $(cmd_$(1)); \ $(ksym_dep_filter) | \ scripts/basic/fixdep -e $(depfile) $@ '$(make-cmd)' \ - > $(dot-target).tmp; \ - rm -f $(depfile); \ - mv -f $(dot-target).tmp $(dot-target).cmd; + > $(dot-target).cmd; \ + rm -f $(depfile);
endif
diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 6a6be9f440cf..1d56f181b917 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -527,18 +527,16 @@ FORCE: # optimization, we don't need to read them if the target does not # exist, we will rebuild anyway in that case.
-cmd_files := $(wildcard $(foreach f,$(sort $(targets)),$(dir $(f)).$(notdir $(f)).cmd)) +existing-targets := $(wildcard $(sort $(targets)))
-ifneq ($(cmd_files),) - include $(cmd_files) -endif +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
ifneq ($(KBUILD_SRC),) # Create directories for object files if they do not exist obj-dirs := $(sort $(obj) $(patsubst %/,%, $(dir $(targets)))) -# If cmd_files exist, their directories apparently exist. Skip mkdir. -exist-dirs := $(sort $(patsubst %/,%, $(dir $(cmd_files)))) -obj-dirs := $(strip $(filter-out $(exist-dirs), $(obj-dirs))) +# If targets exist, their directories apparently exist. Skip mkdir. +existing-dirs := $(sort $(patsubst %/,%, $(dir $(existing-targets)))) +obj-dirs := $(strip $(filter-out $(existing-dirs), $(obj-dirs))) ifneq ($(obj-dirs),) $(shell mkdir -p $(obj-dirs)) endif
From: "A.s. Dong" aisheng.dong@nxp.com
[ Upstream commit 9e5ef7a57ca75a1b9411c46caeeb6881124284a3 ]
As the commit 2893c379461a ("clk: make strings in parent name arrays const"), let's make the parent strings const, otherwise we may meet the following warning when compiling:
drivers/clk/imx/clk-imx7ulp.c: In function 'imx7ulp_clocks_init': drivers/clk/imx/clk-imx7ulp.c:73:35: warning: passing argument 5 of 'imx_clk_mux_flags' discards 'const' qualifier from pointer target type
clks[IMX7ULP_CLK_APLL_PRE_SEL] = imx_clk_mux_flags("apll_pre_sel", base + 0x508, 0, 1, pll_pre_sels, ARRAY_SIZE(pll_pre_sels), CLK_SET_PARENT_GATE); ^ In file included from drivers/clk/imx/clk-imx7ulp.c:23:0: drivers/clk/imx/clk.h:200:27: note: expected 'const char **' but argument is of type 'const char * const*' ...
Cc: Stephen Boyd sboyd@codeaurora.org Cc: Michael Turquette mturquette@baylibre.com Cc: Shawn Guo shawnguo@kernel.org Signed-off-by: Dong Aisheng aisheng.dong@nxp.com Signed-off-by: Stephen Boyd sboyd@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/imx/clk-busy.c | 2 +- drivers/clk/imx/clk-fixup-mux.c | 2 +- drivers/clk/imx/clk.h | 18 +++++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/drivers/clk/imx/clk-busy.c b/drivers/clk/imx/clk-busy.c index 99036527eb0d..e695622c5aa5 100644 --- a/drivers/clk/imx/clk-busy.c +++ b/drivers/clk/imx/clk-busy.c @@ -154,7 +154,7 @@ static const struct clk_ops clk_busy_mux_ops = {
struct clk *imx_clk_busy_mux(const char *name, void __iomem *reg, u8 shift, u8 width, void __iomem *busy_reg, u8 busy_shift, - const char **parent_names, int num_parents) + const char * const *parent_names, int num_parents) { struct clk_busy_mux *busy; struct clk *clk; diff --git a/drivers/clk/imx/clk-fixup-mux.c b/drivers/clk/imx/clk-fixup-mux.c index c9b327e0a8dd..44817c1b0b88 100644 --- a/drivers/clk/imx/clk-fixup-mux.c +++ b/drivers/clk/imx/clk-fixup-mux.c @@ -70,7 +70,7 @@ static const struct clk_ops clk_fixup_mux_ops = { };
struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, + u8 shift, u8 width, const char * const *parents, int num_parents, void (*fixup)(u32 *val)) { struct clk_fixup_mux *fixup_mux; diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h index 5895e2237b6c..2c377e188281 100644 --- a/drivers/clk/imx/clk.h +++ b/drivers/clk/imx/clk.h @@ -63,14 +63,14 @@ struct clk *imx_clk_busy_divider(const char *name, const char *parent_name,
struct clk *imx_clk_busy_mux(const char *name, void __iomem *reg, u8 shift, u8 width, void __iomem *busy_reg, u8 busy_shift, - const char **parent_names, int num_parents); + const char * const *parent_names, int num_parents);
struct clk *imx_clk_fixup_divider(const char *name, const char *parent, void __iomem *reg, u8 shift, u8 width, void (*fixup)(u32 *val));
struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, + u8 shift, u8 width, const char * const *parents, int num_parents, void (*fixup)(u32 *val));
static inline struct clk *imx_clk_fixed(const char *name, int rate) @@ -79,7 +79,8 @@ static inline struct clk *imx_clk_fixed(const char *name, int rate) }
static inline struct clk *imx_clk_mux_ldb(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, int num_parents) + u8 shift, u8 width, const char * const *parents, + int num_parents) { return clk_register_mux(NULL, name, parents, num_parents, CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT, reg, @@ -199,7 +200,8 @@ static inline struct clk *imx_clk_gate4(const char *name, const char *parent, }
static inline struct clk *imx_clk_mux(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, int num_parents) + u8 shift, u8 width, const char * const *parents, + int num_parents) { return clk_register_mux(NULL, name, parents, num_parents, CLK_SET_RATE_NO_REPARENT, reg, shift, @@ -207,7 +209,8 @@ static inline struct clk *imx_clk_mux(const char *name, void __iomem *reg, }
static inline struct clk *imx_clk_mux2(const char *name, void __iomem *reg, - u8 shift, u8 width, const char **parents, int num_parents) + u8 shift, u8 width, const char * const *parents, + int num_parents) { return clk_register_mux(NULL, name, parents, num_parents, CLK_SET_RATE_NO_REPARENT | CLK_OPS_PARENT_ENABLE, @@ -215,8 +218,9 @@ static inline struct clk *imx_clk_mux2(const char *name, void __iomem *reg, }
static inline struct clk *imx_clk_mux_flags(const char *name, - void __iomem *reg, u8 shift, u8 width, const char **parents, - int num_parents, unsigned long flags) + void __iomem *reg, u8 shift, u8 width, + const char * const *parents, int num_parents, + unsigned long flags) { return clk_register_mux(NULL, name, parents, num_parents, flags | CLK_SET_RATE_NO_REPARENT, reg, shift, width, 0,
From: "Joel Fernandes (Google)" joel@joelfernandes.org
[ Upstream commit 30696378f68a9e3dad6bfe55938b112e72af00c2 ]
The ramoops backend currently calls persistent_ram_save_old() even if a buffer is empty. While this appears to work, it is does not seem like the right thing to do and could lead to future bugs so lets avoid that. It also prevents misleading prints in the logs which claim the buffer is valid.
I got something like:
found existing buffer, size 0, start 0
When I was expecting:
no valid data in buffer (sig = ...)
This bails out early (and reports with pr_debug()), since it's an acceptable state.
Signed-off-by: Joel Fernandes (Google) joel@joelfernandes.org Co-developed-by: Kees Cook keescook@chromium.org Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/pstore/ram_core.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 12e21f789194..79f0e183f135 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -497,6 +497,11 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, sig ^= PERSISTENT_RAM_SIG;
if (prz->buffer->sig == sig) { + if (buffer_size(prz) == 0) { + pr_debug("found existing empty buffer\n"); + return 0; + } + if (buffer_size(prz) > prz->buffer_size || buffer_start(prz) > buffer_size(prz)) pr_info("found existing invalid buffer, size %zu, start %zu\n",
From: Daniel Axtens dja@axtens.net
[ Upstream commit 10e1fdb95809ed21406f53b5b4f064673a1b9ceb ]
Currently, disconnecting a USB webcam while it is in use prints out a number of warnings, such as:
WARNING: CPU: 2 PID: 3118 at /build/linux-ezBi1T/linux-4.8.0/fs/sysfs/group.c:237 sysfs_remove_group+0x8b/0x90 sysfs group ffffffffa7cd0780 not found for kobject 'event13'
This has been noticed before. [0]
This is because of the order in which things are torn down.
If there are no streams active during a USB disconnect:
- uvc_disconnect() is invoked via device_del() through the bus notifier mechanism.
- this calls uvc_unregister_video().
- uvc_unregister_video() unregisters the video device for each stream,
- because there are no streams open, it calls uvc_delete()
- uvc_delete() calls uvc_status_cleanup(), which cleans up the status input device.
- uvc_delete() calls media_device_unregister(), which cleans up the media device
- uvc_delete(), uvc_unregister_video() and uvc_disconnect() all return, and we end up back in device_del().
- device_del() then cleans up the sysfs folder for the camera with dpm_sysfs_remove(). Because uvc_status_cleanup() and media_device_unregister() have already been called, this all works nicely.
If, on the other hand, there *are* streams active during a USB disconnect:
- uvc_disconnect() is invoked
- this calls uvc_unregister_video()
- uvc_unregister_video() unregisters the video device for each stream,
- uvc_unregister_video() and uvc_disconnect() return, and we end up back in device_del().
- device_del() then cleans up the sysfs folder for the camera with dpm_sysfs_remove(). Because the status input device and the media device are children of the USB device, this also deletes their sysfs folders.
- Sometime later, the final stream is closed, invoking uvc_release().
- uvc_release() calls uvc_delete()
- uvc_delete() calls uvc_status_cleanup(), which cleans up the status input device. Because the sysfs directory has already been removed, this causes a WARNing.
- uvc_delete() calls media_device_unregister(), which cleans up the media device. Because the sysfs directory has already been removed, this causes another WARNing.
To fix this, we need to make sure the devices are always unregistered before the end of uvc_disconnect(). To this, move the unregistration into the disconnect path:
- split uvc_status_cleanup() into two parts, one on disconnect that unregisters and one on delete that frees.
- move v4l2_device_unregister() and media_device_unregister() into the disconnect path.
[0]: https://lkml.org/lkml/2016/12/8/657
[Renamed uvc_input_cleanup() to uvc_input_unregister()]
Signed-off-by: Daniel Axtens dja@axtens.net Acked-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Laurent Pinchart laurent.pinchart@ideasonboard.com Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/usb/uvc/uvc_driver.c | 13 +++++++++---- drivers/media/usb/uvc/uvc_status.c | 12 ++++++++---- drivers/media/usb/uvc/uvcvideo.h | 1 + 3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index bc369a0934a3..76dc3ee8ca21 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1824,11 +1824,7 @@ static void uvc_delete(struct kref *kref) usb_put_intf(dev->intf); usb_put_dev(dev->udev);
- if (dev->vdev.dev) - v4l2_device_unregister(&dev->vdev); #ifdef CONFIG_MEDIA_CONTROLLER - if (media_devnode_is_registered(dev->mdev.devnode)) - media_device_unregister(&dev->mdev); media_device_cleanup(&dev->mdev); #endif
@@ -1885,6 +1881,15 @@ static void uvc_unregister_video(struct uvc_device *dev)
uvc_debugfs_cleanup_stream(stream); } + + uvc_status_unregister(dev); + + if (dev->vdev.dev) + v4l2_device_unregister(&dev->vdev); +#ifdef CONFIG_MEDIA_CONTROLLER + if (media_devnode_is_registered(dev->mdev.devnode)) + media_device_unregister(&dev->mdev); +#endif }
int uvc_register_video_device(struct uvc_device *dev, diff --git a/drivers/media/usb/uvc/uvc_status.c b/drivers/media/usb/uvc/uvc_status.c index 0722dc684378..883e4cab45e7 100644 --- a/drivers/media/usb/uvc/uvc_status.c +++ b/drivers/media/usb/uvc/uvc_status.c @@ -54,7 +54,7 @@ static int uvc_input_init(struct uvc_device *dev) return ret; }
-static void uvc_input_cleanup(struct uvc_device *dev) +static void uvc_input_unregister(struct uvc_device *dev) { if (dev->input) input_unregister_device(dev->input); @@ -71,7 +71,7 @@ static void uvc_input_report_key(struct uvc_device *dev, unsigned int code,
#else #define uvc_input_init(dev) -#define uvc_input_cleanup(dev) +#define uvc_input_unregister(dev) #define uvc_input_report_key(dev, code, value) #endif /* CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV */
@@ -292,12 +292,16 @@ int uvc_status_init(struct uvc_device *dev) return 0; }
-void uvc_status_cleanup(struct uvc_device *dev) +void uvc_status_unregister(struct uvc_device *dev) { usb_kill_urb(dev->int_urb); + uvc_input_unregister(dev); +} + +void uvc_status_cleanup(struct uvc_device *dev) +{ usb_free_urb(dev->int_urb); kfree(dev->status); - uvc_input_cleanup(dev); }
int uvc_status_start(struct uvc_device *dev, gfp_t flags) diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h index c0cbd833d0a4..1db6634b2455 100644 --- a/drivers/media/usb/uvc/uvcvideo.h +++ b/drivers/media/usb/uvc/uvcvideo.h @@ -757,6 +757,7 @@ int uvc_register_video_device(struct uvc_device *dev,
/* Status */ int uvc_status_init(struct uvc_device *dev); +void uvc_status_unregister(struct uvc_device *dev); void uvc_status_cleanup(struct uvc_device *dev); int uvc_status_start(struct uvc_device *dev, gfp_t flags); void uvc_status_stop(struct uvc_device *dev);
From: Breno Leitao leitao@debian.org
[ Upstream commit 8d4a862276a9c30a269d368d324fb56529e6d5fd ]
Currently xmon needs to get devtree_lock (through rtas_token()) during its invocation (at crash time). If there is a crash while devtree_lock is being held, then xmon tries to get the lock but spins forever and never get into the interactive debugger, as in the following case:
int *ptr = NULL; raw_spin_lock_irqsave(&devtree_lock, flags); *ptr = 0xdeadbeef;
This patch avoids calling rtas_token(), thus trying to get the same lock, at crash time. This new mechanism proposes getting the token at initialization time (xmon_init()) and just consuming it at crash time.
This would allow xmon to be possible invoked independent of devtree_lock being held or not.
Signed-off-by: Breno Leitao leitao@debian.org Reviewed-by: Thiago Jung Bauermann bauerman@linux.ibm.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/xmon/xmon.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 36b8dc47a3c3..b566203d09c5 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -75,6 +75,9 @@ static int xmon_gate; #define xmon_owner 0 #endif /* CONFIG_SMP */
+#ifdef CONFIG_PPC_PSERIES +static int set_indicator_token = RTAS_UNKNOWN_SERVICE; +#endif static unsigned long in_xmon __read_mostly = 0; static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT);
@@ -358,7 +361,6 @@ static inline void disable_surveillance(void) #ifdef CONFIG_PPC_PSERIES /* Since this can't be a module, args should end up below 4GB. */ static struct rtas_args args; - int token;
/* * At this point we have got all the cpus we can into @@ -367,11 +369,11 @@ static inline void disable_surveillance(void) * If we did try to take rtas.lock there would be a * real possibility of deadlock. */ - token = rtas_token("set-indicator"); - if (token == RTAS_UNKNOWN_SERVICE) + if (set_indicator_token == RTAS_UNKNOWN_SERVICE) return;
- rtas_call_unlocked(&args, token, 3, 1, NULL, SURVEILLANCE_TOKEN, 0, 0); + rtas_call_unlocked(&args, set_indicator_token, 3, 1, NULL, + SURVEILLANCE_TOKEN, 0, 0);
#endif /* CONFIG_PPC_PSERIES */ } @@ -3688,6 +3690,14 @@ static void xmon_init(int enable) __debugger_iabr_match = xmon_iabr_match; __debugger_break_match = xmon_break_match; __debugger_fault_handler = xmon_fault_handler; + +#ifdef CONFIG_PPC_PSERIES + /* + * Get the token here to avoid trying to get a lock + * during the crash, causing a deadlock. + */ + set_indicator_token = rtas_token("set-indicator"); +#endif } else { __debugger = NULL; __debugger_ipi = NULL;
From: Breno Leitao leitao@debian.org
[ Upstream commit 2b038cbc5fcf12a7ee1cc9bfd5da1e46dacdee87 ]
When booting a pseries kernel with PREEMPT enabled, it dumps the following warning:
BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 caller is pseries_processor_idle_init+0x5c/0x22c CPU: 13 PID: 1 Comm: swapper/0 Not tainted 4.20.0-rc3-00090-g12201a0128bc-dirty #828 Call Trace: [c000000429437ab0] [c0000000009c8878] dump_stack+0xec/0x164 (unreliable) [c000000429437b00] [c0000000005f2f24] check_preemption_disabled+0x154/0x160 [c000000429437b90] [c000000000cab8e8] pseries_processor_idle_init+0x5c/0x22c [c000000429437c10] [c000000000010ed4] do_one_initcall+0x64/0x300 [c000000429437ce0] [c000000000c54500] kernel_init_freeable+0x3f0/0x500 [c000000429437db0] [c0000000000112dc] kernel_init+0x2c/0x160 [c000000429437e20] [c00000000000c1d0] ret_from_kernel_thread+0x5c/0x6c
This happens because the code calls get_lppaca() which calls get_paca() and it checks if preemption is disabled through check_preemption_disabled().
Preemption should be disabled because the per CPU variable may make no sense if there is a preemption (and a CPU switch) after it reads the per CPU data and when it is used.
In this device driver specifically, it is not a problem, because this code just needs to have access to one lppaca struct, and it does not matter if it is the current per CPU lppaca struct or not (i.e. when there is a preemption and a CPU migration).
That said, the most appropriate fix seems to be related to avoiding the debug_smp_processor_id() call at get_paca(), instead of calling preempt_disable() before get_paca().
Signed-off-by: Breno Leitao leitao@debian.org Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/cpuidle/cpuidle-pseries.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 9e56bc411061..74c247972bb3 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -247,7 +247,13 @@ static int pseries_idle_probe(void) return -ENODEV;
if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - if (lppaca_shared_proc(get_lppaca())) { + /* + * Use local_paca instead of get_lppaca() since + * preemption is not disabled, and it is not required in + * fact, since lppaca_ptr does not need to be the value + * associated to the current CPU, it can be from any CPU. + */ + if (lppaca_shared_proc(local_paca->lppaca_ptr)) { cpuidle_state_table = shared_states; max_idle_state = ARRAY_SIZE(shared_states); } else {
From: Ard Biesheuvel ard.biesheuvel@linaro.org
[ Upstream commit 3bbd3db86470c701091fb1d67f1fab6621debf50 ]
readelf complains about the section layout of vmlinux when building with CONFIG_RELOCATABLE=y (for KASLR):
readelf: Warning: [21]: Link field (0) should index a symtab section. readelf: Warning: [21]: Info field (0) should index a relocatable section.
Also, it seems that our use of '-pie -shared' is contradictory, and thus ambiguous. In general, the way KASLR is wired up at the moment is highly tailored to how ld.bfd happens to implement (and conflate) PIE executables and shared libraries, so given the current effort to support other toolchains, let's fix some of these issues as well.
- Drop the -pie linker argument and just leave -shared. In ld.bfd, the differences between them are unclear (except for the ELF type of the produced image [0]) but lld chokes on seeing both at the same time.
- Rename the .rela output section to .rela.dyn, as is customary for shared libraries and PIE executables, so that it is not misidentified by readelf as a static relocation section (producing the warnings above).
- Pass the -z notext and -z norelro options to explicitly instruct the linker to permit text relocations, and to omit the RELRO program header (which requires a certain section layout that we don't adhere to in the kernel). These are the defaults for current versions of ld.bfd.
- Discard .eh_frame and .gnu.hash sections to avoid them from being emitted between .head.text and .text, screwing up the section layout.
These changes only affect the ELF image, and produce the same binary image.
[0] b9dce7f1ba01 ("arm64: kernel: force ET_DYN ELF type for ...")
Cc: Nick Desaulniers ndesaulniers@google.com Cc: Peter Smith peter.smith@linaro.org Tested-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/Makefile | 2 +- arch/arm64/kernel/vmlinux.lds.S | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 6cb9fc7e9382..8978f60779c4 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -18,7 +18,7 @@ ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour # for relative relocs, since this leads to better Image compression # with the relocation offsets always being zero. -LDFLAGS_vmlinux += -pie -shared -Bsymbolic \ +LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \ $(call ld-option, --no-apply-dynamic-relocs) endif
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 03b00007553d..7fa008374907 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -99,7 +99,8 @@ SECTIONS *(.discard) *(.discard.*) *(.interp .dynamic) - *(.dynsym .dynstr .hash) + *(.dynsym .dynstr .hash .gnu.hash) + *(.eh_frame) }
. = KIMAGE_VADDR + TEXT_OFFSET; @@ -192,12 +193,12 @@ SECTIONS
PERCPU_SECTION(L1_CACHE_BYTES)
- .rela : ALIGN(8) { + .rela.dyn : ALIGN(8) { *(.rela .rela*) }
- __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR); - __rela_size = SIZEOF(.rela); + __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); + __rela_size = SIZEOF(.rela.dyn);
. = ALIGN(SEGMENT_ALIGN); __initdata_end = .;
From: Nathan Chancellor natechancellor@gmail.com
[ Upstream commit b2e9a4eda11fd2cb1e6714e9ad3f455c402568ff ]
Clang warns:
drivers/media/firewire/firedtv-avc.c:999:45: warning: implicit conversion from 'int' to 'char' changes value from 159 to -97 [-Wconstant-conversion] app_info[0] = (EN50221_TAG_APP_INFO >> 16) & 0xff; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ drivers/media/firewire/firedtv-avc.c:1000:45: warning: implicit conversion from 'int' to 'char' changes value from 128 to -128 [-Wconstant-conversion] app_info[1] = (EN50221_TAG_APP_INFO >> 8) & 0xff; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ drivers/media/firewire/firedtv-avc.c:1040:44: warning: implicit conversion from 'int' to 'char' changes value from 159 to -97 [-Wconstant-conversion] app_info[0] = (EN50221_TAG_CA_INFO >> 16) & 0xff; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ drivers/media/firewire/firedtv-avc.c:1041:44: warning: implicit conversion from 'int' to 'char' changes value from 128 to -128 [-Wconstant-conversion] app_info[1] = (EN50221_TAG_CA_INFO >> 8) & 0xff; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ 4 warnings generated.
Change app_info's type to unsigned char to match the type of the member msg in struct ca_msg, which is the only thing passed into the app_info parameter in this function.
Link: https://github.com/ClangBuiltLinux/linux/issues/105
Signed-off-by: Nathan Chancellor natechancellor@gmail.com Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/firewire/firedtv-avc.c | 6 ++++-- drivers/media/firewire/firedtv.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/media/firewire/firedtv-avc.c b/drivers/media/firewire/firedtv-avc.c index 1c933b2cf760..3ef5df1648d7 100644 --- a/drivers/media/firewire/firedtv-avc.c +++ b/drivers/media/firewire/firedtv-avc.c @@ -968,7 +968,8 @@ static int get_ca_object_length(struct avc_response_frame *r) return r->operand[7]; }
-int avc_ca_app_info(struct firedtv *fdtv, char *app_info, unsigned int *len) +int avc_ca_app_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len) { struct avc_command_frame *c = (void *)fdtv->avc_data; struct avc_response_frame *r = (void *)fdtv->avc_data; @@ -1009,7 +1010,8 @@ int avc_ca_app_info(struct firedtv *fdtv, char *app_info, unsigned int *len) return ret; }
-int avc_ca_info(struct firedtv *fdtv, char *app_info, unsigned int *len) +int avc_ca_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len) { struct avc_command_frame *c = (void *)fdtv->avc_data; struct avc_response_frame *r = (void *)fdtv->avc_data; diff --git a/drivers/media/firewire/firedtv.h b/drivers/media/firewire/firedtv.h index 876cdec8329b..009905a19947 100644 --- a/drivers/media/firewire/firedtv.h +++ b/drivers/media/firewire/firedtv.h @@ -124,8 +124,10 @@ int avc_lnb_control(struct firedtv *fdtv, char voltage, char burst, struct dvb_diseqc_master_cmd *diseqcmd); void avc_remote_ctrl_work(struct work_struct *work); int avc_register_remote_control(struct firedtv *fdtv); -int avc_ca_app_info(struct firedtv *fdtv, char *app_info, unsigned int *len); -int avc_ca_info(struct firedtv *fdtv, char *app_info, unsigned int *len); +int avc_ca_app_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len); +int avc_ca_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len); int avc_ca_reset(struct firedtv *fdtv); int avc_ca_pmt(struct firedtv *fdtv, char *app_info, int length); int avc_ca_get_time_date(struct firedtv *fdtv, int *interval);
From: Yu Zhao yuzhao@google.com
[ Upstream commit 23aa128bb28d9da69bb1bdb2b70e50128857884a ]
AMD platform device acp_audio_dma can only be created by parent PCI device driver (drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c). Pass struct device of the parent to snd_pcm_lib_preallocate_pages() so dma_alloc_coherent() can use correct dma_ops. Otherwise, it will use default dma_ops which is nommu_dma_ops on x86_64 even when IOMMU is enabled and set to non passthrough mode.
Though platform device inherits some dma related fields during its creation in mfd_add_device(), we can't simply pass its struct device to snd_pcm_lib_preallocate_pages() because dma_ops is not among the inherited fields. Even it were, drivers/iommu/amd_iommu.c would ignore it because get_device_id() doesn't handle platform device.
This change shouldn't give us any trouble even struct device of the parent becomes null or represents some non PCI device in the future, because get_dma_ops() correctly handles null struct device or uses the default dma_ops if struct device doesn't have it set.
Signed-off-by: Yu Zhao yuzhao@google.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/amd/acp-pcm-dma.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index cdebab2f8ce5..7ada2c1f4964 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -1151,18 +1151,21 @@ static int acp_dma_new(struct snd_soc_pcm_runtime *rtd) struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, DRV_NAME); struct audio_drv_data *adata = dev_get_drvdata(component->dev); + struct device *parent = component->dev->parent;
switch (adata->asic_type) { case CHIP_STONEY: ret = snd_pcm_lib_preallocate_pages_for_all(rtd->pcm, SNDRV_DMA_TYPE_DEV, - NULL, ST_MIN_BUFFER, + parent, + ST_MIN_BUFFER, ST_MAX_BUFFER); break; default: ret = snd_pcm_lib_preallocate_pages_for_all(rtd->pcm, SNDRV_DMA_TYPE_DEV, - NULL, MIN_BUFFER, + parent, + MIN_BUFFER, MAX_BUFFER); break; }
From: Leo Yan leo.yan@linaro.org
[ Upstream commit 323ed1e0f60b35df55763356d4973a18d5eaea15 ]
Commit 4d3ebd3658d8 ("coreisght: tmc: Claim device before use") uses CLAIM tag to validate if the device is available, it needs to pass the device base address to access related registers.
In the function tmc_etb_disable_hw() it wrongly passes the driver data pointer as register base address, thus it's easily to produce the kernel warning info like below:
[ 83.579898] WARNING: CPU: 4 PID: 2970 at drivers/hwtracing/coresight/coresight.c:207 coresight_disclaim_device_unlocked+0x44/0x80 [ 83.591448] Modules linked in: [ 83.594485] CPU: 4 PID: 2970 Comm: uname Not tainted 4.19.0-rc6-00417-g721b509 #110 [ 83.602067] Hardware name: ARM Juno development board (r2) (DT) [ 83.607932] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 83.612681] pc : coresight_disclaim_device_unlocked+0x44/0x80 [ 83.618375] lr : coresight_disclaim_device_unlocked+0x44/0x80 [ 83.624064] sp : ffff00000fe3ba20 [ 83.627347] x29: ffff00000fe3ba20 x28: ffff80002d430dc0 [ 83.632618] x27: ffff800033177c00 x26: ffff80002eb44480 [ 83.637889] x25: 0000000000000001 x24: ffff800033c72600 [ 83.643160] x23: ffff0000099b11f8 x22: ffff0000099b11c8 [ 83.648430] x21: 0000000000000002 x20: ffff800033a90418 [ 83.653701] x19: ffff0000099b11c8 x18: 0000000000000000 [ 83.658971] x17: 0000000000000000 x16: 0000000000000000 [ 83.664241] x15: 0000000000000000 x14: 0000000000000000 [ 83.669511] x13: 0000000000000000 x12: 0000000000000000 [ 83.674782] x11: 0000000000000000 x10: 0000000000000000 [ 83.680052] x9 : 0000000000000000 x8 : 0000000000000001 [ 83.685322] x7 : 0000000000010000 x6 : ffff800033ebab18 [ 83.690593] x5 : ffff800033ebab18 x4 : ffff800033e6c698 [ 83.695862] x3 : 0000000000000001 x2 : 0000000000000000 [ 83.701133] x1 : 0000000000000000 x0 : 0000000000000001 [ 83.706404] Call trace: [ 83.708830] coresight_disclaim_device_unlocked+0x44/0x80 [ 83.714180] coresight_disclaim_device+0x34/0x48 [ 83.718756] tmc_disable_etf_sink+0xc4/0xf0 [ 83.722902] coresight_disable_path_from+0xc8/0x240 [ 83.727735] coresight_disable_path+0x24/0x30 [ 83.732053] etm_event_stop+0x130/0x170 [ 83.735854] etm_event_del+0x24/0x30 [ 83.739399] event_sched_out.isra.51+0xcc/0x1e8 [ 83.743887] group_sched_out.part.53+0x44/0xb0 [ 83.748291] ctx_sched_out+0x298/0x2b8 [ 83.752005] task_ctx_sched_out+0x74/0xa8 [ 83.755980] perf_event_exit_task+0x140/0x418 [ 83.760298] do_exit+0x3f4/0xcf0 [ 83.763497] do_group_exit+0x5c/0xc0 [ 83.767041] __arm64_sys_exit_group+0x24/0x28 [ 83.771359] el0_svc_common+0x110/0x178 [ 83.775160] el0_svc_handler+0x94/0xe8 [ 83.778875] el0_svc+0x8/0xc [ 83.781728] ---[ end trace 02d8d8eac46db9e5 ]---
This patch is to fix this bug by using 'drvdata->base' as the register base address for CLAIM related operation.
Fixes: 4d3ebd3658d8 ("coreisght: tmc: Claim device before use") Cc: Suzuki Poulose suzuki.poulose@arm.com Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Mike Leach mike.leach@linaro.org Cc: Robert Walker robert.walker@arm.com Signed-off-by: Leo Yan leo.yan@linaro.org Reviewed-by: Suzuki K Poulose suzuki.poulose@arm.com Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 53fc83b72a49..5864ac55e275 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -86,7 +86,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) { - coresight_disclaim_device(drvdata); + coresight_disclaim_device(drvdata->base); __tmc_etb_disable_hw(drvdata); }
From: "Michael J. Ruhl" michael.j.ruhl@intel.com
[ Upstream commit dbc2970caef74e8ff41923d302aa6fb5a4812d0e ]
An incorrect sge sizing in the HFI PIO path will cause an OOPs similar to this:
BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] hfi1_verbs_send_pio+0x3d8/0x530 [hfi1] PGD 0 Oops: 0000 1 SMP Call Trace: ? hfi1_verbs_send_dma+0xad0/0xad0 [hfi1] hfi1_verbs_send+0xdf/0x250 [hfi1] ? make_rc_ack+0xa80/0xa80 [hfi1] hfi1_do_send+0x192/0x430 [hfi1] hfi1_do_send_from_rvt+0x10/0x20 [hfi1] rvt_post_send+0x369/0x820 [rdmavt] ib_uverbs_post_send+0x317/0x570 [ib_uverbs] ib_uverbs_write+0x26f/0x420 [ib_uverbs] ? security_file_permission+0x21/0xa0 vfs_write+0xbd/0x1e0 ? mntput+0x24/0x40 SyS_write+0x7f/0xe0 system_call_fastpath+0x16/0x1b
Fix by adding the missing sizing check to correctly determine the sge length.
Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn mike.marciniszyn@intel.com Signed-off-by: Michael J. Ruhl michael.j.ruhl@intel.com Signed-off-by: Dennis Dalessandro dennis.dalessandro@intel.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/hfi1/verbs.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index a365089a9305..b7257d7dd925 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -919,6 +919,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (slen > len) slen = len; + if (slen > ss->sge.sge_length) + slen = ss->sge.sge_length; rvt_update_sge(ss, slen, false); seg_pio_copy_mid(pbuf, addr, slen); len -= slen;
From: Vivek Gautam vivek.gautam@codeaurora.org
[ Upstream commit de2563bce7a157f5296bab94f3843d7d64fb14b4 ]
Turning on CONFIG_DMA_API_DEBUG_SG results in the following error:
[ 460.308650] ------------[ cut here ]------------ [ 460.313490] qcom-venus aa00000.video-codec: DMA-API: mapping sg segment longer than device claims to support [len=4194304] [max=65536] [ 460.326017] WARNING: CPU: 3 PID: 3555 at src/kernel/dma/debug.c:1301 debug_dma_map_sg+0x174/0x254 [ 460.338888] Modules linked in: venus_dec venus_enc videobuf2_dma_sg videobuf2_memops hci_uart btqca bluetooth venus_core v4l2_mem2mem videobuf2_v4l2 videobuf2_common ath10k_snoc ath10k_core ath lzo lzo_compress zramjoydev [ 460.375811] CPU: 3 PID: 3555 Comm: V4L2DecoderThre Tainted: G W 4.19.1 #82 [ 460.384223] Hardware name: Google Cheza (rev1) (DT) [ 460.389251] pstate: 60400009 (nZCv daif +PAN -UAO) [ 460.394191] pc : debug_dma_map_sg+0x174/0x254 [ 460.398680] lr : debug_dma_map_sg+0x174/0x254 [ 460.403162] sp : ffffff80200c37d0 [ 460.406583] x29: ffffff80200c3830 x28: 0000000000010000 [ 460.412056] x27: 00000000ffffffff x26: ffffffc0f785ea80 [ 460.417532] x25: 0000000000000000 x24: ffffffc0f4ea1290 [ 460.423001] x23: ffffffc09e700300 x22: ffffffc0f4ea1290 [ 460.428470] x21: ffffff8009037000 x20: 0000000000000001 [ 460.433936] x19: ffffff80091b0000 x18: 0000000000000000 [ 460.439411] x17: 0000000000000000 x16: 000000000000f251 [ 460.444885] x15: 0000000000000006 x14: 0720072007200720 [ 460.450354] x13: ffffff800af536e0 x12: 0000000000000000 [ 460.455822] x11: 0000000000000000 x10: 0000000000000000 [ 460.461288] x9 : 537944d9c6c48d00 x8 : 537944d9c6c48d00 [ 460.466758] x7 : 0000000000000000 x6 : ffffffc0f8d98f80 [ 460.472230] x5 : 0000000000000000 x4 : 0000000000000000 [ 460.477703] x3 : 000000000000008a x2 : ffffffc0fdb13948 [ 460.483170] x1 : ffffffc0fdb0b0b0 x0 : 000000000000007a [ 460.488640] Call trace: [ 460.491165] debug_dma_map_sg+0x174/0x254 [ 460.495307] vb2_dma_sg_alloc+0x260/0x2dc [videobuf2_dma_sg] [ 460.501150] __vb2_queue_alloc+0x164/0x374 [videobuf2_common] [ 460.507076] vb2_core_reqbufs+0xfc/0x23c [videobuf2_common] [ 460.512815] vb2_reqbufs+0x44/0x5c [videobuf2_v4l2] [ 460.517853] v4l2_m2m_reqbufs+0x44/0x78 [v4l2_mem2mem] [ 460.523144] v4l2_m2m_ioctl_reqbufs+0x1c/0x28 [v4l2_mem2mem] [ 460.528976] v4l_reqbufs+0x30/0x40 [ 460.532480] __video_do_ioctl+0x36c/0x454 [ 460.536610] video_usercopy+0x25c/0x51c [ 460.540572] video_ioctl2+0x38/0x48 [ 460.544176] v4l2_ioctl+0x60/0x74 [ 460.547602] do_video_ioctl+0x948/0x3520 [ 460.551648] v4l2_compat_ioctl32+0x60/0x98 [ 460.555872] __arm64_compat_sys_ioctl+0x134/0x20c [ 460.560718] el0_svc_common+0x9c/0xe4 [ 460.564498] el0_svc_compat_handler+0x2c/0x38 [ 460.568982] el0_svc_compat+0x8/0x18 [ 460.572672] ---[ end trace ce209b87b2f3af88 ]---
From above warning one would deduce that the sg segment will overflow
the device's capacity. In reality, the hardware can accommodate larger sg segments. So, initialize the max segment size properly to weed out this warning.
Based on a similar patch sent by Sean Paul for mdss: https://patchwork.kernel.org/patch/10671457/
Signed-off-by: Vivek Gautam vivek.gautam@codeaurora.org Acked-by: Stanimir Varbanov stanimir.varbanov@linaro.org Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/media/platform/qcom/venus/core.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index bb6add9d340e..5b8350e87e75 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -264,6 +264,14 @@ static int venus_probe(struct platform_device *pdev) if (ret) return ret;
+ if (!dev->dma_parms) { + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), + GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + } + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + INIT_LIST_HEAD(&core->instances); mutex_init(&core->lock); INIT_DELAYED_WORK(&core->work, venus_sys_error_handler);
From: Gao Xiang gaoxiang25@huawei.com
[ Upstream commit 848bd9acdcd00c164b42b14aacec242949ecd471 ]
The root cause is the race as follows: Thread #0 Thread #1
z_erofs_vle_unzip_kickoff z_erofs_submit_and_unzip
struct z_erofs_vle_unzip_io io[] atomic_add_return() wait_event() [end of function] wake_up()
Fix it by taking the waitqueue lock between atomic_add_return and wake_up to close such the race.
kernel message:
Unable to handle kernel paging request at virtual address 97f7052caa1303dc ... Workqueue: kverityd verity_work task: ffffffe32bcb8000 task.stack: ffffffe3298a0000 PC is at __wake_up_common+0x48/0xa8 LR is at __wake_up+0x3c/0x58 ... Call trace: ... [<ffffff94a08ff648>] __wake_up_common+0x48/0xa8 [<ffffff94a08ff8b8>] __wake_up+0x3c/0x58 [<ffffff94a0c11b60>] z_erofs_vle_unzip_kickoff+0x40/0x64 [<ffffff94a0c118e4>] z_erofs_vle_read_endio+0x94/0x134 [<ffffff94a0c83c9c>] bio_endio+0xe4/0xf8 [<ffffff94a1076540>] dec_pending+0x134/0x32c [<ffffff94a1076f28>] clone_endio+0x90/0xf4 [<ffffff94a0c83c9c>] bio_endio+0xe4/0xf8 [<ffffff94a1095024>] verity_work+0x210/0x368 [<ffffff94a08c4150>] process_one_work+0x188/0x4b4 [<ffffff94a08c45bc>] worker_thread+0x140/0x458 [<ffffff94a08cad48>] kthread+0xec/0x108 [<ffffff94a0883ab4>] ret_from_fork+0x10/0x1c Code: d1006273 54000260 f9400804 b9400019 (b85fc081) ---[ end trace be9dde154f677cd1 ]---
Reviewed-by: Chao Yu yuchao0@huawei.com Signed-off-by: Gao Xiang gaoxiang25@huawei.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/erofs/unzip_vle.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index 79d3ba62b298..45e88bada907 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -717,13 +717,18 @@ static void z_erofs_vle_unzip_kickoff(void *ptr, int bios) struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t); bool background = tagptr_unfold_tags(t);
- if (atomic_add_return(bios, &io->pending_bios)) + if (!background) { + unsigned long flags; + + spin_lock_irqsave(&io->u.wait.lock, flags); + if (!atomic_add_return(bios, &io->pending_bios)) + wake_up_locked(&io->u.wait); + spin_unlock_irqrestore(&io->u.wait.lock, flags); return; + }
- if (background) + if (!atomic_add_return(bios, &io->pending_bios)) queue_work(z_erofs_workqueue, &io->u.work); - else - wake_up(&io->u.wait); }
static inline void z_erofs_vle_read_endio(struct bio *bio)
From: Mauro Carvalho Chehab mchehab+samsung@kernel.org
[ Upstream commit e4d7b113fdccde1acf8638c5879f2a450d492303 ]
A common mistake is to assume that initializing a var with: struct foo f = { 0 };
Would initialize a zeroed struct. Actually, what this does is to initialize the first element of the struct to zero.
According to C99 Standard 6.7.8.21:
"If there are fewer initializers in a brace-enclosed list than there are elements or members of an aggregate, or fewer characters in a string literal used to initialize an array of known size than there are elements in the array, the remainder of the aggregate shall be initialized implicitly the same as objects that have static storage duration."
So, in practice, it could zero the entire struct, but, if the first element is not an integer, it will produce warnings:
drivers/staging/media/sunxi/cedrus/cedrus.c:drivers/staging/media/sunxi/cedrus/cedrus.c:78:49: warning: Using plain integer as NULL pointer drivers/staging/media/sunxi/cedrus/cedrus_dec.c:drivers/staging/media/sunxi/cedrus/cedrus_dec.c:29:35: warning: Using plain integer as NULL pointer
As the right initialization would be, instead:
struct foo f = { NULL };
Another way to initialize it with gcc is to use:
struct foo f = {};
That seems to be a gcc extension, but clang also does the right thing, and that's a clean way for doing it.
Anyway, I decided to check upstream what's the most commonly pattern. The "= {}" pattern has about 2000 entries:
$ git grep -E "=\s*{\s*}"|wc -l 1951
The standard-C compliant pattern has about 2500 entries:
$ git grep -E "=\s*{\s*NULL\s*}"|wc -l 137 $ git grep -E "=\s*{\s*0\s*}"|wc -l 2323
Meaning that developers have split options on that.
So, let's opt to the simpler form.
Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Acked-by: Paul Kocialkowski paul.kocialkowski@bootlin.com Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/staging/media/sunxi/cedrus/cedrus.c | 2 +- drivers/staging/media/sunxi/cedrus/cedrus_dec.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c index c912c70b3ef7..f18077e8810a 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c @@ -75,7 +75,7 @@ static int cedrus_init_ctrls(struct cedrus_dev *dev, struct cedrus_ctx *ctx) memset(ctx->ctrls, 0, ctrl_size);
for (i = 0; i < CEDRUS_CONTROLS_COUNT; i++) { - struct v4l2_ctrl_config cfg = { 0 }; + struct v4l2_ctrl_config cfg = {};
cfg.elem_size = cedrus_controls[i].elem_size; cfg.id = cedrus_controls[i].id; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c index e40180a33951..f10c25f5460e 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c @@ -26,7 +26,7 @@ void cedrus_device_run(void *priv) { struct cedrus_ctx *ctx = priv; struct cedrus_dev *dev = ctx->dev; - struct cedrus_run run = { 0 }; + struct cedrus_run run = {}; struct media_request *src_req; unsigned long flags;
This is a pure cleanup patch, it doesn't affect runtime.
On Tue, Jan 08, 2019 at 02:25:24PM -0500, Sasha Levin wrote:
From: Mauro Carvalho Chehab mchehab+samsung@kernel.org
[ Upstream commit e4d7b113fdccde1acf8638c5879f2a450d492303 ]
A common mistake is to assume that initializing a var with: struct foo f = { 0 };
Would initialize a zeroed struct. Actually, what this does is to initialize the first element of the struct to zero.
According to C99 Standard 6.7.8.21:
"If there are fewer initializers in a brace-enclosed list than there are elements or members of an aggregate, or fewer characters in a string literal used to initialize an array of known size than there are elements in the array, the remainder of the aggregate shall be initialized implicitly the same as objects that have static storage duration."
Static storage is initialized to zero so this is fine. It's just that Sparse complains if you mix NULL and zero.
regards, dan carpenter
On Wed, Jan 09, 2019 at 11:48:54AM +0300, Dan Carpenter wrote:
This is a pure cleanup patch, it doesn't affect runtime.
On Tue, Jan 08, 2019 at 02:25:24PM -0500, Sasha Levin wrote:
From: Mauro Carvalho Chehab mchehab+samsung@kernel.org
[ Upstream commit e4d7b113fdccde1acf8638c5879f2a450d492303 ]
A common mistake is to assume that initializing a var with: struct foo f = { 0 };
Would initialize a zeroed struct. Actually, what this does is to initialize the first element of the struct to zero.
According to C99 Standard 6.7.8.21:
"If there are fewer initializers in a brace-enclosed list than there are elements or members of an aggregate, or fewer characters in a string literal used to initialize an array of known size than there are elements in the array, the remainder of the aggregate shall be initialized implicitly the same as objects that have static storage duration."
Static storage is initialized to zero so this is fine. It's just that Sparse complains if you mix NULL and zero.
I'll drop it, thank you.
-- Thanks, Sasha
From: yupeng yupeng0921@gmail.com
[ Upstream commit 0fbe82e628c817e292ff588cd5847fc935e025f2 ]
after set SO_DONTROUTE to 1, the IP layer should not route packets if the dest IP address is not in link scope. But if the socket has cached the dst_entry, such packets would be routed until the sk_dst_cache expires. So we should clean the sk_dst_cache when a user set SO_DONTROUTE option. Below are server/client python scripts which could reprodue this issue:
server side code:
========================================================================== import socket import struct import time
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind(('0.0.0.0', 9000)) s.listen(1) sock, addr = s.accept() sock.setsockopt(socket.SOL_SOCKET, socket.SO_DONTROUTE, struct.pack('i', 1)) while True: sock.send(b'foo') time.sleep(1) ==========================================================================
client side code: ========================================================================== import socket import time
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect(('server_address', 9000)) while True: data = s.recv(1024) print(data) ==========================================================================
Signed-off-by: yupeng yupeng0921@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/net/core/sock.c b/net/core/sock.c index 080a880a1761..1d0e5ba366a9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -698,6 +698,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_DONTROUTE: sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); + sk_dst_reset(sk); break; case SO_BROADCAST: sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
From: David Disseldorp ddiss@suse.de
[ Upstream commit 0de263577de5d5e052be5f4f93334e63cc8a7f0b ]
spc5r17.pdf specifies:
4.3.1 ASCII data field requirements ASCII data fields shall contain only ASCII printable characters (i.e., code values 20h to 7Eh) and may be terminated with one or more ASCII null (00h) characters. ASCII data fields described as being left-aligned shall have any unused bytes at the end of the field (i.e., highest offset) and the unused bytes shall be filled with ASCII space characters (20h).
LIO currently space-pads the T10 VENDOR IDENTIFICATION and PRODUCT IDENTIFICATION fields in the standard INQUIRY data. However, the PRODUCT REVISION LEVEL field in the standard INQUIRY data as well as the T10 VENDOR IDENTIFICATION field in the INQUIRY Device Identification VPD Page are zero-terminated/zero-padded.
Fix this inconsistency by using space-padding for all of the above fields.
Signed-off-by: David Disseldorp ddiss@suse.de Reviewed-by: Christoph Hellwig hch@lst.de Reviewed-by: Bryant G. Ly bly@catalogicsoftware.com Reviewed-by: Lee Duncan lduncan@suse.com Reviewed-by: Hannes Reinecke hare@suse.com Reviewed-by: Roman Bolshakov r.bolshakov@yadro.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/target/target_core_spc.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index f459118bc11b..c37dd36ec77d 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -108,12 +108,17 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf)
buf[7] = 0x2; /* CmdQue=1 */
- memcpy(&buf[8], "LIO-ORG ", 8); - memset(&buf[16], 0x20, 16); + /* + * ASCII data fields described as being left-aligned shall have any + * unused bytes at the end of the field (i.e., highest offset) and the + * unused bytes shall be filled with ASCII space characters (20h). + */ + memset(&buf[8], 0x20, 8 + 16 + 4); + memcpy(&buf[8], "LIO-ORG", sizeof("LIO-ORG") - 1); memcpy(&buf[16], dev->t10_wwn.model, - min_t(size_t, strlen(dev->t10_wwn.model), 16)); + strnlen(dev->t10_wwn.model, 16)); memcpy(&buf[32], dev->t10_wwn.revision, - min_t(size_t, strlen(dev->t10_wwn.revision), 4)); + strnlen(dev->t10_wwn.revision, 4)); buf[4] = 31; /* Set additional length to 31 */
return 0; @@ -251,7 +256,9 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf) buf[off] = 0x2; /* ASCII */ buf[off+1] = 0x1; /* T10 Vendor ID */ buf[off+2] = 0x0; - memcpy(&buf[off+4], "LIO-ORG", 8); + /* left align Vendor ID and pad with spaces */ + memset(&buf[off+4], 0x20, 8); + memcpy(&buf[off+4], "LIO-ORG", sizeof("LIO-ORG") - 1); /* Extra Byte for NULL Terminator */ id_len++; /* Identifier Length */
From: Bart Van Assche bvanassche@acm.org
[ Upstream commit ad669505c4e9db9af9faeb5c51aa399326a80d91 ]
A session must only be released after all code that accesses the session structure has finished. Make sure that this is the case by introducing a new command counter per session that is only decremented after the .release_cmd() callback has finished. This patch fixes the following crash:
BUG: KASAN: use-after-free in do_raw_spin_lock+0x1c/0x130 Read of size 4 at addr ffff8801534b16e4 by task rmdir/14805 CPU: 16 PID: 14805 Comm: rmdir Not tainted 4.18.0-rc2-dbg+ #5 Call Trace: dump_stack+0xa4/0xf5 print_address_description+0x6f/0x270 kasan_report+0x241/0x360 __asan_load4+0x78/0x80 do_raw_spin_lock+0x1c/0x130 _raw_spin_lock_irqsave+0x52/0x60 srpt_set_ch_state+0x27/0x70 [ib_srpt] srpt_disconnect_ch+0x1b/0xc0 [ib_srpt] srpt_close_session+0xa8/0x260 [ib_srpt] target_shutdown_sessions+0x170/0x180 [target_core_mod] core_tpg_del_initiator_node_acl+0xf3/0x200 [target_core_mod] target_fabric_nacl_base_release+0x25/0x30 [target_core_mod] config_item_release+0x9c/0x110 [configfs] config_item_put+0x26/0x30 [configfs] configfs_rmdir+0x3b8/0x510 [configfs] vfs_rmdir+0xb3/0x1e0 do_rmdir+0x262/0x2c0 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Cc: Nicholas Bellinger nab@linux-iscsi.org Cc: Mike Christie mchristi@redhat.com Cc: Christoph Hellwig hch@lst.de Cc: David Disseldorp ddiss@suse.de Cc: Hannes Reinecke hare@suse.de Signed-off-by: Bart Van Assche bvanassche@acm.org Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/target/target_core_transport.c | 35 ++++++++++++++++++-------- drivers/target/target_core_xcopy.c | 6 ++++- include/target/target_core_base.h | 1 + include/target/target_core_fabric.h | 2 +- 4 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2cfd61d62e97..ffa5b9f771b5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -224,19 +224,28 @@ void transport_subsystem_check_init(void) sub_api_initialized = 1; }
+static void target_release_sess_cmd_refcnt(struct percpu_ref *ref) +{ + struct se_session *sess = container_of(ref, typeof(*sess), cmd_count); + + wake_up(&sess->cmd_list_wq); +} + /** * transport_init_session - initialize a session object * @se_sess: Session object pointer. * * The caller must have zero-initialized @se_sess before calling this function. */ -void transport_init_session(struct se_session *se_sess) +int transport_init_session(struct se_session *se_sess) { INIT_LIST_HEAD(&se_sess->sess_list); INIT_LIST_HEAD(&se_sess->sess_acl_list); INIT_LIST_HEAD(&se_sess->sess_cmd_list); spin_lock_init(&se_sess->sess_cmd_lock); init_waitqueue_head(&se_sess->cmd_list_wq); + return percpu_ref_init(&se_sess->cmd_count, + target_release_sess_cmd_refcnt, 0, GFP_KERNEL); } EXPORT_SYMBOL(transport_init_session);
@@ -247,6 +256,7 @@ EXPORT_SYMBOL(transport_init_session); struct se_session *transport_alloc_session(enum target_prot_op sup_prot_ops) { struct se_session *se_sess; + int ret;
se_sess = kmem_cache_zalloc(se_sess_cache, GFP_KERNEL); if (!se_sess) { @@ -254,7 +264,11 @@ struct se_session *transport_alloc_session(enum target_prot_op sup_prot_ops) " se_sess_cache\n"); return ERR_PTR(-ENOMEM); } - transport_init_session(se_sess); + ret = transport_init_session(se_sess); + if (ret < 0) { + kfree(se_sess); + return ERR_PTR(ret); + } se_sess->sup_prot_ops = sup_prot_ops;
return se_sess; @@ -581,6 +595,7 @@ void transport_free_session(struct se_session *se_sess) sbitmap_queue_free(&se_sess->sess_tag_pool); kvfree(se_sess->sess_cmd_map); } + percpu_ref_exit(&se_sess->cmd_count); kmem_cache_free(se_sess_cache, se_sess); } EXPORT_SYMBOL(transport_free_session); @@ -2719,6 +2734,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) } se_cmd->transport_state |= CMD_T_PRE_EXECUTE; list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); + percpu_ref_get(&se_sess->cmd_count); out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
@@ -2749,8 +2765,6 @@ static void target_release_cmd_kref(struct kref *kref) if (se_sess) { spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); list_del_init(&se_cmd->se_cmd_list); - if (se_sess->sess_tearing_down && list_empty(&se_sess->sess_cmd_list)) - wake_up(&se_sess->cmd_list_wq); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); }
@@ -2758,6 +2772,8 @@ static void target_release_cmd_kref(struct kref *kref) se_cmd->se_tfo->release_cmd(se_cmd); if (compl) complete(compl); + + percpu_ref_put(&se_sess->cmd_count); }
/** @@ -2886,6 +2902,8 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); se_sess->sess_tearing_down = 1; spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + + percpu_ref_kill(&se_sess->cmd_count); } EXPORT_SYMBOL(target_sess_cmd_list_set_waiting);
@@ -2900,17 +2918,14 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
WARN_ON_ONCE(!se_sess->sess_tearing_down);
- spin_lock_irq(&se_sess->sess_cmd_lock); do { - ret = wait_event_lock_irq_timeout( - se_sess->cmd_list_wq, - list_empty(&se_sess->sess_cmd_list), - se_sess->sess_cmd_lock, 180 * HZ); + ret = wait_event_timeout(se_sess->cmd_list_wq, + percpu_ref_is_zero(&se_sess->cmd_count), + 180 * HZ); list_for_each_entry(cmd, &se_sess->sess_cmd_list, se_cmd_list) target_show_cmd("session shutdown: still waiting for ", cmd); } while (ret <= 0); - spin_unlock_irq(&se_sess->sess_cmd_lock); } EXPORT_SYMBOL(target_wait_for_sess_cmds);
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 70adcfdca8d1..124495f953fa 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -479,6 +479,8 @@ static const struct target_core_fabric_ops xcopy_pt_tfo = {
int target_xcopy_setup_pt(void) { + int ret; + xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM, 0); if (!xcopy_wq) { pr_err("Unable to allocate xcopy_wq\n"); @@ -496,7 +498,9 @@ int target_xcopy_setup_pt(void) INIT_LIST_HEAD(&xcopy_pt_nacl.acl_list); INIT_LIST_HEAD(&xcopy_pt_nacl.acl_sess_list); memset(&xcopy_pt_sess, 0, sizeof(struct se_session)); - transport_init_session(&xcopy_pt_sess); + ret = transport_init_session(&xcopy_pt_sess); + if (ret < 0) + return ret;
xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg; xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index e3bdb0550a59..d9fd4eac58c2 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -601,6 +601,7 @@ struct se_session { struct se_node_acl *se_node_acl; struct se_portal_group *se_tpg; void *fabric_sess_ptr; + struct percpu_ref cmd_count; struct list_head sess_list; struct list_head sess_acl_list; struct list_head sess_cmd_list; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index f4147b398431..eb9d0923c55c 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -116,7 +116,7 @@ struct se_session *target_setup_session(struct se_portal_group *, struct se_session *, void *)); void target_remove_session(struct se_session *);
-void transport_init_session(struct se_session *); +int transport_init_session(struct se_session *se_sess); struct se_session *transport_alloc_session(enum target_prot_op); int transport_alloc_session_tags(struct se_session *, unsigned int, unsigned int);
From: "Dmitry V. Levin" ldv@altlinux.org
[ Upstream commit b708a3cc9600390ccaa2b68a88087dd265154b2b ]
I've stumbled over the current macro-expand behaviour of the test harness:
$ gcc -Wall -xc - <<'__EOF__' TEST(macro) { int status = 0; ASSERT_TRUE(WIFSIGNALED(status)); } TEST_HARNESS_MAIN __EOF__ $ ./a.out [==========] Running 1 tests from 1 test cases. [ RUN ] global.macro <stdin>:4:global.macro:Expected 0 (0) != (((signed char) (((status) & 0x7f) + 1) >> 1) > 0) (0) global.macro: Test terminated by assertion [ FAIL ] global.macro [==========] 0 / 1 tests passed. [ FAILED ]
With this change the output of the same test looks much more comprehensible:
[==========] Running 1 tests from 1 test cases. [ RUN ] global.macro <stdin>:4:global.macro:Expected 0 (0) != WIFSIGNALED(status) (0) global.macro: Test terminated by assertion [ FAIL ] global.macro [==========] 0 / 1 tests passed. [ FAILED ]
The issue is very similar to the bug fixed in glibc assert(3) three years ago: https://sourceware.org/bugzilla/show_bug.cgi?id=18604
Cc: Shuah Khan shuah@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Andy Lutomirski luto@amacapital.net Cc: Will Drewry wad@chromium.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Dmitry V. Levin ldv@altlinux.org Acked-by: Kees Cook keescook@chromium.org Signed-off-by: Shuah Khan shuah@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/kselftest_harness.h | 42 ++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 6ae3730c4ee3..76d654ef3234 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -354,7 +354,7 @@ * ASSERT_EQ(expected, measured): expected == measured */ #define ASSERT_EQ(expected, seen) \ - __EXPECT(expected, seen, ==, 1) + __EXPECT(expected, #expected, seen, #seen, ==, 1)
/** * ASSERT_NE(expected, seen) @@ -365,7 +365,7 @@ * ASSERT_NE(expected, measured): expected != measured */ #define ASSERT_NE(expected, seen) \ - __EXPECT(expected, seen, !=, 1) + __EXPECT(expected, #expected, seen, #seen, !=, 1)
/** * ASSERT_LT(expected, seen) @@ -376,7 +376,7 @@ * ASSERT_LT(expected, measured): expected < measured */ #define ASSERT_LT(expected, seen) \ - __EXPECT(expected, seen, <, 1) + __EXPECT(expected, #expected, seen, #seen, <, 1)
/** * ASSERT_LE(expected, seen) @@ -387,7 +387,7 @@ * ASSERT_LE(expected, measured): expected <= measured */ #define ASSERT_LE(expected, seen) \ - __EXPECT(expected, seen, <=, 1) + __EXPECT(expected, #expected, seen, #seen, <=, 1)
/** * ASSERT_GT(expected, seen) @@ -398,7 +398,7 @@ * ASSERT_GT(expected, measured): expected > measured */ #define ASSERT_GT(expected, seen) \ - __EXPECT(expected, seen, >, 1) + __EXPECT(expected, #expected, seen, #seen, >, 1)
/** * ASSERT_GE(expected, seen) @@ -409,7 +409,7 @@ * ASSERT_GE(expected, measured): expected >= measured */ #define ASSERT_GE(expected, seen) \ - __EXPECT(expected, seen, >=, 1) + __EXPECT(expected, #expected, seen, #seen, >=, 1)
/** * ASSERT_NULL(seen) @@ -419,7 +419,7 @@ * ASSERT_NULL(measured): NULL == measured */ #define ASSERT_NULL(seen) \ - __EXPECT(NULL, seen, ==, 1) + __EXPECT(NULL, "NULL", seen, #seen, ==, 1)
/** * ASSERT_TRUE(seen) @@ -429,7 +429,7 @@ * ASSERT_TRUE(measured): measured != 0 */ #define ASSERT_TRUE(seen) \ - ASSERT_NE(0, seen) + __EXPECT(0, "0", seen, #seen, !=, 1)
/** * ASSERT_FALSE(seen) @@ -439,7 +439,7 @@ * ASSERT_FALSE(measured): measured == 0 */ #define ASSERT_FALSE(seen) \ - ASSERT_EQ(0, seen) + __EXPECT(0, "0", seen, #seen, ==, 1)
/** * ASSERT_STREQ(expected, seen) @@ -472,7 +472,7 @@ * EXPECT_EQ(expected, measured): expected == measured */ #define EXPECT_EQ(expected, seen) \ - __EXPECT(expected, seen, ==, 0) + __EXPECT(expected, #expected, seen, #seen, ==, 0)
/** * EXPECT_NE(expected, seen) @@ -483,7 +483,7 @@ * EXPECT_NE(expected, measured): expected != measured */ #define EXPECT_NE(expected, seen) \ - __EXPECT(expected, seen, !=, 0) + __EXPECT(expected, #expected, seen, #seen, !=, 0)
/** * EXPECT_LT(expected, seen) @@ -494,7 +494,7 @@ * EXPECT_LT(expected, measured): expected < measured */ #define EXPECT_LT(expected, seen) \ - __EXPECT(expected, seen, <, 0) + __EXPECT(expected, #expected, seen, #seen, <, 0)
/** * EXPECT_LE(expected, seen) @@ -505,7 +505,7 @@ * EXPECT_LE(expected, measured): expected <= measured */ #define EXPECT_LE(expected, seen) \ - __EXPECT(expected, seen, <=, 0) + __EXPECT(expected, #expected, seen, #seen, <=, 0)
/** * EXPECT_GT(expected, seen) @@ -516,7 +516,7 @@ * EXPECT_GT(expected, measured): expected > measured */ #define EXPECT_GT(expected, seen) \ - __EXPECT(expected, seen, >, 0) + __EXPECT(expected, #expected, seen, #seen, >, 0)
/** * EXPECT_GE(expected, seen) @@ -527,7 +527,7 @@ * EXPECT_GE(expected, measured): expected >= measured */ #define EXPECT_GE(expected, seen) \ - __EXPECT(expected, seen, >=, 0) + __EXPECT(expected, #expected, seen, #seen, >=, 0)
/** * EXPECT_NULL(seen) @@ -537,7 +537,7 @@ * EXPECT_NULL(measured): NULL == measured */ #define EXPECT_NULL(seen) \ - __EXPECT(NULL, seen, ==, 0) + __EXPECT(NULL, "NULL", seen, #seen, ==, 0)
/** * EXPECT_TRUE(seen) @@ -547,7 +547,7 @@ * EXPECT_TRUE(measured): 0 != measured */ #define EXPECT_TRUE(seen) \ - EXPECT_NE(0, seen) + __EXPECT(0, "0", seen, #seen, !=, 0)
/** * EXPECT_FALSE(seen) @@ -557,7 +557,7 @@ * EXPECT_FALSE(measured): 0 == measured */ #define EXPECT_FALSE(seen) \ - EXPECT_EQ(0, seen) + __EXPECT(0, "0", seen, #seen, ==, 0)
/** * EXPECT_STREQ(expected, seen) @@ -597,7 +597,7 @@ if (_metadata->passed && _metadata->step < 255) \ _metadata->step++;
-#define __EXPECT(_expected, _seen, _t, _assert) do { \ +#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ @@ -606,8 +606,8 @@ unsigned long long __exp_print = (uintptr_t)__exp; \ unsigned long long __seen_print = (uintptr_t)__seen; \ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ - #_expected, __exp_print, #_t, \ - #_seen, __seen_print); \ + _expected_str, __exp_print, #_t, \ + _seen_str, __seen_print); \ _metadata->passed = 0; \ /* Ensure the optional handler is triggered */ \ _metadata->trigger = 1; \
From: Qian Cai cai@lca.pw
[ Upstream commit 6e8830674ea77f57d57a33cca09083b117a71f41 ]
If the kernel is configured with KASAN_EXTRA, the stack size is increased significantly due to setting the GCC -fstack-reuse option to "none" [1]. As a result, it can trigger a stack overrun quite often with 32k stack size compiled using GCC 8. For example, this reproducer
https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/sysca...
can trigger a "corrupted stack end detected inside scheduler" very reliably with CONFIG_SCHED_STACK_END_CHECK enabled. There are other reports at:
https://lore.kernel.org/lkml/1542144497.12945.29.camel@gmx.us/ https://lore.kernel.org/lkml/721E7B42-2D55-4866-9C1A-3E8D64F33F9C@gmx.us/
There are just too many functions that could have a large stack with KASAN_EXTRA due to large local variables that have been called over and over again without being able to reuse the stacks. Some noticiable ones are,
size 7536 shrink_inactive_list 7440 shrink_page_list 6560 fscache_stats_show 3920 jbd2_journal_commit_transaction 3216 try_to_unmap_one 3072 migrate_page_move_mapping 3584 migrate_misplaced_transhuge_page 3920 ip_vs_lblcr_schedule 4304 lpfc_nvme_info_show 3888 lpfc_debugfs_nvmestat_data.constprop
There are other 49 functions over 2k in size while compiling kernel with "-Wframe-larger-than=" on this machine. Hence, it is too much work to change Makefiles for each object to compile without -fsanitize-address-use-after-scope individually.
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715#c23
Signed-off-by: Qian Cai cai@lca.pw Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/memory.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f0a5c9531e8b..778af0b7f7fd 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -67,12 +67,17 @@ /* * KASAN requires 1/8th of the kernel virtual address space for the shadow * region. KASAN can bloat the stack significantly, so double the (minimum) - * stack size when KASAN is in use. + * stack size when KASAN is in use, and then double it again if KASAN_EXTRA is + * on. */ #ifdef CONFIG_KASAN #define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT)) +#ifdef CONFIG_KASAN_EXTRA +#define KASAN_THREAD_SHIFT 2 +#else #define KASAN_THREAD_SHIFT 1 +#endif /* CONFIG_KASAN_EXTRA */ #else #define KASAN_SHADOW_SIZE (0) #define KASAN_THREAD_SHIFT 0
From: Lucas Stach l.stach@pengutronix.de
[ Upstream commit f7542d817733f461258fd3a47d77da35b2d9fc81 ]
The exclusive gates may be set up in the wrong way by software running before the clock driver comes up. In that case the exclusive setup is locked in its initial state, as the complementary function can't be activated without disabling the initial setup first.
To avoid this lock situation, reset the exclusive gates to the off state and allow the kernel to provide the proper setup.
Signed-off-by: Lucas Stach l.stach@pengutronix.de Reviewed-by: Dong Aisheng Aisheng.dong@nxp.com Signed-off-by: Stephen Boyd sboyd@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clk/imx/clk-imx6q.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c index bbe0c60f4d09..59f6a3e087db 100644 --- a/drivers/clk/imx/clk-imx6q.c +++ b/drivers/clk/imx/clk-imx6q.c @@ -508,8 +508,12 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) * lvds1_gate and lvds2_gate are pseudo-gates. Both can be * independently configured as clock inputs or outputs. We treat * the "output_enable" bit as a gate, even though it's really just - * enabling clock output. + * enabling clock output. Initially the gate bits are cleared, as + * otherwise the exclusive configuration gets locked in the setup done + * by software running before the clock driver, with no way to change + * it. */ + writel(readl(base + 0x160) & ~0x3c00, base + 0x160); clk[IMX6QDL_CLK_LVDS1_GATE] = imx_clk_gate_exclusive("lvds1_gate", "lvds1_sel", base + 0x160, 10, BIT(12)); clk[IMX6QDL_CLK_LVDS2_GATE] = imx_clk_gate_exclusive("lvds2_gate", "lvds2_sel", base + 0x160, 11, BIT(13));
From: Will Deacon will.deacon@arm.com
[ Upstream commit 33309ecda0070506c49182530abe7728850ebe78 ]
The dcache_by_line_op macro suffers from a couple of small problems:
First, the GAS directives that are currently being used rely on assembler behavior that is not documented, and probably not guaranteed to produce the correct behavior going forward. As a result, we end up with some undefined symbols in cache.o:
$ nm arch/arm64/mm/cache.o ... U civac ... U cvac U cvap U cvau
This is due to the fact that the comparisons used to select the operation type in the dcache_by_line_op macro are comparing symbols not strings, and even though it seems that GAS is doing the right thing here (undefined symbols by the same name are equal to each other), it seems unwise to rely on this.
Second, when patching in a DC CVAP instruction on CPUs that support it, the fallback path consists of a DC CVAU instruction which may be affected by CPU errata that require ARM64_WORKAROUND_CLEAN_CACHE.
Solve these issues by unrolling the various maintenance routines and using the conditional directives that are documented as operating on strings. To avoid the complexity of nested alternatives, we move the DC CVAP patching to __clean_dcache_area_pop, falling back to a branch to __clean_dcache_area_poc if DCPOP is not supported by the CPU.
Reported-by: Ard Biesheuvel ard.biesheuvel@linaro.org Suggested-by: Robin Murphy robin.murphy@arm.com Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/assembler.h | 30 ++++++++++++++++++------------ arch/arm64/mm/cache.S | 3 +++ 2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6142402c2eb4..08b216c200c9 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -377,27 +377,33 @@ alternative_endif * size: size of the region * Corrupts: kaddr, size, tmp1, tmp2 */ + .macro __dcache_op_workaround_clean_cache, op, kaddr +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .endm + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 dcache_line_size \tmp1, \tmp2 add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 9998: - .if (\op == cvau || \op == cvac) -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \kaddr -alternative_else - dc civac, \kaddr -alternative_endif - .elseif (\op == cvap) -alternative_if ARM64_HAS_DCPOP - sys 3, c7, c12, 1, \kaddr // dc cvap -alternative_else - dc cvac, \kaddr -alternative_endif + .ifc \op, cvau + __dcache_op_workaround_clean_cache \op, \kaddr + .else + .ifc \op, cvac + __dcache_op_workaround_clean_cache \op, \kaddr + .else + .ifc \op, cvap + sys 3, c7, c12, 1, \kaddr // dc cvap .else dc \op, \kaddr .endif + .endif + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 0c22ede52f90..a194fd0e837f 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -212,6 +212,9 @@ ENDPROC(__dma_clean_area) * - size - size in question */ ENTRY(__clean_dcache_area_pop) + alternative_if_not ARM64_HAS_DCPOP + b __clean_dcache_area_poc + alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret ENDPIPROC(__clean_dcache_area_pop)
From: Jiong Wang jiong.wang@netronome.com
[ Upstream commit e434b8cdf788568ba65a0a0fd9f3cb41f3ca1803 ]
Currently, the destination register is marked as unknown for 32-bit sub-register move (BPF_MOV | BPF_ALU) whenever the source register type is SCALAR_VALUE.
This is too conservative that some valid cases will be rejected. Especially, this may turn a constant scalar value into unknown value that could break some assumptions of verifier.
For example, test_l4lb_noinline.c has the following C code:
struct real_definition *dst
1: if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) 2: return TC_ACT_SHOT; 3: 4: if (dst->flags & F_IPV6) {
get_packet_dst is responsible for initializing "dst" into valid pointer and return true (1), otherwise return false (0). The compiled instruction sequence using alu32 will be:
412: (54) (u32) r7 &= (u32) 1 413: (bc) (u32) r0 = (u32) r7 414: (95) exit
insn 413, a BPF_MOV | BPF_ALU, however will turn r0 into unknown value even r7 contains SCALAR_VALUE 1.
This causes trouble when verifier is walking the code path that hasn't initialized "dst" inside get_packet_dst, for which case 0 is returned and we would then expect verifier concluding line 1 in the above C code pass the "if" check, therefore would skip fall through path starting at line 4. Now, because r0 returned from callee has became unknown value, so verifier won't skip analyzing path starting at line 4 and "dst->flags" requires dereferencing the pointer "dst" which actually hasn't be initialized for this path.
This patch relaxed the code marking sub-register move destination. For a SCALAR_VALUE, it is safe to just copy the value from source then truncate it into 32-bit.
A unit test also included to demonstrate this issue. This test will fail before this patch.
This relaxation could let verifier skipping more paths for conditional comparison against immediate. It also let verifier recording a more accurate/strict value for one register at one state, if this state end up with going through exit without rejection and it is used for state comparison later, then it is possible an inaccurate/permissive value is better. So the real impact on verifier processed insn number is complex. But in all, without this fix, valid program could be rejected.
From real benchmarking on kernel selftests and Cilium bpf tests, there is
no impact on processed instruction number when tests ares compiled with default compilation options. There is slightly improvements when they are compiled with -mattr=+alu32 after this patch.
Also, test_xdp_noinline/-mattr=+alu32 now passed verification. It is rejected before this fix.
Insn processed before/after this patch:
default -mattr=+alu32
Kernel selftest
=== test_xdp.o 371/371 369/369 test_l4lb.o 6345/6345 5623/5623 test_xdp_noinline.o 2971/2971 rejected/2727 test_tcp_estates.o 429/429 430/430
Cilium bpf === bpf_lb-DLB_L3.o: 2085/2085 1685/1687 bpf_lb-DLB_L4.o: 2287/2287 1986/1982 bpf_lb-DUNKNOWN.o: 690/690 622/622 bpf_lxc.o: 95033/95033 N/A bpf_netdev.o: 7245/7245 N/A bpf_overlay.o: 2898/2898 3085/2947
NOTE: - bpf_lxc.o and bpf_netdev.o compiled by -mattr=+alu32 are rejected by verifier due to another issue inside verifier on supporting alu32 binary. - Each cilium bpf program could generate several processed insn number, above number is sum of them.
v1->v2: - Restrict the change on SCALAR_VALUE. - Update benchmark numbers on Cilium bpf tests.
Signed-off-by: Jiong Wang jiong.wang@netronome.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/bpf/verifier.c | 16 ++++++++++++---- tools/testing/selftests/bpf/test_verifier.c | 13 +++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a81f52b2c92e..eedc7bd4185d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3571,12 +3571,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return err;
if (BPF_SRC(insn->code) == BPF_X) { + struct bpf_reg_state *src_reg = regs + insn->src_reg; + struct bpf_reg_state *dst_reg = regs + insn->dst_reg; + if (BPF_CLASS(insn->code) == BPF_ALU64) { /* case: R1 = R2 * copy register state to dest reg */ - regs[insn->dst_reg] = regs[insn->src_reg]; - regs[insn->dst_reg].live |= REG_LIVE_WRITTEN; + *dst_reg = *src_reg; + dst_reg->live |= REG_LIVE_WRITTEN; } else { /* R1 = (u32) R2 */ if (is_pointer_value(env, insn->src_reg)) { @@ -3584,9 +3587,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) "R%d partial copy of pointer\n", insn->src_reg); return -EACCES; + } else if (src_reg->type == SCALAR_VALUE) { + *dst_reg = *src_reg; + dst_reg->live |= REG_LIVE_WRITTEN; + } else { + mark_reg_unknown(env, regs, + insn->dst_reg); } - mark_reg_unknown(env, regs, insn->dst_reg); - coerce_reg_to_size(®s[insn->dst_reg], 4); + coerce_reg_to_size(dst_reg, 4); } } else { /* case: R = imm diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index f8eac4a544f4..444f49176a2d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2903,6 +2903,19 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "alu32: mov u32 const", + .insns = { + BPF_MOV32_IMM(BPF_REG_7, 0), + BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1), + BPF_MOV32_REG(BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, { "unpriv: partial copy of pointer", .insns = {
From: Minas Harutyunyan minas.harutyunyan@synopsys.com
[ Upstream commit 4fe4f9fecc36956fd53c8edf96dd0c691ef98ff9 ]
Disabling all EP's allow to reset EP's to initial state. Introduced new function dwc2_hsotg_ep_disable_lock() which before calling dwc2_hsotg_ep_disable() function acquire hsotg->lock and release on exiting.
From dwc2_hsotg_ep_disable() function removed acquiring
hsotg->lock. In dwc2_hsotg_core_init_disconnected() function when USB reset interrupt asserted disabling all ep’s by dwc2_hsotg_ep_disable() function. This updates eliminating sparse imbalance warnings.
Reverted changes in dwc2_hostg_disconnect() function. Introduced new function dwc2_hsotg_ep_disable_lock(). Changed dwc2_hsotg_ep_ops. Now disable point to dwc2_hsotg_ep_disable_lock() function. In functions dwc2_hsotg_udc_stop() and dwc2_hsotg_suspend() dwc2_hsotg_ep_disable() function replaced by dwc2_hsotg_ep_disable_lock() function. In dwc2_hsotg_ep_disable() function removed acquiring of hsotg->lock.
Fixes: dccf1bad4be7 ("usb: dwc2: Disable all EP's on disconnect") Signed-off-by: Minas Harutyunyan hminas@synopsys.com Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/usb/dwc2/gadget.c | 41 ++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 18 deletions(-)
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 2d6d2c8244de..a00a56b4ae79 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3165,8 +3165,6 @@ static void kill_all_requests(struct dwc2_hsotg *hsotg, dwc2_hsotg_txfifo_flush(hsotg, ep->fifo_index); }
-static int dwc2_hsotg_ep_disable(struct usb_ep *ep); - /** * dwc2_hsotg_disconnect - disconnect service * @hsotg: The device state. @@ -3188,9 +3186,11 @@ void dwc2_hsotg_disconnect(struct dwc2_hsotg *hsotg) /* all endpoints should be shutdown */ for (ep = 0; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); + kill_all_requests(hsotg, hsotg->eps_in[ep], + -ESHUTDOWN); if (hsotg->eps_out[ep]) - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); + kill_all_requests(hsotg, hsotg->eps_out[ep], + -ESHUTDOWN); }
call_gadget(hsotg, disconnect); @@ -3234,6 +3234,7 @@ static void dwc2_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic) GINTSTS_PTXFEMP | \ GINTSTS_RXFLVL)
+static int dwc2_hsotg_ep_disable(struct usb_ep *ep); /** * dwc2_hsotg_core_init - issue softreset to the core * @hsotg: The device state @@ -4069,10 +4070,8 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep) struct dwc2_hsotg *hsotg = hs_ep->parent; int dir_in = hs_ep->dir_in; int index = hs_ep->index; - unsigned long flags; u32 epctrl_reg; u32 ctrl; - int locked;
dev_dbg(hsotg->dev, "%s(ep %p)\n", __func__, ep);
@@ -4088,10 +4087,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index);
- locked = spin_is_locked(&hsotg->lock); - if (!locked) - spin_lock_irqsave(&hsotg->lock, flags); - ctrl = dwc2_readl(hsotg, epctrl_reg);
if (ctrl & DXEPCTL_EPENA) @@ -4114,12 +4109,22 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep) hs_ep->fifo_index = 0; hs_ep->fifo_size = 0;
- if (!locked) - spin_unlock_irqrestore(&hsotg->lock, flags); - return 0; }
+static int dwc2_hsotg_ep_disable_lock(struct usb_ep *ep) +{ + struct dwc2_hsotg_ep *hs_ep = our_ep(ep); + struct dwc2_hsotg *hsotg = hs_ep->parent; + unsigned long flags; + int ret; + + spin_lock_irqsave(&hsotg->lock, flags); + ret = dwc2_hsotg_ep_disable(ep); + spin_unlock_irqrestore(&hsotg->lock, flags); + return ret; +} + /** * on_list - check request is on the given endpoint * @ep: The endpoint to check. @@ -4267,7 +4272,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value)
static const struct usb_ep_ops dwc2_hsotg_ep_ops = { .enable = dwc2_hsotg_ep_enable, - .disable = dwc2_hsotg_ep_disable, + .disable = dwc2_hsotg_ep_disable_lock, .alloc_request = dwc2_hsotg_ep_alloc_request, .free_request = dwc2_hsotg_ep_free_request, .queue = dwc2_hsotg_ep_queue_lock, @@ -4407,9 +4412,9 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) /* all endpoints should be shutdown */ for (ep = 1; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); + dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); + dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep); }
spin_lock_irqsave(&hsotg->lock, flags); @@ -4857,9 +4862,9 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg)
for (ep = 0; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) - dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep); + dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) - dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep); + dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep); } }
From: Reinette Chatre reinette.chatre@intel.com
[ Upstream commit 52eb74339a6233c69f4e3794b69ea7c98eeeae1b ]
rdt_find_domain() returns an ERR_PTR() that is generated from a provided domain id when the value is negative.
Care needs to be taken when creating an ERR_PTR() from this value because a subsequent check using IS_ERR() expects the error to be within the MAX_ERRNO range. Using an invalid domain id as an ERR_PTR() does work at this time since this is currently always -1. Using this undocumented assumption is fragile since future users of rdt_find_domain() may not be aware of thus assumption.
Two related issues are addressed:
- Ensure that rdt_find_domain() always returns a valid error value by forcing the error to be -ENODEV when a negative domain id is provided.
- In a few instances the return value of rdt_find_domain() is just checked for NULL - fix these to include a check of ERR_PTR.
Fixes: d89b7379015f ("x86/intel_rdt/cqm: Add mon_data") Fixes: 521348b011d6 ("x86/intel_rdt: Introduce utility to obtain CDP peer") Signed-off-by: Reinette Chatre reinette.chatre@intel.com Signed-off-by: Borislav Petkov bp@suse.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: gavin.hindman@intel.com Cc: jithu.joseph@intel.com Cc: x86-ml x86@kernel.org Link: https://lkml.kernel.org/r/b88cd4ff6a75995bf8db9b0ea546908fe50f69f3.154447985... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/cpu/intel_rdt.c | 2 +- arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 2 +- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 44272b7107ad..2d0a565fd0bb 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -421,7 +421,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, struct list_head *l;
if (id < 0) - return ERR_PTR(id); + return ERR_PTR(-ENODEV);
list_for_each(l, &r->domains) { d = list_entry(l, struct rdt_domain, list); diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index efa4a519f5e5..c8b72aff55e0 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -467,7 +467,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
r = &rdt_resources_all[resid]; d = rdt_find_domain(r, domid, NULL); - if (!d) { + if (IS_ERR_OR_NULL(d)) { ret = -ENOENT; goto out; } diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index f27b8115ffa2..951c61367688 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1029,7 +1029,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, * peer RDT CDP resource. Hence the WARN. */ _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); - if (WARN_ON(!_d_cdp)) { + if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { _r_cdp = NULL; ret = -EINVAL; }
From: Masahiro Yamada yamada.masahiro@socionext.com
[ Upstream commit 77c1c0fa8b1477c5799bdad65026ea5ff676da44 ]
Currently, warn_ignore_character() displays invalid file name and line number.
The lexer should use current_file->name and yylineno, while the parser should use zconf_curname() and zconf_lineno().
This difference comes from that the lexer is always going ahead of the parser. The parser needs to look ahead one token to make a shift/reduce decision, so the lexer is requested to scan more text from the input file.
This commit fixes the warning message from warn_ignored_character().
[Test Code]
----(Kconfig begin)---- / -----(Kconfig end)-----
[Output]
Before the fix:
<none>:0:warning: ignoring unsupported character '/'
After the fix:
Kconfig:1:warning: ignoring unsupported character '/'
Signed-off-by: Masahiro Yamada yamada.masahiro@socionext.com Signed-off-by: Sasha Levin sashal@kernel.org --- scripts/kconfig/zconf.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 25bd2b89fe3f..eeac64ccc730 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -73,7 +73,7 @@ static void warn_ignored_character(char chr) { fprintf(stderr, "%s:%d:warning: ignoring unsupported character '%c'\n", - zconf_curname(), zconf_lineno(), chr); + current_file->name, yylineno, chr); } %}
From: Masahiro Yamada yamada.masahiro@socionext.com
[ Upstream commit fbac5977d81cb2b2b7e37b11c459055d9585273c ]
An unterminated string literal followed by new line is passed to the parser (with "multi-line strings not supported" warning shown), then handled properly there.
On the other hand, an unterminated string literal at end of file is never passed to the parser, then results in memory leak.
[Test Code]
----------(Kconfig begin)---------- source "Kconfig.inc"
config A bool "a" -----------(Kconfig end)-----------
--------(Kconfig.inc begin)-------- config B bool "b\No new line at end of file ---------(Kconfig.inc end)---------
[Summary from Valgrind]
Before the fix:
LEAK SUMMARY: definitely lost: 16 bytes in 1 blocks ...
After the fix:
LEAK SUMMARY: definitely lost: 0 bytes in 0 blocks ...
Eliminate the memory leak path by handling this case. Of course, such a Kconfig file is wrong already, so I will add an error message later.
Signed-off-by: Masahiro Yamada yamada.masahiro@socionext.com Signed-off-by: Sasha Levin sashal@kernel.org --- scripts/kconfig/zconf.l | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index eeac64ccc730..c2f577d71964 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -221,6 +221,8 @@ n [A-Za-z0-9_-] } <<EOF>> { BEGIN(INITIAL); + yylval.string = text; + return T_WORD_QUOTE; } }
From: Evan Green evgreen@chromium.org
[ Upstream commit db23d88756abd38e0995ea8449d0025b3de4b26b ]
adc5_get_dt_data uses a local, prop, feeds it to adc5_get_dt_channel_data, and then puts the result into adc->chan_props. The problem is adc5_get_dt_channel_data may not initialize that structure fully, so a garbage value is used for prescale if the optional "qcom,pre-scaling" is not defined in DT. adc5_read_raw then uses this as an array index, generating a crash that looks like this:
[ 6.683186] Unable to handle kernel paging request at virtual address ffffff90e78c7964 Call trace: qcom_vadc_scale_code_voltage_factor+0x74/0x104 qcom_vadc_scale_hw_calib_die_temp+0x20/0x60 qcom_adc5_hw_scale+0x78/0xa4 adc5_read_raw+0x3d0/0x65c iio_channel_read+0x240/0x30c iio_read_channel_processed+0x10c/0x150 qpnp_tm_get_temp+0xc0/0x40c of_thermal_get_temp+0x7c/0x98 thermal_zone_get_temp+0xac/0xd8 thermal_zone_device_update+0xc0/0x38c qpnp_tm_probe+0x624/0x81c platform_drv_probe+0xe4/0x11c really_probe+0x188/0x3fc driver_probe_device+0xb8/0x188 __device_attach_driver+0x114/0x180 bus_for_each_drv+0xd8/0x118 __device_attach+0x180/0x27c device_initial_probe+0x20/0x2c bus_probe_device+0x78/0x124 deferred_probe_work_func+0xfc/0x138 process_one_work+0x3d8/0x8b0 process_scheduled_works+0x48/0x6c worker_thread+0x488/0x7cc kthread+0x24c/0x264 ret_from_fork+0x10/0x18
Unfortunately, when I went to add the initializer for this and tried to boot it, my machine shut down immediately, complaining that it was hotter than the sun. It appears that adc5_chans_pmic and adc5_chans_rev2 were initializing prescale_index as if it were directly a divisor, rather than the index into adc5_prescale_ratios that it is.
Fix the uninitialized value, and change the static initialization to use indices into adc5_prescale_ratios.
Signed-off-by: Evan Green evgreen@chromium.org Reviewed-by: Matthias Kaehlcke mka@chromium.org Cc: Stable@vger.kernel.org Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iio/adc/qcom-spmi-adc5.c | 58 +++++++++++++++++--------------- 1 file changed, 31 insertions(+), 27 deletions(-)
diff --git a/drivers/iio/adc/qcom-spmi-adc5.c b/drivers/iio/adc/qcom-spmi-adc5.c index f9af6b082916..6a866cc187f7 100644 --- a/drivers/iio/adc/qcom-spmi-adc5.c +++ b/drivers/iio/adc/qcom-spmi-adc5.c @@ -423,6 +423,7 @@ struct adc5_channels { enum vadc_scale_fn_type scale_fn_type; };
+/* In these definitions, _pre refers to an index into adc5_prescale_ratios. */ #define ADC5_CHAN(_dname, _type, _mask, _pre, _scale) \ { \ .datasheet_name = _dname, \ @@ -443,63 +444,63 @@ struct adc5_channels { _pre, _scale) \
static const struct adc5_channels adc5_chans_pmic[ADC5_MAX_CHANNEL] = { - [ADC5_REF_GND] = ADC5_CHAN_VOLT("ref_gnd", 1, + [ADC5_REF_GND] = ADC5_CHAN_VOLT("ref_gnd", 0, SCALE_HW_CALIB_DEFAULT) - [ADC5_1P25VREF] = ADC5_CHAN_VOLT("vref_1p25", 1, + [ADC5_1P25VREF] = ADC5_CHAN_VOLT("vref_1p25", 0, SCALE_HW_CALIB_DEFAULT) - [ADC5_VPH_PWR] = ADC5_CHAN_VOLT("vph_pwr", 3, + [ADC5_VPH_PWR] = ADC5_CHAN_VOLT("vph_pwr", 1, SCALE_HW_CALIB_DEFAULT) - [ADC5_VBAT_SNS] = ADC5_CHAN_VOLT("vbat_sns", 3, + [ADC5_VBAT_SNS] = ADC5_CHAN_VOLT("vbat_sns", 1, SCALE_HW_CALIB_DEFAULT) - [ADC5_DIE_TEMP] = ADC5_CHAN_TEMP("die_temp", 1, + [ADC5_DIE_TEMP] = ADC5_CHAN_TEMP("die_temp", 0, SCALE_HW_CALIB_PMIC_THERM) - [ADC5_USB_IN_I] = ADC5_CHAN_VOLT("usb_in_i_uv", 1, + [ADC5_USB_IN_I] = ADC5_CHAN_VOLT("usb_in_i_uv", 0, SCALE_HW_CALIB_DEFAULT) - [ADC5_USB_IN_V_16] = ADC5_CHAN_VOLT("usb_in_v_div_16", 16, + [ADC5_USB_IN_V_16] = ADC5_CHAN_VOLT("usb_in_v_div_16", 8, SCALE_HW_CALIB_DEFAULT) - [ADC5_CHG_TEMP] = ADC5_CHAN_TEMP("chg_temp", 1, + [ADC5_CHG_TEMP] = ADC5_CHAN_TEMP("chg_temp", 0, SCALE_HW_CALIB_PM5_CHG_TEMP) /* Charger prescales SBUx and MID_CHG to fit within 1.8V upper unit */ - [ADC5_SBUx] = ADC5_CHAN_VOLT("chg_sbux", 3, + [ADC5_SBUx] = ADC5_CHAN_VOLT("chg_sbux", 1, SCALE_HW_CALIB_DEFAULT) - [ADC5_MID_CHG_DIV6] = ADC5_CHAN_VOLT("chg_mid_chg", 6, + [ADC5_MID_CHG_DIV6] = ADC5_CHAN_VOLT("chg_mid_chg", 3, SCALE_HW_CALIB_DEFAULT) - [ADC5_XO_THERM_100K_PU] = ADC5_CHAN_TEMP("xo_therm", 1, + [ADC5_XO_THERM_100K_PU] = ADC5_CHAN_TEMP("xo_therm", 0, SCALE_HW_CALIB_XOTHERM) - [ADC5_AMUX_THM1_100K_PU] = ADC5_CHAN_TEMP("amux_thm1_100k_pu", 1, + [ADC5_AMUX_THM1_100K_PU] = ADC5_CHAN_TEMP("amux_thm1_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) - [ADC5_AMUX_THM2_100K_PU] = ADC5_CHAN_TEMP("amux_thm2_100k_pu", 1, + [ADC5_AMUX_THM2_100K_PU] = ADC5_CHAN_TEMP("amux_thm2_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) - [ADC5_AMUX_THM3_100K_PU] = ADC5_CHAN_TEMP("amux_thm3_100k_pu", 1, + [ADC5_AMUX_THM3_100K_PU] = ADC5_CHAN_TEMP("amux_thm3_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) - [ADC5_AMUX_THM2] = ADC5_CHAN_TEMP("amux_thm2", 1, + [ADC5_AMUX_THM2] = ADC5_CHAN_TEMP("amux_thm2", 0, SCALE_HW_CALIB_PM5_SMB_TEMP) };
static const struct adc5_channels adc5_chans_rev2[ADC5_MAX_CHANNEL] = { - [ADC5_REF_GND] = ADC5_CHAN_VOLT("ref_gnd", 1, + [ADC5_REF_GND] = ADC5_CHAN_VOLT("ref_gnd", 0, SCALE_HW_CALIB_DEFAULT) - [ADC5_1P25VREF] = ADC5_CHAN_VOLT("vref_1p25", 1, + [ADC5_1P25VREF] = ADC5_CHAN_VOLT("vref_1p25", 0, SCALE_HW_CALIB_DEFAULT) - [ADC5_VPH_PWR] = ADC5_CHAN_VOLT("vph_pwr", 3, + [ADC5_VPH_PWR] = ADC5_CHAN_VOLT("vph_pwr", 1, SCALE_HW_CALIB_DEFAULT) - [ADC5_VBAT_SNS] = ADC5_CHAN_VOLT("vbat_sns", 3, + [ADC5_VBAT_SNS] = ADC5_CHAN_VOLT("vbat_sns", 1, SCALE_HW_CALIB_DEFAULT) - [ADC5_VCOIN] = ADC5_CHAN_VOLT("vcoin", 3, + [ADC5_VCOIN] = ADC5_CHAN_VOLT("vcoin", 1, SCALE_HW_CALIB_DEFAULT) - [ADC5_DIE_TEMP] = ADC5_CHAN_TEMP("die_temp", 1, + [ADC5_DIE_TEMP] = ADC5_CHAN_TEMP("die_temp", 0, SCALE_HW_CALIB_PMIC_THERM) - [ADC5_AMUX_THM1_100K_PU] = ADC5_CHAN_TEMP("amux_thm1_100k_pu", 1, + [ADC5_AMUX_THM1_100K_PU] = ADC5_CHAN_TEMP("amux_thm1_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) - [ADC5_AMUX_THM2_100K_PU] = ADC5_CHAN_TEMP("amux_thm2_100k_pu", 1, + [ADC5_AMUX_THM2_100K_PU] = ADC5_CHAN_TEMP("amux_thm2_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) - [ADC5_AMUX_THM3_100K_PU] = ADC5_CHAN_TEMP("amux_thm3_100k_pu", 1, + [ADC5_AMUX_THM3_100K_PU] = ADC5_CHAN_TEMP("amux_thm3_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) - [ADC5_AMUX_THM4_100K_PU] = ADC5_CHAN_TEMP("amux_thm4_100k_pu", 1, + [ADC5_AMUX_THM4_100K_PU] = ADC5_CHAN_TEMP("amux_thm4_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) - [ADC5_AMUX_THM5_100K_PU] = ADC5_CHAN_TEMP("amux_thm5_100k_pu", 1, + [ADC5_AMUX_THM5_100K_PU] = ADC5_CHAN_TEMP("amux_thm5_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) - [ADC5_XO_THERM_100K_PU] = ADC5_CHAN_TEMP("xo_therm_100k_pu", 1, + [ADC5_XO_THERM_100K_PU] = ADC5_CHAN_TEMP("xo_therm_100k_pu", 0, SCALE_HW_CALIB_THERM_100K_PULLUP) };
@@ -558,6 +559,9 @@ static int adc5_get_dt_channel_data(struct adc5_chip *adc, return ret; } prop->prescale = ret; + } else { + prop->prescale = + adc->data->adc_chans[prop->channel].prescale_index; }
ret = of_property_read_u32(node, "qcom,hw-settle-time", &value);
From: Jonas Danielsson jonas@orbital-systems.com
[ Upstream commit ae460c115b7aa50c9a36cf78fced07b27962c9d0 ]
On our AT91SAM9260 board we use the same sdio bus for wifi and for the sd card slot. This caused the atmel-mci to give the following splat on the serial console:
------------[ cut here ]------------ WARNING: CPU: 0 PID: 538 at drivers/mmc/host/atmel-mci.c:859 atmci_send_command+0x24/0x44 Modules linked in: CPU: 0 PID: 538 Comm: mmcqd/0 Not tainted 4.14.76 #14 Hardware name: Atmel AT91SAM9 [<c000fccc>] (unwind_backtrace) from [<c000d3dc>] (show_stack+0x10/0x14) [<c000d3dc>] (show_stack) from [<c0017644>] (__warn+0xd8/0xf4) [<c0017644>] (__warn) from [<c0017704>] (warn_slowpath_null+0x1c/0x24) [<c0017704>] (warn_slowpath_null) from [<c033bb9c>] (atmci_send_command+0x24/0x44) [<c033bb9c>] (atmci_send_command) from [<c033e984>] (atmci_start_request+0x1f4/0x2dc) [<c033e984>] (atmci_start_request) from [<c033f3b4>] (atmci_request+0xf0/0x164) [<c033f3b4>] (atmci_request) from [<c0327108>] (mmc_start_request+0x280/0x2d0) [<c0327108>] (mmc_start_request) from [<c032800c>] (mmc_start_areq+0x230/0x330) [<c032800c>] (mmc_start_areq) from [<c03366f8>] (mmc_blk_issue_rw_rq+0xc4/0x310) [<c03366f8>] (mmc_blk_issue_rw_rq) from [<c03372c4>] (mmc_blk_issue_rq+0x118/0x5ac) [<c03372c4>] (mmc_blk_issue_rq) from [<c033781c>] (mmc_queue_thread+0xc4/0x118) [<c033781c>] (mmc_queue_thread) from [<c002daf8>] (kthread+0x100/0x118) [<c002daf8>] (kthread) from [<c000a580>] (ret_from_fork+0x14/0x34) ---[ end trace 594371ddfa284bd6 ]---
This is: WARN_ON(host->cmd);
This was fixed on our board by letting atmci_request_end determine what state we are in. Instead of unconditionally setting it to STATE_IDLE on STATE_END_REQUEST.
Signed-off-by: Jonas Danielsson jonas@orbital-systems.com Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/mmc/host/atmel-mci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index be53044086c7..fbc56ee99682 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1954,13 +1954,14 @@ static void atmci_tasklet_func(unsigned long priv) }
atmci_request_end(host, host->mrq); - state = STATE_IDLE; + goto unlock; /* atmci_request_end() sets host->state */ break; } } while (state != prev_state);
host->state = state;
+unlock: spin_unlock(&host->lock); }
From: Qu Wenruo wqu@suse.com
[ Upstream commit 5eb193812a42dc49331f25137a38dfef9612d3e4 ]
Enhance btrfs_verify_dev_extents() to remember previous checked dev extents, so it can verify no dev extents can overlap.
Analysis from Hans:
"Imagine allocating a DATA|DUP chunk.
In the chunk allocator, we first set... max_stripe_size = SZ_1G; max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE ... which is 10GiB.
Then... /* we don't want a chunk larger than 10% of writeable space */ max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), max_chunk_size);
Imagine we only have one 7880MiB block device in this filesystem. Now max_chunk_size is down to 788MiB.
The next step in the code is to search for max_stripe_size * dev_stripes amount of free space on the device, which is in our example 1GiB * 2 = 2GiB. Imagine the device has exactly 1578MiB free in one contiguous piece. This amount of bytes will be put in devices_info[ndevs - 1].max_avail
Next we recalculate the stripe_size (which is actually the device extent length), based on the actual maximum amount of available raw disk space: stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
stripe_size is now 789MiB
Next we do... data_stripes = num_stripes / ncopies ...where data_stripes ends up as 1, because num_stripes is 2 (the amount of device extents we're going to have), and DUP has ncopies 2.
Next there's a check... if (stripe_size * data_stripes > max_chunk_size) ...which matches because 789MiB * 1 > 788MiB.
We go into the if code, and next is... stripe_size = div_u64(max_chunk_size, data_stripes); ...which resets stripe_size to max_chunk_size: 788MiB
Next is a fun one... /* bump the answer up to a 16MB boundary */ stripe_size = round_up(stripe_size, SZ_16M); ...which changes stripe_size from 788MiB to 800MiB.
We're not done changing stripe_size yet... /* But don't go higher than the limits we found while searching * for free extents */ stripe_size = min(devices_info[ndevs - 1].max_avail, stripe_size);
This is bad. max_avail is twice the stripe_size (we need to fit 2 device extents on the same device for DUP).
The result here is that 800MiB < 1578MiB, so it's unchanged. However, the resulting DUP chunk will need 1600MiB disk space, which isn't there, and the second dev_extent might extend into the next thing (next dev_extent? end of device?) for 22MiB.
The last shown line of code relies on a situation where there's twice the value of stripe_size present as value for the variable stripe_size when it's DUP. This was actually the case before commit 92e222df7b "btrfs: alloc_chunk: fix DUP stripe size handling", from which I quote: "[...] in the meantime there's a check to see if the stripe_size does not exceed max_chunk_size. Since during this check stripe_size is twice the amount as intended, the check will reduce the stripe_size to max_chunk_size if the actual correct to be used stripe_size is more than half the amount of max_chunk_size."
In the previous version of the code, the 16MiB alignment (why is this done, by the way?) would result in a 50% chance that it would actually do an 8MiB alignment for the individual dev_extents, since it was operating on double the size. Does this matter?
Does it matter that stripe_size can be set to anything which is not 16MiB aligned because of the amount of remaining available disk space which is just taken?
What is the main purpose of this round_up?
The most straightforward thing to do seems something like... stripe_size = min( div_u64(devices_info[ndevs - 1].max_avail, dev_stripes), stripe_size ) ..just putting half of the max_avail into stripe_size."
Link: https://lore.kernel.org/linux-btrfs/b3461a38-e5f8-f41d-c67c-2efac8129054@men... Reported-by: Hans van Kranenburg hans.van.kranenburg@mendix.com Signed-off-by: Qu Wenruo wqu@suse.com [ add analysis from report ] Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/volumes.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f435d397019e..a567ee0bf060 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7478,6 +7478,8 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) struct btrfs_path *path; struct btrfs_root *root = fs_info->dev_root; struct btrfs_key key; + u64 prev_devid = 0; + u64 prev_dev_ext_end = 0; int ret = 0;
key.objectid = 1; @@ -7522,10 +7524,22 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext); physical_len = btrfs_dev_extent_length(leaf, dext);
+ /* Check if this dev extent overlaps with the previous one */ + if (devid == prev_devid && physical_offset < prev_dev_ext_end) { + btrfs_err(fs_info, +"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu", + devid, physical_offset, prev_dev_ext_end); + ret = -EUCLEAN; + goto out; + } + ret = verify_one_dev_extent(fs_info, chunk_offset, devid, physical_offset, physical_len); if (ret < 0) goto out; + prev_devid = devid; + prev_dev_ext_end = physical_offset + physical_len; + ret = btrfs_next_item(root, path); if (ret < 0) goto out;
From: Hans van Kranenburg hans.van.kranenburg@mendix.com
[ Upstream commit baf92114c7e6dd6124aa3d506e4bc4b694da3bc3 ]
Commit 92e222df7b "btrfs: alloc_chunk: fix DUP stripe size handling" fixed calculating the stripe_size for a new DUP chunk.
However, the same calculation reappears a bit later, and that one was not changed yet. The resulting bug that is exposed is that the newly allocated device extents ('stripes') can have a few MiB overlap with the next thing stored after them, which is another device extent or the end of the disk.
The scenario in which this can happen is: * The block device for the filesystem is less than 10GiB in size. * The amount of contiguous free unallocated disk space chosen to use for chunk allocation is 20% of the total device size, or a few MiB more or less.
An example: - The filesystem device is 7880MiB (max_chunk_size gets set to 788MiB) - There's 1578MiB unallocated raw disk space left in one contiguous piece.
In this case stripe_size is first calculated as 789MiB, (half of 1578MiB).
Since 789MiB (stripe_size * data_stripes) > 788MiB (max_chunk_size), we enter the if block. Now stripe_size value is immediately overwritten while calculating an adjusted value based on max_chunk_size, which ends up as 788MiB.
Next, the value is rounded up to a 16MiB boundary, 800MiB, which is actually more than the value we had before. However, the last comparison fails to detect this, because it's comparing the value with the total amount of free space, which is about twice the size of stripe_size.
In the example above, this means that the resulting raw disk space being allocated is 1600MiB, while only a gap of 1578MiB has been found. The second device extent object for this DUP chunk will overlap for 22MiB with whatever comes next.
The underlying problem here is that the stripe_size is reused all the time for different things. So, when entering the code in the if block, stripe_size is immediately overwritten with something else. If later we decide we want to have the previous value back, then the logic to compute it was copy pasted in again.
With this change, the value in stripe_size is not unnecessarily destroyed, so the duplicated calculation is not needed any more.
Signed-off-by: Hans van Kranenburg hans.van.kranenburg@mendix.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/volumes.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a567ee0bf060..1797a82eb7df 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4768,19 +4768,17 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, /* * Use the number of data stripes to figure out how big this chunk * is really going to be in terms of logical address space, - * and compare that answer with the max chunk size + * and compare that answer with the max chunk size. If it's higher, + * we try to reduce stripe_size. */ if (stripe_size * data_stripes > max_chunk_size) { - stripe_size = div_u64(max_chunk_size, data_stripes); - - /* bump the answer up to a 16MB boundary */ - stripe_size = round_up(stripe_size, SZ_16M); - /* - * But don't go higher than the limits we found while searching - * for free extents + * Reduce stripe_size, round it up to a 16MB boundary again and + * then use it, unless it ends up being even bigger than the + * previous value we had already. */ - stripe_size = min(devices_info[ndevs - 1].max_avail, + stripe_size = min(round_up(div_u64(max_chunk_size, + data_stripes), SZ_16M), stripe_size); }
Hi Sasha,
On 1/8/19 8:25 PM, Sasha Levin wrote:
From: Hans van Kranenburg hans.van.kranenburg@mendix.com
[ Upstream commit baf92114c7e6dd6124aa3d506e4bc4b694da3bc3 ]
Commit 92e222df7b "btrfs: alloc_chunk: fix DUP stripe size handling" fixed calculating the stripe_size for a new DUP chunk.
That one also ended up as:
4.14-stable 0136bd7238b2cb8238426af4183ed0b02165c3f9
4.9-stable 8890bae03f4dba1c2292e5445682b556af4e8f1b
4.4-stable 97c3e46ef53748278286fc09dcc30b138d6677c4
3.16.57-rc1 f68f46284a199f6837c1d5b94a6ae979a2cc463c
While hitting the failure condition without adding "crafting" steps to make it exactly match the scenario is unlikely, it might be good if we just go all the way back with this regression fix?
Thanks, Hans
However, the same calculation reappears a bit later, and that one was not changed yet. The resulting bug that is exposed is that the newly allocated device extents ('stripes') can have a few MiB overlap with the next thing stored after them, which is another device extent or the end of the disk.
The scenario in which this can happen is:
- The block device for the filesystem is less than 10GiB in size.
- The amount of contiguous free unallocated disk space chosen to use for chunk allocation is 20% of the total device size, or a few MiB more or less.
An example:
- The filesystem device is 7880MiB (max_chunk_size gets set to 788MiB)
- There's 1578MiB unallocated raw disk space left in one contiguous piece.
In this case stripe_size is first calculated as 789MiB, (half of 1578MiB).
Since 789MiB (stripe_size * data_stripes) > 788MiB (max_chunk_size), we enter the if block. Now stripe_size value is immediately overwritten while calculating an adjusted value based on max_chunk_size, which ends up as 788MiB.
Next, the value is rounded up to a 16MiB boundary, 800MiB, which is actually more than the value we had before. However, the last comparison fails to detect this, because it's comparing the value with the total amount of free space, which is about twice the size of stripe_size.
In the example above, this means that the resulting raw disk space being allocated is 1600MiB, while only a gap of 1578MiB has been found. The second device extent object for this DUP chunk will overlap for 22MiB with whatever comes next.
The underlying problem here is that the stripe_size is reused all the time for different things. So, when entering the code in the if block, stripe_size is immediately overwritten with something else. If later we decide we want to have the previous value back, then the logic to compute it was copy pasted in again.
With this change, the value in stripe_size is not unnecessarily destroyed, so the duplicated calculation is not needed any more.
Signed-off-by: Hans van Kranenburg hans.van.kranenburg@mendix.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/btrfs/volumes.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a567ee0bf060..1797a82eb7df 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4768,19 +4768,17 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, /* * Use the number of data stripes to figure out how big this chunk * is really going to be in terms of logical address space,
* and compare that answer with the max chunk size
* and compare that answer with the max chunk size. If it's higher,
*/ if (stripe_size * data_stripes > max_chunk_size) {* we try to reduce stripe_size.
stripe_size = div_u64(max_chunk_size, data_stripes);
/* bump the answer up to a 16MB boundary */
stripe_size = round_up(stripe_size, SZ_16M);
- /*
* But don't go higher than the limits we found while searching
* for free extents
* Reduce stripe_size, round it up to a 16MB boundary again and
* then use it, unless it ends up being even bigger than the
*/* previous value we had already.
stripe_size = min(devices_info[ndevs - 1].max_avail,
stripe_size = min(round_up(div_u64(max_chunk_size,
}data_stripes), SZ_16M), stripe_size);
On Tue, Jan 08, 2019 at 11:52:02PM +0000, Hans van Kranenburg wrote:
Hi Sasha,
On 1/8/19 8:25 PM, Sasha Levin wrote:
From: Hans van Kranenburg hans.van.kranenburg@mendix.com
[ Upstream commit baf92114c7e6dd6124aa3d506e4bc4b694da3bc3 ]
Commit 92e222df7b "btrfs: alloc_chunk: fix DUP stripe size handling" fixed calculating the stripe_size for a new DUP chunk.
That one also ended up as:
4.14-stable 0136bd7238b2cb8238426af4183ed0b02165c3f9
4.9-stable 8890bae03f4dba1c2292e5445682b556af4e8f1b
4.4-stable 97c3e46ef53748278286fc09dcc30b138d6677c4
3.16.57-rc1 f68f46284a199f6837c1d5b94a6ae979a2cc463c
While hitting the failure condition without adding "crafting" steps to make it exactly match the scenario is unlikely, it might be good if we just go all the way back with this regression fix?
What do you mean with "all the way back"?
-- Thanks, Sasha
On 1/23/19 3:37 PM, Sasha Levin wrote:
On Tue, Jan 08, 2019 at 11:52:02PM +0000, Hans van Kranenburg wrote:
Hi Sasha,
On 1/8/19 8:25 PM, Sasha Levin wrote:
From: Hans van Kranenburg hans.van.kranenburg@mendix.com
[ Upstream commit baf92114c7e6dd6124aa3d506e4bc4b694da3bc3 ]
Commit 92e222df7b "btrfs: alloc_chunk: fix DUP stripe size handling" fixed calculating the stripe_size for a new DUP chunk.
That one also ended up as:
4.14-stable 0136bd7238b2cb8238426af4183ed0b02165c3f9
4.9-stable 8890bae03f4dba1c2292e5445682b556af4e8f1b
4.4-stable 97c3e46ef53748278286fc09dcc30b138d6677c4
3.16.57-rc1 f68f46284a199f6837c1d5b94a6ae979a2cc463c
While hitting the failure condition without adding "crafting" steps to make it exactly match the scenario is unlikely, it might be good if we just go all the way back with this regression fix?
What do you mean with "all the way back"?
Oh, apologies for not using unambigious phrasing.
I mean, it seems the autoselection only found 92e222df7b in places where it's actually called 92e222df7b, and not where it was cherry-picked.
So, for my own understanding: If I have to do something like this ever again, then should I have added it like this inside baf92114c?
Fixes: 92e222df7b ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 0136bd7238 ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 8890bae03f ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 97c3e46ef5 ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: f68f46284a ("btrfs: alloc_chunk: fix DUP stripe size handling")
Thanks for your patience, :)
On Wed, Jan 23, 2019 at 03:54:00PM +0000, Hans van Kranenburg wrote:
On 1/23/19 3:37 PM, Sasha Levin wrote:
On Tue, Jan 08, 2019 at 11:52:02PM +0000, Hans van Kranenburg wrote:
Hi Sasha,
On 1/8/19 8:25 PM, Sasha Levin wrote:
From: Hans van Kranenburg hans.van.kranenburg@mendix.com
[ Upstream commit baf92114c7e6dd6124aa3d506e4bc4b694da3bc3 ]
Commit 92e222df7b "btrfs: alloc_chunk: fix DUP stripe size handling" fixed calculating the stripe_size for a new DUP chunk.
That one also ended up as:
4.14-stable 0136bd7238b2cb8238426af4183ed0b02165c3f9
4.9-stable 8890bae03f4dba1c2292e5445682b556af4e8f1b
4.4-stable 97c3e46ef53748278286fc09dcc30b138d6677c4
3.16.57-rc1 f68f46284a199f6837c1d5b94a6ae979a2cc463c
While hitting the failure condition without adding "crafting" steps to make it exactly match the scenario is unlikely, it might be good if we just go all the way back with this regression fix?
What do you mean with "all the way back"?
Oh, apologies for not using unambigious phrasing.
I mean, it seems the autoselection only found 92e222df7b in places where it's actually called 92e222df7b, and not where it was cherry-picked.
So, for my own understanding: If I have to do something like this ever again, then should I have added it like this inside baf92114c?
Fixes: 92e222df7b ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 0136bd7238 ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 8890bae03f ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 97c3e46ef5 ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: f68f46284a ("btrfs: alloc_chunk: fix DUP stripe size handling")
The Fixes: tag should always refer to a commit id in Linus' tree, the stable tree backports use that for common reference so the individual commit ids of stable trees are not relevant.
On Wed, Jan 23, 2019 at 03:54:00PM +0000, Hans van Kranenburg wrote:
On 1/23/19 3:37 PM, Sasha Levin wrote:
On Tue, Jan 08, 2019 at 11:52:02PM +0000, Hans van Kranenburg wrote:
Hi Sasha,
On 1/8/19 8:25 PM, Sasha Levin wrote:
From: Hans van Kranenburg hans.van.kranenburg@mendix.com
[ Upstream commit baf92114c7e6dd6124aa3d506e4bc4b694da3bc3 ]
Commit 92e222df7b "btrfs: alloc_chunk: fix DUP stripe size handling" fixed calculating the stripe_size for a new DUP chunk.
That one also ended up as:
4.14-stable 0136bd7238b2cb8238426af4183ed0b02165c3f9
4.9-stable 8890bae03f4dba1c2292e5445682b556af4e8f1b
4.4-stable 97c3e46ef53748278286fc09dcc30b138d6677c4
3.16.57-rc1 f68f46284a199f6837c1d5b94a6ae979a2cc463c
While hitting the failure condition without adding "crafting" steps to make it exactly match the scenario is unlikely, it might be good if we just go all the way back with this regression fix?
What do you mean with "all the way back"?
Oh, apologies for not using unambigious phrasing.
I mean, it seems the autoselection only found 92e222df7b in places where it's actually called 92e222df7b, and not where it was cherry-picked.
So, for my own understanding: If I have to do something like this ever again, then should I have added it like this inside baf92114c?
Fixes: 92e222df7b ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 0136bd7238 ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 8890bae03f ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 97c3e46ef5 ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: f68f46284a ("btrfs: alloc_chunk: fix DUP stripe size handling")
Thanks for your patience, :)
Ah, the scripts have enough "brains" to deal with these on their own, so no need to annotate that much.
This patch wasn't applied to older trees because it didn't cherry-pick cleanly on top of them. Looking at it now, it seems to depend on 793ff2c88c6 ("btrfs: volumes: Cleanup stripe size calculation") which can possibly be picked up if it makes sense.
-- Thanks, Sasha
(Add to Cc: Ben Hutchings)
On 1/23/19 7:18 PM, Sasha Levin wrote:
On Wed, Jan 23, 2019 at 03:54:00PM +0000, Hans van Kranenburg wrote:
On 1/23/19 3:37 PM, Sasha Levin wrote:
On Tue, Jan 08, 2019 at 11:52:02PM +0000, Hans van Kranenburg wrote:
Hi Sasha,
On 1/8/19 8:25 PM, Sasha Levin wrote:
From: Hans van Kranenburg hans.van.kranenburg@mendix.com
[ Upstream commit baf92114c7e6dd6124aa3d506e4bc4b694da3bc3 ]
Commit 92e222df7b "btrfs: alloc_chunk: fix DUP stripe size handling" fixed calculating the stripe_size for a new DUP chunk.
That one also ended up as:
4.14-stable 0136bd7238b2cb8238426af4183ed0b02165c3f9
4.9-stable 8890bae03f4dba1c2292e5445682b556af4e8f1b
4.4-stable 97c3e46ef53748278286fc09dcc30b138d6677c4
3.16.57-rc1 f68f46284a199f6837c1d5b94a6ae979a2cc463c
While hitting the failure condition without adding "crafting" steps to make it exactly match the scenario is unlikely, it might be good if we just go all the way back with this regression fix?
What do you mean with "all the way back"?
Oh, apologies for not using unambigious phrasing.
I mean, it seems the autoselection only found 92e222df7b in places where it's actually called 92e222df7b, and not where it was cherry-picked.
So, for my own understanding: If I have to do something like this ever again, then should I have added it like this inside baf92114c?
Fixes: 92e222df7b ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 0136bd7238 ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 8890bae03f ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: 97c3e46ef5 ("btrfs: alloc_chunk: fix DUP stripe size handling") Fixes: f68f46284a ("btrfs: alloc_chunk: fix DUP stripe size handling")
Thanks for your patience, :)
Ah, the scripts have enough "brains" to deal with these on their own, so no need to annotate that much.
This patch wasn't applied to older trees because it didn't cherry-pick cleanly on top of them. Looking at it now, it seems to depend on 793ff2c88c6 ("btrfs: volumes: Cleanup stripe size calculation") which can possibly be picked up if it makes sense.
Ok, I get it.
The changes are really limited to the few lines in that if block. And 793ff2c88c6 also is, and it doesn't change the behavior, so that's good.
For 3.16.y it also first needs a little part of b8b93addde from David Sterba, which is a collection of coding style changes in quite some places. After that 793ff2c88c6 and then baf92114c7 apply cleanly, only touch that single part of the code and end up with the right thing.
I attached 3.16.y-WIP-partially-apply-b8b93addde.patch as quick and dirty example, please let me know how this should be done properly.
For 4.4, 4.9 and 4.14, first 793ff2c88c6 and then baf92114c7 indeed also end up with the right thing. I just tried that here.
Thanks,
On Wed, 2019-01-23 at 19:32 +0000, Hans van Kranenburg wrote: [...]
For 3.16.y it also first needs a little part of b8b93addde from David Sterba, which is a collection of coding style changes in quite some places. After that 793ff2c88c6 and then baf92114c7 apply cleanly, only touch that single part of the code and end up with the right thing.
I attached 3.16.y-WIP-partially-apply-b8b93addde.patch as quick and dirty example, please let me know how this should be done properly.
For 4.4, 4.9 and 4.14, first 793ff2c88c6 and then baf92114c7 indeed also end up with the right thing. I just tried that here.
I've belatedly queued up the required changes for 3.16, thanks.
Ben.
From: Anand Jain anand.jain@oracle.com
[ Upstream commit d189dd70e2556181732598956d808ea53cc8774e ]
The device replace cancel thread can race with the replace start thread and if fs_info::scrubs_running is not yet set, btrfs_scrub_cancel() will fail to stop the scrub thread.
The scrub thread continues with the scrub for replace which then will try to write to the target device and which is already freed by the cancel thread.
scrub_setup_ctx() warns as tgtdev is NULL.
struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) { ... if (is_dev_replace) { WARN_ON(!fs_info->dev_replace.tgtdev); <=== sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO; sctx->wr_tgtdev = fs_info->dev_replace.tgtdev; sctx->flush_all_writes = false; }
[ 6724.497655] BTRFS info (device sdb): dev_replace from /dev/sdb (devid 1) to /dev/sdc started [ 6753.945017] BTRFS info (device sdb): dev_replace from /dev/sdb (devid 1) to /dev/sdc canceled [ 6852.426700] WARNING: CPU: 0 PID: 4494 at fs/btrfs/scrub.c:622 scrub_setup_ctx.isra.19+0x220/0x230 [btrfs] ... [ 6852.428928] RIP: 0010:scrub_setup_ctx.isra.19+0x220/0x230 [btrfs] ... [ 6852.432970] Call Trace: [ 6852.433202] btrfs_scrub_dev+0x19b/0x5c0 [btrfs] [ 6852.433471] btrfs_dev_replace_start+0x48c/0x6a0 [btrfs] [ 6852.433800] btrfs_dev_replace_by_ioctl+0x3a/0x60 [btrfs] [ 6852.434097] btrfs_ioctl+0x2476/0x2d20 [btrfs] [ 6852.434365] ? do_sigaction+0x7d/0x1e0 [ 6852.434623] do_vfs_ioctl+0xa9/0x6c0 [ 6852.434865] ? syscall_trace_enter+0x1c8/0x310 [ 6852.435124] ? syscall_trace_enter+0x1c8/0x310 [ 6852.435387] ksys_ioctl+0x60/0x90 [ 6852.435663] __x64_sys_ioctl+0x16/0x20 [ 6852.435907] do_syscall_64+0x50/0x180 [ 6852.436150] entry_SYSCALL_64_after_hwframe+0x49/0xbe
Further, as the replace thread enters scrub_write_page_to_dev_replace() without the target device it panics:
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, struct scrub_page *spage) { ... bio_set_dev(bio, sbio->dev->bdev); <======
[ 6929.715145] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0 .. [ 6929.717106] Workqueue: btrfs-scrub btrfs_scrub_helper [btrfs] [ 6929.717420] RIP: 0010:scrub_write_page_to_dev_replace+0xb4/0x260 [btrfs] .. [ 6929.721430] Call Trace: [ 6929.721663] scrub_write_block_to_dev_replace+0x3f/0x60 [btrfs] [ 6929.721975] scrub_bio_end_io_worker+0x1af/0x490 [btrfs] [ 6929.722277] normal_work_helper+0xf0/0x4c0 [btrfs] [ 6929.722552] process_one_work+0x1f4/0x520 [ 6929.722805] ? process_one_work+0x16e/0x520 [ 6929.723063] worker_thread+0x46/0x3d0 [ 6929.723313] kthread+0xf8/0x130 [ 6929.723544] ? process_one_work+0x520/0x520 [ 6929.723800] ? kthread_delayed_work_timer_fn+0x80/0x80 [ 6929.724081] ret_from_fork+0x3a/0x50
Fix this by letting the btrfs_dev_replace_finishing() to do the job of cleaning after the cancel, including freeing of the target device. btrfs_dev_replace_finishing() is called when btrfs_scub_dev() returns along with the scrub return status.
Signed-off-by: Anand Jain anand.jain@oracle.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/dev-replace.c | 63 +++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 22 deletions(-)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 2aa48aecc52b..f6e9bf9f9a69 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -797,39 +797,58 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; btrfs_dev_replace_write_unlock(dev_replace); - goto leave; + break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: + result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; + tgt_device = dev_replace->tgtdev; + src_device = dev_replace->srcdev; + btrfs_dev_replace_write_unlock(dev_replace); + btrfs_scrub_cancel(fs_info); + /* btrfs_dev_replace_finishing() will handle the cleanup part */ + btrfs_info_in_rcu(fs_info, + "dev_replace from %s (devid %llu) to %s canceled", + btrfs_dev_name(src_device), src_device->devid, + btrfs_dev_name(tgt_device)); + break; case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: + /* + * Scrub doing the replace isn't running so we need to do the + * cleanup step of btrfs_dev_replace_finishing() here + */ result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; tgt_device = dev_replace->tgtdev; src_device = dev_replace->srcdev; dev_replace->tgtdev = NULL; dev_replace->srcdev = NULL; - break; - } - dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; - dev_replace->time_stopped = ktime_get_real_seconds(); - dev_replace->item_needs_writeback = 1; - btrfs_dev_replace_write_unlock(dev_replace); - btrfs_scrub_cancel(fs_info); + dev_replace->replace_state = + BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; + dev_replace->time_stopped = ktime_get_real_seconds(); + dev_replace->item_needs_writeback = 1;
- trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); - return PTR_ERR(trans); - } - ret = btrfs_commit_transaction(trans); - WARN_ON(ret); + btrfs_dev_replace_write_unlock(dev_replace);
- btrfs_info_in_rcu(fs_info, - "dev_replace from %s (devid %llu) to %s canceled", - btrfs_dev_name(src_device), src_device->devid, - btrfs_dev_name(tgt_device)); + btrfs_scrub_cancel(fs_info); + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); + return PTR_ERR(trans); + } + ret = btrfs_commit_transaction(trans); + WARN_ON(ret);
- if (tgt_device) - btrfs_destroy_dev_replace_tgtdev(tgt_device); + btrfs_info_in_rcu(fs_info, + "suspended dev_replace from %s (devid %llu) to %s canceled", + btrfs_dev_name(src_device), src_device->devid, + btrfs_dev_name(tgt_device)); + + if (tgt_device) + btrfs_destroy_dev_replace_tgtdev(tgt_device); + break; + default: + result = -EINVAL; + }
-leave: mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return result; }
From: Filipe Manana fdmanana@suse.com
[ Upstream commit 9a6f209e36500efac51528132a3e3083586eda5f ]
If the quota enable and snapshot creation ioctls are called concurrently we can get into a deadlock where the task enabling quotas will deadlock on the fs_info->qgroup_ioctl_lock mutex because it attempts to lock it twice, or the task creating a snapshot tries to commit the transaction while the task enabling quota waits for the former task to commit the transaction while holding the mutex. The following time diagrams show how both cases happen.
First scenario:
CPU 0 CPU 1
btrfs_ioctl() btrfs_ioctl_quota_ctl() btrfs_quota_enable() mutex_lock(fs_info->qgroup_ioctl_lock) btrfs_start_transaction()
btrfs_ioctl() btrfs_ioctl_snap_create_v2 create_snapshot() --> adds snapshot to the list pending_snapshots of the current transaction
btrfs_commit_transaction() create_pending_snapshots() create_pending_snapshot() qgroup_account_snapshot() btrfs_qgroup_inherit() mutex_lock(fs_info->qgroup_ioctl_lock) --> deadlock, mutex already locked by this task at btrfs_quota_enable()
Second scenario:
CPU 0 CPU 1
btrfs_ioctl() btrfs_ioctl_quota_ctl() btrfs_quota_enable() mutex_lock(fs_info->qgroup_ioctl_lock) btrfs_start_transaction()
btrfs_ioctl() btrfs_ioctl_snap_create_v2 create_snapshot() --> adds snapshot to the list pending_snapshots of the current transaction
btrfs_commit_transaction() --> waits for task at CPU 0 to release its transaction handle
btrfs_commit_transaction() --> sees another task started the transaction commit first --> releases its transaction handle --> waits for the transaction commit to be completed by the task at CPU 1
create_pending_snapshot() qgroup_account_snapshot() btrfs_qgroup_inherit() mutex_lock(fs_info->qgroup_ioctl_lock) --> deadlock, task at CPU 0 has the mutex locked but it is waiting for us to finish the transaction commit
So fix this by setting the quota enabled flag in fs_info after committing the transaction at btrfs_quota_enable(). This ends up serializing quota enable and snapshot creation as if the snapshot creation happened just before the quota enable request. The quota rescan task, scheduled after committing the transaction in btrfs_quote_enable(), will do the accounting.
Fixes: 6426c7ad697d ("btrfs: qgroup: Fix qgroup accounting when creating snapshot") Signed-off-by: Filipe Manana fdmanana@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/qgroup.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index f70825af6438..9e419f6878c5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1013,16 +1013,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) btrfs_abort_transaction(trans, ret); goto out_free_path; } - spin_lock(&fs_info->qgroup_lock); - fs_info->quota_root = quota_root; - set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - spin_unlock(&fs_info->qgroup_lock);
ret = btrfs_commit_transaction(trans); trans = NULL; if (ret) goto out_free_path;
+ /* + * Set quota enabled flag after committing the transaction, to avoid + * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot + * creation. + */ + spin_lock(&fs_info->qgroup_lock); + fs_info->quota_root = quota_root; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + spin_unlock(&fs_info->qgroup_lock); + ret = qgroup_rescan_init(fs_info, 0, 1); if (!ret) { qgroup_rescan_zero_tracking(fs_info);
From: Filipe Manana fdmanana@suse.com
[ Upstream commit 5a8067c0d17feb7579db0476191417b441a8996e ]
The available allocation bits members from struct btrfs_fs_info are protected by a sequence lock, and when starting balance we access them incorrectly in two different ways:
1) In the read sequence lock loop at btrfs_balance() we use the values we read from fs_info->avail_*_alloc_bits and we can immediately do actions that have side effects and can not be undone (printing a message and jumping to a label). This is wrong because a retry might be needed, so our actions must not have side effects and must be repeatable as long as read_seqretry() returns a non-zero value. In other words, we were essentially ignoring the sequence lock;
2) Right below the read sequence lock loop, we were reading the values from avail_metadata_alloc_bits and avail_data_alloc_bits without any protection from concurrent writers, that is, reading them outside of the read sequence lock critical section.
So fix this by making sure we only read the available allocation bits while in a read sequence lock critical section and that what we do in the critical section is repeatable (has nothing that can not be undone) so that any eventual retry that is needed is handled properly.
Fixes: de98ced9e743 ("Btrfs: use seqlock to protect fs_info->avail_{data, metadata, system}_alloc_bits") Fixes: 14506127979a ("btrfs: fix a bogus warning when converting only data or metadata") Reviewed-by: Nikolay Borisov nborisov@suse.com Signed-off-by: Filipe Manana fdmanana@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/volumes.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1797a82eb7df..ea5fa9df9405 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3724,6 +3724,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, int ret; u64 num_devices; unsigned seq; + bool reducing_integrity;
if (btrfs_fs_closing(fs_info) || atomic_read(&fs_info->balance_pause_req) || @@ -3803,24 +3804,30 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, !(bctl->sys.target & allowed)) || ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && (fs_info->avail_metadata_alloc_bits & allowed) && - !(bctl->meta.target & allowed))) { - if (bctl->flags & BTRFS_BALANCE_FORCE) { - btrfs_info(fs_info, - "balance: force reducing metadata integrity"); - } else { - btrfs_err(fs_info, - "balance: reduces metadata integrity, use --force if you want this"); - ret = -EINVAL; - goto out; - } - } + !(bctl->meta.target & allowed))) + reducing_integrity = true; + else + reducing_integrity = false; + + /* if we're not converting, the target field is uninitialized */ + meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ? + bctl->meta.target : fs_info->avail_metadata_alloc_bits; + data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ? + bctl->data.target : fs_info->avail_data_alloc_bits; } while (read_seqretry(&fs_info->profiles_lock, seq));
- /* if we're not converting, the target field is uninitialized */ - meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ? - bctl->meta.target : fs_info->avail_metadata_alloc_bits; - data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ? - bctl->data.target : fs_info->avail_data_alloc_bits; + if (reducing_integrity) { + if (bctl->flags & BTRFS_BALANCE_FORCE) { + btrfs_info(fs_info, + "balance: force reducing metadata integrity"); + } else { + btrfs_err(fs_info, + "balance: reduces metadata integrity, use --force if you want this"); + ret = -EINVAL; + goto out; + } + } + if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) < btrfs_get_num_tolerated_disk_barrier_failures(data_target)) { int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
From: Johannes Thumshirn jthumshirn@suse.de
[ Upstream commit 1690dd41e0cb1dade80850ed8a3eb0121b96d22f ]
In the error handling block, err holds the return value of either btrfs_del_root_ref() or btrfs_del_inode_ref() but it hasn't been checked since it's introduction with commit fe66a05a0679 (Btrfs: improve error handling for btrfs_insert_dir_item callers) in 2012.
If the error handling in the error handling fails, there's not much left to do and the abort either happened earlier in the callees or is necessary here.
So if one of btrfs_del_root_ref() or btrfs_del_inode_ref() failed, abort the transaction, but still return the original code of the failure stored in 'ret' as this will be reported to the user.
Signed-off-by: Johannes Thumshirn jthumshirn@suse.de Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9ea4c6f0352f..ff9268a60eaf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6406,14 +6406,19 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, err = btrfs_del_root_ref(trans, key.objectid, root->root_key.objectid, parent_ino, &local_index, name, name_len); - + if (err) + btrfs_abort_transaction(trans, err); } else if (add_backref) { u64 local_index; int err;
err = btrfs_del_inode_ref(trans, root, name, name_len, ino, parent_ino, &local_index); + if (err) + btrfs_abort_transaction(trans, err); } + + /* Return the original error code */ return ret; }
From: Sergey Senozhatsky sergey.senozhatsky.work@gmail.com
[ Upstream commit d72402145ace0697a6a9e8e75a3de5bf3375f78d ]
LKP has hit yet another circular locking dependency between uart console drivers and debugobjects [1]:
CPU0 CPU1
rhltable_init() __init_work() debug_object_init uart_shutdown() /* db->lock */ /* uart_port->lock */ debug_print_object() free_page() printk() call_console_drivers() debug_check_no_obj_freed() /* uart_port->lock */ /* db->lock */ debug_print_object()
So there are two dependency chains: uart_port->lock -> db->lock And db->lock -> uart_port->lock
This particular circular locking dependency can be addressed in several ways:
a) One way would be to move debug_print_object() out of db->lock scope and, thus, break the db->lock -> uart_port->lock chain. b) Another one would be to free() transmit buffer page out of db->lock in UART code; which is what this patch does.
It makes sense to apply a) and b) independently: there are too many things going on behind free(), none of which depend on uart_port->lock.
The patch fixes transmit buffer page free() in uart_shutdown() and, additionally, in uart_port_startup() (as was suggested by Dmitry Safonov).
[1] https://lore.kernel.org/lkml/20181211091154.GL23332@shao2-debian/T/#u Signed-off-by: Sergey Senozhatsky sergey.senozhatsky@gmail.com Reviewed-by: Petr Mladek pmladek@suse.com Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Jiri Slaby jslaby@suse.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Waiman Long longman@redhat.com Cc: Dmitry Safonov dima@arista.com Cc: Steven Rostedt rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/tty/serial/serial_core.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index c439a5a1e6c0..d4cca5bdaf1c 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -205,10 +205,15 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state, if (!state->xmit.buf) { state->xmit.buf = (unsigned char *) page; uart_circ_clear(&state->xmit); + uart_port_unlock(uport, flags); } else { + uart_port_unlock(uport, flags); + /* + * Do not free() the page under the port lock, see + * uart_shutdown(). + */ free_page(page); } - uart_port_unlock(uport, flags);
retval = uport->ops->startup(uport); if (retval == 0) { @@ -268,6 +273,7 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) struct uart_port *uport = uart_port_check(state); struct tty_port *port = &state->port; unsigned long flags = 0; + char *xmit_buf = NULL;
/* * Set the TTY IO error marker @@ -298,14 +304,18 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) tty_port_set_suspended(port, 0);
/* - * Free the transmit buffer page. + * Do not free() the transmit buffer page under the port lock since + * this can create various circular locking scenarios. For instance, + * console driver may need to allocate/free a debug object, which + * can endup in printk() recursion. */ uart_port_lock(state, flags); - if (state->xmit.buf) { - free_page((unsigned long)state->xmit.buf); - state->xmit.buf = NULL; - } + xmit_buf = state->xmit.buf; + state->xmit.buf = NULL; uart_port_unlock(uport, flags); + + if (xmit_buf) + free_page((unsigned long)xmit_buf); }
/**
From: Adrian Hunter adrian.hunter@intel.com
[ Upstream commit 0631ca3a6e6edd23a2ca7cab707d1abf291a097d ]
Fix following warnings:
event-parse.c: In function ‘tep_find_event_by_name’: event-parse.c:3521:21: warning: ‘event’ may be used uninitialized in this function [-Wmaybe-uninitialized] pevent->last_event = event; ~~~~~~~~~~~~~~~~~~~^~~~~~~ CC ui/gtk/hists.o LINK plugin_mac80211.so CC nlattr.o event-parse.c: In function ‘tep_data_lat_fmt’: event-parse.c:5200:4: warning: ‘migrate_disable’ may be used uninitialized in this function [-Wmaybe-uninitialized] trace_seq_printf(s, "%d", migrate_disable); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ event-parse.c:5207:4: warning: ‘lock_depth’ may be used uninitialized in this function [-Wmaybe-uninitialized] trace_seq_printf(s, "%d", lock_depth); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LINK plugin_sched_switch.so LINK plugin_function.so LINK plugin_xen.so event-parse.c: In function ‘tep_event_info’: event-parse.c:5047:7: warning: ‘len_arg’ may be used uninitialized in this function [-Wmaybe-uninitialized] trace_seq_printf(s, format, len_arg, (char)val); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ event-parse.c:4884:6: note: ‘len_arg’ was declared here int len_arg; ^~~~~~~ event-parse.c:4338:11: warning: ‘vsize’ may be used uninitialized in this function [-Wmaybe-uninitialized] val = tep_read_number(pevent, bptr, vsize); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ event-parse.c:4224:6: note: ‘vsize’ was declared here int vsize; ^~~~~
$ gcc --version gcc (Clear Linux OS for Intel Architecture) 8.2.1 20180502
Signed-off-by: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@redhat.com Cc: Steven Rostedt (VMware) rostedt@goodmis.org Cc: Tzvetomir Stoyanov (VMware) tz.stoyanov@gmail.com Link: http://lkml.kernel.org/r/20181122112937.10582-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/lib/traceevent/event-parse.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 3692f29fee46..fbd6d6813fab 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3498,7 +3498,7 @@ struct tep_event_format * tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name) { - struct tep_event_format *event; + struct tep_event_format *event = NULL; int i;
if (pevent->last_event && @@ -4221,7 +4221,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s unsigned long long ip, val; char *ptr; void *bptr; - int vsize; + int vsize = 0;
field = pevent->bprint_buf_field; ip_field = pevent->bprint_ip_field; @@ -4881,7 +4881,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e char format[32]; int show_func; int len_as_arg; - int len_arg; + int len_arg = 0; int len; int ls;
@@ -5146,8 +5146,8 @@ void tep_data_lat_fmt(struct tep_handle *pevent, static int migrate_disable_exists; unsigned int lat_flags; unsigned int pc; - int lock_depth; - int migrate_disable; + int lock_depth = 0; + int migrate_disable = 0; int hardirq; int softirq; void *data = record->data;
From: Adrian Hunter adrian.hunter@intel.com
[ Upstream commit 1c6f709b9f96366cc47af23c05ecec9b8c0c392d ]
Users should never use 'pt=0', but if they do it may give a meaningless error:
$ perf record -e intel_pt/pt=0/u uname Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (intel_pt/pt=0/u).
Fix that by forcing 'pt=1'.
Committer testing:
# perf record -e intel_pt/pt=0/u uname Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (intel_pt/pt=0/u). /bin/dmesg | grep -i perf may provide additional information.
# perf record -e intel_pt/pt=0/u uname pt=0 doesn't make sense, forcing pt=1 Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data ] #
Signed-off-by: Adrian Hunter adrian.hunter@intel.com Tested-by: Arnaldo Carvalho de Melo acme@redhat.com Cc: Jiri Olsa jolsa@redhat.com Link: http://lkml.kernel.org/r/b7c5b4e5-9497-10e5-fd43-5f3e4a0fe51d@intel.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/arch/x86/util/intel-pt.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index db0ba8caf5a2..ba8ecaf52200 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -524,10 +524,21 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, struct perf_evsel *evsel) { int err; + char c;
if (!evsel) return 0;
+ /* + * If supported, force pass-through config term (pt=1) even if user + * sets pt=0, which avoids senseless kernel errors. + */ + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + !(evsel->attr.config & 1)) { + pr_warning("pt=0 doesn't make sense, forcing pt=1\n"); + evsel->attr.config |= 1; + } + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", "cyc_thresh", "caps/psb_cyc", evsel->attr.config);
From: Florian Fainelli f.fainelli@gmail.com
[ Upstream commit 24f967337f6d6bce931425769c0f5ff5cf2d212e ]
The breakpoint tests on the ARM 32-bit kernel are broken in several ways.
The breakpoint length requested does not necessarily match whether the function address has the Thumb bit (bit 0) set or not, and this does matter to the ARM kernel hw_breakpoint infrastructure. See [1] for background.
[1]: https://lkml.org/lkml/2018/11/15/205
As Will indicated, the overflow handling would require single-stepping which is not supported at the moment. Just disable those tests for the ARM 32-bit platforms and update the comment above to explain these limitations.
Co-developed-by: Will Deacon will.deacon@arm.com Signed-off-by: Florian Fainelli f.fainelli@gmail.com Signed-off-by: Will Deacon will.deacon@arm.com Acked-by: Jiri Olsa jolsa@redhat.com Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Namhyung Kim namhyung@kernel.org Cc: Peter Zijlstra peterz@infradead.org Link: http://lkml.kernel.org/r/20181203191138.2419-1-f.fainelli@gmail.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/tests/bp_signal.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index a467615c5a0e..910e25e64188 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -291,12 +291,20 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused
bool test__bp_signal_is_supported(void) { -/* - * The powerpc so far does not have support to even create - * instruction breakpoint using the perf event interface. - * Once it's there we can release this. - */ -#if defined(__powerpc__) || defined(__s390x__) + /* + * PowerPC and S390 do not support creation of instruction + * breakpoints using the perf_event interface. + * + * ARM requires explicit rounding down of the instruction + * pointer in Thumb mode, and then requires the single-step + * to be handled explicitly in the overflow handler to avoid + * stepping into the SIGIO handler and getting stuck on the + * breakpointed instruction. + * + * Just disable the test for these architectures until these + * issues are resolved. + */ +#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) return false; #else return true;
From: Arnaldo Carvalho de Melo acme@redhat.com
[ Upstream commit 2f5302533f306d5ee87bd375aef9ca35b91762cb ]
The strncpy() function may leave the destination string buffer unterminated, better use strlcpy() that we have a __weak fallback implementation for systems without it.
In this specific case this would only happen if fgets() was buggy, as its man page states that it should read one less byte than the size of the destination buffer, so that it can put the nul byte at the end of it, so it would never copy 255 non-nul chars, as fgets reads into the orig buffer at most 254 non-nul chars and terminates it. But lets just switch to strlcpy to keep the original intent and silence the gcc 8.2 warning.
This fixes this warning on an Alpine Linux Edge system with gcc 8.2:
In function 'cpu_model', inlined from 'svg_cpu_box' at util/svghelper.c:378:2: util/svghelper.c:337:5: error: 'strncpy' output may be truncated copying 255 bytes from a string of length 255 [-Werror=stringop-truncation] strncpy(cpu_m, &buf[13], 255); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Cc: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@kernel.org Cc: Namhyung Kim namhyung@kernel.org Cc: Arjan van de Ven arjan@linux.intel.com Fixes: f48d55ce7871 ("perf: Add a SVG helper library file") Link: https://lkml.kernel.org/n/tip-xzkoo0gyr56gej39ltivuh9g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/svghelper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 1cbada2dc6be..f735ee038713 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -334,7 +334,7 @@ static char *cpu_model(void) if (file) { while (fgets(buf, 255, file)) { if (strstr(buf, "model name")) { - strncpy(cpu_m, &buf[13], 255); + strlcpy(cpu_m, &buf[13], 255); break; } }
From: Arnaldo Carvalho de Melo acme@redhat.com
[ Upstream commit bd8d57fb7e25e9fcf67a9eef5fa13aabe2016e07 ]
The strncpy() function may leave the destination string buffer unterminated, better use strlcpy() that we have a __weak fallback implementation for systems without it.
This fixes this warning on an Alpine Linux Edge system with gcc 8.2:
util/parse-events.c: In function 'print_symbol_events': util/parse-events.c:2465:4: error: 'strncpy' specified bound 100 equals destination size [-Werror=stringop-truncation] strncpy(name, syms->symbol, MAX_NAME_LEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'print_symbol_events.constprop', inlined from 'print_events' at util/parse-events.c:2508:2: util/parse-events.c:2465:4: error: 'strncpy' specified bound 100 equals destination size [-Werror=stringop-truncation] strncpy(name, syms->symbol, MAX_NAME_LEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'print_symbol_events.constprop', inlined from 'print_events' at util/parse-events.c:2511:2: util/parse-events.c:2465:4: error: 'strncpy' specified bound 100 equals destination size [-Werror=stringop-truncation] strncpy(name, syms->symbol, MAX_NAME_LEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors
Cc: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@kernel.org Cc: Namhyung Kim namhyung@kernel.org Fixes: 947b4ad1d198 ("perf list: Fix max event string size") Link: https://lkml.kernel.org/n/tip-b663e33bm6x8hrkie4uxh7u2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 59be3466d64d..920e1e6551dd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2462,7 +2462,7 @@ void print_symbol_events(const char *event_glob, unsigned type, if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else - strncpy(name, syms->symbol, MAX_NAME_LEN); + strlcpy(name, syms->symbol, MAX_NAME_LEN);
evt_list[evt_i] = strdup(name); if (evt_list[evt_i] == NULL)
From: Andi Kleen ak@linux.intel.com
[ Upstream commit 91b2b97025097ce7ca7536bc87eba2bf14760fb4 ]
Fix incorrect event names for the Load_Miss_Real_Latency metric for Skylake and Skylake Server.
Fixes https://github.com/andikleen/pmu-tools/issues/158
Before:
% perf stat -M Load_Miss_Real_Latency true event syntax error: '..ss.pending,mem_load_retired.l1_miss_ps,mem_load_retired.fb_hit_ps}:W' ___ parser error
Usage: perf stat [<options>] [<command>]
-M, --metrics <metric/metric group list> monitor specified metrics or metric groups (separated by ,)
After:
% perf stat -M Load_Miss_Real_Latency true
Performance counter stats for 'true':
279,204 l1d_pend_miss.pending # 14.0 Load_Miss_Real_Latency 4,784 mem_load_uops_retired.l1_miss 15,188 mem_load_uops_retired.hit_lfb
0.000899640 seconds time elapsed
Signed-off-by: Andi Kleen ak@linux.intel.com Acked-by: Jiri Olsa jolsa@kernel.org Tested-by: Arnaldo Carvalho de Melo acme@redhat.com Link: http://lkml.kernel.org/r/20181120050635.4215-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index 36c903faed0b..71e9737f4614 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -73,7 +73,7 @@ }, { "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 36c903faed0b..71e9737f4614 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -73,7 +73,7 @@ }, { "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" },
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit 06aa151ad1fc74a49b45336672515774a678d78d ]
If same destination IP address config is already existing, that config is just used. MAC address also should be same. However, there is no MAC address checking routine. So that MAC address checking routine is added.
test commands: %iptables -A INPUT -p tcp -i lo -d 192.168.0.5 --dport 80 \ -j CLUSTERIP --new --hashmode sourceip \ --clustermac 01:00:5e:00:00:20 --total-nodes 2 --local-node 1 %iptables -A INPUT -p tcp -i lo -d 192.168.0.5 --dport 80 \ -j CLUSTERIP --new --hashmode sourceip \ --clustermac 01:00:5e:00:00:21 --total-nodes 2 --local-node 1
After this patch, above commands are disallowed.
Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2c8d313ae216..e40e6795bd20 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -496,7 +496,8 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + return -EINVAL;
ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) {
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit b12f7bad5ad3724d19754390a3e80928525c0769 ]
When network namespace is destroyed, both clusterip_tg_destroy() and clusterip_net_exit() are called. and clusterip_net_exit() is called before clusterip_tg_destroy(). Hence cleanup check code in clusterip_net_exit() doesn't make sense.
test commands: %ip netns add vm1 %ip netns exec vm1 bash %ip link set lo up %iptables -A INPUT -p tcp -i lo -d 192.168.0.5 --dport 80 \ -j CLUSTERIP --new --hashmode sourceip \ --clustermac 01:00:5e:00:00:20 --total-nodes 2 --local-node 1 %exit %ip netns del vm1
splat looks like: [ 341.184508] WARNING: CPU: 1 PID: 87 at net/ipv4/netfilter/ipt_CLUSTERIP.c:840 clusterip_net_exit+0x319/0x380 [ipt_CLUSTERIP] [ 341.184850] Modules linked in: ipt_CLUSTERIP nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_tcpudp iptable_filter bpfilter ip_tables x_tables [ 341.184850] CPU: 1 PID: 87 Comm: kworker/u4:2 Not tainted 4.19.0-rc5+ #16 [ 341.227509] Workqueue: netns cleanup_net [ 341.227509] RIP: 0010:clusterip_net_exit+0x319/0x380 [ipt_CLUSTERIP] [ 341.227509] Code: 0f 85 7f fe ff ff 48 c7 c2 80 64 2c c0 be a8 02 00 00 48 c7 c7 a0 63 2c c0 c6 05 18 6e 00 00 01 e8 bc 38 ff f5 e9 5b fe ff ff <0f> 0b e9 33 ff ff ff e8 4b 90 50 f6 e9 2d fe ff ff 48 89 df e8 de [ 341.227509] RSP: 0018:ffff88011086f408 EFLAGS: 00010202 [ 341.227509] RAX: dffffc0000000000 RBX: 1ffff1002210de85 RCX: 0000000000000000 [ 341.227509] RDX: 1ffff1002210de85 RSI: ffff880110813be8 RDI: ffffed002210de58 [ 341.227509] RBP: ffff88011086f4d0 R08: 0000000000000000 R09: 0000000000000000 [ 341.227509] R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff1002210de81 [ 341.227509] R13: ffff880110625a48 R14: ffff880114cec8c8 R15: 0000000000000014 [ 341.227509] FS: 0000000000000000(0000) GS:ffff880116600000(0000) knlGS:0000000000000000 [ 341.227509] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 341.227509] CR2: 00007f11fd38e000 CR3: 000000013ca16000 CR4: 00000000001006e0 [ 341.227509] Call Trace: [ 341.227509] ? __clusterip_config_find+0x460/0x460 [ipt_CLUSTERIP] [ 341.227509] ? default_device_exit+0x1ca/0x270 [ 341.227509] ? remove_proc_entry+0x1cd/0x390 [ 341.227509] ? dev_change_net_namespace+0xd00/0xd00 [ 341.227509] ? __init_waitqueue_head+0x130/0x130 [ 341.227509] ops_exit_list.isra.10+0x94/0x140 [ 341.227509] cleanup_net+0x45b/0x900 [ ... ]
Fixes: 613d0776d3fe ("netfilter: exit_net cleanup check added") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index e40e6795bd20..33491cb6b9d1 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -838,7 +838,6 @@ static void clusterip_net_exit(struct net *net) cn->procdir = NULL; #endif nf_unregister_net_hook(net, &cip_arp_ops); - WARN_ON_ONCE(!list_empty(&cn->configs)); }
static struct pernet_operations clusterip_net_ops = {
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit 5a86d68bcf02f2d1e9a5897dd482079fd5f75e7f ]
When network namespace is destroyed, cleanup_net() is called. cleanup_net() holds pernet_ops_rwsem then calls each ->exit callback. So that clusterip_tg_destroy() is called by cleanup_net(). And clusterip_tg_destroy() calls unregister_netdevice_notifier().
But both cleanup_net() and clusterip_tg_destroy() hold same lock(pernet_ops_rwsem). hence deadlock occurrs.
After this patch, only 1 notifier is registered when module is inserted. And all of configs are added to per-net list.
test commands: %ip netns add vm1 %ip netns exec vm1 bash %ip link set lo up %iptables -A INPUT -p tcp -i lo -d 192.168.0.5 --dport 80 \ -j CLUSTERIP --new --hashmode sourceip \ --clustermac 01:00:5e:00:00:20 --total-nodes 2 --local-node 1 %exit %ip netns del vm1
splat looks like: [ 341.809674] ============================================ [ 341.809674] WARNING: possible recursive locking detected [ 341.809674] 4.19.0-rc5+ #16 Tainted: G W [ 341.809674] -------------------------------------------- [ 341.809674] kworker/u4:2/87 is trying to acquire lock: [ 341.809674] 000000005da2d519 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x8c/0x460 [ 341.809674] [ 341.809674] but task is already holding lock: [ 341.809674] 000000005da2d519 (pernet_ops_rwsem){++++}, at: cleanup_net+0x119/0x900 [ 341.809674] [ 341.809674] other info that might help us debug this: [ 341.809674] Possible unsafe locking scenario: [ 341.809674] [ 341.809674] CPU0 [ 341.809674] ---- [ 341.809674] lock(pernet_ops_rwsem); [ 341.809674] lock(pernet_ops_rwsem); [ 341.809674] [ 341.809674] *** DEADLOCK *** [ 341.809674] [ 341.809674] May be due to missing lock nesting notation [ 341.809674] [ 341.809674] 3 locks held by kworker/u4:2/87: [ 341.809674] #0: 00000000d9df6c92 ((wq_completion)"%s""netns"){+.+.}, at: process_one_work+0xafe/0x1de0 [ 341.809674] #1: 00000000c2cbcee2 (net_cleanup_work){+.+.}, at: process_one_work+0xb60/0x1de0 [ 341.809674] #2: 000000005da2d519 (pernet_ops_rwsem){++++}, at: cleanup_net+0x119/0x900 [ 341.809674] [ 341.809674] stack backtrace: [ 341.809674] CPU: 1 PID: 87 Comm: kworker/u4:2 Tainted: G W 4.19.0-rc5+ #16 [ 341.809674] Workqueue: netns cleanup_net [ 341.809674] Call Trace: [ ... ] [ 342.070196] down_write+0x93/0x160 [ 342.070196] ? unregister_netdevice_notifier+0x8c/0x460 [ 342.070196] ? down_read+0x1e0/0x1e0 [ 342.070196] ? sched_clock_cpu+0x126/0x170 [ 342.070196] ? find_held_lock+0x39/0x1c0 [ 342.070196] unregister_netdevice_notifier+0x8c/0x460 [ 342.070196] ? register_netdevice_notifier+0x790/0x790 [ 342.070196] ? __local_bh_enable_ip+0xe9/0x1b0 [ 342.070196] ? __local_bh_enable_ip+0xe9/0x1b0 [ 342.070196] ? clusterip_tg_destroy+0x372/0x650 [ipt_CLUSTERIP] [ 342.070196] ? trace_hardirqs_on+0x93/0x210 [ 342.070196] ? __bpf_trace_preemptirq_template+0x10/0x10 [ 342.070196] ? clusterip_tg_destroy+0x372/0x650 [ipt_CLUSTERIP] [ 342.123094] clusterip_tg_destroy+0x3ad/0x650 [ipt_CLUSTERIP] [ 342.123094] ? clusterip_net_init+0x3d0/0x3d0 [ipt_CLUSTERIP] [ 342.123094] ? cleanup_match+0x17d/0x200 [ip_tables] [ 342.123094] ? xt_unregister_table+0x215/0x300 [x_tables] [ 342.123094] ? kfree+0xe2/0x2a0 [ 342.123094] cleanup_entry+0x1d5/0x2f0 [ip_tables] [ 342.123094] ? cleanup_match+0x200/0x200 [ip_tables] [ 342.123094] __ipt_unregister_table+0x9b/0x1a0 [ip_tables] [ 342.123094] iptable_filter_net_exit+0x43/0x80 [iptable_filter] [ 342.123094] ops_exit_list.isra.10+0x94/0x140 [ 342.123094] cleanup_net+0x45b/0x900 [ ... ]
Fixes: 202f59afd441 ("netfilter: ipt_CLUSTERIP: do not hold dev") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 155 ++++++++++++++++------------- 1 file changed, 87 insertions(+), 68 deletions(-)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 33491cb6b9d1..fb1e7f237f53 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -57,17 +57,14 @@ struct clusterip_config { enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ struct rcu_head rcu; - + struct net *net; /* netns for pernet list */ char ifname[IFNAMSIZ]; /* device ifname */ - struct notifier_block notifier; /* refresh c->ifindex in it */ };
#ifdef CONFIG_PROC_FS static const struct file_operations clusterip_proc_fops; #endif
-static unsigned int clusterip_net_id __read_mostly; - struct clusterip_net { struct list_head configs; /* lock protects the configs list */ @@ -78,16 +75,30 @@ struct clusterip_net { #endif };
+static unsigned int clusterip_net_id __read_mostly; +static inline struct clusterip_net *clusterip_pernet(struct net *net) +{ + return net_generic(net, clusterip_net_id); +} + static inline void clusterip_config_get(struct clusterip_config *c) { refcount_inc(&c->refcount); }
- static void clusterip_config_rcu_free(struct rcu_head *head) { - kfree(container_of(head, struct clusterip_config, rcu)); + struct clusterip_config *config; + struct net_device *dev; + + config = container_of(head, struct clusterip_config, rcu); + dev = dev_get_by_name(config->net, config->ifname); + if (dev) { + dev_mc_del(dev, config->clustermac); + dev_put(dev); + } + kfree(config); }
static inline void @@ -101,9 +112,9 @@ clusterip_config_put(struct clusterip_config *c) * entry(rule) is removed, remove the config from lists, but don't free it * yet, since proc-files could still be holding references */ static inline void -clusterip_config_entry_put(struct net *net, struct clusterip_config *c) +clusterip_config_entry_put(struct clusterip_config *c) { - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(c->net);
local_bh_disable(); if (refcount_dec_and_lock(&c->entries, &cn->lock)) { @@ -118,8 +129,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) spin_unlock(&cn->lock); local_bh_enable();
- unregister_netdevice_notifier(&c->notifier); - return; } local_bh_enable(); @@ -129,7 +138,7 @@ static struct clusterip_config * __clusterip_config_find(struct net *net, __be32 clusterip) { struct clusterip_config *c; - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(net);
list_for_each_entry_rcu(c, &cn->configs, list) { if (c->clusterip == clusterip) @@ -181,32 +190,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct clusterip_net *cn = clusterip_pernet(net); struct clusterip_config *c;
- c = container_of(this, struct clusterip_config, notifier); - switch (event) { - case NETDEV_REGISTER: - if (!strcmp(dev->name, c->ifname)) { - c->ifindex = dev->ifindex; - dev_mc_add(dev, c->clustermac); - } - break; - case NETDEV_UNREGISTER: - if (dev->ifindex == c->ifindex) { - dev_mc_del(dev, c->clustermac); - c->ifindex = -1; - } - break; - case NETDEV_CHANGENAME: - if (!strcmp(dev->name, c->ifname)) { - c->ifindex = dev->ifindex; - dev_mc_add(dev, c->clustermac); - } else if (dev->ifindex == c->ifindex) { - dev_mc_del(dev, c->clustermac); - c->ifindex = -1; + spin_lock_bh(&cn->lock); + list_for_each_entry_rcu(c, &cn->configs, list) { + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } else if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; } - break; } + spin_unlock_bh(&cn->lock);
return NOTIFY_DONE; } @@ -215,30 +229,44 @@ static struct clusterip_config * clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, __be32 ip, const char *iniface) { - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(net); struct clusterip_config *c; + struct net_device *dev; int err;
+ if (iniface[0] == '\0') { + pr_info("Please specify an interface name\n"); + return ERR_PTR(-EINVAL); + } + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return ERR_PTR(-ENOMEM);
- strcpy(c->ifname, iniface); - c->ifindex = -1; - c->clusterip = ip; + dev = dev_get_by_name(net, iniface); + if (!dev) { + pr_info("no such interface %s\n", iniface); + kfree(c); + return ERR_PTR(-ENOENT); + } + c->ifindex = dev->ifindex; + strcpy(c->ifname, dev->name); memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); + dev_mc_add(dev, c->clustermac); + dev_put(dev); + + c->clusterip = ip; c->num_total_nodes = i->num_total_nodes; clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; + c->net = net; refcount_set(&c->refcount, 1);
spin_lock_bh(&cn->lock); if (__clusterip_config_find(net, ip)) { - spin_unlock_bh(&cn->lock); - kfree(c); - - return ERR_PTR(-EBUSY); + err = -EBUSY; + goto out_config_put; }
list_add_rcu(&c->list, &cn->configs); @@ -260,22 +288,17 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, } #endif
- c->notifier.notifier_call = clusterip_netdev_event; - err = register_netdevice_notifier(&c->notifier); - if (!err) { - refcount_set(&c->entries, 1); - return c; - } + refcount_set(&c->entries, 1); + return c;
#ifdef CONFIG_PROC_FS - proc_remove(c->pde); err: #endif spin_lock_bh(&cn->lock); list_del_rcu(&c->list); +out_config_put: spin_unlock_bh(&cn->lock); clusterip_config_put(c); - return ERR_PTR(err); }
@@ -475,21 +498,6 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) &e->ip.dst.s_addr); return -EINVAL; } else { - struct net_device *dev; - - if (e->ip.iniface[0] == '\0') { - pr_info("Please specify an interface name\n"); - return -EINVAL; - } - - dev = dev_get_by_name(par->net, e->ip.iniface); - if (!dev) { - pr_info("no such interface %s\n", - e->ip.iniface); - return -ENOENT; - } - dev_put(dev); - config = clusterip_config_init(par->net, cipinfo, e->ip.dst.s_addr, e->ip.iniface); @@ -503,7 +511,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - clusterip_config_entry_put(par->net, config); + clusterip_config_entry_put(config); clusterip_config_put(config); return ret; } @@ -525,7 +533,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
/* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ - clusterip_config_entry_put(par->net, cipinfo->config); + clusterip_config_entry_put(cipinfo->config);
clusterip_config_put(cipinfo->config);
@@ -807,7 +815,7 @@ static const struct file_operations clusterip_proc_fops = {
static int clusterip_net_init(struct net *net) { - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(net); int ret;
INIT_LIST_HEAD(&cn->configs); @@ -832,7 +840,7 @@ static int clusterip_net_init(struct net *net)
static void clusterip_net_exit(struct net *net) { - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(net); #ifdef CONFIG_PROC_FS proc_remove(cn->procdir); cn->procdir = NULL; @@ -847,6 +855,10 @@ static struct pernet_operations clusterip_net_ops = { .size = sizeof(struct clusterip_net), };
+struct notifier_block cip_netdev_notifier = { + .notifier_call = clusterip_netdev_event +}; + static int __init clusterip_tg_init(void) { int ret; @@ -859,11 +871,17 @@ static int __init clusterip_tg_init(void) if (ret < 0) goto cleanup_subsys;
+ ret = register_netdevice_notifier(&cip_netdev_notifier); + if (ret < 0) + goto unregister_target; + pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION);
return 0;
+unregister_target: + xt_unregister_target(&clusterip_tg_reg); cleanup_subsys: unregister_pernet_subsys(&clusterip_net_ops); return ret; @@ -873,6 +891,7 @@ static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
+ unregister_netdevice_notifier(&cip_netdev_notifier); xt_unregister_target(&clusterip_tg_reg); unregister_pernet_subsys(&clusterip_net_ops);
From: Hui Wang john.wanghui@huawei.com
[ Upstream commit aa02ef099cff042c2a9109782ec2bf1bffc955d4 ]
nr_cpu_ids can be limited on the command line via nr_cpus=. This can break the logical package management because it results in a smaller number of packages while in kdump kernel.
Check below case: There is a two sockets system, each socket has 8 cores, which has 16 logical cpus while HT was turn on.
0 1 2 3 4 5 6 7 | 16 17 18 19 20 21 22 23 cores on socket 0 threads on socket 0 8 9 10 11 12 13 14 15 | 24 25 26 27 28 29 30 31 cores on socket 1 threads on socket 1
While starting the kdump kernel with command line option nr_cpus=16 panic was triggered on one of the cpus 24-31 eg. 26, then online cpu will be 1-15, 26(cpu 0 was disabled in kdump), ncpus will be 16 and __max_logical_packages will be 1, but actually two packages were booted on.
This issue can reproduced by set kdump option nr_cpus=<real physical core numbers>, and then trigger panic on last socket's thread, for example:
taskset -c 26 echo c > /proc/sysrq-trigger
Use total_cpus which will not be limited by nr_cpus command line to calculate the value of __max_logical_packages.
Signed-off-by: Hui Wang john.wanghui@huawei.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Cc: guijianfeng@huawei.com Cc: wencongyang2@huawei.com Cc: douliyang1@huawei.com Cc: qiaonuohan@huawei.com Link: https://lkml.kernel.org/r/20181107023643.22174-1-john.wanghui@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a9134d1910b9..ccd1f2a8e557 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1347,7 +1347,7 @@ void __init calculate_max_logical_packages(void) * extrapolate the boot cpu's data to all packages. */ ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); - __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); pr_info("Max logical packages: %u\n", __max_logical_packages); }
From: AliOS system security alios_sys_security@linux.alibaba.com
[ Upstream commit 8d683dcd65c037efc9fb38c696ec9b65b306e573 ]
The iv_offset in the mapping table of crypt target is a 64bit number when IV algorithm is plain64, plain64be, essiv or benbi. It will be assigned to iv_offset of struct crypt_config, cc_sector of struct convert_context and iv_sector of struct dm_crypt_request. These structures members are defined as a sector_t. But sector_t is 32bit when CONFIG_LBDAF is not set in 32bit kernel. In this situation sector_t is not big enough to store the 64bit iv_offset.
Here is a reproducer. Prepare test image and device (loop is automatically allocated by cryptsetup):
# dd if=/dev/zero of=tst.img bs=1M count=1 # echo "tst"|cryptsetup open --type plain -c aes-xts-plain64 \ --skip 500000000000000000 tst.img test
On 32bit system (use IV offset value that overflows to 64bit; CONFIG_LBDAF if off) and device checksum is wrong:
# dmsetup table test --showkeys 0 2048 crypt aes-xts-plain64 dfa7cfe3c481f2239155739c42e539ae8f2d38f304dcc89d20b26f69daaf0933 3551657984 7:0 0
# sha256sum /dev/mapper/test 533e25c09176632b3794f35303488c4a8f3f965dffffa6ec2df347c168cb6c19 /dev/mapper/test
On 64bit system (and on 32bit system with the patch), table and checksum is now correct:
# dmsetup table test --showkeys 0 2048 crypt aes-xts-plain64 dfa7cfe3c481f2239155739c42e539ae8f2d38f304dcc89d20b26f69daaf0933 500000000000000000 7:0 0
# sha256sum /dev/mapper/test 5d16160f9d5f8c33d8051e65fdb4f003cc31cd652b5abb08f03aa6fce0df75fc /dev/mapper/test
Signed-off-by: AliOS system security alios_sys_security@linux.alibaba.com Tested-and-Reviewed-by: Milan Broz gmazyland@gmail.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/md/dm-crypt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b8eec515a003..9a4dec0a0f71 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -49,7 +49,7 @@ struct convert_context { struct bio *bio_out; struct bvec_iter iter_in; struct bvec_iter iter_out; - sector_t cc_sector; + u64 cc_sector; atomic_t cc_pending; union { struct skcipher_request *req; @@ -81,7 +81,7 @@ struct dm_crypt_request { struct convert_context *ctx; struct scatterlist sg_in[4]; struct scatterlist sg_out[4]; - sector_t iv_sector; + u64 iv_sector; };
struct crypt_config; @@ -160,7 +160,7 @@ struct crypt_config { struct iv_lmk_private lmk; struct iv_tcw_private tcw; } iv_gen_private; - sector_t iv_offset; + u64 iv_offset; unsigned int iv_size; unsigned short int sector_size; unsigned char sector_shift;
From: Nikos Tsironis ntsironis@arrikto.com
[ Upstream commit d7e6b8dfc7bcb3f4f3a18313581f67486a725b52 ]
When using kcopyd to run callbacks through dm_kcopyd_do_callback() or submitting copy jobs with a source size of 0, the jobs are pushed directly to the complete_jobs list, which could be under processing by the kcopyd thread. As a result, the kcopyd thread can continue running completed jobs indefinitely, without releasing the CPU, as long as someone keeps submitting new completed jobs through the aforementioned paths. Processing of work items, queued for execution on the same CPU as the currently running kcopyd thread, is thus stalled for excessive amounts of time, hurting performance.
Running the following test, from the device mapper test suite [1],
dmtest run --suite snapshot -n parallel_io_to_many_snaps_N
, with 8 active snapshots, we get, in dmesg, messages like the following:
[68899.948523] BUG: workqueue lockup - pool cpus=0 node=0 flags=0x0 nice=0 stuck for 95s! [68899.949282] Showing busy workqueues and worker pools: [68899.949288] workqueue events: flags=0x0 [68899.949295] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256 [68899.949306] pending: vmstat_shepherd, cache_reap [68899.949331] workqueue mm_percpu_wq: flags=0x8 [68899.949337] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949345] pending: vmstat_update [68899.949387] workqueue dm_bufio_cache: flags=0x8 [68899.949392] pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=1/256 [68899.949400] pending: work_fn [dm_bufio] [68899.949423] workqueue kcopyd: flags=0x8 [68899.949429] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949437] pending: do_work [dm_mod] [68899.949452] workqueue kcopyd: flags=0x8 [68899.949458] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256 [68899.949466] in-flight: 13:do_work [dm_mod] [68899.949474] pending: do_work [dm_mod] [68899.949487] workqueue kcopyd: flags=0x8 [68899.949493] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949501] pending: do_work [dm_mod] [68899.949515] workqueue kcopyd: flags=0x8 [68899.949521] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949529] pending: do_work [dm_mod] [68899.949541] workqueue kcopyd: flags=0x8 [68899.949547] pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 [68899.949555] pending: do_work [dm_mod] [68899.949568] pool 0: cpus=0 node=0 flags=0x0 nice=0 hung=95s workers=4 idle: 27130 27223 1084
Fix this by splitting the complete_jobs list into two parts: A user facing part, named callback_jobs, and one used internally by kcopyd, retaining the name complete_jobs. dm_kcopyd_do_callback() and dispatch_job() now push their jobs to the callback_jobs list, which is spliced to the complete_jobs list once, every time the kcopyd thread wakes up. This prevents kcopyd from hogging the CPU indefinitely and causing workqueue stalls.
Re-running the aforementioned test:
* Workqueue stalls are eliminated * The maximum writing time among all targets is reduced from 09m37.10s to 06m04.85s and the total run time of the test is reduced from 10m43.591s to 7m19.199s
[1] https://github.com/jthornber/device-mapper-test-suite
Signed-off-by: Nikos Tsironis ntsironis@arrikto.com Signed-off-by: Ilias Tsitsimpis iliastsi@arrikto.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/md/dm-kcopyd.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 2fc4213e02b5..671c24332802 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -56,15 +56,17 @@ struct dm_kcopyd_client { atomic_t nr_jobs;
/* - * We maintain three lists of jobs: + * We maintain four lists of jobs: * * i) jobs waiting for pages * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. + * iii) jobs that don't need to do any IO and just run a callback + * iv) jobs that have completed. * - * All three of these are protected by job_lock. + * All four of these are protected by job_lock. */ spinlock_t job_lock; + struct list_head callback_jobs; struct list_head complete_jobs; struct list_head io_jobs; struct list_head pages_jobs; @@ -625,6 +627,7 @@ static void do_work(struct work_struct *work) struct dm_kcopyd_client *kc = container_of(work, struct dm_kcopyd_client, kcopyd_work); struct blk_plug plug; + unsigned long flags;
/* * The order that these are called is *very* important. @@ -633,6 +636,10 @@ static void do_work(struct work_struct *work) * list. io jobs call wake when they complete and it all * starts again. */ + spin_lock_irqsave(&kc->job_lock, flags); + list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs); + spin_unlock_irqrestore(&kc->job_lock, flags); + blk_start_plug(&plug); process_jobs(&kc->complete_jobs, kc, run_complete_job); process_jobs(&kc->pages_jobs, kc, run_pages_job); @@ -650,7 +657,7 @@ static void dispatch_job(struct kcopyd_job *job) struct dm_kcopyd_client *kc = job->kc; atomic_inc(&kc->nr_jobs); if (unlikely(!job->source.count)) - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); else if (job->pages == &zero_page_list) push(&kc->io_jobs, job); else @@ -858,7 +865,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) job->read_err = read_err; job->write_err = write_err;
- push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); wake(kc); } EXPORT_SYMBOL(dm_kcopyd_do_callback); @@ -888,6 +895,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro return ERR_PTR(-ENOMEM);
spin_lock_init(&kc->job_lock); + INIT_LIST_HEAD(&kc->callback_jobs); INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); @@ -939,6 +947,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) /* Wait for completion of all jobs submitted by this client. */ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
+ BUG_ON(!list_empty(&kc->callback_jobs)); BUG_ON(!list_empty(&kc->complete_jobs)); BUG_ON(!list_empty(&kc->io_jobs)); BUG_ON(!list_empty(&kc->pages_jobs));
From: Michael Petlan mpetlan@redhat.com
[ Upstream commit 51433ead1460fb3f46e1c34f68bb22fd2dd0f5d0 ]
Some 'perf stat' options do not make sense to be negated (event, cgroup), some do not have negated path implemented (metrics). Due to that, it is better to disable the "no-" prefix for them, since otherwise, the later opt-parsing segfaults.
Before:
$ perf stat --no-metrics -- ls Segmentation fault (core dumped)
After:
$ perf stat --no-metrics -- ls Error: option `no-metrics' isn't available Usage: perf stat [<options>] [<command>]
Signed-off-by: Michael Petlan mpetlan@redhat.com Tested-by: Arnaldo Carvalho de Melo acme@redhat.com LPU-Reference: 1485912065.62416880.1544457604340.JavaMail.zimbra@redhat.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/builtin-stat.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a635abfa77b6..1410d66192f7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -709,7 +709,7 @@ static int parse_metric_groups(const struct option *opt, return metricgroup__parse_groups(opt, str, &stat_config.metric_events); }
-static const struct option stat_options[] = { +static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), OPT_CALLBACK('e', "event", &evsel_list, "event", @@ -1599,6 +1599,12 @@ int cmd_stat(int argc, const char **argv) return -ENOMEM;
parse_events__shrink_config_terms(); + + /* String-parsing callback-based options would segfault when negated */ + set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG); + set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG); + set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG); + argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, (const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION);
From: Arnaldo Carvalho de Melo acme@redhat.com
[ Upstream commit ece9804985b57e1ccd83b1fb6288520955a29d51 ]
At some point we decided not to directly include kernel sources files when building tools/perf/, but when tools/lib/subcmd/ was forked from tools/perf it somehow ended up adding it via these two lines in its Makefile:
CFLAGS += -I$(srctree)/include/uapi CFLAGS += -I$(srctree)/include
As $(srctree) points to the kernel sources.
Removing those lines and keeping just:
CFLAGS += -I$(srctree)/tools/include/
Is enough to build tools/perf and tools/objtool.
This fixes the build when building from the sources in environments such as the Android NDK crossbuilding from a fedora:26 system:
subcmd-util.h:11:15: error: expected ',' or ';' before 'void' static inline void report(const char *prefix, const char *err, va_list params) ^ In file included from /git/perf/include/uapi/linux/stddef.h:2:0, from /git/perf/include/uapi/linux/posix_types.h:5, from /opt/android-ndk-r12b/platforms/android-24/arch-arm/usr/include/sys/types.h:36, from /opt/android-ndk-r12b/platforms/android-24/arch-arm/usr/include/unistd.h:33, from run-command.c:2: subcmd-util.h:18:17: error: '__no_instrument_function__' attribute applies only to functions
The /opt/android-ndk-r12b/platforms/android-24/arch-arm/usr/include/sys/types.h file that includes linux/posix_types.h ends up getting the one in the kernel sources causing the breakage. Fix it.
Test built tools/objtool/ too.
Reported-by: Jiri Olsa jolsa@kernel.org Tested-by: Jiri Olsa jolsa@kernel.org Cc: Adrian Hunter adrian.hunter@intel.com Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: Namhyung Kim namhyung@kernel.org Fixes: 4b6ab94eabe4 ("perf subcmd: Create subcmd library") Link: https://lkml.kernel.org/n/tip-5lhaoecrj12t0bqwvpiu14sm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/lib/subcmd/Makefile | 2 -- 1 file changed, 2 deletions(-)
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 95563b8e1ad7..ed61fb3a46c0 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -36,8 +36,6 @@ endif CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
CFLAGS += -I$(srctree)/tools/include/ -CFLAGS += -I$(srctree)/include/uapi -CFLAGS += -I$(srctree)/include
SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
From: Nikos Tsironis ntsironis@arrikto.com
[ Upstream commit 721b1d98fb517ae99ab3b757021cf81db41e67be ]
kcopyd has no upper limit to the number of jobs one can allocate and issue. Under certain workloads this can lead to excessive memory usage and workqueue stalls. For example, when creating multiple dm-snapshot targets with a 4K chunk size and then writing to the origin through the page cache. Syncing the page cache causes a large number of BIOs to be issued to the dm-snapshot origin target, which itself issues an even larger (because of the BIO splitting taking place) number of kcopyd jobs.
Running the following test, from the device mapper test suite [1],
dmtest run --suite snapshot -n many_snapshots_of_same_volume_N
, with 8 active snapshots, results in the kcopyd job slab cache growing to 10G. Depending on the available system RAM this can lead to the OOM killer killing user processes:
[463.492878] kthreadd invoked oom-killer: gfp_mask=0x6040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null), order=1, oom_score_adj=0 [463.492894] kthreadd cpuset=/ mems_allowed=0 [463.492948] CPU: 7 PID: 2 Comm: kthreadd Not tainted 4.19.0-rc7 #3 [463.492950] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [463.492952] Call Trace: [463.492964] dump_stack+0x7d/0xbb [463.492973] dump_header+0x6b/0x2fc [463.492987] ? lockdep_hardirqs_on+0xee/0x190 [463.493012] oom_kill_process+0x302/0x370 [463.493021] out_of_memory+0x113/0x560 [463.493030] __alloc_pages_slowpath+0xf40/0x1020 [463.493055] __alloc_pages_nodemask+0x348/0x3c0 [463.493067] cache_grow_begin+0x81/0x8b0 [463.493072] ? cache_grow_begin+0x874/0x8b0 [463.493078] fallback_alloc+0x1e4/0x280 [463.493092] kmem_cache_alloc_node+0xd6/0x370 [463.493098] ? copy_process.part.31+0x1c5/0x20d0 [463.493105] copy_process.part.31+0x1c5/0x20d0 [463.493115] ? __lock_acquire+0x3cc/0x1550 [463.493121] ? __switch_to_asm+0x34/0x70 [463.493129] ? kthread_create_worker_on_cpu+0x70/0x70 [463.493135] ? finish_task_switch+0x90/0x280 [463.493165] _do_fork+0xe0/0x6d0 [463.493191] ? kthreadd+0x19f/0x220 [463.493233] kernel_thread+0x25/0x30 [463.493235] kthreadd+0x1bf/0x220 [463.493242] ? kthread_create_on_cpu+0x90/0x90 [463.493248] ret_from_fork+0x3a/0x50 [463.493279] Mem-Info: [463.493285] active_anon:20631 inactive_anon:4831 isolated_anon:0 [463.493285] active_file:80216 inactive_file:80107 isolated_file:435 [463.493285] unevictable:0 dirty:51266 writeback:109372 unstable:0 [463.493285] slab_reclaimable:31191 slab_unreclaimable:3483521 [463.493285] mapped:526 shmem:4903 pagetables:1759 bounce:0 [463.493285] free:33623 free_pcp:2392 free_cma:0 ... [463.493489] Unreclaimable slab info: [463.493513] Name Used Total [463.493522] bio-6 1028KB 1028KB [463.493525] bio-5 1028KB 1028KB [463.493528] dm_snap_pending_exception 236783KB 243789KB [463.493531] dm_exception 41KB 42KB [463.493534] bio-4 1216KB 1216KB [463.493537] bio-3 439396KB 439396KB [463.493539] kcopyd_job 6973427KB 6973427KB ... [463.494340] Out of memory: Kill process 1298 (ruby2.3) score 1 or sacrifice child [463.494673] Killed process 1298 (ruby2.3) total-vm:435740kB, anon-rss:20180kB, file-rss:4kB, shmem-rss:0kB [463.506437] oom_reaper: reaped process 1298 (ruby2.3), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
Moreover, issuing a large number of kcopyd jobs results in kcopyd hogging the CPU, while processing them. As a result, processing of work items, queued for execution on the same CPU as the currently running kcopyd thread, is stalled for long periods of time, hurting performance. Running the aforementioned test we get, in dmesg, messages like the following:
[67501.194592] BUG: workqueue lockup - pool cpus=4 node=0 flags=0x0 nice=0 stuck for 27s! [67501.195586] Showing busy workqueues and worker pools: [67501.195591] workqueue events: flags=0x0 [67501.195597] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195611] pending: cache_reap [67501.195641] workqueue mm_percpu_wq: flags=0x8 [67501.195645] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195656] pending: vmstat_update [67501.195682] workqueue kblockd: flags=0x18 [67501.195687] pwq 5: cpus=2 node=0 flags=0x0 nice=-20 active=1/256 [67501.195698] pending: blk_timeout_work [67501.195753] workqueue kcopyd: flags=0x8 [67501.195757] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195768] pending: do_work [dm_mod] [67501.195802] workqueue kcopyd: flags=0x8 [67501.195806] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195817] pending: do_work [dm_mod] [67501.195834] workqueue kcopyd: flags=0x8 [67501.195838] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195848] pending: do_work [dm_mod] [67501.195881] workqueue kcopyd: flags=0x8 [67501.195885] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=1/256 [67501.195896] pending: do_work [dm_mod] [67501.195920] workqueue kcopyd: flags=0x8 [67501.195924] pwq 8: cpus=4 node=0 flags=0x0 nice=0 active=2/256 [67501.195935] in-flight: 67:do_work [dm_mod] [67501.195945] pending: do_work [dm_mod] [67501.195961] pool 8: cpus=4 node=0 flags=0x0 nice=0 hung=27s workers=3 idle: 129 23765
The root cause for these issues is the way dm-snapshot uses kcopyd. In particular, the lack of an explicit or implicit limit to the maximum number of in-flight COW jobs. The merging path is not affected because it implicitly limits the in-flight kcopyd jobs to one.
Fix these issues by using a semaphore to limit the maximum number of in-flight kcopyd jobs. We grab the semaphore before allocating a new kcopyd job in start_copy() and start_full_bio() and release it after the job finishes in copy_callback().
The initial semaphore value is configurable through a module parameter, to allow fine tuning the maximum number of in-flight COW jobs. Setting this parameter to zero initializes the semaphore to INT_MAX.
A default value of 2048 maximum in-flight kcopyd jobs was chosen. This value was decided experimentally as a trade-off between memory consumption, stalling the kernel's workqueues and maintaining a high enough throughput.
Re-running the aforementioned test:
* Workqueue stalls are eliminated * kcopyd's job slab cache uses a maximum of 130MB * The time taken by the test to write to the snapshot-origin target is reduced from 05m20.48s to 03m26.38s
[1] https://github.com/jthornber/device-mapper-test-suite
Signed-off-by: Nikos Tsironis ntsironis@arrikto.com Signed-off-by: Ilias Tsitsimpis iliastsi@arrikto.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/md/dm-snap.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index ae4b33d10924..36805b12661e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -19,6 +19,7 @@ #include <linux/vmalloc.h> #include <linux/log2.h> #include <linux/dm-kcopyd.h> +#include <linux/semaphore.h>
#include "dm.h"
@@ -105,6 +106,9 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store;
+ /* Maximum number of in-flight COW jobs. */ + struct semaphore cow_count; + struct dm_kcopyd_client *kcopyd_client;
/* Wait for events based on state_bits */ @@ -145,6 +149,19 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1
+/* + * Maximum number of chunks being copied on write. + * + * The value was decided experimentally as a trade-off between memory + * consumption, stalling the kernel's workqueues and maintaining a high enough + * throughput. + */ +#define DEFAULT_COW_THRESHOLD 2048 + +static int cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); + DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write");
@@ -1190,6 +1207,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; }
+ sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); @@ -1575,6 +1594,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) rb_link_node(&pe->out_of_order_node, parent, p); rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); } + up(&s->cow_count); }
/* @@ -1598,6 +1618,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count;
/* Hand over to kcopyd */ + down(&s->cow_count); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); }
@@ -1617,6 +1638,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io;
+ down(&s->cow_count); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe);
From: Leo Yan leo.yan@linaro.org
[ Upstream commit 43fd56669c28cd354e9228bdb58e4bca1c1a8b66 ]
The structure cs_etm_queue uses 'prev_packet' to point to previous packet, this can be used to combine with new coming packet to generate samples.
In function cs_etm__flush() it swaps packets only when the flag 'etm->synth_opts.last_branch' is true, this means that it will not swap packets if without option '--itrace=il' to generate last branch entries; thus for this case the 'prev_packet' doesn't point to the correct previous packet and the stale packet still will be used to generate sequential sample. Thus if dump trace with 'perf script' command we can see the incorrect flow with the stale packet's address info.
This patch corrects packets swapping in cs_etm__flush(); except using the flag 'etm->synth_opts.last_branch' it also checks the another flag 'etm->sample_branches', if any flag is true then it swaps packets so can save correct content to 'prev_packet'. Finally this can fix the wrong program flow dumping issue.
The patch has a minor refactoring to use 'etm->synth_opts.last_branch' instead of 'etmq->etm->synth_opts.last_branch' for condition checking, this is consistent with that is done in cs_etm__sample().
Signed-off-by: Leo Yan leo.yan@linaro.org Reviewed-by: Mathieu Poirier mathieu.poirier@linaro.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Jiri Olsa jolsa@redhat.com Cc: Mike Leach mike.leach@linaro.org Cc: Namhyung Kim namhyung@kernel.org Cc: Robert Walker robert.walker@arm.com Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1544513908-16805-2-git-send-email-leo.yan@linaro.or... Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/util/cs-etm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 73430b73570d..c2f0c92623f0 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1005,7 +1005,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq) }
swap_packet: - if (etmq->etm->synth_opts.last_branch) { + if (etm->sample_branches || etm->synth_opts.last_branch) { /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet.
From: Arnaldo Carvalho de Melo acme@redhat.com
[ Upstream commit 748fe0889c1ff12d378946bd5326e8ee8eacf5cf ]
There are systems such as the Android NDK API level 24 has the sigqueue() function but doesn't provide a prototype, adding noise to the build:
util/evlist.c: In function 'perf_evlist__prepare_workload': util/evlist.c:1494:4: warning: implicit declaration of function 'sigqueue' [-Wimplicit-function-declaration] if (sigqueue(getppid(), SIGUSR1, val)) ^ util/evlist.c:1494:4: warning: nested extern declaration of 'sigqueue' [-Wnested-externs]
Define a LACKS_SIGQUEUE_PROTOTYPE define so that code needing that can get a prototype.
Checked in the bionic git repo to be available since level 23:
https://android.googlesource.com/platform/bionic/+/master/libc/include/signa...
int sigqueue(pid_t __pid, int __signal, const union sigval __value) __INTRODUCED_IN(23);
Cc: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@kernel.org Cc: Namhyung Kim namhyung@kernel.org Link: https://lkml.kernel.org/n/tip-lmhpev1uni9kdrv7j29glyov@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/Makefile.config | 1 + tools/perf/util/evlist.c | 4 ++++ 2 files changed, 5 insertions(+)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a0e8c23f9125..e106e7ce6933 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -294,6 +294,7 @@ ifndef NO_BIONIC $(call feature_check,bionic) ifeq ($(feature-bionic), 1) BIONIC := 1 + CFLAGS += -DLACKS_SIGQUEUE_PROTOTYPE EXTLIBS := $(filter-out -lrt,$(EXTLIBS)) EXTLIBS := $(filter-out -lpthread,$(EXTLIBS)) endif diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 668d2a9ef0f4..8a806b0758b0 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -34,6 +34,10 @@ #include <linux/log2.h> #include <linux/err.h>
+#ifdef LACKS_SIGQUEUE_PROTOTYPE +int sigqueue(pid_t pid, int sig, const union sigval value); +#endif + #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
From: Arnaldo Carvalho de Melo acme@redhat.com
[ Upstream commit d7a8c4a6a055097a67ccfa3ca7c9ff1b64603a70 ]
There are systems such as the Android NDK API level 24 has the open_memstream() function but doesn't provide a prototype, adding noise to the build:
builtin-timechart.c: In function 'cat_backtrace': builtin-timechart.c:486:2: warning: implicit declaration of function 'open_memstream' [-Wimplicit-function-declaration] FILE *f = open_memstream(&p, &p_len); ^ builtin-timechart.c:486:2: warning: nested extern declaration of 'open_memstream' [-Wnested-externs] builtin-timechart.c:486:12: warning: initialization makes pointer from integer without a cast FILE *f = open_memstream(&p, &p_len); ^
Define a LACKS_OPEN_MEMSTREAM_PROTOTYPE define so that code needing that can get a prototype.
Checked in the bionic git repo to be available since level 23:
https://android.googlesource.com/platform/bionic/+/master/libc/include/stdio...
FILE* open_memstream(char** __ptr, size_t* __size_ptr) __INTRODUCED_IN(23);
Cc: Adrian Hunter adrian.hunter@intel.com Cc: Jiri Olsa jolsa@kernel.org Cc: Namhyung Kim namhyung@kernel.org Link: https://lkml.kernel.org/n/tip-343ashae97e5bq6vizusyfno@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/Makefile.config | 1 + tools/perf/builtin-timechart.c | 4 ++++ 2 files changed, 5 insertions(+)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e106e7ce6933..acbb657f7ce2 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -295,6 +295,7 @@ ifndef NO_BIONIC ifeq ($(feature-bionic), 1) BIONIC := 1 CFLAGS += -DLACKS_SIGQUEUE_PROTOTYPE + CFLAGS += -DLACKS_OPEN_MEMSTREAM_PROTOTYPE EXTLIBS := $(filter-out -lrt,$(EXTLIBS)) EXTLIBS := $(filter-out -lpthread,$(EXTLIBS)) endif diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index a827919c6263..775b99833e51 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -43,6 +43,10 @@ #include "util/data.h" #include "util/debug.h"
+#ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE +FILE *open_memstream(char **ptr, size_t *sizeloc); +#endif + #define SUPPORT_OLD_POWER_EVENTS 1 #define PWR_EVENT_EXIT -1
From: Javier Barrio javier.barrio.mart@gmail.com
[ Upstream commit 41c4f85cdac280d356df1f483000ecec4a8868be ]
Commit 1fa5efe3622db58cb8c7b9a50665e9eb9a6c7e97 (ext4: Use generic helpers for quotaon and quotaoff) made possible to call quotactl(Q_XQUOTAON/OFF) on ext4 filesystems with sysfile quota support. This leads to calling dquot_enable/disable without s_umount held in excl. mode, because quotactl_cmd_onoff checks only for Q_QUOTAON/OFF.
The following WARN_ON_ONCE triggers (in this case for dquot_enable, ext4, latest Linus' tree):
[ 117.807056] EXT4-fs (dm-0): mounted filesystem with ordered data mode. Opts: quota,prjquota
[...]
[ 155.036847] WARNING: CPU: 0 PID: 2343 at fs/quota/dquot.c:2469 dquot_enable+0x34/0xb9 [ 155.036851] Modules linked in: quota_v2 quota_tree ipv6 af_packet joydev mousedev psmouse serio_raw pcspkr i2c_piix4 intel_agp intel_gtt e1000 ttm drm_kms_helper drm agpgart fb_sys_fops syscopyarea sysfillrect sysimgblt i2c_core input_leds kvm_intel kvm irqbypass qemu_fw_cfg floppy evdev parport_pc parport button crc32c_generic dm_mod ata_generic pata_acpi ata_piix libata loop ext4 crc16 mbcache jbd2 usb_storage usbcore sd_mod scsi_mod [ 155.036901] CPU: 0 PID: 2343 Comm: qctl Not tainted 4.20.0-rc6-00025-gf5d582777bcb #9 [ 155.036903] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 155.036911] RIP: 0010:dquot_enable+0x34/0xb9 [ 155.036915] Code: 41 56 41 55 41 54 55 53 4c 8b 6f 28 74 02 0f 0b 4d 8d 7d 70 49 89 fc 89 cb 41 89 d6 89 f5 4c 89 ff e8 23 09 ea ff 85 c0 74 0a <0f> 0b 4c 89 ff e8 8b 09 ea ff 85 db 74 6a 41 8b b5 f8 00 00 00 0f [ 155.036918] RSP: 0018:ffffb09b00493e08 EFLAGS: 00010202 [ 155.036922] RAX: 0000000000000001 RBX: 0000000000000008 RCX: 0000000000000008 [ 155.036924] RDX: 0000000000000001 RSI: 0000000000000002 RDI: ffff9781b67cd870 [ 155.036926] RBP: 0000000000000002 R08: 0000000000000000 R09: 61c8864680b583eb [ 155.036929] R10: ffffb09b00493e48 R11: ffffffffff7ce7d4 R12: ffff9781b7ee8d78 [ 155.036932] R13: ffff9781b67cd800 R14: 0000000000000004 R15: ffff9781b67cd870 [ 155.036936] FS: 00007fd813250b88(0000) GS:ffff9781ba000000(0000) knlGS:0000000000000000 [ 155.036939] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 155.036942] CR2: 00007fd812ff61d6 CR3: 000000007c882000 CR4: 00000000000006b0 [ 155.036951] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 155.036953] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 155.036955] Call Trace: [ 155.037004] dquot_quota_enable+0x8b/0xd0 [ 155.037011] kernel_quotactl+0x628/0x74e [ 155.037027] ? do_mprotect_pkey+0x2a6/0x2cd [ 155.037034] __x64_sys_quotactl+0x1a/0x1d [ 155.037041] do_syscall_64+0x55/0xe4 [ 155.037078] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 155.037105] RIP: 0033:0x7fd812fe1198 [ 155.037109] Code: 02 77 0d 48 89 c1 48 c1 e9 3f 75 04 48 8b 04 24 48 83 c4 50 5b c3 48 83 ec 08 49 89 ca 48 63 d2 48 63 ff b8 b3 00 00 00 0f 05 <48> 89 c7 e8 c1 eb ff ff 5a c3 48 63 ff b8 bb 00 00 00 0f 05 48 89 [ 155.037112] RSP: 002b:00007ffe8cd7b050 EFLAGS: 00000206 ORIG_RAX: 00000000000000b3 [ 155.037116] RAX: ffffffffffffffda RBX: 00007ffe8cd7b148 RCX: 00007fd812fe1198 [ 155.037119] RDX: 0000000000000000 RSI: 00007ffe8cd7cea9 RDI: 0000000000580102 [ 155.037121] RBP: 00007ffe8cd7b0f0 R08: 000055fc8eba8a9d R09: 0000000000000000 [ 155.037124] R10: 00007ffe8cd7b074 R11: 0000000000000206 R12: 00007ffe8cd7b168 [ 155.037126] R13: 000055fc8eba8897 R14: 0000000000000000 R15: 0000000000000000 [ 155.037131] ---[ end trace 210f864257175c51 ]---
and then the syscall proceeds without s_umount locking.
This patch locks the superblock ->s_umount sem. in exclusive mode for all Q_XQUOTAON/OFF quotactls too in addition to Q_QUOTAON/OFF.
AFAICT, other than ext4, only xfs and ocfs2 are affected by this change. The VFS will now call in xfs_quota_* functions with s_umount held, which wasn't the case before. This looks good to me but I can not say for sure. Ext4 and ocfs2 where already beeing called with s_umount exclusive via quota_quotaon/off which is basically the same.
Signed-off-by: Javier Barrio javier.barrio.mart@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org --- fs/quota/quota.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index f0cbf58ad4da..fd5dd806f1b9 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -791,7 +791,8 @@ static int quotactl_cmd_write(int cmd) /* Return true if quotactl command is manipulating quota on/off state */ static bool quotactl_cmd_onoff(int cmd) { - return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF); + return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF) || + (cmd == Q_XQUOTAON) || (cmd == Q_XQUOTAOFF); }
/*
From: Yangtao Li tiny.windzz@gmail.com
[ Upstream commit 5eb73c831171115d3b4347e1e7124a5a35d8086c ]
The function of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller.
integrator_ap_timer_init_of() doesn't do that. The pri_node and the sec_node are used as an identifier to compare against the current node, so we can directly drop the refcount after getting the node from the path as it is not used as pointer.
By dropping the refcount right after getting it, a single variable is needed instead of two.
Fix this by use a single variable and drop the refcount right after of_find_node_by_path().
Signed-off-by: Yangtao Li tiny.windzz@gmail.com Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clocksource/timer-integrator-ap.c | 25 +++++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c index 76e526f58620..19fb7de4b928 100644 --- a/drivers/clocksource/timer-integrator-ap.c +++ b/drivers/clocksource/timer-integrator-ap.c @@ -181,8 +181,7 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) int irq; struct clk *clk; unsigned long rate; - struct device_node *pri_node; - struct device_node *sec_node; + struct device_node *alias_node;
base = of_io_request_and_map(node, 0, "integrator-timer"); if (IS_ERR(base)) @@ -204,7 +203,18 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) return err; }
- pri_node = of_find_node_by_path(path); + alias_node = of_find_node_by_path(path); + + /* + * The pointer is used as an identifier not as a pointer, we + * can drop the refcount on the of__node immediately after + * getting it. + */ + of_node_put(alias_node); + + if (node == alias_node) + /* The primary timer lacks IRQ, use as clocksource */ + return integrator_clocksource_init(rate, base);
err = of_property_read_string(of_aliases, "arm,timer-secondary", &path); @@ -213,14 +223,11 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) return err; }
+ alias_node = of_find_node_by_path(path);
- sec_node = of_find_node_by_path(path); - - if (node == pri_node) - /* The primary timer lacks IRQ, use as clocksource */ - return integrator_clocksource_init(rate, base); + of_node_put(alias_node);
- if (node == sec_node) { + if (node == alias_node) { /* The secondary timer will drive the clock event */ irq = irq_of_parse_and_map(node, 0); return integrator_clockevent_init(rate, base, irq);
From: Milan Broz gmazyland@gmail.com
[ Upstream commit ef87bfc24f9b8da82c89aff493df20f078bc9cb1 ]
Reference to a device in device-mapper table contains offset in sectors.
If the sector_t is 32bit integer (CONFIG_LBDAF is not set), then several device-mapper targets can overflow this offset and validity check is then performed on a wrong offset and a wrong table is activated.
See for example (on 32bit without CONFIG_LBDAF) this overflow:
# dmsetup create test --table "0 2048 linear /dev/sdg 4294967297" # dmsetup table test 0 2048 linear 8:96 1
This patch adds explicit check for overflow if the offset is sector_t type.
Signed-off-by: Milan Broz gmazyland@gmail.com Reviewed-by: Mikulas Patocka mpatocka@redhat.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-delay.c | 2 +- drivers/md/dm-flakey.c | 2 +- drivers/md/dm-linear.c | 2 +- drivers/md/dm-raid1.c | 3 ++- drivers/md/dm-unstripe.c | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9a4dec0a0f71..fc7d8b8a654f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2781,7 +2781,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) }
ret = -EINVAL; - if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { + if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { ti->error = "Invalid device sector"; goto bad; } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2fb7bb4304ad..fddffe251bf6 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -141,7 +141,7 @@ static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **a unsigned long long tmpll; char dummy;
- if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { + if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { ti->error = "Invalid device sector"; return -EINVAL; } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 3cb97fa4c11d..8261aa8c7fe1 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -213,7 +213,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) devname = dm_shift_arg(&as);
r = -EINVAL; - if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) { + if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { ti->error = "Invalid device sector"; goto bad; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 8d7ddee6ac4d..ad980a38fb1e 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -45,7 +45,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) }
ret = -EINVAL; - if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) { + if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) { ti->error = "Invalid device sector"; goto bad; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 79eab1071ec2..5a51151f680d 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -943,7 +943,8 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, char dummy; int ret;
- if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) { + if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1 || + offset != (sector_t)offset) { ti->error = "Invalid offset"; return -EINVAL; } diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index 954b7ab4e684..e673dacf6418 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -78,7 +78,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err; }
- if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1) { + if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) { ti->error = "Invalid striped device offset"; goto err; }
From: Raghuram Hegde raghuram.hegde@intel.com
[ Upstream commit 2da711bcebe81209a9f2f90e145600eb1bae2b71 ]
Include the new USB product ID for Intel Bluetooth device 22260 family(CcPeak)
The /sys/kernel/debug/usb/devices portion for this device is:
T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=02 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=8087 ProdID=0029 Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms I: If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 63 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 63 Ivl=1ms
Signed-off-by: Raghuram Hegde raghuram.hegde@intel.com Signed-off-by: Chethan T N chethan.tumkur.narayan@intel.com Signed-off-by: Marcel Holtmann marcel@holtmann.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/bluetooth/btusb.c | 72 ++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 35 deletions(-)
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 7439a7eb50ac..05c8a7ed859c 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -344,6 +344,7 @@ static const struct usb_device_id blacklist_table[] = { /* Intel Bluetooth devices */ { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW }, { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW }, + { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, @@ -2055,6 +2056,35 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb) return -EILSEQ; }
+static bool btusb_setup_intel_new_get_fw_name(struct intel_version *ver, + struct intel_boot_params *params, + char *fw_name, size_t len, + const char *suffix) +{ + switch (ver->hw_variant) { + case 0x0b: /* SfP */ + case 0x0c: /* WsP */ + snprintf(fw_name, len, "intel/ibt-%u-%u.%s", + le16_to_cpu(ver->hw_variant), + le16_to_cpu(params->dev_revid), + suffix); + break; + case 0x11: /* JfP */ + case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* CcP */ + snprintf(fw_name, len, "intel/ibt-%u-%u-%u.%s", + le16_to_cpu(ver->hw_variant), + le16_to_cpu(ver->hw_revision), + le16_to_cpu(ver->fw_revision), + suffix); + break; + default: + return false; + } + return true; +} + static int btusb_setup_intel_new(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); @@ -2106,7 +2136,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) case 0x11: /* JfP */ case 0x12: /* ThP */ case 0x13: /* HrP */ - case 0x14: /* QnJ, IcP */ + case 0x14: /* CcP */ break; default: bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)", @@ -2190,23 +2220,9 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi. * */ - switch (ver.hw_variant) { - case 0x0b: /* SfP */ - case 0x0c: /* WsP */ - snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi", - le16_to_cpu(ver.hw_variant), - le16_to_cpu(params.dev_revid)); - break; - case 0x11: /* JfP */ - case 0x12: /* ThP */ - case 0x13: /* HrP */ - case 0x14: /* QnJ, IcP */ - snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi", - le16_to_cpu(ver.hw_variant), - le16_to_cpu(ver.hw_revision), - le16_to_cpu(ver.fw_revision)); - break; - default: + err = btusb_setup_intel_new_get_fw_name(&ver, ¶ms, fwname, + sizeof(fwname), "sfi"); + if (!err) { bt_dev_err(hdev, "Unsupported Intel firmware naming"); return -EINVAL; } @@ -2222,23 +2238,9 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) /* Save the DDC file name for later use to apply once the firmware * downloading is done. */ - switch (ver.hw_variant) { - case 0x0b: /* SfP */ - case 0x0c: /* WsP */ - snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc", - le16_to_cpu(ver.hw_variant), - le16_to_cpu(params.dev_revid)); - break; - case 0x11: /* JfP */ - case 0x12: /* ThP */ - case 0x13: /* HrP */ - case 0x14: /* QnJ, IcP */ - snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc", - le16_to_cpu(ver.hw_variant), - le16_to_cpu(ver.hw_revision), - le16_to_cpu(ver.fw_revision)); - break; - default: + err = btusb_setup_intel_new_get_fw_name(&ver, ¶ms, fwname, + sizeof(fwname), "ddc"); + if (!err) { bt_dev_err(hdev, "Unsupported Intel firmware naming"); return -EINVAL; }
From: Takashi Sakamoto o-takashi@sakamocchi.jp
[ Upstream commit 644b2e97405b0b74845e1d3c2b4fe4c34858062b ]
This commit fixes hard-coded model-id for an unit of Apogee Ensemble with a correct value. This unit uses DM1500 ASIC produced ArchWave AG (formerly known as BridgeCo AG).
I note that this model supports three modes in the number of data channels in tx/rx streams; 8 ch pairs, 10 ch pairs, 18 ch pairs. The mode is switched by Vendor-dependent AV/C command, like:
$ cd linux-firewire-utils $ ./firewire-request /dev/fw1 fcp 0x00ff000003dbeb0600000000 (8ch pairs) $ ./firewire-request /dev/fw1 fcp 0x00ff000003dbeb0601000000 (10ch pairs) $ ./firewire-request /dev/fw1 fcp 0x00ff000003dbeb0602000000 (18ch pairs)
When switching between different mode, the unit disappears from IEEE 1394 bus, then appears on the bus with different combination of stream formats. In a mode of 18 ch pairs, available sampling rate is up to 96.0 kHz, else up to 192.0 kHz.
$ ./hinawa-config-rom-printer /dev/fw1 { 'bus-info': { 'adj': False, 'bmc': True, 'chip_ID': 21474898341, 'cmc': True, 'cyc_clk_acc': 100, 'generation': 2, 'imc': True, 'isc': True, 'link_spd': 2, 'max_ROM': 1, 'max_rec': 512, 'name': '1394', 'node_vendor_ID': 987, 'pmc': False}, 'root-directory': [ ['HARDWARE_VERSION', 19], [ 'NODE_CAPABILITIES', { 'addressing': {'64': True, 'fix': True, 'prv': False}, 'misc': {'int': False, 'ms': False, 'spt': True}, 'state': { 'atn': False, 'ded': False, 'drq': True, 'elo': False, 'init': False, 'lst': True, 'off': False}, 'testing': {'bas': False, 'ext': False}}], ['VENDOR', 987], ['DESCRIPTOR', 'Apogee Electronics'], ['MODEL', 126702], ['DESCRIPTOR', 'Ensemble'], ['VERSION', 5297], [ 'UNIT', [ ['SPECIFIER_ID', 41005], ['VERSION', 65537], ['MODEL', 126702], ['DESCRIPTOR', 'Ensemble']]], [ 'DEPENDENT_INFO', [ ['SPECIFIER_ID', 2037], ['VERSION', 1], [(58, 'IMMEDIATE'), 16777159], [(59, 'IMMEDIATE'), 1048576], [(60, 'IMMEDIATE'), 16777159], [(61, 'IMMEDIATE'), 6291456]]]]}
Signed-off-by: Takashi Sakamoto o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- sound/firewire/bebob/bebob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c index 672d13488454..d91874275d2c 100644 --- a/sound/firewire/bebob/bebob.c +++ b/sound/firewire/bebob/bebob.c @@ -408,7 +408,7 @@ static const struct ieee1394_device_id bebob_id_table[] = { /* Apogee Electronics, DA/AD/DD-16X (X-FireWire card) */ SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x00010048, &spec_normal), /* Apogee Electronics, Ensemble */ - SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x00001eee, &spec_normal), + SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x01eeee, &spec_normal), /* ESI, Quatafire610 */ SND_BEBOB_DEV_ENTRY(VEN_ESI, 0x00010064, &spec_normal), /* AcousticReality, eARMasterOne */
From: Daniel Vetter daniel.vetter@ffwll.ch
[ Upstream commit 4f4b374332ec0ae9c738ff8ec9bed5cd97ff9adc ]
This is the much more correct fix for my earlier attempt at:
https://lkml.org/lkml/2018/12/10/118
Short recap:
- There's not actually a locking issue, it's just lockdep being a bit too eager to complain about a possible deadlock.
- Contrary to what I claimed the real problem is recursion on kn->count. Greg pointed me at sysfs_break_active_protection(), used by the scsi subsystem to allow a sysfs file to unbind itself. That would be a real deadlock, which isn't what's happening here. Also, breaking the active protection means we'd need to manually handle all the lifetime fun.
- With Rafael we discussed the task_work approach, which kinda works, but has two downsides: It's a functional change for a lockdep annotation issue, and it won't work for the bind file (which needs to get the errno from the driver load function back to userspace).
- Greg also asked why this never showed up: To hit this you need to unregister a 2nd driver from the unload code of your first driver. I guess only gpus do that. The bug has always been there, but only with a recent patch series did we add more locks so that lockdep built a chain from unbinding the snd-hda driver to the acpi_video_unregister call.
Full lockdep splat:
[12301.898799] ============================================ [12301.898805] WARNING: possible recursive locking detected [12301.898811] 4.20.0-rc7+ #84 Not tainted [12301.898815] -------------------------------------------- [12301.898821] bash/5297 is trying to acquire lock: [12301.898826] 00000000f61c6093 (kn->count#39){++++}, at: kernfs_remove_by_name_ns+0x3b/0x80 [12301.898841] but task is already holding lock: [12301.898847] 000000005f634021 (kn->count#39){++++}, at: kernfs_fop_write+0xdc/0x190 [12301.898856] other info that might help us debug this: [12301.898862] Possible unsafe locking scenario: [12301.898867] CPU0 [12301.898870] ---- [12301.898874] lock(kn->count#39); [12301.898879] lock(kn->count#39); [12301.898883] *** DEADLOCK *** [12301.898891] May be due to missing lock nesting notation [12301.898899] 5 locks held by bash/5297: [12301.898903] #0: 00000000cd800e54 (sb_writers#4){.+.+}, at: vfs_write+0x17f/0x1b0 [12301.898915] #1: 000000000465e7c2 (&of->mutex){+.+.}, at: kernfs_fop_write+0xd3/0x190 [12301.898925] #2: 000000005f634021 (kn->count#39){++++}, at: kernfs_fop_write+0xdc/0x190 [12301.898936] #3: 00000000414ef7ac (&dev->mutex){....}, at: device_release_driver_internal+0x34/0x240 [12301.898950] #4: 000000003218fbdf (register_count_mutex){+.+.}, at: acpi_video_unregister+0xe/0x40 [12301.898960] stack backtrace: [12301.898968] CPU: 1 PID: 5297 Comm: bash Not tainted 4.20.0-rc7+ #84 [12301.898974] Hardware name: Hewlett-Packard HP EliteBook 8460p/161C, BIOS 68SCF Ver. F.01 03/11/2011 [12301.898982] Call Trace: [12301.898989] dump_stack+0x67/0x9b [12301.898997] __lock_acquire+0x6ad/0x1410 [12301.899003] ? kernfs_remove_by_name_ns+0x3b/0x80 [12301.899010] ? find_held_lock+0x2d/0x90 [12301.899017] ? mutex_spin_on_owner+0xe4/0x150 [12301.899023] ? find_held_lock+0x2d/0x90 [12301.899030] ? lock_acquire+0x90/0x180 [12301.899036] lock_acquire+0x90/0x180 [12301.899042] ? kernfs_remove_by_name_ns+0x3b/0x80 [12301.899049] __kernfs_remove+0x296/0x310 [12301.899055] ? kernfs_remove_by_name_ns+0x3b/0x80 [12301.899060] ? kernfs_name_hash+0xd/0x80 [12301.899066] ? kernfs_find_ns+0x6c/0x100 [12301.899073] kernfs_remove_by_name_ns+0x3b/0x80 [12301.899080] bus_remove_driver+0x92/0xa0 [12301.899085] acpi_video_unregister+0x24/0x40 [12301.899127] i915_driver_unload+0x42/0x130 [i915] [12301.899160] i915_pci_remove+0x19/0x30 [i915] [12301.899169] pci_device_remove+0x36/0xb0 [12301.899176] device_release_driver_internal+0x185/0x240 [12301.899183] unbind_store+0xaf/0x180 [12301.899189] kernfs_fop_write+0x104/0x190 [12301.899195] __vfs_write+0x31/0x180 [12301.899203] ? rcu_read_lock_sched_held+0x6f/0x80 [12301.899209] ? rcu_sync_lockdep_assert+0x29/0x50 [12301.899216] ? __sb_start_write+0x13c/0x1a0 [12301.899221] ? vfs_write+0x17f/0x1b0 [12301.899227] vfs_write+0xb9/0x1b0 [12301.899233] ksys_write+0x50/0xc0 [12301.899239] do_syscall_64+0x4b/0x180 [12301.899247] entry_SYSCALL_64_after_hwframe+0x49/0xbe [12301.899253] RIP: 0033:0x7f452ac7f7a4 [12301.899259] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 80 00 00 00 00 8b 05 aa f0 2c 00 48 63 ff 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 55 53 48 89 d5 48 89 f3 48 83 [12301.899273] RSP: 002b:00007ffceafa6918 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [12301.899282] RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007f452ac7f7a4 [12301.899288] RDX: 000000000000000d RSI: 00005612a1abf7c0 RDI: 0000000000000001 [12301.899295] RBP: 00005612a1abf7c0 R08: 000000000000000a R09: 00005612a1c46730 [12301.899301] R10: 000000000000000a R11: 0000000000000246 R12: 000000000000000d [12301.899308] R13: 0000000000000001 R14: 00007f452af4a740 R15: 000000000000000d
Looking around I've noticed that usb and i2c already handle similar recursion problems, where a sysfs file can unbind the same type of sysfs somewhere else in the hierarchy. Relevant commits are:
commit 356c05d58af05d582e634b54b40050c73609617b Author: Alan Stern stern@rowland.harvard.edu Date: Mon May 14 13:30:03 2012 -0400
sysfs: get rid of some lockdep false positives
commit e9b526fe704812364bca07edd15eadeba163ebfb Author: Alexander Sverdlin alexander.sverdlin@nsn.com Date: Fri May 17 14:56:35 2013 +0200
i2c: suppress lockdep warning on delete_device
Implement the same trick for driver bind/unbind.
v2: Put the macro into bus.c (Greg).
Reviewed-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Cc: Ramalingam C ramalingam.c@intel.com Cc: Arend van Spriel aspriel@gmail.com Cc: Andy Shevchenko andriy.shevchenko@linux.intel.com Cc: Geert Uytterhoeven geert+renesas@glider.be Cc: Bartosz Golaszewski brgl@bgdev.pl Cc: Heikki Krogerus heikki.krogerus@linux.intel.com Cc: Vivek Gautam vivek.gautam@codeaurora.org Cc: Joe Perches joe@perches.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/base/bus.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 8bfd27ec73d6..585e2e1c9c8f 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -31,6 +31,9 @@ static struct kset *system_kset;
#define to_drv_attr(_attr) container_of(_attr, struct driver_attribute, attr)
+#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ + struct driver_attribute driver_attr_##_name = \ + __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
static int __must_check bus_rescan_devices_helper(struct device *dev, void *data); @@ -195,7 +198,7 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf, bus_put(bus); return err; } -static DRIVER_ATTR_WO(unbind); +static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, S_IWUSR, NULL, unbind_store);
/* * Manually attach a device to a driver. @@ -231,7 +234,7 @@ static ssize_t bind_store(struct device_driver *drv, const char *buf, bus_put(bus); return err; } -static DRIVER_ATTR_WO(bind); +static DRIVER_ATTR_IGNORE_LOCKDEP(bind, S_IWUSR, NULL, bind_store);
static ssize_t show_drivers_autoprobe(struct bus_type *bus, char *buf) {
From: Parvi Kaustubhi pkaustub@cisco.com
[ Upstream commit 8036e90f92aae2784b855a0007ae2d8154d28b3c ]
Acquiring the rtnl lock while holding usdev_lock could result in a deadlock.
For example:
usnic_ib_query_port() | mutex_lock(&us_ibdev->usdev_lock) | ib_get_eth_speed() | rtnl_lock()
rtnl_lock() | usnic_ib_netdevice_event() | mutex_lock(&us_ibdev->usdev_lock)
This commit moves the usdev_lock acquisition after the rtnl lock has been released.
This is safe to do because usdev_lock is not protecting anything being accessed in ib_get_eth_speed(). Hence, the correct order of holding locks (rtnl -> usdev_lock) is not violated.
Signed-off-by: Parvi Kaustubhi pkaustub@cisco.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 0b91ff36768a..598e23cf01fc 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -336,13 +336,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
usnic_dbg("\n");
- mutex_lock(&us_ibdev->usdev_lock); if (ib_get_eth_speed(ibdev, port, &props->active_speed, - &props->active_width)) { - mutex_unlock(&us_ibdev->usdev_lock); + &props->active_width)) return -EINVAL; - }
+ /* + * usdev_lock is acquired after (and not before) ib_get_eth_speed call + * because acquiring rtnl_lock in ib_get_eth_speed, while holding + * usdev_lock could lead to a deadlock. + */ + mutex_lock(&us_ibdev->usdev_lock); /* props being zeroed by the caller, avoid zeroing it here */
props->lid = 0;
From: Stephan Günther moepi@moepi.net
[ Upstream commit 23c3828aa2f84edec7020c7397a22931e7a879e1 ]
With commit 09c2f95ad404 ("scsi: mpt3sas: Swap I/O memory read value back to cpu endianness"), 64bit writes in _base_writeq() were rewritten to use __raw_writeq() instad of writeq().
This introduced a bug apparent on powerpc64 systems such as the Raptor Talos II that causes the HBA to drop from the PCIe bus under heavy load and being reinitialized after a couple of seconds.
It can easily be triggered on affacted systems by using something like
fio --name=random-write --iodepth=4 --rw=randwrite --bs=4k --direct=0 \ --size=128M --numjobs=64 --end_fsync=1 fio --name=random-write --iodepth=4 --rw=randwrite --bs=64k --direct=0 \ --size=128M --numjobs=64 --end_fsync=1
a couple of times. In my case I tested it on both a ZFS raidz2 and a btrfs raid6 using LSI 9300-8i and 9400-8i controllers.
The fix consists in resembling the write ordering of writeq() by adding a mandatory write memory barrier before device access and a compiler barrier afterwards. The additional MMIO barrier is superfluous.
Signed-off-by: Stephan Günther moepi@moepi.net Reported-by: Matt Corallo linux@bluematt.me Acked-by: Sreekanth Reddy Sreekanth.Reddy@broadcom.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/mpt3sas/mpt3sas_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 2500377d0723..bfd826deabbe 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3319,8 +3319,9 @@ _base_mpi_ep_writeq(__u64 b, volatile void __iomem *addr, static inline void _base_writeq(__u64 b, volatile void __iomem *addr, spinlock_t *writeq_lock) { + wmb(); __raw_writeq(b, addr); - mmiowb(); + barrier(); } #else static inline void
From: Kevin Barnett kevin.barnett@microsemi.com
[ Upstream commit 2ba55c9851d74eb015a554ef69ddf2ef061d5780 ]
Problem: The Linux kernel takes a logical volume offline after a LUN reset. This is generally accompanied by this message in the dmesg output:
Device offlined - not ready after error recovery
Root Cause: The root cause is a "quirk" in the timeout handling in the Linux SCSI layer. The Linux kernel places a 30-second timeout on most media access commands (reads and writes) that it send to device drivers. When a media access command times out, the Linux kernel goes into error recovery mode for the LUN that was the target of the command that timed out. Every command that timed out is kept on a list inside of the Linux kernel to be retried later. The kernel attempts to recover the command(s) that timed out by issuing a LUN reset followed by a TEST UNIT READY. If the LUN reset and TEST UNIT READY commands are successful, the kernel retries the command(s) that timed out.
Each SCSI command issued by the kernel has a result field associated with it. This field indicates the final result of the command (success or error). When a command times out, the kernel places a value in this result field indicating that the command timed out.
The "quirk" is that after the LUN reset and TEST UNIT READY commands are completed, the kernel checks each command on the timed-out command list before retrying it. If the result field is still "timed out", the kernel treats that command as not having been successfully recovered for a retry. If the number of commands that are in this state are greater than two, the kernel takes the LUN offline.
Fix: When our RAIDStack receives a LUN reset, it simply waits until all outstanding commands complete. Generally, all of these outstanding commands complete successfully. Therefore, the fix in the smartpqi driver is to always set the command result field to indicate success when a request completes successfully. This normally isn’t necessary because the result field is always initialized to success when the command is submitted to the driver. So when the command completes successfully, the result field is left untouched. But in this case, the kernel changes the result field behind the driver’s back and then expects the field to be changed by the driver as the commands that timed-out complete.
Reviewed-by: Dave Carroll david.carroll@microsemi.com Reviewed-by: Scott Teel scott.teel@microsemi.com Signed-off-by: Kevin Barnett kevin.barnett@microsemi.com Signed-off-by: Don Brace don.brace@microsemi.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/smartpqi/smartpqi_init.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index a25a07a0b7f0..c1efc182f5ea 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -2704,6 +2704,9 @@ static unsigned int pqi_process_io_intr(struct pqi_ctrl_info *ctrl_info, switch (response->header.iu_type) { case PQI_RESPONSE_IU_RAID_PATH_IO_SUCCESS: case PQI_RESPONSE_IU_AIO_PATH_IO_SUCCESS: + if (io_request->scmd) + io_request->scmd->result = 0; + /* fall through */ case PQI_RESPONSE_IU_GENERAL_MANAGEMENT: break; case PQI_RESPONSE_IU_TASK_MANAGEMENT:
From: Zhi Chen zhichen@codeaurora.org
[ Upstream commit 2d3b55853b123c177037cf534c5aaa2650310094 ]
There was a race condition in SMP that an ath10k_peer was created but its member sta was null. Following are procedures of ath10k_peer creation and member sta access in peer statistics path.
1. Peer creation: ath10k_peer_create() =>ath10k_wmi_peer_create() =>ath10k_wait_for_peer_created() ...
# another kernel path, RX from firmware ath10k_htt_t2h_msg_handler() =>ath10k_peer_map_event() =>wake_up() # ar->peer_map[id] = peer //add peer to map
#wake up original path from waiting ... # peer->sta = sta //sta assignment
2. RX path of statistics ath10k_htt_t2h_msg_handler() =>ath10k_update_per_peer_tx_stats() =>ath10k_htt_fetch_peer_stats() # peer->sta //sta accessing
Any access of peer->sta after peer was added to peer_map but before sta was assigned could cause a null pointer issue. And because these two steps are asynchronous, no proper lock can protect them. So both peer and sta need to be checked before access.
Tested: QCA9984 with firmware ver 10.4-3.9.0.1-00005 Signed-off-by: Zhi Chen zhichen@codeaurora.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/wireless/ath/ath10k/debugfs_sta.c | 2 +- drivers/net/wireless/ath/ath10k/htt_rx.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/debugfs_sta.c b/drivers/net/wireless/ath/ath10k/debugfs_sta.c index b09cdc699c69..38afbbd9fb44 100644 --- a/drivers/net/wireless/ath/ath10k/debugfs_sta.c +++ b/drivers/net/wireless/ath/ath10k/debugfs_sta.c @@ -71,7 +71,7 @@ void ath10k_sta_update_rx_tid_stats_ampdu(struct ath10k *ar, u16 peer_id, u8 tid spin_lock_bh(&ar->data_lock);
peer = ath10k_peer_find_by_id(ar, peer_id); - if (!peer) + if (!peer || !peer->sta) goto out;
arsta = (struct ath10k_sta *)peer->sta->drv_priv; diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index ffec98f7be50..2c2761d04d01 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2832,7 +2832,7 @@ static void ath10k_htt_fetch_peer_stats(struct ath10k *ar, rcu_read_lock(); spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find_by_id(ar, peer_id); - if (!peer) { + if (!peer || !peer->sta) { ath10k_warn(ar, "Invalid peer id %d peer stats buffer\n", peer_id); goto out; @@ -2885,7 +2885,7 @@ static void ath10k_fetch_10_2_tx_stats(struct ath10k *ar, u8 *data) rcu_read_lock(); spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find_by_id(ar, peer_id); - if (!peer) { + if (!peer || !peer->sta) { ath10k_warn(ar, "Invalid peer id %d in peer stats buffer\n", peer_id); goto out;
From: Yanjiang Jin yanjiang.jin@hxt-semitech.com
[ Upstream commit e57b2945aa654e48f85a41e8917793c64ecb9de8 ]
We must free all irqs during shutdown, else kexec's 2nd kernel would hang in pqi_wait_for_completion_io() as below:
Call trace:
pqi_wait_for_completion_io pqi_submit_raid_request_synchronous.constprop.78+0x23c/0x310 [smartpqi] pqi_configure_events+0xec/0x1f8 [smartpqi] pqi_ctrl_init+0x814/0xca0 [smartpqi] pqi_pci_probe+0x400/0x46c [smartpqi] local_pci_probe+0x48/0xb0 pci_device_probe+0x14c/0x1b0 really_probe+0x218/0x3fc driver_probe_device+0x70/0x140 __driver_attach+0x11c/0x134 bus_for_each_dev+0x70/0xc8 driver_attach+0x30/0x38 bus_add_driver+0x1f0/0x294 driver_register+0x74/0x12c __pci_register_driver+0x64/0x70 pqi_init+0xd0/0x10000 [smartpqi] do_one_initcall+0x60/0x1d8 do_init_module+0x64/0x1f8 load_module+0x10ec/0x1350 __se_sys_finit_module+0xd4/0x100 __arm64_sys_finit_module+0x28/0x34 el0_svc_handler+0x104/0x160 el0_svc+0x8/0xc
This happens only in the following combinations:
1. smartpqi is built as module, not built-in; 2. We have a disk connected to smartpqi card; 3. Both kexec's 1st and 2nd kernels use this disk as Rootfs' mount point.
Signed-off-by: Yanjiang Jin yanjiang.jin@hxt-semitech.com Acked-by: Don Brace don.brace@microsemi.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/smartpqi/smartpqi_init.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index c1efc182f5ea..6f4cb3be97aa 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6673,6 +6673,7 @@ static void pqi_shutdown(struct pci_dev *pci_dev) * storage. */ rc = pqi_flush_cache(ctrl_info, SHUTDOWN); + pqi_free_interrupts(ctrl_info); pqi_reset(ctrl_info); if (rc == 0) return;
From: Suraj Jitindar Singh sjitindarsingh@gmail.com
[ Upstream commit 8400f8740651c1a3081c30b46004451c448f4d5f ]
Nested rmap entries are used to store the translation from L1 gpa to L2 gpa when entries are inserted into the shadow (nested) page tables. This rmap list is located by indexing the rmap array in the memslot by L1 gfn. When we come to search for these entries we only know the L1 page size (which could be PAGE_SIZE, 2M or a 1G page) and so can only select a gfn aligned to that size. This means that when we insert the entry, so we can find it later, we need to align the gfn we use to select the rmap list in which to insert the entry to L1 page size as well.
By not doing this we were missing nested rmap entries when modifying L1 ptes which were for a page also passed through to an L2 guest.
Signed-off-by: Suraj Jitindar Singh sjitindarsingh@gmail.com Signed-off-by: Paul Mackerras paulus@ozlabs.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/kvm/book3s_hv_nested.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 401d2ecbebc5..fc64535e4c00 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1220,6 +1220,8 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, return ret; shift = kvmppc_radix_level_to_shift(level); } + /* Align gfn to the start of the page */ + gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;
/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
From: Suraj Jitindar Singh sjitindarsingh@gmail.com
[ Upstream commit 8b23eee4e55a32a2b51a180dfd27a8d214acc7a1 ]
The shadow page table contains ptes for translations from nested guest address to host address. Currently when creating these ptes we take the rc bits from the pte for the L1 guest address to host address translation. This is incorrect as we must also factor in the rc bits from the pte for the nested guest address to L1 guest address translation (as contained in the L1 guest partition table for the nested guest).
By not calculating these bits correctly L1 may not have been correctly notified when it needed to update its rc bits in the partition table it maintains for its nested guest.
Modify the code so that the rc bits in the resultant pte for the L2->L0 translation are the 'and' of the rc bits in the L2->L1 pte and the L1->L0 pte, also accounting for whether this was a write access when setting the dirty bit.
Signed-off-by: Suraj Jitindar Singh sjitindarsingh@gmail.com Signed-off-by: Paul Mackerras paulus@ozlabs.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/kvm/book3s_hv_nested.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index fc64535e4c00..f8176ae3a5a7 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1229,6 +1229,9 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, perm |= gpte.may_read ? 0UL : _PAGE_READ; perm |= gpte.may_write ? 0UL : _PAGE_WRITE; perm |= gpte.may_execute ? 0UL : _PAGE_EXEC; + /* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */ + perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED; + perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY; pte = __pte(pte_val(pte) & ~perm);
/* What size pte can we insert? */
From: Qian Cai cai@lca.pw
[ Upstream commit c7a082e4242fd8cd21a441071e622f87c16bdacc ]
UBSAN reported those with MegaRAID SAS-3 3108,
[ 77.467308] UBSAN: Undefined behaviour in drivers/scsi/megaraid/megaraid_sas_fp.c:117:32 [ 77.475402] index 255 is out of range for type 'MR_LD_SPAN_MAP [1]' [ 77.481677] CPU: 16 PID: 333 Comm: kworker/16:1 Not tainted 4.20.0-rc5+ #1 [ 77.488556] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.50 06/01/2018 [ 77.495791] Workqueue: events work_for_cpu_fn [ 77.500154] Call trace: [ 77.502610] dump_backtrace+0x0/0x2c8 [ 77.506279] show_stack+0x24/0x30 [ 77.509604] dump_stack+0x118/0x19c [ 77.513098] ubsan_epilogue+0x14/0x60 [ 77.516765] __ubsan_handle_out_of_bounds+0xfc/0x13c [ 77.521767] mr_update_load_balance_params+0x150/0x158 [megaraid_sas] [ 77.528230] MR_ValidateMapInfo+0x2cc/0x10d0 [megaraid_sas] [ 77.533825] megasas_get_map_info+0x244/0x2f0 [megaraid_sas] [ 77.539505] megasas_init_adapter_fusion+0x9b0/0xf48 [megaraid_sas] [ 77.545794] megasas_init_fw+0x1ab4/0x3518 [megaraid_sas] [ 77.551212] megasas_probe_one+0x2c4/0xbe0 [megaraid_sas] [ 77.556614] local_pci_probe+0x7c/0xf0 [ 77.560365] work_for_cpu_fn+0x34/0x50 [ 77.564118] process_one_work+0x61c/0xf08 [ 77.568129] worker_thread+0x534/0xa70 [ 77.571882] kthread+0x1c8/0x1d0 [ 77.575114] ret_from_fork+0x10/0x1c
[ 89.240332] UBSAN: Undefined behaviour in drivers/scsi/megaraid/megaraid_sas_fp.c:117:32 [ 89.248426] index 255 is out of range for type 'MR_LD_SPAN_MAP [1]' [ 89.254700] CPU: 16 PID: 95 Comm: kworker/u130:0 Not tainted 4.20.0-rc5+ #1 [ 89.261665] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.50 06/01/2018 [ 89.268903] Workqueue: events_unbound async_run_entry_fn [ 89.274222] Call trace: [ 89.276680] dump_backtrace+0x0/0x2c8 [ 89.280348] show_stack+0x24/0x30 [ 89.283671] dump_stack+0x118/0x19c [ 89.287167] ubsan_epilogue+0x14/0x60 [ 89.290835] __ubsan_handle_out_of_bounds+0xfc/0x13c [ 89.295828] MR_LdRaidGet+0x50/0x58 [megaraid_sas] [ 89.300638] megasas_build_io_fusion+0xbb8/0xd90 [megaraid_sas] [ 89.306576] megasas_build_and_issue_cmd_fusion+0x138/0x460 [megaraid_sas] [ 89.313468] megasas_queue_command+0x398/0x3d0 [megaraid_sas] [ 89.319222] scsi_dispatch_cmd+0x1dc/0x8a8 [ 89.323321] scsi_request_fn+0x8e8/0xdd0 [ 89.327249] __blk_run_queue+0xc4/0x158 [ 89.331090] blk_execute_rq_nowait+0xf4/0x158 [ 89.335449] blk_execute_rq+0xdc/0x158 [ 89.339202] __scsi_execute+0x130/0x258 [ 89.343041] scsi_probe_and_add_lun+0x2fc/0x1488 [ 89.347661] __scsi_scan_target+0x1cc/0x8c8 [ 89.351848] scsi_scan_channel.part.3+0x8c/0xc0 [ 89.356382] scsi_scan_host_selected+0x130/0x1f0 [ 89.361002] do_scsi_scan_host+0xd8/0xf0 [ 89.364927] do_scan_async+0x9c/0x320 [ 89.368594] async_run_entry_fn+0x138/0x420 [ 89.372780] process_one_work+0x61c/0xf08 [ 89.376793] worker_thread+0x13c/0xa70 [ 89.380546] kthread+0x1c8/0x1d0 [ 89.383778] ret_from_fork+0x10/0x1c
This is because when populating Driver Map using firmware raid map, all non-existing VDs set their ldTgtIdToLd to 0xff, so it can be skipped later.
From drivers/scsi/megaraid/megaraid_sas_base.c ,
memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
From drivers/scsi/megaraid/megaraid_sas_fp.c ,
/* For non existing VDs, iterate to next VD*/ if (ld >= (MAX_LOGICAL_DRIVES_EXT - 1)) continue;
However, there are a few places that failed to skip those non-existing VDs due to off-by-one errors. Then, those 0xff leaked into MR_LdRaidGet(0xff, map) and triggered the out-of-bound accesses.
Fixes: 51087a8617fe ("megaraid_sas : Extended VD support") Signed-off-by: Qian Cai cai@lca.pw Acked-by: Sumit Saxena sumit.saxena@broadcom.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 59ecbb3b53b5..a33628550425 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -1266,7 +1266,7 @@ void mr_update_load_balance_params(struct MR_DRV_RAID_MAP_ALL *drv_map,
for (ldCount = 0; ldCount < MAX_LOGICAL_DRIVES_EXT; ldCount++) { ld = MR_TargetIdToLdGet(ldCount, drv_map); - if (ld >= MAX_LOGICAL_DRIVES_EXT) { + if (ld >= MAX_LOGICAL_DRIVES_EXT - 1) { lbInfo[ldCount].loadBalanceFlag = 0; continue; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index f74b5ea24f0f..49eaa87608f6 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2832,7 +2832,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance, device_id < instance->fw_supported_vd_count)) {
ld = MR_TargetIdToLdGet(device_id, local_map_ptr); - if (ld >= instance->fw_supported_vd_count) + if (ld >= instance->fw_supported_vd_count - 1) fp_possible = 0; else { raid = MR_LdRaidGet(ld, local_map_ptr);
From: Eric Sandeen sandeen@redhat.com
[ Upstream commit 3cc31fa65d85610574c0f6a474e89f4c419923d5 ]
iomap_is_partially_uptodate() is intended to check wither blocks within the selected range of a not-uptodate page are uptodate; if the range we care about is up to date, it's an optimization.
However, the iomap implementation continues to check all blocks up to from+count, which is beyond the page, and can even be well beyond the iop->uptodate bitmap.
I think the worst that will happen is that we may eventually find a zero bit and return "not partially uptodate" when it would have otherwise returned true, and skip the optimization. Still, it's clearly an invalid memory access that must be fixed.
So: fix this by limiting the search to within the page as is done in the non-iomap variant, block_is_partially_uptodate().
Zorro noticed thiswhen KASAN went off for 512 byte blocks on a 64k page system:
BUG: KASAN: slab-out-of-bounds in iomap_is_partially_uptodate+0x1a0/0x1e0 Read of size 8 at addr ffff800120c3a318 by task fsstress/22337
Reported-by: Zorro Lang zlang@redhat.com Signed-off-by: Eric Sandeen sandeen@redhat.com Signed-off-by: Eric Sandeen sandeen@sandeen.net Reviewed-by: Darrick J. Wong darrick.wong@oracle.com Signed-off-by: Darrick J. Wong darrick.wong@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/iomap.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/fs/iomap.c b/fs/iomap.c index d6bc98ae8d35..ce837d962d47 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -492,16 +492,29 @@ iomap_readpages(struct address_space *mapping, struct list_head *pages, } EXPORT_SYMBOL_GPL(iomap_readpages);
+/* + * iomap_is_partially_uptodate checks whether blocks within a page are + * uptodate or not. + * + * Returns true if all blocks which correspond to a file portion + * we want to read within the page are uptodate. + */ int iomap_is_partially_uptodate(struct page *page, unsigned long from, unsigned long count) { struct iomap_page *iop = to_iomap_page(page); struct inode *inode = page->mapping->host; - unsigned first = from >> inode->i_blkbits; - unsigned last = (from + count - 1) >> inode->i_blkbits; + unsigned len, first, last; unsigned i;
+ /* Limit range to one page */ + len = min_t(unsigned, PAGE_SIZE - from, count); + + /* First and last blocks in range within page */ + first = from >> inode->i_blkbits; + last = (from + len - 1) >> inode->i_blkbits; + if (iop) { for (i = first; i <= last; i++) if (!test_bit(i, iop->uptodate))
From: Ivan Mironov mironov.ivan@gmail.com
[ Upstream commit 38355a5f9a22bfa5bd5b1bb79805aca39fa53729 ]
This happened when I tried to boot normal Fedora 29 system with latest available kernel (from fedora rawhide, plus some unrelated custom patches):
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0010 [#1] SMP PTI CPU: 6 PID: 1422 Comm: libvirtd Tainted: G I 4.20.0-0.rc7.git3.hpsa2.1.fc29.x86_64 #1 Hardware name: HP ProLiant BL460c G6, BIOS I24 05/21/2018 RIP: 0010: (null) Code: Bad RIP value. RSP: 0018:ffffa47ccdc9fbe0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000000003e8 RCX: ffffa47ccdc9fbf8 RDX: ffffa47ccdc9fc00 RSI: ffff97d9ee7b01f8 RDI: ffff97d9f0150b80 RBP: ffff97d9f0150b80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000003 R13: ffff97d9ef1e53e8 R14: 0000000000000009 R15: ffff97d9f0ac6730 FS: 00007f4d224ef700(0000) GS:ffff97d9fa200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000011ece52006 CR4: 00000000000206e0 Call Trace: ? bnx2x_chip_cleanup+0x195/0x610 [bnx2x] ? bnx2x_nic_unload+0x1e2/0x8f0 [bnx2x] ? bnx2x_reload_if_running+0x24/0x40 [bnx2x] ? bnx2x_set_features+0x79/0xa0 [bnx2x] ? __netdev_update_features+0x244/0x9e0 ? netlink_broadcast_filtered+0x136/0x4b0 ? netdev_update_features+0x22/0x60 ? dev_disable_lro+0x1c/0xe0 ? devinet_sysctl_forward+0x1c6/0x211 ? proc_sys_call_handler+0xab/0x100 ? __vfs_write+0x36/0x1a0 ? rcu_read_lock_sched_held+0x79/0x80 ? rcu_sync_lockdep_assert+0x2e/0x60 ? __sb_start_write+0x14c/0x1b0 ? vfs_write+0x159/0x1c0 ? vfs_write+0xba/0x1c0 ? ksys_write+0x52/0xc0 ? do_syscall_64+0x60/0x1f0 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe
After some investigation I figured out that recently added cleanup code tries to call VLAN filtering de-initialization function which exist only for newer hardware. Corresponding function pointer is not set (== 0) for older hardware, namely these chips:
#define CHIP_NUM_57710 0x164e #define CHIP_NUM_57711 0x164f #define CHIP_NUM_57711E 0x1650
And I have one of those in my test system:
Broadcom Inc. and subsidiaries NetXtreme II BCM57711E 10-Gigabit PCIe [14e4:1650]
Function bnx2x_init_vlan_mac_fp_objs() from drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h decides whether to initialize relevant pointers in bnx2x_sp_objs.vlan_obj or not.
This regression was introduced after v4.20-rc7, and still exists in v4.20 release.
Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload sequence.") Signed-off-by: Ivan Mironov mironov.ivan@gmail.com Signed-off-by: Ivan Mironov mironov.ivan@gmail.com Acked-by: Sudarsana Kalluru Sudarsana.Kalluru@cavium.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index b164f705709d..3b5b47e98c73 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9360,10 +9360,16 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) BNX2X_ERR("Failed to schedule DEL commands for UC MACs list: %d\n", rc);
- /* Remove all currently configured VLANs */ - rc = bnx2x_del_all_vlans(bp); - if (rc < 0) - BNX2X_ERR("Failed to delete all VLANs\n"); + /* The whole *vlan_obj structure may be not initialized if VLAN + * filtering offload is not supported by hardware. Currently this is + * true for all hardware covered by CHIP_IS_E1x(). + */ + if (!CHIP_IS_E1x(bp)) { + /* Remove all currently configured VLANs */ + rc = bnx2x_del_all_vlans(bp); + if (rc < 0) + BNX2X_ERR("Failed to delete all VLANs\n"); + }
/* Disable LLH */ if (!CHIP_IS_E1(bp))
From: Junxiao Bi junxiao.bi@oracle.com
[ Upstream commit 532e1e54c8140188e192348c790317921cb2dc1c ]
mount.ocfs2 ignore the inconsistent error that journal is clean but local alloc is unrecovered. After mount, local alloc not empty, then reserver cluster didn't alloc a new local alloc window, reserveration map is empty(ocfs2_reservation_map.m_bitmap_len = 0), that triggered the following panic.
This issue was reported at
https://oss.oracle.com/pipermail/ocfs2-devel/2015-May/010854.html
and was advised to fixed during mount. But this is a very unusual inconsistent state, usually journal dirty flag should be cleared at the last stage of umount until every other things go right. We may need do further debug to check that. Any way to avoid possible futher corruption, mount should be abort and fsck should be run.
(mount.ocfs2,1765,1):ocfs2_load_local_alloc:353 ERROR: Local alloc hasn't been recovered! found = 6518, set = 6518, taken = 8192, off = 15912372 ocfs2: Mounting device (202,64) on (node 0, slot 3) with ordered data mode. o2dlm: Joining domain 89CEAC63CC4F4D03AC185B44E0EE0F3F ( 0 1 2 3 4 5 6 8 ) 8 nodes ocfs2: Mounting device (202,80) on (node 0, slot 3) with ordered data mode. o2hb: Region 89CEAC63CC4F4D03AC185B44E0EE0F3F (xvdf) is now a quorum device o2net: Accepted connection from node yvwsoa17p (num 7) at 172.22.77.88:7777 o2dlm: Node 7 joins domain 64FE421C8C984E6D96ED12C55FEE2435 ( 0 1 2 3 4 5 6 7 8 ) 9 nodes o2dlm: Node 7 joins domain 89CEAC63CC4F4D03AC185B44E0EE0F3F ( 0 1 2 3 4 5 6 7 8 ) 9 nodes ------------[ cut here ]------------ kernel BUG at fs/ocfs2/reservations.c:507! invalid opcode: 0000 [#1] SMP Modules linked in: ocfs2 rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs fscache lockd grace ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sunrpc ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 ovmapi ppdev parport_pc parport xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea acpi_cpufreq pcspkr i2c_piix4 i2c_core sg ext4 jbd2 mbcache2 sr_mod cdrom xen_blkfront pata_acpi ata_generic ata_piix floppy dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 4349 Comm: startWebLogic.s Not tainted 4.1.12-124.19.2.el6uek.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 09/06/2018 task: ffff8803fb04e200 ti: ffff8800ea4d8000 task.ti: ffff8800ea4d8000 RIP: 0010:[<ffffffffa05e96a8>] [<ffffffffa05e96a8>] __ocfs2_resv_find_window+0x498/0x760 [ocfs2] Call Trace: ocfs2_resmap_resv_bits+0x10d/0x400 [ocfs2] ocfs2_claim_local_alloc_bits+0xd0/0x640 [ocfs2] __ocfs2_claim_clusters+0x178/0x360 [ocfs2] ocfs2_claim_clusters+0x1f/0x30 [ocfs2] ocfs2_convert_inline_data_to_extents+0x634/0xa60 [ocfs2] ocfs2_write_begin_nolock+0x1c6/0x1da0 [ocfs2] ocfs2_write_begin+0x13e/0x230 [ocfs2] generic_perform_write+0xbf/0x1c0 __generic_file_write_iter+0x19c/0x1d0 ocfs2_file_write_iter+0x589/0x1360 [ocfs2] __vfs_write+0xb8/0x110 vfs_write+0xa9/0x1b0 SyS_write+0x46/0xb0 system_call_fastpath+0x18/0xd7 Code: ff ff 8b 75 b8 39 75 b0 8b 45 c8 89 45 98 0f 84 e5 fe ff ff 45 8b 74 24 18 41 8b 54 24 1c e9 56 fc ff ff 85 c0 0f 85 48 ff ff ff <0f> 0b 48 8b 05 cf c3 de ff 48 ba 00 00 00 00 00 00 00 10 48 85 RIP __ocfs2_resv_find_window+0x498/0x760 [ocfs2] RSP <ffff8800ea4db668> ---[ end trace 566f07529f2edf3c ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled
Link: http://lkml.kernel.org/r/20181121020023.3034-2-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi junxiao.bi@oracle.com Reviewed-by: Yiwen Jiang jiangyiwen@huawei.com Acked-by: Joseph Qi jiangqi903@gmail.com Cc: Jun Piao piaojun@huawei.com Cc: Mark Fasheh mfasheh@versity.com Cc: Joel Becker jlbec@evilplan.org Cc: Changwei Ge ge.changwei@h3c.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ocfs2/localalloc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 7642b6712c39..30208233f65b 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -345,13 +345,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) if (num_used || alloc->id1.bitmap1.i_used || alloc->id1.bitmap1.i_total - || la->la_bm_off) - mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" + || la->la_bm_off) { + mlog(ML_ERROR, "inconsistent detected, clean journal with" + " unrecovered local alloc, please run fsck.ocfs2!\n" "found = %u, set = %u, taken = %u, off = %u\n", num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), le32_to_cpu(alloc->id1.bitmap1.i_total), OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
+ status = -EINVAL; + goto bail; + } + osb->local_alloc_bh = alloc_bh; osb->local_alloc_state = OCFS2_LA_ENABLED;
From: Brian Foster bfoster@redhat.com
[ Upstream commit 3fa750dcf29e8606e3969d13d8e188cc1c0f511d ]
write_cache_pages() is used in both background and integrity writeback scenarios by various filesystems. Background writeback is mostly concerned with cleaning a certain number of dirty pages based on various mm heuristics. It may not write the full set of dirty pages or wait for I/O to complete. Integrity writeback is responsible for persisting a set of dirty pages before the writeback job completes. For example, an fsync() call must perform integrity writeback to ensure data is on disk before the call returns.
write_cache_pages() unconditionally breaks out of its processing loop in the event of a ->writepage() error. This is fine for background writeback, which had no strict requirements and will eventually come around again. This can cause problems for integrity writeback on filesystems that might need to clean up state associated with failed page writeouts. For example, XFS performs internal delayed allocation accounting before returning a ->writepage() error, where applicable. If the current writeback happens to be associated with an unmount and write_cache_pages() completes the writeback prematurely due to error, the filesystem is unmounted in an inconsistent state if dirty+delalloc pages still exist.
To handle this problem, update write_cache_pages() to always process the full set of pages for integrity writeback regardless of ->writepage() errors. Save the first encountered error and return it to the caller once complete. This facilitates XFS (or any other fs that expects integrity writeback to process the entire set of dirty pages) to clean up its internal state completely in the event of persistent mapping errors. Background writeback continues to exit on the first error encountered.
[akpm@linux-foundation.org: fix typo in comment] Link: http://lkml.kernel.org/r/20181116134304.32440-1-bfoster@redhat.com Signed-off-by: Brian Foster bfoster@redhat.com Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/page-writeback.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3f690bae6b78..7d1010453fb9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2154,6 +2154,7 @@ int write_cache_pages(struct address_space *mapping, { int ret = 0; int done = 0; + int error; struct pagevec pvec; int nr_pages; pgoff_t uninitialized_var(writeback_index); @@ -2227,25 +2228,31 @@ int write_cache_pages(struct address_space *mapping, goto continue_unlock;
trace_wbc_writepage(wbc, inode_to_bdi(mapping->host)); - ret = (*writepage)(page, wbc, data); - if (unlikely(ret)) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { + error = (*writepage)(page, wbc, data); + if (unlikely(error)) { + /* + * Handle errors according to the type of + * writeback. There's no need to continue for + * background writeback. Just push done_index + * past this page so media errors won't choke + * writeout for the entire file. For integrity + * writeback, we must process the entire dirty + * set regardless of errors because the fs may + * still have state to clear for each page. In + * that case we continue processing and return + * the first error. + */ + if (error == AOP_WRITEPAGE_ACTIVATE) { unlock_page(page); - ret = 0; - } else { - /* - * done_index is set past this page, - * so media errors will not choke - * background writeout for the entire - * file. This has consequences for - * range_cyclic semantics (ie. it may - * not be suitable for data integrity - * writeout). - */ + error = 0; + } else if (wbc->sync_mode != WB_SYNC_ALL) { + ret = error; done_index = page->index + 1; done = 1; break; } + if (!ret) + ret = error; }
/*
From: Aaron Lu aaron.lu@intel.com
[ Upstream commit 66f71da9dd38af17dc17209cdde7987d4679a699 ]
Since a2468cc9bfdf ("swap: choose swap device according to numa node"), avail_lists field of swap_info_struct is changed to an array with MAX_NUMNODES elements. This made swap_info_struct size increased to 40KiB and needs an order-4 page to hold it.
This is not optimal in that: 1 Most systems have way less than MAX_NUMNODES(1024) nodes so it is a waste of memory; 2 It could cause swapon failure if the swap device is swapped on after system has been running for a while, due to no order-4 page is available as pointed out by Vasily Averin.
Solve the above two issues by using nr_node_ids(which is the actual possible node number the running system has) for avail_lists instead of MAX_NUMNODES.
nr_node_ids is unknown at compile time so can't be directly used when declaring this array. What I did here is to declare avail_lists as zero element array and allocate space for it when allocating space for swap_info_struct. The reason why keep using array but not pointer is plist_for_each_entry needs the field to be part of the struct, so pointer will not work.
This patch is on top of Vasily Averin's fix commit. I think the use of kvzalloc for swap_info_struct is still needed in case nr_node_ids is really big on some systems.
Link: http://lkml.kernel.org/r/20181115083847.GA11129@intel.com Signed-off-by: Aaron Lu aaron.lu@intel.com Reviewed-by: Andrew Morton akpm@linux-foundation.org Acked-by: Michal Hocko mhocko@suse.com Cc: Vasily Averin vvs@virtuozzo.com Cc: Huang Ying ying.huang@intel.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/swap.h | 11 ++++++++++- mm/swapfile.c | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h index d8a07a4f171d..3d3630b3f63d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -233,7 +233,6 @@ struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ - struct plist_node avail_lists[MAX_NUMNODES];/* entry in swap_avail_heads */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ @@ -274,6 +273,16 @@ struct swap_info_struct { */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ + struct plist_node avail_lists[0]; /* + * entries in swap_avail_heads, one + * entry per node. + * Must be last as the number of the + * array is nr_node_ids, which is not + * a fixed value so have to allocate + * dynamically. + * And it has to be an array so that + * plist_for_each_* can work. + */ };
#ifdef CONFIG_64BIT diff --git a/mm/swapfile.c b/mm/swapfile.c index 8688ae65ef58..6e06821623f6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2812,8 +2812,9 @@ static struct swap_info_struct *alloc_swap_info(void) struct swap_info_struct *p; unsigned int type; int i; + int size = sizeof(*p) + nr_node_ids * sizeof(struct plist_node);
- p = kvzalloc(sizeof(*p), GFP_KERNEL); + p = kvzalloc(size, GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM);
From: Peter Xu peterx@redhat.com
[ Upstream commit 3cfd22be0ad663248fadfc8f6ffa3e255c394552 ]
When the process being tracked does mremap() without UFFD_FEATURE_EVENT_REMAP on the corresponding tracking uffd file handle, we should not generate the remap event, and at the same time we should clear all the uffd flags on the new VMA. Without this patch, we can still have the VM_UFFD_MISSING|VM_UFFD_WP flags on the new VMA even the fault handling process does not even know the existance of the VMA.
Link: http://lkml.kernel.org/r/20181211053409.20317-1-peterx@redhat.com Signed-off-by: Peter Xu peterx@redhat.com Reviewed-by: Andrea Arcangeli aarcange@redhat.com Acked-by: Mike Rapoport rppt@linux.vnet.ibm.com Reviewed-by: William Kucharski william.kucharski@oracle.com Cc: Andrea Arcangeli aarcange@redhat.com Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: Kirill A. Shutemov kirill@shutemov.name Cc: Hugh Dickins hughd@google.com Cc: Pavel Emelyanov xemul@virtuozzo.com Cc: Pravin Shedge pravin.shedge4linux@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/userfaultfd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 7a85e609fc27..d8b8323e80f4 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -736,10 +736,18 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, struct userfaultfd_ctx *ctx;
ctx = vma->vm_userfaultfd_ctx.ctx; - if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) { + + if (!ctx) + return; + + if (ctx->features & UFFD_FEATURE_EVENT_REMAP) { vm_ctx->ctx = ctx; userfaultfd_ctx_get(ctx); WRITE_ONCE(ctx->mmap_changing, true); + } else { + /* Drop uffd context if remap feature not enabled */ + vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); } }
From: Michal Hocko mhocko@suse.com
[ Upstream commit 7550c6079846a24f30d15ac75a941c8515dbedfb ]
Patch series "THP eligibility reporting via proc".
This series of three patches aims at making THP eligibility reporting much more robust and long term sustainable. The trigger for the change is a regression report [2] and the long follow up discussion. In short the specific application didn't have good API to query whether a particular mapping can be backed by THP so it has used VMA flags to workaround that. These flags represent a deep internal state of VMAs and as such they should be used by userspace with a great deal of caution.
A similar has happened for [3] when users complained that VM_MIXEDMAP is no longer set on DAX mappings. Again a lack of a proper API led to an abuse.
The first patch in the series tries to emphasise that that the semantic of flags might change and any application consuming those should be really careful.
The remaining two patches provide a more suitable interface to address [2] and provide a consistent API to query the THP status both for each VMA and process wide as well. [1]
http://lkml.kernel.org/r/20181120103515.25280-1-mhocko@kernel.org [2] http://lkml.kernel.org/r/http://lkml.kernel.org/r/alpine.DEB.2.21.1809241054... [3] http://lkml.kernel.org/r/20181002100531.GC4135@quack2.suse.cz
This patch (of 3):
Even though vma flags exported via /proc/<pid>/smaps are explicitly documented to be not guaranteed for future compatibility the warning doesn't go far enough because it doesn't mention semantic changes to those flags. And they are important as well because these flags are a deep implementation internal to the MM code and the semantic might change at any time.
Let's consider two recent examples: http://lkml.kernel.org/r/20181002100531.GC4135@quack2.suse.cz : commit e1fb4a086495 "dax: remove VM_MIXEDMAP for fsdax and device dax" has : removed VM_MIXEDMAP flag from DAX VMAs. Now our testing shows that in the : mean time certain customer of ours started poking into /proc/<pid>/smaps : and looks at VMA flags there and if VM_MIXEDMAP is missing among the VMA : flags, the application just fails to start complaining that DAX support is : missing in the kernel.
http://lkml.kernel.org/r/alpine.DEB.2.21.1809241054050.224429@chino.kir.corp... : Commit 1860033237d4 ("mm: make PR_SET_THP_DISABLE immediately active") : introduced a regression in that userspace cannot always determine the set : of vmas where thp is ineligible. : Userspace relies on the "nh" flag being emitted as part of /proc/pid/smaps : to determine if a vma is eligible to be backed by hugepages. : Previous to this commit, prctl(PR_SET_THP_DISABLE, 1) would cause thp to : be disabled and emit "nh" as a flag for the corresponding vmas as part of : /proc/pid/smaps. After the commit, thp is disabled by means of an mm : flag and "nh" is not emitted. : This causes smaps parsing libraries to assume a vma is eligible for thp : and ends up puzzling the user on why its memory is not backed by thp.
In both cases userspace was relying on a semantic of a specific VMA flag. The primary reason why that happened is a lack of a proper interface. While this has been worked on and it will be fixed properly, it seems that our wording could see some refinement and be more vocal about semantic aspect of these flags as well.
Link: http://lkml.kernel.org/r/20181211143641.3503-2-mhocko@kernel.org Signed-off-by: Michal Hocko mhocko@suse.com Acked-by: Jan Kara jack@suse.cz Acked-by: Dan Williams dan.j.williams@intel.com Acked-by: David Rientjes rientjes@google.com Acked-by: Mike Rapoport rppt@linux.ibm.com Acked-by: Vlastimil Babka vbabka@suse.cz Cc: Dan Williams dan.j.williams@intel.com Cc: David Rientjes rientjes@google.com Cc: Paul Oppenheimer bepvte@gmail.com Cc: William Kucharski william.kucharski@oracle.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- Documentation/filesystems/proc.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 12a5e6e693b6..2a4e63f5122c 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -496,7 +496,9 @@ manner. The codes are the following:
Note that there is no guarantee that every flag and associated mnemonic will be present in all further kernel releases. Things get changed, the flags may -be vanished or the reverse -- new added. +be vanished or the reverse -- new added. Interpretation of their meaning +might change in future as well. So each consumer of these flags has to +follow each specific kernel version for the exact semantic.
This file is only present if the CONFIG_MMU kernel configuration option is enabled.
From: Qian Cai cai@gmx.us
[ Upstream commit fed84c78527009d4f799a3ed9a566502fa026d82 ]
Kmemleak does not play well with KASAN (tested on both HPE Apollo 70 and Huawei TaiShan 2280 aarch64 servers).
After calling start_kernel()->setup_arch()->kasan_init(), kmemleak early log buffer went from something like 280 to 260000 which caused kmemleak disabled and crash dump memory reservation failed. The multitude of kmemleak_alloc() calls is from nested loops while KASAN is setting up full memory mappings, so let early kmemleak allocations skip those memblock_alloc_internal() calls came from kasan_init() given that those early KASAN memory mappings should not reference to other memory. Hence, no kmemleak false positives.
kasan_init kasan_map_populate [1] kasan_pgd_populate [2] kasan_pud_populate [3] kasan_pmd_populate [4] kasan_pte_populate [5] kasan_alloc_zeroed_page memblock_alloc_try_nid memblock_alloc_internal kmemleak_alloc
[1] for_each_memblock(memory, reg) [2] while (pgdp++, addr = next, addr != end) [3] while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp))) [4] while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp))) [5] while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)))
Link: http://lkml.kernel.org/r/1543442925-17794-1-git-send-email-cai@gmx.us Signed-off-by: Qian Cai cai@gmx.us Acked-by: Catalin Marinas catalin.marinas@arm.com Cc: Michal Hocko mhocko@suse.com Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: Alexander Potapenko glider@google.com Cc: Dmitry Vyukov dvyukov@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/mm/kasan_init.c | 2 +- include/linux/memblock.h | 1 + mm/memblock.c | 19 +++++++++++-------- 3 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 63527e585aac..fcb2ca30b6f1 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -39,7 +39,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) { void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_ACCESSIBLE, node); + MEMBLOCK_ALLOC_KASAN, node); return __pa(p); }
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index aee299a6aa76..3ef3086ed52f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -320,6 +320,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r) /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 +#define MEMBLOCK_ALLOC_KASAN 1
/* We are using top down, so it is safe to use 0 here */ #define MEMBLOCK_LOW_LIMIT 0 diff --git a/mm/memblock.c b/mm/memblock.c index 81ae63ca78d0..f45a049532fe 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -262,7 +262,8 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, phys_addr_t kernel_end, ret;
/* pump up @end */ - if (end == MEMBLOCK_ALLOC_ACCESSIBLE) + if (end == MEMBLOCK_ALLOC_ACCESSIBLE || + end == MEMBLOCK_ALLOC_KASAN) end = memblock.current_limit;
/* avoid allocating the first page */ @@ -1412,13 +1413,15 @@ static void * __init memblock_alloc_internal( done: ptr = phys_to_virt(alloc);
- /* - * The min_count is set to 0 so that bootmem allocated blocks - * are never reported as leaks. This is because many of these blocks - * are only referred via the physical address which is not - * looked up by kmemleak. - */ - kmemleak_alloc(ptr, size, 0, 0); + /* Skip kmemleak for kasan_init() due to high volume. */ + if (max_addr != MEMBLOCK_ALLOC_KASAN) + /* + * The min_count is set to 0 so that bootmem allocated + * blocks are never reported as leaks. This is because many + * of these blocks are only referred via the physical + * address which is not looked up by kmemleak. + */ + kmemleak_alloc(ptr, size, 0, 0);
return ptr; }
linux-stable-mirror@lists.linaro.org