From: Sven Van Asbroeck thesven73@gmail.com
[ Upstream commit 62039b6aef63380ba7a37c113bbaeee8a55c5342 ]
When cancel_delayed_work() returns, the delayed work may still be running. This means that the core could potentially free the private structure (struct xadc) while the delayed work is still using it. This is a potential use-after-free.
Fix by calling cancel_delayed_work_sync(), which waits for any residual work to finish before returning.
Signed-off-by: Sven Van Asbroeck TheSven73@gmail.com Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 4a60497a1f19..e89711b30ae8 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1302,7 +1302,7 @@ static int xadc_remove(struct platform_device *pdev) } free_irq(irq, indio_dev); clk_disable_unprepare(xadc->clk); - cancel_delayed_work(&xadc->zynq_unmask_work); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); kfree(xadc->data); kfree(indio_dev->channels);
From: Kangjie Lu kjlu@umn.edu
[ Upstream commit 55c1fc0af29a6c1b92f217b7eb7581a882e0c07c ]
In case kmemdup fails, the fix goes to blk_err to avoid NULL pointer dereference.
Signed-off-by: Kangjie Lu kjlu@umn.edu Signed-off-by: Dan Williams dan.j.williams@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvdimm/namespace_devs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 50b01d3eadd9..e3f228af59d1 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2234,9 +2234,12 @@ struct device *create_namespace_blk(struct nd_region *nd_region, if (!nsblk->uuid) goto blk_err; memcpy(name, nd_label->name, NSLABEL_NAME_LEN); - if (name[0]) + if (name[0]) { nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, GFP_KERNEL); + if (!nsblk->alt_name) + goto blk_err; + } res = nsblk_add_resource(nd_region, ndd, nsblk, __le64_to_cpu(nd_label->dpa)); if (!res)
From: Dmitry Torokhov dmitry.torokhov@gmail.com
[ Upstream commit 96dd86871e1fffbc39e4fa61c9c75ec54ee9af0f ]
According to HUTRR77 usage 0x29f from the consumer page is reserved for the Desktop application to present all running user’s application windows. Linux defines KEY_SCALE to request Compiz Scale (Expose) mode, so let's add the mapping.
Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hid/hid-input.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d146a9b545ee..35422c419f52 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1016,6 +1016,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break; case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break;
+ case 0x29f: map_key_clear(KEY_SCALE); break; + default: map_key_clear(KEY_UNKNOWN); } break;
From: Dmitry Torokhov dmitry.torokhov@gmail.com
[ Upstream commit 7975a1d6a7afeb3eb61c971a153d24dd8fa032f3 ]
According to HUTRR73 usages 0x79, 0x7a and 0x7c from the consumer page correspond to Brightness Up/Down/Toggle keys, so let's add the mappings.
Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hid/hid-input.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 35422c419f52..9eeac82e9542 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -886,6 +886,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break;
+ case 0x079: map_key_clear(KEY_KBDILLUMUP); break; + case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break; + case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break; + case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break;
From: Dmitry Torokhov dmitry.torokhov@gmail.com
[ Upstream commit c01908a14bf735b871170092807c618bb9dae654 ]
According to HUT 1.12 usage 0xb5 from the generic desktop page is reserved for switching between external and internal display, so let's add the mapping.
Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hid/hid-input.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 9eeac82e9542..2597b0eddd64 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -677,6 +677,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel break; }
+ if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */ + switch (usage->hid & 0xf) { + case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break; + default: goto ignore; + } + break; + } + /* * Some lazy vendors declare 255 usages for System Control, * leading to the creation of ABS_X|Y axis and too many others.
From: Aditya Pakki pakki001@umn.edu
[ Upstream commit 486fa92df4707b5df58d6508728bdb9321a59766 ]
In case kmemdup fails, the fix releases resources and returns to avoid the NULL pointer dereference.
Signed-off-by: Aditya Pakki pakki001@umn.edu Signed-off-by: Dan Williams dan.j.williams@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvdimm/btt_devs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index d58925295aa7..e610dd890263 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -190,14 +190,15 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL;
nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL); - if (nd_btt->id < 0) { - kfree(nd_btt); - return NULL; - } + if (nd_btt->id < 0) + goto out_nd_btt;
nd_btt->lbasize = lbasize; - if (uuid) + if (uuid) { uuid = kmemdup(uuid, 16, GFP_KERNEL); + if (!uuid) + goto out_put_id; + } nd_btt->uuid = uuid; dev = &nd_btt->dev; dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id); @@ -212,6 +213,13 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL; } return dev; + +out_put_id: + ida_simple_remove(&nd_region->btt_ida, nd_btt->id); + +out_nd_btt: + kfree(nd_btt); + return NULL; }
struct device *nd_btt_create(struct nd_region *nd_region)
From: Peter Oberparleiter oberpar@linux.ibm.com
[ Upstream commit 2cc9637ce825f3a9f51f8f78af7474e9e85bfa5f ]
The DASD driver incorrectly limits the maximum number of blocks of ECKD DASD volumes to 32 bit numbers. Volumes with a capacity greater than 2^32-1 blocks are incorrectly recognized as smaller volumes.
This results in the following volume capacity limits depending on the formatted block size:
BLKSIZE MAX_GB MAX_CYL 512 2047 5843492 1024 4095 8676701 2048 8191 13634816 4096 16383 23860929
The same problem occurs when a volume with more than 17895697 cylinders is accessed in raw-track-access mode.
Fix this problem by adding an explicit type cast when calculating the maximum number of blocks.
Signed-off-by: Peter Oberparleiter oberpar@linux.ibm.com Reviewed-by: Stefan Haberland sth@linux.ibm.com Signed-off-by: Martin Schwidefsky schwidefsky@de.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/block/dasd_eckd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 0a1e7f9b5239..0d5e2d92e05b 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2001,14 +2001,14 @@ static int dasd_eckd_end_analysis(struct dasd_block *block) blk_per_trk = recs_per_track(&private->rdc_data, 0, block->bp_block);
raw: - block->blocks = (private->real_cyl * + block->blocks = ((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk);
dev_info(&device->cdev->dev, - "DASD with %d KB/block, %d KB total size, %d KB/track, " + "DASD with %u KB/block, %lu KB total size, %u KB/track, " "%s\n", (block->bp_block >> 10), - ((private->real_cyl * + (((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk * (block->bp_block >> 9)) >> 1), ((blk_per_trk * block->bp_block) >> 10),
From: Felix Fietkau nbd@nbd.name
[ Upstream commit 40586e3fc400c00c11151804dcdc93f8c831c808 ]
The pointer to the last four bytes of the address is not guaranteed to be aligned, so we need to use __get_unaligned_cpu32 here
Signed-off-by: Felix Fietkau nbd@nbd.name Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 1ce068865629..130022091205 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(*(u32 *)(addr+2), seed); + return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); }
static const struct rhashtable_params mesh_rht_params = {
From: Andrei Otcheretianski andrei.otcheretianski@intel.com
[ Upstream commit 78be2d21cc1cd3069c6138dcfecec62583130171 ]
Looks that 100 chars isn't enough for messages, as we keep getting warnings popping from different places due to message shortening. Instead of trying to shorten the prints, just increase the buffer size.
Signed-off-by: Andrei Otcheretianski andrei.otcheretianski@intel.com Signed-off-by: Luca Coelho luciano.coelho@intel.com Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/trace_msg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 366b9e6f043e..40141df09f25 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright (C) 2019 Intel Corporation + */ + #ifdef CONFIG_MAC80211_MESSAGE_TRACING
#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -11,7 +16,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg
-#define MAX_MSG_LEN 100 +#define MAX_MSG_LEN 120
DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf),
From: Felix Fietkau nbd@nbd.name
[ Upstream commit eb9b64e3a9f8483e6e54f4e03b2ae14ae5db2690 ]
skb->truesize can change due to memory reallocation or when adding extra fragments. Adjust fq->memory_usage accordingly
Signed-off-by: Felix Fietkau nbd@nbd.name Acked-by: Toke Høiland-Jørgensen toke@redhat.com Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/tx.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 305a4655f23e..09c7aa519ca8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3125,6 +3125,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + int orig_truesize; __be16 len; void *data; bool ret = false; @@ -3158,6 +3159,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!head) goto out;
+ orig_truesize = head->truesize; orig_len = head->len;
if (skb->len + head->len > max_amsdu_len) @@ -3212,6 +3214,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, *frag_tail = skb;
out_recalc: + fq->memory_usage += head->truesize - orig_truesize; if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len;
From: Sunil Dutt usdutt@codeaurora.org
[ Upstream commit d6db02a88a4aaa1cd7105137c67ddec7f3bdbc05 ]
This commit adds NL80211_FLAG_CLEAR_SKB flag to other NL commands that carry key data to ensure they do not stick around on heap after the SKB is freed.
Also introduced this flag for NL80211_CMD_VENDOR as there are sub commands which configure the keys.
Signed-off-by: Sunil Dutt usdutt@codeaurora.org Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/wireless/nl80211.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 46e9812d13c0..c1a2ad050e61 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12761,7 +12761,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEAUTHENTICATE, @@ -12812,7 +12813,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, @@ -12820,7 +12822,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DISCONNECT, @@ -12849,7 +12852,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMKSA, @@ -13201,7 +13205,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_SET_QOS_MAP, @@ -13256,7 +13261,8 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_set_pmk, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK,
From: Anson Huang anson.huang@nxp.com
[ Upstream commit bf2a7ca39fd3ab47ef71c621a7ee69d1813b1f97 ]
SNVS IRQ is requested before necessary driver data initialized, if there is a pending IRQ during driver probe phase, kernel NULL pointer panic will occur in IRQ handler. To avoid such scenario, just initialize necessary driver data before enabling IRQ. This patch is inspired by NXP's internal kernel tree.
Fixes: d3dc6e232215 ("input: keyboard: imx: add snvs power key driver") Signed-off-by: Anson Huang Anson.Huang@nxp.com Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/input/keyboard/snvs_pwrkey.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c index 7544888c4749..b8dbde746b4e 100644 --- a/drivers/input/keyboard/snvs_pwrkey.c +++ b/drivers/input/keyboard/snvs_pwrkey.c @@ -156,6 +156,9 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; }
+ pdata->input = input; + platform_set_drvdata(pdev, pdata); + error = devm_request_irq(&pdev->dev, pdata->irq, imx_snvs_pwrkey_interrupt, 0, pdev->name, pdev); @@ -171,9 +174,6 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; }
- pdata->input = input; - platform_set_drvdata(pdev, pdata); - device_init_wakeup(&pdev->dev, pdata->wakeup);
return 0;
From: Martin Schwidefsky schwidefsky@de.ibm.com
[ Upstream commit 5712f3301a12c0c3de9cc423484496b0464f2faf ]
The spinlock in the raw3270_view structure is used by con3270, tty3270 and fs3270 in different ways. For con3270 the lock can be acquired in irq context, for tty3270 and fs3270 the highest context is bh.
Lockdep sees the view->lock as a single class and if the 3270 driver is used for the console the following message is generated:
WARNING: inconsistent lock state 5.1.0-rc3-05157-g5c168033979d #12 Not tainted -------------------------------- inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. swapper/0/1 [HC0[0]:SC1[1]:HE1:SE0] takes: (____ptrval____) (&(&view->lock)->rlock){?.-.}, at: tty3270_update+0x7c/0x330
Introduce a lockdep subclass for the view lock to distinguish bh from irq locks.
Signed-off-by: Martin Schwidefsky schwidefsky@de.ibm.com
Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/char/con3270.c | 2 +- drivers/s390/char/fs3270.c | 3 ++- drivers/s390/char/raw3270.c | 3 ++- drivers/s390/char/raw3270.h | 4 +++- drivers/s390/char/tty3270.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index be3e3c1206c2..1868ff803f43 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -629,7 +629,7 @@ con3270_init(void) (void (*)(unsigned long)) con3270_read_tasklet, (unsigned long) condev->read);
- raw3270_add_view(&condev->view, &con3270_fn, 1); + raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ);
INIT_LIST_HEAD(&condev->freemem); for (i = 0; i < CON3270_STRING_PAGES; i++) { diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index c4518168fd02..4f73a38c7cbd 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -463,7 +463,8 @@ fs3270_open(struct inode *inode, struct file *filp)
init_waitqueue_head(&fp->wait); fp->fs_pid = get_pid(task_pid(current)); - rc = raw3270_add_view(&fp->view, &fs3270_fn, minor); + rc = raw3270_add_view(&fp->view, &fs3270_fn, minor, + RAW3270_VIEW_LOCK_BH); if (rc) { fs3270_free_view(&fp->view); goto out; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 5d4f053d7c38..0f47fec35acc 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -919,7 +919,7 @@ raw3270_deactivate_view(struct raw3270_view *view) * Add view to device with minor "minor". */ int -raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) +raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor, int subclass) { unsigned long flags; struct raw3270 *rp; @@ -941,6 +941,7 @@ raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) view->cols = rp->cols; view->ascebc = rp->ascebc; spin_lock_init(&view->lock); + lockdep_set_subclass(&view->lock, subclass); list_add(&view->list, &rp->view_list); rc = 0; spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags); diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 114ca7cbf889..3afaa35f7351 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -150,6 +150,8 @@ struct raw3270_fn { struct raw3270_view { struct list_head list; spinlock_t lock; +#define RAW3270_VIEW_LOCK_IRQ 0 +#define RAW3270_VIEW_LOCK_BH 1 atomic_t ref_count; struct raw3270 *dev; struct raw3270_fn *fn; @@ -158,7 +160,7 @@ struct raw3270_view { unsigned char *ascebc; /* ascii -> ebcdic table */ };
-int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int); +int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int); int raw3270_activate_view(struct raw3270_view *); void raw3270_del_view(struct raw3270_view *); void raw3270_deactivate_view(struct raw3270_view *); diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index e5ebe2fbee23..401688bf8fd3 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -978,7 +978,8 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty) return PTR_ERR(tp);
rc = raw3270_add_view(&tp->view, &tty3270_fn, - tty->index + RAW3270_FIRSTMINOR); + tty->index + RAW3270_FIRSTMINOR, + RAW3270_VIEW_LOCK_BH); if (rc) { tty3270_free_view(tp); return rc;
From: Neil Armstrong narmstrong@baylibre.com
[ Upstream commit fbc87aa0f7c429999dc31f1bac3b2615008cac32 ]
The OX820 compatible is wrong is the driver, fix it.
Fixes: 2ea3401e2a84 ("clocksource/drivers/oxnas: Add OX820 compatible") Reported-by: Daniel Golle daniel@makrotopia.org Signed-off-by: Neil Armstrong narmstrong@baylibre.com Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clocksource/timer-oxnas-rps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c index eed6feff8b5f..30c6f4ce672b 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -296,4 +296,4 @@ static int __init oxnas_rps_timer_init(struct device_node *np) TIMER_OF_DECLARE(ox810se_rps, "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); TIMER_OF_DECLARE(ox820_rps, - "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init); + "oxsemi,ox820-rps-timer", oxnas_rps_timer_init);
From: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp
[ Upstream commit 238ffdc49ef98b15819cfd5e3fb23194e3ea3d39 ]
KMSAN will complain if valid address length passed to bind() is shorter than sizeof("struct sockaddr_mISDN"->family) bytes.
Signed-off-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/isdn/mISDN/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index c5603d1a07d6..65cb4aac8dce 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -712,10 +712,10 @@ base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct sock *sk = sock->sk; int err = 0;
- if (!maddr || maddr->family != AF_ISDN) + if (addr_len < sizeof(struct sockaddr_mISDN)) return -EINVAL;
- if (addr_len < sizeof(struct sockaddr_mISDN)) + if (!maddr || maddr->family != AF_ISDN) return -EINVAL;
lock_sock(sk);
From: Harald Freudenberger freude@linux.ibm.com
[ Upstream commit 6b1f16ba730d4c0cda1247568c3a1bf4fa3a2f2f ]
The debug feature entries have been used with up to 5 arguents (including the pointer to the format string) but there was only space reserved for 4 arguemnts. So now the registration does reserve space for 5 times a long value.
This fixes a sometime appearing weired value as the last value of an debug feature entry like this:
... pkey_sec2protkey zcrypt_send_cprb (cardnr=10 domain=12) failed with errno -2143346254
Signed-off-by: Harald Freudenberger freude@linux.ibm.com Reported-by: Christian Rund Christian.Rund@de.ibm.com Signed-off-by: Martin Schwidefsky schwidefsky@de.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/crypto/pkey_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index f61fa47135a6..bd0376dc7e1e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -49,7 +49,8 @@ static debug_info_t *debug_info;
static void __init pkey_debug_init(void) { - debug_info = debug_register("pkey", 1, 1, 4 * sizeof(long)); + /* 5 arguments per dbf entry (including the format string ptr) */ + debug_info = debug_register("pkey", 1, 1, 5 * sizeof(long)); debug_register_view(debug_info, &debug_sprintf_view); debug_set_level(debug_info, 3); }
From: Jian-Hong Pan jian-hong@endlessm.com
[ Upstream commit 0082517fa4bce073e7cf542633439f26538a14cc ]
Upon reboot, the Acer TravelMate X514-51T laptop appears to complete the shutdown process, but then it hangs in BIOS POST with a black screen.
The problem is intermittent - at some points it has appeared related to Secure Boot settings or different kernel builds, but ultimately we have not been able to identify the exact conditions that trigger the issue to come and go.
Besides, the EFI mode cannot be disabled in the BIOS of this model.
However, after extensive testing, we observe that using the EFI reboot method reliably avoids the issue in all cases.
So add a boot time quirk to use EFI reboot on such systems.
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=203119 Signed-off-by: Jian-Hong Pan jian-hong@endlessm.com Signed-off-by: Daniel Drake drake@endlessm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Borislav Petkov bp@alien8.de Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Matt Fleming matt@codeblueprint.co.uk Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-efi@vger.kernel.org Cc: linux@endlessm.com Link: http://lkml.kernel.org/r/20190412080152.3718-1-jian-hong@endlessm.com [ Fix !CONFIG_EFI build failure, clarify the code and the changelog a bit. ] Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/reboot.c | 21 +++++++++++++++++++++ include/linux/efi.h | 7 ++++++- 2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2126b9d27c34..c663d5fcff2e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; }
+/* + * Some machines don't handle the default ACPI reboot method and + * require the EFI reboot method: + */ +static int __init set_efi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) { + reboot_type = BOOT_EFI; + pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident); + } + return 0; +} + void __noreturn machine_real_restart(unsigned int type) { local_irq_disable(); @@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), }, }, + { /* Handle reboot issue on Acer TravelMate X514-51T */ + .callback = set_efi_reboot, + .ident = "Acer TravelMate X514-51T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"), + }, + },
/* Apple */ { /* Handle problems with rebooting on Apple MacBook5 */ diff --git a/include/linux/efi.h b/include/linux/efi.h index b68b7d199fee..2dab158b74c4 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1518,7 +1518,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size);
-bool efi_runtime_disabled(void); +#ifdef CONFIG_EFI +extern bool efi_runtime_disabled(void); +#else +static inline bool efi_runtime_disabled(void) { return true; } +#endif + extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
enum efi_secureboot_mode {
From: Paolo Bonzini pbonzini@redhat.com
[ Upstream commit 1d487e9bf8ba66a7174c56a0029c54b1eca8f99c ]
These were found with smatch, and then generalized when applicable.
Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kvm/lapic.c | 4 +++- include/linux/kvm_host.h | 10 ++++++---- virt/kvm/irqchip.c | 5 +++-- virt/kvm/kvm_main.c | 6 ++++-- 4 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f7c34184342a..053e4937af0c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -133,6 +133,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, if (offset <= max_apic_id) { u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+ offset = array_index_nospec(offset, map->max_apic_id + 1); *cluster = &map->phys_map[offset]; *mask = dest_id & (0xffff >> (16 - cluster_size)); } else { @@ -829,7 +830,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, if (irq->dest_id > map->max_apic_id) { *bitmap = 0; } else { - *dst = &map->phys_map[irq->dest_id]; + u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); + *dst = &map->phys_map[dest_id]; *bitmap = 1; } return true; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 753c16633bac..026615e242d8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -27,6 +27,7 @@ #include <linux/irqbypass.h> #include <linux/swait.h> #include <linux/refcount.h> +#include <linux/nospec.h> #include <asm/signal.h>
#include <linux/kvm.h> @@ -483,10 +484,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { - /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case - * the caller has read kvm->online_vcpus before (as is the case - * for kvm_for_each_vcpu, for example). - */ + int num_vcpus = atomic_read(&kvm->online_vcpus); + i = array_index_nospec(i, num_vcpus); + + /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ smp_rmb(); return kvm->vcpus[i]; } @@ -570,6 +571,7 @@ void kvm_put_kvm(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { + as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b1286c4e0712..0bd0683640bd 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm, { struct kvm_kernel_irq_routing_entry *ei; int r; + u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES);
/* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and non-irqchip routing. */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + hlist_for_each_entry(ei, &rt->map[gsi], link) if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return -EINVAL;
- e->gsi = ue->gsi; + e->gsi = gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a373c60ef1c0..b91716b1b428 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2886,12 +2886,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_device_ops *ops = NULL; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int type; int ret;
if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) return -ENODEV;
- ops = kvm_device_ops_table[cd->type]; + type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); + ops = kvm_device_ops_table[type]; if (ops == NULL) return -ENODEV;
@@ -2906,7 +2908,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, dev->kvm = kvm;
mutex_lock(&kvm->lock); - ret = ops->create(dev, cd->type); + ret = ops->create(dev, type); if (ret < 0) { mutex_unlock(&kvm->lock); kfree(dev);
From: Vitaly Kuznetsov vkuznets@redhat.com
[ Upstream commit 7a223e06b1a411cef6c4cd7a9b9a33c8d225b10e ]
In __apic_accept_irq() interface trig_mode is int and actually on some code paths it is set above u8:
kvm_apic_set_irq() extracts it from 'struct kvm_lapic_irq' where trig_mode is u16. This is done on purpose as e.g. kvm_set_msi_irq() sets it to (1 << 15) & e->msi.data
kvm_apic_local_deliver sets it to reg & (1 << 15).
Fix the immediate issue by making 'tm' into u16. We may also want to adjust __apic_accept_irq() interface and use proper sizes for vector, level, trig_mode but this is not urgent.
Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kvm/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 9807c314c478..3bf41413ab15 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi, );
TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), + TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) - __field( __u8, tm ) + __field( __u16, tm ) __field( __u8, vec ) ),
From: Rikard Falkeborn rikard.falkeborn@gmail.com
[ Upstream commit f32c2877bcb068a718bb70094cd59ccc29d4d082 ]
There was a missing comparison with 0 when checking if type is "s64" or "u64". Therefore, the body of the if-statement was entered if "type" was "u64" or not "s64", which made the first strcmp() redundant since if type is "u64", it's not "s64".
If type is "s64", the body of the if-statement is not entered but since the remainder of the function consists of if-statements which will not be entered if type is "s64", we will just return "val", which is correct, albeit at the cost of a few more calls to strcmp(), i.e., it will behave just as if the if-statement was entered.
If type is neither "s64" or "u64", the body of the if-statement will be entered incorrectly and "val" returned. This means that any type that is checked after "s64" and "u64" is handled the same way as "s64" and "u64", i.e., the limiting of "val" to fit in for example "s8" is never reached.
This was introduced in the kernel tree when the sources were copied from trace-cmd in commit f7d82350e597 ("tools/events: Add files to create libtraceevent.a"), and in the trace-cmd repo in 1cdbae6035cei ("Implement typecasting in parser") when the function was introduced, i.e., it has always behaved the wrong way.
Detected by cppcheck.
Signed-off-by: Rikard Falkeborn rikard.falkeborn@gmail.com Reviewed-by: Steven Rostedt (VMware) rostedt@goodmis.org Cc: Tzvetomir Stoyanov tstoyanov@vmware.com Fixes: f7d82350e597 ("tools/events: Add files to create libtraceevent.a") Link: http://lkml.kernel.org/r/20190409091529.2686-1-rikard.falkeborn@gmail.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 3955ba9e6fcb..7989dd6289e7 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2206,7 +2206,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val & 0xffffffff;
if (strcmp(type, "u64") == 0 || - strcmp(type, "s64")) + strcmp(type, "s64") == 0) return val;
if (strcmp(type, "s8") == 0)
From: Johannes Weiner hannes@cmpxchg.org
[ Upstream commit 3b991208b897f52507168374033771a984b947b1 ]
During !CONFIG_CGROUP reclaim, we expand the inactive list size if it's thrashing on the node that is about to be reclaimed. But when cgroups are enabled, we suddenly ignore the node scope and use the cgroup scope only. The result is that pressure bleeds between NUMA nodes depending on whether cgroups are merely compiled into Linux. This behavioral difference is unexpected and undesirable.
When the refault adaptivity of the inactive list was first introduced, there were no statistics at the lruvec level - the intersection of node and memcg - so it was better than nothing.
But now that we have that infrastructure, use lruvec_page_state() to make the list balancing decision always NUMA aware.
[hannes@cmpxchg.org: fix bisection hole] Link: http://lkml.kernel.org/r/20190417155241.GB23013@cmpxchg.org Link: http://lkml.kernel.org/r/20190412144438.2645-1-hannes@cmpxchg.org Fixes: 2a2e48854d70 ("mm: vmscan: fix IO/refault regression in cache workingset transition") Signed-off-by: Johannes Weiner hannes@cmpxchg.org Reviewed-by: Shakeel Butt shakeelb@google.com Cc: Roman Gushchin guro@fb.com Cc: Michal Hocko mhocko@kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/vmscan.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c index 9734e62654fa..144961f6f89c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2111,7 +2111,6 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct mem_cgroup *memcg, struct scan_control *sc, bool actual_reclaim) { enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; @@ -2132,16 +2131,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
- if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - /* * When refaults are being observed, it means a new workingset * is being established. Disable active list protection to get * rid of the stale workingset quickly. */ + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); if (file && actual_reclaim && lruvec->refaults != refaults) { inactive_ratio = 0; } else { @@ -2162,12 +2157,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, }
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct mem_cgroup *memcg, - struct scan_control *sc) + struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), - memcg, sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2267,7 +2260,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anonymous pages on the LRU in eligible zones. * Otherwise, the small LRU gets thrashed. */ - if (!inactive_list_is_low(lruvec, false, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, false, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_ANON; @@ -2285,7 +2278,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, true, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2438,7 +2431,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, memcg, sc); + lruvec, sc); } }
@@ -2505,7 +2498,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2830,12 +2823,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) unsigned long refaults; struct lruvec *lruvec;
- if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - lruvec = mem_cgroup_lruvec(pgdat, memcg); + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); lruvec->refaults = refaults; } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); } @@ -3183,7 +3172,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON);
From: Dan Williams dan.j.williams@intel.com
[ Upstream commit 6041186a32585fc7a1d0f6cfe2f138b05fdc3c82 ]
When a module option, or core kernel argument, toggles a static-key it requires jump labels to be initialized early. While x86, PowerPC, and ARM64 arrange for jump_label_init() to be called before parse_args(), ARM does not.
Kernel command line: rdinit=/sbin/init page_alloc.shuffle=1 panic=-1 console=ttyAMA0,115200 page_alloc.shuffle=1 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at ./include/linux/jump_label.h:303 page_alloc_shuffle+0x12c/0x1ac static_key_enable(): static key 'page_alloc_shuffle_key+0x0/0x4' used before call to jump_label_init() Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc4-next-20190410-00003-g3367c36ce744 #1 Hardware name: ARM Integrator/CP (Device Tree) [<c0011c68>] (unwind_backtrace) from [<c000ec48>] (show_stack+0x10/0x18) [<c000ec48>] (show_stack) from [<c07e9710>] (dump_stack+0x18/0x24) [<c07e9710>] (dump_stack) from [<c001bb1c>] (__warn+0xe0/0x108) [<c001bb1c>] (__warn) from [<c001bb88>] (warn_slowpath_fmt+0x44/0x6c) [<c001bb88>] (warn_slowpath_fmt) from [<c0b0c4a8>] (page_alloc_shuffle+0x12c/0x1ac) [<c0b0c4a8>] (page_alloc_shuffle) from [<c0b0c550>] (shuffle_store+0x28/0x48) [<c0b0c550>] (shuffle_store) from [<c003e6a0>] (parse_args+0x1f4/0x350) [<c003e6a0>] (parse_args) from [<c0ac3c00>] (start_kernel+0x1c0/0x488)
Move the fallback call to jump_label_init() to occur before parse_args().
The redundant calls to jump_label_init() in other archs are left intact in case they have static key toggling use cases that are even earlier than option parsing.
Link: http://lkml.kernel.org/r/155544804466.1032396.13418949511615676665.stgit@dwi... Signed-off-by: Dan Williams dan.j.williams@intel.com Reported-by: Guenter Roeck groeck@google.com Reviewed-by: Kees Cook keescook@chromium.org Cc: Mathieu Desnoyers mathieu.desnoyers@efficios.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Mike Rapoport rppt@linux.ibm.com Cc: Russell King rmk@armlinux.org.uk Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- init/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/init/main.c b/init/main.c index 3d3d79c5a232..51067e2db509 100644 --- a/init/main.c +++ b/init/main.c @@ -550,6 +550,8 @@ asmlinkage __visible void __init start_kernel(void) page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line); + /* parameters may set static keys */ + jump_label_init(); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, @@ -559,8 +561,6 @@ asmlinkage __visible void __init start_kernel(void) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, NULL, set_init_arg);
- jump_label_init(); - /* * These use large bootmem allocations and must precede * kmem_cache_init()
From: Florian Westphal fw@strlen.de
[ Upstream commit becf2319f320cae43e20cf179cc51a355a0deb5f ]
When an icmp error such as pkttoobig is received, conntrack checks if the "inner" header (header of packet that did not fit link mtu) is matches an existing connection, and, if so, sets that packet as being related to the conntrack entry it found.
It was recently reported that this "related" setting also works if the inner header is from another, different connection (i.e., artificial/forged icmp error).
Add a test, followup patch will add additional "inner dst matches outer dst in reverse direction" check before setting related state.
Link: https://www.synacktiv.com/posts/systems/icmp-reachable.html Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/netfilter/Makefile | 2 +- .../netfilter/conntrack_icmp_related.sh | 283 ++++++++++++++++++ 2 files changed, 284 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/conntrack_icmp_related.sh
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index c9ff2b47bd1c..a37cb1192c6a 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh new file mode 100755 index 000000000000..b48e1833bc89 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# +# check that ICMP df-needed/pkttoobig icmp are set are set as related +# state +# +# Setup is: +# +# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2 +# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280). +# ping nsclient2 from nsclient1, checking that conntrack did set RELATED +# 'fragmentation needed' icmp packet. +# +# In addition, nsrouter1 will perform IP masquerading, i.e. also +# check the icmp errors are propagated to the correct host as per +# nat of "established" icmp-echo "connection". + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + for i in 1 2;do ip netns del nsclient$i;done + for i in 1 2;do ip netns del nsrouter$i;done +} + +ipv4() { + echo -n 192.168.$1.2 +} + +ipv6 () { + echo -n dead:$1::2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +check_unknown() +{ + expect="packets 0 bytes 0" + for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + check_counter $n "unknown" "$expect" + if [ $? -ne 0 ] ;then + return 1 + fi + done + + return 0 +} + +for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + ip netns add $n + ip -net $n link set lo up +done + +DEV=veth0 +ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1 +DEV=veth0 +ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2 + +DEV=veth0 +ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2 + +DEV=veth0 +for i in 1 2; do + ip -net nsclient$i link set $DEV up + ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV + ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV +done + +ip -net nsrouter1 link set eth1 up +ip -net nsrouter1 link set veth0 up + +ip -net nsrouter2 link set eth1 up +ip -net nsrouter2 link set eth2 up + +ip -net nsclient1 route add default via 192.168.1.1 +ip -net nsclient1 -6 route add default via dead:1::1 + +ip -net nsclient2 route add default via 192.168.2.1 +ip -net nsclient2 route add default via dead:2::1 + +i=3 +ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1 +ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0 +ip -net nsrouter1 addr add dead:1::1/64 dev eth1 +ip -net nsrouter1 addr add dead:3::1/64 dev veth0 +ip -net nsrouter1 route add default via 192.168.3.10 +ip -net nsrouter1 -6 route add default via dead:3::10 + +ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1 +ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2 +ip -net nsrouter2 addr add dead:2::1/64 dev eth1 +ip -net nsrouter2 addr add dead:3::10/64 dev eth2 +ip -net nsrouter2 route add default via 192.168.3.1 +ip -net nsrouter2 route add default via dead:3::1 + +sleep 2 +for i in 4 6; do + ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1 +done + +for netns in nsrouter1 nsrouter2; do +ip netns exec $netns nft -f - <<EOF +table inet filter { + counter unknown { } + counter related { } + chain forward { + type filter hook forward priority 0; policy accept; + meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept + meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept + meta l4proto { icmp, icmpv6 } ct state new,established accept + counter name "unknown" drop + } +} +EOF +done + +ip netns exec nsclient1 nft -f - <<EOF +table inet filter { + counter unknown { } + counter related { } + chain input { + type filter hook input priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept + counter name "unknown" drop + } +} +EOF + +ip netns exec nsclient2 nft -f - <<EOF +table inet filter { + counter unknown { } + counter new { } + counter established { } + + chain input { + type filter hook input priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept + meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept + counter name "unknown" drop + } + chain output { + type filter hook output priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" + meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" + counter name "unknown" drop + } +} +EOF + + +# make sure NAT core rewrites adress of icmp error if nat is used according to +# conntrack nat information (icmp error will be directed at nsrouter1 address, +# but it needs to be routed to nsclient1 address). +ip netns exec nsrouter1 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + ip protocol icmp oifname "veth0" counter masquerade + } +} +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + ip6 nexthdr icmpv6 oifname "veth0" counter masquerade + } +} +EOF + +ip netns exec nsrouter2 ip link set eth1 mtu 1280 +ip netns exec nsclient2 ip link set veth0 mtu 1280 +sleep 1 + +ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ip routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi +ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi + +check_unknown +if [ $? -ne 0 ]; then + ret=1 +fi + +expect="packets 0 bytes 0" +for netns in nsrouter1 nsrouter2 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +expect="packets 2 bytes 2076" +check_counter nsclient2 "new" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +# nsrouter2 should have generated the icmp error, so +# related counter should be 0 (its in forward). +expect="packets 0 bytes 0" +check_counter "nsrouter2" "related" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +# but nsrouter1 should have seen it, same for nsclient1. +expect="packets 1 bytes 576" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +expect="packets 2 bytes 1856" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +if [ $ret -eq 0 ];then + echo "PASS: icmp mtu error had RELATED state" +else + echo "ERROR: icmp error RELATED state test has failed" +fi + +cleanup +exit $ret
From: Julian Anastasov ja@ssi.bg
[ Upstream commit 0261ea1bd1eb0da5c0792a9119b8655cf33c80a3 ]
We can receive ICMP errors from client or from tunneling real server. While the former can be scheduled to real server, the latter should not be scheduled, they are decapsulated only when existing connection is found.
Fixes: 6044eeffafbe ("ipvs: attempt to schedule icmp packets") Signed-off-by: Julian Anastasov ja@ssi.bg Signed-off-by: Simon Horman horms@verge.net.au Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4278f5c947ab..d1c0378144f3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1635,7 +1635,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (!cp) { int v;
- if (!sysctl_schedule_icmp(ipvs)) + if (ipip || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT;
if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
From: Florian Westphal fw@strlen.de
[ Upstream commit 3c79107631db1f7fd32cf3f7368e4672004a3010 ]
else, we leak the addresses to userspace via ctnetlink events and dumps.
Compute an ID on demand based on the immutable parts of nf_conn struct.
Another advantage compared to using an address is that there is no immediate re-use of the same ID in case the conntrack entry is freed and reallocated again immediately.
Fixes: 3583240249ef ("[NETFILTER]: nf_conntrack_expect: kill unique ID") Fixes: 7f85f914721f ("[NETFILTER]: nf_conntrack: kill unique ID") Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 35 ++++++++++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++++++++---- 3 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 792c3f6d30ce..93bbae8f9641 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -315,6 +315,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl);
+u32 nf_ct_get_id(const struct nf_conn *ct); + static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 06520bf30f29..fa49a627b681 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/err.h> #include <linux/percpu.h> #include <linux/moduleparam.h> @@ -300,6 +301,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 48dab1403b2c..c781c9a1a697 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/siphash.h>
#include <linux/netfilter.h> #include <net/netlink.h> @@ -445,7 +446,9 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0;
@@ -1179,8 +1182,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, ct = nf_ct_tuplehash_to_ctrack(h);
if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2521,6 +2525,25 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
static const union nf_inet_addr any_addr;
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2568,7 +2591,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -2873,7 +2896,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; }
From: Petr Štetiar ynezz@true.cz
[ Upstream commit a1e8783db8e0d58891681bc1e6d9ada66eae8e20 ]
Currently it's not possible to use perf on ath79 due to genirq flags mismatch happening on static virtual IRQ 13 which is used for performance counters hardware IRQ 5.
On TP-Link Archer C7v5:
CPU0 2: 0 MIPS 2 ath9k 4: 318 MIPS 4 19000000.eth 7: 55034 MIPS 7 timer 8: 1236 MISC 3 ttyS0 12: 0 INTC 1 ehci_hcd:usb1 13: 0 gpio-ath79 2 keys 14: 0 gpio-ath79 5 keys 15: 31 AR724X PCI 1 ath10k_pci
$ perf top genirq: Flags mismatch irq 13. 00014c83 (mips_perf_pmu) vs. 00002003 (keys)
On TP-Link Archer C7v4:
CPU0 4: 0 MIPS 4 19000000.eth 5: 7135 MIPS 5 1a000000.eth 7: 98379 MIPS 7 timer 8: 30 MISC 3 ttyS0 12: 90028 INTC 0 ath9k 13: 5520 INTC 1 ehci_hcd:usb1 14: 4623 INTC 2 ehci_hcd:usb2 15: 32844 AR724X PCI 1 ath10k_pci 16: 0 gpio-ath79 16 keys 23: 0 gpio-ath79 23 keys
$ perf top genirq: Flags mismatch irq 13. 00014c80 (mips_perf_pmu) vs. 00000080 (ehci_hcd:usb1)
This problem is happening, because currently statically assigned virtual IRQ 13 for performance counters is not claimed during the initialization of MIPS PMU during the bootup, so the IRQ subsystem doesn't know, that this interrupt isn't available for further use.
So this patch fixes the issue by simply booking hardware IRQ 5 for MIPS PMU.
Tested-by: Kevin 'ldir' Darbyshire-Bryant ldir@darbyshire-bryant.me.uk Signed-off-by: Petr Štetiar ynezz@true.cz Acked-by: John Crispin john@phrozen.org Acked-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Paul Burton paul.burton@mips.com Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle ralf@linux-mips.org Cc: James Hogan jhogan@kernel.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Jason Cooper jason@lakedaemon.net Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/ath79/setup.c | 6 ------ drivers/irqchip/irq-ath79-misc.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 26a058d58d37..c7c31e214813 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -183,12 +183,6 @@ const char *get_system_type(void) return ath79_sys_type; }
-int get_c0_perfcount_int(void) -{ - return ATH79_MISC_IRQ(5); -} -EXPORT_SYMBOL_GPL(get_c0_perfcount_int); - unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c index aa7290784636..0390603170b4 100644 --- a/drivers/irqchip/irq-ath79-misc.c +++ b/drivers/irqchip/irq-ath79-misc.c @@ -22,6 +22,15 @@ #define AR71XX_RESET_REG_MISC_INT_ENABLE 4
#define ATH79_MISC_IRQ_COUNT 32 +#define ATH79_MISC_PERF_IRQ 5 + +static int ath79_perfcount_irq; + +int get_c0_perfcount_int(void) +{ + return ath79_perfcount_irq; +} +EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
static void ath79_misc_irq_handler(struct irq_desc *desc) { @@ -113,6 +122,8 @@ static void __init ath79_misc_intc_domain_init( { void __iomem *base = domain->host_data;
+ ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ); + /* Disable and clear all interrupts */ __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit 27b141fc234a3670d21bd742c35d7205d03cbb3a ]
clang points out that the return code from this function is undefined for one of the error paths:
../drivers/s390/net/ctcm_main.c:1595:7: warning: variable 'result' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1638:9: note: uninitialized use occurs here return result; ^~~~~~ ../drivers/s390/net/ctcm_main.c:1595:3: note: remove the 'if' if its condition is always false if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1539:12: note: initialize the variable 'result' to silence this warning int result; ^
Make it return -ENODEV here, as in the related failure cases. gcc has a known bug in underreporting some of these warnings when it has already eliminated the assignment of the return code based on some earlier optimization step.
Reviewed-by: Nathan Chancellor natechancellor@gmail.com Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Julian Wiedmann jwi@linux.ibm.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/net/ctcm_main.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 26363e0816fe..fbe35c2ac898 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1594,6 +1594,7 @@ static int ctcm_new_device(struct ccwgroup_device *cgdev) if (priv->channel[direction] == NULL) { if (direction == CTCM_WRITE) channel_free(priv->channel[CTCM_READ]); + result = -ENODEV; goto out_dev; } priv->channel[direction]->netdev = dev;
From: Paul Kocialkowski paul.kocialkowski@bootlin.com
[ Upstream commit 02b92adbe33e6dbd15dc6e32540b22f47c4ff0a2 ]
Our sun4i_drv_unbind gets the drm device using dev_get_drvdata. However, that driver data is never set in sun4i_drv_bind.
Set it there to avoid getting a NULL pointer at unbind time.
Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski paul.kocialkowski@bootlin.com Signed-off-by: Maxime Ripard maxime.ripard@bootlin.com Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-3-paul.koc... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 8d3c8070ed86..e09161cf312f 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -96,6 +96,8 @@ static int sun4i_drv_bind(struct device *dev) ret = -ENOMEM; goto free_drm; } + + dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->engine_list); INIT_LIST_HEAD(&drv->tcon_list);
From: Po-Hsu Lin po-hsu.lin@canonical.com
[ Upstream commit 30c04d796b693e22405c38e9b78e9a364e4c77e6 ]
The run_netsocktests will be marked as passed regardless the actual test result from the ./socket:
selftests: net: run_netsocktests ======================================== -------------------- running socket test -------------------- [FAIL] ok 1..6 selftests: net: run_netsocktests [PASS]
This is because the test script itself has been successfully executed. Fix this by exit 1 when the test failed.
Signed-off-by: Po-Hsu Lin po-hsu.lin@canonical.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/net/run_netsocktests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index b093f39c298c..14e41faf2c57 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -7,7 +7,7 @@ echo "--------------------" ./socket if [ $? -ne 0 ]; then echo "[FAIL]" + exit 1 else echo "[PASS]" fi -
From: Lucas Stach l.stach@pengutronix.de
[ Upstream commit d4fad0a426c6e26f48c9a7cdd21a7fe9c198d645 ]
Initialize the flow input colorspaces to unknown and reset to that value when the channel gets disabled. This avoids the state getting mixed up with a previous mode.
Also keep the CSC settings for the background flow intact when disabling the foreground flow.
Root-caused-by: Jonathan Marek jonathan@marek.ca Signed-off-by: Lucas Stach l.stach@pengutronix.de Signed-off-by: Philipp Zabel p.zabel@pengutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/ipu-v3/ipu-dp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c index 9b2b3fa479c4..5e44ff1f2085 100644 --- a/drivers/gpu/ipu-v3/ipu-dp.c +++ b/drivers/gpu/ipu-v3/ipu-dp.c @@ -195,7 +195,8 @@ int ipu_dp_setup_channel(struct ipu_dp *dp, ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs, DP_COM_CONF_CSC_DEF_BOTH); } else { - if (flow->foreground.in_cs == flow->out_cs) + if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN || + flow->foreground.in_cs == flow->out_cs) /* * foreground identical to output, apply color * conversion on background @@ -261,6 +262,8 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync) struct ipu_dp_priv *priv = flow->priv; u32 reg, csc;
+ dp->in_cs = IPUV3_COLORSPACE_UNKNOWN; + if (!dp->foreground) return;
@@ -268,8 +271,9 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync)
reg = readl(flow->base + DP_COM_CONF); csc = reg & DP_COM_CONF_CSC_DEF_MASK; - if (csc == DP_COM_CONF_CSC_DEF_FG) - reg &= ~DP_COM_CONF_CSC_DEF_MASK; + reg &= ~DP_COM_CONF_CSC_DEF_MASK; + if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG) + reg |= DP_COM_CONF_CSC_DEF_BG;
reg &= ~DP_COM_CONF_FG_EN; writel(reg, flow->base + DP_COM_CONF); @@ -347,6 +351,8 @@ int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) mutex_init(&priv->mutex);
for (i = 0; i < IPUV3_NUM_FLOWS; i++) { + priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN; + priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN; priv->flow[i].foreground.foreground = true; priv->flow[i].base = priv->base + ipu_dp_flow_base[i]; priv->flow[i].priv = priv;
From: Lucas Stach l.stach@pengutronix.de
[ Upstream commit 7bcde275eb1d0ac8793c77c7e666a886eb16633d ]
In order to make sure that the plane color space gets reset correctly.
Signed-off-by: Lucas Stach l.stach@pengutronix.de Signed-off-by: Philipp Zabel p.zabel@pengutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/imx/ipuv3-crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index d976391dfa31..957fbf8c55eb 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -79,7 +79,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc, if (disable_partial) ipu_plane_disable(ipu_crtc->plane[1], true); if (disable_full) - ipu_plane_disable(ipu_crtc->plane[0], false); + ipu_plane_disable(ipu_crtc->plane[0], true); }
static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
From: Daniel Gomez dagmcr@gmail.com
[ Upstream commit 2f23a2a768bee7ad2ff1e9527c3f7e279e794a46 ]
Add missing <of_device_id> table for SPI driver relying on SPI device match since compatible is in a DT binding or in a DTS.
Before this patch: modinfo drivers/net/phy/spi_ks8995.ko | grep alias alias: spi:ksz8795 alias: spi:ksz8864 alias: spi:ks8995
After this patch: modinfo drivers/net/phy/spi_ks8995.ko | grep alias alias: spi:ksz8795 alias: spi:ksz8864 alias: spi:ks8995 alias: of:N*T*Cmicrel,ksz8795C* alias: of:N*T*Cmicrel,ksz8795 alias: of:N*T*Cmicrel,ksz8864C* alias: of:N*T*Cmicrel,ksz8864 alias: of:N*T*Cmicrel,ks8995C* alias: of:N*T*Cmicrel,ks8995
Reported-by: Javier Martinez Canillas javier@dowhile0.org Signed-off-by: Daniel Gomez dagmcr@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/phy/spi_ks8995.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 1e2d4f1179da..45df03673e01 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -162,6 +162,14 @@ static const struct spi_device_id ks8995_id[] = { }; MODULE_DEVICE_TABLE(spi, ks8995_id);
+static const struct of_device_id ks8895_spi_of_match[] = { + { .compatible = "micrel,ks8995" }, + { .compatible = "micrel,ksz8864" }, + { .compatible = "micrel,ksz8795" }, + { }, + }; +MODULE_DEVICE_TABLE(of, ks8895_spi_of_match); + static inline u8 get_chip_id(u8 val) { return (val >> ID1_CHIPID_S) & ID1_CHIPID_M; @@ -529,6 +537,7 @@ static int ks8995_remove(struct spi_device *spi) static struct spi_driver ks8995_driver = { .driver = { .name = "spi-ks8995", + .of_match_table = of_match_ptr(ks8895_spi_of_match), }, .probe = ks8995_probe, .remove = ks8995_remove,
From: Daniel Gomez dagmcr@gmail.com
[ Upstream commit d04830531d0c4a99c897a44038e5da3d23331d2f ]
Add missing <of_device_id> table for SPI driver relying on SPI device match since compatible is in a DT binding or in a DTS.
Before this patch: modinfo drivers/nfc/st95hf/st95hf.ko | grep alias alias: spi:st95hf
After this patch: modinfo drivers/nfc/st95hf/st95hf.ko | grep alias alias: spi:st95hf alias: of:N*T*Cst,st95hfC* alias: of:N*T*Cst,st95hf
Reported-by: Javier Martinez Canillas javier@dowhile0.org Signed-off-by: Daniel Gomez dagmcr@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nfc/st95hf/core.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index 2b26f762fbc3..01acb6e53365 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1074,6 +1074,12 @@ static const struct spi_device_id st95hf_id[] = { }; MODULE_DEVICE_TABLE(spi, st95hf_id);
+static const struct of_device_id st95hf_spi_of_match[] = { + { .compatible = "st,st95hf" }, + { }, +}; +MODULE_DEVICE_TABLE(of, st95hf_spi_of_match); + static int st95hf_probe(struct spi_device *nfc_spi_dev) { int ret; @@ -1260,6 +1266,7 @@ static struct spi_driver st95hf_driver = { .driver = { .name = "st95hf", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(st95hf_spi_of_match), }, .id_table = st95hf_id, .probe = st95hf_probe,
From: Pan Bian bianpan2016@163.com
[ Upstream commit bce1a78423961fce676ac65540a31b6ffd179e6d ]
The RMI4 function structure has been released in rmi_register_function if error occurs. However, it will be released again in the function rmi_create_function, which may result in a double-free bug.
Signed-off-by: Pan Bian bianpan2016@163.com Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/input/rmi4/rmi_driver.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index f5954981e9ee..997ccae7ee05 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -883,7 +883,7 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
error = rmi_register_function(fn); if (error) - goto err_put_fn; + return error;
if (pdt->function_number == 0x01) data->f01_container = fn; @@ -893,10 +893,6 @@ static int rmi_create_function(struct rmi_device *rmi_dev, list_add_tail(&fn->node, &data->function_list);
return RMI_SCAN_CONTINUE; - -err_put_fn: - put_device(&fn->dev); - return error; }
void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake)
From: "David S. Miller" davem@davemloft.net
[ Upstream commit 2b4792eaa9f553764047d157365ed8b7787751a3 ]
Some drivers reference it via node_distance(), for example the NVME host driver core.
ERROR: "__node_distance" [drivers/nvme/host/nvme-core.ko] undefined! make[1]: *** [scripts/Makefile.modpost:92: __modpost] Error 1
Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/sparc/mm/init_64.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 984e9d65ea0d..76977296dc9c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1383,6 +1383,7 @@ int __node_distance(int from, int to) } return numa_latency[from][to]; } +EXPORT_SYMBOL(__node_distance);
static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) {
From: David Miller davem@redhat.com
[ Upstream commit 5b4fc3882a649c9411dd0dcad2ddb78e911d340e ]
Right now if we get a corrupted user stack frame we do a do_exit(SIGILL) which is not helpful.
If under a debugger, this behavior causes the inferior process to exit. So the register and other state cannot be examined at the time of the event.
Instead, conditionally log a rate limited kernel log message and then force a SIGSEGV.
With bits and ideas borrowed (as usual) from powerpc.
Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/sparc/include/asm/switch_to_64.h | 3 ++- arch/sparc/kernel/process_64.c | 25 +++++++++++++++++++------ arch/sparc/kernel/rtrap_64.S | 1 + arch/sparc/kernel/signal32.c | 12 ++++++++++-- arch/sparc/kernel/signal_64.c | 6 +++++- 5 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h index 4ff29b1406a9..b1d4e2e3210f 100644 --- a/arch/sparc/include/asm/switch_to_64.h +++ b/arch/sparc/include/asm/switch_to_64.h @@ -67,6 +67,7 @@ do { save_and_clear_fpu(); \ } while(0)
void synchronize_user_stack(void); -void fault_in_user_windows(void); +struct pt_regs; +void fault_in_user_windows(struct pt_regs *);
#endif /* __SPARC64_SWITCH_TO_64_H */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 318efd784a0b..5640131e2abf 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -36,6 +36,7 @@ #include <linux/sysrq.h> #include <linux/nmi.h> #include <linux/context_tracking.h> +#include <linux/signal.h>
#include <linux/uaccess.h> #include <asm/page.h> @@ -528,7 +529,12 @@ static void stack_unaligned(unsigned long sp) force_sig_info(SIGBUS, &info, current); }
-void fault_in_user_windows(void) +static const char uwfault32[] = KERN_INFO \ + "%s[%d]: bad register window fault: SP %08lx (orig_sp %08lx) TPC %08lx O7 %08lx\n"; +static const char uwfault64[] = KERN_INFO \ + "%s[%d]: bad register window fault: SP %016lx (orig_sp %016lx) TPC %08lx O7 %016lx\n"; + +void fault_in_user_windows(struct pt_regs *regs) { struct thread_info *t = current_thread_info(); unsigned long window; @@ -541,9 +547,9 @@ void fault_in_user_windows(void) do { struct reg_window *rwin = &t->reg_window[window]; int winsize = sizeof(struct reg_window); - unsigned long sp; + unsigned long sp, orig_sp;
- sp = t->rwbuf_stkptrs[window]; + orig_sp = sp = t->rwbuf_stkptrs[window];
if (test_thread_64bit_stack(sp)) sp += STACK_BIAS; @@ -554,8 +560,16 @@ void fault_in_user_windows(void) stack_unaligned(sp);
if (unlikely(copy_to_user((char __user *)sp, - rwin, winsize))) + rwin, winsize))) { + if (show_unhandled_signals) + printk_ratelimited(is_compat_task() ? + uwfault32 : uwfault64, + current->comm, current->pid, + sp, orig_sp, + regs->tpc, + regs->u_regs[UREG_I7]); goto barf; + } } while (window--); } set_thread_wsaved(0); @@ -563,8 +577,7 @@ void fault_in_user_windows(void)
barf: set_thread_wsaved(window + 1); - user_exit(); - do_exit(SIGILL); + force_sig(SIGSEGV, current); }
asmlinkage long sparc_do_fork(unsigned long clone_flags, diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 0b21042ab181..ad88d60bb740 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -30,6 +30,7 @@ __handle_preemption: wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
__handle_user_windows: + add %sp, PTREGS_OFF, %o0 call fault_in_user_windows wrpr %g0, RTRAP_PSTATE, %pstate ba,pt %xcc, __handle_preemption_continue diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 5c572de64c74..879f8d86bc21 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -442,7 +442,11 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs, get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + if (show_unhandled_signals) + pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n", + current->comm, current->pid, (unsigned long)sf, + regs->tpc, regs->u_regs[UREG_I7]); + force_sigsegv(ksig->sig, current); return -EINVAL; }
@@ -573,7 +577,11 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs, get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + if (show_unhandled_signals) + pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n", + current->comm, current->pid, (unsigned long)sf, + regs->tpc, regs->u_regs[UREG_I7]); + force_sigsegv(ksig->sig, current); return -EINVAL; }
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 20426a1c28f2..2d0a50bde3f9 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -373,7 +373,11 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) get_sigframe(ksig, regs, sf_size);
if (invalid_frame_pointer (sf)) { - do_exit(SIGILL); /* won't return, actually */ + if (show_unhandled_signals) + pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n", + current->comm, current->pid, (unsigned long)sf, + regs->tpc, regs->u_regs[UREG_I7]); + force_sigsegv(ksig->sig, current); return -EINVAL; }
From: Huacai Chen chenhc@lemote.com
[ Upstream commit c61c7def1fa0a722610d89790e0255b74f3c07dd ]
Commit ea7e0480a4b6 ("MIPS: VDSO: Always map near top of user memory") set VDSO_RANDOMIZE_SIZE to 256MB for 64bit kernel. But take a look at arch/mips/mm/mmap.c we can see that MIN_GAP is 128MB, which means the mmap_base may be at (user_address_top - 128MB). This make the stack be surrounded by mmaped areas, then stack expanding fails and causes a segmentation fault. Therefore, VDSO_RANDOMIZE_SIZE should be less than MIN_GAP and this patch reduce it to 64MB.
Signed-off-by: Huacai Chen chenhc@lemote.com Signed-off-by: Paul Burton paul.burton@mips.com Fixes: ea7e0480a4b6 ("MIPS: VDSO: Always map near top of user memory") Patchwork: https://patchwork.linux-mips.org/patch/20910/ Cc: Ralf Baechle ralf@linux-mips.org Cc: James Hogan jhogan@kernel.org Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang zhangfx@lemote.com Cc: Zhangjin Wu wuzhangjin@gmail.com Cc: Huacai Chen chenhuacai@gmail.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/mips/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 8bbbab611a3f..0b86a01de956 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -81,7 +81,7 @@ extern unsigned int vced_count, vcei_count;
#endif
-#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M) +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_64M)
extern unsigned long mips_stack_top(void); #define STACK_TOP mips_stack_top()
From: Tang Junhui tang.junhui.linux@gmail.com
[ Upstream commit 2e17a262a2371d38d2ec03614a2675a32cef9912 ]
When bcache device is clean, dirty keys may still exist after journal replay, so we need to count these dirty keys even device in clean status, otherwise after writeback, the amount of dirty data would be incorrect.
Signed-off-by: Tang Junhui tang.junhui.linux@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Coly Li colyli@suse.de Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/md/bcache/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fe6e4c319b7c..9e875aba41b9 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1045,12 +1045,13 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, }
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); }
+ bch_sectors_dirty_init(&dc->disk); + bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev");
From: Erik Schmauss erik.schmauss@intel.com
[ Upstream commit 4abb951b73ff0a8a979113ef185651aa3c8da19b ]
The table load process omitted adding the operation region address range to the global list. This omission is problematic because the OS queries the global list to check for address range conflicts before deciding which drivers to load. This commit may result in warning messages that look like the following:
[ 7.871761] ACPI Warning: system_IO range 0x00000428-0x0000042F conflicts with op_region 0x00000400-0x0000047F (\PMIO) (20180531/utaddress-213) [ 7.871769] ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
However, these messages do not signify regressions. It is a result of properly adding address ranges within the global address list.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=200011 Tested-by: Jean-Marc Lenoir archlinux@jihemel.com Signed-off-by: Erik Schmauss erik.schmauss@intel.com Cc: All applicable stable@vger.kernel.org Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/acpi/acpica/dsopcode.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c index 0336df7ac47d..e8070f6ca835 100644 --- a/drivers/acpi/acpica/dsopcode.c +++ b/drivers/acpi/acpica/dsopcode.c @@ -451,6 +451,10 @@ acpi_ds_eval_region_operands(struct acpi_walk_state *walk_state, ACPI_FORMAT_UINT64(obj_desc->region.address), obj_desc->region.length));
+ status = acpi_ut_add_address_range(obj_desc->region.space_id, + obj_desc->region.address, + obj_desc->region.length, node); + /* Now the address and length are valid for this opregion */
obj_desc->region.flags |= AOPOBJ_DATA_VALID;
From: Jason Gunthorpe jgg@mellanox.com
[ Upstream commit 9a59739bd01f77db6fbe2955a4fce165f0f43568 ]
This enum has become part of the uABI, as both RXE and the ib_uverbs_post_send() command expect userspace to supply values from this enum. So it should be properly placed in include/uapi/rdma.
In userspace this enum is called 'enum ibv_wr_opcode' as part of libibverbs.h. That enum defines different values for IB_WR_LOCAL_INV, IB_WR_SEND_WITH_INV, and IB_WR_LSO. These were introduced (incorrectly, it turns out) into libiberbs in 2015.
The kernel has changed its mind on the numbering for several of the IB_WC values over the years, but has remained stable on IB_WR_LOCAL_INV and below.
Based on this we can conclude that there is no real user space user of the values beyond IB_WR_ATOMIC_FETCH_AND_ADD, as they have never worked via rdma-core. This is confirmed by inspection, only rxe uses the kernel enum and implements the latter operations. rxe has clearly never worked with these attributes from userspace. Other drivers that support these opcodes implement the functionality without calling out to the kernel.
To make IB_WR_SEND_WITH_INV and related work for RXE in userspace we choose to renumber the IB_WR enum in the kernel to match the uABI that userspace has bee using since before Soft RoCE was merged. This is an overall simpler configuration for the whole software stack, and obviously can't break anything existing.
Reported-by: Seth Howell seth.howell@intel.com Tested-by: Seth Howell seth.howell@intel.com Fixes: 8700e3e7c485 ("Soft RoCE driver") Cc: stable@vger.kernel.org Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- include/rdma/ib_verbs.h | 34 ++++++++++++++++++------------- include/uapi/rdma/ib_user_verbs.h | 20 +++++++++++++++++- 2 files changed, 39 insertions(+), 15 deletions(-)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5a24b4c700e5..9e76b2410d03 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1251,21 +1251,27 @@ struct ib_qp_attr { };
enum ib_wr_opcode { - IB_WR_RDMA_WRITE, - IB_WR_RDMA_WRITE_WITH_IMM, - IB_WR_SEND, - IB_WR_SEND_WITH_IMM, - IB_WR_RDMA_READ, - IB_WR_ATOMIC_CMP_AND_SWP, - IB_WR_ATOMIC_FETCH_AND_ADD, - IB_WR_LSO, - IB_WR_SEND_WITH_INV, - IB_WR_RDMA_READ_WITH_INV, - IB_WR_LOCAL_INV, - IB_WR_REG_MR, - IB_WR_MASKED_ATOMIC_CMP_AND_SWP, - IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, + /* These are shared with userspace */ + IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE, + IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM, + IB_WR_SEND = IB_UVERBS_WR_SEND, + IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM, + IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ, + IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP, + IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD, + IB_WR_LSO = IB_UVERBS_WR_TSO, + IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV, + IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV, + IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV, + IB_WR_MASKED_ATOMIC_CMP_AND_SWP = + IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP, + IB_WR_MASKED_ATOMIC_FETCH_AND_ADD = + IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD, + + /* These are kernel only and can not be issued by userspace */ + IB_WR_REG_MR = 0x20, IB_WR_REG_SIG_MR, + /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. */ diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index e0e83a105953..e11b4def8630 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -751,10 +751,28 @@ struct ib_uverbs_sge { __u32 lkey; };
+enum ib_uverbs_wr_opcode { + IB_UVERBS_WR_RDMA_WRITE = 0, + IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1, + IB_UVERBS_WR_SEND = 2, + IB_UVERBS_WR_SEND_WITH_IMM = 3, + IB_UVERBS_WR_RDMA_READ = 4, + IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5, + IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6, + IB_UVERBS_WR_LOCAL_INV = 7, + IB_UVERBS_WR_BIND_MW = 8, + IB_UVERBS_WR_SEND_WITH_INV = 9, + IB_UVERBS_WR_TSO = 10, + IB_UVERBS_WR_RDMA_READ_WITH_INV = 11, + IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12, + IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13, + /* Review enum ib_wr_opcode before modifying this */ +}; + struct ib_uverbs_send_wr { __u64 wr_id; __u32 num_sge; - __u32 opcode; + __u32 opcode; /* see enum ib_uverbs_wr_opcode */ __u32 send_flags; union { __u32 imm_data;
From: Goldwyn Rodrigues rgoldwyn@suse.de
[ Upstream commit a408e4a86b36bf98ad15b9ada531cf0e5118ac67 ]
Open a new file instance as opposed to changing file->f_mode when the file is not readable. This is done to accomodate overlayfs stacked file operations change. The real struct file is hidden behind the overlays struct file. So, any file->f_mode manipulations are not reflected on the real struct file. Open the file again in read mode if original file cannot be read, read and calculate the hash.
Signed-off-by: Goldwyn Rodrigues rgoldwyn@suse.com Cc: stable@vger.kernel.org (linux-4.19) Signed-off-by: Mimi Zohar zohar@linux.ibm.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- security/integrity/ima/ima_crypto.c | 54 ++++++++++++++++++----------- 1 file changed, 34 insertions(+), 20 deletions(-)
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index cb041af9eddb..af680b5b678a 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -232,7 +232,7 @@ static int ima_calc_file_hash_atfm(struct file *file, { loff_t i_size, offset; char *rbuf[2] = { NULL, }; - int rc, read = 0, rbuf_len, active = 0, ahash_rc = 0; + int rc, rbuf_len, active = 0, ahash_rc = 0; struct ahash_request *req; struct scatterlist sg[1]; struct ahash_completion res; @@ -279,11 +279,6 @@ static int ima_calc_file_hash_atfm(struct file *file, &rbuf_size[1], 0); }
- if (!(file->f_mode & FMODE_READ)) { - file->f_mode |= FMODE_READ; - read = 1; - } - for (offset = 0; offset < i_size; offset += rbuf_len) { if (!rbuf[1] && offset) { /* Not using two buffers, and it is not the first @@ -322,8 +317,6 @@ static int ima_calc_file_hash_atfm(struct file *file, /* wait for the last update request to complete */ rc = ahash_wait(ahash_rc, &res); out3: - if (read) - file->f_mode &= ~FMODE_READ; ima_free_pages(rbuf[0], rbuf_size[0]); ima_free_pages(rbuf[1], rbuf_size[1]); out2: @@ -358,7 +351,7 @@ static int ima_calc_file_hash_tfm(struct file *file, { loff_t i_size, offset = 0; char *rbuf; - int rc, read = 0; + int rc; SHASH_DESC_ON_STACK(shash, tfm);
shash->tfm = tfm; @@ -379,11 +372,6 @@ static int ima_calc_file_hash_tfm(struct file *file, if (!rbuf) return -ENOMEM;
- if (!(file->f_mode & FMODE_READ)) { - file->f_mode |= FMODE_READ; - read = 1; - } - while (offset < i_size) { int rbuf_len;
@@ -400,8 +388,6 @@ static int ima_calc_file_hash_tfm(struct file *file, if (rc) break; } - if (read) - file->f_mode &= ~FMODE_READ; kfree(rbuf); out: if (!rc) @@ -442,6 +428,8 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) { loff_t i_size; int rc; + struct file *f = file; + bool new_file_instance = false, modified_flags = false;
/* * For consistency, fail file's opened with the O_DIRECT flag on @@ -453,15 +441,41 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) return -EINVAL; }
- i_size = i_size_read(file_inode(file)); + /* Open a new file instance in O_RDONLY if we cannot read */ + if (!(file->f_mode & FMODE_READ)) { + int flags = file->f_flags & ~(O_WRONLY | O_APPEND | + O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL); + flags |= O_RDONLY; + f = dentry_open(&file->f_path, flags, file->f_cred); + if (IS_ERR(f)) { + /* + * Cannot open the file again, lets modify f_flags + * of original and continue + */ + pr_info_ratelimited("Unable to reopen file for reading.\n"); + f = file; + f->f_flags |= FMODE_READ; + modified_flags = true; + } else { + new_file_instance = true; + } + } + + i_size = i_size_read(file_inode(f));
if (ima_ahash_minsize && i_size >= ima_ahash_minsize) { - rc = ima_calc_file_ahash(file, hash); + rc = ima_calc_file_ahash(f, hash); if (!rc) - return 0; + goto out; }
- return ima_calc_file_shash(file, hash); + rc = ima_calc_file_shash(f, hash); +out: + if (new_file_instance) + fput(f); + else if (modified_flags) + f->f_flags &= ~FMODE_READ; + return rc; }
/*
From: Punit Agrawal punit.agrawal@arm.com
[ Upstream commit fd2ef358282c849c193aa36dadbf4f07f7dcd29b ]
PageTransCompoundMap() returns true for hugetlbfs and THP hugepages. This behaviour incorrectly leads to stage 2 faults for unsupported hugepage sizes (e.g., 64K hugepage with 4K pages) to be treated as THP faults.
Tighten the check to filter out hugetlbfs pages. This also leads to consistently mapping all unsupported hugepage sizes as PTE level entries at stage 2.
Signed-off-by: Punit Agrawal punit.agrawal@arm.com Reviewed-by: Suzuki Poulose suzuki.poulose@arm.com Cc: Christoffer Dall christoffer.dall@arm.com Cc: Marc Zyngier marc.zyngier@arm.com Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- virt/kvm/arm/mmu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 225dc671ae31..1f4cac53b923 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1068,8 +1068,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) { kvm_pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; + struct page *page = pfn_to_page(pfn);
- if (PageTransCompoundMap(pfn_to_page(pfn))) { + /* + * PageTransCompoungMap() returns true for THP and + * hugetlbfs. Make sure the adjustment is done only for THP + * pages. + */ + if (!PageHuge(page) && PageTransCompoundMap(page)) { unsigned long mask; /* * The address we faulted on is backed by a transparent huge
From: Hans Verkuil hans.verkuil@cisco.com
[ Upstream commit b915bf575d5b7774d0f22d57d6c143e07dcaade2 ]
This function is needed by both V4L2 and CEC, so move this to cec.h as a static inline since there are no obvious shared modules between the two subsystems.
This patch, together with the following ones, fixes a dependency bug: if CEC_CORE is disabled, then building adv7604 (and other HDMI receivers) will fail because an essential function is now stubbed out.
Signed-off-by: Hans Verkuil hans.verkuil@cisco.com Cc: stable@vger.kernel.org # for v4.17 and up Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/media/cec/cec-edid.c | 60 ------------------------------- include/media/cec.h | 70 ++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 60 deletions(-)
diff --git a/drivers/media/cec/cec-edid.c b/drivers/media/cec/cec-edid.c index 38e3fec6152b..19a31d4c8603 100644 --- a/drivers/media/cec/cec-edid.c +++ b/drivers/media/cec/cec-edid.c @@ -22,66 +22,6 @@ #include <linux/types.h> #include <media/cec.h>
-/* - * This EDID is expected to be a CEA-861 compliant, which means that there are - * at least two blocks and one or more of the extensions blocks are CEA-861 - * blocks. - * - * The returned location is guaranteed to be < size - 1. - */ -static unsigned int cec_get_edid_spa_location(const u8 *edid, unsigned int size) -{ - unsigned int blocks = size / 128; - unsigned int block; - u8 d; - - /* Sanity check: at least 2 blocks and a multiple of the block size */ - if (blocks < 2 || size % 128) - return 0; - - /* - * If there are fewer extension blocks than the size, then update - * 'blocks'. It is allowed to have more extension blocks than the size, - * since some hardware can only read e.g. 256 bytes of the EDID, even - * though more blocks are present. The first CEA-861 extension block - * should normally be in block 1 anyway. - */ - if (edid[0x7e] + 1 < blocks) - blocks = edid[0x7e] + 1; - - for (block = 1; block < blocks; block++) { - unsigned int offset = block * 128; - - /* Skip any non-CEA-861 extension blocks */ - if (edid[offset] != 0x02 || edid[offset + 1] != 0x03) - continue; - - /* search Vendor Specific Data Block (tag 3) */ - d = edid[offset + 2] & 0x7f; - /* Check if there are Data Blocks */ - if (d <= 4) - continue; - if (d > 4) { - unsigned int i = offset + 4; - unsigned int end = offset + d; - - /* Note: 'end' is always < 'size' */ - do { - u8 tag = edid[i] >> 5; - u8 len = edid[i] & 0x1f; - - if (tag == 3 && len >= 5 && i + len <= end && - edid[i + 1] == 0x03 && - edid[i + 2] == 0x0c && - edid[i + 3] == 0x00) - return i + 4; - i += len + 1; - } while (i < end); - } - } - return 0; -} - u16 cec_get_edid_phys_addr(const u8 *edid, unsigned int size, unsigned int *offset) { diff --git a/include/media/cec.h b/include/media/cec.h index df6b3bd31284..b7339cc6fd3d 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -435,4 +435,74 @@ static inline void cec_phys_addr_invalidate(struct cec_adapter *adap) cec_s_phys_addr(adap, CEC_PHYS_ADDR_INVALID, false); }
+/** + * cec_get_edid_spa_location() - find location of the Source Physical Address + * + * @edid: the EDID + * @size: the size of the EDID + * + * This EDID is expected to be a CEA-861 compliant, which means that there are + * at least two blocks and one or more of the extensions blocks are CEA-861 + * blocks. + * + * The returned location is guaranteed to be <= size-2. + * + * This is an inline function since it is used by both CEC and V4L2. + * Ideally this would go in a module shared by both, but it is overkill to do + * that for just a single function. + */ +static inline unsigned int cec_get_edid_spa_location(const u8 *edid, + unsigned int size) +{ + unsigned int blocks = size / 128; + unsigned int block; + u8 d; + + /* Sanity check: at least 2 blocks and a multiple of the block size */ + if (blocks < 2 || size % 128) + return 0; + + /* + * If there are fewer extension blocks than the size, then update + * 'blocks'. It is allowed to have more extension blocks than the size, + * since some hardware can only read e.g. 256 bytes of the EDID, even + * though more blocks are present. The first CEA-861 extension block + * should normally be in block 1 anyway. + */ + if (edid[0x7e] + 1 < blocks) + blocks = edid[0x7e] + 1; + + for (block = 1; block < blocks; block++) { + unsigned int offset = block * 128; + + /* Skip any non-CEA-861 extension blocks */ + if (edid[offset] != 0x02 || edid[offset + 1] != 0x03) + continue; + + /* search Vendor Specific Data Block (tag 3) */ + d = edid[offset + 2] & 0x7f; + /* Check if there are Data Blocks */ + if (d <= 4) + continue; + if (d > 4) { + unsigned int i = offset + 4; + unsigned int end = offset + d; + + /* Note: 'end' is always < 'size' */ + do { + u8 tag = edid[i] >> 5; + u8 len = edid[i] & 0x1f; + + if (tag == 3 && len >= 5 && i + len <= end && + edid[i + 1] == 0x03 && + edid[i + 2] == 0x0c && + edid[i + 3] == 0x00) + return i + 4; + i += len + 1; + } while (i < end); + } + } + return 0; +} + #endif /* _MEDIA_CEC_H */
From: Hans Verkuil hans.verkuil@cisco.com
[ Upstream commit e81bff39489a06384822bb38ce7a59f9e365bbe9 ]
The cec_phys_addr_validate() function will be moved to V4L2, so use a simplified variant of that function in cec-api.c. cec now no longer calls cec_phys_addr_validate() and it can be safely moved to V4L2.
Signed-off-by: Hans Verkuil hans.verkuil@cisco.com Cc: stable@vger.kernel.org # for v4.17 and up Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/media/cec/cec-api.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/drivers/media/cec/cec-api.c b/drivers/media/cec/cec-api.c index a079f7fe018c..21a5f45e0259 100644 --- a/drivers/media/cec/cec-api.c +++ b/drivers/media/cec/cec-api.c @@ -113,6 +113,23 @@ static long cec_adap_g_phys_addr(struct cec_adapter *adap, return 0; }
+static int cec_validate_phys_addr(u16 phys_addr) +{ + int i; + + if (phys_addr == CEC_PHYS_ADDR_INVALID) + return 0; + for (i = 0; i < 16; i += 4) + if (phys_addr & (0xf << i)) + break; + if (i == 16) + return 0; + for (i += 4; i < 16; i += 4) + if ((phys_addr & (0xf << i)) == 0) + return -EINVAL; + return 0; +} + static long cec_adap_s_phys_addr(struct cec_adapter *adap, struct cec_fh *fh, bool block, __u16 __user *parg) { @@ -124,7 +141,7 @@ static long cec_adap_s_phys_addr(struct cec_adapter *adap, struct cec_fh *fh, if (copy_from_user(&phys_addr, parg, sizeof(phys_addr))) return -EFAULT;
- err = cec_phys_addr_validate(phys_addr, NULL, NULL); + err = cec_validate_phys_addr(phys_addr); if (err) return err; mutex_lock(&adap->lock);
From: Hans Verkuil hans.verkuil@cisco.com
[ Upstream commit e7da89926f6dc6cf855f5ffdf79ef99a1b115ca7 ]
When there is no EDID the CEC adapter should be unconfigured as well. So call cec_phys_addr_invalidate() when this happens.
Signed-off-by: Hans Verkuil hans.verkuil@cisco.com Cc: stable@vger.kernel.org # for v4.18 and up Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/media/i2c/adv7604.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index d2108aad3c65..26c3ec573a56 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -2295,8 +2295,10 @@ static int adv76xx_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *edid) state->aspect_ratio.numerator = 16; state->aspect_ratio.denominator = 9;
- if (!state->edid.present) + if (!state->edid.present) { state->edid.blocks = 0; + cec_phys_addr_invalidate(state->cec_adap); + }
v4l2_dbg(2, debug, sd, "%s: clear EDID pad %d, edid.present = 0x%x\n", __func__, edid->pad, state->edid.present);
From: Hans Verkuil hans.verkuil@cisco.com
[ Upstream commit ab83203e181015b099720aff43ffabc1812e0fb3 ]
When there is no EDID the CEC adapter should be unconfigured as well. So call cec_phys_addr_invalidate() when this happens.
Signed-off-by: Hans Verkuil hans.verkuil@cisco.com Cc: stable@vger.kernel.org # for v4.18 and up Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/media/i2c/adv7842.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index f9c23173c9fa..dcce8d030e5d 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -799,8 +799,10 @@ static int edid_write_hdmi_segment(struct v4l2_subdev *sd, u8 port) /* Disable I2C access to internal EDID ram from HDMI DDC ports */ rep_write_and_or(sd, 0x77, 0xf3, 0x00);
- if (!state->hdmi_edid.present) + if (!state->hdmi_edid.present) { + cec_phys_addr_invalidate(state->cec_adap); return 0; + }
pa = cec_get_edid_phys_addr(edid, 256, &spa_loc); err = cec_phys_addr_validate(pa, &pa, NULL);
From: Miklos Szeredi mszeredi@redhat.com
[ Upstream commit 2d84a2d19b6150c6dbac1e6ebad9c82e4c123772 ]
In current fuse_drop_waiting() implementation it's possible that fuse_wait_aborted() will not be woken up in the unlikely case that fuse_abort_conn() + fuse_wait_aborted() runs in between checking fc->connected and calling atomic_dec(&fc->num_waiting).
Do the atomic_dec_and_test() unconditionally, which also provides the necessary barrier against reordering with the fc->connected check.
The explicit smp_mb() in fuse_wait_aborted() is not actually needed, since the spin_unlock() in fuse_abort_conn() provides the necessary RELEASE barrier after resetting fc->connected. However, this is not a performance sensitive path, and adding the explicit barrier makes it easier to document.
Signed-off-by: Miklos Szeredi mszeredi@redhat.com Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests") Cc: stable@vger.kernel.org #v4.19 Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- fs/fuse/dev.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 63fd33383413..af78ceead2dc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -133,9 +133,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
static void fuse_drop_waiting(struct fuse_conn *fc) { - if (fc->connected) { - atomic_dec(&fc->num_waiting); - } else if (atomic_dec_and_test(&fc->num_waiting)) { + /* + * lockess check of fc->connected is okay, because atomic_dec_and_test() + * provides a memory barrier mached with the one in fuse_wait_aborted() + * to ensure no wake-up is missed. + */ + if (atomic_dec_and_test(&fc->num_waiting) && + !READ_ONCE(fc->connected)) { /* wake up aborters */ wake_up_all(&fc->blocked_waitq); } @@ -2170,6 +2174,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn);
void fuse_wait_aborted(struct fuse_conn *fc) { + /* matches implicit memory barrier in fuse_drop_waiting() */ + smp_mb(); wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0); }
From: Vignesh R vigneshr@ti.com
[ Upstream commit 5b277402deac0691226a947df71c581686bd4020 ]
Allow I2C_OMAP to be built for K3 platforms.
Signed-off-by: Vignesh R vigneshr@ti.com Reviewed-by: Grygorii Strashko grygorii.strashko@ti.com Signed-off-by: Wolfram Sang wsa@the-dreams.de Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/i2c/busses/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 45a3f3ca29b3..75ea367ffd83 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -759,7 +759,7 @@ config I2C_OCORES
config I2C_OMAP tristate "OMAP I2C adapter" - depends on ARCH_OMAP + depends on ARCH_OMAP || ARCH_K3 default y if MACH_OMAP_H3 || MACH_OMAP_OSK help If you say yes to this option, support will be included for the
From: Ville Syrjälä ville.syrjala@linux.intel.com
[ Upstream commit 03981c6ebec4fc7056b9b45f847393aeac90d060 ]
I have a Thinkpad X220 Tablet in my hands that is losing vblank interrupts whenever LP3 watermarks are used.
If I nudge the latency value written to the WM3 register just by one in either direction the problem disappears. That to me suggests that the punit will not enter the corrsponding powersave mode (MPLL shutdown IIRC) unless the latency value in the register matches exactly what we read from SSKPD. Ie. it's not really a latency value but rather just a cookie by which the punit can identify the desired power saving state. On HSW/BDW this was changed such that we actually just write the WM level number into those bits, which makes much more sense given the observed behaviour.
We could try to handle this by disallowing LP3 watermarks only when vblank interrupts are enabled but we'd first have to prove that only vblank interrupts are affected, which seems unlikely. Also we can't grab the wm mutex from the vblank enable/disable hooks because those are called with various spinlocks held. Thus we'd have to redesigne the watermark locking. So to play it safe and keep the code simple we simply disable LP3 watermarks on all SNB machines.
To do that we simply zero out the latency values for watermark level 3, and we adjust the watermark computation to check for that. The behaviour now matches that of the g4x/vlv/skl wm code in the presence of a zeroed latency value.
v2: s/USHRT_MAX/U32_MAX/ for consistency with the types (Chris)
Cc: stable@vger.kernel.org Cc: Chris Wilson chris@chris-wilson.co.uk Acked-by: Chris Wilson chris@chris-wilson.co.uk Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101269 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103713 Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20181114173440.6730-1-ville.sy... Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/gpu/drm/i915/intel_pm.c | 41 ++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 87cccb5f8c5d..96a5237741e0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2471,6 +2471,9 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, uint32_t method1, method2; int cpp;
+ if (mem_value == 0) + return U32_MAX; + if (!intel_wm_plane_visible(cstate, pstate)) return 0;
@@ -2500,6 +2503,9 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, uint32_t method1, method2; int cpp;
+ if (mem_value == 0) + return U32_MAX; + if (!intel_wm_plane_visible(cstate, pstate)) return 0;
@@ -2523,6 +2529,9 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, { int cpp;
+ if (mem_value == 0) + return U32_MAX; + if (!intel_wm_plane_visible(cstate, pstate)) return 0;
@@ -2981,6 +2990,34 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv) intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); }
+static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv) +{ + /* + * On some SNB machines (Thinkpad X220 Tablet at least) + * LP3 usage can cause vblank interrupts to be lost. + * The DEIIR bit will go high but it looks like the CPU + * never gets interrupted. + * + * It's not clear whether other interrupt source could + * be affected or if this is somehow limited to vblank + * interrupts only. To play it safe we disable LP3 + * watermarks entirely. + */ + if (dev_priv->wm.pri_latency[3] == 0 && + dev_priv->wm.spr_latency[3] == 0 && + dev_priv->wm.cur_latency[3] == 0) + return; + + dev_priv->wm.pri_latency[3] = 0; + dev_priv->wm.spr_latency[3] = 0; + dev_priv->wm.cur_latency[3] = 0; + + DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n"); + intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); + intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); + intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); +} + static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv) { intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); @@ -2997,8 +3034,10 @@ static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv) intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
- if (IS_GEN6(dev_priv)) + if (IS_GEN6(dev_priv)) { snb_wm_latency_quirk(dev_priv); + snb_wm_lp3_irq_quirk(dev_priv); + } }
static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
From: Amir Goldstein amir73il@gmail.com
[ Upstream commit 007d1e8395eaa59b0e7ad9eb2b53a40859446a88 ]
FS_EVENT_ON_CHILD gets a special treatment in fsnotify() because it is not a flag specifying an event type, but rather an extra flags that may be reported along with another event and control the handling of the event by the backend.
FS_ISDIR is also an "extra flag" and not an "event type" and therefore desrves the same treatment. With inotify/dnotify backends it was never possible to set FS_ISDIR in mark masks, so it did not matter. With fanotify backend, mark adding code jumps through hoops to avoid setting the FS_ISDIR in the commulative object mask.
Separate the constant ALL_FSNOTIFY_EVENTS to ALL_FSNOTIFY_FLAGS and ALL_FSNOTIFY_EVENTS, so the latter can be used to test for specific event types.
Signed-off-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- fs/notify/fsnotify.c | 7 +++---- include/linux/fsnotify_backend.h | 9 +++++++-- 2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 506da82ff3f1..dc080c642dd0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -192,7 +192,7 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0;
@@ -256,8 +256,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct fsnotify_iter_info iter_info; struct mount *mnt; int ret = 0; - /* global tests shouldn't care about events on child only the specific event */ - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
if (data_is == FSNOTIFY_EVENT_PATH) mnt = real_mount(((const struct path *)data)->mnt); @@ -380,7 +379,7 @@ static __init int fsnotify_init(void) { int ret;
- BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); + BUG_ON(hweight32(ALL_FSNOTIFY_BITS) != 23);
ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ce74278a454a..81052313adeb 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -67,15 +67,20 @@
#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM)
+/* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \ - FS_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \ + FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME) + +/* Extra flags that may be reported with event or control handling of events */ +#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ FS_DN_MULTISHOT | FS_EVENT_ON_CHILD)
+#define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) + struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark;
On Tue 07-05-19 01:37:39, Sasha Levin wrote:
From: Amir Goldstein amir73il@gmail.com
[ Upstream commit 007d1e8395eaa59b0e7ad9eb2b53a40859446a88 ]
FS_EVENT_ON_CHILD gets a special treatment in fsnotify() because it is not a flag specifying an event type, but rather an extra flags that may be reported along with another event and control the handling of the event by the backend.
FS_ISDIR is also an "extra flag" and not an "event type" and therefore desrves the same treatment. With inotify/dnotify backends it was never possible to set FS_ISDIR in mark masks, so it did not matter. With fanotify backend, mark adding code jumps through hoops to avoid setting the FS_ISDIR in the commulative object mask.
Separate the constant ALL_FSNOTIFY_EVENTS to ALL_FSNOTIFY_FLAGS and ALL_FSNOTIFY_EVENTS, so the latter can be used to test for specific event types.
Signed-off-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin alexander.levin@microsoft.com
Sasha, why did you select this patch? It is just a cleanup with no user visible effect and was done mostly to simplify implementing following features...
Honza
fs/notify/fsnotify.c | 7 +++---- include/linux/fsnotify_backend.h | 9 +++++++-- 2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 506da82ff3f1..dc080c642dd0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -192,7 +192,7 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL;
- __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
- __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0;
@@ -256,8 +256,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct fsnotify_iter_info iter_info; struct mount *mnt; int ret = 0;
- /* global tests shouldn't care about events on child only the specific event */
- __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
- __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
if (data_is == FSNOTIFY_EVENT_PATH) mnt = real_mount(((const struct path *)data)->mnt); @@ -380,7 +379,7 @@ static __init int fsnotify_init(void) { int ret;
- BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
- BUG_ON(hweight32(ALL_FSNOTIFY_BITS) != 23);
ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ce74278a454a..81052313adeb 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -67,15 +67,20 @@ #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM) +/* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \
FS_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \
FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME)
+/* Extra flags that may be reported with event or control handling of events */ +#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) +#define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS)
struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark; -- 2.20.1
On Tue, May 07, 2019 at 03:23:30PM +0200, Jan Kara wrote:
On Tue 07-05-19 01:37:39, Sasha Levin wrote:
From: Amir Goldstein amir73il@gmail.com
[ Upstream commit 007d1e8395eaa59b0e7ad9eb2b53a40859446a88 ]
FS_EVENT_ON_CHILD gets a special treatment in fsnotify() because it is not a flag specifying an event type, but rather an extra flags that may be reported along with another event and control the handling of the event by the backend.
FS_ISDIR is also an "extra flag" and not an "event type" and therefore desrves the same treatment. With inotify/dnotify backends it was never possible to set FS_ISDIR in mark masks, so it did not matter. With fanotify backend, mark adding code jumps through hoops to avoid setting the FS_ISDIR in the commulative object mask.
Separate the constant ALL_FSNOTIFY_EVENTS to ALL_FSNOTIFY_FLAGS and ALL_FSNOTIFY_EVENTS, so the latter can be used to test for specific event types.
Signed-off-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Sasha Levin alexander.levin@microsoft.com
Sasha, why did you select this patch? It is just a cleanup with no user visible effect and was done mostly to simplify implementing following features...
Sigh, my script picked up the patch after this one (by mistake). I've dropped that one but missed this one twice(!). Thanks for the heads-up, I'll drop it.
-- Thanks, Sasha
From: Hugues Fruchet hugues.fruchet@st.com
[ Upstream commit c2c3f42df4dd9bb231d756bacb0c897f662c6d3c ]
ov5640_set_mode_exposure_calc() is checking binning value but binning value read is buggy, fix this. Rename ov5640_binning_on() to ov5640_get_binning() as per other similar functions.
Signed-off-by: Hugues Fruchet hugues.fruchet@st.com Reviewed-by: Laurent Pinchart laurent.pinchart@ideasonboard.com Reviewed-by: Jacopo Mondi jacopo@jmondi.org Signed-off-by: Sakari Ailus sakari.ailus@linux.intel.com Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/media/i2c/ov5640.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index 39a2269c0bee..0366c8dc6ecf 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -1216,7 +1216,7 @@ static int ov5640_set_ae_target(struct ov5640_dev *sensor, int target) return ov5640_write_reg(sensor, OV5640_REG_AEC_CTRL1F, fast_low); }
-static int ov5640_binning_on(struct ov5640_dev *sensor) +static int ov5640_get_binning(struct ov5640_dev *sensor) { u8 temp; int ret; @@ -1224,8 +1224,8 @@ static int ov5640_binning_on(struct ov5640_dev *sensor) ret = ov5640_read_reg(sensor, OV5640_REG_TIMING_TC_REG21, &temp); if (ret) return ret; - temp &= 0xfe; - return temp ? 1 : 0; + + return temp & BIT(0); }
static int ov5640_set_virtual_channel(struct ov5640_dev *sensor) @@ -1293,7 +1293,7 @@ static int ov5640_set_mode_exposure_calc( if (ret < 0) return ret; prev_shutter = ret; - ret = ov5640_binning_on(sensor); + ret = ov5640_get_binning(sensor); if (ret < 0) return ret; if (ret && mode->id != OV5640_MODE_720P_1280_720 &&
From: Hugues Fruchet hugues.fruchet@st.com
[ Upstream commit a8f438c684eaa4cbe6c98828eb996d5ec53e24fb ]
When switching from auto to manual mode, V4L2 core is calling g_volatile_ctrl() in manual mode in order to get the manual initial value. Remove the manual mode check/return to not break this behaviour.
Signed-off-by: Hugues Fruchet hugues.fruchet@st.com Tested-by: Jacopo Mondi jacopo@jmondi.org Signed-off-by: Sakari Ailus sakari.ailus@linux.intel.com Signed-off-by: Mauro Carvalho Chehab mchehab+samsung@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/media/i2c/ov5640.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index 0366c8dc6ecf..acf5c8a55bbd 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -1900,16 +1900,12 @@ static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
switch (ctrl->id) { case V4L2_CID_AUTOGAIN: - if (!ctrl->val) - return 0; val = ov5640_get_gain(sensor); if (val < 0) return val; sensor->ctrls.gain->val = val; break; case V4L2_CID_EXPOSURE_AUTO: - if (ctrl->val == V4L2_EXPOSURE_MANUAL) - return 0; val = ov5640_get_exposure(sensor); if (val < 0) return val;
From: Paolo Abeni pabeni@redhat.com
[ Upstream commit 605108acfe6233b72e2f803aa1cb59a2af3001ca ]
Eric noted that with UDP GRO and NAPI timeout, we could keep a single UDP packet inside the GRO hash forever, if the related NAPI instance calls napi_gro_complete() at an higher frequency than the NAPI timeout. Willem noted that even TCP packets could be trapped there, till the next retransmission. This patch tries to address the issue, flushing the old packets - those with a NAPI_GRO_CB age before the current jiffy - before scheduling the NAPI timeout. The rationale is that such a timeout should be well below a jiffy and we are not flushing packets eligible for sane GRO.
v1 -> v2: - clarified the commit message and comment
RFC -> v1: - added 'Fixes tags', cleaned-up the wording.
Reported-by: Eric Dumazet eric.dumazet@gmail.com Fixes: 3b47d30396ba ("net: gro: add a per device gro flush timer") Signed-off-by: Paolo Abeni pabeni@redhat.com Acked-by: Willem de Bruijn willemb@google.com Acked-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/core/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c index 93a1b07990b8..90ec30d5b851 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5308,11 +5308,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (work_done) timeout = n->dev->gro_flush_timeout;
+ /* When the NAPI instance uses a timeout and keeps postponing + * it, we need to bound somehow the time packets are kept in + * the GRO layer + */ + napi_gro_flush(n, !!timeout); if (timeout) hrtimer_start(&n->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); - else - napi_gro_flush(n, false); } if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */
From: "Steven Rostedt (VMware)" rostedt@goodmis.org
[ Upstream commit 5cf99a0f3161bc3ae2391269d134d6bf7e26f00e ]
The tracefs file set_graph_function is used to only function graph functions that are listed in that file (or all functions if the file is empty). The way this is implemented is that the function graph tracer looks at every function, and if the current depth is zero and the function matches something in the file then it will trace that function. When other functions are called, the depth will be greater than zero (because the original function will be at depth zero), and all functions will be traced where the depth is greater than zero.
The issue is that when a function is first entered, and the handler that checks this logic is called, the depth is set to zero. If an interrupt comes in and a function in the interrupt handler is traced, its depth will be greater than zero and it will automatically be traced, even if the original function was not. But because the logic only looks at depth it may trace interrupts when it should not be.
The recent design change of the function graph tracer to fix other bugs caused the depth to be zero while the function graph callback handler is being called for a longer time, widening the race of this happening. This bug was actually there for a longer time, but because the race window was so small it seldom happened. The Fixes tag below is for the commit that widen the race window, because that commit belongs to a series that will also help fix the original bug.
Cc: stable@kernel.org Fixes: 39eb456dacb5 ("function_graph: Use new curr_ret_depth to manage depth instead of curr_ret_stack") Reported-by: Joe Lawrence joe.lawrence@redhat.com Tested-by: Joe Lawrence joe.lawrence@redhat.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- kernel/trace/trace.h | 57 ++++++++++++++++++++++++++-- kernel/trace/trace_functions_graph.c | 4 ++ kernel/trace/trace_irqsoff.c | 2 + kernel/trace/trace_sched_wakeup.c | 2 + 4 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 851cd1605085..a51e32de7c5f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -504,12 +504,44 @@ enum { * can only be modified by current, we can reuse trace_recursion. */ TRACE_IRQ_BIT, + + /* Set if the function is in the set_graph_function file */ + TRACE_GRAPH_BIT, + + /* + * In the very unlikely case that an interrupt came in + * at a start of graph tracing, and we want to trace + * the function in that interrupt, the depth can be greater + * than zero, because of the preempted start of a previous + * trace. In an even more unlikely case, depth could be 2 + * if a softirq interrupted the start of graph tracing, + * followed by an interrupt preempting a start of graph + * tracing in the softirq, and depth can even be 3 + * if an NMI came in at the start of an interrupt function + * that preempted a softirq start of a function that + * preempted normal context!!!! Luckily, it can't be + * greater than 3, so the next two bits are a mask + * of what the depth is when we set TRACE_GRAPH_BIT + */ + + TRACE_GRAPH_DEPTH_START_BIT, + TRACE_GRAPH_DEPTH_END_BIT, };
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit)))
+#define trace_recursion_depth() \ + (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) +#define trace_recursion_set_depth(depth) \ + do { \ + current->trace_recursion &= \ + ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ + current->trace_recursion |= \ + ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ + } while (0) + #define TRACE_CONTEXT_BITS 4
#define TRACE_FTRACE_START TRACE_FTRACE_BIT @@ -839,8 +871,9 @@ extern void __trace_graph_return(struct trace_array *tr, extern struct ftrace_hash *ftrace_graph_hash; extern struct ftrace_hash *ftrace_graph_notrace_hash;
-static inline int ftrace_graph_addr(unsigned long addr) +static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { + unsigned long addr = trace->func; int ret = 0;
preempt_disable_notrace(); @@ -851,6 +884,14 @@ static inline int ftrace_graph_addr(unsigned long addr) }
if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + + /* + * This needs to be cleared on the return functions + * when the depth is zero. + */ + trace_recursion_set(TRACE_GRAPH_BIT); + trace_recursion_set_depth(trace->depth); + /* * If no irqs are to be traced, but a set_graph_function * is set, and called by an interrupt handler, we still @@ -868,6 +909,13 @@ static inline int ftrace_graph_addr(unsigned long addr) return ret; }
+static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) +{ + if (trace_recursion_test(TRACE_GRAPH_BIT) && + trace->depth == trace_recursion_depth()) + trace_recursion_clear(TRACE_GRAPH_BIT); +} + static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; @@ -881,7 +929,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) return ret; } #else -static inline int ftrace_graph_addr(unsigned long addr) +static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { return 1; } @@ -890,6 +938,8 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) { return 0; } +static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) +{ } #endif /* CONFIG_DYNAMIC_FTRACE */
extern unsigned int fgraph_max_depth; @@ -897,7 +947,8 @@ extern unsigned int fgraph_max_depth; static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) { /* trace it when it is-nested-in or is a function enabled. */ - return !(trace->depth || ftrace_graph_addr(trace->func)) || + return !(trace_recursion_test(TRACE_GRAPH_BIT) || + ftrace_graph_addr(trace)) || (trace->depth < 0) || (fgraph_max_depth && trace->depth >= fgraph_max_depth); } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 169b3c44ee97..72d0d477f5c1 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -482,6 +482,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) int cpu; int pc;
+ ftrace_graph_addr_finish(trace); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->trace_buffer.data, cpu); @@ -505,6 +507,8 @@ void set_graph_array(struct trace_array *tr)
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace) { + ftrace_graph_addr_finish(trace); + if (tracing_thresh && (trace->rettime - trace->calltime < tracing_thresh)) return; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7758bc0617cb..2d9e12380dc3 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -204,6 +204,8 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) unsigned long flags; int pc;
+ ftrace_graph_addr_finish(trace); + if (!func_prolog_dec(tr, &data, &flags)) return;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 7d461dcd4831..0fa9dadf3f4f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -270,6 +270,8 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace) unsigned long flags; int pc;
+ ftrace_graph_addr_finish(trace); + if (!func_prolog_preempt_disable(tr, &data, &pc)) return;
From: Chris Wilson chris@chris-wilson.co.uk
[ Upstream commit 86c1c87d0e6241cbe35bd52badfc84b154e1b959 ]
According to intel_read_wm_latency() it is perfectly legal for one WM and all subsequent levels to be 0 (and the deeper powersaving states disabled), so don't shout *ERROR*, over and over again.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjala ville.syrjala@linux.intel.com Acked-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20180726161527.10516-1-chris@c... Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 96a5237741e0..cb377b003321 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2934,8 +2934,8 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv, unsigned int latency = wm[level];
if (latency == 0) { - DRM_ERROR("%s WM%d latency not provided\n", - name, level); + DRM_DEBUG_KMS("%s WM%d latency not provided\n", + name, level); continue; }
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit 0eeec01488da9b1403c8c29e73eacac8af9e4bf2 ]
I ran into a new warning on randconfig kernels:
drivers/scsi/raid_class.c: In function 'raid_match': drivers/scsi/raid_class.c:64:24: error: unused variable 'i' [-Werror=unused-variable]
This looks like a very old problem that for some reason was very hard to run into, but it is very easy to fix, by replacing the incorrect #ifdef with a simpler IS_ENABLED() check.
Fixes: fac829fdcaf4 ("[SCSI] raid_attrs: fix dependency problems") Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/scsi/raid_class.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c index 2c146b44d95f..cddd78893b46 100644 --- a/drivers/scsi/raid_class.c +++ b/drivers/scsi/raid_class.c @@ -63,8 +63,7 @@ static int raid_match(struct attribute_container *cont, struct device *dev) * emulated RAID devices, so start with SCSI */ struct raid_internal *i = ac_to_raid_internal(cont);
-#if defined(CONFIG_SCSI) || defined(CONFIG_SCSI_MODULE) - if (scsi_is_sdev_device(dev)) { + if (IS_ENABLED(CONFIG_SCSI) && scsi_is_sdev_device(dev)) { struct scsi_device *sdev = to_scsi_device(dev);
if (i->f->cookie != sdev->host->hostt) @@ -72,7 +71,6 @@ static int raid_match(struct attribute_container *cont, struct device *dev)
return i->f->is_raid(dev); } -#endif /* FIXME: look at other subsystems too */ return 0; }
From: Lubomir Rintel lkundrak@v3.sk
[ Upstream commit 33f49571d75024b1044cd02689ad2bdb4924cc80 ]
WARNING: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE Depends on [n]: HAS_IOMEM [=y] && BACKLIGHT_LCD_SUPPORT [=n] Selected by [y]: - FB_OLPC_DCON [=y] && STAGING [=y] && X86 [=y] && OLPC [=y] && FB [=y] && I2C [=y] && (GPIO_CS5535 [=n] || GPIO_CS5535 [=n]=n)
Signed-off-by: Lubomir Rintel lkundrak@v3.sk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/staging/olpc_dcon/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/staging/olpc_dcon/Kconfig b/drivers/staging/olpc_dcon/Kconfig index d277f048789e..8c6cc61d634b 100644 --- a/drivers/staging/olpc_dcon/Kconfig +++ b/drivers/staging/olpc_dcon/Kconfig @@ -2,6 +2,7 @@ config FB_OLPC_DCON tristate "One Laptop Per Child Display CONtroller support" depends on OLPC && FB depends on I2C + depends on BACKLIGHT_LCD_SUPPORT depends on (GPIO_CS5535 || GPIO_CS5535=n) select BACKLIGHT_CLASS_DEVICE ---help---
From: Thierry Reding treding@nvidia.com
[ Upstream commit 5f2b8b62786853341a20d4cd4948f9cbca3db002 ]
Setting up and tearing down debugfs is current unbalanced, as seen by this error during resume from suspend:
[ 752.134067] dwc-eth-dwmac 2490000.ethernet eth0: ERROR failed to create debugfs directory [ 752.134347] dwc-eth-dwmac 2490000.ethernet eth0: stmmac_hw_setup: failed debugFS registration
The imbalance happens because the driver creates the debugfs hierarchy when the device is opened and tears it down when the device is closed. There's little gain in that, and it could be argued that it is even surprising because it's not usually done for other devices. Fix the imbalance by moving the debugfs creation and teardown to the driver's ->probe() and ->remove() implementations instead.
Note that the ring descriptors cannot be read while the interface is down, so make sure to return an empty file when the descriptors_status debugfs file is read.
Signed-off-by: Thierry Reding treding@nvidia.com Acked-by: Jose Abreu joabreu@synopsys.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ecf3f8c1bc0e..3389545353a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2530,12 +2530,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) netdev_warn(priv->dev, "PTP init failed\n"); }
-#ifdef CONFIG_DEBUG_FS - ret = stmmac_init_fs(dev); - if (ret < 0) - netdev_warn(priv->dev, "%s: failed debugFS registration\n", - __func__); -#endif priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) { @@ -2729,10 +2723,6 @@ static int stmmac_release(struct net_device *dev)
netif_carrier_off(dev);
-#ifdef CONFIG_DEBUG_FS - stmmac_exit_fs(dev); -#endif - stmmac_release_ptp(priv);
return 0; @@ -3837,6 +3827,9 @@ static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v) u32 tx_count = priv->plat->tx_queues_to_use; u32 queue;
+ if ((dev->flags & IFF_UP) == 0) + return 0; + for (queue = 0; queue < rx_count; queue++) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
@@ -4308,6 +4301,13 @@ int stmmac_dvr_probe(struct device *device, goto error_netdev_register; }
+#ifdef CONFIG_DEBUG_FS + ret = stmmac_init_fs(ndev); + if (ret < 0) + netdev_warn(priv->dev, "%s: failed debugFS registration\n", + __func__); +#endif + return ret;
error_netdev_register: @@ -4341,6 +4341,9 @@ int stmmac_dvr_remove(struct device *dev)
netdev_info(priv->dev, "%s: removing driver", __func__);
+#ifdef CONFIG_DEBUG_FS + stmmac_exit_fs(ndev); +#endif stmmac_stop_all_dma(priv);
priv->hw->mac->set_mac(priv->ioaddr, false);
From: Omar Sandoval osandov@fb.com
[ Upstream commit d6fd0ae25c6495674dc5a41a8d16bc8e0073276d ]
There's a race between close_ctree() and cleaner_kthread(). close_ctree() sets btrfs_fs_closing(), and the cleaner stops when it sees it set, but this is racy; the cleaner might have already checked the bit and could be cleaning stuff. In particular, if it deletes unused block groups, it will create delayed iputs for the free space cache inodes. As of "btrfs: don't run delayed_iputs in commit", we're no longer running delayed iputs after a commit. Therefore, if the cleaner creates more delayed iputs after delayed iputs are run in btrfs_commit_super(), we will leak inodes on unmount and get a busy inode crash from the VFS.
Fix it by parking the cleaner before we actually close anything. Then, any remaining delayed iputs will always be handled in btrfs_commit_super(). This also ensures that the commit in close_ctree() is really the last commit, so we can get rid of the commit in cleaner_kthread().
The fstest/generic/475 followed by 476 can trigger a crash that manifests as a slab corruption caused by accessing the freed kthread structure by a wake up function. Sample trace:
[ 5657.077612] BUG: unable to handle kernel NULL pointer dereference at 00000000000000cc [ 5657.079432] PGD 1c57a067 P4D 1c57a067 PUD da10067 PMD 0 [ 5657.080661] Oops: 0000 [#1] PREEMPT SMP [ 5657.081592] CPU: 1 PID: 5157 Comm: fsstress Tainted: G W 4.19.0-rc8-default+ #323 [ 5657.083703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626cc-prebuilt.qemu-project.org 04/01/2014 [ 5657.086577] RIP: 0010:shrink_page_list+0x2f9/0xe90 [ 5657.091937] RSP: 0018:ffffb5c745c8f728 EFLAGS: 00010287 [ 5657.092953] RAX: 0000000000000074 RBX: ffffb5c745c8f830 RCX: 0000000000000000 [ 5657.094590] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff9a8747fdf3d0 [ 5657.095987] RBP: ffffb5c745c8f9e0 R08: 0000000000000000 R09: 0000000000000000 [ 5657.097159] R10: ffff9a8747fdf5e8 R11: 0000000000000000 R12: ffffb5c745c8f788 [ 5657.098513] R13: ffff9a877f6ff2c0 R14: ffff9a877f6ff2c8 R15: dead000000000200 [ 5657.099689] FS: 00007f948d853b80(0000) GS:ffff9a877d600000(0000) knlGS:0000000000000000 [ 5657.101032] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5657.101953] CR2: 00000000000000cc CR3: 00000000684bd000 CR4: 00000000000006e0 [ 5657.103159] Call Trace: [ 5657.103776] shrink_inactive_list+0x194/0x410 [ 5657.104671] shrink_node_memcg.constprop.84+0x39a/0x6a0 [ 5657.105750] shrink_node+0x62/0x1c0 [ 5657.106529] try_to_free_pages+0x1a4/0x500 [ 5657.107408] __alloc_pages_slowpath+0x2c9/0xb20 [ 5657.108418] __alloc_pages_nodemask+0x268/0x2b0 [ 5657.109348] kmalloc_large_node+0x37/0x90 [ 5657.110205] __kmalloc_node+0x236/0x310 [ 5657.111014] kvmalloc_node+0x3e/0x70
Fixes: 30928e9baac2 ("btrfs: don't run delayed_iputs in commit") Signed-off-by: Omar Sandoval osandov@fb.com Reviewed-by: David Sterba dsterba@suse.com [ add trace ] Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- fs/btrfs/disk-io.c | 51 ++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 36 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0bdc0c902e4..813834552aa1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1688,9 +1688,8 @@ static int cleaner_kthread(void *arg) struct btrfs_root *root = arg; struct btrfs_fs_info *fs_info = root->fs_info; int again; - struct btrfs_trans_handle *trans;
- do { + while (1) { again = 0;
/* Make the cleaner go to sleep early. */ @@ -1739,42 +1738,16 @@ static int cleaner_kthread(void *arg) */ btrfs_delete_unused_bgs(fs_info); sleep: + if (kthread_should_park()) + kthread_parkme(); + if (kthread_should_stop()) + return 0; if (!again) { set_current_state(TASK_INTERRUPTIBLE); - if (!kthread_should_stop()) - schedule(); + schedule(); __set_current_state(TASK_RUNNING); } - } while (!kthread_should_stop()); - - /* - * Transaction kthread is stopped before us and wakes us up. - * However we might have started a new transaction and COWed some - * tree blocks when deleting unused block groups for example. So - * make sure we commit the transaction we started to have a clean - * shutdown when evicting the btree inode - if it has dirty pages - * when we do the final iput() on it, eviction will trigger a - * writeback for it which will fail with null pointer dereferences - * since work queues and other resources were already released and - * destroyed by the time the iput/eviction/writeback is made. - */ - trans = btrfs_attach_transaction(root); - if (IS_ERR(trans)) { - if (PTR_ERR(trans) != -ENOENT) - btrfs_err(fs_info, - "cleaner transaction attach returned %ld", - PTR_ERR(trans)); - } else { - int ret; - - ret = btrfs_commit_transaction(trans); - if (ret) - btrfs_err(fs_info, - "cleaner open transaction commit returned %d", - ret); } - - return 0; }
static int transaction_kthread(void *arg) @@ -3713,6 +3686,13 @@ void close_ctree(struct btrfs_fs_info *fs_info) int ret;
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); + /* + * We don't want the cleaner to start new transactions, add more delayed + * iputs, etc. while we're closing. We can't use kthread_stop() yet + * because that frees the task_struct, and the transaction kthread might + * still try to wake up the cleaner. + */ + kthread_park(fs_info->cleaner_kthread);
/* wait for the qgroup rescan worker to stop */ btrfs_qgroup_wait_for_completion(fs_info, false); @@ -3740,9 +3720,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
if (!sb_rdonly(fs_info->sb)) { /* - * If the cleaner thread is stopped and there are - * block groups queued for removal, the deletion will be - * skipped when we quit the cleaner thread. + * The cleaner kthread is stopped, so do one final pass over + * unused block groups. */ btrfs_delete_unused_bgs(fs_info);
From: Alistair Strachan astrachan@google.com
[ Upstream commit cd01544a268ad8ee5b1dfe42c4393f1095f86879 ]
Commit
379d98ddf413 ("x86: vdso: Use $LD instead of $CC to link")
accidentally broke unwinding from userspace, because ld would strip the .eh_frame sections when linking.
Originally, the compiler would implicitly add --eh-frame-hdr when invoking the linker, but when this Makefile was converted from invoking ld via the compiler, to invoking it directly (like vmlinux does), the flag was missed. (The EH_FRAME section is important for the VDSO shared libraries, but not for vmlinux.)
Fix the problem by explicitly specifying --eh-frame-hdr, which restores parity with the old method.
See relevant bug reports for additional info:
https://bugzilla.kernel.org/show_bug.cgi?id=201741 https://bugzilla.redhat.com/show_bug.cgi?id=1659295
Fixes: 379d98ddf413 ("x86: vdso: Use $LD instead of $CC to link") Reported-by: Florian Weimer fweimer@redhat.com Reported-by: Carlos O'Donell carlos@redhat.com Reported-by: "H. J. Lu" hjl.tools@gmail.com Signed-off-by: Alistair Strachan astrachan@google.com Signed-off-by: Borislav Petkov bp@suse.de Tested-by: Laura Abbott labbott@redhat.com Cc: Andy Lutomirski luto@kernel.org Cc: Carlos O'Donell carlos@redhat.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Joel Fernandes joel@joelfernandes.org Cc: kernel-team@android.com Cc: Laura Abbott labbott@redhat.com Cc: stable stable@vger.kernel.org Cc: Thomas Gleixner tglx@linutronix.de Cc: X86 ML x86@kernel.org Link: https://lkml.kernel.org/r/20181214223637.35954-1-astrachan@google.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/x86/entry/vdso/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 839015f1b0de..ab7f730cf7f2 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -182,7 +182,8 @@ quiet_cmd_vdso = VDSO $@ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \ - $(call ld-option, --build-id) -Bsymbolic + $(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \ + -Bsymbolic GCOV_PROFILE := n
#
From: Martin Schwidefsky schwidefsky@de.ibm.com
[ Upstream commit 1071fc5779d9846fec56a4ff6089ab08cac1ab72 ]
Add three architecture overrideable functions to test if the p4d, pud, or pmd layer of a page table is folded or not.
Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Signed-off-by: Martin Schwidefsky schwidefsky@de.ibm.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- include/asm-generic/pgtable.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f00421dfacbd..0c21014a38f2 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1081,4 +1081,20 @@ static inline bool arch_has_pfn_modify_check(void) #endif #endif
+/* + * On some architectures it depends on the mm if the p4d/pud or pmd + * layer of the page table hierarchy is folded or not. + */ +#ifndef mm_p4d_folded +#define mm_p4d_folded(mm) __is_defined(__PAGETABLE_P4D_FOLDED) +#endif + +#ifndef mm_pud_folded +#define mm_pud_folded(mm) __is_defined(__PAGETABLE_PUD_FOLDED) +#endif + +#ifndef mm_pmd_folded +#define mm_pmd_folded(mm) __is_defined(__PAGETABLE_PMD_FOLDED) +#endif + #endif /* _ASM_GENERIC_PGTABLE_H */
From: Mikhail Zaslonko zaslonko@linux.ibm.com
[ Upstream commit 2830bf6f05fb3e05bc4743274b806c821807a684 ]
If memory end is not aligned with the sparse memory section boundary, the mapping of such a section is only partly initialized. This may lead to VM_BUG_ON due to uninitialized struct page access from is_mem_section_removable() or test_pages_in_a_zone() function triggered by memory_hotplug sysfs handlers:
Here are the the panic examples: CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_PGFLAGS=y
kernel parameter mem=2050M -------------------------- page:000003d082008000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: ( test_pages_in_a_zone+0xde/0x160) show_valid_zones+0x5c/0x190 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: test_pages_in_a_zone+0xde/0x160 Kernel panic - not syncing: Fatal exception: panic_on_oops
kernel parameter mem=3075M -------------------------- page:000003d08300c000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: ( is_mem_section_removable+0xb4/0x190) show_mem_removable+0x9a/0xd8 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: is_mem_section_removable+0xb4/0x190 Kernel panic - not syncing: Fatal exception: panic_on_oops
Fix the problem by initializing the last memory section of each zone in memmap_init_zone() till the very end, even if it goes beyond the zone end.
Michal said:
: This has alwways been problem AFAIU. It just went unnoticed because we : have zeroed memmaps during allocation before f7f99100d8d9 ("mm: stop : zeroing memory during allocation in vmemmap") and so the above test : would simply skip these ranges as belonging to zone 0 or provided a : garbage. : : So I guess we do care for post f7f99100d8d9 kernels mostly and : therefore Fixes: f7f99100d8d9 ("mm: stop zeroing memory during : allocation in vmemmap")
Link: http://lkml.kernel.org/r/20181212172712.34019-2-zaslonko@linux.ibm.com Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") Signed-off-by: Mikhail Zaslonko zaslonko@linux.ibm.com Reviewed-by: Gerald Schaefer gerald.schaefer@de.ibm.com Suggested-by: Michal Hocko mhocko@kernel.org Acked-by: Michal Hocko mhocko@suse.com Reported-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Tested-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Cc: Dave Hansen dave.hansen@intel.com Cc: Alexander Duyck alexander.h.duyck@linux.intel.com Cc: Pasha Tatashin Pavel.Tatashin@microsoft.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- mm/page_alloc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 923deb33bf34..16c20d9e771f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5348,6 +5348,18 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, __init_single_pfn(pfn, zone, nid); } } +#ifdef CONFIG_SPARSEMEM + /* + * If the zone does not span the rest of the section then + * we should at least initialize those pages. Otherwise we + * could blow up on a poisoned page in some paths which depend + * on full sections being initialized (e.g. memory hotplug). + */ + while (end_pfn % PAGES_PER_SECTION) { + __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid); + end_pfn++; + } +#endif }
static void __meminit zone_init_free_lists(struct zone *zone)
On Mon, May 6, 2019 at 10:40 PM Sasha Levin sashal@kernel.org wrote:
From: Mikhail Zaslonko zaslonko@linux.ibm.com
[ Upstream commit 2830bf6f05fb3e05bc4743274b806c821807a684 ]
If memory end is not aligned with the sparse memory section boundary, the mapping of such a section is only partly initialized. This may lead to VM_BUG_ON due to uninitialized struct page access from is_mem_section_removable() or test_pages_in_a_zone() function triggered by memory_hotplug sysfs handlers:
Here are the the panic examples: CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_PGFLAGS=y
kernel parameter mem=2050M
page:000003d082008000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: ( test_pages_in_a_zone+0xde/0x160) show_valid_zones+0x5c/0x190 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: test_pages_in_a_zone+0xde/0x160 Kernel panic - not syncing: Fatal exception: panic_on_oops
kernel parameter mem=3075M
page:000003d08300c000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: ( is_mem_section_removable+0xb4/0x190) show_mem_removable+0x9a/0xd8 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: is_mem_section_removable+0xb4/0x190 Kernel panic - not syncing: Fatal exception: panic_on_oops
Fix the problem by initializing the last memory section of each zone in memmap_init_zone() till the very end, even if it goes beyond the zone end.
Michal said:
: This has alwways been problem AFAIU. It just went unnoticed because we : have zeroed memmaps during allocation before f7f99100d8d9 ("mm: stop : zeroing memory during allocation in vmemmap") and so the above test : would simply skip these ranges as belonging to zone 0 or provided a : garbage. : : So I guess we do care for post f7f99100d8d9 kernels mostly and : therefore Fixes: f7f99100d8d9 ("mm: stop zeroing memory during : allocation in vmemmap")
Link: http://lkml.kernel.org/r/20181212172712.34019-2-zaslonko@linux.ibm.com Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") Signed-off-by: Mikhail Zaslonko zaslonko@linux.ibm.com Reviewed-by: Gerald Schaefer gerald.schaefer@de.ibm.com Suggested-by: Michal Hocko mhocko@kernel.org Acked-by: Michal Hocko mhocko@suse.com Reported-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Tested-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Cc: Dave Hansen dave.hansen@intel.com Cc: Alexander Duyck alexander.h.duyck@linux.intel.com Cc: Pasha Tatashin Pavel.Tatashin@microsoft.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com
mm/page_alloc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
Wasn't this patch reverted in Linus's tree for causing a regression on some platforms? If so I'm not sure we should pull this in as a candidate for stable should we, or am I missing something?
On Tue, May 7, 2019 at 9:31 AM Alexander Duyck alexander.duyck@gmail.com wrote:
Wasn't this patch reverted in Linus's tree for causing a regression on some platforms? If so I'm not sure we should pull this in as a candidate for stable should we, or am I missing something?
Good catch. It was reverted in commit 4aa9fc2a435a ("Revert "mm, memory_hotplug: initialize struct pages for the full memory section"").
We ended up with efad4e475c31 ("mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone") instead (and possibly others - this was just from looking for commit messages that mentioned that reverted commit).
Linus
On Tue, May 07, 2019 at 09:50:50AM -0700, Linus Torvalds wrote:
On Tue, May 7, 2019 at 9:31 AM Alexander Duyck alexander.duyck@gmail.com wrote:
Wasn't this patch reverted in Linus's tree for causing a regression on some platforms? If so I'm not sure we should pull this in as a candidate for stable should we, or am I missing something?
Good catch. It was reverted in commit 4aa9fc2a435a ("Revert "mm, memory_hotplug: initialize struct pages for the full memory section"").
We ended up with efad4e475c31 ("mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone") instead (and possibly others - this was just from looking for commit messages that mentioned that reverted commit).
I got it wrong then. I'll fix it up and get efad4e475c31 in instead. Thanks!
-- Thanks, Sasha
On Tue, 7 May 2019 13:02:08 -0400 Sasha Levin sashal@kernel.org wrote:
On Tue, May 07, 2019 at 09:50:50AM -0700, Linus Torvalds wrote:
On Tue, May 7, 2019 at 9:31 AM Alexander Duyck alexander.duyck@gmail.com wrote:
Wasn't this patch reverted in Linus's tree for causing a regression on some platforms? If so I'm not sure we should pull this in as a candidate for stable should we, or am I missing something?
Good catch. It was reverted in commit 4aa9fc2a435a ("Revert "mm, memory_hotplug: initialize struct pages for the full memory section"").
We ended up with efad4e475c31 ("mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone") instead (and possibly others - this was just from looking for commit messages that mentioned that reverted commit).
I got it wrong then. I'll fix it up and get efad4e475c31 in instead.
There were two commits replacing the reverted commit, fixing is_mem_section_removable() and test_pages_in_a_zone() respectively:
commit 24feb47c5fa5 ("mm, memory_hotplug: test_pages_in_a_zone do not pass the end of zone") commit efad4e475c31 ("mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone")
On Tue, May 7, 2019 at 10:02 AM Sasha Levin sashal@kernel.org wrote:
I got it wrong then. I'll fix it up and get efad4e475c31 in instead.
Careful. That one had a bug too, and we have 891cb2a72d82 ("mm, memory_hotplug: fix off-by-one in is_pageblock_removable").
All of these were *horribly* and subtly buggy, and might be intertwined with other issues. And only trigger on a few specific machines where the memory map layout is just right to trigger some special case or other, and you have just the right config.
It might be best to verify with Michal Hocko. Michal?
Linus
On Tue, May 07, 2019 at 10:15:19AM -0700, Linus Torvalds wrote:
On Tue, May 7, 2019 at 10:02 AM Sasha Levin sashal@kernel.org wrote:
I got it wrong then. I'll fix it up and get efad4e475c31 in instead.
Careful. That one had a bug too, and we have 891cb2a72d82 ("mm, memory_hotplug: fix off-by-one in is_pageblock_removable").
All of these were *horribly* and subtly buggy, and might be intertwined with other issues. And only trigger on a few specific machines where the memory map layout is just right to trigger some special case or other, and you have just the right config.
It might be best to verify with Michal Hocko. Michal?
Michal, is there a testcase I can plug into kselftests to make sure we got this right (and don't regress)? We care a lot about memory hotplug working right.
-- Thanks, Sasha
On Tue 07-05-19 13:18:06, Sasha Levin wrote:
On Tue, May 07, 2019 at 10:15:19AM -0700, Linus Torvalds wrote:
On Tue, May 7, 2019 at 10:02 AM Sasha Levin sashal@kernel.org wrote:
I got it wrong then. I'll fix it up and get efad4e475c31 in instead.
Careful. That one had a bug too, and we have 891cb2a72d82 ("mm, memory_hotplug: fix off-by-one in is_pageblock_removable").
All of these were *horribly* and subtly buggy, and might be intertwined with other issues. And only trigger on a few specific machines where the memory map layout is just right to trigger some special case or other, and you have just the right config.
It might be best to verify with Michal Hocko. Michal?
Michal, is there a testcase I can plug into kselftests to make sure we got this right (and don't regress)? We care a lot about memory hotplug working right.
As said in other email. The memory hotplug tends to work usually. It takes unexpected memory layouts which trigger corner cases. This makes testing really hard.
On Tue, May 07, 2019 at 07:32:24PM +0200, Michal Hocko wrote:
On Tue 07-05-19 13:18:06, Sasha Levin wrote:
Michal, is there a testcase I can plug into kselftests to make sure we got this right (and don't regress)? We care a lot about memory hotplug working right.
As said in other email. The memory hotplug tends to work usually. It takes unexpected memory layouts which trigger corner cases. This makes testing really hard.
Can we do something with qemu? Is it flexible enough to hotplug memory at the right boundaries?
On Tue, May 7, 2019 at 10:36 AM Matthew Wilcox willy@infradead.org wrote:
Can we do something with qemu? Is it flexible enough to hotplug memory at the right boundaries?
It's not just the actual hotplugged memory, it's things like how the e820 tables were laid out for the _regular_ non-hotplug stuff too, iirc to get the cases where something didn't work out.
I'm sure it *could* be emulated, and I'm sure some hotplug (and page poison errors etc) testing in qemu would be lovely and presumably some people do it, but all the cases so far have been about odd small special cases that people didn't think of and didn't hit. I'm not sure the qemu testing would think of them either..
Linus
On Tue 07-05-19 10:43:31, Linus Torvalds wrote:
On Tue, May 7, 2019 at 10:36 AM Matthew Wilcox willy@infradead.org wrote:
Can we do something with qemu? Is it flexible enough to hotplug memory at the right boundaries?
It's not just the actual hotplugged memory, it's things like how the e820 tables were laid out for the _regular_ non-hotplug stuff too, iirc to get the cases where something didn't work out.
I'm sure it *could* be emulated, and I'm sure some hotplug (and page poison errors etc) testing in qemu would be lovely and presumably some people do it, but all the cases so far have been about odd small special cases that people didn't think of and didn't hit. I'm not sure the qemu testing would think of them either..
Yes, this is exactly my point. It would be great to have those odd small special cases that we have met already available though. For a regression testing for them at least.
On Tue 07-05-19 10:36:55, Matthew Wilcox wrote:
On Tue, May 07, 2019 at 07:32:24PM +0200, Michal Hocko wrote:
On Tue 07-05-19 13:18:06, Sasha Levin wrote:
Michal, is there a testcase I can plug into kselftests to make sure we got this right (and don't regress)? We care a lot about memory hotplug working right.
As said in other email. The memory hotplug tends to work usually. It takes unexpected memory layouts which trigger corner cases. This makes testing really hard.
Can we do something with qemu? Is it flexible enough to hotplug memory at the right boundaries?
No idea. But I have tried to describe those layouts in the changelog so if somebody can come up with a way to reproduce them under kvm/qemu I would really appreciate that.
On Tue, May 07, 2019 at 10:36:55AM -0700, Matthew Wilcox wrote:
On Tue, May 07, 2019 at 07:32:24PM +0200, Michal Hocko wrote:
On Tue 07-05-19 13:18:06, Sasha Levin wrote:
Michal, is there a testcase I can plug into kselftests to make sure we got this right (and don't regress)? We care a lot about memory hotplug working right.
As said in other email. The memory hotplug tends to work usually. It takes unexpected memory layouts which trigger corner cases. This makes testing really hard.
Can we do something with qemu? Is it flexible enough to hotplug memory at the right boundaries?
That was my thinking too. qemu should be able to reproduce all these "unexpected" memory layouts we've had issue with so far and at the very least make sure we don't regress on those.
We're going to have (quite a) large amount of systems with "weird" memory layouts that do memory hotplug quite frequently in production, so this whole "tends to work usually" thing kinda scares me.
-- Thanks, Sasha
On Tue 07-05-19 13:45:14, Sasha Levin wrote: [...]
We're going to have (quite a) large amount of systems with "weird" memory layouts that do memory hotplug quite frequently in production, so this whole "tends to work usually" thing kinda scares me.
Memory hotplug is simply not production ready for those cases, unfortunately. It tends to work just fine with properly section aligned systems with memory being in the movable zones or for zone device. Everything beyond that is kinda long way to get there...
On Tue, 7 May 2019 13:18:06 -0400 Sasha Levin sashal@kernel.org wrote:
On Tue, May 07, 2019 at 10:15:19AM -0700, Linus Torvalds wrote:
On Tue, May 7, 2019 at 10:02 AM Sasha Levin sashal@kernel.org wrote:
I got it wrong then. I'll fix it up and get efad4e475c31 in instead.
Careful. That one had a bug too, and we have 891cb2a72d82 ("mm, memory_hotplug: fix off-by-one in is_pageblock_removable").
All of these were *horribly* and subtly buggy, and might be intertwined with other issues. And only trigger on a few specific machines where the memory map layout is just right to trigger some special case or other, and you have just the right config.
It might be best to verify with Michal Hocko. Michal?
Michal, is there a testcase I can plug into kselftests to make sure we got this right (and don't regress)? We care a lot about memory hotplug working right.
We hit the panics on s390 with special z/VM memory layout, but they both can be triggered simply by using mem= kernel parameter (and CONFIG_DEBUG_VM_PGFLAGS=y).
With "mem=3075M" (and w/o the commits efad4e475c31 + 24feb47c5fa5), it can be triggered by reading from /sys/devices/system/memory/memory<x>/valid_zones, or from /sys/devices/system/memory/memory<x>/removable, with <x> being the last memory block.
This is with 256MB section size and memory block size. On LPAR, with 256MB section size and 1GB memory block size, for some reason the "removable" issue doesn't trigger, only the "valid_zones" issue.
Using lsmem will also trigger it, as it reads both the valid_zones and the removable attribute for all memory blocks. So, a test with not-section-aligned mem= parameter and using lsmem could be an option.
Regards, Gerald
On Tue 07-05-19 10:15:19, Linus Torvalds wrote:
On Tue, May 7, 2019 at 10:02 AM Sasha Levin sashal@kernel.org wrote:
I got it wrong then. I'll fix it up and get efad4e475c31 in instead.
This patch is not marked for stable backports for good reasons.
Careful. That one had a bug too, and we have 891cb2a72d82 ("mm, memory_hotplug: fix off-by-one in is_pageblock_removable").
All of these were *horribly* and subtly buggy, and might be intertwined with other issues. And only trigger on a few specific machines where the memory map layout is just right to trigger some special case or other, and you have just the right config.
Yes, the code turned out to be much more tricky than we thought. There were several assumptions about alignment etc. Something that is really hard to test for because HW breaking those assumptions is rare. So I would discourage picking up some random patches in the memory hotplug for stable. Each patch needs a very careful consideration. In any case we really try hard to keep Fixes: tag accurate so at least those should be scanned.
On Tue, May 07, 2019 at 09:31:10AM -0700, Alexander Duyck wrote:
On Mon, May 6, 2019 at 10:40 PM Sasha Levin sashal@kernel.org wrote:
From: Mikhail Zaslonko zaslonko@linux.ibm.com
[ Upstream commit 2830bf6f05fb3e05bc4743274b806c821807a684 ]
If memory end is not aligned with the sparse memory section boundary, the mapping of such a section is only partly initialized. This may lead to VM_BUG_ON due to uninitialized struct page access from is_mem_section_removable() or test_pages_in_a_zone() function triggered by memory_hotplug sysfs handlers:
Here are the the panic examples: CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_PGFLAGS=y
kernel parameter mem=2050M
page:000003d082008000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: ( test_pages_in_a_zone+0xde/0x160) show_valid_zones+0x5c/0x190 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: test_pages_in_a_zone+0xde/0x160 Kernel panic - not syncing: Fatal exception: panic_on_oops
kernel parameter mem=3075M
page:000003d08300c000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: ( is_mem_section_removable+0xb4/0x190) show_mem_removable+0x9a/0xd8 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: is_mem_section_removable+0xb4/0x190 Kernel panic - not syncing: Fatal exception: panic_on_oops
Fix the problem by initializing the last memory section of each zone in memmap_init_zone() till the very end, even if it goes beyond the zone end.
Michal said:
: This has alwways been problem AFAIU. It just went unnoticed because we : have zeroed memmaps during allocation before f7f99100d8d9 ("mm: stop : zeroing memory during allocation in vmemmap") and so the above test : would simply skip these ranges as belonging to zone 0 or provided a : garbage. : : So I guess we do care for post f7f99100d8d9 kernels mostly and : therefore Fixes: f7f99100d8d9 ("mm: stop zeroing memory during : allocation in vmemmap")
Link: http://lkml.kernel.org/r/20181212172712.34019-2-zaslonko@linux.ibm.com Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") Signed-off-by: Mikhail Zaslonko zaslonko@linux.ibm.com Reviewed-by: Gerald Schaefer gerald.schaefer@de.ibm.com Suggested-by: Michal Hocko mhocko@kernel.org Acked-by: Michal Hocko mhocko@suse.com Reported-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Tested-by: Mikhail Gavrilov mikhail.v.gavrilov@gmail.com Cc: Dave Hansen dave.hansen@intel.com Cc: Alexander Duyck alexander.h.duyck@linux.intel.com Cc: Pasha Tatashin Pavel.Tatashin@microsoft.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com
mm/page_alloc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
Wasn't this patch reverted in Linus's tree for causing a regression on some platforms? If so I'm not sure we should pull this in as a candidate for stable should we, or am I missing something?
I saw a follow-up patch that should be queued too, but I didn't see that this one got reverted.
-- Thanks, Sasha
From: Marc Zyngier marc.zyngier@arm.com
[ Upstream commit c987876a80e7bcb98a839f10dca9ce7fda4feced ]
Contrary to the non-VHE version of the TLB invalidation helpers, the VHE code has interrupts enabled, meaning that we can take an interrupt in the middle of such a sequence, and start running something else with HCR_EL2.TGE cleared.
That's really not a good idea.
Take the heavy-handed option and disable interrupts in __tlb_switch_to_guest_vhe, restoring them in __tlb_switch_to_host_vhe. The latter also gain an ISB in order to make sure that TGE really has taken effect.
Cc: stable@vger.kernel.org Acked-by: Christoffer Dall christoffer.dall@arm.com Reviewed-by: James Morse james.morse@arm.com Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/arm64/kvm/hyp/tlb.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 73464a96c365..db23c6e5c885 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -15,13 +15,18 @@ * along with this program. If not, see http://www.gnu.org/licenses/. */
+#include <linux/irqflags.h> + #include <asm/kvm_hyp.h> #include <asm/tlbflush.h>
-static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) +static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, + unsigned long *flags) { u64 val;
+ local_irq_save(*flags); + /* * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and * most TLB operations target EL2/EL0. In order to affect the @@ -36,7 +41,8 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) isb(); }
-static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) +static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, + unsigned long *flags) { write_sysreg(kvm->arch.vttbr, vttbr_el2); isb(); @@ -47,7 +53,8 @@ static hyp_alternate_select(__tlb_switch_to_guest, __tlb_switch_to_guest_vhe, ARM64_HAS_VIRT_HOST_EXTN);
-static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) +static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, + unsigned long flags) { /* * We're done with the TLB operation, let's restore the host's @@ -55,9 +62,12 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) */ write_sysreg(0, vttbr_el2); write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + isb(); + local_irq_restore(flags); }
-static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm) +static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, + unsigned long flags) { write_sysreg(0, vttbr_el2); } @@ -69,11 +79,13 @@ static hyp_alternate_select(__tlb_switch_to_host,
void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { + unsigned long flags; + dsb(ishst);
/* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm); + __tlb_switch_to_guest()(kvm, &flags);
/* * We could do so much better if we had the VA as well. @@ -116,36 +128,39 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) if (!has_vhe() && icache_is_vpipt()) __flush_icache_all();
- __tlb_switch_to_host()(kvm); + __tlb_switch_to_host()(kvm, flags); }
void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) { + unsigned long flags; + dsb(ishst);
/* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm); + __tlb_switch_to_guest()(kvm, &flags);
__tlbi(vmalls12e1is); dsb(ish); isb();
- __tlb_switch_to_host()(kvm); + __tlb_switch_to_host()(kvm, flags); }
void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); + unsigned long flags;
/* Switch to requested VMID */ - __tlb_switch_to_guest()(kvm); + __tlb_switch_to_guest()(kvm, &flags);
__tlbi(vmalle1); dsb(nsh); isb();
- __tlb_switch_to_host()(kvm); + __tlb_switch_to_host()(kvm, flags); }
void __hyp_text __kvm_flush_vm_context(void)
From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com
[ Upstream commit 7da66670775d201f633577f5b15a4bbeebaaa2b0 ]
Add AES128/192/256-CFB testvectors from NIST SP800-38A.
Signed-off-by: Dmitry Eremin-Solenikov dbaryshkov@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Eremin-Solenikov dbaryshkov@gmail.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- crypto/tcrypt.c | 5 ++++ crypto/testmgr.c | 7 +++++ crypto/testmgr.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index f7affe7cf0b4..76df552f099b 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1099,6 +1099,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) ret += tcrypt_test("xts(aes)"); ret += tcrypt_test("ctr(aes)"); ret += tcrypt_test("rfc3686(ctr(aes))"); + ret += tcrypt_test("cfb(aes)"); break;
case 11: @@ -1422,6 +1423,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) speed_template_16_24_32); test_cipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0, speed_template_16_24_32); + test_cipher_speed("cfb(aes)", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_cipher_speed("cfb(aes)", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); break;
case 201: diff --git a/crypto/testmgr.c b/crypto/testmgr.c index d91278c01ea8..e65c8228ea47 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2631,6 +2631,13 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(aes_ccm_dec_tv_template) } } + }, { + .alg = "cfb(aes)", + .test = alg_test_skcipher, + .fips_allowed = 1, + .suite = { + .cipher = __VECS(aes_cfb_tv_template) + }, }, { .alg = "chacha20", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 12835f072614..5bd9c1400fee 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -16071,6 +16071,82 @@ static const struct cipher_testvec aes_cbc_dec_tv_template[] = { }, };
+static const struct cipher_testvec aes_cfb_tv_template[] = { + { /* From NIST SP800-38A */ + .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" + "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + .klen = 16, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ctext = "\x3b\x3f\xd9\x2e\xb7\x2d\xad\x20" + "\x33\x34\x49\xf8\xe8\x3c\xfb\x4a" + "\xc8\xa6\x45\x37\xa0\xb3\xa9\x3f" + "\xcd\xe3\xcd\xad\x9f\x1c\xe5\x8b" + "\x26\x75\x1f\x67\xa3\xcb\xb1\x40" + "\xb1\x80\x8c\xf1\x87\xa4\xf4\xdf" + "\xc0\x4b\x05\x35\x7c\x5d\x1c\x0e" + "\xea\xc4\xc6\x6f\x9f\xf7\xf2\xe6", + .len = 64, + }, { + .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" + "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + .klen = 24, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ctext = "\xcd\xc8\x0d\x6f\xdd\xf1\x8c\xab" + "\x34\xc2\x59\x09\xc9\x9a\x41\x74" + "\x67\xce\x7f\x7f\x81\x17\x36\x21" + "\x96\x1a\x2b\x70\x17\x1d\x3d\x7a" + "\x2e\x1e\x8a\x1d\xd5\x9b\x88\xb1" + "\xc8\xe6\x0f\xed\x1e\xfa\xc4\xc9" + "\xc0\x5f\x9f\x9c\xa9\x83\x4f\xa0" + "\x42\xae\x8f\xba\x58\x4b\x09\xff", + .len = 64, + }, { + .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe" + "\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" + "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + .klen = 32, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ctext = "\xdc\x7e\x84\xbf\xda\x79\x16\x4b" + "\x7e\xcd\x84\x86\x98\x5d\x38\x60" + "\x39\xff\xed\x14\x3b\x28\xb1\xc8" + "\x32\x11\x3c\x63\x31\xe5\x40\x7b" + "\xdf\x10\x13\x24\x15\xe5\x4b\x92" + "\xa1\x3e\xd0\xa8\x26\x7a\xe2\xf9" + "\x75\xa3\x85\x74\x1a\xb9\xce\xf8" + "\x20\x31\x62\x3d\x55\xb1\xe4\x71", + .len = 64, + }, +}; + static const struct aead_testvec hmac_md5_ecb_cipher_null_enc_tv_template[] = { { /* Input data from RFC 2410 Case 1 */ #ifdef __LITTLE_ENDIAN
From: Nicholas Piggin npiggin@gmail.com
[ Upstream commit f2910f0e6835339e6ce82cef22fa15718b7e3bfa ]
GCC 4.6 is the minimum supported now.
Signed-off-by: Nicholas Piggin npiggin@gmail.com Reviewed-by: Joel Stanley joel@jms.id.au Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/powerpc/Makefile | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 7452e50f4d1f..0f04c878113e 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -396,36 +396,9 @@ archprepare: checkbin # to stdout and these checks are run even on install targets. TOUT := .tmp_gas_check
-# Check gcc and binutils versions: -# - gcc-3.4 and binutils-2.14 are a fatal combination -# - Require gcc 4.0 or above on 64-bit -# - gcc-4.2.0 has issues compiling modules on 64-bit +# Check toolchain versions: +# - gcc-4.6 is the minimum kernel-wide version so nothing required. checkbin: - @if test "$(cc-name)" != "clang" \ - && test "$(cc-version)" = "0304" ; then \ - if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \ - echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \ - echo 'correctly with gcc-3.4 and your version of binutils.'; \ - echo '*** Please upgrade your binutils or downgrade your gcc'; \ - false; \ - fi ; \ - fi - @if test "$(cc-name)" != "clang" \ - && test "$(cc-version)" -lt "0400" \ - && test "x${CONFIG_PPC64}" = "xy" ; then \ - echo -n "Sorry, GCC v4.0 or above is required to build " ; \ - echo "the 64-bit powerpc kernel." ; \ - false ; \ - fi - @if test "$(cc-name)" != "clang" \ - && test "$(cc-fullversion)" = "040200" \ - && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \ - echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \ - echo 'kernel with modules enabled.' ; \ - echo -n '*** Please use a different GCC version or ' ; \ - echo 'disable kernel modules' ; \ - false ; \ - fi @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \ && $(LD) --version | head -1 | grep ' 2.24$$' >/dev/null ; then \ echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
Hi Sasha,
I don't think GCC 4.6 is the minimum supported for 4.14
As far as I can see, commit cafa0010cd51f ("Raise the minimum required gcc version to 4.6") has not been applied to 4.14 and I can't see any reason such a commit should apply on a stable branch.
Christophe
Le 07/05/2019 à 07:37, Sasha Levin a écrit :
From: Nicholas Piggin npiggin@gmail.com
[ Upstream commit f2910f0e6835339e6ce82cef22fa15718b7e3bfa ]
GCC 4.6 is the minimum supported now.
Signed-off-by: Nicholas Piggin npiggin@gmail.com Reviewed-by: Joel Stanley joel@jms.id.au Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@microsoft.com
arch/powerpc/Makefile | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 7452e50f4d1f..0f04c878113e 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -396,36 +396,9 @@ archprepare: checkbin # to stdout and these checks are run even on install targets. TOUT := .tmp_gas_check -# Check gcc and binutils versions: -# - gcc-3.4 and binutils-2.14 are a fatal combination -# - Require gcc 4.0 or above on 64-bit -# - gcc-4.2.0 has issues compiling modules on 64-bit +# Check toolchain versions: +# - gcc-4.6 is the minimum kernel-wide version so nothing required. checkbin:
- @if test "$(cc-name)" != "clang" \
&& test "$(cc-version)" = "0304" ; then \
if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
echo 'correctly with gcc-3.4 and your version of binutils.'; \
echo '*** Please upgrade your binutils or downgrade your gcc'; \
false; \
fi ; \
- fi
- @if test "$(cc-name)" != "clang" \
&& test "$(cc-version)" -lt "0400" \
&& test "x${CONFIG_PPC64}" = "xy" ; then \
echo -n "Sorry, GCC v4.0 or above is required to build " ; \
echo "the 64-bit powerpc kernel." ; \
false ; \
fi
- @if test "$(cc-name)" != "clang" \
&& test "$(cc-fullversion)" = "040200" \
&& test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
echo 'kernel with modules enabled.' ; \
echo -n '*** Please use a different GCC version or ' ; \
echo 'disable kernel modules' ; \
false ; \
- fi @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \ && $(LD) --version | head -1 | grep ' 2.24$$' >/dev/null ; then \ echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
From: Jerome Brunet jbrunet@baylibre.com
[ Upstream commit 9aec30371fb095a0c9415f3f0146ae269c3713d8 ]
When probing, if we fail to get the pwm due to probe deferal, we shouldn't print an error message. Just be silent in this case.
Signed-off-by: Jerome Brunet jbrunet@baylibre.com Signed-off-by: Jacek Anaszewski jacek.anaszewski@gmail.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/leds/leds-pwm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c index 8d456dc6c5bf..83f9bbe57e02 100644 --- a/drivers/leds/leds-pwm.c +++ b/drivers/leds/leds-pwm.c @@ -101,8 +101,9 @@ static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv, led_data->pwm = devm_pwm_get(dev, led->name); if (IS_ERR(led_data->pwm)) { ret = PTR_ERR(led_data->pwm); - dev_err(dev, "unable to request PWM for %s: %d\n", - led->name, ret); + if (ret != -EPROBE_DEFER) + dev_err(dev, "unable to request PWM for %s: %d\n", + led->name, ret); return ret; }
From: Enric Balletbo i Serra enric.balletbo@collabora.com
[ Upstream commit 4eda776c3cefcb1f01b2d85bd8753f67606282b5 ]
'encoder' is dereferenced before it is null sanity checked, hence we potentially have a null pointer dereference bug. Instead, initialise drm_drv from encoder->dev->dev_private after we are sure 'encoder' is not null.
Fixes: 5182c1a556d7f ("drm/rockchip: add an common abstracted PSR driver") Cc: stable@vger.kernel.org Signed-off-by: Enric Balletbo i Serra enric.balletbo@collabora.com Signed-off-by: Heiko Stuebner heiko@sntech.de Link: https://patchwork.freedesktop.org/patch/msgid/20181013105654.11827-1-enric.b... Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/gpu/drm/rockchip/rockchip_drm_psr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_psr.c b/drivers/gpu/drm/rockchip/rockchip_drm_psr.c index a553e182ff53..32e7dba2bf5e 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_psr.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_psr.c @@ -221,13 +221,15 @@ EXPORT_SYMBOL(rockchip_drm_psr_flush_all); int rockchip_drm_psr_register(struct drm_encoder *encoder, void (*psr_set)(struct drm_encoder *, bool enable)) { - struct rockchip_drm_private *drm_drv = encoder->dev->dev_private; + struct rockchip_drm_private *drm_drv; struct psr_drv *psr; unsigned long flags;
if (!encoder || !psr_set) return -EINVAL;
+ drm_drv = encoder->dev->dev_private; + psr = kzalloc(sizeof(struct psr_drv), GFP_KERNEL); if (!psr) return -ENOMEM;
From: Adit Ranadive aditr@vmware.com
[ Upstream commit 6325e01b6cdf4636b721cf7259c1616e3cf28ce2 ]
Since the IB_WR_REG_MR opcode value changed, let's set the PVRDMA device opcodes explicitly.
Reported-by: Ruishuang Wang ruishuangw@vmware.com Fixes: 9a59739bd01f ("IB/rxe: Revise the ib_wr_opcode enum") Cc: stable@vger.kernel.org Reviewed-by: Bryan Tan bryantan@vmware.com Reviewed-by: Ruishuang Wang ruishuangw@vmware.com Reviewed-by: Vishnu Dasa vdasa@vmware.com Signed-off-by: Adit Ranadive aditr@vmware.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 35 +++++++++++++++++++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 6 ++++ include/uapi/rdma/vmw_pvrdma-abi.h | 1 + 3 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 984aa3484928..4463e1c1a764 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -407,7 +407,40 @@ static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state)
static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op) { - return (enum pvrdma_wr_opcode)op; + switch (op) { + case IB_WR_RDMA_WRITE: + return PVRDMA_WR_RDMA_WRITE; + case IB_WR_RDMA_WRITE_WITH_IMM: + return PVRDMA_WR_RDMA_WRITE_WITH_IMM; + case IB_WR_SEND: + return PVRDMA_WR_SEND; + case IB_WR_SEND_WITH_IMM: + return PVRDMA_WR_SEND_WITH_IMM; + case IB_WR_RDMA_READ: + return PVRDMA_WR_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: + return PVRDMA_WR_ATOMIC_CMP_AND_SWP; + case IB_WR_ATOMIC_FETCH_AND_ADD: + return PVRDMA_WR_ATOMIC_FETCH_AND_ADD; + case IB_WR_LSO: + return PVRDMA_WR_LSO; + case IB_WR_SEND_WITH_INV: + return PVRDMA_WR_SEND_WITH_INV; + case IB_WR_RDMA_READ_WITH_INV: + return PVRDMA_WR_RDMA_READ_WITH_INV; + case IB_WR_LOCAL_INV: + return PVRDMA_WR_LOCAL_INV; + case IB_WR_REG_MR: + return PVRDMA_WR_FAST_REG_MR; + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + return PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP; + case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: + return PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD; + case IB_WR_REG_SIG_MR: + return PVRDMA_WR_REG_SIG_MR; + default: + return PVRDMA_WR_ERROR; + } }
static inline enum ib_wc_status pvrdma_wc_status_to_ib( diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index d7162f2b7979..4d9c99dd366b 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -695,6 +695,12 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) wqe_hdr->ex.imm_data = wr->ex.imm_data;
+ if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) { + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + switch (qp->ibqp.qp_type) { case IB_QPT_GSI: case IB_QPT_UD: diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index 912ea1556a0b..fd801c7be120 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -76,6 +76,7 @@ enum pvrdma_wr_opcode { PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD, PVRDMA_WR_BIND_MW, PVRDMA_WR_REG_SIG_MR, + PVRDMA_WR_ERROR, };
enum pvrdma_wc_status {
From: Heinrich Schuchardt xypron.glpk@gmx.de
[ Upstream commit 132ac39cffbcfed80ada38ef0fc6d34d95da7be6 ]
The memory area [0x4000000-0x4200000[ is occupied by the PSCI firmware. Any attempt to access it from Linux leads to an immediate crash.
So let's make the same memory reservation as the vendor kernel.
[gregory: added as comment that this region matches the mainline U-boot] Signed-off-by: Heinrich Schuchardt xypron.glpk@gmx.de Signed-off-by: Gregory CLEMENT gregory.clement@bootlin.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/arm64/boot/dts/marvell/armada-ap806.dtsi | 17 +++++++++++++++++ 1 file changed, 17 insertions(+)
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index 30d48ecf46e0..27d2bd85d1ae 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -65,6 +65,23 @@ method = "smc"; };
+ reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + /* + * This area matches the mapping done with a + * mainline U-Boot, and should be updated by the + * bootloader. + */ + + psci-area@4000000 { + reg = <0x0 0x4000000 0x0 0x200000>; + no-map; + }; + }; + ap806 { #address-cells = <2>; #size-cells = <2>;
From: Nicolas Pitre nicolas.pitre@linaro.org
[ Upstream commit 7e1d226345f89ad5d0216a9092c81386c89b4983 ]
Every invocation of notify_write() and notify_update() is performed under the console lock, except for one case. Let's fix that.
Signed-off-by: Nicolas Pitre nico@linaro.org Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 1fb5e7f409c4..6ff921cf9a9e 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2435,8 +2435,8 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co } con_flush(vc, draw_from, draw_to, &draw_x); console_conditional_schedule(); - console_unlock(); notify_update(vc); + console_unlock(); return n; }
From: Michal Hocko mhocko@suse.com
[ Upstream commit 4aa9fc2a435abe95a1e8d7f8c7b3d6356514b37a ]
This reverts commit 2830bf6f05fb3e05bc4743274b806c821807a684.
The underlying assumption that one sparse section belongs into a single numa node doesn't hold really. Robert Shteynfeld has reported a boot failure. The boot log was not captured but his memory layout is as follows:
Early memory node ranges node 1: [mem 0x0000000000001000-0x0000000000090fff] node 1: [mem 0x0000000000100000-0x00000000dbdf8fff] node 1: [mem 0x0000000100000000-0x0000001423ffffff] node 0: [mem 0x0000001424000000-0x0000002023ffffff]
This means that node0 starts in the middle of a memory section which is also in node1. memmap_init_zone tries to initialize padding of a section even when it is outside of the given pfn range because there are code paths (e.g. memory hotplug) which assume that the full worth of memory section is always initialized.
In this particular case, though, such a range is already intialized and most likely already managed by the page allocator. Scribbling over those pages corrupts the internal state and likely blows up when any of those pages gets used.
Reported-by: Robert Shteynfeld robert.shteynfeld@gmail.com Fixes: 2830bf6f05fb ("mm, memory_hotplug: initialize struct pages for the full memory section") Cc: stable@kernel.org Signed-off-by: Michal Hocko mhocko@suse.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- mm/page_alloc.c | 12 ------------ 1 file changed, 12 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 16c20d9e771f..923deb33bf34 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5348,18 +5348,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, __init_single_pfn(pfn, zone, nid); } } -#ifdef CONFIG_SPARSEMEM - /* - * If the zone does not span the rest of the section then - * we should at least initialize those pages. Otherwise we - * could blow up on a poisoned page in some paths which depend - * on full sections being initialized (e.g. memory hotplug). - */ - while (end_pfn % PAGES_PER_SECTION) { - __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid); - end_pfn++; - } -#endif }
static void __meminit zone_init_free_lists(struct zone *zone)
On Mon, May 6, 2019 at 10:40 PM Sasha Levin sashal@kernel.org wrote:
From: Michal Hocko mhocko@suse.com
[ Upstream commit 4aa9fc2a435abe95a1e8d7f8c7b3d6356514b37a ]
This reverts commit 2830bf6f05fb3e05bc4743274b806c821807a684.
The underlying assumption that one sparse section belongs into a single numa node doesn't hold really. Robert Shteynfeld has reported a boot failure. The boot log was not captured but his memory layout is as follows:
Early memory node ranges node 1: [mem 0x0000000000001000-0x0000000000090fff] node 1: [mem 0x0000000000100000-0x00000000dbdf8fff] node 1: [mem 0x0000000100000000-0x0000001423ffffff] node 0: [mem 0x0000001424000000-0x0000002023ffffff]
This means that node0 starts in the middle of a memory section which is also in node1. memmap_init_zone tries to initialize padding of a section even when it is outside of the given pfn range because there are code paths (e.g. memory hotplug) which assume that the full worth of memory section is always initialized.
In this particular case, though, such a range is already intialized and most likely already managed by the page allocator. Scribbling over those pages corrupts the internal state and likely blows up when any of those pages gets used.
Reported-by: Robert Shteynfeld robert.shteynfeld@gmail.com Fixes: 2830bf6f05fb ("mm, memory_hotplug: initialize struct pages for the full memory section") Cc: stable@kernel.org Signed-off-by: Michal Hocko mhocko@suse.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com
mm/page_alloc.c | 12 ------------ 1 file changed, 12 deletions(-)
So it looks like you already had the revert of the earlier patch I pointed out enqueued as well. So you can probably at a minimum just drop this patch and the earlier patch that this reverts.
Thanks.
- Alex
From: Alexey Brodkin alexey.brodkin@synopsys.com
[ Upstream commit a66d972465d15b1d89281258805eb8b47d66bd36 ]
Initially we bumped into problem with 32-bit aligned atomic64_t on ARC, see [1]. And then during quite lengthly discussion Peter Z. mentioned ARCH_KMALLOC_MINALIGN which IMHO makes perfect sense. If allocation is done by plain kmalloc() obtained buffer will be ARCH_KMALLOC_MINALIGN aligned and then why buffer obtained via devm_kmalloc() should have any other alignment?
This way we at least get the same behavior for both types of allocation.
[1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004009.html [2] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004036.html
Signed-off-by: Alexey Brodkin abrodkin@synopsys.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: David Laight David.Laight@ACULAB.COM Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Vineet Gupta vgupta@synopsys.com Cc: Will Deacon will.deacon@arm.com Cc: Greg KH greg@kroah.com Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/base/devres.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 71d577025285..e43a04a495a3 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -25,8 +25,14 @@ struct devres_node {
struct devres { struct devres_node node; - /* -- 3 pointers */ - unsigned long long data[]; /* guarantee ull alignment */ + /* + * Some archs want to perform DMA into kmalloc caches + * and need a guaranteed alignment larger than + * the alignment of a 64-bit integer. + * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same + * buffer alignment as if it was allocated by plain kmalloc(). + */ + u8 __aligned(ARCH_KMALLOC_MINALIGN) data[]; };
struct devres_group {
On Tue, May 07, 2019 at 01:38:01AM -0400, Sasha Levin wrote:
From: Alexey Brodkin alexey.brodkin@synopsys.com
[ Upstream commit a66d972465d15b1d89281258805eb8b47d66bd36 ]
Initially we bumped into problem with 32-bit aligned atomic64_t on ARC, see [1]. And then during quite lengthly discussion Peter Z. mentioned ARCH_KMALLOC_MINALIGN which IMHO makes perfect sense. If allocation is done by plain kmalloc() obtained buffer will be ARCH_KMALLOC_MINALIGN aligned and then why buffer obtained via devm_kmalloc() should have any other alignment?
This way we at least get the same behavior for both types of allocation.
[1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004009.html [2] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004036.html
Signed-off-by: Alexey Brodkin abrodkin@synopsys.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: David Laight David.Laight@ACULAB.COM Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Vineet Gupta vgupta@synopsys.com Cc: Will Deacon will.deacon@arm.com Cc: Greg KH greg@kroah.com Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com
drivers/base/devres.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 71d577025285..e43a04a495a3 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -25,8 +25,14 @@ struct devres_node { struct devres { struct devres_node node;
- /* -- 3 pointers */
- unsigned long long data[]; /* guarantee ull alignment */
- /*
* Some archs want to perform DMA into kmalloc caches
* and need a guaranteed alignment larger than
* the alignment of a 64-bit integer.
* Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
* buffer alignment as if it was allocated by plain kmalloc().
*/
- u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
}; struct devres_group {
This is not needed in any of the older kernels, despite what the stable@ line said, as it ends up taking a lot of memory up for all other arches. That's why I only applied it to the one kernel version. I'm betting that it will be eventually reverted when people notice it as well :)
So can you please drop it from all of your trees?
thanks,
greg k-h
Hi Greg,
-----Original Message----- From: Greg Kroah-Hartman gregkh@linuxfoundation.org Sent: Tuesday, May 7, 2019 8:52 AM To: Sasha Levin sashal@kernel.org Cc: linux-kernel@vger.kernel.org; stable@vger.kernel.org; Alexey Brodkin abrodkin@synopsys.com; Alexey Brodkin abrodkin@synopsys.com; Geert Uytterhoeven geert@linux-m68k.org; David Laight David.Laight@aculab.com; Peter Zijlstra peterz@infradead.org; Thomas Gleixner tglx@linutronix.de; Vineet Gupta vgupta@synopsys.com; Will Deacon will.deacon@arm.com; Sasha Levin alexander.levin@microsoft.com Subject: Re: [PATCH AUTOSEL 4.14 72/95] devres: Align data[] to ARCH_KMALLOC_MINALIGN
On Tue, May 07, 2019 at 01:38:01AM -0400, Sasha Levin wrote:
From: Alexey Brodkin alexey.brodkin@synopsys.com
[ Upstream commit a66d972465d15b1d89281258805eb8b47d66bd36 ]
Initially we bumped into problem with 32-bit aligned atomic64_t on ARC, see [1]. And then during quite lengthly discussion Peter Z. mentioned ARCH_KMALLOC_MINALIGN which IMHO makes perfect sense. If allocation is done by plain kmalloc() obtained buffer will be ARCH_KMALLOC_MINALIGN aligned and then why buffer obtained via devm_kmalloc() should have any other alignment?
This way we at least get the same behavior for both types of allocation.
[1] https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.infradead.org_pipe...
2Darc_2018- 2DJuly_004009.html&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=lqdeeSSEes0GFDDl656eViXO7breS55ytWkhpk5R81I&m=A YtkWKU38pzVfJMBuK0lUwxRyKT6dDfHoD3yO6OIB5k&s=e7e2sXKcjHDQdGSrKWM0jmpSOfhe0MFk4-nMZJe9En8&e=
[2] https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.infradead.org_pipe...
2Darc_2018- 2DJuly_004036.html&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=lqdeeSSEes0GFDDl656eViXO7breS55ytWkhpk5R81I&m=A YtkWKU38pzVfJMBuK0lUwxRyKT6dDfHoD3yO6OIB5k&s=L23zrl8rf2MmReUI8rT3FQpMiZU9H3Xjh9uVxJQe8dw&e=
Signed-off-by: Alexey Brodkin abrodkin@synopsys.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: David Laight David.Laight@ACULAB.COM Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Vineet Gupta vgupta@synopsys.com Cc: Will Deacon will.deacon@arm.com Cc: Greg KH greg@kroah.com Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com
drivers/base/devres.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 71d577025285..e43a04a495a3 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -25,8 +25,14 @@ struct devres_node {
struct devres { struct devres_node node;
- /* -- 3 pointers */
- unsigned long long data[]; /* guarantee ull alignment */
- /*
* Some archs want to perform DMA into kmalloc caches
* and need a guaranteed alignment larger than
* the alignment of a 64-bit integer.
* Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
* buffer alignment as if it was allocated by plain kmalloc().
*/
- u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
};
struct devres_group {
This is not needed in any of the older kernels, despite what the stable@ line said, as it ends up taking a lot of memory up for all other arches. That's why I only applied it to the one kernel version. I'm betting that it will be eventually reverted when people notice it as well :)
That very well might become the case but then we're back to the initial problem, right? So maybe some other more future-proof solution should be implemented?
See initially we discussed simple explicit 8-byte alignment which won't change data layout for most of arches while fixing our issue on ARC but for some reason people were not happy with that proposal and that's how we ended-up with what we discuss here now.
-Alexey
On Tue, May 07, 2019 at 07:04:13AM +0000, Alexey Brodkin wrote:
Hi Greg,
-----Original Message----- From: Greg Kroah-Hartman gregkh@linuxfoundation.org Sent: Tuesday, May 7, 2019 8:52 AM To: Sasha Levin sashal@kernel.org Cc: linux-kernel@vger.kernel.org; stable@vger.kernel.org; Alexey Brodkin abrodkin@synopsys.com; Alexey Brodkin abrodkin@synopsys.com; Geert Uytterhoeven geert@linux-m68k.org; David Laight David.Laight@aculab.com; Peter Zijlstra peterz@infradead.org; Thomas Gleixner tglx@linutronix.de; Vineet Gupta vgupta@synopsys.com; Will Deacon will.deacon@arm.com; Sasha Levin alexander.levin@microsoft.com Subject: Re: [PATCH AUTOSEL 4.14 72/95] devres: Align data[] to ARCH_KMALLOC_MINALIGN
On Tue, May 07, 2019 at 01:38:01AM -0400, Sasha Levin wrote:
From: Alexey Brodkin alexey.brodkin@synopsys.com
[ Upstream commit a66d972465d15b1d89281258805eb8b47d66bd36 ]
Initially we bumped into problem with 32-bit aligned atomic64_t on ARC, see [1]. And then during quite lengthly discussion Peter Z. mentioned ARCH_KMALLOC_MINALIGN which IMHO makes perfect sense. If allocation is done by plain kmalloc() obtained buffer will be ARCH_KMALLOC_MINALIGN aligned and then why buffer obtained via devm_kmalloc() should have any other alignment?
This way we at least get the same behavior for both types of allocation.
[1] https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.infradead.org_pipe...
2Darc_2018- 2DJuly_004009.html&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=lqdeeSSEes0GFDDl656eViXO7breS55ytWkhpk5R81I&m=A YtkWKU38pzVfJMBuK0lUwxRyKT6dDfHoD3yO6OIB5k&s=e7e2sXKcjHDQdGSrKWM0jmpSOfhe0MFk4-nMZJe9En8&e=
[2] https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.infradead.org_pipe...
2Darc_2018- 2DJuly_004036.html&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=lqdeeSSEes0GFDDl656eViXO7breS55ytWkhpk5R81I&m=A YtkWKU38pzVfJMBuK0lUwxRyKT6dDfHoD3yO6OIB5k&s=L23zrl8rf2MmReUI8rT3FQpMiZU9H3Xjh9uVxJQe8dw&e=
Signed-off-by: Alexey Brodkin abrodkin@synopsys.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: David Laight David.Laight@ACULAB.COM Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Vineet Gupta vgupta@synopsys.com Cc: Will Deacon will.deacon@arm.com Cc: Greg KH greg@kroah.com Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com
drivers/base/devres.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 71d577025285..e43a04a495a3 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -25,8 +25,14 @@ struct devres_node {
struct devres { struct devres_node node;
- /* -- 3 pointers */
- unsigned long long data[]; /* guarantee ull alignment */
- /*
* Some archs want to perform DMA into kmalloc caches
* and need a guaranteed alignment larger than
* the alignment of a 64-bit integer.
* Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
* buffer alignment as if it was allocated by plain kmalloc().
*/
- u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
};
struct devres_group {
This is not needed in any of the older kernels, despite what the stable@ line said, as it ends up taking a lot of memory up for all other arches. That's why I only applied it to the one kernel version. I'm betting that it will be eventually reverted when people notice it as well :)
That very well might become the case but then we're back to the initial problem, right? So maybe some other more future-proof solution should be implemented?
Possibly yes.
See initially we discussed simple explicit 8-byte alignment which won't change data layout for most of arches while fixing our issue on ARC but for some reason people were not happy with that proposal and that's how we ended-up with what we discuss here now.
I'm not disagreeing that this is a valid solution for you, I wasn't part of the original discussion, sorry. Just that this probably isn't something that should be backported to older kernels at this point in time.
thanks,
greg k-h
Hi Greg,
[snip]
This is not needed in any of the older kernels, despite what the stable@ line said, as it ends up taking a lot of memory up for all other arches. That's why I only applied it to the one kernel version. I'm betting that it will be eventually reverted when people notice it as well :)
That very well might become the case but then we're back to the initial problem, right? So maybe some other more future-proof solution should be implemented?
Possibly yes.
See initially we discussed simple explicit 8-byte alignment which won't change data layout for most of arches while fixing our issue on ARC but for some reason people were not happy with that proposal and that's how we ended-up with what we discuss here now.
I'm not disagreeing that this is a valid solution for you, I wasn't part of the original discussion, sorry. Just that this probably isn't something that should be backported to older kernels at this point in time.
That's fine for now.
Then if it ever gets reverted in Linux tree hopefully I'll be CCed and it will be enough time to come up with a better fix.
-Alexey
From: Max Filippov jcmvbkbc@gmail.com
[ Upstream commit f37598be4e3896359e87c824be57ddddc280cc3f ]
Rename SPI controller node in the XTFPGA DTS to spi@... This fixes the following build warnings:
arch/xtensa/boot/dts/kc705_nommu.dtb: Warning (spi_bus_bridge): /soc/spi-master@0d0a0000: node name for SPI buses should be 'spi' arch/xtensa/boot/dts/kc705_nommu.dtb: Warning (spi_bus_reg): Failed prerequisite 'spi_bus_bridge' arch/xtensa/boot/dts/lx200mx.dtb: Warning (spi_bus_bridge): /soc/spi-master@0d0a0000: node name for SPI buses should be 'spi' arch/xtensa/boot/dts/lx200mx.dtb: Warning (spi_bus_reg): Failed prerequisite 'spi_bus_bridge' arch/xtensa/boot/dts/kc705.dtb: Warning (spi_bus_bridge): /soc/spi-master@0d0a0000: node name for SPI buses should be 'spi' arch/xtensa/boot/dts/kc705.dtb: Warning (spi_bus_reg): Failed prerequisite 'spi_bus_bridge' arch/xtensa/boot/dts/ml605.dtb: Warning (spi_bus_bridge): /soc/spi-master@0d0a0000: node name for SPI buses should be 'spi' arch/xtensa/boot/dts/ml605.dtb: Warning (spi_bus_reg): Failed prerequisite 'spi_bus_bridge' arch/xtensa/boot/dts/lx60.dtb: Warning (spi_bus_bridge): /soc/spi-master@0d0a0000: node name for SPI buses should be 'spi' arch/xtensa/boot/dts/lx60.dtb: Warning (spi_bus_reg): Failed prerequisite 'spi_bus_bridge'
Signed-off-by: Max Filippov jcmvbkbc@gmail.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/xtensa/boot/dts/xtfpga.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/xtensa/boot/dts/xtfpga.dtsi b/arch/xtensa/boot/dts/xtfpga.dtsi index 1090528825ec..e46ae07bab05 100644 --- a/arch/xtensa/boot/dts/xtfpga.dtsi +++ b/arch/xtensa/boot/dts/xtfpga.dtsi @@ -103,7 +103,7 @@ }; };
- spi0: spi-master@0d0a0000 { + spi0: spi@0d0a0000 { compatible = "cdns,xtfpga-spi"; #address-cells = <1>; #size-cells = <0>;
From: Anand Jain anand.jain@oracle.com
[ Upstream commit a9261d4125c97ce8624e9941b75dee1b43ad5df9 ]
It's not that impossible to imagine that a device OR a btrfs image is copied just by using the dd or the cp command. Which in case both the copies of the btrfs will have the same fsid. If on the system with automount enabled, the copied FS gets scanned.
We have a known bug in btrfs, that we let the device path be changed after the device has been mounted. So using this loop hole the new copied device would appears as if its mounted immediately after it's been copied.
For example:
Initially.. /dev/mmcblk0p4 is mounted as /
$ lsblk NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT mmcblk0 179:0 0 29.2G 0 disk |-mmcblk0p4 179:4 0 4G 0 part / |-mmcblk0p2 179:2 0 500M 0 part /boot |-mmcblk0p3 179:3 0 256M 0 part [SWAP] `-mmcblk0p1 179:1 0 256M 0 part /boot/efi
$ btrfs fi show Label: none uuid: 07892354-ddaa-4443-90ea-f76a06accaba Total devices 1 FS bytes used 1.40GiB devid 1 size 4.00GiB used 3.00GiB path /dev/mmcblk0p4
Copy mmcblk0 to sda
$ dd if=/dev/mmcblk0 of=/dev/sda
And immediately after the copy completes the change in the device superblock is notified which the automount scans using btrfs device scan and the new device sda becomes the mounted root device.
$ lsblk NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT sda 8:0 1 14.9G 0 disk |-sda4 8:4 1 4G 0 part / |-sda2 8:2 1 500M 0 part |-sda3 8:3 1 256M 0 part `-sda1 8:1 1 256M 0 part mmcblk0 179:0 0 29.2G 0 disk |-mmcblk0p4 179:4 0 4G 0 part |-mmcblk0p2 179:2 0 500M 0 part /boot |-mmcblk0p3 179:3 0 256M 0 part [SWAP] `-mmcblk0p1 179:1 0 256M 0 part /boot/efi
$ btrfs fi show / Label: none uuid: 07892354-ddaa-4443-90ea-f76a06accaba Total devices 1 FS bytes used 1.40GiB devid 1 size 4.00GiB used 3.00GiB path /dev/sda4
The bug is quite nasty that you can't either unmount /dev/sda4 or /dev/mmcblk0p4. And the problem does not get solved until you take sda out of the system on to another system to change its fsid using the 'btrfstune -u' command.
Signed-off-by: Anand Jain anand.jain@oracle.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- fs/btrfs/volumes.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 38ed8e259e00..bd1117720fc1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -696,6 +696,35 @@ static noinline int device_list_add(const char *path, return -EEXIST; }
+ /* + * We are going to replace the device path for a given devid, + * make sure it's the same device if the device is mounted + */ + if (device->bdev) { + struct block_device *path_bdev; + + path_bdev = lookup_bdev(path); + if (IS_ERR(path_bdev)) { + mutex_unlock(&fs_devices->device_list_mutex); + return ERR_CAST(path_bdev); + } + + if (device->bdev != path_bdev) { + bdput(path_bdev); + mutex_unlock(&fs_devices->device_list_mutex); + btrfs_warn_in_rcu(device->fs_info, + "duplicate device fsid:devid for %pU:%llu old:%s new:%s", + disk_super->fsid, devid, + rcu_str_deref(device->name), path); + return ERR_PTR(-EEXIST); + } + bdput(path_bdev); + btrfs_info_in_rcu(device->fs_info, + "device fsid %pU devid %llu moved old:%s new:%s", + disk_super->fsid, devid, + rcu_str_deref(device->name), path); + } + name = rcu_string_strdup(path, GFP_NOFS); if (!name) return -ENOMEM;
From: Cong Wang xiyou.wangcong@gmail.com
[ Upstream commit 1db817e75f5b9387b8db11e37d5f0624eb9223e0 ]
struct tcindex_filter_result contains two parts: struct tcf_exts and struct tcf_result.
For the local variable 'cr', its exts part is never used but initialized without being released properly on success path. So just completely remove the exts part to fix this leak.
For the local variable 'new_filter_result', it is never properly released if not used by 'r' on success path.
Cc: Jamal Hadi Salim jhs@mojatatu.com Cc: Jiri Pirko jiri@resnulli.us Signed-off-by: Cong Wang xiyou.wangcong@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/sched/cls_tcindex.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 52829fdc280b..75c7c7cc7499 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -322,9 +322,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr *est, bool ovr) { struct tcindex_filter_result new_filter_result, *old_r = r; - struct tcindex_filter_result cr; struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e;
@@ -363,13 +363,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->h = p->h;
err = tcindex_filter_result_init(&new_filter_result); - if (err < 0) - goto errout1; - err = tcindex_filter_result_init(&cr); if (err < 0) goto errout1; if (old_r) - cr.res = r->res; + cr = r->res;
if (tb[TCA_TCINDEX_HASH]) cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -460,8 +457,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, }
if (tb[TCA_TCINDEX_CLASSID]) { - cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); - tcf_bind_filter(tp, &cr.res, base); + cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); + tcf_bind_filter(tp, &cr, base); }
if (old_r && old_r != r) { @@ -473,7 +470,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, }
oldp = p; - r->res = cr.res; + r->res = cr; tcf_exts_change(&r->exts, &e);
rcu_assign_pointer(tp->root, cp); @@ -492,6 +489,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, ; /* nothing */
rcu_assign_pointer(*fp, f); + } else { + tcf_exts_destroy(&new_filter_result.exts); }
if (oldp) @@ -504,7 +503,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else if (balloc == 2) kfree(cp->h); errout1: - tcf_exts_destroy(&cr.exts); tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp);
From: yangerkun yangerkun@huawei.com
[ Upstream commit a46c68a318b08f819047843abf349aeee5d10ac2 ]
While do swap, we should make sure there has no new dirty page since we should swap i_data between two inode: 1.We should lock i_mmap_sem with write to avoid new pagecache from mmap read/write; 2.Change filemap_flush to filemap_write_and_wait and move them to the space protected by inode lock to avoid new pagecache from buffer read/write.
Signed-off-by: yangerkun yangerkun@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Cc: stable@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- fs/ext4/ioctl.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 3dbf4e414706..ca6d27bfcdd8 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -116,9 +116,6 @@ static long swap_inode_boot_loader(struct super_block *sb, return PTR_ERR(inode_bl); ei_bl = EXT4_I(inode_bl);
- filemap_flush(inode->i_mapping); - filemap_flush(inode_bl->i_mapping); - /* Protect orig inodes against a truncate and make sure, * that only 1 swap_inode_boot_loader is running. */ lock_two_nondirectories(inode, inode_bl); @@ -126,6 +123,15 @@ static long swap_inode_boot_loader(struct super_block *sb, truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0);
+ down_write(&EXT4_I(inode)->i_mmap_sem); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto err_out; + + err = filemap_write_and_wait(inode_bl->i_mapping); + if (err) + goto err_out; + /* Wait for all existing dio workers */ ext4_inode_block_unlocked_dio(inode); ext4_inode_block_unlocked_dio(inode_bl); @@ -135,7 +141,7 @@ static long swap_inode_boot_loader(struct super_block *sb, handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2); if (IS_ERR(handle)) { err = -EINVAL; - goto journal_err_out; + goto err_out; }
/* Protect extent tree against block allocations via delalloc */ @@ -190,6 +196,8 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_journal_stop(handle); ext4_double_up_write_data_sem(inode, inode_bl);
+err_out: + up_write(&EXT4_I(inode)->i_mmap_sem); journal_err_out: ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode_bl);
From: Matteo Croce mcroce@redhat.com
[ Upstream commit c22da36688d6298f2e546dcc43fdc1ad35036467 ]
Similarly to commit a7603ac1fc8c ("geneve: change NET_UDP_TUNNEL dependency to select"), GTP has a dependency on NET_UDP_TUNNEL which makes impossible to compile it if no other protocol depending on NET_UDP_TUNNEL is selected.
Fix this by changing the depends to a select, and drop NET_IP_TUNNEL from the select list, as it already depends on NET_UDP_TUNNEL.
Signed-off-by: Matteo Croce mcroce@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index aba0d652095b..f3357091e9d1 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -212,8 +212,8 @@ config GENEVE
config GTP tristate "GPRS Tunneling Protocol datapath (GTP-U)" - depends on INET && NET_UDP_TUNNEL - select NET_IP_TUNNEL + depends on INET + select NET_UDP_TUNNEL ---help--- This allows one to create gtp virtual interfaces that provide the GPRS Tunneling Protocol datapath (GTP-U). This tunneling protocol
From: Erik Schmauss erik.schmauss@intel.com
[ Upstream commit c5781ffbbd4f742a58263458145fe7f0ac01d9e0 ]
ACPICA commit b233720031a480abd438f2e9c643080929d144c3
ASL operation_regions declare a range of addresses that it uses. In a perfect world, the range of addresses should be used exclusively by the AML interpreter. The OS can use this information to decide which drivers to load so that the AML interpreter and device drivers use different regions of memory.
During table load, the address information is added to a global address range list. Each node in this list contains an address range as well as a namespace node of the operation_region. This list is deleted at ACPI shutdown.
Unfortunately, ASL operation_regions can be declared inside of control methods. Although this is not recommended, modern firmware contains such code. New module level code changes unintentionally removed the functionality of adding and removing nodes to the global address range list.
A few months ago, support for adding addresses has been re- implemented. However, the removal of the address range list was missed and resulted in some systems to crash due to the address list containing bogus namespace nodes from operation_regions declared in control methods. In order to fix the crash, this change removes dynamic operation_regions after control method termination.
Link: https://github.com/acpica/acpica/commit/b2337200 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202475 Fixes: 4abb951b73ff ("ACPICA: AML interpreter: add region addresses in global list during initialization") Reported-by: Michael J Gruber mjg@fedoraproject.org Signed-off-by: Erik Schmauss erik.schmauss@intel.com Signed-off-by: Bob Moore robert.moore@intel.com Cc: 4.20+ stable@vger.kernel.org # 4.20+ Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/acpi/acpica/nsobject.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/acpi/acpica/nsobject.c b/drivers/acpi/acpica/nsobject.c index 707b2aa501e1..099be6424255 100644 --- a/drivers/acpi/acpica/nsobject.c +++ b/drivers/acpi/acpica/nsobject.c @@ -222,6 +222,10 @@ void acpi_ns_detach_object(struct acpi_namespace_node *node) } }
+ if (obj_desc->common.type == ACPI_TYPE_REGION) { + acpi_ut_remove_address_range(obj_desc->region.space_id, node); + } + /* Clear the Node entry in all cases */
node->object = NULL;
From: Rasmus Villemoes linux@rasmusvillemoes.dk
[ Upstream commit 88ca66d8540ca26119b1428cddb96b37925bdf01 ]
The minimum supported gcc version is >= 4.6, so these can be removed.
Signed-off-by: Rasmus Villemoes linux@rasmusvillemoes.dk Signed-off-by: Borislav Petkov bp@suse.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Dan Williams dan.j.williams@intel.com Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: Ingo Molnar mingo@redhat.com Cc: Matthew Wilcox willy@infradead.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: x86-ml x86@kernel.org Link: https://lkml.kernel.org/r/20190111084931.24601-1-linux@rasmusvillemoes.dk Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/x86/include/asm/bitops.h | 6 ------ arch/x86/include/asm/string_32.h | 20 -------------------- arch/x86/include/asm/string_64.h | 15 --------------- 3 files changed, 41 deletions(-)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 3fa039855b8f..e0964200c37f 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -36,13 +36,7 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) -/* Technically wrong, but this avoids compilation errors on some gcc - versions. */ -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) -#else #define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) -#endif
#define ADDR BITOP_ADDR(addr)
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 55d392c6bd29..2fd165f1cffa 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -179,14 +179,7 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) * No 3D Now! */
-#if (__GNUC__ >= 4) #define memcpy(t, f, n) __builtin_memcpy(t, f, n) -#else -#define memcpy(t, f, n) \ - (__builtin_constant_p((n)) \ - ? __constant_memcpy((t), (f), (n)) \ - : __memcpy((t), (f), (n))) -#endif
#endif #endif /* !CONFIG_FORTIFY_SOURCE */ @@ -282,12 +275,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
{ int d0, d1; -#if __GNUC__ == 4 && __GNUC_MINOR__ == 0 - /* Workaround for broken gcc 4.0 */ - register unsigned long eax asm("%eax") = pattern; -#else unsigned long eax = pattern; -#endif
switch (count % 4) { case 0: @@ -321,15 +309,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, size_t); #ifndef CONFIG_FORTIFY_SOURCE -#if (__GNUC__ >= 4) #define memset(s, c, count) __builtin_memset(s, c, count) -#else -#define memset(s, c, count) \ - (__builtin_constant_p(c) \ - ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ - (count)) \ - : __memset((s), (c), (count))) -#endif #endif /* !CONFIG_FORTIFY_SOURCE */
#define __HAVE_ARCH_MEMSET16 diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 533f74c300c2..aae9bd798bf0 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -32,21 +32,6 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len);
-#ifndef CONFIG_FORTIFY_SOURCE -#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 -#define memcpy(dst, src, len) \ -({ \ - size_t __len = (len); \ - void *__ret; \ - if (__builtin_constant_p(len) && __len >= 64) \ - __ret = __memcpy((dst), (src), __len); \ - else \ - __ret = __builtin_memcpy((dst), (src), __len); \ - __ret; \ -}) -#endif -#endif /* !CONFIG_FORTIFY_SOURCE */ - #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); void *__memset(void *s, int c, size_t n);
On 07/05/2019 07.38, Sasha Levin wrote:
From: Rasmus Villemoes linux@rasmusvillemoes.dk
[ Upstream commit 88ca66d8540ca26119b1428cddb96b37925bdf01 ]
The minimum supported gcc version is >= 4.6, so these can be removed.
Eh, that bump happened for the 4.19 kernel, so this is not true for the 4.14 branch. Has cafa0010cd51fb711fdcb50fc55f394c5f167a0a been applied to 4.14.y? Otherwise I don't think this is appropriate.
Rasmus
On Tue, May 07, 2019 at 07:57:01AM +0200, Rasmus Villemoes wrote:
On 07/05/2019 07.38, Sasha Levin wrote:
From: Rasmus Villemoes linux@rasmusvillemoes.dk
[ Upstream commit 88ca66d8540ca26119b1428cddb96b37925bdf01 ]
The minimum supported gcc version is >= 4.6, so these can be removed.
Eh, that bump happened for the 4.19 kernel, so this is not true for the 4.14 branch. Has cafa0010cd51fb711fdcb50fc55f394c5f167a0a been applied to 4.14.y? Otherwise I don't think this is appropriate.
No, that commit is not in 4.14, so we still have to "support" older versions of gcc there :(
Sasha, can you drop this?
thanks,
greg k-h
On Tue, May 07, 2019 at 08:15:03AM +0200, Greg KH wrote:
On Tue, May 07, 2019 at 07:57:01AM +0200, Rasmus Villemoes wrote:
On 07/05/2019 07.38, Sasha Levin wrote:
From: Rasmus Villemoes linux@rasmusvillemoes.dk
[ Upstream commit 88ca66d8540ca26119b1428cddb96b37925bdf01 ]
The minimum supported gcc version is >= 4.6, so these can be removed.
Eh, that bump happened for the 4.19 kernel, so this is not true for the 4.14 branch. Has cafa0010cd51fb711fdcb50fc55f394c5f167a0a been applied to 4.14.y? Otherwise I don't think this is appropriate.
No, that commit is not in 4.14, so we still have to "support" older versions of gcc there :(
Sasha, can you drop this?
Dropped, thanks!
-- Thanks, Sasha
From: KT Liao kt.liao@emc.com.tw
[ Upstream commit 738c06d0e4562e0acf9f2c7438a22b2d5afc67aa ]
There are many Lenovo laptops which need elan_i2c support, this patch adds relevant IDs to the Elan driver so that touchpads are recognized.
Signed-off-by: KT Liao kt.liao@emc.com.tw Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/input/mouse/elan_i2c_core.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 2ce805d31ed1..ad89ba143a0e 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1254,22 +1254,47 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0600", 0 }, { "ELAN0601", 0 }, { "ELAN0602", 0 }, + { "ELAN0603", 0 }, + { "ELAN0604", 0 }, { "ELAN0605", 0 }, + { "ELAN0606", 0 }, + { "ELAN0607", 0 }, { "ELAN0608", 0 }, { "ELAN0605", 0 }, { "ELAN0609", 0 }, { "ELAN060B", 0 }, { "ELAN060C", 0 }, + { "ELAN060F", 0 }, + { "ELAN0610", 0 }, { "ELAN0611", 0 }, { "ELAN0612", 0 }, + { "ELAN0615", 0 }, + { "ELAN0616", 0 }, { "ELAN0617", 0 }, { "ELAN0618", 0 }, + { "ELAN0619", 0 }, + { "ELAN061A", 0 }, + { "ELAN061B", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN061E", 0 }, + { "ELAN061F", 0 }, { "ELAN0620", 0 }, { "ELAN0621", 0 }, { "ELAN0622", 0 }, + { "ELAN0623", 0 }, + { "ELAN0624", 0 }, + { "ELAN0625", 0 }, + { "ELAN0626", 0 }, + { "ELAN0627", 0 }, + { "ELAN0628", 0 }, + { "ELAN0629", 0 }, + { "ELAN062A", 0 }, + { "ELAN062B", 0 }, + { "ELAN062C", 0 }, + { "ELAN062D", 0 }, + { "ELAN0631", 0 }, + { "ELAN0632", 0 }, { "ELAN1000", 0 }, { } };
From: Florian Westphal fw@strlen.de
[ Upstream commit 0ef235c71755c5f36c50282fcf2d7d08709be344 ]
->destroy is only allowed to free data, or do other cleanups that do not have side effects on other state, such as visibility to other netlink requests.
Such things need to be done in ->deactivate. As a transaction can fail, we need to make sure we can undo such operations, therefore ->activate() has to be provided too.
So print a warning and refuse registration if expr->ops provides only one of the two operations.
v2: fix nft_expr_check_ops to not repeat same check twice (Jones Desougi)
Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/netfilter/nf_tables_api.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c445d57e3a5b..b149a7219084 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -220,6 +220,18 @@ static int nft_delchain(struct nft_ctx *ctx) return err; }
+/* either expr ops provide both activate/deactivate, or neither */ +static bool nft_expr_check_ops(const struct nft_expr_ops *ops) +{ + if (!ops) + return true; + + if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) + return false; + + return true; +} + static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { @@ -1724,6 +1736,9 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, */ int nft_register_expr(struct nft_expr_type *type) { + if (!nft_expr_check_ops(type->ops)) + return -EINVAL; + nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@ -1873,6 +1888,10 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, err = PTR_ERR(ops); goto err1; } + if (!nft_expr_check_ops(ops)) { + err = -EINVAL; + goto err1; + } } else ops = type->ops;
From: Damian Kos dkos@cadence.com
[ Upstream commit e4056bbb6719fe713bfc4030ac78e8e97ddf7574 ]
This is basically the same fix as in commit fa68d4f8476b ("drm/rockchip: fix for mailbox read size") but for cdn_dp_mailbox_validate_receive function.
See patchwork.kernel.org/patch/10671981/ for details.
Signed-off-by: Damian Kos dkos@cadence.com Signed-off-by: Heiko Stuebner heiko@sntech.de Link: https://patchwork.freedesktop.org/patch/msgid/1542640463-18332-1-git-send-em... Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/gpu/drm/rockchip/cdn-dp-reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.c b/drivers/gpu/drm/rockchip/cdn-dp-reg.c index 0ed7e91471f6..4df201d21f27 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-reg.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.c @@ -113,7 +113,7 @@ static int cdp_dp_mailbox_write(struct cdn_dp_device *dp, u8 val)
static int cdn_dp_mailbox_validate_receive(struct cdn_dp_device *dp, u8 module_id, u8 opcode, - u8 req_size) + u16 req_size) { u32 mbox_size, i; u8 header[4];
From: Ronnie Sahlberg lsahlber@redhat.com
[ Upstream commit 05fd5c2c61732152a6bddc318aae62d7e436629b ]
Commit 088aaf17aa79300cab14dbee2569c58cfafd7d6e introduced a leak where if SMB2_read() returned an error we would return without freeing the request buffer.
Cc: Stable stable@vger.kernel.org Signed-off-by: Ronnie Sahlberg lsahlber@redhat.com Reviewed-by: Pavel Shilovsky pshilov@microsoft.com Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- fs/cifs/smb2pdu.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index fd2d199dd413..7936eac5a38a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2699,6 +2699,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, cifs_dbg(VFS, "Send error in read = %d\n", rc); } free_rsp_buf(resp_buftype, rsp_iov.iov_base); + cifs_small_buf_release(req); return rc == -ENODATA ? 0 : rc; }
From: Sebastian Andrzej Siewior bigeasy@linutronix.de
[ Upstream commit 12209993e98c5fa1855c467f22a24e3d5b8be205 ]
There is one user of __kernel_fpu_begin() and before invoking it, it invokes preempt_disable(). So it could invoke kernel_fpu_begin() right away. The 32bit version of arch_efi_call_virt_setup() and arch_efi_call_virt_teardown() does this already.
The comment above *kernel_fpu*() claims that before invoking __kernel_fpu_begin() preemption should be disabled and that KVM is a good example of doing it. Well, KVM doesn't do that since commit
f775b13eedee2 ("x86,kvm: move qemu/guest FPU switching out to vcpu_run")
so it is not an example anymore.
With EFI gone as the last user of __kernel_fpu_{begin|end}(), both can be made static and not exported anymore.
Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de Signed-off-by: Borislav Petkov bp@suse.de Reviewed-by: Rik van Riel riel@surriel.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: "Jason A. Donenfeld" Jason@zx2c4.com Cc: Andy Lutomirski luto@kernel.org Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Ingo Molnar mingo@redhat.com Cc: Nicolai Stange nstange@suse.de Cc: Paolo Bonzini pbonzini@redhat.com Cc: Radim Krčmář rkrcmar@redhat.com Cc: Thomas Gleixner tglx@linutronix.de Cc: kvm ML kvm@vger.kernel.org Cc: linux-efi linux-efi@vger.kernel.org Cc: x86-ml x86@kernel.org Link: https://lkml.kernel.org/r/20181129150210.2k4mawt37ow6c2vq@linutronix.de Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/x86/include/asm/efi.h | 6 ++---- arch/x86/include/asm/fpu/api.h | 15 +++++---------- arch/x86/kernel/fpu/core.c | 6 ++---- 3 files changed, 9 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a399c1ebf6f0..96fd0251f8f5 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -82,8 +82,7 @@ struct efi_scratch { #define arch_efi_call_virt_setup() \ ({ \ efi_sync_low_kernel_mappings(); \ - preempt_disable(); \ - __kernel_fpu_begin(); \ + kernel_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ \ if (efi_scratch.use_pgd) { \ @@ -104,8 +103,7 @@ struct efi_scratch { } \ \ firmware_restrict_branch_speculation_end(); \ - __kernel_fpu_end(); \ - preempt_enable(); \ + kernel_fpu_end(); \ })
extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a9caac9d4a72..b56d504af654 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -12,17 +12,12 @@ #define _ASM_X86_FPU_API_H
/* - * Careful: __kernel_fpu_begin/end() must be called with preempt disabled - * and they don't touch the preempt state on their own. - * If you enable preemption after __kernel_fpu_begin(), preempt notifier - * should call the __kernel_fpu_end() to prevent the kernel/user FPU - * state from getting corrupted. KVM for example uses this model. - * - * All other cases use kernel_fpu_begin/end() which disable preemption - * during kernel FPU usage. + * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It + * disables preemption so be careful if you intend to use it for long periods + * of time. + * If you intend to use the FPU in softirq you need to check first with + * irq_fpu_usable() if it is possible. */ -extern void __kernel_fpu_begin(void); -extern void __kernel_fpu_end(void); extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 2ea85b32421a..2e5003fef51a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -93,7 +93,7 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable);
-void __kernel_fpu_begin(void) +static void __kernel_fpu_begin(void) { struct fpu *fpu = ¤t->thread.fpu;
@@ -111,9 +111,8 @@ void __kernel_fpu_begin(void) __cpu_invalidate_fpregs_state(); } } -EXPORT_SYMBOL(__kernel_fpu_begin);
-void __kernel_fpu_end(void) +static void __kernel_fpu_end(void) { struct fpu *fpu = ¤t->thread.fpu;
@@ -122,7 +121,6 @@ void __kernel_fpu_end(void)
kernel_fpu_enable(); } -EXPORT_SYMBOL(__kernel_fpu_end);
void kernel_fpu_begin(void) {
From: Jun Xiao xiaojun2@hisilicon.com
[ Upstream commit c77804be53369dd4c15bfc376cf9b45948194cab ]
Commit 308c6cafde01 ("net: hns: All ports can not work when insmod hns ko after rmmod.") add phy_stop in hns_nic_init_phy(), In the branch of "net", this method is effective, but in the branch of "net-next", it will cause a WARNING when hns modules loaded, reference to commit 2b3e88ea6528 ("net: phy: improve phy state checking"):
[10.092168] ------------[ cut here ]------------ [10.092171] called from state READY [10.092189] WARNING: CPU: 4 PID: 1 at ../drivers/net/phy/phy.c:854 phy_stop+0x90/0xb0 [10.092192] Modules linked in: [10.092197] CPU: 4 PID:1 Comm:swapper/0 Not tainted 4.20.0-rc7-next-20181220 #1 [10.092200] Hardware name: Huawei TaiShan 2280 /D05, BIOS Hisilicon D05 UEFI 16.12 Release 05/15/2017 [10.092202] pstate: 60000005 (nZCv daif -PAN -UAO) [10.092205] pc : phy_stop+0x90/0xb0 [10.092208] lr : phy_stop+0x90/0xb0 [10.092209] sp : ffff00001159ba90 [10.092212] x29: ffff00001159ba90 x28: 0000000000000007 [10.092215] x27: ffff000011180068 x26: ffff0000110a5620 [10.092218] x25: ffff0000113b6000 x24: ffff842f96dac000 [10.092221] x23: 0000000000000000 x22: 0000000000000000 [10.092223] x21: ffff841fb8425e18 x20: ffff801fb3a56438 [10.092226] x19: ffff801fb3a56000 x18: ffffffffffffffff [10.092228] x17: 0000000000000000 x16: 0000000000000000 [10.092231] x15: ffff00001122d6c8 x14: ffff00009159b7b7 [10.092234] x13: ffff00001159b7c5 x12: ffff000011245000 [10.092236] x11: 0000000005f5e0ff x10: ffff00001159b750 [10.092239] x9 : 00000000ffffffd0 x8 : 0000000000000465 [10.092242] x7 : ffff0000112457f8 x6 : ffff0000113bd7ce [10.092245] x5 : 0000000000000000 x4 : 0000000000000000 [10.092247] x3 : 00000000ffffffff x2 : ffff000011245828 [10.092250] x1 : 4b5860bd05871300 x0 : 0000000000000000 [10.092253] Call trace: [10.092255] phy_stop+0x90/0xb0 [10.092260] hns_nic_init_phy+0xf8/0x110 [10.092262] hns_nic_try_get_ae+0x4c/0x3b0 [10.092264] hns_nic_dev_probe+0x1fc/0x480 [10.092268] platform_drv_probe+0x50/0xa0 [10.092271] really_probe+0x1f4/0x298 [10.092273] driver_probe_device+0x58/0x108 [10.092275] __driver_attach+0xdc/0xe0 [10.092278] bus_for_each_dev+0x74/0xc8 [10.092280] driver_attach+0x20/0x28 [10.092283] bus_add_driver+0x1b8/0x228 [10.092285] driver_register+0x60/0x110 [10.092288] __platform_driver_register+0x40/0x48 [10.092292] hns_nic_dev_driver_init+0x18/0x20 [10.092296] do_one_initcall+0x5c/0x180 [10.092299] kernel_init_freeable+0x198/0x240 [10.092303] kernel_init+0x10/0x108 [10.092306] ret_from_fork+0x10/0x18 [10.092308] ---[ end trace 1396dd0278e397eb ]---
This WARNING occurred because of calling phy_stop before phy_start.
The root cause of the problem in commit '308c6cafde01' is:
Reference to hns_nic_init_phy, the flag phydev->supported is changed after phy_connect_direct. The flag phydev->supported is 0x6ff when hns modules is loaded, so will not change Fiber Port power(Reference to marvell.c), which is power on at default. Then the flag phydev->supported is changed to 0x6f, so Fiber Port power is off when removing hns modules. When hns modules installed again, the flag phydev->supported is default value 0x6ff, so will not change Fiber Port power(now is off), causing mac link not up problem.
So the solution is change phy flags before phy_connect_direct.
Fixes: 308c6cafde01 ("net: hns: All ports can not work when insmod hns ko after rmmod.") Signed-off-by: Yonglong Liu liuyonglong@huawei.com Signed-off-by: Huazhong Tan tanhuazhong@huawei.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index d30c28fba249..7f14f06e868a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1269,6 +1269,12 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (!h->phy_dev) return 0;
+ phy_dev->supported &= h->if_support; + phy_dev->advertising = phy_dev->supported; + + if (h->phy_if == PHY_INTERFACE_MODE_XGMII) + phy_dev->autoneg = false; + if (h->phy_if != PHY_INTERFACE_MODE_XGMII) { phy_dev->dev_flags = 0;
@@ -1280,15 +1286,6 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (unlikely(ret)) return -ENODEV;
- phy_dev->supported &= h->if_support; - phy_dev->advertising = phy_dev->supported; - - if (h->phy_if == PHY_INTERFACE_MODE_XGMII) - phy_dev->autoneg = false; - - if (h->phy_if == PHY_INTERFACE_MODE_SGMII) - phy_stop(phy_dev); - return 0; }
From: Jan Kara jack@suse.cz
[ Upstream commit cae85cb8add35f678cf487139d05e083ce2f570a ]
Aneesh has reported that PPC triggers the following warning when excercising DAX code:
IP set_pte_at+0x3c/0x190 LR insert_pfn+0x208/0x280 Call Trace: insert_pfn+0x68/0x280 dax_iomap_pte_fault.isra.7+0x734/0xa40 __xfs_filemap_fault+0x280/0x2d0 do_wp_page+0x48c/0xa40 __handle_mm_fault+0x8d0/0x1fd0 handle_mm_fault+0x140/0x250 __do_page_fault+0x300/0xd60 handle_page_fault+0x18
Now that is WARN_ON in set_pte_at which is
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
The problem is that on some architectures set_pte_at() cannot cope with a situation where there is already some (different) valid entry present.
Use ptep_set_access_flags() instead to modify the pfn which is built to deal with modifying existing PTE.
Link: http://lkml.kernel.org/r/20190311084537.16029-1-jack@suse.cz Fixes: b2770da64254 "mm: add vm_insert_mixed_mkwrite()" Signed-off-by: Jan Kara jack@suse.cz Reported-by: "Aneesh Kumar K.V" aneesh.kumar@linux.ibm.com Reviewed-by: Aneesh Kumar K.V aneesh.kumar@linux.ibm.com Acked-by: Dan Williams dan.j.williams@intel.com Cc: Chandan Rajendra chandan@linux.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- mm/memory.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c index f99b64ca1303..e9bce27bc18c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1813,10 +1813,12 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); goto out_unlock; } - entry = *pte; - goto out_mkwrite; - } else - goto out_unlock; + entry = pte_mkyoung(*pte); + entry = maybe_mkwrite(pte_mkdirty(entry), vma); + if (ptep_set_access_flags(vma, addr, pte, entry, 1)) + update_mmu_cache(vma, addr, pte); + } + goto out_unlock; }
/* Ok, finally just insert the thing.. */ @@ -1825,7 +1827,6 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, else entry = pte_mkspecial(pfn_t_pte(pfn, prot));
-out_mkwrite: if (mkwrite) { entry = pte_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma);
From: zhengliang zhengliang6@huawei.com
[ Upstream commit a0770e13c8da83bdb64738c0209ab02dd3cfff8b ]
v4: Rearrange the previous three versions.
The following scenario could lead to data block override by mistake.
TASK A | TASK kworker | TASK B | TASK C | | | open | | | write | | | close | | | | f2fs_write_data_pages | | | f2fs_write_cache_pages | | | f2fs_outplace_write_data | | | f2fs_allocate_data_block (get block in seg S, | | | S is full, and only | | | have this valid data | | | block) | | | allocate_segment | | | locate_dirty_segment (mark S as PRE) | | | f2fs_submit_page_write (submit but is not | | | written on dev) | | unlink | | | iput_final | | | f2fs_drop_inode | | | f2fs_truncate | | | (not evict) | | | | | write_checkpoint | | | flush merged bio but not wait file data writeback | | | set_prefree_as_free (mark S as FREE) | | | | update NODE/DATA | | | allocate_segment (select S) | writeback done | |
So we need to guarantee io complete before truncate inode in f2fs_drop_inode.
Reviewed-by: Chao Yu yuchao0@huawei.com Signed-off-by: Zheng Liang zhengliang6@huawei.com Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- fs/f2fs/super.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4c169ba50c0f..06b75737b1a0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -668,6 +668,10 @@ static int f2fs_drop_inode(struct inode *inode) sb_start_intwrite(inode->i_sb); f2fs_i_size_write(inode, 0);
+ f2fs_submit_merged_write_cond(F2FS_I_SB(inode), + inode, NULL, 0, DATA); + truncate_inode_pages_final(inode->i_mapping); + if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode);
From: Andy Duan fugang.duan@nxp.com
[ Upstream commit d7c3a206e6338e4ccdf030719dec028e26a521d5 ]
Some SOC like i.MX6SX clock have some limits: - ahb clock should be disabled before ipg. - ahb and ipg clocks are required for MAC MII bus. So, move the ahb clock to runtime management together with ipg clock.
Signed-off-by: Fugang Duan fugang.duan@nxp.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/freescale/fec_main.c | 30 ++++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index ce55c8f7f33a..ad3aabc39cc2 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1851,13 +1851,9 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) int ret;
if (enable) { - ret = clk_prepare_enable(fep->clk_ahb); - if (ret) - return ret; - ret = clk_prepare_enable(fep->clk_enet_out); if (ret) - goto failed_clk_enet_out; + return ret;
if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1875,7 +1871,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) if (ret) goto failed_clk_ref; } else { - clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1894,8 +1889,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); -failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ahb);
return ret; } @@ -3455,6 +3448,9 @@ fec_probe(struct platform_device *pdev) ret = clk_prepare_enable(fep->clk_ipg); if (ret) goto failed_clk_ipg; + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + goto failed_clk_ahb;
fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { @@ -3546,6 +3542,9 @@ fec_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); failed_regulator: + clk_disable_unprepare(fep->clk_ahb); +failed_clk_ahb: + clk_disable_unprepare(fep->clk_ipg); failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: @@ -3669,6 +3668,7 @@ static int __maybe_unused fec_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev);
+ clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg);
return 0; @@ -3678,8 +3678,20 @@ static int __maybe_unused fec_runtime_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + int ret;
- return clk_prepare_enable(fep->clk_ipg); + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + return ret; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + + return 0; + +failed_clk_ipg: + clk_disable_unprepare(fep->clk_ahb); + return ret; }
static const struct dev_pm_ops fec_pm_ops = {
From: Ido Schimmel idosch@mellanox.com
[ Upstream commit d4d0e40977ac450f32f2db5e4d8e23c9d2578899 ]
The driver cannot guarantee in the prepare phase that it will be able to write an MDB entry to the device. In case the driver returned success during the prepare phase, but then failed to add the entry in the commit phase, a WARNING [1] will be generated by the switchdev core.
Fix this by doing the work in the prepare phase instead.
[1] [ 358.544486] swp12s0: Commit of object (id=2) failed. [ 358.550061] WARNING: CPU: 0 PID: 30 at net/switchdev/switchdev.c:281 switchdev_port_obj_add_now+0x9b/0xe0 [ 358.560754] CPU: 0 PID: 30 Comm: kworker/0:1 Not tainted 5.0.0-custom-13382-gf2449babf221 #1350 [ 358.570472] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 358.580582] Workqueue: events switchdev_deferred_process_work [ 358.587001] RIP: 0010:switchdev_port_obj_add_now+0x9b/0xe0 ... [ 358.614109] RSP: 0018:ffffa6b900d6fe18 EFLAGS: 00010286 [ 358.619943] RAX: 0000000000000000 RBX: ffff8b00797ff000 RCX: 0000000000000000 [ 358.627912] RDX: ffff8b00b7a1d4c0 RSI: ffff8b00b7a152e8 RDI: ffff8b00b7a152e8 [ 358.635881] RBP: ffff8b005c3f5bc0 R08: 000000000000022b R09: 0000000000000000 [ 358.643850] R10: 0000000000000000 R11: ffffa6b900d6fcc8 R12: 0000000000000000 [ 358.651819] R13: dead000000000100 R14: ffff8b00b65a23c0 R15: 0ffff8b00b7a2200 [ 358.659790] FS: 0000000000000000(0000) GS:ffff8b00b7a00000(0000) knlGS:0000000000000000 [ 358.668820] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 358.675228] CR2: 00007f00aad90de0 CR3: 00000001ca80d000 CR4: 00000000001006f0 [ 358.683188] Call Trace: [ 358.685918] switchdev_port_obj_add_deferred+0x13/0x60 [ 358.691655] switchdev_deferred_process+0x6b/0xf0 [ 358.696907] switchdev_deferred_process_work+0xa/0x10 [ 358.702548] process_one_work+0x1f5/0x3f0 [ 358.707022] worker_thread+0x28/0x3c0 [ 358.711099] ? process_one_work+0x3f0/0x3f0 [ 358.715768] kthread+0x10d/0x130 [ 358.719369] ? __kthread_create_on_node+0x180/0x180 [ 358.724815] ret_from_fork+0x35/0x40
Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel idosch@mellanox.com Reported-by: Alex Kushnarov alexanderk@mellanox.com Tested-by: Alex Kushnarov alexanderk@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 3ba9f2c079b2..8a1788108f52 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1320,7 +1320,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_index; int err = 0;
- if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_commit(trans)) return 0;
bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
From: Ido Schimmel idosch@mellanox.com
[ Upstream commit a8c133b06183c529c51cd0d54eb57d6b7078370c ]
The EMAD workqueue is used to handle retransmission of EMAD packets that contain configuration data for the device's firmware.
Given the workers need to allocate these packets and that the code is not called as part of memory reclaim path, remove the WQ_MEM_RECLAIM flag.
Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock") Signed-off-by: Ido Schimmel idosch@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index cced009da869..070fd3f7fadf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -600,7 +600,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0;
- emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); if (!emad_wq) return -ENOMEM; mlxsw_core->emad_wq = emad_wq;
From: Ido Schimmel idosch@mellanox.com
[ Upstream commit 4af0699782e2cc7d0d89db9eb6f8844dd3df82dc ]
The ordered workqueue is used to offload various objects such as routes and neighbours in the order they are notified.
It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. This can also result in a warning [1], if a worker tries to flush a non-WQ_MEM_RECLAIM workqueue.
[1] [97703.542861] workqueue: WQ_MEM_RECLAIM mlxsw_core_ordered:mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] is flushing !WQ_MEM_RECLAIM events:rht_deferred_worker [97703.542884] WARNING: CPU: 1 PID: 32492 at kernel/workqueue.c:2605 check_flush_dependency+0xb5/0x130 ... [97703.542988] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 [97703.543049] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] [97703.543061] RIP: 0010:check_flush_dependency+0xb5/0x130 ... [97703.543071] RSP: 0018:ffffb3f08137bc00 EFLAGS: 00010086 [97703.543076] RAX: 0000000000000000 RBX: ffff96e07740ae00 RCX: 0000000000000000 [97703.543080] RDX: 0000000000000094 RSI: ffffffff82dc1934 RDI: 0000000000000046 [97703.543084] RBP: ffffb3f08137bc20 R08: ffffffff82dc18a0 R09: 00000000000225c0 [97703.543087] R10: 0000000000000000 R11: 0000000000007eec R12: ffffffff816e4ee0 [97703.543091] R13: ffff96e06f6a5c00 R14: ffff96e077ba7700 R15: ffffffff812ab0c0 [97703.543097] FS: 0000000000000000(0000) GS:ffff96e077a80000(0000) knlGS:0000000000000000 [97703.543101] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [97703.543104] CR2: 00007f8cd135b280 CR3: 00000001e860e003 CR4: 00000000003606e0 [97703.543109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [97703.543112] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [97703.543115] Call Trace: [97703.543129] __flush_work+0xbd/0x1e0 [97703.543137] ? __cancel_work_timer+0x136/0x1b0 [97703.543145] ? pwq_dec_nr_in_flight+0x49/0xa0 [97703.543154] __cancel_work_timer+0x136/0x1b0 [97703.543175] ? mlxsw_reg_trans_bulk_wait+0x145/0x400 [mlxsw_core] [97703.543184] cancel_work_sync+0x10/0x20 [97703.543191] rhashtable_free_and_destroy+0x23/0x140 [97703.543198] rhashtable_destroy+0xd/0x10 [97703.543254] mlxsw_sp_fib_destroy+0xb1/0xf0 [mlxsw_spectrum] [97703.543310] mlxsw_sp_vr_put+0xa8/0xc0 [mlxsw_spectrum] [97703.543364] mlxsw_sp_fib_node_put+0xbf/0x140 [mlxsw_spectrum] [97703.543418] ? mlxsw_sp_fib6_entry_destroy+0xe8/0x110 [mlxsw_spectrum] [97703.543475] mlxsw_sp_router_fib6_event_work+0x6cd/0x7f0 [mlxsw_spectrum] [97703.543484] process_one_work+0x1fd/0x400 [97703.543493] worker_thread+0x34/0x410 [97703.543500] kthread+0x121/0x140 [97703.543507] ? process_one_work+0x400/0x400 [97703.543512] ? kthread_park+0x90/0x90 [97703.543523] ret_from_fork+0x35/0x40
Fixes: a3832b31898f ("mlxsw: core: Create an ordered workqueue for FIB offload") Signed-off-by: Ido Schimmel idosch@mellanox.com Reported-by: Semion Lisyansky semionl@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 070fd3f7fadf..33262c09c703 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1815,7 +1815,7 @@ static int __init mlxsw_core_module_init(void) mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); if (!mlxsw_wq) return -ENOMEM; - mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM, + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, mlxsw_core_driver_name); if (!mlxsw_owq) { err = -ENOMEM;
From: Ido Schimmel idosch@mellanox.com
[ Upstream commit b442fed1b724af0de087912a5718ddde1b87acbb ]
The workqueue is used to periodically update the networking stack about activity / statistics of various objects such as neighbours and TC actions.
It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag.
Fixes: 3d5479e92087 ("mlxsw: core: Remove deprecated create_workqueue") Signed-off-by: Ido Schimmel idosch@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 33262c09c703..fad26046e159 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1812,7 +1812,7 @@ static int __init mlxsw_core_module_init(void) { int err;
- mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); if (!mlxsw_wq) return -ENOMEM; mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit d7ee81ad09f072eab1681877fc71ec05f9c1ae92 ]
This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory corruption when handling SHDLC I-Frame commands").
I'm not totally sure, but I think that commit description may have overstated the danger. I was under the impression that this data came from the firmware? If you can't trust your networking firmware, then you're already in trouble.
Anyway, these days we add bounds checking where ever we can and we call it kernel hardening. Better safe than sorry.
Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/nfc/nci/hci.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ddfc52ac1f9b..c0d323b58e73 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; + if (new_pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + }
/* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + }
ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE;
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit 6491d698396fd5da4941980a35ca7c162a672016 ]
This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes") where we changed NFC_HCI_MAX_PIPES from 127 to 128.
As the comment next to the define explains, the pipe identifier is 7 bits long. The highest possible pipe is 127, but the number of possible pipes is 128. As the code is now, then there is potential for an out of bounds array access:
net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one? 'ndev->hci_dev->pipes[pipe]' '0-127 == 127'
Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- include/net/nfc/nci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 87499b6b35d6..df5c69db68af 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -166,7 +166,7 @@ struct nci_conn_info { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ -#define NCI_HCI_MAX_PIPES 127 +#define NCI_HCI_MAX_PIPES 128
struct nci_hci_gate { u8 gate;
From: Masami Hiramatsu mhiramat@kernel.org
[ Upstream commit b191fa96ea6dc00d331dcc28c1f7db5e075693a0 ]
Avoid kretprobe recursion loop bg by setting a dummy kprobes to current_kprobe per-CPU variable.
This bug has been introduced with the asm-coded trampoline code, since previously it used another kprobe for hooking the function return placeholder (which only has a nop) and trampoline handler was called from that kprobe.
This revives the old lost kprobe again.
With this fix, we don't see deadlock anymore.
And you can see that all inner-called kretprobe are skipped.
event_1 235 0 event_2 19375 19612
The 1st column is recorded count and the 2nd is missed count. Above shows (event_1 rec) + (event_2 rec) ~= (event_2 missed) (some difference are here because the counter is racy)
Reported-by: Andrea Righi righi.andrea@gmail.com Tested-by: Andrea Righi righi.andrea@gmail.com Signed-off-by: Masami Hiramatsu mhiramat@kernel.org Acked-by: Steven Rostedt rostedt@goodmis.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Mathieu Desnoyers mathieu.desnoyers@efficios.com Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: stable@vger.kernel.org Fixes: c9becf58d935 ("[PATCH] kretprobe: kretprobe-booster") Link: http://lkml.kernel.org/r/155094064889.6137.972160690963039.stgit@devbox Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/x86/kernel/kprobes/core.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 56cf6c263254..9d7bb8de2917 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -744,11 +744,16 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+static struct kprobe kretprobe_kprobe = { + .addr = (void *)kretprobe_trampoline, +}; + /* * Called from kretprobe_trampoline */ __visible __used void *trampoline_handler(struct pt_regs *regs) { + struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -758,6 +763,17 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false;
+ preempt_disable(); + + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never run in kprobe handler, kprobe must not + * be running at this point. + */ + kcb = get_kprobe_ctlblk(); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ @@ -833,10 +849,9 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); }
recycle_rp_inst(ri, &empty_rp); @@ -852,6 +867,9 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
kretprobe_hash_unlock(current, &flags);
+ __this_cpu_write(current_kprobe, NULL); + preempt_enable(); + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri);
linux-stable-mirror@lists.linaro.org