From: Sven Van Asbroeck thesven73@gmail.com
[ Upstream commit 62039b6aef63380ba7a37c113bbaeee8a55c5342 ]
When cancel_delayed_work() returns, the delayed work may still be running. This means that the core could potentially free the private structure (struct xadc) while the delayed work is still using it. This is a potential use-after-free.
Fix by calling cancel_delayed_work_sync(), which waits for any residual work to finish before returning.
Signed-off-by: Sven Van Asbroeck TheSven73@gmail.com Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 3f6be5ac049a..1960694e8007 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1320,7 +1320,7 @@ static int xadc_remove(struct platform_device *pdev) } free_irq(xadc->irq, indio_dev); clk_disable_unprepare(xadc->clk); - cancel_delayed_work(&xadc->zynq_unmask_work); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); kfree(xadc->data); kfree(indio_dev->channels);
From: Sven Van Asbroeck thesven73@gmail.com
[ Upstream commit 862e4644fd2d7df8998edc65e0963ea2f567bde9 ]
If probe errors out after request_irq(), its error path does not explicitly cancel the delayed work, which may have been scheduled by the interrupt handler.
This means the delayed work may still be running when the core frees the private structure (struct xadc). This is a potential use-after-free.
Fix by inserting cancel_delayed_work_sync() in the probe error path.
Signed-off-by: Sven Van Asbroeck TheSven73@gmail.com Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iio/adc/xilinx-xadc-core.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 1960694e8007..15e1a103f37d 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1290,6 +1290,7 @@ static int xadc_probe(struct platform_device *pdev)
err_free_irq: free_irq(xadc->irq, indio_dev); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); err_clk_disable_unprepare: clk_disable_unprepare(xadc->clk); err_free_samplerate_trigger:
From: Sven Van Asbroeck thesven73@gmail.com
[ Upstream commit 2e4b88f73966adead360e47621df0183586fac32 ]
In remove, the clock is disabled before canceling the delayed work. This means that the delayed work may be touching unclocked hardware.
Fix by disabling the clock after the delayed work is fully canceled. This is consistent with the probe error path order.
Signed-off-by: Sven Van Asbroeck TheSven73@gmail.com Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 15e1a103f37d..1ae86e7359f7 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1320,8 +1320,8 @@ static int xadc_remove(struct platform_device *pdev) iio_triggered_buffer_cleanup(indio_dev); } free_irq(xadc->irq, indio_dev); - clk_disable_unprepare(xadc->clk); cancel_delayed_work_sync(&xadc->zynq_unmask_work); + clk_disable_unprepare(xadc->clk); kfree(xadc->data); kfree(indio_dev->channels);
From: Dan Williams dan.j.williams@intel.com
[ Upstream commit 351f339faa308c1c1461314a18c832239a841ca0 ]
The dynamic-debug statements for command payload output only get emitted when the command is not ND_CMD_CALL. Move the output payload dumping ahead of the early return path for ND_CMD_CALL.
Fixes: 31eca76ba2fc9 ("...whitelisted dimm command marshaling mechanism") Reported-by: Vishal Verma vishal.l.verma@intel.com Signed-off-by: Dan Williams dan.j.williams@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/nfit/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 925dbc751322..8340c81b258b 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -542,6 +542,12 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, goto out; }
+ dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, + cmd_name, out_obj->buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, + out_obj->buffer.pointer, + min_t(u32, 128, out_obj->buffer.length), true); + if (call_pkg) { call_pkg->nd_fw_size = out_obj->buffer.length; memcpy(call_pkg->nd_payload + call_pkg->nd_size_in, @@ -560,12 +566,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, return 0; }
- dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, - cmd_name, out_obj->buffer.length); - print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, - out_obj->buffer.pointer, - min_t(u32, 128, out_obj->buffer.length), true); - for (i = 0, offset = 0; i < desc->out_num; i++) { u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf, (u32 *) out_obj->buffer.pointer,
From: Kangjie Lu kjlu@umn.edu
[ Upstream commit 55c1fc0af29a6c1b92f217b7eb7581a882e0c07c ]
In case kmemdup fails, the fix goes to blk_err to avoid NULL pointer dereference.
Signed-off-by: Kangjie Lu kjlu@umn.edu Signed-off-by: Dan Williams dan.j.williams@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvdimm/namespace_devs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 54d79837f7c6..73a444c41cde 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2251,9 +2251,12 @@ static struct device *create_namespace_blk(struct nd_region *nd_region, if (!nsblk->uuid) goto blk_err; memcpy(name, nd_label->name, NSLABEL_NAME_LEN); - if (name[0]) + if (name[0]) { nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, GFP_KERNEL); + if (!nsblk->alt_name) + goto blk_err; + } res = nsblk_add_resource(nd_region, ndd, nsblk, __le64_to_cpu(nd_label->dpa)); if (!res)
From: Dmitry Torokhov dmitry.torokhov@gmail.com
[ Upstream commit 96dd86871e1fffbc39e4fa61c9c75ec54ee9af0f ]
According to HUTRR77 usage 0x29f from the consumer page is reserved for the Desktop application to present all running user’s application windows. Linux defines KEY_SCALE to request Compiz Scale (Expose) mode, so let's add the mapping.
Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hid/hid-input.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index a3916e58dbf5..150b14623749 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1025,6 +1025,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break; case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break;
+ case 0x29f: map_key_clear(KEY_SCALE); break; + default: map_key_clear(KEY_UNKNOWN); } break;
From: Dmitry Torokhov dmitry.torokhov@gmail.com
[ Upstream commit 7975a1d6a7afeb3eb61c971a153d24dd8fa032f3 ]
According to HUTRR73 usages 0x79, 0x7a and 0x7c from the consumer page correspond to Brightness Up/Down/Toggle keys, so let's add the mappings.
Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hid/hid-input.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 150b14623749..eeb9ba7e8fc6 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -895,6 +895,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break;
+ case 0x079: map_key_clear(KEY_KBDILLUMUP); break; + case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break; + case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break; + case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break;
From: Dmitry Torokhov dmitry.torokhov@gmail.com
[ Upstream commit c01908a14bf735b871170092807c618bb9dae654 ]
According to HUT 1.12 usage 0xb5 from the generic desktop page is reserved for switching between external and internal display, so let's add the mapping.
Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hid/hid-input.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index eeb9ba7e8fc6..be0a6958a65a 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -677,6 +677,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel break; }
+ if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */ + switch (usage->hid & 0xf) { + case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break; + default: goto ignore; + } + break; + } + /* * Some lazy vendors declare 255 usages for System Control, * leading to the creation of ABS_X|Y axis and too many others.
From: Aditya Pakki pakki001@umn.edu
[ Upstream commit 486fa92df4707b5df58d6508728bdb9321a59766 ]
In case kmemdup fails, the fix releases resources and returns to avoid the NULL pointer dereference.
Signed-off-by: Aditya Pakki pakki001@umn.edu Signed-off-by: Dan Williams dan.j.williams@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvdimm/btt_devs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 795ad4ff35ca..e341498876ca 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -190,14 +190,15 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL;
nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL); - if (nd_btt->id < 0) { - kfree(nd_btt); - return NULL; - } + if (nd_btt->id < 0) + goto out_nd_btt;
nd_btt->lbasize = lbasize; - if (uuid) + if (uuid) { uuid = kmemdup(uuid, 16, GFP_KERNEL); + if (!uuid) + goto out_put_id; + } nd_btt->uuid = uuid; dev = &nd_btt->dev; dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id); @@ -212,6 +213,13 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL; } return dev; + +out_put_id: + ida_simple_remove(&nd_region->btt_ida, nd_btt->id); + +out_nd_btt: + kfree(nd_btt); + return NULL; }
struct device *nd_btt_create(struct nd_region *nd_region)
From: Peter Oberparleiter oberpar@linux.ibm.com
[ Upstream commit 2cc9637ce825f3a9f51f8f78af7474e9e85bfa5f ]
The DASD driver incorrectly limits the maximum number of blocks of ECKD DASD volumes to 32 bit numbers. Volumes with a capacity greater than 2^32-1 blocks are incorrectly recognized as smaller volumes.
This results in the following volume capacity limits depending on the formatted block size:
BLKSIZE MAX_GB MAX_CYL 512 2047 5843492 1024 4095 8676701 2048 8191 13634816 4096 16383 23860929
The same problem occurs when a volume with more than 17895697 cylinders is accessed in raw-track-access mode.
Fix this problem by adding an explicit type cast when calculating the maximum number of blocks.
Signed-off-by: Peter Oberparleiter oberpar@linux.ibm.com Reviewed-by: Stefan Haberland sth@linux.ibm.com Signed-off-by: Martin Schwidefsky schwidefsky@de.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/block/dasd_eckd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6e294b4d3635..f89f9d02e788 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2004,14 +2004,14 @@ static int dasd_eckd_end_analysis(struct dasd_block *block) blk_per_trk = recs_per_track(&private->rdc_data, 0, block->bp_block);
raw: - block->blocks = (private->real_cyl * + block->blocks = ((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk);
dev_info(&device->cdev->dev, - "DASD with %d KB/block, %d KB total size, %d KB/track, " + "DASD with %u KB/block, %lu KB total size, %u KB/track, " "%s\n", (block->bp_block >> 10), - ((private->real_cyl * + (((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk * (block->bp_block >> 9)) >> 1), ((blk_per_trk * block->bp_block) >> 10),
From: Felix Fietkau nbd@nbd.name
[ Upstream commit 40586e3fc400c00c11151804dcdc93f8c831c808 ]
The pointer to the last four bytes of the address is not guaranteed to be aligned, so we need to use __get_unaligned_cpu32 here
Signed-off-by: Felix Fietkau nbd@nbd.name Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index c3a7396fb955..49a90217622b 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(*(u32 *)(addr+2), seed); + return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); }
static const struct rhashtable_params mesh_rht_params = {
From: Andrei Otcheretianski andrei.otcheretianski@intel.com
[ Upstream commit 78be2d21cc1cd3069c6138dcfecec62583130171 ]
Looks that 100 chars isn't enough for messages, as we keep getting warnings popping from different places due to message shortening. Instead of trying to shorten the prints, just increase the buffer size.
Signed-off-by: Andrei Otcheretianski andrei.otcheretianski@intel.com Signed-off-by: Luca Coelho luciano.coelho@intel.com Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/trace_msg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 366b9e6f043e..40141df09f25 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright (C) 2019 Intel Corporation + */ + #ifdef CONFIG_MAC80211_MESSAGE_TRACING
#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -11,7 +16,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg
-#define MAX_MSG_LEN 100 +#define MAX_MSG_LEN 120
DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf),
From: Ilan Peer ilan.peer@intel.com
[ Upstream commit 08a75a887ee46828b54600f4bb7068d872a5edd5 ]
The support added for regulatory WMM rules did not handle the case of regulatory domain intersections. Fix it.
Signed-off-by: Ilan Peer ilan.peer@intel.com Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database") Signed-off-by: Luca Coelho luciano.coelho@intel.com Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/wireless/reg.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+)
diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8002ace7c9f6..8a47297ff206 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1287,6 +1287,16 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1, return dfs_region1; }
+static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1, + const struct ieee80211_wmm_ac *wmm_ac2, + struct ieee80211_wmm_ac *intersect) +{ + intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min); + intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max); + intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot); + intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn); +} + /* * Helper for regdom_intersect(), this does the real * mathematical intersection fun @@ -1301,6 +1311,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule1, *power_rule2; struct ieee80211_power_rule *power_rule; + const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2; + struct ieee80211_wmm_rule *wmm_rule; u32 freq_diff, max_bandwidth1, max_bandwidth2;
freq_range1 = &rule1->freq_range; @@ -1311,6 +1323,10 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, power_rule2 = &rule2->power_rule; power_rule = &intersected_rule->power_rule;
+ wmm_rule1 = &rule1->wmm_rule; + wmm_rule2 = &rule2->wmm_rule; + wmm_rule = &intersected_rule->wmm_rule; + freq_range->start_freq_khz = max(freq_range1->start_freq_khz, freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, @@ -1354,6 +1370,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms, rule2->dfs_cac_ms);
+ if (rule1->has_wmm && rule2->has_wmm) { + u8 ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + reg_wmm_rules_intersect(&wmm_rule1->client[ac], + &wmm_rule2->client[ac], + &wmm_rule->client[ac]); + reg_wmm_rules_intersect(&wmm_rule1->ap[ac], + &wmm_rule2->ap[ac], + &wmm_rule->ap[ac]); + } + + intersected_rule->has_wmm = true; + } else if (rule1->has_wmm) { + *wmm_rule = *wmm_rule1; + intersected_rule->has_wmm = true; + } else if (rule2->has_wmm) { + *wmm_rule = *wmm_rule2; + intersected_rule->has_wmm = true; + } else { + intersected_rule->has_wmm = false; + } + if (!is_valid_reg_rule(intersected_rule)) return -EINVAL;
From: Felix Fietkau nbd@nbd.name
[ Upstream commit eb9b64e3a9f8483e6e54f4e03b2ae14ae5db2690 ]
skb->truesize can change due to memory reallocation or when adding extra fragments. Adjust fq->memory_usage accordingly
Signed-off-by: Felix Fietkau nbd@nbd.name Acked-by: Toke Høiland-Jørgensen toke@redhat.com Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/tx.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 743cde66aaf6..2f726cde9998 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3185,6 +3185,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + int orig_truesize; __be16 len; void *data; bool ret = false; @@ -3218,6 +3219,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!head) goto out;
+ orig_truesize = head->truesize; orig_len = head->len;
if (skb->len + head->len > max_amsdu_len) @@ -3272,6 +3274,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, *frag_tail = skb;
out_recalc: + fq->memory_usage += head->truesize - orig_truesize; if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len;
From: Sunil Dutt usdutt@codeaurora.org
[ Upstream commit d6db02a88a4aaa1cd7105137c67ddec7f3bdbc05 ]
This commit adds NL80211_FLAG_CLEAR_SKB flag to other NL commands that carry key data to ensure they do not stick around on heap after the SKB is freed.
Also introduced this flag for NL80211_CMD_VENDOR as there are sub commands which configure the keys.
Signed-off-by: Sunil Dutt usdutt@codeaurora.org Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/wireless/nl80211.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 295cd8d5554f..048e004ed0ee 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13392,7 +13392,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEAUTHENTICATE, @@ -13443,7 +13444,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, @@ -13451,7 +13453,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DISCONNECT, @@ -13480,7 +13483,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMKSA, @@ -13832,7 +13836,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_SET_QOS_MAP, @@ -13887,7 +13892,8 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_set_pmk, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK,
From: Anson Huang anson.huang@nxp.com
[ Upstream commit bf2a7ca39fd3ab47ef71c621a7ee69d1813b1f97 ]
SNVS IRQ is requested before necessary driver data initialized, if there is a pending IRQ during driver probe phase, kernel NULL pointer panic will occur in IRQ handler. To avoid such scenario, just initialize necessary driver data before enabling IRQ. This patch is inspired by NXP's internal kernel tree.
Fixes: d3dc6e232215 ("input: keyboard: imx: add snvs power key driver") Signed-off-by: Anson Huang Anson.Huang@nxp.com Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/input/keyboard/snvs_pwrkey.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c index effb63205d3d..4c67cf30a5d9 100644 --- a/drivers/input/keyboard/snvs_pwrkey.c +++ b/drivers/input/keyboard/snvs_pwrkey.c @@ -148,6 +148,9 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; }
+ pdata->input = input; + platform_set_drvdata(pdev, pdata); + error = devm_request_irq(&pdev->dev, pdata->irq, imx_snvs_pwrkey_interrupt, 0, pdev->name, pdev); @@ -163,9 +166,6 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; }
- pdata->input = input; - platform_set_drvdata(pdev, pdata); - device_init_wakeup(&pdev->dev, pdata->wakeup);
return 0;
From: Li RongQing lirongqing@baidu.com
[ Upstream commit 9dc6488e84b0f64df17672271664752488cd6a25 ]
If offset is not zero and length is bigger than PAGE_SIZE, this will cause to out of boundary access to a page memory
Fixes: 98cc093cba1e ("block, THP: make block_device_operations.rw_page support THP") Co-developed-by: Liang ZhiCheng liangzhicheng@baidu.com Signed-off-by: Liang ZhiCheng liangzhicheng@baidu.com Signed-off-by: Li RongQing lirongqing@baidu.com Reviewed-by: Ira Weiny ira.weiny@intel.com Reviewed-by: Jeff Moyer jmoyer@redhat.com Signed-off-by: Dan Williams dan.j.williams@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvdimm/pmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1d432c5ed275..cff027fc2676 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -113,13 +113,13 @@ static void write_pmem(void *pmem_addr, struct page *page,
while (len) { mem = kmap_atomic(page); - chunk = min_t(unsigned int, len, PAGE_SIZE); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); memcpy_flushcache(pmem_addr, mem + off, chunk); kunmap_atomic(mem); len -= chunk; off = 0; page++; - pmem_addr += PAGE_SIZE; + pmem_addr += chunk; } }
@@ -132,7 +132,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
while (len) { mem = kmap_atomic(page); - chunk = min_t(unsigned int, len, PAGE_SIZE); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); kunmap_atomic(mem); if (rem) @@ -140,7 +140,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, len -= chunk; off = 0; page++; - pmem_addr += PAGE_SIZE; + pmem_addr += chunk; } return BLK_STS_OK; }
From: Alexander Wetzel alexander@wetzel-home.de
[ Upstream commit 78ad2341521d5ea96cb936244ed4c4c4ef9ec13b ]
Restore SW_CRYPTO_CONTROL operation on AP_VLAN interfaces for unicast keys, the original override was intended to be done for group keys as those are treated specially by mac80211 and would always have been rejected.
Now the situation is that AP_VLAN support must be enabled by the driver if it can support it (meaning it can support software crypto GTK TX).
Thus, also simplify the code - if we get here with AP_VLAN and non- pairwise key, software crypto must be used (driver doesn't know about the interface) and can be used (driver must've advertised AP_VLAN if it also uses SW_CRYPTO_CONTROL).
Fixes: db3bdcb9c3ff ("mac80211: allow AP_VLAN operation on crypto controlled devices") Signed-off-by: Alexander Wetzel alexander@wetzel-home.de [rewrite commit message] Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/key.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/net/mac80211/key.c b/net/mac80211/key.c index c054ac85793c..f20bb39f492d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -167,8 +167,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) * The driver doesn't know anything about VLAN interfaces. * Hence, don't send GTKs for VLAN interfaces to the driver. */ - if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) + if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + ret = 1; goto out_unsupported; + } }
ret = drv_set_key(key->local, SET_KEY, sdata, @@ -213,11 +215,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) /* all of these we can do in software - if driver can */ if (ret == 1) return 0; - if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return 0; + if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) return -EINVAL; - } return 0; default: return -EINVAL;
From: Martin Schwidefsky schwidefsky@de.ibm.com
[ Upstream commit 5712f3301a12c0c3de9cc423484496b0464f2faf ]
The spinlock in the raw3270_view structure is used by con3270, tty3270 and fs3270 in different ways. For con3270 the lock can be acquired in irq context, for tty3270 and fs3270 the highest context is bh.
Lockdep sees the view->lock as a single class and if the 3270 driver is used for the console the following message is generated:
WARNING: inconsistent lock state 5.1.0-rc3-05157-g5c168033979d #12 Not tainted -------------------------------- inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. swapper/0/1 [HC0[0]:SC1[1]:HE1:SE0] takes: (____ptrval____) (&(&view->lock)->rlock){?.-.}, at: tty3270_update+0x7c/0x330
Introduce a lockdep subclass for the view lock to distinguish bh from irq locks.
Signed-off-by: Martin Schwidefsky schwidefsky@de.ibm.com
Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/char/con3270.c | 2 +- drivers/s390/char/fs3270.c | 3 ++- drivers/s390/char/raw3270.c | 3 ++- drivers/s390/char/raw3270.h | 4 +++- drivers/s390/char/tty3270.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index fd2146bcc0ad..e17364e13d2f 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -629,7 +629,7 @@ con3270_init(void) (void (*)(unsigned long)) con3270_read_tasklet, (unsigned long) condev->read);
- raw3270_add_view(&condev->view, &con3270_fn, 1); + raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ);
INIT_LIST_HEAD(&condev->freemem); for (i = 0; i < CON3270_STRING_PAGES; i++) { diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 16a4e8528bbc..2f9905ee047c 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -463,7 +463,8 @@ fs3270_open(struct inode *inode, struct file *filp)
init_waitqueue_head(&fp->wait); fp->fs_pid = get_pid(task_pid(current)); - rc = raw3270_add_view(&fp->view, &fs3270_fn, minor); + rc = raw3270_add_view(&fp->view, &fs3270_fn, minor, + RAW3270_VIEW_LOCK_BH); if (rc) { fs3270_free_view(&fp->view); goto out; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index f8cd2935fbfd..63a41b168761 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -920,7 +920,7 @@ raw3270_deactivate_view(struct raw3270_view *view) * Add view to device with minor "minor". */ int -raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) +raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor, int subclass) { unsigned long flags; struct raw3270 *rp; @@ -942,6 +942,7 @@ raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) view->cols = rp->cols; view->ascebc = rp->ascebc; spin_lock_init(&view->lock); + lockdep_set_subclass(&view->lock, subclass); list_add(&view->list, &rp->view_list); rc = 0; spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags); diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 114ca7cbf889..3afaa35f7351 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -150,6 +150,8 @@ struct raw3270_fn { struct raw3270_view { struct list_head list; spinlock_t lock; +#define RAW3270_VIEW_LOCK_IRQ 0 +#define RAW3270_VIEW_LOCK_BH 1 atomic_t ref_count; struct raw3270 *dev; struct raw3270_fn *fn; @@ -158,7 +160,7 @@ struct raw3270_view { unsigned char *ascebc; /* ascii -> ebcdic table */ };
-int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int); +int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int); int raw3270_activate_view(struct raw3270_view *); void raw3270_del_view(struct raw3270_view *); void raw3270_deactivate_view(struct raw3270_view *); diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 5b8af2782282..81067f5bb178 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -980,7 +980,8 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty) return PTR_ERR(tp);
rc = raw3270_add_view(&tp->view, &tty3270_fn, - tty->index + RAW3270_FIRSTMINOR); + tty->index + RAW3270_FIRSTMINOR, + RAW3270_VIEW_LOCK_BH); if (rc) { tty3270_free_view(tp); return rc;
From: Martin Leung martin.leung@amd.com
[ Upstream commit f4bbebf8e7eb4d294b040ab2d2ba71e70e69b930 ]
[Why] AUX takes longer to reply when using active DP-DVI dongle on some asics resulting in up to 2000+ us edid read (timeout).
[How] 1. Adjust AUX poll to match spec 2. Extend the SW timeout. This does not affect normal operation since we exit the loop as soon as AUX acks.
Signed-off-by: Martin Leung martin.leung@amd.com Reviewed-by: Jun Lei Jun.Lei@amd.com Acked-by: Joshua Aberback Joshua.Aberback@amd.com Acked-by: Leo Li sunpeng.li@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 9 ++++++--- drivers/gpu/drm/amd/display/dc/dce/dce_aux.h | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 3f5b2e6f7553..df936edac5c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -189,6 +189,12 @@ static void submit_channel_request( 1, 0); } + + REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); + + REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, + 10, aux110->timeout_period/10); + /* set the delay and the number of bytes to write */
/* The length include @@ -241,9 +247,6 @@ static void submit_channel_request( } }
- REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); - REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, - 10, aux110->timeout_period/10); REG_UPDATE(AUX_SW_CONTROL, AUX_SW_GO, 1); }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index f7caab85dc80..2c6f50b4245a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -69,11 +69,11 @@ enum { /* This is the timeout as defined in DP 1.2a, * at most within ~240usec. That means, * increasing this timeout will not affect normal operation, * and we'll timeout after - * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 1600usec. + * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 2400usec. * This timeout is especially important for - * resume from S3 and CTS. + * converters, resume from S3, and CTS. */ - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 4 + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 6 }; struct aux_engine_dce110 { struct aux_engine base;
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit 99834eead2a04e93a120abb112542b87c42ff5e1 ]
When this is disabled, we get a link failure:
drivers/clocksource/timer-npcm7xx.o: In function `npcm7xx_timer_init': timer-npcm7xx.c:(.init.text+0xf): undefined reference to `timer_of_init'
Fixes: 1c00289ecd12 ("clocksource/drivers/npcm: Add NPCM7xx timer driver") Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clocksource/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index c1ddafa4c299..4d37f018d846 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -136,6 +136,7 @@ config VT8500_TIMER config NPCM7XX_TIMER bool "NPCM7xx timer driver" if COMPILE_TEST depends on HAS_IOMEM + select TIMER_OF select CLKSRC_MMIO help Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture,
From: Neil Armstrong narmstrong@baylibre.com
[ Upstream commit fbc87aa0f7c429999dc31f1bac3b2615008cac32 ]
The OX820 compatible is wrong is the driver, fix it.
Fixes: 2ea3401e2a84 ("clocksource/drivers/oxnas: Add OX820 compatible") Reported-by: Daniel Golle daniel@makrotopia.org Signed-off-by: Neil Armstrong narmstrong@baylibre.com Signed-off-by: Daniel Lezcano daniel.lezcano@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/clocksource/timer-oxnas-rps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c index eed6feff8b5f..30c6f4ce672b 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -296,4 +296,4 @@ static int __init oxnas_rps_timer_init(struct device_node *np) TIMER_OF_DECLARE(ox810se_rps, "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); TIMER_OF_DECLARE(ox820_rps, - "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init); + "oxsemi,ox820-rps-timer", oxnas_rps_timer_init);
From: David Ahern dsahern@gmail.com
[ Upstream commit a5f622984a623df9a84cf43f6b098d8dd76fbe05 ]
A couple of tests are verifying a route has been removed. The helper expects the prefix as the first part of the expected output. When checking that a route has been deleted the prefix is empty leading to an invalid ip command:
$ ip ro ls match Command line is not complete. Try option "help"
Fix by moving the comparison of expected output and output to a new function that is used by both check_route and check_route6. Use the new helper for the 2 checks on route removal.
Also, remove the reset of 'set -x' in route_setup which overrides the user managed setting.
Fixes: d69faad76584c ("selftests: fib_tests: Add prefix route tests with metric") Signed-off-by: David Ahern dsahern@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/net/fib_tests.sh | 94 ++++++++++-------------- 1 file changed, 40 insertions(+), 54 deletions(-)
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index a4ccde0e473b..2f190aa8fc5f 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -602,6 +602,39 @@ run_cmd() return $rc }
+check_expected() +{ + local out="$1" + local expected="$2" + local rc=0 + + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy ''; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route6() @@ -646,31 +679,7 @@ check_route6() local rc=0
out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy ''; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" }
route_cleanup() @@ -714,7 +723,7 @@ route_setup() $IP addr add 172.16.103.2/24 dev veth4 $IP addr add 172.16.104.1/24 dev dummy1
- set +ex + set +e }
# assumption is that basic add of a single path route works @@ -949,7 +958,8 @@ ipv6_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route6 "" + out=$($IP -6 ro ls match 2001:db8:104::/64) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" @@ -1009,34 +1019,9 @@ check_route() local pfx="172.16.104.0/24" local expected="$1" local out - local rc=0
out=$($IP ro ls match ${pfx}) - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy ''; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" }
# assumption is that basic add of a single path route works @@ -1301,7 +1286,8 @@ ipv4_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route "" + out=$($IP ro ls match 172.16.104.0/24) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down"
From: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp
[ Upstream commit 238ffdc49ef98b15819cfd5e3fb23194e3ea3d39 ]
KMSAN will complain if valid address length passed to bind() is shorter than sizeof("struct sockaddr_mISDN"->family) bytes.
Signed-off-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/isdn/mISDN/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 18c0a1281914..b2abc44fa5cb 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -711,10 +711,10 @@ base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct sock *sk = sock->sk; int err = 0;
- if (!maddr || maddr->family != AF_ISDN) + if (addr_len < sizeof(struct sockaddr_mISDN)) return -EINVAL;
- if (addr_len < sizeof(struct sockaddr_mISDN)) + if (!maddr || maddr->family != AF_ISDN) return -EINVAL;
lock_sock(sk);
From: Colin Ian King colin.king@canonical.com
[ Upstream commit 0a2c34f18c94b596562bf3d019fceab998b8b584 ]
Currently if a pci dma mapping failure is detected a free'd memblock address is returned rather than a NULL (that indicates an error). Fix this by ensuring NULL is returned on this error case.
Addresses-Coverity: ("Use after free") Fixes: 528f727279ae ("vxge: code cleanup and reorganization") Signed-off-by: Colin Ian King colin.king@canonical.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/neterion/vxge/vxge-config.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index bf4302e45dcd..28f765664702 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -2365,6 +2365,7 @@ static void *__vxge_hw_blockpool_malloc(struct __vxge_hw_device *devh, u32 size, dma_object->addr))) { vxge_os_dma_free(devh->pdev, memblock, &dma_object->acc_handle); + memblock = NULL; goto exit; }
From: Colin Ian King colin.king@canonical.com
[ Upstream commit 1dc2b3d65523780ed1972d446c76e62e13f3e8f5 ]
The err2 error return path calls qede_ptp_disable that cleans up on an error and frees ptp. After this, the free'd ptp is dereferenced when ptp->clock is set to NULL and the code falls-through to error path err1 that frees ptp again.
Fix this by calling qede_ptp_disable and exiting via an error return path that does not set ptp->clock or kfree ptp.
Addresses-Coverity: ("Write to pointer after free") Fixes: 035744975aec ("qede: Add support for PTP resource locking.") Signed-off-by: Colin Ian King colin.king@canonical.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 013ff567283c..5e574c3b625e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -490,18 +490,17 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc)
ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev); if (IS_ERR(ptp->clock)) { - rc = -EINVAL; DP_ERR(edev, "PTP clock registration failed\n"); + qede_ptp_disable(edev); + rc = -EINVAL; goto err2; }
return 0;
-err2: - qede_ptp_disable(edev); - ptp->clock = NULL; err1: kfree(ptp); +err2: edev->ptp = NULL;
return rc;
From: Marc Dionne marc.dionne@auristor.com
[ Upstream commit 21bd68f196ca91fc0f3d9bd1b32f6e530e8c1c88 ]
__pagevec_release() complains loudly if any page in the vector is still locked. The pages need to be locked for generic_error_remove_page(), but that function doesn't actually unlock them.
Unlock the pages afterwards.
Signed-off-by: Marc Dionne marc.dionne@auristor.com Signed-off-by: David Howells dhowells@redhat.com Tested-by: Jonathan Billings jsbillin@umich.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/fs/afs/write.c b/fs/afs/write.c index 19c04caf3c01..e00461a6de9a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -253,6 +253,7 @@ static void afs_kill_pages(struct address_space *mapping, first = page->index + 1; lock_page(page); generic_error_remove_page(mapping, page); + unlock_page(page); }
__pagevec_release(&pv);
From: David Francis David.Francis@amd.com
[ Upstream commit c238bfe0be9ef7420f7669a69e27c8c8f4d8a568 ]
[Why] On some compositors, with two monitors attached, VT terminal switch can cause a graphical issue by the following means:
There are two streams, one for each monitor. Each stream has one plane
current state: M1:S1->P1 M2:S2->P2
The user calls for a terminal switch and a commit is made to change both planes to linear swizzle mode. In atomic check, a new dc_state is constructed with new planes on each stream
new state: M1:S1->P3 M2:S2->P4
In commit tail, each stream is committed, one at a time. The first stream (S1) updates properly, triggerring a full update and replacing the state
current state: M1:S1->P3 M2:S2->P4
The update for S2 comes in, but dc detects that there is no difference between the stream and plane in the new and current states, and so triggers a fast update. The fast update does not program swizzle, so the second monitor is corrupted
[How] Add a flag to dc_plane_state that forces full updates
When a stream undergoes a full update, set this flag on all changed planes, then clear it on the current stream
Subsequent streams will get full updates as a result
Signed-off-by: David Francis David.Francis@amd.com Signed-off-by: Nicholas Kazlauskas nicholas.kazlauskas@amd.com Reviewed-by: Roman Li Roman.Li@amd.com Acked-by: Bhawanpreet Lakha <Bhawanpreet Lakha@amd.com> Acked-by: Nicholas Kazlauskas Nicholas.Kazlauskas@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 3 +++ 2 files changed, 22 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index bb0cda727605..e3f5e5d6f0c1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1213,6 +1213,11 @@ static enum surface_update_type det_surface_update(const struct dc *dc, return UPDATE_TYPE_FULL; }
+ if (u->surface->force_full_update) { + update_flags->bits.full_update = 1; + return UPDATE_TYPE_FULL; + } + type = get_plane_info_update_type(u); elevate_update_type(&overall_type, type);
@@ -1467,6 +1472,14 @@ void dc_commit_updates_for_stream(struct dc *dc, }
dc_resource_state_copy_construct(state, context); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) + new_pipe->plane_state->force_full_update = true; + } }
@@ -1510,6 +1523,12 @@ void dc_commit_updates_for_stream(struct dc *dc, dc->current_state = context; dc_release_state(old);
+ for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->plane_state && pipe_ctx->stream == stream) + pipe_ctx->plane_state->force_full_update = false; + } } /*let's use current_state to update watermark etc*/ if (update_type >= UPDATE_TYPE_FULL) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6c9990bef267..4094b4f50111 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -505,6 +505,9 @@ struct dc_plane_state { struct dc_plane_status status; struct dc_context *ctx;
+ /* HACK: Workaround for forcing full reprogramming under some conditions */ + bool force_full_update; + /* private to dc_surface.c */ enum dc_irq_source irq_source; struct kref refcount;
From: Harald Freudenberger freude@linux.ibm.com
[ Upstream commit 6b1f16ba730d4c0cda1247568c3a1bf4fa3a2f2f ]
The debug feature entries have been used with up to 5 arguents (including the pointer to the format string) but there was only space reserved for 4 arguemnts. So now the registration does reserve space for 5 times a long value.
This fixes a sometime appearing weired value as the last value of an debug feature entry like this:
... pkey_sec2protkey zcrypt_send_cprb (cardnr=10 domain=12) failed with errno -2143346254
Signed-off-by: Harald Freudenberger freude@linux.ibm.com Reported-by: Christian Rund Christian.Rund@de.ibm.com Signed-off-by: Martin Schwidefsky schwidefsky@de.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/crypto/pkey_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 1b4001e0285f..b16344479959 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -45,7 +45,8 @@ static debug_info_t *debug_info;
static void __init pkey_debug_init(void) { - debug_info = debug_register("pkey", 1, 1, 4 * sizeof(long)); + /* 5 arguments per dbf entry (including the format string ptr) */ + debug_info = debug_register("pkey", 1, 1, 5 * sizeof(long)); debug_register_view(debug_info, &debug_sprintf_view); debug_set_level(debug_info, 3); }
From: Sami Tolvanen samitolvanen@google.com
[ Upstream commit 6a03469a1edc94da52b65478f1e00837add869a3 ]
With CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y, we compile the kernel with -fdata-sections, which also splits the .bss section.
The new section, with a new .bss.* name, which pattern gets missed by the main x86 linker script which only expects the '.bss' name. This results in the discarding of the second part and a too small, truncated .bss section and an unhappy, non-working kernel.
Use the common BSS_MAIN macro in the linker script to properly capture and merge all the generated BSS sections.
Signed-off-by: Sami Tolvanen samitolvanen@google.com Reviewed-by: Nick Desaulniers ndesaulniers@google.com Reviewed-by: Kees Cook keescook@chromium.org Cc: Borislav Petkov bp@alien8.de Cc: Kees Cook keescook@chromium.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Nicholas Piggin npiggin@gmail.com Cc: Nick Desaulniers ndesaulniers@google.com Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Link: http://lkml.kernel.org/r/20190415164956.124067-1-samitolvanen@google.com [ Extended the changelog. ] Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index c63bab98780c..85e6d5620188 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -372,7 +372,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) - *(.bss) + *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); __bss_stop = .;
From: Jian-Hong Pan jian-hong@endlessm.com
[ Upstream commit 0082517fa4bce073e7cf542633439f26538a14cc ]
Upon reboot, the Acer TravelMate X514-51T laptop appears to complete the shutdown process, but then it hangs in BIOS POST with a black screen.
The problem is intermittent - at some points it has appeared related to Secure Boot settings or different kernel builds, but ultimately we have not been able to identify the exact conditions that trigger the issue to come and go.
Besides, the EFI mode cannot be disabled in the BIOS of this model.
However, after extensive testing, we observe that using the EFI reboot method reliably avoids the issue in all cases.
So add a boot time quirk to use EFI reboot on such systems.
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=203119 Signed-off-by: Jian-Hong Pan jian-hong@endlessm.com Signed-off-by: Daniel Drake drake@endlessm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Borislav Petkov bp@alien8.de Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Matt Fleming matt@codeblueprint.co.uk Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-efi@vger.kernel.org Cc: linux@endlessm.com Link: http://lkml.kernel.org/r/20190412080152.3718-1-jian-hong@endlessm.com [ Fix !CONFIG_EFI build failure, clarify the code and the changelog a bit. ] Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/reboot.c | 21 +++++++++++++++++++++ include/linux/efi.h | 7 ++++++- 2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 725624b6c0c0..8fd3cedd9acc 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; }
+/* + * Some machines don't handle the default ACPI reboot method and + * require the EFI reboot method: + */ +static int __init set_efi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) { + reboot_type = BOOT_EFI; + pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident); + } + return 0; +} + void __noreturn machine_real_restart(unsigned int type) { local_irq_disable(); @@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), }, }, + { /* Handle reboot issue on Acer TravelMate X514-51T */ + .callback = set_efi_reboot, + .ident = "Acer TravelMate X514-51T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"), + }, + },
/* Apple */ { /* Handle problems with rebooting on Apple MacBook5 */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 401e4b254e30..cc3391796c0b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1564,7 +1564,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size);
-bool efi_runtime_disabled(void); +#ifdef CONFIG_EFI +extern bool efi_runtime_disabled(void); +#else +static inline bool efi_runtime_disabled(void) { return true; } +#endif + extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
enum efi_secureboot_mode {
From: Peter Zijlstra peterz@infradead.org
[ Upstream commit 780e0106d468a2962b16b52fdf42898f2639e0a0 ]
Revert the following commit:
515ab7c41306: ("x86/mm: Align TLB invalidation info")
I found out (the hard way) that under some .config options (notably L1_CACHE_SHIFT=7) and compiler combinations this on-stack alignment leads to a 320 byte stack usage, which then triggers a KASAN stack warning elsewhere.
Using 320 bytes of stack space for a 40 byte structure is ludicrous and clearly not right.
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Acked-by: Linus Torvalds torvalds@linux-foundation.org Acked-by: Nadav Amit namit@vmware.com Cc: Andy Lutomirski luto@kernel.org Cc: Borislav Petkov bp@alien8.de Cc: Dave Hansen dave.hansen@intel.com Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Fixes: 515ab7c41306 ("x86/mm: Align TLB invalidation info") Link: http://lkml.kernel.org/r/20190416080335.GM7905@worktop.programming.kicks-ass... [ Minor changelog edits. ] Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a6d1b0241aea..a6836ab0fcc7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -694,7 +694,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, { int cpu;
- struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = { + struct flush_tlb_info info = { .mm = mm, };
From: Liran Alon liran.alon@oracle.com
[ Upstream commit 672ff6cff80ca43bf3258410d2b887036969df5f ]
Before this change, reading a VMware pseduo PMC will succeed even when PMU is not supported by guest. This can easily be seen by running kvm-unit-test vmware_backdoors with "-cpu host,-pmu" option.
Reviewed-by: Mihai Carabas mihai.carabas@oracle.com Signed-off-by: Liran Alon liran.alon@oracle.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kvm/pmu.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 58ead7db71a3..e39741997893 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -281,9 +281,13 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) { bool fast_mode = idx & (1u << 31); + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; u64 ctr_val;
+ if (!pmu->version) + return 1; + if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
From: Paolo Bonzini pbonzini@redhat.com
[ Upstream commit 1d487e9bf8ba66a7174c56a0029c54b1eca8f99c ]
These were found with smatch, and then generalized when applicable.
Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kvm/lapic.c | 4 +++- include/linux/kvm_host.h | 10 ++++++---- virt/kvm/irqchip.c | 5 +++-- virt/kvm/kvm_main.c | 6 ++++-- 4 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3692de84c420..d2f5aa220355 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -133,6 +133,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, if (offset <= max_apic_id) { u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+ offset = array_index_nospec(offset, map->max_apic_id + 1); *cluster = &map->phys_map[offset]; *mask = dest_id & (0xffff >> (16 - cluster_size)); } else { @@ -896,7 +897,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, if (irq->dest_id > map->max_apic_id) { *bitmap = 0; } else { - *dst = &map->phys_map[irq->dest_id]; + u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); + *dst = &map->phys_map[dest_id]; *bitmap = 1; } return true; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 23c242a7ac52..30efb3663892 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -28,6 +28,7 @@ #include <linux/irqbypass.h> #include <linux/swait.h> #include <linux/refcount.h> +#include <linux/nospec.h> #include <asm/signal.h>
#include <linux/kvm.h> @@ -491,10 +492,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { - /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case - * the caller has read kvm->online_vcpus before (as is the case - * for kvm_for_each_vcpu, for example). - */ + int num_vcpus = atomic_read(&kvm->online_vcpus); + i = array_index_nospec(i, num_vcpus); + + /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ smp_rmb(); return kvm->vcpus[i]; } @@ -578,6 +579,7 @@ void kvm_put_kvm(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { + as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b1286c4e0712..0bd0683640bd 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm, { struct kvm_kernel_irq_routing_entry *ei; int r; + u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES);
/* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and non-irqchip routing. */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + hlist_for_each_entry(ei, &rt->map[gsi], link) if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return -EINVAL;
- e->gsi = ue->gsi; + e->gsi = gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a79df88b546..e909d9907b50 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2887,12 +2887,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_device_ops *ops = NULL; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int type; int ret;
if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) return -ENODEV;
- ops = kvm_device_ops_table[cd->type]; + type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); + ops = kvm_device_ops_table[type]; if (ops == NULL) return -ENODEV;
@@ -2907,7 +2909,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, dev->kvm = kvm;
mutex_lock(&kvm->lock); - ret = ops->create(dev, cd->type); + ret = ops->create(dev, type); if (ret < 0) { mutex_unlock(&kvm->lock); kfree(dev);
From: Vitaly Kuznetsov vkuznets@redhat.com
[ Upstream commit 7a223e06b1a411cef6c4cd7a9b9a33c8d225b10e ]
In __apic_accept_irq() interface trig_mode is int and actually on some code paths it is set above u8:
kvm_apic_set_irq() extracts it from 'struct kvm_lapic_irq' where trig_mode is u16. This is done on purpose as e.g. kvm_set_msi_irq() sets it to (1 << 15) & e->msi.data
kvm_apic_local_deliver sets it to reg & (1 << 15).
Fix the immediate issue by making 'tm' into u16. We may also want to adjust __apic_accept_irq() interface and use proper sizes for vector, level, trig_mode but this is not urgent.
Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kvm/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0f997683404f..b3f219b7c840 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi, );
TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), + TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) - __field( __u8, tm ) + __field( __u16, tm ) __field( __u8, vec ) ),
From: Rikard Falkeborn rikard.falkeborn@gmail.com
[ Upstream commit f32c2877bcb068a718bb70094cd59ccc29d4d082 ]
There was a missing comparison with 0 when checking if type is "s64" or "u64". Therefore, the body of the if-statement was entered if "type" was "u64" or not "s64", which made the first strcmp() redundant since if type is "u64", it's not "s64".
If type is "s64", the body of the if-statement is not entered but since the remainder of the function consists of if-statements which will not be entered if type is "s64", we will just return "val", which is correct, albeit at the cost of a few more calls to strcmp(), i.e., it will behave just as if the if-statement was entered.
If type is neither "s64" or "u64", the body of the if-statement will be entered incorrectly and "val" returned. This means that any type that is checked after "s64" and "u64" is handled the same way as "s64" and "u64", i.e., the limiting of "val" to fit in for example "s8" is never reached.
This was introduced in the kernel tree when the sources were copied from trace-cmd in commit f7d82350e597 ("tools/events: Add files to create libtraceevent.a"), and in the trace-cmd repo in 1cdbae6035cei ("Implement typecasting in parser") when the function was introduced, i.e., it has always behaved the wrong way.
Detected by cppcheck.
Signed-off-by: Rikard Falkeborn rikard.falkeborn@gmail.com Reviewed-by: Steven Rostedt (VMware) rostedt@goodmis.org Cc: Tzvetomir Stoyanov tstoyanov@vmware.com Fixes: f7d82350e597 ("tools/events: Add files to create libtraceevent.a") Link: http://lkml.kernel.org/r/20190409091529.2686-1-rikard.falkeborn@gmail.com Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 10985d991ed2..6ccfd13d5cf9 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2192,7 +2192,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val & 0xffffffff;
if (strcmp(type, "u64") == 0 || - strcmp(type, "s64")) + strcmp(type, "s64") == 0) return val;
if (strcmp(type, "s8") == 0)
From: Tony Camuso tcamuso@redhat.com
[ Upstream commit a885bcfd152f97b25005298ab2d6b741aed9b49c ]
The intended behavior of function ipmi_hardcode_init_one() is to default to kcs interface when no type argument is presented when initializing ipmi with hard coded addresses.
However, the array of char pointers allocated on the stack by function ipmi_hardcode_init() was not inited to zeroes, so it contained stack debris.
Consequently, passing the cruft stored in this array to function ipmi_hardcode_init_one() caused a crash when it was unable to detect that the char * being passed was nonsense and tried to access the address specified by the bogus pointer.
The fix is simply to initialize the si_type array to zeroes, so if there were no type argument given to at the command line, function ipmi_hardcode_init_one() could properly default to the kcs interface.
Signed-off-by: Tony Camuso tcamuso@redhat.com Message-Id: 1554837603-40299-1-git-send-email-tcamuso@redhat.com Signed-off-by: Corey Minyard cminyard@mvista.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/char/ipmi/ipmi_si_hardcode.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/char/ipmi/ipmi_si_hardcode.c b/drivers/char/ipmi/ipmi_si_hardcode.c index 9ae2405c28bb..0c28e872ad3a 100644 --- a/drivers/char/ipmi/ipmi_si_hardcode.c +++ b/drivers/char/ipmi/ipmi_si_hardcode.c @@ -200,6 +200,8 @@ void __init ipmi_hardcode_init(void) char *str; char *si_type[SI_MAX_PARMS];
+ memset(si_type, 0, sizeof(si_type)); + /* Parse out the si_type string into its components. */ str = si_type_str; if (*str != '\0') {
From: Claudiu Manoil claudiu.manoil@nxp.com
[ Upstream commit a8fd48b50deaa20808bbf0f6685f6f1acba6a64c ]
Preemption disabled at: [<ffff000008cabd54>] dev_set_rx_mode+0x1c/0x38 Call trace: [<ffff00000808a5c0>] dump_backtrace+0x0/0x3d0 [<ffff00000808a9a4>] show_stack+0x14/0x20 [<ffff000008e6c0c0>] dump_stack+0xac/0xe4 [<ffff0000080fe76c>] ___might_sleep+0x164/0x238 [<ffff0000080fe890>] __might_sleep+0x50/0x88 [<ffff0000082261e4>] kmem_cache_alloc+0x17c/0x1d0 [<ffff000000ea0ae8>] ocelot_set_rx_mode+0x108/0x188 [mscc_ocelot_common] [<ffff000008cabcf0>] __dev_set_rx_mode+0x58/0xa0 [<ffff000008cabd5c>] dev_set_rx_mode+0x24/0x38
Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support")
Signed-off-by: Claudiu Manoil claudiu.manoil@nxp.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/mscc/ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 0bdd3c400c92..10291198decd 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -605,7 +605,7 @@ static int ocelot_mact_mc_add(struct ocelot_port *port, struct netdev_hw_addr *hw_addr) { struct ocelot *ocelot = port->ocelot; - struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL); + struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_ATOMIC);
if (!ha) return -ENOMEM;
From: Baoquan He bhe@redhat.com
[ Upstream commit ec3937107ab43f3e8b2bc9dad95710043c462ff7 ]
kernel_randomize_memory() uses __PHYSICAL_MASK_SHIFT to calculate the maximum amount of system RAM supported. The size of the direct mapping section is obtained from the smaller one of the below two values:
(actual system RAM size + padding size) vs (max system RAM size supported)
This calculation is wrong since commit
b83ce5ee9147 ("x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52").
In it, __PHYSICAL_MASK_SHIFT was changed to be 52, regardless of whether the kernel is using 4-level or 5-level page tables. Thus, it will always use 4 PB as the maximum amount of system RAM, even in 4-level paging mode where it should actually be 64 TB.
Thus, the size of the direct mapping section will always be the sum of the actual system RAM size plus the padding size.
Even when the amount of system RAM is 64 TB, the following layout will still be used. Obviously KALSR will be weakened significantly.
|____|_______actual RAM_______|_padding_|______the rest_______| 0 64TB ~120TB
Instead, it should be like this:
|____|_______actual RAM_______|_________the rest______________| 0 64TB ~120TB
The size of padding region is controlled by CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING, which is 10 TB by default.
The above issue only exists when CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING is set to a non-zero value, which is the case when CONFIG_MEMORY_HOTPLUG is enabled. Otherwise, using __PHYSICAL_MASK_SHIFT doesn't affect KASLR.
Fix it by replacing __PHYSICAL_MASK_SHIFT with MAX_PHYSMEM_BITS.
[ bp: Massage commit message. ]
Fixes: b83ce5ee9147 ("x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52") Signed-off-by: Baoquan He bhe@redhat.com Signed-off-by: Borislav Petkov bp@suse.de Reviewed-by: Thomas Garnier thgarnie@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Andy Lutomirski luto@kernel.org Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Ingo Molnar mingo@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: frank.ramsay@hpe.com Cc: herbert@gondor.apana.org.au Cc: kirill@shutemov.name Cc: mike.travis@hpe.com Cc: thgarnie@google.com Cc: x86-ml x86@kernel.org Cc: yamada.masahiro@socionext.com Link: https://lkml.kernel.org/r/20190417083536.GE7065@MiWiFi-R3L-srv Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/mm/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 61db77b0eda9..0988971069c9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -93,7 +93,7 @@ void __init kernel_randomize_memory(void) if (!kaslr_memory_enabled()) return;
- kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); + kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT); kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
/*
From: Christoph Hellwig hch@lst.de
[ Upstream commit 144ec97493af34efdb77c5aba146e9c7de8d0a06 ]
Instead of relying on the now removed NULL argument to pci_alloc_consistent, switch to the generic DMA API, and store the struct device so that we can pass it.
Fixes: 4167b2ad5182 ("PCI: Remove NULL device handling from PCI DMA API") Reported-by: Matthew Whitehead tedheadster@gmail.com Signed-off-by: Christoph Hellwig hch@lst.de Tested-by: Matthew Whitehead tedheadster@gmail.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/aic7xxx/aic7770_osm.c | 1 + drivers/scsi/aic7xxx/aic7xxx.h | 1 + drivers/scsi/aic7xxx/aic7xxx_osm.c | 10 ++++------ drivers/scsi/aic7xxx/aic7xxx_osm_pci.c | 1 + 4 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/scsi/aic7xxx/aic7770_osm.c b/drivers/scsi/aic7xxx/aic7770_osm.c index 3d401d02c019..bdd177e3d762 100644 --- a/drivers/scsi/aic7xxx/aic7770_osm.c +++ b/drivers/scsi/aic7xxx/aic7770_osm.c @@ -91,6 +91,7 @@ aic7770_probe(struct device *dev) ahc = ahc_alloc(&aic7xxx_driver_template, name); if (ahc == NULL) return (ENOMEM); + ahc->dev = dev; error = aic7770_config(ahc, aic7770_ident_table + edev->id.driver_data, eisaBase); if (error != 0) { diff --git a/drivers/scsi/aic7xxx/aic7xxx.h b/drivers/scsi/aic7xxx/aic7xxx.h index 4ce4e903a759..7f6e83296dfa 100644 --- a/drivers/scsi/aic7xxx/aic7xxx.h +++ b/drivers/scsi/aic7xxx/aic7xxx.h @@ -949,6 +949,7 @@ struct ahc_softc { * Platform specific device information. */ ahc_dev_softc_t dev_softc; + struct device *dev;
/* * Bus specific device information. diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index c6be3aeb302b..306d0bf33478 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -861,8 +861,8 @@ int ahc_dmamem_alloc(struct ahc_softc *ahc, bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { - *vaddr = pci_alloc_consistent(ahc->dev_softc, - dmat->maxsize, mapp); + /* XXX: check if we really need the GFP_ATOMIC and unwind this mess! */ + *vaddr = dma_alloc_coherent(ahc->dev, dmat->maxsize, mapp, GFP_ATOMIC); if (*vaddr == NULL) return ENOMEM; return 0; @@ -872,8 +872,7 @@ void ahc_dmamem_free(struct ahc_softc *ahc, bus_dma_tag_t dmat, void* vaddr, bus_dmamap_t map) { - pci_free_consistent(ahc->dev_softc, dmat->maxsize, - vaddr, map); + dma_free_coherent(ahc->dev, dmat->maxsize, vaddr, map); }
int @@ -1124,8 +1123,7 @@ ahc_linux_register_host(struct ahc_softc *ahc, struct scsi_host_template *templa
host->transportt = ahc_linux_transport_template;
- retval = scsi_add_host(host, - (ahc->dev_softc ? &ahc->dev_softc->dev : NULL)); + retval = scsi_add_host(host, ahc->dev); if (retval) { printk(KERN_WARNING "aic7xxx: scsi_add_host failed\n"); scsi_host_put(host); diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c index 0fc14dac7070..717d8d1082ce 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c @@ -250,6 +250,7 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } ahc->dev_softc = pci; + ahc->dev = &pci->dev; error = ahc_pci_config(ahc, entry); if (error != 0) { ahc_free(ahc);
From: Johannes Weiner hannes@cmpxchg.org
[ Upstream commit 3b991208b897f52507168374033771a984b947b1 ]
During !CONFIG_CGROUP reclaim, we expand the inactive list size if it's thrashing on the node that is about to be reclaimed. But when cgroups are enabled, we suddenly ignore the node scope and use the cgroup scope only. The result is that pressure bleeds between NUMA nodes depending on whether cgroups are merely compiled into Linux. This behavioral difference is unexpected and undesirable.
When the refault adaptivity of the inactive list was first introduced, there were no statistics at the lruvec level - the intersection of node and memcg - so it was better than nothing.
But now that we have that infrastructure, use lruvec_page_state() to make the list balancing decision always NUMA aware.
[hannes@cmpxchg.org: fix bisection hole] Link: http://lkml.kernel.org/r/20190417155241.GB23013@cmpxchg.org Link: http://lkml.kernel.org/r/20190412144438.2645-1-hannes@cmpxchg.org Fixes: 2a2e48854d70 ("mm: vmscan: fix IO/refault regression in cache workingset transition") Signed-off-by: Johannes Weiner hannes@cmpxchg.org Reviewed-by: Shakeel Butt shakeelb@google.com Cc: Roman Gushchin guro@fb.com Cc: Michal Hocko mhocko@kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/vmscan.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c index 3830066018c1..ee545d1e9894 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2190,7 +2190,6 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct mem_cgroup *memcg, struct scan_control *sc, bool actual_reclaim) { enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; @@ -2211,16 +2210,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
- if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - /* * When refaults are being observed, it means a new workingset * is being established. Disable active list protection to get * rid of the stale workingset quickly. */ + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); if (file && actual_reclaim && lruvec->refaults != refaults) { inactive_ratio = 0; } else { @@ -2241,12 +2236,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, }
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct mem_cgroup *memcg, - struct scan_control *sc) + struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), - memcg, sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2346,7 +2339,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anonymous pages on the LRU in eligible zones. * Otherwise, the small LRU gets thrashed. */ - if (!inactive_list_is_low(lruvec, false, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, false, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_ANON; @@ -2364,7 +2357,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, true, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2517,7 +2510,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, memcg, sc); + lruvec, sc); } }
@@ -2584,7 +2577,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2982,12 +2975,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) unsigned long refaults; struct lruvec *lruvec;
- if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - lruvec = mem_cgroup_lruvec(pgdat, memcg); + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); lruvec->refaults = refaults; } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); } @@ -3344,7 +3333,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON);
From: Dan Williams dan.j.williams@intel.com
[ Upstream commit 6041186a32585fc7a1d0f6cfe2f138b05fdc3c82 ]
When a module option, or core kernel argument, toggles a static-key it requires jump labels to be initialized early. While x86, PowerPC, and ARM64 arrange for jump_label_init() to be called before parse_args(), ARM does not.
Kernel command line: rdinit=/sbin/init page_alloc.shuffle=1 panic=-1 console=ttyAMA0,115200 page_alloc.shuffle=1 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at ./include/linux/jump_label.h:303 page_alloc_shuffle+0x12c/0x1ac static_key_enable(): static key 'page_alloc_shuffle_key+0x0/0x4' used before call to jump_label_init() Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc4-next-20190410-00003-g3367c36ce744 #1 Hardware name: ARM Integrator/CP (Device Tree) [<c0011c68>] (unwind_backtrace) from [<c000ec48>] (show_stack+0x10/0x18) [<c000ec48>] (show_stack) from [<c07e9710>] (dump_stack+0x18/0x24) [<c07e9710>] (dump_stack) from [<c001bb1c>] (__warn+0xe0/0x108) [<c001bb1c>] (__warn) from [<c001bb88>] (warn_slowpath_fmt+0x44/0x6c) [<c001bb88>] (warn_slowpath_fmt) from [<c0b0c4a8>] (page_alloc_shuffle+0x12c/0x1ac) [<c0b0c4a8>] (page_alloc_shuffle) from [<c0b0c550>] (shuffle_store+0x28/0x48) [<c0b0c550>] (shuffle_store) from [<c003e6a0>] (parse_args+0x1f4/0x350) [<c003e6a0>] (parse_args) from [<c0ac3c00>] (start_kernel+0x1c0/0x488)
Move the fallback call to jump_label_init() to occur before parse_args().
The redundant calls to jump_label_init() in other archs are left intact in case they have static key toggling use cases that are even earlier than option parsing.
Link: http://lkml.kernel.org/r/155544804466.1032396.13418949511615676665.stgit@dwi... Signed-off-by: Dan Williams dan.j.williams@intel.com Reported-by: Guenter Roeck groeck@google.com Reviewed-by: Kees Cook keescook@chromium.org Cc: Mathieu Desnoyers mathieu.desnoyers@efficios.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Mike Rapoport rppt@linux.ibm.com Cc: Russell King rmk@armlinux.org.uk Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- init/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/init/main.c b/init/main.c index e083fac08aed..020972fed117 100644 --- a/init/main.c +++ b/init/main.c @@ -568,6 +568,8 @@ asmlinkage __visible void __init start_kernel(void) page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line); + /* parameters may set static keys */ + jump_label_init(); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, @@ -577,8 +579,6 @@ asmlinkage __visible void __init start_kernel(void) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, NULL, set_init_arg);
- jump_label_init(); - /* * These use large bootmem allocations and must precede * kmem_cache_init()
From: Florian Westphal fw@strlen.de
[ Upstream commit becf2319f320cae43e20cf179cc51a355a0deb5f ]
When an icmp error such as pkttoobig is received, conntrack checks if the "inner" header (header of packet that did not fit link mtu) is matches an existing connection, and, if so, sets that packet as being related to the conntrack entry it found.
It was recently reported that this "related" setting also works if the inner header is from another, different connection (i.e., artificial/forged icmp error).
Add a test, followup patch will add additional "inner dst matches outer dst in reverse direction" check before setting related state.
Link: https://www.synacktiv.com/posts/systems/icmp-reachable.html Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/netfilter/Makefile | 2 +- .../netfilter/conntrack_icmp_related.sh | 283 ++++++++++++++++++ 2 files changed, 284 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/conntrack_icmp_related.sh
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index c9ff2b47bd1c..a37cb1192c6a 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh new file mode 100755 index 000000000000..b48e1833bc89 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# +# check that ICMP df-needed/pkttoobig icmp are set are set as related +# state +# +# Setup is: +# +# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2 +# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280). +# ping nsclient2 from nsclient1, checking that conntrack did set RELATED +# 'fragmentation needed' icmp packet. +# +# In addition, nsrouter1 will perform IP masquerading, i.e. also +# check the icmp errors are propagated to the correct host as per +# nat of "established" icmp-echo "connection". + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + for i in 1 2;do ip netns del nsclient$i;done + for i in 1 2;do ip netns del nsrouter$i;done +} + +ipv4() { + echo -n 192.168.$1.2 +} + +ipv6 () { + echo -n dead:$1::2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +check_unknown() +{ + expect="packets 0 bytes 0" + for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + check_counter $n "unknown" "$expect" + if [ $? -ne 0 ] ;then + return 1 + fi + done + + return 0 +} + +for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + ip netns add $n + ip -net $n link set lo up +done + +DEV=veth0 +ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1 +DEV=veth0 +ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2 + +DEV=veth0 +ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2 + +DEV=veth0 +for i in 1 2; do + ip -net nsclient$i link set $DEV up + ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV + ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV +done + +ip -net nsrouter1 link set eth1 up +ip -net nsrouter1 link set veth0 up + +ip -net nsrouter2 link set eth1 up +ip -net nsrouter2 link set eth2 up + +ip -net nsclient1 route add default via 192.168.1.1 +ip -net nsclient1 -6 route add default via dead:1::1 + +ip -net nsclient2 route add default via 192.168.2.1 +ip -net nsclient2 route add default via dead:2::1 + +i=3 +ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1 +ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0 +ip -net nsrouter1 addr add dead:1::1/64 dev eth1 +ip -net nsrouter1 addr add dead:3::1/64 dev veth0 +ip -net nsrouter1 route add default via 192.168.3.10 +ip -net nsrouter1 -6 route add default via dead:3::10 + +ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1 +ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2 +ip -net nsrouter2 addr add dead:2::1/64 dev eth1 +ip -net nsrouter2 addr add dead:3::10/64 dev eth2 +ip -net nsrouter2 route add default via 192.168.3.1 +ip -net nsrouter2 route add default via dead:3::1 + +sleep 2 +for i in 4 6; do + ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1 +done + +for netns in nsrouter1 nsrouter2; do +ip netns exec $netns nft -f - <<EOF +table inet filter { + counter unknown { } + counter related { } + chain forward { + type filter hook forward priority 0; policy accept; + meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept + meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept + meta l4proto { icmp, icmpv6 } ct state new,established accept + counter name "unknown" drop + } +} +EOF +done + +ip netns exec nsclient1 nft -f - <<EOF +table inet filter { + counter unknown { } + counter related { } + chain input { + type filter hook input priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept + counter name "unknown" drop + } +} +EOF + +ip netns exec nsclient2 nft -f - <<EOF +table inet filter { + counter unknown { } + counter new { } + counter established { } + + chain input { + type filter hook input priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept + meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept + counter name "unknown" drop + } + chain output { + type filter hook output priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" + meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" + counter name "unknown" drop + } +} +EOF + + +# make sure NAT core rewrites adress of icmp error if nat is used according to +# conntrack nat information (icmp error will be directed at nsrouter1 address, +# but it needs to be routed to nsclient1 address). +ip netns exec nsrouter1 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + ip protocol icmp oifname "veth0" counter masquerade + } +} +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + ip6 nexthdr icmpv6 oifname "veth0" counter masquerade + } +} +EOF + +ip netns exec nsrouter2 ip link set eth1 mtu 1280 +ip netns exec nsclient2 ip link set veth0 mtu 1280 +sleep 1 + +ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ip routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi +ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi + +check_unknown +if [ $? -ne 0 ]; then + ret=1 +fi + +expect="packets 0 bytes 0" +for netns in nsrouter1 nsrouter2 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +expect="packets 2 bytes 2076" +check_counter nsclient2 "new" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +# nsrouter2 should have generated the icmp error, so +# related counter should be 0 (its in forward). +expect="packets 0 bytes 0" +check_counter "nsrouter2" "related" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +# but nsrouter1 should have seen it, same for nsclient1. +expect="packets 1 bytes 576" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +expect="packets 2 bytes 1856" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +if [ $ret -eq 0 ];then + echo "PASS: icmp mtu error had RELATED state" +else + echo "ERROR: icmp error RELATED state test has failed" +fi + +cleanup +exit $ret
From: Julian Anastasov ja@ssi.bg
[ Upstream commit 0261ea1bd1eb0da5c0792a9119b8655cf33c80a3 ]
We can receive ICMP errors from client or from tunneling real server. While the former can be scheduled to real server, the latter should not be scheduled, they are decapsulated only when existing connection is found.
Fixes: 6044eeffafbe ("ipvs: attempt to schedule icmp packets") Signed-off-by: Julian Anastasov ja@ssi.bg Signed-off-by: Simon Horman horms@verge.net.au Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 3f963ea22277..a42c1bc7c698 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1647,7 +1647,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (!cp) { int v;
- if (!sysctl_schedule_icmp(ipvs)) + if (ipip || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT;
if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
From: Florian Westphal fw@strlen.de
[ Upstream commit 3c79107631db1f7fd32cf3f7368e4672004a3010 ]
else, we leak the addresses to userspace via ctnetlink events and dumps.
Compute an ID on demand based on the immutable parts of nf_conn struct.
Another advantage compared to using an address is that there is no immediate re-use of the same ID in case the conntrack entry is freed and reallocated again immediately.
Fixes: 3583240249ef ("[NETFILTER]: nf_conntrack_expect: kill unique ID") Fixes: 7f85f914721f ("[NETFILTER]: nf_conntrack: kill unique ID") Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 35 ++++++++++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++++++++---- 3 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 7e012312cd61..f45141bdbb83 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -313,6 +313,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl);
+u32 nf_ct_get_id(const struct nf_conn *ct); + static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9a249478abf2..27eff89fad01 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/err.h> #include <linux/percpu.h> #include <linux/moduleparam.h> @@ -424,6 +425,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 036207ecaf16..47e5a076522d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/siphash.h>
#include <linux/netfilter.h> #include <net/netlink.h> @@ -487,7 +488,9 @@ static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct)
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0;
@@ -1275,8 +1278,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, }
if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2675,6 +2679,25 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
static const union nf_inet_addr any_addr;
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2722,7 +2745,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -3027,7 +3050,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; }
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit 33d1c018179d0a30c39cc5f1682b77867282694b ]
I believe that "hook->num" can be up to UINT_MAX. Shifting more than 31 bits would is undefined in C but in practice it would lead to shift wrapping. That would lead to an array overflow in nf_tables_addchain():
ops->hook = hook.type->hooks[ops->hooknum];
Fixes: fe19c04ca137 ("netfilter: nf_tables: remove nhooks field from struct nft_af_info") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1af54119bafc..f272f9538c44 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1496,7 +1496,7 @@ static int nft_chain_parse_hook(struct net *net, if (IS_ERR(type)) return PTR_ERR(type); } - if (!(type->hook_mask & (1 << hook->num))) + if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP;
if (type->type == NFT_CHAIN_T_NAT &&
From: Petr Å tetiar ynezz@true.cz
[ Upstream commit a1e8783db8e0d58891681bc1e6d9ada66eae8e20 ]
Currently it's not possible to use perf on ath79 due to genirq flags mismatch happening on static virtual IRQ 13 which is used for performance counters hardware IRQ 5.
On TP-Link Archer C7v5:
CPU0 2: 0 MIPS 2 ath9k 4: 318 MIPS 4 19000000.eth 7: 55034 MIPS 7 timer 8: 1236 MISC 3 ttyS0 12: 0 INTC 1 ehci_hcd:usb1 13: 0 gpio-ath79 2 keys 14: 0 gpio-ath79 5 keys 15: 31 AR724X PCI 1 ath10k_pci
$ perf top genirq: Flags mismatch irq 13. 00014c83 (mips_perf_pmu) vs. 00002003 (keys)
On TP-Link Archer C7v4:
CPU0 4: 0 MIPS 4 19000000.eth 5: 7135 MIPS 5 1a000000.eth 7: 98379 MIPS 7 timer 8: 30 MISC 3 ttyS0 12: 90028 INTC 0 ath9k 13: 5520 INTC 1 ehci_hcd:usb1 14: 4623 INTC 2 ehci_hcd:usb2 15: 32844 AR724X PCI 1 ath10k_pci 16: 0 gpio-ath79 16 keys 23: 0 gpio-ath79 23 keys
$ perf top genirq: Flags mismatch irq 13. 00014c80 (mips_perf_pmu) vs. 00000080 (ehci_hcd:usb1)
This problem is happening, because currently statically assigned virtual IRQ 13 for performance counters is not claimed during the initialization of MIPS PMU during the bootup, so the IRQ subsystem doesn't know, that this interrupt isn't available for further use.
So this patch fixes the issue by simply booking hardware IRQ 5 for MIPS PMU.
Tested-by: Kevin 'ldir' Darbyshire-Bryant ldir@darbyshire-bryant.me.uk Signed-off-by: Petr Å tetiar ynezz@true.cz Acked-by: John Crispin john@phrozen.org Acked-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Paul Burton paul.burton@mips.com Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle ralf@linux-mips.org Cc: James Hogan jhogan@kernel.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Jason Cooper jason@lakedaemon.net Signed-off-by: Sasha Levin sashal@kernel.org --- arch/mips/ath79/setup.c | 6 ------ drivers/irqchip/irq-ath79-misc.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 4c7a93f4039a..7c0b2e6cdfbd 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -211,12 +211,6 @@ const char *get_system_type(void) return ath79_sys_type; }
-int get_c0_perfcount_int(void) -{ - return ATH79_MISC_IRQ(5); -} -EXPORT_SYMBOL_GPL(get_c0_perfcount_int); - unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c index aa7290784636..0390603170b4 100644 --- a/drivers/irqchip/irq-ath79-misc.c +++ b/drivers/irqchip/irq-ath79-misc.c @@ -22,6 +22,15 @@ #define AR71XX_RESET_REG_MISC_INT_ENABLE 4
#define ATH79_MISC_IRQ_COUNT 32 +#define ATH79_MISC_PERF_IRQ 5 + +static int ath79_perfcount_irq; + +int get_c0_perfcount_int(void) +{ + return ath79_perfcount_irq; +} +EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
static void ath79_misc_irq_handler(struct irq_desc *desc) { @@ -113,6 +122,8 @@ static void __init ath79_misc_intc_domain_init( { void __iomem *base = domain->host_data;
+ ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ); + /* Disable and clear all interrupts */ __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit 27b141fc234a3670d21bd742c35d7205d03cbb3a ]
clang points out that the return code from this function is undefined for one of the error paths:
../drivers/s390/net/ctcm_main.c:1595:7: warning: variable 'result' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1638:9: note: uninitialized use occurs here return result; ^~~~~~ ../drivers/s390/net/ctcm_main.c:1595:3: note: remove the 'if' if its condition is always false if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1539:12: note: initialize the variable 'result' to silence this warning int result; ^
Make it return -ENODEV here, as in the related failure cases. gcc has a known bug in underreporting some of these warnings when it has already eliminated the assignment of the return code based on some earlier optimization step.
Reviewed-by: Nathan Chancellor natechancellor@gmail.com Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Julian Wiedmann jwi@linux.ibm.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/net/ctcm_main.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 7617d21cb296..f63c5c871d3d 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1595,6 +1595,7 @@ static int ctcm_new_device(struct ccwgroup_device *cgdev) if (priv->channel[direction] == NULL) { if (direction == CTCM_WRITE) channel_free(priv->channel[CTCM_READ]); + result = -ENODEV; goto out_dev; } priv->channel[direction]->netdev = dev;
From: Paul Kocialkowski paul.kocialkowski@bootlin.com
[ Upstream commit 02b92adbe33e6dbd15dc6e32540b22f47c4ff0a2 ]
Our sun4i_drv_unbind gets the drm device using dev_get_drvdata. However, that driver data is never set in sun4i_drv_bind.
Set it there to avoid getting a NULL pointer at unbind time.
Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski paul.kocialkowski@bootlin.com Signed-off-by: Maxime Ripard maxime.ripard@bootlin.com Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-3-paul.koc... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 8b0cd08034e0..7cac01c72c02 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -92,6 +92,8 @@ static int sun4i_drv_bind(struct device *dev) ret = -ENOMEM; goto free_drm; } + + dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list);
From: Paul Kocialkowski paul.kocialkowski@bootlin.com
[ Upstream commit f5a9ed867c83875546c9aadd4ed8e785e9adcc3c ]
For our component-backed driver to be properly removed, we need to delete the component master in sun4i_drv_remove and make sure to call component_unbind_all in the master's unbind so that all components are unbound when the master is.
Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski paul.kocialkowski@bootlin.com Signed-off-by: Maxime Ripard maxime.ripard@bootlin.com Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-4-paul.koc... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/sun4i/sun4i_drv.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 7cac01c72c02..62703630090a 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -160,6 +160,8 @@ static void sun4i_drv_unbind(struct device *dev) drm_mode_config_cleanup(drm); of_reserved_mem_device_release(dev); drm_dev_put(drm); + + component_unbind_all(dev, NULL); }
static const struct component_master_ops sun4i_drv_master_ops = { @@ -407,6 +409,8 @@ static int sun4i_drv_probe(struct platform_device *pdev)
static int sun4i_drv_remove(struct platform_device *pdev) { + component_master_del(&pdev->dev, &sun4i_drv_master_ops); + return 0; }
From: Po-Hsu Lin po-hsu.lin@canonical.com
[ Upstream commit 30c04d796b693e22405c38e9b78e9a364e4c77e6 ]
The run_netsocktests will be marked as passed regardless the actual test result from the ./socket:
selftests: net: run_netsocktests ======================================== -------------------- running socket test -------------------- [FAIL] ok 1..6 selftests: net: run_netsocktests [PASS]
This is because the test script itself has been successfully executed. Fix this by exit 1 when the test failed.
Signed-off-by: Po-Hsu Lin po-hsu.lin@canonical.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/net/run_netsocktests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index b093f39c298c..14e41faf2c57 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -7,7 +7,7 @@ echo "--------------------" ./socket if [ $? -ne 0 ]; then echo "[FAIL]" + exit 1 else echo "[PASS]" fi -
From: Andrei Vagin avagin@gmail.com
[ Upstream commit d48668052b2603b6262459625c86108c493588dd ]
It doesn't log a packet if sysctl_log_invalid isn't equal to protonum OR sysctl_log_invalid isn't equal to IPPROTO_RAW. This sentence is always true. I believe we need to replace OR to AND.
Cc: Florian Westphal fw@strlen.de Fixes: c4f3db1595827 ("netfilter: conntrack: add and use nf_l4proto_log_invalid") Signed-off-by: Andrei Vagin avagin@gmail.com Acked-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/nf_conntrack_proto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 51c5d7eec0a3..e903ef9b96cf 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -86,7 +86,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb, struct va_format vaf; va_list args;
- if (net->ct.sysctl_log_invalid != protonum || + if (net->ct.sysctl_log_invalid != protonum && net->ct.sysctl_log_invalid != IPPROTO_RAW) return;
From: Lucas Stach l.stach@pengutronix.de
[ Upstream commit d4fad0a426c6e26f48c9a7cdd21a7fe9c198d645 ]
Initialize the flow input colorspaces to unknown and reset to that value when the channel gets disabled. This avoids the state getting mixed up with a previous mode.
Also keep the CSC settings for the background flow intact when disabling the foreground flow.
Root-caused-by: Jonathan Marek jonathan@marek.ca Signed-off-by: Lucas Stach l.stach@pengutronix.de Signed-off-by: Philipp Zabel p.zabel@pengutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/ipu-v3/ipu-dp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c index 9b2b3fa479c4..5e44ff1f2085 100644 --- a/drivers/gpu/ipu-v3/ipu-dp.c +++ b/drivers/gpu/ipu-v3/ipu-dp.c @@ -195,7 +195,8 @@ int ipu_dp_setup_channel(struct ipu_dp *dp, ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs, DP_COM_CONF_CSC_DEF_BOTH); } else { - if (flow->foreground.in_cs == flow->out_cs) + if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN || + flow->foreground.in_cs == flow->out_cs) /* * foreground identical to output, apply color * conversion on background @@ -261,6 +262,8 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync) struct ipu_dp_priv *priv = flow->priv; u32 reg, csc;
+ dp->in_cs = IPUV3_COLORSPACE_UNKNOWN; + if (!dp->foreground) return;
@@ -268,8 +271,9 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync)
reg = readl(flow->base + DP_COM_CONF); csc = reg & DP_COM_CONF_CSC_DEF_MASK; - if (csc == DP_COM_CONF_CSC_DEF_FG) - reg &= ~DP_COM_CONF_CSC_DEF_MASK; + reg &= ~DP_COM_CONF_CSC_DEF_MASK; + if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG) + reg |= DP_COM_CONF_CSC_DEF_BG;
reg &= ~DP_COM_CONF_FG_EN; writel(reg, flow->base + DP_COM_CONF); @@ -347,6 +351,8 @@ int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) mutex_init(&priv->mutex);
for (i = 0; i < IPUV3_NUM_FLOWS; i++) { + priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN; + priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN; priv->flow[i].foreground.foreground = true; priv->flow[i].base = priv->base + ipu_dp_flow_base[i]; priv->flow[i].priv = priv;
From: Lucas Stach l.stach@pengutronix.de
[ Upstream commit 7bcde275eb1d0ac8793c77c7e666a886eb16633d ]
In order to make sure that the plane color space gets reset correctly.
Signed-off-by: Lucas Stach l.stach@pengutronix.de Signed-off-by: Philipp Zabel p.zabel@pengutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/imx/ipuv3-crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 7d4b710b837a..11e2dcdd6b18 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -78,7 +78,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc, if (disable_partial) ipu_plane_disable(ipu_crtc->plane[1], true); if (disable_full) - ipu_plane_disable(ipu_crtc->plane[0], false); + ipu_plane_disable(ipu_crtc->plane[0], true); }
static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
From: Tigran Tadevosyan tigran.tadevosyan@arm.com
[ Upstream commit c3143967807adb1357c36b68a7563fc0c4e1f615 ]
When CONFIG_ARM_MPU is not defined, the base address of v7M SCB register is not initialized with correct value. This prevents enabling I/D caches when the L1 cache poilcy is applied in kernel.
Fixes: 3c24121039c9da14692eb48f6e39565b28c0f3cf ("ARM: 8756/1: NOMMU: Postpone MPU activation till __after_proc_init") Signed-off-by: Tigran Tadevosyan tigran.tadevosyan@arm.com Signed-off-by: Vladimir Murzin vladimir.murzin@arm.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/kernel/head-nommu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index ec29de250076..cab89479d15e 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -133,9 +133,9 @@ __secondary_data: */ .text __after_proc_init: -#ifdef CONFIG_ARM_MPU M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) +#ifdef CONFIG_ARM_MPU M_CLASS(ldr r3, [r12, 0x50]) AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0 and r3, r3, #(MMFR0_PMSA) @ PMSA field
From: Daniel Gomez dagmcr@gmail.com
[ Upstream commit 2f23a2a768bee7ad2ff1e9527c3f7e279e794a46 ]
Add missing <of_device_id> table for SPI driver relying on SPI device match since compatible is in a DT binding or in a DTS.
Before this patch: modinfo drivers/net/phy/spi_ks8995.ko | grep alias alias: spi:ksz8795 alias: spi:ksz8864 alias: spi:ks8995
After this patch: modinfo drivers/net/phy/spi_ks8995.ko | grep alias alias: spi:ksz8795 alias: spi:ksz8864 alias: spi:ks8995 alias: of:N*T*Cmicrel,ksz8795C* alias: of:N*T*Cmicrel,ksz8795 alias: of:N*T*Cmicrel,ksz8864C* alias: of:N*T*Cmicrel,ksz8864 alias: of:N*T*Cmicrel,ks8995C* alias: of:N*T*Cmicrel,ks8995
Reported-by: Javier Martinez Canillas javier@dowhile0.org Signed-off-by: Daniel Gomez dagmcr@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/phy/spi_ks8995.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index f17b3441779b..d8ea4147dfe7 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -162,6 +162,14 @@ static const struct spi_device_id ks8995_id[] = { }; MODULE_DEVICE_TABLE(spi, ks8995_id);
+static const struct of_device_id ks8895_spi_of_match[] = { + { .compatible = "micrel,ks8995" }, + { .compatible = "micrel,ksz8864" }, + { .compatible = "micrel,ksz8795" }, + { }, + }; +MODULE_DEVICE_TABLE(of, ks8895_spi_of_match); + static inline u8 get_chip_id(u8 val) { return (val >> ID1_CHIPID_S) & ID1_CHIPID_M; @@ -529,6 +537,7 @@ static int ks8995_remove(struct spi_device *spi) static struct spi_driver ks8995_driver = { .driver = { .name = "spi-ks8995", + .of_match_table = of_match_ptr(ks8895_spi_of_match), }, .probe = ks8995_probe, .remove = ks8995_remove,
From: Daniel Gomez dagmcr@gmail.com
[ Upstream commit d04830531d0c4a99c897a44038e5da3d23331d2f ]
Add missing <of_device_id> table for SPI driver relying on SPI device match since compatible is in a DT binding or in a DTS.
Before this patch: modinfo drivers/nfc/st95hf/st95hf.ko | grep alias alias: spi:st95hf
After this patch: modinfo drivers/nfc/st95hf/st95hf.ko | grep alias alias: spi:st95hf alias: of:N*T*Cst,st95hfC* alias: of:N*T*Cst,st95hf
Reported-by: Javier Martinez Canillas javier@dowhile0.org Signed-off-by: Daniel Gomez dagmcr@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nfc/st95hf/core.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index 2b26f762fbc3..01acb6e53365 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1074,6 +1074,12 @@ static const struct spi_device_id st95hf_id[] = { }; MODULE_DEVICE_TABLE(spi, st95hf_id);
+static const struct of_device_id st95hf_spi_of_match[] = { + { .compatible = "st,st95hf" }, + { }, +}; +MODULE_DEVICE_TABLE(of, st95hf_spi_of_match); + static int st95hf_probe(struct spi_device *nfc_spi_dev) { int ret; @@ -1260,6 +1266,7 @@ static struct spi_driver st95hf_driver = { .driver = { .name = "st95hf", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(st95hf_spi_of_match), }, .id_table = st95hf_id, .probe = st95hf_probe,
From: Qian Cai cai@lca.pw
[ Upstream commit 0d02113b31b2017dd349ec9df2314e798a90fa6e ]
The first kmemleak_scan() call after boot would trigger the crash below because this callpath:
kernel_init free_initmem mem_encrypt_free_decrypted_mem free_init_pages
unmaps memory inside the .bss when DEBUG_PAGEALLOC=y.
kmemleak_init() will register the .data/.bss sections and then kmemleak_scan() will scan those addresses and dereference them looking for pointer references. If free_init_pages() frees and unmaps pages in those sections, kmemleak_scan() will crash if referencing one of those addresses:
BUG: unable to handle kernel paging request at ffffffffbd402000 CPU: 12 PID: 325 Comm: kmemleak Not tainted 5.1.0-rc4+ #4 RIP: 0010:scan_block Call Trace: scan_gray_list kmemleak_scan kmemleak_scan_thread kthread ret_from_fork
Since kmemleak_free_part() is tolerant to unknown objects (not tracked by kmemleak), it is fine to call it from free_init_pages() even if not all address ranges passed to this function are known to kmemleak.
[ bp: Massage. ]
Fixes: b3f0907c71e0 ("x86/mm: Add .bss..decrypted section to hold shared variables") Signed-off-by: Qian Cai cai@lca.pw Signed-off-by: Borislav Petkov bp@suse.de Reviewed-by: Catalin Marinas catalin.marinas@arm.com Cc: Andy Lutomirski luto@kernel.org Cc: Brijesh Singh brijesh.singh@amd.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: x86-ml x86@kernel.org Link: https://lkml.kernel.org/r/20190423165811.36699-1-cai@lca.pw Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/mm/init.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d883869437b5..fb5f29c60019 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -6,6 +6,7 @@ #include <linux/bootmem.h> /* for max_low_pfn */ #include <linux/swapfile.h> #include <linux/swapops.h> +#include <linux/kmemleak.h>
#include <asm/set_memory.h> #include <asm/e820/api.h> @@ -767,6 +768,11 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) if (debug_pagealloc_enabled()) { pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n", begin, end - 1); + /* + * Inform kmemleak about the hole in the memory since the + * corresponding pages will be unmapped. + */ + kmemleak_free_part((void *)begin, end - begin); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); } else { /*
From: Paul Kocialkowski paul.kocialkowski@bootlin.com
[ Upstream commit e02bc29b2cfa7806830d6da8b2322cddd67e8dfe ]
Our components may still be using the DRM device driver (if only to access our driver's private data), so make sure to unbind them before the final drm_dev_put.
Also release our reserved memory after component unbind instead of before to match reverse creation order.
Fixes: f5a9ed867c83 ("drm/sun4i: Fix component unbinding and component master deletion") Signed-off-by: Paul Kocialkowski paul.kocialkowski@bootlin.com Reviewed-by: Chen-Yu Tsai wens@csie.org Link: https://patchwork.freedesktop.org/patch/msgid/20190424090413.6918-1-paul.koc... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/sun4i/sun4i_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 62703630090a..57f61ec4bc6b 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -158,10 +158,11 @@ static void sun4i_drv_unbind(struct device *dev) drm_kms_helper_poll_fini(drm); sun4i_framebuffer_free(drm); drm_mode_config_cleanup(drm); - of_reserved_mem_device_release(dev); - drm_dev_put(drm);
component_unbind_all(dev, NULL); + of_reserved_mem_device_release(dev); + + drm_dev_put(drm); }
static const struct component_master_ops sun4i_drv_master_ops = {
From: Pan Bian bianpan2016@163.com
[ Upstream commit bce1a78423961fce676ac65540a31b6ffd179e6d ]
The RMI4 function structure has been released in rmi_register_function if error occurs. However, it will be released again in the function rmi_create_function, which may result in a double-free bug.
Signed-off-by: Pan Bian bianpan2016@163.com Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/input/rmi4/rmi_driver.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index fc3ab93b7aea..7fb358f96195 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -860,7 +860,7 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
error = rmi_register_function(fn); if (error) - goto err_put_fn; + return error;
if (pdt->function_number == 0x01) data->f01_container = fn; @@ -870,10 +870,6 @@ static int rmi_create_function(struct rmi_device *rmi_dev, list_add_tail(&fn->node, &data->function_list);
return RMI_SCAN_CONTINUE; - -err_put_fn: - put_device(&fn->dev); - return error; }
void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake)
From: Nikolay Borisov nborisov@suse.com
[ Upstream commit a3d46aea46f99d134b4e0726e4826b824c3e5980 ]
Recent multi-page biovec rework allowed creation of bios that can span large regions - up to 128 megabytes in the case of btrfs. OTOH btrfs' submission path currently allocates a contiguous array to store the checksums for every bio submitted. This means we can request up to (128mb / BTRFS_SECTOR_SIZE) * 4 bytes + 32bytes of memory from kmalloc. On busy systems with possibly fragmented memory said kmalloc can fail which will trigger BUG_ON due to improper error handling IO submission context in btrfs.
Until error handling is improved or bios in btrfs limited to a more manageable size (e.g. 1m) let's use kvmalloc to fallback to vmalloc for such large allocations. There is no hard requirement that the memory allocated for checksums during IO submission has to be contiguous, but this is a simple fix that does not require several non-contiguous allocations.
For small writes this is unlikely to have any visible effect since kmalloc will still satisfy allocation requests as usual. For larger requests the code will just fallback to vmalloc.
We've performed evaluation on several workload types and there was no significant difference kmalloc vs kvmalloc.
Signed-off-by: Nikolay Borisov nborisov@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/file-item.c | 15 +++++++++++---- fs/btrfs/ordered-data.c | 3 ++- 2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ba74827beb32..2ba8de8c0d17 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -7,6 +7,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/highmem.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -434,9 +435,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, unsigned long this_sum_bytes = 0; int i; u64 offset; + unsigned nofs_flag; + + nofs_flag = memalloc_nofs_save(); + sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), + GFP_KERNEL); + memalloc_nofs_restore(nofs_flag);
- sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), - GFP_NOFS); if (!sums) return BLK_STS_RESOURCE;
@@ -479,8 +484,10 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
bytes_left = bio->bi_iter.bi_size - total_bytes;
- sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left), - GFP_NOFS); + nofs_flag = memalloc_nofs_save(); + sums = kvzalloc(btrfs_ordered_sum_size(fs_info, + bytes_left), GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); BUG_ON(!sums); /* -ENOMEM */ sums->len = bytes_left; ordered = btrfs_lookup_ordered_extent(inode, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 0c4ef208b8b9..b92edf0600e3 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -6,6 +6,7 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/writeback.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" @@ -442,7 +443,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) cur = entry->list.next; sum = list_entry(cur, struct btrfs_ordered_sum, list); list_del(&sum->list); - kfree(sum); + kvfree(sum); } kmem_cache_free(btrfs_ordered_extent_cache, entry); }
From: Lijun Ou oulijun@huawei.com
[ Upstream commit 2557fabd6e29f349bfa0ac13f38ac98aa5eafc74 ]
When the maximum send wr delivered by the user is zero, the qp does not have a sq.
When allocating the sq db buffer to store the user sq pi pointer and map it to the kernel mode, max_send_wr is used as the trigger condition, while the kernel does not consider the max_send_wr trigger condition when mapmping db. It will cause sq record doorbell map fail and create qp fail.
The failed print information as follows:
hns3 0000:7d:00.1: Send cmd: tail - 418, opcode - 0x8504, flag - 0x0011, retval - 0x0000 hns3 0000:7d:00.1: Send cmd: 0xe59dc000 0x00000000 0x00000000 0x00000000 0x00000116 0x0000ffff hns3 0000:7d:00.1: sq record doorbell map failed! hns3 0000:7d:00.1: Create RC QP failed
Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space") Signed-off-by: Lijun Ou oulijun@huawei.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index efb7e961ca65..2fa4fb17f6d3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -494,7 +494,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) { - if (attr->qp_type == IB_QPT_XRC_TGT) + if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr) return 0;
return 1;
From: David Hildenbrand david@redhat.com
[ Upstream commit 89c02e69fc5245f8a2f34b58b42d43a737af1a5e ]
Right now we are using find_memory_block() to get the node id for the pfn range to online. We are missing to drop a reference to the memory block device. While the device still gets unregistered via device_unregister(), resulting in no user visible problem, the device is never released via device_release(), resulting in a memory leak. Fix that by properly using a put_device().
Link: http://lkml.kernel.org/r/20190411110955.1430-1-david@redhat.com Fixes: d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug") Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Oscar Salvador osalvador@suse.de Reviewed-by: Wei Yang richard.weiyang@gmail.com Acked-by: Michal Hocko mhocko@suse.com Acked-by: Pankaj Gupta pagupta@redhat.com Cc: David Hildenbrand david@redhat.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Qian Cai cai@lca.pw Cc: Arun KS arunks@codeaurora.org Cc: Mathieu Malaterre malat@debian.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/memory_hotplug.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 156991edec2a..af6735562215 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -901,6 +901,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ */ mem = find_memory_block(__pfn_to_section(pfn)); nid = mem->nid; + put_device(&mem->dev);
/* associate pfn range with the zone */ zone = move_pfn_range(online_type, nid, pfn, nr_pages);
From: Nicholas Piggin npiggin@gmail.com
[ Upstream commit 1b5fc84aba170bdfe3533396ca9662ceea1609b7 ]
The NMI IPI timeout logic is broken, if __smp_send_nmi_ipi() times out on the first condition, delay_us will be zero which will send it into the second spin loop with no timeout so it will spin forever.
Fixes: 5b73151fff63 ("powerpc: NMI IPI make NMI IPIs fully sychronous") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/powerpc/kernel/smp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 61c1fadbc644..22abba5f4cf0 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -499,7 +499,7 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool if (delay_us) { delay_us--; if (!delay_us) - break; + goto timeout; } }
@@ -510,10 +510,11 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool if (delay_us) { delay_us--; if (!delay_us) - break; + goto timeout; } }
+timeout: if (!cpumask_empty(&nmi_ipi_pending_mask)) { /* Timeout waiting for CPUs to call smp_handle_nmi_ipi */ ret = 0;
From: Nicholas Piggin npiggin@gmail.com
[ Upstream commit 88b9a3d1425a436e95c41f09986fdae2daee437a ]
The xmon debugger IPI handler waits in the callback function while xmon is still active. This means they don't complete the IPI, and the initiator always times out waiting for them.
Things manage to work after the timeout because there is some fallback logic to keep NMI IPI state sane in case of the timeout, but this is a bit ugly.
This patch changes NMI IPI back to half-asynchronous (i.e., wait for everyone to call in, do not wait for IPI function to complete), but the complexity is avoided by going one step further and allowing new IPIs to be issued before the IPI functions to all complete.
If synchronization against that is required, it is left up to the caller, but current callers don't require that. In fact with the timeout handling, callers must be able to cope with this already.
Fixes: 5b73151fff63 ("powerpc: NMI IPI make NMI IPIs fully sychronous") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/powerpc/kernel/smp.c | 93 ++++++++++++--------------------------- 1 file changed, 29 insertions(+), 64 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 22abba5f4cf0..6dc43205382b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -338,13 +338,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) * NMI IPIs may not be recoverable, so should not be used as ongoing part of * a running system. They can be used for crash, debug, halt/reboot, etc. * - * NMI IPIs are globally single threaded. No more than one in progress at - * any time. - * * The IPI call waits with interrupts disabled until all targets enter the - * NMI handler, then the call returns. + * NMI handler, then returns. Subsequent IPIs can be issued before targets + * have returned from their handlers, so there is no guarantee about + * concurrency or re-entrancy. * - * No new NMI can be initiated until targets exit the handler. + * A new NMI can be issued before all targets exit the handler. * * The IPI call may time out without all targets entering the NMI handler. * In that case, there is some logic to recover (and ignore subsequent @@ -355,7 +354,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0); static struct cpumask nmi_ipi_pending_mask; -static int nmi_ipi_busy_count = 0; +static bool nmi_ipi_busy = false; static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
static void nmi_ipi_lock_start(unsigned long *flags) @@ -394,7 +393,7 @@ static void nmi_ipi_unlock_end(unsigned long *flags) */ int smp_handle_nmi_ipi(struct pt_regs *regs) { - void (*fn)(struct pt_regs *); + void (*fn)(struct pt_regs *) = NULL; unsigned long flags; int me = raw_smp_processor_id(); int ret = 0; @@ -405,29 +404,17 @@ int smp_handle_nmi_ipi(struct pt_regs *regs) * because the caller may have timed out. */ nmi_ipi_lock_start(&flags); - if (!nmi_ipi_busy_count) - goto out; - if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask)) - goto out; - - fn = nmi_ipi_function; - if (!fn) - goto out; - - cpumask_clear_cpu(me, &nmi_ipi_pending_mask); - nmi_ipi_busy_count++; - nmi_ipi_unlock(); - - ret = 1; - - fn(regs); - - nmi_ipi_lock(); - if (nmi_ipi_busy_count > 1) /* Can race with caller time-out */ - nmi_ipi_busy_count--; -out: + if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) { + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + fn = READ_ONCE(nmi_ipi_function); + WARN_ON_ONCE(!fn); + ret = 1; + } nmi_ipi_unlock_end(&flags);
+ if (fn) + fn(regs); + return ret; }
@@ -453,7 +440,7 @@ static void do_smp_send_nmi_ipi(int cpu, bool safe) * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. * - fn is the target callback function. * - delay_us > 0 is the delay before giving up waiting for targets to - * complete executing the handler, == 0 specifies indefinite delay. + * begin executing the handler, == 0 specifies indefinite delay. */ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe) { @@ -467,31 +454,33 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool if (unlikely(!smp_ops)) return 0;
- /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */ nmi_ipi_lock_start(&flags); - while (nmi_ipi_busy_count) { + while (nmi_ipi_busy) { nmi_ipi_unlock_end(&flags); - spin_until_cond(nmi_ipi_busy_count == 0); + spin_until_cond(!nmi_ipi_busy); nmi_ipi_lock_start(&flags); } - + nmi_ipi_busy = true; nmi_ipi_function = fn;
+ WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask)); + if (cpu < 0) { /* ALL_OTHERS */ cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask); cpumask_clear_cpu(me, &nmi_ipi_pending_mask); } else { - /* cpumask starts clear */ cpumask_set_cpu(cpu, &nmi_ipi_pending_mask); } - nmi_ipi_busy_count++; + nmi_ipi_unlock();
+ /* Interrupts remain hard disabled */ + do_smp_send_nmi_ipi(cpu, safe);
nmi_ipi_lock(); - /* nmi_ipi_busy_count is held here, so unlock/lock is okay */ + /* nmi_ipi_busy is set here, so unlock/lock is okay */ while (!cpumask_empty(&nmi_ipi_pending_mask)) { nmi_ipi_unlock(); udelay(1); @@ -499,34 +488,19 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool if (delay_us) { delay_us--; if (!delay_us) - goto timeout; + break; } }
- while (nmi_ipi_busy_count > 1) { - nmi_ipi_unlock(); - udelay(1); - nmi_ipi_lock(); - if (delay_us) { - delay_us--; - if (!delay_us) - goto timeout; - } - } - -timeout: if (!cpumask_empty(&nmi_ipi_pending_mask)) { /* Timeout waiting for CPUs to call smp_handle_nmi_ipi */ ret = 0; cpumask_clear(&nmi_ipi_pending_mask); } - if (nmi_ipi_busy_count > 1) { - /* Timeout waiting for CPUs to execute fn */ - ret = 0; - nmi_ipi_busy_count = 1; - }
- nmi_ipi_busy_count--; + nmi_ipi_function = NULL; + nmi_ipi_busy = false; + nmi_ipi_unlock_end(&flags);
return ret; @@ -594,17 +568,8 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) static void nmi_stop_this_cpu(struct pt_regs *regs) { /* - * This is a special case because it never returns, so the NMI IPI - * handling would never mark it as done, which makes any later - * smp_send_nmi_ipi() call spin forever. Mark it done now. - * * IRQs are already hard disabled by the smp_handle_nmi_ipi. */ - nmi_ipi_lock(); - if (nmi_ipi_busy_count > 1) - nmi_ipi_busy_count--; - nmi_ipi_unlock(); - spin_begin(); while (1) spin_cpu_relax();
From: Heiner Kallweit hkallweit1@gmail.com
[ Upstream commit 5ceaeb99ffb4dc002d20f6ac243c19a85e2c7a76 ]
This patches fixes few issues in mv88e6390x_port_set_cmode().
1. When entering the function the old cmode may be 0, in this case mv88e6390x_serdes_get_lane() returns -ENODEV. As result we bail out and have no chance to set a new mode. Therefore deal properly with -ENODEV.
2. Once we have disabled power and irq, let's set the cached cmode to 0. This reflects the actual status and is cleaner if we bail out with an error in the following function calls.
3. The cached cmode is used by mv88e6390x_serdes_get_lane(), mv88e6390_serdes_power_lane() and mv88e6390_serdes_irq_enable(). Currently we set the cached mode to the new one at the very end of the function only, means until then we use the old one what may be wrong.
4. When calling mv88e6390_serdes_irq_enable() we use the lane value belonging to the old cmode. Get the lane belonging to the new cmode before calling this function.
It's hard to provide a good "Fixes" tag because quite a few smaller changes have been done to the code in question recently.
Fixes: d235c48b40d3 ("net: dsa: mv88e6xxx: power serdes on/off for 10G interfaces on 6390X") Signed-off-by: Heiner Kallweit hkallweit1@gmail.com Reviewed-by: Florian Fainelli f.fainelli@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/dsa/mv88e6xxx/port.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 7fffce734f0a..fdeddbfa829d 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -379,18 +379,22 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, return 0;
lane = mv88e6390x_serdes_get_lane(chip, port); - if (lane < 0) + if (lane < 0 && lane != -ENODEV) return lane;
- if (chip->ports[port].serdes_irq) { - err = mv88e6390_serdes_irq_disable(chip, port, lane); + if (lane >= 0) { + if (chip->ports[port].serdes_irq) { + err = mv88e6390_serdes_irq_disable(chip, port, lane); + if (err) + return err; + } + + err = mv88e6390x_serdes_power(chip, port, false); if (err) return err; }
- err = mv88e6390x_serdes_power(chip, port, false); - if (err) - return err; + chip->ports[port].cmode = 0;
if (cmode) { err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); @@ -404,6 +408,12 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, if (err) return err;
+ chip->ports[port].cmode = cmode; + + lane = mv88e6390x_serdes_get_lane(chip, port); + if (lane < 0) + return lane; + err = mv88e6390x_serdes_power(chip, port, true); if (err) return err; @@ -415,8 +425,6 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, } }
- chip->ports[port].cmode = cmode; - return 0; }
From: Jan Kara jack@suse.cz
[ Upstream commit cae85cb8add35f678cf487139d05e083ce2f570a ]
Aneesh has reported that PPC triggers the following warning when excercising DAX code:
IP set_pte_at+0x3c/0x190 LR insert_pfn+0x208/0x280 Call Trace: insert_pfn+0x68/0x280 dax_iomap_pte_fault.isra.7+0x734/0xa40 __xfs_filemap_fault+0x280/0x2d0 do_wp_page+0x48c/0xa40 __handle_mm_fault+0x8d0/0x1fd0 handle_mm_fault+0x140/0x250 __do_page_fault+0x300/0xd60 handle_page_fault+0x18
Now that is WARN_ON in set_pte_at which is
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
The problem is that on some architectures set_pte_at() cannot cope with a situation where there is already some (different) valid entry present.
Use ptep_set_access_flags() instead to modify the pfn which is built to deal with modifying existing PTE.
Link: http://lkml.kernel.org/r/20190311084537.16029-1-jack@suse.cz Fixes: b2770da64254 "mm: add vm_insert_mixed_mkwrite()" Signed-off-by: Jan Kara jack@suse.cz Reported-by: "Aneesh Kumar K.V" aneesh.kumar@linux.ibm.com Reviewed-by: Aneesh Kumar K.V aneesh.kumar@linux.ibm.com Acked-by: Dan Williams dan.j.williams@intel.com Cc: Chandan Rajendra chandan@linux.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- mm/memory.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c index 9c69278173b7..e0010cb870e0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1796,10 +1796,12 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); goto out_unlock; } - entry = *pte; - goto out_mkwrite; - } else - goto out_unlock; + entry = pte_mkyoung(*pte); + entry = maybe_mkwrite(pte_mkdirty(entry), vma); + if (ptep_set_access_flags(vma, addr, pte, entry, 1)) + update_mmu_cache(vma, addr, pte); + } + goto out_unlock; }
/* Ok, finally just insert the thing.. */ @@ -1808,7 +1810,6 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, else entry = pte_mkspecial(pfn_t_pte(pfn, prot));
-out_mkwrite: if (mkwrite) { entry = pte_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma);
From: "Gustavo A. R. Silva" gustavo@embeddedor.com
[ Upstream commit e82adc1074a7356f1158233551df9e86b7ebfb82 ]
Currently there is no check on platform_get_irq() return value in case it fails, hence never actually reporting any errors and causing unexpected behavior when using such value as argument for function regmap_irq_get_virq().
Fix this by adding a proper check, a message error and return *irq* in case platform_get_irq() fails.
Addresses-Coverity-ID: 1443899 ("Improper use of negative value") Fixes: d2061f9cc32d ("usb: typec: add driver for Intel Whiskey Cove PMIC USB Type-C PHY") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva gustavo@embeddedor.com Reviewed-by: Guenter Roeck linux@roeck-us.net Acked-by: Heikki Krogerus heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/usb/typec/typec_wcove.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/typec_wcove.c index 423208e19383..6770afd40765 100644 --- a/drivers/usb/typec/typec_wcove.c +++ b/drivers/usb/typec/typec_wcove.c @@ -615,8 +615,13 @@ static int wcove_typec_probe(struct platform_device *pdev) wcove->dev = &pdev->dev; wcove->regmap = pmic->regmap;
- irq = regmap_irq_get_virq(pmic->irq_chip_data_chgr, - platform_get_irq(pdev, 0)); + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq); + return irq; + } + + irq = regmap_irq_get_virq(pmic->irq_chip_data_chgr, irq); if (irq < 0) return irq;
From: zhengliang zhengliang6@huawei.com
[ Upstream commit a0770e13c8da83bdb64738c0209ab02dd3cfff8b ]
v4: Rearrange the previous three versions.
The following scenario could lead to data block override by mistake.
TASK A | TASK kworker | TASK B | TASK C | | | open | | | write | | | close | | | | f2fs_write_data_pages | | | f2fs_write_cache_pages | | | f2fs_outplace_write_data | | | f2fs_allocate_data_block (get block in seg S, | | | S is full, and only | | | have this valid data | | | block) | | | allocate_segment | | | locate_dirty_segment (mark S as PRE) | | | f2fs_submit_page_write (submit but is not | | | written on dev) | | unlink | | | iput_final | | | f2fs_drop_inode | | | f2fs_truncate | | | (not evict) | | | | | write_checkpoint | | | flush merged bio but not wait file data writeback | | | set_prefree_as_free (mark S as FREE) | | | | update NODE/DATA | | | allocate_segment (select S) | writeback done | |
So we need to guarantee io complete before truncate inode in f2fs_drop_inode.
Reviewed-by: Chao Yu yuchao0@huawei.com Signed-off-by: Zheng Liang zhengliang6@huawei.com Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- fs/f2fs/super.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2264f27fd26d..036eb1e0d557 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -890,6 +890,10 @@ static int f2fs_drop_inode(struct inode *inode) sb_start_intwrite(inode->i_sb); f2fs_i_size_write(inode, 0);
+ f2fs_submit_merged_write_cond(F2FS_I_SB(inode), + inode, NULL, 0, DATA); + truncate_inode_pages_final(inode->i_mapping); + if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode);
From: Pablo Neira Ayuso pablo@netfilter.org
[ Upstream commit 3f3a390dbd59d236f62cff8e8b20355ef7069e3d ]
Smatch reports:
net/netfilter/nf_tables_api.c:2167 nf_tables_expr_destroy() error: dereferencing freed memory 'expr->ops'
net/netfilter/nf_tables_api.c 2162 static void nf_tables_expr_destroy(const struct nft_ctx *ctx, 2163 struct nft_expr *expr) 2164 { 2165 if (expr->ops->destroy) 2166 expr->ops->destroy(ctx, expr); ^^^^ --> 2167 module_put(expr->ops->type->owner); ^^^^^^^^^ 2168 }
Smatch says there are three functions which free expr->ops.
Fixes: b8e204006340 ("netfilter: nft_compat: use .release_ops and remove list of extension") Reported-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f272f9538c44..ef7ff13a7b99 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2113,9 +2113,11 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, static void nf_tables_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) { + const struct nft_expr_type *type = expr->ops->type; + if (expr->ops->destroy) expr->ops->destroy(ctx, expr); - module_put(expr->ops->type->owner); + module_put(type->owner); }
struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit b25a31bf0ca091aa8bdb9ab329b0226257568bbe ]
->release_ops() callback releases resources and this is used in error path. If nf_tables_newrule() fails after ->select_ops(), it should release resources. but it can not call ->destroy() because that should be called after ->init(). At this point, ->release_ops() should be used for releasing resources.
Test commands: modprobe -rv xt_tcpudp iptables-nft -I INPUT -m tcp <-- error command lsmod
Result: Module Size Used by xt_tcpudp 20480 2 <-- it should be 0
Fixes: b8e204006340 ("netfilter: nft_compat: use .release_ops and remove list of extension") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/netfilter/nf_tables_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ef7ff13a7b99..ebfcfe1dcbdb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2719,8 +2719,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, nf_tables_rule_release(&ctx, rule); err1: for (i = 0; i < n; i++) { - if (info[i].ops != NULL) + if (info[i].ops) { module_put(info[i].ops->type->owner); + if (info[i].ops->type->release_ops) + info[i].ops->type->release_ops(info[i].ops); + } } kvfree(info); return err;
From: Andy Duan fugang.duan@nxp.com
[ Upstream commit d7c3a206e6338e4ccdf030719dec028e26a521d5 ]
Some SOC like i.MX6SX clock have some limits: - ahb clock should be disabled before ipg. - ahb and ipg clocks are required for MAC MII bus. So, move the ahb clock to runtime management together with ipg clock.
Signed-off-by: Fugang Duan fugang.duan@nxp.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/freescale/fec_main.c | 30 ++++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 7b98bb75ba8a..ad41ace0a27a 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1850,13 +1850,9 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) int ret;
if (enable) { - ret = clk_prepare_enable(fep->clk_ahb); - if (ret) - return ret; - ret = clk_prepare_enable(fep->clk_enet_out); if (ret) - goto failed_clk_enet_out; + return ret;
if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1876,7 +1872,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
phy_reset_after_clk_enable(ndev->phydev); } else { - clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1895,8 +1890,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); -failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ahb);
return ret; } @@ -3485,6 +3478,9 @@ fec_probe(struct platform_device *pdev) ret = clk_prepare_enable(fep->clk_ipg); if (ret) goto failed_clk_ipg; + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + goto failed_clk_ahb;
fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { @@ -3578,6 +3574,9 @@ fec_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); failed_regulator: + clk_disable_unprepare(fep->clk_ahb); +failed_clk_ahb: + clk_disable_unprepare(fep->clk_ipg); failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: @@ -3701,6 +3700,7 @@ static int __maybe_unused fec_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev);
+ clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg);
return 0; @@ -3710,8 +3710,20 @@ static int __maybe_unused fec_runtime_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + int ret;
- return clk_prepare_enable(fep->clk_ipg); + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + return ret; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + + return 0; + +failed_clk_ipg: + clk_disable_unprepare(fep->clk_ahb); + return ret; }
static const struct dev_pm_ops fec_pm_ops = {
From: Ido Schimmel idosch@mellanox.com
[ Upstream commit d4d0e40977ac450f32f2db5e4d8e23c9d2578899 ]
The driver cannot guarantee in the prepare phase that it will be able to write an MDB entry to the device. In case the driver returned success during the prepare phase, but then failed to add the entry in the commit phase, a WARNING [1] will be generated by the switchdev core.
Fix this by doing the work in the prepare phase instead.
[1] [ 358.544486] swp12s0: Commit of object (id=2) failed. [ 358.550061] WARNING: CPU: 0 PID: 30 at net/switchdev/switchdev.c:281 switchdev_port_obj_add_now+0x9b/0xe0 [ 358.560754] CPU: 0 PID: 30 Comm: kworker/0:1 Not tainted 5.0.0-custom-13382-gf2449babf221 #1350 [ 358.570472] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 358.580582] Workqueue: events switchdev_deferred_process_work [ 358.587001] RIP: 0010:switchdev_port_obj_add_now+0x9b/0xe0 ... [ 358.614109] RSP: 0018:ffffa6b900d6fe18 EFLAGS: 00010286 [ 358.619943] RAX: 0000000000000000 RBX: ffff8b00797ff000 RCX: 0000000000000000 [ 358.627912] RDX: ffff8b00b7a1d4c0 RSI: ffff8b00b7a152e8 RDI: ffff8b00b7a152e8 [ 358.635881] RBP: ffff8b005c3f5bc0 R08: 000000000000022b R09: 0000000000000000 [ 358.643850] R10: 0000000000000000 R11: ffffa6b900d6fcc8 R12: 0000000000000000 [ 358.651819] R13: dead000000000100 R14: ffff8b00b65a23c0 R15: 0ffff8b00b7a2200 [ 358.659790] FS: 0000000000000000(0000) GS:ffff8b00b7a00000(0000) knlGS:0000000000000000 [ 358.668820] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 358.675228] CR2: 00007f00aad90de0 CR3: 00000001ca80d000 CR4: 00000000001006f0 [ 358.683188] Call Trace: [ 358.685918] switchdev_port_obj_add_deferred+0x13/0x60 [ 358.691655] switchdev_deferred_process+0x6b/0xf0 [ 358.696907] switchdev_deferred_process_work+0xa/0x10 [ 358.702548] process_one_work+0x1f5/0x3f0 [ 358.707022] worker_thread+0x28/0x3c0 [ 358.711099] ? process_one_work+0x3f0/0x3f0 [ 358.715768] kthread+0x10d/0x130 [ 358.719369] ? __kthread_create_on_node+0x180/0x180 [ 358.724815] ret_from_fork+0x35/0x40
Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel idosch@mellanox.com Reported-by: Alex Kushnarov alexanderk@mellanox.com Tested-by: Alex Kushnarov alexanderk@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index af673abdb482..a4f237f815d1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1585,7 +1585,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_index; int err = 0;
- if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_commit(trans)) return 0;
bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
From: Ido Schimmel idosch@mellanox.com
[ Upstream commit a8c133b06183c529c51cd0d54eb57d6b7078370c ]
The EMAD workqueue is used to handle retransmission of EMAD packets that contain configuration data for the device's firmware.
Given the workers need to allocate these packets and that the code is not called as part of memory reclaim path, remove the WQ_MEM_RECLAIM flag.
Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock") Signed-off-by: Ido Schimmel idosch@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f7154f358f27..426aea8ad72c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0;
- emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); if (!emad_wq) return -ENOMEM; mlxsw_core->emad_wq = emad_wq;
From: Ido Schimmel idosch@mellanox.com
[ Upstream commit 4af0699782e2cc7d0d89db9eb6f8844dd3df82dc ]
The ordered workqueue is used to offload various objects such as routes and neighbours in the order they are notified.
It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. This can also result in a warning [1], if a worker tries to flush a non-WQ_MEM_RECLAIM workqueue.
[1] [97703.542861] workqueue: WQ_MEM_RECLAIM mlxsw_core_ordered:mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] is flushing !WQ_MEM_RECLAIM events:rht_deferred_worker [97703.542884] WARNING: CPU: 1 PID: 32492 at kernel/workqueue.c:2605 check_flush_dependency+0xb5/0x130 ... [97703.542988] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 [97703.543049] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] [97703.543061] RIP: 0010:check_flush_dependency+0xb5/0x130 ... [97703.543071] RSP: 0018:ffffb3f08137bc00 EFLAGS: 00010086 [97703.543076] RAX: 0000000000000000 RBX: ffff96e07740ae00 RCX: 0000000000000000 [97703.543080] RDX: 0000000000000094 RSI: ffffffff82dc1934 RDI: 0000000000000046 [97703.543084] RBP: ffffb3f08137bc20 R08: ffffffff82dc18a0 R09: 00000000000225c0 [97703.543087] R10: 0000000000000000 R11: 0000000000007eec R12: ffffffff816e4ee0 [97703.543091] R13: ffff96e06f6a5c00 R14: ffff96e077ba7700 R15: ffffffff812ab0c0 [97703.543097] FS: 0000000000000000(0000) GS:ffff96e077a80000(0000) knlGS:0000000000000000 [97703.543101] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [97703.543104] CR2: 00007f8cd135b280 CR3: 00000001e860e003 CR4: 00000000003606e0 [97703.543109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [97703.543112] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [97703.543115] Call Trace: [97703.543129] __flush_work+0xbd/0x1e0 [97703.543137] ? __cancel_work_timer+0x136/0x1b0 [97703.543145] ? pwq_dec_nr_in_flight+0x49/0xa0 [97703.543154] __cancel_work_timer+0x136/0x1b0 [97703.543175] ? mlxsw_reg_trans_bulk_wait+0x145/0x400 [mlxsw_core] [97703.543184] cancel_work_sync+0x10/0x20 [97703.543191] rhashtable_free_and_destroy+0x23/0x140 [97703.543198] rhashtable_destroy+0xd/0x10 [97703.543254] mlxsw_sp_fib_destroy+0xb1/0xf0 [mlxsw_spectrum] [97703.543310] mlxsw_sp_vr_put+0xa8/0xc0 [mlxsw_spectrum] [97703.543364] mlxsw_sp_fib_node_put+0xbf/0x140 [mlxsw_spectrum] [97703.543418] ? mlxsw_sp_fib6_entry_destroy+0xe8/0x110 [mlxsw_spectrum] [97703.543475] mlxsw_sp_router_fib6_event_work+0x6cd/0x7f0 [mlxsw_spectrum] [97703.543484] process_one_work+0x1fd/0x400 [97703.543493] worker_thread+0x34/0x410 [97703.543500] kthread+0x121/0x140 [97703.543507] ? process_one_work+0x400/0x400 [97703.543512] ? kthread_park+0x90/0x90 [97703.543523] ret_from_fork+0x35/0x40
Fixes: a3832b31898f ("mlxsw: core: Create an ordered workqueue for FIB offload") Signed-off-by: Ido Schimmel idosch@mellanox.com Reported-by: Semion Lisyansky semionl@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 426aea8ad72c..7482db0767af 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1878,7 +1878,7 @@ static int __init mlxsw_core_module_init(void) mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); if (!mlxsw_wq) return -ENOMEM; - mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM, + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, mlxsw_core_driver_name); if (!mlxsw_owq) { err = -ENOMEM;
From: Ido Schimmel idosch@mellanox.com
[ Upstream commit b442fed1b724af0de087912a5718ddde1b87acbb ]
The workqueue is used to periodically update the networking stack about activity / statistics of various objects such as neighbours and TC actions.
It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag.
Fixes: 3d5479e92087 ("mlxsw: core: Remove deprecated create_workqueue") Signed-off-by: Ido Schimmel idosch@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 7482db0767af..2e6df5804b35 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1875,7 +1875,7 @@ static int __init mlxsw_core_module_init(void) { int err;
- mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); if (!mlxsw_wq) return -ENOMEM; mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
From: Jakub Kicinski jakub.kicinski@netronome.com
[ Upstream commit 5a03bc73abed6ae196c15e9950afde19d48be12c ]
Commit f66de3ee2c16 ("net/tls: Split conf to rx + tx") made freeing of IV and record sequence number conditional to SW path only, but commit e8f69799810c ("net/tls: Add generic NIC offload infrastructure") also allocates that state for the device offload configuration. Remember to free it.
Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski jakub.kicinski@netronome.com Reviewed-by: Dirk van der Merwe dirk.vandermerwe@netronome.com Reviewed-by: Simon Horman simon.horman@netronome.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/tls/tls_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index c9588b682db4..6fab4340fd77 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock);
static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) + if (ctx->tx_conf == TLS_HW) { kfree(tls_offload_ctx_tx(ctx)); + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + }
if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx));
From: Jakub Kicinski jakub.kicinski@netronome.com
[ Upstream commit 4a9c2e3746e6151fd5d077259d79ce9ca86d47d7 ]
This reverts the first part of commit 4e485d06bb8c ("strparser: Call skb_unclone conditionally"). To build a message with multiple fragments we need our own root of frag_list. We can't simply use the frag_list of orig_skb, because it will lead to linking all orig_skbs together creating very long frag chains, and causing stack overflow on kfree_skb() (which is called recursively on the frag_lists).
BUG: stack guard page was hit at 00000000d40fad41 (stack is 0000000029dde9f4..000000008cce03d5) kernel stack overflow (double-fault): 0000 [#1] PREEMPT SMP RIP: 0010:free_one_page+0x2b/0x490
Call Trace: __free_pages_ok+0x143/0x2c0 skb_release_data+0x8e/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0
[...]
skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 __kfree_skb+0xe/0x20 tcp_disconnect+0xd6/0x4d0 tcp_close+0xf4/0x430 ? tcp_check_oom+0xf0/0xf0 tls_sk_proto_close+0xe4/0x1e0 [tls] inet_release+0x36/0x60 __sock_release+0x37/0xa0 sock_close+0x11/0x20 __fput+0xa2/0x1d0 task_work_run+0x89/0xb0 exit_to_usermode_loop+0x9a/0xa0 do_syscall_64+0xc0/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Let's leave the second unclone conditional, as I'm not entirely sure what is its purpose :)
Fixes: 4e485d06bb8c ("strparser: Call skb_unclone conditionally") Signed-off-by: Jakub Kicinski jakub.kicinski@netronome.com Reviewed-by: Dirk van der Merwe dirk.vandermerwe@netronome.com Reviewed-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/strparser/strparser.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index da1a676860ca..0f4e42792878 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ - if (skb_has_frag_list(head)) { - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - STRP_STATS_INCR(strp->stats.mem_fail); - desc->error = err; - return 0; - } + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.mem_fail); + desc->error = err; + return 0; }
if (unlikely(skb_shinfo(head)->frag_list)) {
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit d7ee81ad09f072eab1681877fc71ec05f9c1ae92 ]
This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory corruption when handling SHDLC I-Frame commands").
I'm not totally sure, but I think that commit description may have overstated the danger. I was under the impression that this data came from the firmware? If you can't trust your networking firmware, then you're already in trouble.
Anyway, these days we add bounds checking where ever we can and we call it kernel hardening. Better safe than sorry.
Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- net/nfc/nci/hci.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ddfc52ac1f9b..c0d323b58e73 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; + if (new_pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + }
/* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + }
ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE;
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit 6491d698396fd5da4941980a35ca7c162a672016 ]
This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes") where we changed NFC_HCI_MAX_PIPES from 127 to 128.
As the comment next to the define explains, the pipe identifier is 7 bits long. The highest possible pipe is 127, but the number of possible pipes is 128. As the code is now, then there is potential for an out of bounds array access:
net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one? 'ndev->hci_dev->pipes[pipe]' '0-127 == 127'
Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- include/net/nfc/nci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 87499b6b35d6..df5c69db68af 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -166,7 +166,7 @@ struct nci_conn_info { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ -#define NCI_HCI_MAX_PIPES 127 +#define NCI_HCI_MAX_PIPES 128
struct nci_hci_gate { u8 gate;
From: Masami Hiramatsu mhiramat@kernel.org
[ Upstream commit b191fa96ea6dc00d331dcc28c1f7db5e075693a0 ]
Avoid kretprobe recursion loop bg by setting a dummy kprobes to current_kprobe per-CPU variable.
This bug has been introduced with the asm-coded trampoline code, since previously it used another kprobe for hooking the function return placeholder (which only has a nop) and trampoline handler was called from that kprobe.
This revives the old lost kprobe again.
With this fix, we don't see deadlock anymore.
And you can see that all inner-called kretprobe are skipped.
event_1 235 0 event_2 19375 19612
The 1st column is recorded count and the 2nd is missed count. Above shows (event_1 rec) + (event_2 rec) ~= (event_2 missed) (some difference are here because the counter is racy)
Reported-by: Andrea Righi righi.andrea@gmail.com Tested-by: Andrea Righi righi.andrea@gmail.com Signed-off-by: Masami Hiramatsu mhiramat@kernel.org Acked-by: Steven Rostedt rostedt@goodmis.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Mathieu Desnoyers mathieu.desnoyers@efficios.com Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: stable@vger.kernel.org Fixes: c9becf58d935 ("[PATCH] kretprobe: kretprobe-booster") Link: http://lkml.kernel.org/r/155094064889.6137.972160690963039.stgit@devbox Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- arch/x86/kernel/kprobes/core.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index acb901b43ce4..544bc2dfe408 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -749,11 +749,16 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+static struct kprobe kretprobe_kprobe = { + .addr = (void *)kretprobe_trampoline, +}; + /* * Called from kretprobe_trampoline */ __visible __used void *trampoline_handler(struct pt_regs *regs) { + struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -763,6 +768,17 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false;
+ preempt_disable(); + + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never run in kprobe handler, kprobe must not + * be running at this point. + */ + kcb = get_kprobe_ctlblk(); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ @@ -838,10 +854,9 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); }
recycle_rp_inst(ri, &empty_rp); @@ -857,6 +872,9 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
kretprobe_hash_unlock(current, &flags);
+ __this_cpu_write(current_kprobe, NULL); + preempt_enable(); + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri);
linux-stable-mirror@lists.linaro.org