The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 758999246965eeb8b253d47e72f7bfe508804b16
Gitweb: https://git.kernel.org/tip/758999246965eeb8b253d47e72f7bfe508804b16
Author: Xiaochen Shen <xiaochen.shen(a)intel.com>
AuthorDate: Sat, 31 Oct 2020 03:11:28 +08:00
Committer: Borislav Petkov <bp(a)suse.de>
CommitterDate: Tue, 24 Nov 2020 12:13:37 +01:00
x86/resctrl: Add necessary kernfs_put() calls to prevent refcount leak
On resource group creation via a mkdir an extra kernfs_node reference is
obtained by kernfs_get() to ensure that the rdtgroup structure remains
accessible for the rdtgroup_kn_unlock() calls where it is removed on
deletion. Currently the extra kernfs_node reference count is only
dropped by kernfs_put() in rdtgroup_kn_unlock() while the rdtgroup
structure is removed in a few other locations that lack the matching
reference drop.
In call paths of rmdir and umount, when a control group is removed,
kernfs_remove() is called to remove the whole kernfs nodes tree of the
control group (including the kernfs nodes trees of all child monitoring
groups), and then rdtgroup structure is freed by kfree(). The rdtgroup
structures of all child monitoring groups under the control group are
freed by kfree() in free_all_child_rdtgrp().
Before calling kfree() to free the rdtgroup structures, the kernfs node
of the control group itself as well as the kernfs nodes of all child
monitoring groups still hold the extra references, so their reference
counts never drop to 0 and the kernfs nodes are never freed. This leads
to a reference count leak and a kernfs_node_cache memory leak.
For example, reference count leak is observed in these two cases:
(1) mount -t resctrl resctrl /sys/fs/resctrl
mkdir /sys/fs/resctrl/c1
mkdir /sys/fs/resctrl/c1/mon_groups/m1
umount /sys/fs/resctrl
(2) mkdir /sys/fs/resctrl/c1
mkdir /sys/fs/resctrl/c1/mon_groups/m1
rmdir /sys/fs/resctrl/c1
The same reference count leak issue also exists in the error exit paths
of mkdir in mkdir_rdt_prepare() and rdtgroup_mkdir_ctrl_mon().
Fix this issue with the following changes, which make sure the extra
kernfs_node reference on rdtgroup is dropped before freeing the rdtgroup
structure.
(1) Introduce rdtgroup removal helper rdtgroup_remove() to wrap up
kernfs_put() and kfree().
(2) Call rdtgroup_remove() in rdtgroup removal path where the rdtgroup
structure is about to be freed by kfree().
(3) Call rdtgroup_remove() or kernfs_put() as appropriate in the error
exit paths of mkdir where an extra reference is taken by kernfs_get().
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Reported-by: Willem de Bruijn <willemb(a)google.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1604085088-31707-1-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 32 +++++++++++++++++++------
1 file changed, 25 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2ab1266..6f4ca4b 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -507,6 +507,24 @@ unlock:
return ret ?: nbytes;
}
+/**
+ * rdtgroup_remove - the helper to remove resource group safely
+ * @rdtgrp: resource group to remove
+ *
+ * On resource group creation via a mkdir, an extra kernfs_node reference is
+ * taken to ensure that the rdtgroup structure remains accessible for the
+ * rdtgroup_kn_unlock() calls where it is removed.
+ *
+ * Drop the extra reference here, then free the rdtgroup structure.
+ *
+ * Return: void
+ */
+static void rdtgroup_remove(struct rdtgroup *rdtgrp)
+{
+ kernfs_put(rdtgrp->kn);
+ kfree(rdtgrp);
+}
+
struct task_move_callback {
struct callback_head work;
struct rdtgroup *rdtgrp;
@@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head)
(rdtgrp->flags & RDT_DELETED)) {
current->closid = 0;
current->rmid = 0;
- kfree(rdtgrp);
+ rdtgroup_remove(rdtgrp);
}
if (unlikely(current->flags & PF_EXITING))
@@ -2065,8 +2083,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
rdtgroup_pseudo_lock_remove(rdtgrp);
kernfs_unbreak_active_protection(kn);
- kernfs_put(rdtgrp->kn);
- kfree(rdtgrp);
+ rdtgroup_remove(rdtgrp);
} else {
kernfs_unbreak_active_protection(kn);
}
@@ -2341,7 +2358,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
if (atomic_read(&sentry->waitcount) != 0)
sentry->flags = RDT_DELETED;
else
- kfree(sentry);
+ rdtgroup_remove(sentry);
}
}
@@ -2383,7 +2400,7 @@ static void rmdir_all_sub(void)
if (atomic_read(&rdtgrp->waitcount) != 0)
rdtgrp->flags = RDT_DELETED;
else
- kfree(rdtgrp);
+ rdtgroup_remove(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
update_closid_rmid(cpu_online_mask, &rdtgroup_default);
@@ -2818,7 +2835,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
* kernfs_remove() will drop the reference count on "kn" which
* will free it. But we still need it to stick around for the
* rdtgroup_kn_unlock(kn) call. Take one extra reference here,
- * which will be dropped inside rdtgroup_kn_unlock().
+ * which will be dropped by kernfs_put() in rdtgroup_remove().
*/
kernfs_get(kn);
@@ -2859,6 +2876,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
out_idfree:
free_rmid(rdtgrp->mon.rmid);
out_destroy:
+ kernfs_put(rdtgrp->kn);
kernfs_remove(rdtgrp->kn);
out_free_rgrp:
kfree(rdtgrp);
@@ -2871,7 +2889,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
{
kernfs_remove(rgrp->kn);
free_rmid(rgrp->mon.rmid);
- kfree(rgrp);
+ rdtgroup_remove(rgrp);
}
/*
pte_accessible() is used by ptep_clear_flush() to figure out whether TLB
invalidation is necessary when unmapping pages for reclaim. Although our
implementation is correct according to the architecture, returning true
only for valid, young ptes in the absence of racing page-table
modifications, this is in fact flawed due to lazy invalidation of old
ptes in ptep_clear_flush_young() where we elide the expensive DSB
instruction for completing the TLB invalidation.
Rather than penalise the aging path, adjust pte_accessible() to return
true for any valid pte, even if the access flag is cleared.
Cc: <stable(a)vger.kernel.org>
Fixes: 76c714be0e5e ("arm64: pgtable: implement pte_accessible()")
Reported-by: Yu Zhao <yuzhao(a)google.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
---
arch/arm64/include/asm/pgtable.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4ff12a7adcfd..1bdf51f01e73 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -115,8 +115,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
#define pte_valid_not_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
-#define pte_valid_young(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
#define pte_valid_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
@@ -126,7 +124,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
* remapped as PROT_NONE but are yet to be flushed from the TLB.
*/
#define pte_accessible(mm, pte) \
- (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
+ (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
/*
* p??_access_permitted() is true for valid user mappings (subject to the
--
2.29.2.454.gaff20da3a2-goog
From: Zhuguangqing <zhuguangqing(a)xiaomi.com>
If the state was not changed successfully but cpufreq_state was updated
anyway, then the next time the requested state equals cpufreq_state (the
value that failed to take effect last time), we return early and skip a
freq_qos_update_request() call that should have been made.
Fixes: 5130802ddbb1 ("thermal: cpu_cooling: Switch to QoS requests for freq limits")
Cc: v5.4+ <stable(a)vger.kernel.org> # v5.4+
Signed-off-by: Zhuguangqing <zhuguangqing(a)xiaomi.com>
Acked-by: Viresh Kumar <viresh.kumar(a)linaro.org>
---
v2:
- Add Fixes: 5130802ddbb1 in log.
- Add Cc: v5.4+ <stable(a)vger.kernel.org> # v5.4+ in log.
- Add Acked-by: Viresh Kumar <viresh.kumar(a)linaro.org> in log.
- Delete an extra blank line.
---
drivers/thermal/cpufreq_cooling.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index cc2959f22f01..612f063c1cfc 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -438,13 +438,11 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
if (cpufreq_cdev->cpufreq_state == state)
return 0;
- cpufreq_cdev->cpufreq_state = state;
-
frequency = get_state_freq(cpufreq_cdev, state);
ret = freq_qos_update_request(&cpufreq_cdev->qos_req, frequency);
-
if (ret > 0) {
+ cpufreq_cdev->cpufreq_state = state;
cpus = cpufreq_cdev->policy->cpus;
max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus));
capacity = frequency * max_capacity;
--
2.17.1
From: Yangtao Li <frank(a)allwinnertech.com>
On many Allwinner SoCs there is a low probability that the interrupt
status cannot be read in sunxi_pinctrl_irq_handler(). This causes the
interrupt status of a GPIO bank to always be active on the GIC,
preventing the GIC from responding to other SPI interrupts correctly.
So we should call chained_irq_enter()/chained_irq_exit() each time we
enter sunxi_pinctrl_irq_handler().
Cc: stable(a)vger.kernel.org
Signed-off-by: Yangtao Li <frank(a)allwinnertech.com>
---
drivers/pinctrl/sunxi/pinctrl-sunxi.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index 9d8b59dafa4b..dc8d39ae045b 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -1141,20 +1141,22 @@ static void sunxi_pinctrl_irq_handler(struct irq_desc *desc)
WARN_ON(bank == pctl->desc->irq_banks);
+ chained_irq_enter(chip, desc);
+
reg = sunxi_irq_status_reg_from_bank(pctl->desc, bank);
val = readl(pctl->membase + reg);
if (val) {
int irqoffset;
- chained_irq_enter(chip, desc);
for_each_set_bit(irqoffset, &val, IRQ_PER_BANK) {
int pin_irq = irq_find_mapping(pctl->domain,
bank * IRQ_PER_BANK + irqoffset);
generic_handle_irq(pin_irq);
}
- chained_irq_exit(chip, desc);
}
+
+ chained_irq_exit(chip, desc);
}
static int sunxi_pinctrl_add_function(struct sunxi_pinctrl *pctl,
--
2.28.0
This is the start of the stable review cycle for the 4.9.246 release.
There are 47 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 25 Nov 2020 12:17:50 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.246-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.9.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.9.246-rc1
Chen Yu <yu.c.chen(a)intel.com>
x86/microcode/intel: Check patch signature before saving microcode for early loading
Thomas Richter <tmricht(a)linux.ibm.com>
s390/cpum_sf.c: fix file permission for cpum_sfb_size
Johannes Berg <johannes.berg(a)intel.com>
mac80211: free sta in sta_info_insert_finish() on errors
Felix Fietkau <nbd(a)nbd.name>
mac80211: minstrel: fix tx status processing corner case
Felix Fietkau <nbd(a)nbd.name>
mac80211: minstrel: remove deferred sampling code
Max Filippov <jcmvbkbc(a)gmail.com>
xtensa: disable preemption around cache alias management calls
Michał Mirosław <mirq-linux(a)rere.qmqm.pl>
regulator: fix memory leak with repeated set_machine_constraints()
Hans de Goede <hdegoede(a)redhat.com>
iio: accel: kxcjk1013: Replace is_smo8500_device with an acpi_type enum
Jan Kara <jack(a)suse.cz>
ext4: fix bogus warning in ext4_update_dx_flag()
Vamshi K Sthambamkadi <vamshi.k.sthambamkadi(a)gmail.com>
efivarfs: fix memory leak in efivarfs_create()
Fugang Duan <fugang.duan(a)nxp.com>
tty: serial: imx: keep console clocks always on
Takashi Iwai <tiwai(a)suse.de>
ALSA: mixart: Fix mutex deadlock
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
ALSA: ctl: fix error path at adding user-defined element set
Daniel Axtens <dja(a)axtens.net>
powerpc/uaccess-flush: fix missing includes in kup-radix.h
Yicong Yang <yangyicong(a)hisilicon.com>
libfs: fix error cast of negative value in simple_attr_write()
Darrick J. Wong <darrick.wong(a)oracle.com>
xfs: revert "xfs: fix rmap key and record comparison functions"
Nishanth Menon <nm(a)ti.com>
regulator: ti-abb: Fix array out of bound read access on the first transition
Zhang Qilong <zhangqilong3(a)huawei.com>
MIPS: Alchemy: Fix memleak in alchemy_clk_setup_cpu
Wu Bo <wubo.oduw(a)gmail.com>
can: m_can: m_can_handle_state_change(): fix state change
Colin Ian King <colin.king(a)canonical.com>
can: peak_usb: fix potential integer overflow on shift of a int
Alejandro Concepcion Rodriguez <alejandro(a)acoro.eu>
can: dev: can_restart(): post buffer from the right context
Leo Yan <leo.yan(a)linaro.org>
perf lock: Don't free "lock_seq_stat" if read_count isn't zero
Fabio Estevam <festevam(a)gmail.com>
ARM: dts: imx50-evk: Fix the chip select 1 IOMUX
Sergey Matyukevich <geomatsi(a)gmail.com>
arm: dts: imx6qdl-udoo: fix rgmii phy-mode for ksz9031 phy
Randy Dunlap <rdunlap(a)infradead.org>
MIPS: export has_transparent_hugepage() for modules
Dan Carpenter <dan.carpenter(a)oracle.com>
Input: adxl34x - clean up a data type in adxl34x_probe()
Darrick J. Wong <darrick.wong(a)oracle.com>
vfs: remove lockdep bogosity in __sb_start_write
Will Deacon <will(a)kernel.org>
arm64: psci: Avoid printing in cpu_psci_cpu_die()
Jianqun Xu <jay.xu(a)rock-chips.com>
pinctrl: rockchip: enable gpio pclk for rockchip_gpio_to_irq
Ido Schimmel <idosch(a)nvidia.com>
mlxsw: core: Use variable timeout for EMAD retries
Joel Stanley <joel(a)jms.id.au>
net: ftgmac100: Fix crash when removing driver
Ryan Sharpelletti <sharpelletti(a)google.com>
tcp: only postpone PROBE_RTT if RTT is < current min_rtt estimate
Filip Moc <dev(a)moc6.cz>
net: usb: qmi_wwan: Set DTR quirk for MR400
Xin Long <lucien.xin(a)gmail.com>
sctp: change to hold/put transport for proto_unreach_timer
Zhang Changzhong <zhangchangzhong(a)huawei.com>
qlcnic: fix error return code in qlcnic_83xx_restart_hw()
Xie He <xie.he.0141(a)gmail.com>
net: x25: Increase refcnt of "struct x25_neigh" in x25_rx_call_request
Aya Levin <ayal(a)nvidia.com>
net/mlx4_core: Fix init_hca fields offset
Paul Moore <paul(a)paul-moore.com>
netlabel: fix an uninitialized warning in netlbl_unlabel_staticlist()
Paul Moore <paul(a)paul-moore.com>
netlabel: fix our progress tracking in netlbl_unlabel_staticlist()
Florian Fainelli <f.fainelli(a)gmail.com>
net: Have netpoll bring-up DSA management interface
Heiner Kallweit <hkallweit1(a)gmail.com>
net: bridge: add missing counters to ndo_get_stats64 callback
Zhang Changzhong <zhangchangzhong(a)huawei.com>
net: b44: fix error return code in b44_init_one()
Wang Hai <wanghai38(a)huawei.com>
inet_diag: Fix error path to cancel the meseage in inet_req_diag_fill()
Wang Hai <wanghai38(a)huawei.com>
devlink: Add missing genlmsg_cancel() in devlink_nl_sb_port_pool_fill()
Edwin Peer <edwin.peer(a)broadcom.com>
bnxt_en: read EEPROM A2h address using page 0
Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
atm: nicstar: Unmap DMA on send error
Zhang Changzhong <zhangchangzhong(a)huawei.com>
ah6: fix error return code in ah6_input()
-------------
Diffstat:
Makefile | 4 +-
arch/arm/boot/dts/imx50-evk.dts | 2 +-
arch/arm/boot/dts/imx6qdl-udoo.dtsi | 2 +-
arch/arm64/kernel/psci.c | 5 +--
arch/mips/alchemy/common/clock.c | 9 +++-
arch/mips/mm/tlb-r4k.c | 1 +
arch/powerpc/include/asm/book3s/64/kup-radix.h | 1 +
arch/s390/kernel/perf_cpum_sf.c | 2 +-
arch/x86/kernel/cpu/microcode/intel.c | 48 +---------------------
arch/xtensa/mm/cache.c | 14 +++++++
drivers/atm/nicstar.c | 2 +
drivers/iio/accel/kxcjk-1013.c | 15 ++++---
drivers/input/misc/adxl34x.c | 2 +-
drivers/net/can/dev.c | 2 +-
drivers/net/can/m_can/m_can.c | 4 +-
drivers/net/can/usb/peak_usb/pcan_usb_core.c | 4 +-
drivers/net/ethernet/broadcom/b44.c | 3 +-
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +-
drivers/net/ethernet/faraday/ftgmac100.c | 4 ++
drivers/net/ethernet/mellanox/mlx4/fw.c | 6 +--
drivers/net/ethernet/mellanox/mlx4/fw.h | 4 +-
drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +-
.../net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 3 +-
drivers/net/usb/qmi_wwan.c | 2 +-
drivers/pinctrl/pinctrl-rockchip.c | 2 +
drivers/regulator/core.c | 29 ++++++-------
drivers/regulator/ti-abb-regulator.c | 12 +++++-
drivers/tty/serial/imx.c | 20 ++-------
fs/efivarfs/super.c | 1 +
fs/ext4/ext4.h | 3 +-
fs/libfs.c | 6 ++-
fs/super.c | 33 ++-------------
fs/xfs/libxfs/xfs_rmap_btree.c | 16 ++++----
net/bridge/br_device.c | 1 +
net/core/devlink.c | 6 ++-
net/core/netpoll.c | 22 ++++++++--
net/ipv4/inet_diag.c | 4 +-
net/ipv4/tcp_bbr.c | 2 +-
net/ipv6/ah6.c | 3 +-
net/mac80211/rc80211_minstrel.c | 27 +++---------
net/mac80211/rc80211_minstrel.h | 1 -
net/mac80211/sta_info.c | 14 ++-----
net/netlabel/netlabel_unlabeled.c | 17 +++++---
net/sctp/input.c | 4 +-
net/sctp/sm_sideeffect.c | 4 +-
net/sctp/transport.c | 2 +-
net/x25/af_x25.c | 1 +
sound/core/control.c | 2 +-
sound/pci/mixart/mixart_core.c | 5 +--
tools/perf/builtin-lock.c | 2 +-
50 files changed, 174 insertions(+), 208 deletions(-)