This is a note to let you know that I've just added the patch titled
nvmem: core: add a missing of_node_put
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the char-misc-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
From 63879e2964bceee2aa5bbe8b99ea58bba28bb64f Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Date: Fri, 11 Jun 2021 11:23:21 +0100
Subject: nvmem: core: add a missing of_node_put
'for_each_child_of_node' performs an of_node_get on each iteration, so a
return from the middle of the loop requires an of_node_put.
Fixes: e888d445ac33 ("nvmem: resolve cells from DT at registration time")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Link: https://lore.kernel.org/r/20210611102321.11509-1-srinivas.kandagatla@linaro…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/nvmem/core.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 4d1c4f83b22f..20799e622b5b 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -690,15 +690,17 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
continue;
if (len < 2 * sizeof(u32)) {
dev_err(dev, "nvmem: invalid reg on %pOF\n", child);
+ of_node_put(child);
return -EINVAL;
}
cell = kzalloc(sizeof(*cell), GFP_KERNEL);
- if (!cell)
+ if (!cell) {
+ of_node_put(child);
return -ENOMEM;
+ }
cell->nvmem = nvmem;
- cell->np = of_node_get(child);
cell->offset = be32_to_cpup(addr++);
cell->bytes = be32_to_cpup(addr);
cell->name = kasprintf(GFP_KERNEL, "%pOFn", child);
@@ -719,11 +721,12 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
cell->name, nvmem->stride);
/* Cells already added will be freed later. */
kfree_const(cell->name);
- of_node_put(cell->np);
kfree(cell);
+ of_node_put(child);
return -EINVAL;
}
+ cell->np = of_node_get(child);
nvmem_cell_add(cell);
}
--
2.32.0
We can deadlock when rmmod'ing the driver or going through firmware
reset, because cfg80211_unregister_wdev() has to bring down the link for
us, which calls the netdev notifiers, which then grab the same wiphy lock.
nl80211_del_interface() already handles a very similar case, with a nice
description:
/*
* We hold RTNL, so this is safe, without RTNL opencount cannot
* reach 0, and thus the rdev cannot be deleted.
*
* We need to do it for the dev_close(), since that will call
* the netdev notifiers, and we need to acquire the mutex there
* but don't know if we get there from here or from some other
* place (e.g. "ip link set ... down").
*/
mutex_unlock(&rdev->wiphy.mtx);
...
Do similarly for mwifiex teardown, by ensuring we bring the link down
first.
Sample deadlock trace:
[ 247.103516] INFO: task rmmod:2119 blocked for more than 123 seconds.
[ 247.110630] Not tainted 5.12.4 #5
[ 247.115796] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 247.124557] task:rmmod state:D stack: 0 pid: 2119 ppid: 2114 flags:0x00400208
[ 247.133905] Call trace:
[ 247.136644] __switch_to+0x130/0x170
[ 247.140643] __schedule+0x714/0xa0c
[ 247.144548] schedule_preempt_disabled+0x88/0xf4
[ 247.149714] __mutex_lock_common+0x43c/0x750
[ 247.154496] mutex_lock_nested+0x5c/0x68
[ 247.158884] cfg80211_netdev_notifier_call+0x280/0x4e0 [cfg80211]
[ 247.165769] raw_notifier_call_chain+0x4c/0x78
[ 247.170742] call_netdevice_notifiers_info+0x68/0xa4
[ 247.176305] __dev_close_many+0x7c/0x138
[ 247.180693] dev_close_many+0x7c/0x10c
[ 247.184893] unregister_netdevice_many+0xfc/0x654
[ 247.190158] unregister_netdevice_queue+0xb4/0xe0
[ 247.195424] _cfg80211_unregister_wdev+0xa4/0x204 [cfg80211]
[ 247.201816] cfg80211_unregister_wdev+0x20/0x2c [cfg80211]
[ 247.208016] mwifiex_del_virtual_intf+0xc8/0x188 [mwifiex]
[ 247.214174] mwifiex_uninit_sw+0x158/0x1b0 [mwifiex]
[ 247.219747] mwifiex_remove_card+0x38/0xa0 [mwifiex]
[ 247.225316] mwifiex_pcie_remove+0xd0/0xe0 [mwifiex_pcie]
[ 247.231451] pci_device_remove+0x50/0xe0
[ 247.235849] device_release_driver_internal+0x110/0x1b0
[ 247.241701] driver_detach+0x5c/0x9c
[ 247.245704] bus_remove_driver+0x84/0xb8
[ 247.250095] driver_unregister+0x3c/0x60
[ 247.254486] pci_unregister_driver+0x2c/0x90
[ 247.259267] cleanup_module+0x18/0xcdc [mwifiex_pcie]
Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/linux-wireless/98392296-40ee-6300-369c-32e16cff3725…
Link: https://lore.kernel.org/linux-wireless/ab4d00ce52f32bd8e45ad0448a44737e@bew…
Reported-by: Maximilian Luz <luzmaximilian(a)gmail.com>
Reported-by: dave(a)bewaar.me
Cc: Johannes Berg <johannes(a)sipsolutions.net>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
---
drivers/net/wireless/marvell/mwifiex/main.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 529dfd8b7ae8..17399d4aa129 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1445,11 +1445,18 @@ static void mwifiex_uninit_sw(struct mwifiex_adapter *adapter)
if (!priv)
continue;
rtnl_lock();
- wiphy_lock(adapter->wiphy);
if (priv->netdev &&
- priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED)
+ priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) {
+ /*
+ * Close the netdev now, because if we do it later, the
+ * netdev notifiers will need to acquire the wiphy lock
+ * again --> deadlock.
+ */
+ dev_close(priv->wdev.netdev);
+ wiphy_lock(adapter->wiphy);
mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev);
- wiphy_unlock(adapter->wiphy);
+ wiphy_unlock(adapter->wiphy);
+ }
rtnl_unlock();
}
--
This is an automatically generated email to let you know that the following patch was queued:
Subject: media: uvcvideo: Fix pixel format change for Elgato Cam Link 4K
Author: Benjamin Drung <bdrung(a)posteo.de>
Date: Sat Jun 5 22:15:36 2021 +0200
The Elgato Cam Link 4K HDMI video capture card reports support for three
different pixel formats, where the first format depends on the connected
HDMI device.
```
$ v4l2-ctl -d /dev/video0 --list-formats-ext
ioctl: VIDIOC_ENUM_FMT
Type: Video Capture
[0]: 'NV12' (Y/CbCr 4:2:0)
Size: Discrete 3840x2160
Interval: Discrete 0.033s (29.970 fps)
[1]: 'NV12' (Y/CbCr 4:2:0)
Size: Discrete 3840x2160
Interval: Discrete 0.033s (29.970 fps)
[2]: 'YU12' (Planar YUV 4:2:0)
Size: Discrete 3840x2160
Interval: Discrete 0.033s (29.970 fps)
```
Changing the pixel format to anything besides the first pixel format
does not work:
```
$ v4l2-ctl -d /dev/video0 --try-fmt-video pixelformat=YU12
Format Video Capture:
Width/Height : 3840/2160
Pixel Format : 'NV12' (Y/CbCr 4:2:0)
Field : None
Bytes per Line : 3840
Size Image : 12441600
Colorspace : sRGB
Transfer Function : Rec. 709
YCbCr/HSV Encoding: Rec. 709
Quantization : Default (maps to Limited Range)
Flags :
```
User space applications like VLC might show an error message on the
terminal in that case:
```
libv4l2: error set_fmt gave us a different result than try_fmt!
```
Depending on the error handling of the user space applications, they
might display a distorted video, because they use the wrong pixel format
for decoding the stream.
The Elgato Cam Link 4K responds to the USB video probe
VS_PROBE_CONTROL/VS_COMMIT_CONTROL with a malformed data structure: The
second byte contains bFormatIndex (instead of being the second byte of
bmHint). The first byte is always zero. The third byte is always 1.
The firmware bug was reported to Elgato on 2020-12-01 and it was
forwarded by the support team to the developers as a feature request.
There is no firmware update available since then. The latest firmware
for Elgato Cam Link 4K as of 2021-03-23 has MCU 20.02.19 and FPGA 67.
Therefore correct the malformed data structure for this device. The
change was successfully tested with VLC, OBS, and Chromium using
different pixel formats (YUYV, NV12, YU12), resolutions (3840x2160,
1920x1080), and frame rates (29.970 and 59.940 fps).
Cc: stable(a)vger.kernel.org
Signed-off-by: Benjamin Drung <bdrung(a)posteo.de>
Signed-off-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
drivers/media/usb/uvc/uvc_video.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
---
diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index a777b389a66e..e16464606b14 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -127,10 +127,37 @@ int uvc_query_ctrl(struct uvc_device *dev, u8 query, u8 unit,
static void uvc_fixup_video_ctrl(struct uvc_streaming *stream,
struct uvc_streaming_control *ctrl)
{
+ static const struct usb_device_id elgato_cam_link_4k = {
+ USB_DEVICE(0x0fd9, 0x0066)
+ };
struct uvc_format *format = NULL;
struct uvc_frame *frame = NULL;
unsigned int i;
+ /*
+ * The response of the Elgato Cam Link 4K is incorrect: The second byte
+ * contains bFormatIndex (instead of being the second byte of bmHint).
+ * The first byte is always zero. The third byte is always 1.
+ *
+ * The UVC 1.5 class specification defines the first five bits in the
+ * bmHint bitfield. The remaining bits are reserved and should be zero.
+ * Therefore a valid bmHint will be less than 32.
+ *
+ * Latest Elgato Cam Link 4K firmware as of 2021-03-23 needs this fix.
+ * MCU: 20.02.19, FPGA: 67
+ */
+ if (usb_match_one_id(stream->dev->intf, &elgato_cam_link_4k) &&
+ ctrl->bmHint > 255) {
+ u8 corrected_format_index = ctrl->bmHint >> 8;
+
+ uvc_dbg(stream->dev, VIDEO,
+ "Correct USB video probe response from {bmHint: 0x%04x, bFormatIndex: %u} to {bmHint: 0x%04x, bFormatIndex: %u}\n",
+ ctrl->bmHint, ctrl->bFormatIndex,
+ 1, corrected_format_index);
+ ctrl->bmHint = 1;
+ ctrl->bFormatIndex = corrected_format_index;
+ }
+
for (i = 0; i < stream->nformats; ++i) {
if (stream->format[i].index == ctrl->bFormatIndex) {
format = &stream->format[i];
This is an automatically generated email to let you know that the following patch was queued:
Subject: media: uvcvideo: Fix pixel format change for Elgato Cam Link 4K
Author: Benjamin Drung <bdrung(a)posteo.de>
Date: Sat Jun 5 22:15:36 2021 +0200
The Elgato Cam Link 4K HDMI video capture card reports support for three
different pixel formats, where the first format depends on the connected
HDMI device.
```
$ v4l2-ctl -d /dev/video0 --list-formats-ext
ioctl: VIDIOC_ENUM_FMT
Type: Video Capture
[0]: 'NV12' (Y/CbCr 4:2:0)
Size: Discrete 3840x2160
Interval: Discrete 0.033s (29.970 fps)
[1]: 'NV12' (Y/CbCr 4:2:0)
Size: Discrete 3840x2160
Interval: Discrete 0.033s (29.970 fps)
[2]: 'YU12' (Planar YUV 4:2:0)
Size: Discrete 3840x2160
Interval: Discrete 0.033s (29.970 fps)
```
Changing the pixel format to anything besides the first pixel format
does not work:
```
$ v4l2-ctl -d /dev/video0 --try-fmt-video pixelformat=YU12
Format Video Capture:
Width/Height : 3840/2160
Pixel Format : 'NV12' (Y/CbCr 4:2:0)
Field : None
Bytes per Line : 3840
Size Image : 12441600
Colorspace : sRGB
Transfer Function : Rec. 709
YCbCr/HSV Encoding: Rec. 709
Quantization : Default (maps to Limited Range)
Flags :
```
User space applications like VLC might show an error message on the
terminal in that case:
```
libv4l2: error set_fmt gave us a different result than try_fmt!
```
Depending on the error handling of the user space applications, they
might display a distorted video, because they use the wrong pixel format
for decoding the stream.
The Elgato Cam Link 4K responds to the USB video probe
VS_PROBE_CONTROL/VS_COMMIT_CONTROL with a malformed data structure: The
second byte contains bFormatIndex (instead of being the second byte of
bmHint). The first byte is always zero. The third byte is always 1.
The firmware bug was reported to Elgato on 2020-12-01 and it was
forwarded by the support team to the developers as a feature request.
There is no firmware update available since then. The latest firmware
for Elgato Cam Link 4K as of 2021-03-23 has MCU 20.02.19 and FPGA 67.
Therefore correct the malformed data structure for this device. The
change was successfully tested with VLC, OBS, and Chromium using
different pixel formats (YUYV, NV12, YU12), resolutions (3840x2160,
1920x1080), and frame rates (29.970 and 59.940 fps).
Cc: stable(a)vger.kernel.org
Signed-off-by: Benjamin Drung <bdrung(a)posteo.de>
Signed-off-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
drivers/media/usb/uvc/uvc_video.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
---
diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index a777b389a66e..e16464606b14 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -127,10 +127,37 @@ int uvc_query_ctrl(struct uvc_device *dev, u8 query, u8 unit,
static void uvc_fixup_video_ctrl(struct uvc_streaming *stream,
struct uvc_streaming_control *ctrl)
{
+ static const struct usb_device_id elgato_cam_link_4k = {
+ USB_DEVICE(0x0fd9, 0x0066)
+ };
struct uvc_format *format = NULL;
struct uvc_frame *frame = NULL;
unsigned int i;
+ /*
+ * The response of the Elgato Cam Link 4K is incorrect: The second byte
+ * contains bFormatIndex (instead of being the second byte of bmHint).
+ * The first byte is always zero. The third byte is always 1.
+ *
+ * The UVC 1.5 class specification defines the first five bits in the
+ * bmHint bitfield. The remaining bits are reserved and should be zero.
+ * Therefore a valid bmHint will be less than 32.
+ *
+ * Latest Elgato Cam Link 4K firmware as of 2021-03-23 needs this fix.
+ * MCU: 20.02.19, FPGA: 67
+ */
+ if (usb_match_one_id(stream->dev->intf, &elgato_cam_link_4k) &&
+ ctrl->bmHint > 255) {
+ u8 corrected_format_index = ctrl->bmHint >> 8;
+
+ uvc_dbg(stream->dev, VIDEO,
+ "Correct USB video probe response from {bmHint: 0x%04x, bFormatIndex: %u} to {bmHint: 0x%04x, bFormatIndex: %u}\n",
+ ctrl->bmHint, ctrl->bFormatIndex,
+ 1, corrected_format_index);
+ ctrl->bmHint = 1;
+ ctrl->bFormatIndex = corrected_format_index;
+ }
+
for (i = 0; i < stream->nformats; ++i) {
if (stream->format[i].index == ctrl->bFormatIndex) {
format = &stream->format[i];
The redzone area for SLUB exists between s->object_size and s->inuse
(which is at least the word-aligned object_size). If a cache were created
with an object_size smaller than sizeof(void *), the in-object stored
freelist pointer would overwrite the redzone (e.g. with boot param
"slub_debug=ZF"):
BUG test (Tainted: G B ): Right Redzone overwritten
-----------------------------------------------------------------------------
INFO: 0xffff957ead1c05de-0xffff957ead1c05df @offset=1502. First byte 0x1a instead of 0xbb
INFO: Slab 0xffffef3950b47000 objects=170 used=170 fp=0x0000000000000000 flags=0x8000000000000200
INFO: Object 0xffff957ead1c05d8 @offset=1496 fp=0xffff957ead1c0620
Redzone (____ptrval____): bb bb bb bb bb bb bb bb ........
Object (____ptrval____): f6 f4 a5 40 1d e8 ...@..
Redzone (____ptrval____): 1a aa ..
Padding (____ptrval____): 00 00 00 00 00 00 00 00 ........
Store the freelist pointer out of line when object_size is smaller than
sizeof(void *) and redzoning is enabled.
Additionally remove the "smaller than sizeof(void *)" check under
CONFIG_DEBUG_VM in kmem_cache_sanity_check() as it is now redundant:
SLAB and SLOB both handle small sizes.
(Note that no caches within this size range are known to exist in the
kernel currently.)
Fixes: 81819f0fc828 ("SLUB core")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kees Cook <keescook(a)chromium.org>
---
mm/slab_common.c | 3 +--
mm/slub.c | 8 +++++---
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a4a571428c51..7cab77655f11 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -97,8 +97,7 @@ EXPORT_SYMBOL(kmem_cache_size);
#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
- if (!name || in_interrupt() || size < sizeof(void *) ||
- size > KMALLOC_MAX_SIZE) {
+ if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
pr_err("kmem_cache_create(%s) integrity check failed\n", name);
return -EINVAL;
}
diff --git a/mm/slub.c b/mm/slub.c
index f91d9fe7d0d8..f58cfd456548 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3734,15 +3734,17 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
*/
s->inuse = size;
- if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
- s->ctor)) {
+ if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
+ ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
+ s->ctor) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
* kmem_cache_free.
*
* This is the case if we do RCU, have a constructor or
- * destructor or are poisoning the objects.
+ * destructor, are poisoning the objects, or are
+ * redzoning an object smaller than sizeof(void *).
*
* The assumption that s->offset >= s->inuse means free
* pointer is outside of the object is used in the
--
2.25.1
Since LLVM commit 3787ee4, the '-stack-alignment' flag has been dropped [1],
leading to the following error message when building a LTO kernel with
Clang-13 and LLD-13:
ld.lld: error: -plugin-opt=-: ld.lld: Unknown command line argument
'-stack-alignment=8'. Try 'ld.lld --help'
ld.lld: Did you mean '--stackrealign=8'?
It also appears that the '-code-model' flag is not necessary anymore starting
with LLVM-9 [2].
Drop '-code-model' and make '-stack-alignment' conditional on LLD < 13.0.0.
This is for linux-stable 5.12.
Another patch will be submitted for 5.13 shortly (unless there are objections).
Discussion: https://github.com/ClangBuiltLinux/linux/issues/1377
[1]: https://reviews.llvm.org/D103048
[2]: https://reviews.llvm.org/D52322
Signed-off-by: Tor Vic <torvic9(a)mailbox.org>
---
arch/x86/Makefile | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1f2e5bf..2855a1a 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -192,8 +192,9 @@ endif
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
ifdef CONFIG_LTO_CLANG
-KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \
- -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
+KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
endif
ifdef CONFIG_X86_NEED_RELOCS
--
2.32.0
try_grab_compound_head() is used to grab a reference to a page from
get_user_pages_fast(), which is only protected against concurrent
freeing of page tables (via local_irq_save()), but not against
concurrent TLB flushes, freeing of data pages, or splitting of compound
pages.
Because no reference is held to the page when try_grab_compound_head()
is called, the page may have been freed and reallocated by the time its
refcount has been elevated; therefore, once we're holding a stable
reference to the page, the caller re-checks whether the PTE still points
to the same page (with the same access rights).
The problem is that try_grab_compound_head() has to grab a reference on
the head page; but between the time we look up what the head page is and
the time we actually grab a reference on the head page, the compound
page may have been split up (either explicitly through split_huge_page()
or by freeing the compound page to the buddy allocator and then
allocating its individual order-0 pages).
If that happens, get_user_pages_fast() may end up returning the right
page but lifting the refcount on a now-unrelated page, leading to
use-after-free of pages.
To fix it:
Re-check whether the pages still belong together after lifting the
refcount on the head page.
Move anything else that checks compound_head(page) below the refcount
increment.
This can't actually happen on bare-metal x86 (because there, disabling
IRQs locks out remote TLB flushes), but it can happen on virtualized x86
(e.g. under KVM) and probably also on arm64. The race window is pretty
narrow, and constantly allocating and shattering hugepages isn't exactly
fast; for now I've only managed to reproduce this in an x86 KVM guest with
an artificially widened timing window (by adding a loop that repeatedly
calls `inl(0x3f8 + 5)` in `try_get_compound_head()` to force VM exits,
so that PV TLB flushes are used instead of IPIs).
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Kirill A. Shutemov <kirill(a)shutemov.name>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: stable(a)vger.kernel.org
Fixes: 7aef4172c795 ("mm: handle PTE-mapped tail pages in gerneric fast gup implementaiton")
Signed-off-by: Jann Horn <jannh(a)google.com>
---
mm/gup.c | 54 +++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 15 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index 3ded6a5f26b2..1f9c0ac15073 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -43,8 +43,21 @@ static void hpage_pincount_sub(struct page *page, int refs)
atomic_sub(refs, compound_pincount_ptr(page));
}
+/* Equivalent to calling put_page() @refs times. */
+static void put_page_refs(struct page *page, int refs)
+{
+ VM_BUG_ON_PAGE(page_ref_count(page) < refs, page);
+ /*
+ * Calling put_page() for each ref is unnecessarily slow. Only the last
+ * ref needs a put_page().
+ */
+ if (refs > 1)
+ page_ref_sub(page, refs - 1);
+ put_page(page);
+}
+
/*
* Return the compound head page with ref appropriately incremented,
* or NULL if that failed.
*/
@@ -55,8 +68,23 @@ static inline struct page *try_get_compound_head(struct page *page, int refs)
if (WARN_ON_ONCE(page_ref_count(head) < 0))
return NULL;
if (unlikely(!page_cache_add_speculative(head, refs)))
return NULL;
+
+ /*
+ * At this point we have a stable reference to the head page; but it
+ * could be that between the compound_head() lookup and the refcount
+ * increment, the compound page was split, in which case we'd end up
+ * holding a reference on a page that has nothing to do with the page
+ * we were given anymore.
+ * So now that the head page is stable, recheck that the pages still
+ * belong together.
+ */
+ if (unlikely(compound_head(page) != head)) {
+ put_page_refs(head, refs);
+ return NULL;
+ }
+
return head;
}
/*
@@ -94,25 +122,28 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
if (unlikely((flags & FOLL_LONGTERM) &&
!is_pinnable_page(page)))
return NULL;
+ /*
+ * CAUTION: Don't use compound_head() on the page before this
+ * point, the result won't be stable.
+ */
+ page = try_get_compound_head(page, refs);
+ if (!page)
+ return NULL;
+
/*
* When pinning a compound page of order > 1 (which is what
* hpage_pincount_available() checks for), use an exact count to
* track it, via hpage_pincount_add/_sub().
*
* However, be sure to *also* increment the normal page refcount
* field at least once, so that the page really is pinned.
*/
- if (!hpage_pincount_available(page))
- refs *= GUP_PIN_COUNTING_BIAS;
-
- page = try_get_compound_head(page, refs);
- if (!page)
- return NULL;
-
if (hpage_pincount_available(page))
hpage_pincount_add(page, refs);
+ else
+ page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
orig_refs);
@@ -134,16 +165,9 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
else
refs *= GUP_PIN_COUNTING_BIAS;
}
- VM_BUG_ON_PAGE(page_ref_count(page) < refs, page);
- /*
- * Calling put_page() for each ref is unnecessarily slow. Only the last
- * ref needs a put_page().
- */
- if (refs > 1)
- page_ref_sub(page, refs - 1);
- put_page(page);
+ put_page_refs(page, refs);
}
/**
* try_grab_page() - elevate a page's refcount by a flag-dependent amount
base-commit: 614124bea77e452aa6df7a8714e8bc820b489922
--
2.32.0.272.g935e593368-goog
This is the start of the stable review cycle for the 4.19.194 release.
There are 58 patches in this series; all will be posted as responses
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 10 Jun 2021 17:59:18 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.194-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.194-rc1
Jan Beulich <jbeulich(a)suse.com>
xen-pciback: redo VF placement in the virtual topology
Cheng Jian <cj.chengjian(a)huawei.com>
sched/fair: Optimize select_idle_cpu
Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
ACPI: EC: Look for ECDT EC after calling acpi_load_tables()
Erik Schmauss <erik.schmauss(a)intel.com>
ACPI: probe ECDT before loading AML tables regardless of module-level code flag
Marc Zyngier <maz(a)kernel.org>
KVM: arm64: Fix debug register indexing
Sean Christopherson <seanjc(a)google.com>
KVM: SVM: Truncate GPR value for DR and CR accesses in !64-bit mode
Anand Jain <anand.jain(a)oracle.com>
btrfs: fix unmountable seed device after fstrim
Song Liu <songliubraving(a)fb.com>
perf/core: Fix corner case in perf_rotate_context()
Ian Rogers <irogers(a)google.com>
perf/cgroups: Don't rotate events for cgroups unnecessarily
Michael Chan <michael.chan(a)broadcom.com>
bnxt_en: Remove the setting of dev_port.
Björn Töpel <bjorn.topel(a)gmail.com>
selftests/bpf: Avoid running unprivileged tests with alignment requirements
Björn Töpel <bjorn.topel(a)gmail.com>
selftests/bpf: add "any alignment" annotation for some tests
David S. Miller <davem(a)davemloft.net>
bpf: Apply F_NEEDS_EFFICIENT_UNALIGNED_ACCESS to more ACCEPT test cases.
David S. Miller <davem(a)davemloft.net>
bpf: Make more use of 'any' alignment in test_verifier.c
David S. Miller <davem(a)davemloft.net>
bpf: Adjust F_NEEDS_EFFICIENT_UNALIGNED_ACCESS handling in test_verifier.c
David S. Miller <davem(a)davemloft.net>
bpf: Add BPF_F_ANY_ALIGNMENT.
Joe Stringer <joe(a)wand.net.nz>
selftests/bpf: Generalize dummy program types
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: test make sure to run unpriv test cases in test_verifier
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: fix test suite to enable all unpriv program types
Mina Almasry <almasrymina(a)google.com>
mm, hugetlb: fix simple resv_huge_pages underflow on UFFDIO_COPY
Josef Bacik <josef(a)toxicpanda.com>
btrfs: fixup error handling in fixup_inode_link_counts
Josef Bacik <josef(a)toxicpanda.com>
btrfs: return errors from btrfs_del_csums in cleanup_ref_head
Josef Bacik <josef(a)toxicpanda.com>
btrfs: fix error handling in btrfs_del_csums
Josef Bacik <josef(a)toxicpanda.com>
btrfs: mark ordered extent and inode with error if we fail to finish
Thomas Gleixner <tglx(a)linutronix.de>
x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing
Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
nfc: fix NULL ptr dereference in llcp_sock_getname() after failed connect
Junxiao Bi <junxiao.bi(a)oracle.com>
ocfs2: fix data corruption by fallocate
Mark Rutland <mark.rutland(a)arm.com>
pid: take a reference when initializing `cad_pid`
Phil Elwell <phil(a)raspberrypi.com>
usb: dwc2: Fix build in periphal-only mode
Ye Bin <yebin10(a)huawei.com>
ext4: fix bug on in ext4_es_cache_extent as ext4_split_extent_at failed
Marek Vasut <marex(a)denx.de>
ARM: dts: imx6q-dhcom: Add PU,VDD1P1,VDD2P5 regulators
Carlos M <carlos.marr.pz(a)gmail.com>
ALSA: hda: Fix for mute key LED for HP Pavilion 15-CK0xx
Takashi Iwai <tiwai(a)suse.de>
ALSA: timer: Fix master timer notification
Ahelenia Ziemiańska <nabijaczleweli(a)nabijaczleweli.xyz>
HID: multitouch: require Finger field to mark Win8 reports as MT
Pavel Skripkin <paskripkin(a)gmail.com>
net: caif: fix memory leak in cfusbl_device_notify
Pavel Skripkin <paskripkin(a)gmail.com>
net: caif: fix memory leak in caif_device_notify
Pavel Skripkin <paskripkin(a)gmail.com>
net: caif: add proper error handling
Pavel Skripkin <paskripkin(a)gmail.com>
net: caif: added cfserl_release function
Lin Ma <linma(a)zju.edu.cn>
Bluetooth: use correct lock to prevent UAF of hdev object
Lin Ma <linma(a)zju.edu.cn>
Bluetooth: fix the erroneous flush_work() order
Hoang Le <hoang.h.le(a)dektech.com.au>
tipc: fix unique bearer names sanity check
Hoang Le <hoang.h.le(a)dektech.com.au>
tipc: add extack messages for bearer/media failure
Magnus Karlsson <magnus.karlsson(a)intel.com>
ixgbevf: add correct exception tracing for XDP
Wei Yongjun <weiyongjun1(a)huawei.com>
ieee802154: fix error return code in ieee802154_llsec_getparams()
Zhen Lei <thunder.leizhen(a)huawei.com>
ieee802154: fix error return code in ieee802154_add_iface()
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nfnetlink_cthelper: hit EBUSY on updates if size mismatches
Arnd Bergmann <arnd(a)arndb.de>
HID: i2c-hid: fix format string mismatch
Zhen Lei <thunder.leizhen(a)huawei.com>
HID: pidff: fix error return code in hid_pidff_init()
Julian Anastasov <ja(a)ssi.bg>
ipvs: ignore IP_VS_SVC_F_HASHED flag when adding service
Max Gurtovoy <mgurtovoy(a)nvidia.com>
vfio/platform: fix module_put call in error flow
Wei Yongjun <weiyongjun1(a)huawei.com>
samples: vfio-mdev: fix error handing in mdpy_fb_probe()
Randy Dunlap <rdunlap(a)infradead.org>
vfio/pci: zap_vma_ptes() needs MMU
Zhen Lei <thunder.leizhen(a)huawei.com>
vfio/pci: Fix error return code in vfio_ecap_init()
Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
efi: cper: fix snprintf() use in cper_dimm_err_location()
Heiner Kallweit <hkallweit1(a)gmail.com>
efi: Allow EFI_MEMORY_XP and EFI_MEMORY_RO both to be cleared
Anant Thazhemadam <anant.thazhemadam(a)gmail.com>
nl80211: validate key indexes for cfg80211_registered_device
Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
ALSA: usb: update old-style static const declaration
Grant Grundler <grundler(a)chromium.org>
net: usb: cdc_ncm: don't spew notifications
-------------
Diffstat:
Makefile | 4 +-
arch/arm/boot/dts/imx6q-dhcom-som.dtsi | 12 ++
arch/arm64/kvm/sys_regs.c | 42 ++--
arch/x86/include/asm/apic.h | 1 +
arch/x86/kernel/apic/apic.c | 1 +
arch/x86/kernel/apic/vector.c | 20 ++
arch/x86/kvm/svm.c | 8 +-
drivers/acpi/bus.c | 42 ++--
drivers/firmware/efi/cper.c | 4 +-
drivers/firmware/efi/memattr.c | 5 -
drivers/hid/hid-multitouch.c | 10 +-
drivers/hid/i2c-hid/i2c-hid-core.c | 4 +-
drivers/hid/usbhid/hid-pidff.c | 1 +
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 -
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 +
drivers/net/usb/cdc_ncm.c | 12 +-
drivers/usb/dwc2/core_intr.c | 4 +
drivers/vfio/pci/Kconfig | 1 +
drivers/vfio/pci/vfio_pci_config.c | 2 +-
drivers/vfio/platform/vfio_platform_common.c | 2 +-
drivers/xen/xen-pciback/vpci.c | 14 +-
fs/btrfs/extent-tree.c | 12 +-
fs/btrfs/file-item.c | 10 +-
fs/btrfs/inode.c | 12 ++
fs/btrfs/tree-log.c | 13 +-
fs/ext4/extents.c | 43 +++--
fs/ocfs2/file.c | 55 +++++-
include/linux/perf_event.h | 5 +
include/linux/usb/usbnet.h | 2 +
include/net/caif/caif_dev.h | 2 +-
include/net/caif/cfcnfg.h | 2 +-
include/net/caif/cfserl.h | 1 +
include/uapi/linux/bpf.h | 14 ++
init/main.c | 2 +-
kernel/bpf/syscall.c | 7 +-
kernel/bpf/verifier.c | 3 +
kernel/events/core.c | 62 +++---
kernel/sched/fair.c | 7 +-
mm/hugetlb.c | 14 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_sock.c | 4 +-
net/caif/caif_dev.c | 13 +-
net/caif/caif_usb.c | 14 +-
net/caif/cfcnfg.c | 16 +-
net/caif/cfserl.c | 5 +
net/ieee802154/nl-mac.c | 4 +-
net/ieee802154/nl-phy.c | 4 +-
net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
net/netfilter/nfnetlink_cthelper.c | 8 +-
net/nfc/llcp_sock.c | 2 +
net/tipc/bearer.c | 94 ++++++---
net/wireless/core.h | 2 +
net/wireless/nl80211.c | 7 +-
net/wireless/util.c | 39 +++-
samples/vfio-mdev/mdpy-fb.c | 13 +-
sound/core/timer.c | 3 +-
sound/pci/hda/patch_realtek.c | 1 +
sound/usb/mixer_quirks.c | 2 +-
tools/include/uapi/linux/bpf.h | 14 ++
tools/lib/bpf/bpf.c | 8 +-
tools/lib/bpf/bpf.h | 2 +-
tools/testing/selftests/bpf/test_align.c | 4 +-
tools/testing/selftests/bpf/test_verifier.c | 224 ++++++++++++++++------
63 files changed, 676 insertions(+), 275 deletions(-)