This commit resolves an issue in the tegra-xudc USB gadget driver that
incorrectly fetched USB3 PHY instances. The problem stemmed from the
assumption of a one-to-one correspondence between USB2 and USB3 PHY
names and their association with physical USB ports in the device tree.
Previously, the driver associated USB3 PHY names directly with the USB3
instance number, leading to mismatches when mapping the physical USB
ports. For instance, if using USB3-1 PHY, the driver expect the
corresponding PHY name as 'usb3-1'. However, the physical USB ports in
the device tree were designated as USB2-0 and USB3-0 as we only have
one device controller, causing a misalignment.
This commit rectifies the issue by adjusting the PHY naming logic.
Now, the driver correctly correlates the USB2 and USB3 PHY instances,
allowing the USB2-0 and USB3-1 PHYs to form a physical USB port pair
while accurately reflecting their configuration in the device tree by
naming them USB2-0 and USB3-0, respectively.
The change ensures that the PHY and PHY names align appropriately,
resolving the mismatch between physical USB ports and their associated
names in the device tree.
Fixes: b4e19931c98a ("usb: gadget: tegra-xudc: Support multiple device modes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wayne Chang <waynec(a)nvidia.com>
---
V1 -> V2:no change
drivers/usb/gadget/udc/tegra-xudc.c | 39 ++++++++++++++++++-----------
1 file changed, 25 insertions(+), 14 deletions(-)
diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
index cb85168fd00c..7aa46d426f31 100644
--- a/drivers/usb/gadget/udc/tegra-xudc.c
+++ b/drivers/usb/gadget/udc/tegra-xudc.c
@@ -3491,8 +3491,8 @@ static void tegra_xudc_device_params_init(struct tegra_xudc *xudc)
static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
{
- int err = 0, usb3;
- unsigned int i;
+ int err = 0, usb3_companion_port;
+ unsigned int i, j;
xudc->utmi_phy = devm_kcalloc(xudc->dev, xudc->soc->num_phys,
sizeof(*xudc->utmi_phy), GFP_KERNEL);
@@ -3520,7 +3520,7 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
if (IS_ERR(xudc->utmi_phy[i])) {
err = PTR_ERR(xudc->utmi_phy[i]);
dev_err_probe(xudc->dev, err,
- "failed to get usb2-%d PHY\n", i);
+ "failed to get PHY for phy-name usb2-%d\n", i);
goto clean_up;
} else if (xudc->utmi_phy[i]) {
/* Get usb-phy, if utmi phy is available */
@@ -3539,19 +3539,30 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
}
/* Get USB3 phy */
- usb3 = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i);
- if (usb3 < 0)
+ usb3_companion_port = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i);
+ if (usb3_companion_port < 0)
continue;
- snprintf(phy_name, sizeof(phy_name), "usb3-%d", usb3);
- xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name);
- if (IS_ERR(xudc->usb3_phy[i])) {
- err = PTR_ERR(xudc->usb3_phy[i]);
- dev_err_probe(xudc->dev, err,
- "failed to get usb3-%d PHY\n", usb3);
- goto clean_up;
- } else if (xudc->usb3_phy[i])
- dev_dbg(xudc->dev, "usb3-%d PHY registered", usb3);
+ for (j = 0; j < xudc->soc->num_phys; j++) {
+ snprintf(phy_name, sizeof(phy_name), "usb3-%d", j);
+ xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name);
+ if (IS_ERR(xudc->usb3_phy[i])) {
+ err = PTR_ERR(xudc->usb3_phy[i]);
+ dev_err_probe(xudc->dev, err,
+ "failed to get PHY for phy-name usb3-%d\n", j);
+ goto clean_up;
+ } else if (xudc->usb3_phy[i]) {
+ int usb2_port =
+ tegra_xusb_padctl_get_port_number(xudc->utmi_phy[i]);
+ int usb3_port =
+ tegra_xusb_padctl_get_port_number(xudc->usb3_phy[i]);
+ if (usb3_port == usb3_companion_port) {
+ dev_dbg(xudc->dev, "USB2 port %d is paired with USB3 port %d for device mode port %d\n",
+ usb2_port, usb3_port, i);
+ break;
+ }
+ }
+ }
}
return err;
--
2.25.1
when AOP_WRITEPAGE_ACTIVATE is returned (as NFS does when it detects
congestion) it is important that the folio is redirtied.
nfs_writepage_locked() doesn't do this, so files can become corrupted as
writes can be lost.
Note that this is not needed in v6.8 as AOP_WRITEPAGE_ACTIVATE cannot be
returned. It is needed for kernels v5.18..v6.7. Prior to 6.3 the patch
is different as it needs to mention "page", not "folio".
Reported-and-tested-by: Jacek Tomaka <Jacek.Tomaka(a)poczta.fm>
Fixes: 6df25e58532b ("nfs: remove reliance on bdi congestion")
Signed-off-by: NeilBrown <neilb(a)suse.de>
---
fs/nfs/write.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b664caea8b4e..9e345d3c305a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -668,8 +668,10 @@ static int nfs_writepage_locked(struct folio *folio,
int err;
if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
+ NFS_SERVER(inode)->write_congested) {
+ folio_redirty_for_writepage(wbc, folio);
return AOP_WRITEPAGE_ACTIVATE;
+ }
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0, false,
--
2.43.0
The commit 80dd33cf72d1 ("drivers: base: Fix device link removal")
introduces a workqueue to release the consumer and supplier devices used
in the devlink.
In the job queued, devices are release and in turn, when all the
references to these devices are dropped, the release function of the
device itself is called.
Nothing is present to provide some synchronisation with this workqueue
in order to ensure that all ongoing releasing operations are done and
so, some other operations can be started safely.
For instance, in the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are released and related devlinks are removed
(jobs pushed in the workqueue).
During the step 2, OF nodes are destroyed but, without any
synchronisation with devlink removal jobs, of_overlay_remove() can raise
warnings related to missing of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2
Indeed, the missing of_node_put() call is going to be done, too late,
from the workqueue job execution.
Introduce device_link_wait_removal() to offer a way to synchronize
operations waiting for the end of devlink removals (i.e. end of
workqueue jobs).
Also, as a flushing operation is done on the workqueue, the workqueue
used is moved from a system-wide workqueue to a local one.
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
Tested-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
---
drivers/base/core.c | 26 +++++++++++++++++++++++---
include/linux/device.h | 1 +
2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d5f4e4aac09b..48b28c59c592 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -532,12 +533,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ flush_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4099,9 +4114,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/include/linux/device.h b/include/linux/device.h
index 1795121dee9a..d7d8305a72e8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1249,6 +1249,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
--
2.43.0
In the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are destroyed and devlinks are removed.
During the step 2, OF nodes are destroyed but
__of_changeset_entry_destroy() can raise warnings related to missing
of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2 ...
Indeed, during the devlink removals performed at step 1, the removal
itself releasing the device (and the attached of_node) is done by a job
queued in a workqueue and so, it is done asynchronously with respect to
function calls.
When the warning is present, of_node_put() will be called but wrongly
too late from the workqueue job.
In order to be sure that any ongoing devlink removals are done before
the of_node destruction, synchronize the of_changeset_destroy() with the
devlink removals.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
Reviewed-by: Saravana Kannan <saravanak(a)google.com>
Tested-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
---
drivers/of/dynamic.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 3bf27052832f..4d57a4e34105 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) "OF: " fmt
+#include <linux/device.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs)
{
struct of_changeset_entry *ce, *cen;
+ /*
+ * When a device is deleted, the device links to/from it are also queued
+ * for deletion. Until these device links are freed, the devices
+ * themselves aren't freed. If the device being deleted is due to an
+ * overlay change, this device might be holding a reference to a device
+ * node that will be freed. So, wait until all already pending device
+ * links are deleted before freeing a device node. This ensures we don't
+ * free any device node that has a non-zero reference count.
+ */
+ device_link_wait_removal();
+
list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
__of_changeset_entry_destroy(ce);
}
--
2.43.0
Hello,
54d217406afe250d7a768783baaa79a035f21d38 fixed an issue in
drm_dp_add_payload_part2 that lead to a NULL pointer dereference in
case state is NULL.
The change was (accidentally?) reverted in
5aa1dfcdf0a429e4941e2eef75b006a8c7a8ac49 and the problem reappeared.
The issue is rather spurious, but I've had it appear when unplugging a
thunderbolt dock.
#regzbot introduced 5aa1dfcdf0a429e4941e2eef75b006a8c7a8ac49
There are reports that since version 6.7 update-grub fails to find the
device of the root on systems without initrd and on a single device.
This looks like the device name changed in the output of
/proc/self/mountinfo:
6.5-rc5 working
18 1 0:16 / / rw,noatime - btrfs /dev/sda8 ...
6.7 not working:
17 1 0:15 / / rw,noatime - btrfs /dev/root ...
and "update-grub" shows this error:
/usr/sbin/grub-probe: error: cannot find a device for / (is /dev mounted?)
This looks like it's related to the device name, but grub-probe
recognizes the "/dev/root" path and tries to find the underlying device.
However there's a special case for some filesystems, for btrfs in
particular.
The generic root device detection heuristic is not done and it all
relies on reading the device infos by a btrfs specific ioctl. This ioctl
returns the device name as it was saved at the time of device scan (in
this case it's /dev/root).
The change in 6.7 for temp_fsid to allow several single device
filesystem to exist with the same fsid (and transparently generate a new
UUID at mount time) was to skip caching/registering such devices.
This also skipped mounted device. One step of scanning is to check if
the device name hasn't changed, and if yes then update the cached value.
This broke the grub-probe as it always read the device /dev/root and
couldn't find it in the system. A temporary workaround is to create a
symlink but this does not survive reboot.
The right fix is to allow updating the device path of a mounted
filesystem even if this is a single device one.
In the fix, check if the device's major:minor number matches with the
cached device. If they do, then we can allow the scan to happen so that
device_list_add() can take care of updating the device path. The file
descriptor remains unchanged.
This does not affect the temp_fsid feature, the UUID of the mounted
filesystem remains the same and the matching is based on device major:minor
which is unique per mounted filesystem.
This covers the path when the device (that exists for all mounted
devices) name changes, updating /dev/root to /dev/sdx. Any other single
device with filesystem and is not mounted is still skipped.
Note that if a system is booted and initial mount is done on the
/dev/root device, this will be the cached name of the device. Only after
the command "btrfs device scan" it will change as it triggers the
rename.
The fix was verified by users whose systems were affected.
CC: stable(a)vger.kernel.org # 6.7+
Fixes: bc27d6f0aa0e ("btrfs: scan but don't register device on single device filesystem")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=218353
Link: https://lore.kernel.org/lkml/CAKLYgeJ1tUuqLcsquwuFqjDXPSJpEiokrWK2gisPKDZLs…
Signed-off-by: Anand Jain <anand.jain(a)oracle.com>
Tested-by: Alex Romosan <aromosan(a)gmail.com>
Tested-by: CHECK_1234543212345(a)protonmail.com
---
v2:
Updated git commit log from [PATCH] with permission. Thx.
[PATCH] btrfs: always scan a single device when mounted
Add Tested-by.
fs/btrfs/volumes.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 474ab7ed65ea..192c540a650c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1299,6 +1299,31 @@ int btrfs_forget_devices(dev_t devt)
return ret;
}
+static bool btrfs_skip_registration(struct btrfs_super_block *disk_super,
+ dev_t devt, bool mount_arg_dev)
+{
+ struct btrfs_fs_devices *fs_devices;
+
+ list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+ struct btrfs_device *device;
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ if (device->devt == devt) {
+ mutex_unlock(&fs_devices->device_list_mutex);
+ return false;
+ }
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+ }
+
+ if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
+ !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING))
+ return true;
+
+ return false;
+}
+
/*
* Look for a btrfs signature on a device. This may be called out of the mount path
* and we are not allowed to call set_blocksize during the scan. The superblock
@@ -1316,6 +1341,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
struct btrfs_device *device = NULL;
struct bdev_handle *bdev_handle;
u64 bytenr, bytenr_orig;
+ dev_t devt = 0;
int ret;
lockdep_assert_held(&uuid_mutex);
@@ -1355,18 +1381,16 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
goto error_bdev_put;
}
- if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
- !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING)) {
- dev_t devt;
+ ret = lookup_bdev(path, &devt);
+ if (ret)
+ btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
+ path, ret);
- ret = lookup_bdev(path, &devt);
- if (ret)
- btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
- path, ret);
- else
+ if (btrfs_skip_registration(disk_super, devt, mount_arg_dev)) {
+ pr_debug("BTRFS: skip registering single non-seed device %s\n",
+ path);
+ if (devt)
btrfs_free_stale_devices(devt, NULL);
-
- pr_debug("BTRFS: skip registering single non-seed device %s\n", path);
device = NULL;
goto free_disk_super;
}
--
2.39.3
This commit resolves an issue in the tegra-xudc USB gadget driver that
incorrectly fetched USB3 PHY instances. The problem stemmed from the
assumption of a one-to-one correspondence between USB2 and USB3 PHY
names and their association with physical USB ports in the device tree.
Previously, the driver associated USB3 PHY names directly with the USB3
instance number, leading to mismatches when mapping the physical USB
ports. For instance, if using USB3-1 PHY, the driver expect the
corresponding PHY name as 'usb3-1'. However, the physical USB ports in
the device tree were designated as USB2-0 and USB3-0 as we only have
one device controller, causing a misalignment.
This commit rectifies the issue by adjusting the PHY naming logic.
Now, the driver correctly correlates the USB2 and USB3 PHY instances,
allowing the USB2-0 and USB3-1 PHYs to form a physical USB port pair
while accurately reflecting their configuration in the device tree by
naming them USB2-0 and USB3-0, respectively.
The change ensures that the PHY and PHY names align appropriately,
resolving the mismatch between physical USB ports and their associated
names in the device tree.
Fixes: b4e19931c98a ("usb: gadget: tegra-xudc: Support multiple device modes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wayne Chang <waynec(a)nvidia.com>
---
drivers/usb/gadget/udc/tegra-xudc.c | 39 ++++++++++++++++++-----------
1 file changed, 25 insertions(+), 14 deletions(-)
diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
index cb85168fd00c..7aa46d426f31 100644
--- a/drivers/usb/gadget/udc/tegra-xudc.c
+++ b/drivers/usb/gadget/udc/tegra-xudc.c
@@ -3491,8 +3491,8 @@ static void tegra_xudc_device_params_init(struct tegra_xudc *xudc)
static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
{
- int err = 0, usb3;
- unsigned int i;
+ int err = 0, usb3_companion_port;
+ unsigned int i, j;
xudc->utmi_phy = devm_kcalloc(xudc->dev, xudc->soc->num_phys,
sizeof(*xudc->utmi_phy), GFP_KERNEL);
@@ -3520,7 +3520,7 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
if (IS_ERR(xudc->utmi_phy[i])) {
err = PTR_ERR(xudc->utmi_phy[i]);
dev_err_probe(xudc->dev, err,
- "failed to get usb2-%d PHY\n", i);
+ "failed to get PHY for phy-name usb2-%d\n", i);
goto clean_up;
} else if (xudc->utmi_phy[i]) {
/* Get usb-phy, if utmi phy is available */
@@ -3539,19 +3539,30 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
}
/* Get USB3 phy */
- usb3 = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i);
- if (usb3 < 0)
+ usb3_companion_port = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i);
+ if (usb3_companion_port < 0)
continue;
- snprintf(phy_name, sizeof(phy_name), "usb3-%d", usb3);
- xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name);
- if (IS_ERR(xudc->usb3_phy[i])) {
- err = PTR_ERR(xudc->usb3_phy[i]);
- dev_err_probe(xudc->dev, err,
- "failed to get usb3-%d PHY\n", usb3);
- goto clean_up;
- } else if (xudc->usb3_phy[i])
- dev_dbg(xudc->dev, "usb3-%d PHY registered", usb3);
+ for (j = 0; j < xudc->soc->num_phys; j++) {
+ snprintf(phy_name, sizeof(phy_name), "usb3-%d", j);
+ xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name);
+ if (IS_ERR(xudc->usb3_phy[i])) {
+ err = PTR_ERR(xudc->usb3_phy[i]);
+ dev_err_probe(xudc->dev, err,
+ "failed to get PHY for phy-name usb3-%d\n", j);
+ goto clean_up;
+ } else if (xudc->usb3_phy[i]) {
+ int usb2_port =
+ tegra_xusb_padctl_get_port_number(xudc->utmi_phy[i]);
+ int usb3_port =
+ tegra_xusb_padctl_get_port_number(xudc->usb3_phy[i]);
+ if (usb3_port == usb3_companion_port) {
+ dev_dbg(xudc->dev, "USB2 port %d is paired with USB3 port %d for device mode port %d\n",
+ usb2_port, usb3_port, i);
+ break;
+ }
+ }
+ }
}
return err;
--
2.25.1
From: Xiubo Li <xiubli(a)redhat.com>
The osd code has remove cursor initilizing code and this will make
the sparse read state into a infinite loop. We should initialize
the cursor just before each sparse-read in messnger v2.
Cc: stable(a)vger.kernel.org
URL: https://tracker.ceph.com/issues/64607
Fixes: 8e46a2d068c9 ("libceph: just wait for more data to be available on the socket")
Reported-by: Luis Henriques <lhenriques(a)suse.de>
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
V2:
- Just removed the unnecessary 'sparse_read_total' check.
net/ceph/messenger_v2.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index a0ca5414b333..ab3ab130a911 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -2034,6 +2034,9 @@ static int prepare_sparse_read_data(struct ceph_connection *con)
if (!con_secure(con))
con->in_data_crc = -1;
+ ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
+ con->in_msg->sparse_read_total);
+
reset_in_kvecs(con);
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
con->v2.data_len_remain = data_len(msg);
--
2.43.0
This is the start of the stable review cycle for the 5.15.151 release.
There are 83 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 07 Mar 2024 11:31:11 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.151-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.15.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.15.151-rc2
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Gal Pressman <gal(a)nvidia.com>
Revert "tls: rx: move counting TlsDecryptErrors for sync"
Jakub Kicinski <kuba(a)kernel.org>
net: tls: fix async vs NIC crypto offload
Martynas Pumputis <m(a)lambda.lt>
bpf: Derive source IP addr via bpf_*_fib_lookup()
Louis DeLosSantos <louis.delos.devel(a)gmail.com>
bpf: Add table ID to bpf_fib_lookup BPF helper
Martin KaFai Lau <martin.lau(a)kernel.org>
bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Teach lockdep about icc_bw_lock order"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Fix locking for runpm vs reclaim"
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Kuniyuki Iwashima <kuniyu(a)amazon.com>
af_unix: Drop oob_skb ref before purging queue in GC.
Max Krummenacher <max.krummenacher(a)toradex.com>
Revert "drm/bridge: lt8912b: Register and attach our DSI device at probe"
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Abeni <pabeni(a)redhat.com>
mptcp: push at DSS boundaries
Geliang Tang <tanggeliang(a)kylinos.cn>
mptcp: add needs_id for netlink appending addr
Jean Sacren <sakiwit(a)gmail.com>
mptcp: clean up harmless false expressions
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter in v6
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter
Paolo Abeni <pabeni(a)redhat.com>
mptcp: rename timer related helper to less confusing names
Paolo Abeni <pabeni(a)redhat.com>
mptcp: process pending subflow error on close
Paolo Abeni <pabeni(a)redhat.com>
mptcp: move __mptcp_error_report in protocol.c
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Tadeusz Struk <tstruk(a)gigaio.com>
dmaengine: ptdma: use consistent DMA masks
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
ALSA: firewire-lib: fix to check cycle continuity
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Jiri Slaby (SUSE) <jirislaby(a)kernel.org>
fbcon: always restore the old font data in fbcon_do_set_font()
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Sabrina Dubroca <sd(a)queasysnail.net>
tls: decrement decrypt_pending if no async completion will be called
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: use async as an in-out argument
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: assume crypto always calls our callback
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: move counting TlsDecryptErrors for sync
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't track the async count
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: factor out writing ContentType to cmsg
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: wrap decryption arguments in a structure
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't report text length from the bowels of decrypt
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: drop unnecessary arguments from tls_setup_from_iter()
Jakub Kicinski <kuba(a)kernel.org>
tls: hw: rx: use return value of tls_device_decrypted() to carry status
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: refactor decrypt_skb_update()
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't issue wake ups when data is decrypted
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the decryption status in socket context
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the record type in socket context
Oleksij Rempel <linux(a)rempel-privat.de>
igb: extend PTP timestamp adjustments to i211
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Florian Westphal <fw(a)strlen.de>
netfilter: bridge: confirm multicast packets before passing them up the stack
Florian Westphal <fw(a)strlen.de>
netfilter: let reset rules clean out conntrack entries
Florian Westphal <fw(a)strlen.de>
netfilter: make function op structures const
Florian Westphal <fw(a)strlen.de>
netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook
Florian Westphal <fw(a)strlen.de>
netfilter: nfnetlink_queue: silence bogus compiler warning
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Jakub Raczynski <j.raczynski(a)samsung.com>
stmmac: Clear variable when destroying workqueue
Justin Iurman <justin.iurman(a)uliege.be>
uapi: in6: replace temporary label with rfc9486
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Jakub Kicinski <kuba(a)kernel.org>
veth: try harder when allocating queue memory
Vasily Averin <vvs(a)openvz.org>
net: enable memcg accounting for veth queues
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Jakub Kicinski <kuba(a)kernel.org>
net: veth: clear GRO when clearing XDP even when down
Doug Smythies <dsmythies(a)telus.net>
cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf call back
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: disallow timeout for anonymous sets
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/pgtable.h | 2 +-
arch/x86/kernel/cpu/intel.c | 178 ++++++------
drivers/cpufreq/intel_pstate.c | 3 +
drivers/dma/fsl-qdma.c | 25 +-
drivers/dma/ptdma/ptdma-dmaengine.c | 2 -
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/gpu/drm/bridge/lontium-lt8912b.c | 11 +-
drivers/interconnect/core.c | 18 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 +++-
drivers/mtd/nand/spi/gigadevice.c | 6 +-
drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +-
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/net/veth.c | 40 +--
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
drivers/video/fbdev/core/fbcon.c | 8 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 +-
fs/cachefiles/bind.c | 3 +
fs/hugetlbfs/inode.c | 6 +-
include/linux/netfilter.h | 14 +-
include/net/ipv6_stubs.h | 5 +
include/net/netfilter/nf_conntrack.h | 8 +
include/net/strparser.h | 4 +
include/net/tls.h | 11 +-
include/uapi/linux/bpf.h | 37 ++-
include/uapi/linux/in6.h | 2 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/bridge/br_netfilter_hooks.c | 96 +++++++
net/bridge/netfilter/nf_conntrack_bridge.c | 30 ++
net/core/filter.c | 67 ++++-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +-
net/ipv4/netfilter/nf_reject_ipv4.c | 1 +
net/ipv6/addrconf.c | 7 +-
net/ipv6/af_inet6.c | 1 +
net/ipv6/netfilter/nf_reject_ipv6.c | 1 +
net/mptcp/diag.c | 3 +
net/mptcp/pm_netlink.c | 30 +-
net/mptcp/protocol.c | 123 +++++++--
net/mptcp/subflow.c | 36 ---
net/netfilter/core.c | 45 +--
net/netfilter/nf_conntrack_core.c | 21 +-
net/netfilter/nf_conntrack_netlink.c | 4 +-
net/netfilter/nf_conntrack_proto_tcp.c | 35 +++
net/netfilter/nf_nat_core.c | 2 +-
net/netfilter/nf_tables_api.c | 7 +
net/netfilter/nfnetlink_queue.c | 10 +-
net/netfilter/nft_compat.c | 20 ++
net/netlink/af_netlink.c | 2 +-
net/tls/tls_device.c | 6 +-
net/tls/tls_sw.c | 316 ++++++++++------------
net/unix/garbage.c | 22 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
sound/firewire/amdtp-stream.c | 2 +-
tools/include/uapi/linux/bpf.h | 37 ++-
tools/testing/selftests/net/mptcp/config | 2 +
69 files changed, 991 insertions(+), 529 deletions(-)
From: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
Commit e7794c14fd73 ("mmc: rpmb: fixes pause retune on all RPMB
partitions.") added a mask check for 'part_type', but the mask used was
wrong leading to the code intended for rpmb also being executed for GP3.
On some MMCs (but not all) this would make gp3 partition inaccessible:
armadillo:~# head -c 1 < /dev/mmcblk2gp3
head: standard input: I/O error
armadillo:~# dmesg -c
[ 422.976583] mmc2: running CQE recovery
[ 423.058182] mmc2: running CQE recovery
[ 423.137607] mmc2: running CQE recovery
[ 423.137802] blk_update_request: I/O error, dev mmcblk2gp3, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 4 prio class 0
[ 423.237125] mmc2: running CQE recovery
[ 423.318206] mmc2: running CQE recovery
[ 423.397680] mmc2: running CQE recovery
[ 423.397837] blk_update_request: I/O error, dev mmcblk2gp3, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0
[ 423.408287] Buffer I/O error on dev mmcblk2gp3, logical block 0, async page read
the part_type values of interest here are defined as follow:
main 0
boot0 1
boot1 2
rpmb 3
gp0 4
gp1 5
gp2 6
gp3 7
so mask with EXT_CSD_PART_CONFIG_ACC_MASK (7) to correctly identify rpmb
Fixes: e7794c14fd73 ("mmc: rpmb: fixes pause retune on all RPMB partitions.")
Cc: stable(a)vger.kernel.org
Cc: Jorge Ramirez-Ortiz <jorge(a)foundries.io>
Signed-off-by: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
---
A couple of notes:
- this doesn't fail on all eMMCs, I can still access gp3 on some models
but it seems to fail reliably with micron's "G1M15L"
- I've encountered this on the 5.10 backport (in 5.10.208), so that'll
need to be backported everywhere the fix was taken...
Thanks!
---
drivers/mmc/core/block.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 32d49100dff5..86efa6084696 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -874,10 +874,11 @@ static const struct block_device_operations mmc_bdops = {
static int mmc_blk_part_switch_pre(struct mmc_card *card,
unsigned int part_type)
{
- const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB;
+ const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK;
+ const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB;
int ret = 0;
- if ((part_type & mask) == mask) {
+ if ((part_type & mask) == rpmb) {
if (card->ext_csd.cmdq_en) {
ret = mmc_cmdq_disable(card);
if (ret)
@@ -892,10 +893,11 @@ static int mmc_blk_part_switch_pre(struct mmc_card *card,
static int mmc_blk_part_switch_post(struct mmc_card *card,
unsigned int part_type)
{
- const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB;
+ const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK;
+ const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB;
int ret = 0;
- if ((part_type & mask) == mask) {
+ if ((part_type & mask) == rpmb) {
mmc_retune_unpause(card->host);
if (card->reenable_cmdq && !card->ext_csd.cmdq_en)
ret = mmc_cmdq_enable(card);
---
base-commit: 5847c9777c303a792202c609bd761dceb60f4eed
change-id: 20240306-mmc-partswitch-c3a50b5084ae
Best regards,
--
Dominique Martinet | Asmadeus
In the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are destroyed and devlinks are removed.
During the step 2, OF nodes are destroyed but
__of_changeset_entry_destroy() can raise warnings related to missing
of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2 ...
Indeed, during the devlink removals performed at step 1, the removal
itself releasing the device (and the attached of_node) is done by a job
queued in a workqueue and so, it is done asynchronously with respect to
function calls.
When the warning is present, of_node_put() will be called but wrongly
too late from the workqueue job.
In order to be sure that any ongoing devlink removals are done before
the of_node destruction, synchronize the of_changeset_destroy() with the
devlink removals.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/of/dynamic.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 3bf27052832f..169e2a9ae22f 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) "OF: " fmt
+#include <linux/device.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -667,6 +668,12 @@ void of_changeset_destroy(struct of_changeset *ocs)
{
struct of_changeset_entry *ce, *cen;
+ /*
+ * Wait for any ongoing device link removals before destroying some of
+ * nodes.
+ */
+ device_link_wait_removal();
+
list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
__of_changeset_entry_destroy(ce);
}
--
2.43.0
The commit 80dd33cf72d1 ("drivers: base: Fix device link removal")
introduces a workqueue to release the consumer and supplier devices used
in the devlink.
In the job queued, devices are release and in turn, when all the
references to these devices are dropped, the release function of the
device itself is called.
Nothing is present to provide some synchronisation with this workqueue
in order to ensure that all ongoing releasing operations are done and
so, some other operations can be started safely.
For instance, in the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are released and related devlinks are removed
(jobs pushed in the workqueue).
During the step 2, OF nodes are destroyed but, without any
synchronisation with devlink removal jobs, of_overlay_remove() can raise
warnings related to missing of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2
Indeed, the missing of_node_put() call is going to be done, too late,
from the workqueue job execution.
Introduce device_link_wait_removal() to offer a way to synchronize
operations waiting for the end of devlink removals (i.e. end of
workqueue jobs).
Also, as a flushing operation is done on the workqueue, the workqueue
used is moved from a system-wide workqueue to a local one.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/base/core.c | 26 +++++++++++++++++++++++---
include/linux/device.h | 1 +
2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d5f4e4aac09b..48b28c59c592 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -532,12 +533,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ flush_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4099,9 +4114,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/include/linux/device.h b/include/linux/device.h
index 1795121dee9a..d7d8305a72e8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1249,6 +1249,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
--
2.43.0
The quilt patch titled
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
has been removed from the -mm tree. Its filename was
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
Date: Wed, 6 Mar 2024 14:03:56 +0000
There was previously a theoretical window where swapoff() could run and
teardown a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from a
test case. But there has been agreement based on code review that this is
possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was not free. This isn't present in get_swap_device()
because it doesn't make sense in general due to the race between getting
the reference and swapoff. So I've added an equivalent check directly in
free_swap_and_cache().
Details of how to provoke one possible issue (thanks to David Hildenbrand
for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still references by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff to succeed after process 2 reclaimed the swap cache
but before process1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Link: https://lkml.kernel.org/r/20240306140356.3974886-1-ryan.roberts@arm.com
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
--- a/mm/swapfile.c~mm-swap-fix-race-between-free_swap_and_cache-and-swapoff
+++ a/mm/swapfile.c
@@ -1232,6 +1232,11 @@ static unsigned char __swap_entry_free_l
* with get_swap_device() and put_swap_device(), unless the swap
* functions call get/put_swap_device() by themselves.
*
+ * Note that when only holding the PTL, swapoff might succeed immediately
+ * after freeing a swap entry. Therefore, immediately after
+ * __swap_entry_free(), the swap info might become stale and should not
+ * be touched without a prior get_swap_device().
+ *
* Check whether swap entry is valid in the swap device. If so,
* return pointer to swap_info_struct, and keep the swap entry valid
* via preventing the swap device from being swapoff, until
@@ -1609,13 +1614,19 @@ int free_swap_and_cache(swp_entry_t entr
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
+ if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
+ put_swap_device(p);
+ return 0;
+ }
+
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
The patch titled
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
has been added to the -mm mm-unstable branch. Its filename is
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
Date: Wed, 6 Mar 2024 14:03:56 +0000
There was previously a theoretical window where swapoff() could run and
teardown a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from a
test case. But there has been agreement based on code review that this is
possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was not free. This isn't present in get_swap_device()
because it doesn't make sense in general due to the race between getting
the reference and swapoff. So I've added an equivalent check directly in
free_swap_and_cache().
Details of how to provoke one possible issue (thanks to David Hildenbrand
for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still references by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff to succeed after process 2 reclaimed the swap cache
but before process1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Link: https://lkml.kernel.org/r/20240306140356.3974886-1-ryan.roberts@arm.com
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
--- a/mm/swapfile.c~mm-swap-fix-race-between-free_swap_and_cache-and-swapoff
+++ a/mm/swapfile.c
@@ -1232,6 +1232,11 @@ static unsigned char __swap_entry_free_l
* with get_swap_device() and put_swap_device(), unless the swap
* functions call get/put_swap_device() by themselves.
*
+ * Note that when only holding the PTL, swapoff might succeed immediately
+ * after freeing a swap entry. Therefore, immediately after
+ * __swap_entry_free(), the swap info might become stale and should not
+ * be touched without a prior get_swap_device().
+ *
* Check whether swap entry is valid in the swap device. If so,
* return pointer to swap_info_struct, and keep the swap entry valid
* via preventing the swap device from being swapoff, until
@@ -1609,13 +1614,19 @@ int free_swap_and_cache(swp_entry_t entr
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
+ if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
+ put_swap_device(p);
+ return 0;
+ }
+
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
In the scenario of entering hibernation with udisk in the system, if the
udisk was gone or resume fail in the thaw phase of hibernation. Its state
will be set to NOTATTACHED. At this point, usb_hub_wq was already freezed
and can't not handle disconnect event. Next, in the poweroff phase of
hibernation, SYNCHRONIZE_CACHE SCSI command will be sent to this udisk
when poweroff this scsi device, which will cause uas_submit_urbs to be
called to submit URB for sense/data/cmd pipe. However, these URBs will
submit fail as device was set to NOTATTACHED state. Then, uas_submit_urbs
will return a value SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will lead
the SCSI layer go into an ugly loop and system fail to go into hibernation.
On the other hand, when we specially check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to SCSI layer will cause device
poweroff fail and system shutdown instead of entering hibernation.
To fix this issue, let uas_submit_urbs to return original generic error
when submitting URB failed. At the same time, we need to translate -ENODEV
to DID_NOT_CONNECT for the SCSI layer.
Suggested-by: Oliver Neukum <oneukum(a)suse.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
---
v3->v4
- remove unused variable declaration in function uas_submit_urbs.
drivers/usb/storage/uas.c | 28 +++++++++++++---------------
1 file changed, 13 insertions(+), 15 deletions(-)
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 9707f53cfda9..5930cfc03111 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
--
2.32.0
In the scenario of entering hibernation with udisk in the system, if the
udisk was gone or resume fail in the thaw phase of hibernation. Its state
will be set to NOTATTACHED. At this point, usb_hub_wq was already freezed
and can't not handle disconnect event. Next, in the poweroff phase of
hibernation, SYNCHRONIZE_CACHE SCSI command will be sent to this udisk
when poweroff this scsi device, which will cause uas_submit_urbs to be
called to submit URB for sense/data/cmd pipe. However, these URBs will
submit fail as device was set to NOTATTACHED state. Then, uas_submit_urbs
will return a value SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will lead
the SCSI layer go into an ugly loop and system fail to go into hibernation.
On the other hand, when we specially check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to SCSI layer will cause device
poweroff fail and system shutdown instead of entering hibernation.
To fix this issue, let uas_submit_urbs to return original generic error
when submitting URB failed. At the same time, we need to translate -ENODEV
to DID_NOT_CONNECT for the SCSI layer.
Suggested-by: Oliver Neukum <oneukum(a)suse.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
---
v2->v3
- Modify the description of this patch.
- An error is returned directly when submitting URB fails.
drivers/usb/storage/uas.c | 27 +++++++++++++--------------
1 file changed, 13 insertions(+), 14 deletions(-)
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 9707f53cfda9..689396777b6f 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,16 +541,15 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
@@ -562,9 +561,9 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +571,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +581,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +591,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +601,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +610,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +620,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +697,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
--
2.32.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory:
work on folio->swap instead of page->private when splitting folio"),
subpages of a swapcached THP no longer requires the maintenance.
Adding THPs to the swapcache was introduced in commit
38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"),
where each subpage of a THP added to the swapcache had its own swapcache
entry and required the ->private field to point to the correct swapcache
entry. Later, when THP migration functionality was implemented in commit
616b8371539a6 ("mm: thp: enable thp migration in generic path"),
it initially did not handle the subpages of swapcached THPs, failing to
update their ->private fields or replace the subpage pointers in the
swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration
for shmem thp") addressed the swapcache update aspect. This patch fixes
the update of subpage ->private fields.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index c7d5566623ad..c37af50f312d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -424,8 +424,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory:
work on folio->swap instead of page->private when splitting folio"),
subpages of a swapcached THP no longer requires the maintenance.
Adding THPs to the swapcache was introduced in commit
38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"),
where each subpage of a THP added to the swapcache had its own swapcache
entry and required the ->private field to point to the correct swapcache
entry. Later, when THP migration functionality was implemented in commit
616b8371539a6 ("mm: thp: enable thp migration in generic path"),
it initially did not handle the subpages of swapcached THPs, failing to
update their ->private fields or replace the subpage pointers in the
swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration
for shmem thp") addressed the swapcache update aspect. This patch fixes
the update of subpage ->private fields.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index fcb7eb6a6eca..c0a8f3c9e256 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -447,8 +447,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory:
work on folio->swap instead of page->private when splitting folio"),
subpages of a swapcached THP no longer requires the maintenance.
Adding THPs to the swapcache was introduced in commit
38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"),
where each subpage of a THP added to the swapcache had its own swapcache
entry and required the ->private field to point to the correct swapcache
entry. Later, when THP migration functionality was implemented in commit
616b8371539a6 ("mm: thp: enable thp migration in generic path"),
it initially did not handle the subpages of swapcached THPs, failing to
update their ->private fields or replace the subpage pointers in the
swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration
for shmem thp") addressed the swapcache update aspect. This patch fixes
the update of subpage ->private fields.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 034b0662fd3b..9cfd53eaeb4e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -441,8 +441,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
Hello everyone,
#regzbot introduced v6.7.5..v6.7.6
I'm experiencing an issue where kexec does a full firmware reboot
instead of kexec reboot.
Issue first submitted at OpenSuse bugzilla [0].
OS details as follows:
Distributor ID: openSUSE
Description: openSUSE Tumbleweed-Slowroll
Release: 20240213
Issue has been reproduced by building kernel from source.
kexec works as expected in kernel v6.7.5.
kexec does full firmware reboot in kernel v6.7.6.
I followed the docs here [1] to perform git bisect and find the culprit,
hope it's alright as I'm quite out of my depth here.
Git bisect logs:
git bisect start
# status: waiting for both good and bad commits
# bad: [b631f5b445dc3379f67ff63a2e4c58f22d4975dc] Linux 6.7.6
git bisect bad b631f5b445dc3379f67ff63a2e4c58f22d4975dc
# status: waiting for good commit(s), bad commit known
# good: [004dcea13dc10acaf1486d9939be4c793834c13c] Linux 6.7.5
git bisect good 004dcea13dc10acaf1486d9939be4c793834c13c
Let me know if there's anything else I can do to help troubleshoot the
issue.
[0]: https://bugzilla.suse.com/show_bug.cgi?id=1220541
[1]: https://docs.kernel.org/admin-guide/bug-bisect.html
Kind regards,
Pavin Joseph.
I'm announcing the release of the 4.19.309 kernel.
All users of the 4.19 kernel series must upgrade.
The updated 4.19.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.19.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 +-
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 ++--
drivers/mmc/core/mmc.c | 2 ++
drivers/net/gtp.c | 12 ++++++------
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 ++-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++-
fs/btrfs/dev-replace.c | 24 ++++++++++++++++++++----
fs/cachefiles/bind.c | 3 +++
net/bluetooth/hci_core.c | 7 ++++---
net/bluetooth/hci_event.c | 9 ++++++++-
net/bluetooth/l2cap_core.c | 8 +++++++-
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 ++
sound/core/Makefile | 1 -
17 files changed, 64 insertions(+), 24 deletions(-)
Alexander Ofitserov (1):
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Arnd Bergmann (1):
efi/capsule-loader: fix incorrect allocation size
Arturas Moskvinas (1):
gpio: 74x164: Enable output pins after registers are reset
Baokun Li (1):
cachefiles: fix memory leak in cachefiles_add_cache()
David Sterba (1):
btrfs: dev-replace: properly validate device names
Greg Kroah-Hartman (1):
Linux 4.19.309
Hans de Goede (1):
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Ivan Semenov (1):
mmc: core: Fix eMMC initialization with 1-bit bus connection
Javier Carrasco (1):
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Johannes Berg (1):
wifi: nl80211: reject iftype change with mesh ID change
Kai-Heng Feng (1):
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz (1):
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Oleksij Rempel (1):
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Ryosuke Yasuoka (1):
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Takashi Iwai (1):
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Ying Hsu (1):
Bluetooth: Avoid potential use-after-free in hci_error_reset
Yunjian Wang (1):
tun: Fix xdp_rxq_info's queue_index when detaching
There was previously a theoretical window where swapoff() could run and
teardown a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from
a test case. But there has been agreement based on code review that this
is possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was not free. This isn't present in get_swap_device()
because it doesn't make sense in general due to the race between getting
the reference and swapoff. So I've added an equivalent check directly in
free_swap_and_cache().
Details of how to provoke one possible issue (thanks to David
Hildenbrand for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still references by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff to succeed after process 2 reclaimed the swap cache
but before process1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Cc: stable(a)vger.kernel.org
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
---
Hi Andrew,
Please replace v1 of this patch in mm-unstable with this version.
Changes since v1:
- Added comments for get_swap_device() as suggested by David
- Moved check that swap entry is not free from get_swap_device() to
free_swap_and_cache() since there are some paths that legitimately call with
a free offset.
I haven't addressed the recommendation by Huang Ying [1] to also revert commit
23b230ba8ac3 ("mm/swap: print bad swap offset entry in get_swap_device"). It
should be done separately to this, and and we need to conclude discussion
first.
[1] https://lore.kernel.org/all/875xy0842q.fsf@yhuang6-desk2.ccr.corp.intel.com/
Thanks,
Ryan
mm/swapfile.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2b3a2d85e350..1155a6304119 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1232,6 +1232,11 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
* with get_swap_device() and put_swap_device(), unless the swap
* functions call get/put_swap_device() by themselves.
*
+ * Note that when only holding the PTL, swapoff might succeed immediately
+ * after freeing a swap entry. Therefore, immediately after
+ * __swap_entry_free(), the swap info might become stale and should not
+ * be touched without a prior get_swap_device().
+ *
* Check whether swap entry is valid in the swap device. If so,
* return pointer to swap_info_struct, and keep the swap entry valid
* via preventing the swap device from being swapoff, until
@@ -1609,13 +1614,19 @@ int free_swap_and_cache(swp_entry_t entry)
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
+ if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
+ put_swap_device(p);
+ return 0;
+ }
+
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
--
2.25.1
This is the start of the stable review cycle for the 5.10.212 release.
There are 41 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 07 Mar 2024 11:31:02 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.212-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.10.212-rc2
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Chuanhong Guo <gch981213(a)gmail.com>
mtd: spinand: gigadevice: fix Quad IO for GD5F1GQ5UExxG
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Baokun Li <libaokun1(a)huawei.com>
ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Reto Schneider <reto.schneider(a)husqvarnagroup.com>
mtd: spinand: gigadevice: Support GD5F1GQ5UExxG
zhenwei pi <pizhenwei(a)bytedance.com>
crypto: virtio/akcipher - Fix stack overflow on memcpy
Hans de Goede <hdegoede(a)redhat.com>
platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/pgtable.h | 2 +-
arch/x86/kernel/cpu/intel.c | 178 +++++++++++----------
.../crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +-
drivers/dma/fsl-qdma.c | 25 +--
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 ++++--
drivers/mtd/nand/spi/gigadevice.c | 81 ++++++++--
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/platform/x86/touchscreen_dmi.c | 4 +-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 ++-
fs/cachefiles/bind.c | 3 +
fs/ext4/mballoc.c | 39 ++---
fs/hugetlbfs/inode.c | 6 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +++-
net/ipv6/addrconf.c | 7 +-
net/mptcp/diag.c | 3 +
net/mptcp/protocol.c | 49 ++++++
net/netfilter/nft_compat.c | 20 +++
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
36 files changed, 430 insertions(+), 196 deletions(-)
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index c93dd6a31c31..c5968021fde0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -423,8 +423,12 @@ int folio_migrate_mapping(struct address_space *mapping,
if (folio_test_swapbacked(folio)) {
__folio_set_swapbacked(newfolio);
if (folio_test_swapcache(folio)) {
+ int i;
+
folio_set_swapcache(newfolio);
- newfolio->private = folio_get_private(folio);
+ for (i = 0; i < nr; i++)
+ set_page_private(folio_page(newfolio, i),
+ page_private(folio_page(folio, i)));
}
entries = nr;
} else {
--
2.43.0
Fuzzing of 5.10 stable branch reports a slab-out-of-bounds error in
ata_scsi_pass_thru.
The error is fixed in 5.18 by commit
ce70fd9a551af7424a7dace2a1ba05a7de8eae27.
Backporting this commit would require significant changes to the code so
it is bettter to use a simple fix for that particular error.
The problem is that the length of the received SCSI command is not
validated if scsi_op == VARIABLE_LENGTH_CMD. It can lead to out-of-bounds
reading if the user sends a request with SCSI command of length less than
32.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Signed-off-by: Artem Sadovnikov <ancowi69(a)gmail.com>
Signed-off-by: Mikhail Ivanov <iwanov-23(a)bk.ru>
---
drivers/ata/libata-scsi.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index dfa090ccd21c..77589e911d3d 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4065,6 +4065,9 @@ int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev)
if (unlikely(!scmd->cmd_len))
goto bad_cdb_len;
+
+ if (scsi_op == VARIABLE_LENGTH_CMD && scmd->cmd_len < 32)
+ goto bad_cdb_len;
if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) {
if (unlikely(scmd->cmd_len > dev->cdb_len))
--
2.25.1
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Looks like the undelayed vblank gets signalled exactly when
the active period ends. That is a problem for DSB+VRR when
we are already in vblank and expect DSB to start executing
as soon as we send the push. Instead of starting the DSB
just keeps on waiting for the undelayed vblank which won't
signal until the end of the next frame's active period,
which is far too late.
The end result is that DSB won't have even started
executing by the time the flips/etc. have completed.
We then wait for an extra 1ms, after which we terminate
the DSB and report a timeout:
[drm] *ERROR* [CRTC:80:pipe A] DSB 0 timed out waiting for idle (current head=0xfedf4000, head=0x0, tail=0x1080)
To fix this let's configure DSB to use the so called VRR
"safe window" instead of the undelayed vblank to trigger
the DSB vblank logic, when VRR is enabled.
Cc: stable(a)vger.kernel.org
Fixes: 34d8311f4a1c ("drm/i915/dsb: Re-instate DSB for LUT updates")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9927
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/intel_dsb.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index d62e050185e7..e4515bf92038 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -340,6 +340,17 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency));
}
+static u32 dsb_chicken(struct intel_crtc *crtc)
+{
+ if (crtc->mode_flags & I915_MODE_FLAG_VRR)
+ return DSB_CTRL_WAIT_SAFE_WINDOW |
+ DSB_CTRL_NO_WAIT_VBLANK |
+ DSB_INST_WAIT_SAFE_WINDOW |
+ DSB_INST_NO_WAIT_VBLANK;
+ else
+ return 0;
+}
+
static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
int dewake_scanline)
{
@@ -361,6 +372,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id),
ctrl | DSB_ENABLE);
+ intel_de_write_fw(dev_priv, DSB_CHICKEN(pipe, dsb->id),
+ dsb_chicken(crtc));
+
intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id),
intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf));
--
2.43.0
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Looks like TRANS_CHICKEN bit 31 means something totally different
depending on the platform:
TGL: generate VRR "safe window" for DSB
ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR
So far we've only set this on ADL/DG2, but when using DSB+VRR
we also need to set it on TGL.
And a quick test on MTL says it doesn't need this bit for either
of those purposes, even though it's still documented as valid
in bspec.
Cc: stable(a)vger.kernel.org
Fixes: 34d8311f4a1c ("drm/i915/dsb: Re-instate DSB for LUT updates")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9927
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/intel_vrr.c | 7 ++++---
drivers/gpu/drm/i915/i915_reg.h | 2 +-
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c
index 5d905f932cb4..eb5bd0743902 100644
--- a/drivers/gpu/drm/i915/display/intel_vrr.c
+++ b/drivers/gpu/drm/i915/display/intel_vrr.c
@@ -187,10 +187,11 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state)
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
/*
- * TRANS_SET_CONTEXT_LATENCY with VRR enabled
- * requires this chicken bit on ADL/DG2.
+ * This bit seems to have two meanings depending on the platform:
+ * TGL: generate VRR "safe window" for DSB vblank waits
+ * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR
*/
- if (DISPLAY_VER(dev_priv) == 13)
+ if (IS_DISPLAY_VER(dev_priv, 12, 13))
intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder),
0, PIPE_VBLANK_WITH_DELAY);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e00557e1a57f..3b2e49ce29ba 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4599,7 +4599,7 @@
#define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \
_MTL_CHICKEN_TRANS_A, \
_MTL_CHICKEN_TRANS_B)
-#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* ADL/DG2 */
+#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */
#define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */
#define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27)
#define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x)
--
2.43.0
Hello There,
Would you be interested in building your website?
We are a professional Web Design & Development or Mobile Apps development
company based in India.
Our aim is to be the best in service and as such we offer a premium service
at very competitive prices.
*We specialize in:-*
1. Website Design
2. Web Development
3. Responsive Websites
4. PHP Development
5. E-Commerce Solutions
6. Mobile Apps Development
We operate 24 x7. I will be happy to send you links to price list, money
back guarantee, client rankings, client testimonials, “How we are different
from others?”, and “Why should you choose us?” on receiving a response from
you.
Drop me a line if you need any assistance.
*Kind Regards*
Anjali
This is the start of the stable review cycle for the 5.4.271 release.
There are 25 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.271-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.271-rc1
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
-------------
Diffstat:
Makefile | 4 +-
arch/x86/kernel/cpu/intel.c | 178 +++++++++++++++--------------
drivers/dma/fsl-qdma.c | 25 ++--
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/mmc/core/mmc.c | 2 +
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 +++-
fs/cachefiles/bind.c | 3 +
fs/hugetlbfs/inode.c | 6 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 9 +-
net/bluetooth/l2cap_core.c | 8 +-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +++--
net/ipv6/addrconf.c | 7 +-
net/netfilter/nft_compat.c | 20 ++++
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 +
sound/core/Makefile | 1 -
25 files changed, 226 insertions(+), 143 deletions(-)
Programming PMU events in the host that count during guest execution is
a feature supported by perf, e.g.
perf stat -e cpu_cycles:G ./lkvm run
While this works for VHE, the guest/host event bitmaps are not carried
through to the hypervisor in the nVHE configuration. Make
kvm_pmu_update_vcpu_events() conditional on whether or not _hardware_
supports PMUv3 rather than if the vCPU as vPMU enabled.
Cc: stable(a)vger.kernel.org
Fixes: 84d751a019a9 ("KVM: arm64: Pass pmu events to hyp via vcpu")
Signed-off-by: Oliver Upton <oliver.upton(a)linux.dev>
---
include/kvm/arm_pmu.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 4b9d8fb393a8..df32355e3e38 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void);
*/
#define kvm_pmu_update_vcpu_events(vcpu) \
do { \
- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \
+ if (!has_vhe() && kvm_arm_support_pmu_v3()) \
vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
} while (0)
--
2.44.0.278.ge034bb2e1d-goog
This is the start of the stable review cycle for the 4.19.309 release.
There are 16 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.309-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.309-rc1
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Oleksij Rempel <o.rempel(a)pengutronix.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
-------------
Diffstat:
Makefile | 4 ++--
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 ++--
drivers/mmc/core/mmc.c | 2 ++
drivers/net/gtp.c | 12 ++++++------
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 ++-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++-
fs/btrfs/dev-replace.c | 24 ++++++++++++++++++++----
fs/cachefiles/bind.c | 3 +++
net/bluetooth/hci_core.c | 7 ++++---
net/bluetooth/hci_event.c | 9 ++++++++-
net/bluetooth/l2cap_core.c | 8 +++++++-
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 ++
sound/core/Makefile | 1 -
17 files changed, 65 insertions(+), 25 deletions(-)
In the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are destroyed and devlinks are removed.
During the step 2, OF nodes are destroyed but
__of_changeset_entry_destroy() can raise warnings related to missing
of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2 ...
Indeed, during the devlink removals performed at step 1, the removal
itself releasing the device (and the attached of_node) is done by a job
queued in a workqueue and so, it is done asynchronously with respect to
function calls.
When the warning is present, of_node_put() will be called but wrongly
too late from the workqueue job.
In order to be sure that any ongoing devlink removals are done before
the of_node destruction, synchronize the of_overlay_remove() with the
devlink removals.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/of/overlay.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 2ae7e9d24a64..7a010a62b9d8 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) "OF: overlay: " fmt
+#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
@@ -853,6 +854,14 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
{
int i;
+ /*
+ * Wait for any ongoing device link removals before removing some of
+ * nodes. Drop the global lock while waiting
+ */
+ mutex_unlock(&of_mutex);
+ device_link_wait_removal();
+ mutex_lock(&of_mutex);
+
if (ovcs->cset.entries.next)
of_changeset_destroy(&ovcs->cset);
@@ -862,7 +871,6 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
ovcs->id = 0;
}
-
for (i = 0; i < ovcs->count; i++) {
of_node_put(ovcs->fragments[i].target);
of_node_put(ovcs->fragments[i].overlay);
--
2.43.0
Since commit b2cb2ae22278f1918f7526b89760ee00b4a81393 ("mfd: axp20x:
Generalise handling without interrupt"), interrupt info part for the
AXP15060 PMIC is not needed anymore for Statfive Visionfive 2 board.
And this would cause kernel to try to enable interrupt line 0, which is
not expected. So delete this part from device tree.
Cc: stable(a)vger.kernel.org
Fixes: b2cb2ae22278 ("mfd: axp20x: Generalise handling without interrupt")
Reported-by: Bo Gan <ganboing(a)gmail.com>
Signed-off-by: Shengyu Qu <wiagn233(a)outlook.com>
---
arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi | 3 ---
1 file changed, 3 deletions(-)
diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
index b89e9791efa7..6bebabe3fa37 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
@@ -189,9 +189,6 @@ &i2c5 {
axp15060: pmic@36 {
compatible = "x-powers,axp15060";
reg = <0x36>;
- interrupts = <0>;
- interrupt-controller;
- #interrupt-cells = <1>;
regulators {
vcc_3v3: dcdc1 {
--
2.39.2
Clang enables -Wenum-enum-conversion and -Wenum-compare-conditional
under -Wenum-conversion. A recent change in Clang strengthened these
warnings and they appear frequently in common builds, primarily due to
several instances in common headers but there are quite a few drivers
that have individual instances as well.
include/linux/vmstat.h:508:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
508 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
| ~~~~~~~~~~~~~~~~~~~~~ ^
509 | item];
| ~~~~
drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:955:24: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional]
955 | flags |= is_new_rate ? IWL_MAC_BEACON_CCK
| ^ ~~~~~~~~~~~~~~~~~~
956 | : IWL_MAC_BEACON_CCK_V1;
| ~~~~~~~~~~~~~~~~~~~~~
drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:1120:21: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional]
1120 | 0) > 10 ?
| ^
1121 | IWL_MAC_BEACON_FILS :
| ~~~~~~~~~~~~~~~~~~~
1122 | IWL_MAC_BEACON_FILS_V1;
| ~~~~~~~~~~~~~~~~~~~~~~
While doing arithmetic with different types of enums may be potentially
problematic, inspecting several instances of the warning does not reveal
any obvious problems. To silence the warnings at the source level, an
integral cast must be added to each mismatched enum (which is incredibly
ugly when done frequently) or the value must moved out of the enum to a
macro, which can remove the type safety offered by enums in other
places, such as assignments that would trigger -Wenum-conversion.
As the warnings do not appear to have a high signal to noise ratio and
the source level silencing options are not sustainable, disable the
warnings unconditionally, as they will be enabled with -Wenum-conversion
and are supported in all versions of clang that can build the kernel.
Cc: stable(a)vger.kernel.org
Closes: https://github.com/ClangBuiltLinux/linux/issues/2002
Link: https://github.com/llvm/llvm-project/commit/8c2ae42b3e1c6aa7c18f873edcebff7…
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
scripts/Makefile.extrawarn | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index a9e552a1e910..6053aa22b8f5 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -81,6 +81,14 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
# Warn if there is an enum types mismatch
KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion)
+ifdef CONFIG_CC_IS_CLANG
+# Clang enables these extra warnings under -Wenum-conversion but the kernel
+# performs arithmetic using or has conditionals returning enums of different
+# types in several different places, which is rarely a bug in the kernel's
+# case, so disable the warnings.
+KBUILD_CFLAGS += -Wno-enum-compare-conditional
+KBUILD_CFLAGS += -Wno-enum-enum-conversion
+endif
#
# W=1 - warnings which may be relevant and do not occur too often
---
base-commit: 90d35da658da8cff0d4ecbb5113f5fac9d00eb72
change-id: 20240304-disable-extra-clang-enum-warnings-bf574c7c99fd
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 171573613c39..893ea04498f7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -514,8 +514,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 034b0662fd3b..9cfd53eaeb4e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -441,8 +441,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index fcb7eb6a6eca..c0a8f3c9e256 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -447,8 +447,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
This is the backport of recently upstreamed series that moves VERW
execution to a later point in exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW executions
may end into MDS affected CPU buffers. Moving VERW closer to ring
transition reduces the attack surface.
- The series includes a dependency commit f87bc8dc7a7c ("x86/asm: Add
_ASM_RIP() macro for x86-64 (%rip) suffix").
- Patch 2 includes a change that adds runtime patching for jmp (instead
of verw in original series) due to lack of rip-relative relocation
support in kernels <v6.5.
- Fixed warning:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction.
- Resolved merge conflicts in:
syscall_return_via_sysret in entry_64.S
swapgs_restore_regs_and_return_to_usermode in entry_64.S.
__vmx_vcpu_run in vmenter.S.
vmx_update_fb_clear_dis in vmx.c.
- Boot tested with KASLR and KPTI enabled.
- Verified VERW being executed with mitigation ON.
To: stable(a)vger.kernel.org
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
H. Peter Anvin (Intel) (1):
x86/asm: Add _ASM_RIP() macro for x86-64 (%rip) suffix
Pawan Gupta (5):
x86/bugs: Add asm helpers for executing VERW
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry.S | 23 ++++++++++++++++++++++
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 10 ++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/asm.h | 5 +++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/irqflags.h | 1 +
arch/x86/include/asm/nospec-branch.h | 27 +++++++++++++------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 12 ++++++++----
15 files changed, 111 insertions(+), 45 deletions(-)
---
base-commit: 9985c44f239fa0db0f3b4a1aee80794f113c135c
change-id: 20240304-delay-verw-backport-5-10-y-00aad69432f4
Best regards,
--
Thanks,
Pawan
Hi,
this series does basically two things:
1. Disables automatic load balancing as adviced by the hardware
workaround.
2. Assigns all the CCS slices to one single user engine. The user
will then be able to query only one CCS engine
I'm using here the "Requires: " tag, but I'm not sure the commit
id will be valid, on the other hand, I don't know what commit id
I should use.
Thanks Tvrtko, Matt and John for your reviews!
Andi
Changelog
=========
v2 -> v3
- Simplified the algorithm for creating the list of the exported
uabi engines. (Patch 1) (Thanks, Tvrtko)
- Consider the fused engines when creating the uabi engine list
(Patch 2) (Thanks, Matt)
- Patch 4 now uses a the refactoring from patch 1, in a cleaner
outcome.
v1 -> v2
- In Patch 1 use the correct workaround number (thanks Matt).
- In Patch 2 do not add the extra CCS engines to the exposed UABI
engine list and adapt the engine counting accordingly (thanks
Tvrtko).
- Reword the commit of Patch 2 (thanks John).
Andi Shyti (4):
drm/i915/gt: Refactor uabi engine class/instance list creation
drm/i915/gt: Do not exposed fused off engines.
drm/i915/gt: Disable HW load balancing for CCS
drm/i915/gt: Enable only one CCS for compute workload
drivers/gpu/drm/i915/gt/intel_engine_user.c | 52 ++++++++++++++++-----
drivers/gpu/drm/i915/gt/intel_gt.c | 11 +++++
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 ++
drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 +++
4 files changed, 60 insertions(+), 12 deletions(-)
--
2.43.0
The first kiocb_set_cancel_fn() argument may point at a struct kiocb
that is not embedded inside struct aio_kiocb. With the current code,
depending on the compiler, the req->ki_ctx read happens either before
the IOCB_AIO_RW test or after that test. Move the req->ki_ctx read such
that it is guaranteed that the IOCB_AIO_RW test happens first.
Reported-by: Eric Biggers <ebiggers(a)kernel.org>
Cc: Benjamin LaHaise <ben(a)communityfibre.ca>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Avi Kivity <avi(a)scylladb.com>
Cc: Sandeep Dhavale <dhavale(a)google.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: stable(a)vger.kernel.org
Fixes: b820de741ae4 ("fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
fs/aio.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index da18dbcfcb22..9cdaa2faa536 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -589,8 +589,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
- struct kioctx *ctx = req->ki_ctx;
+ struct aio_kiocb *req;
+ struct kioctx *ctx;
unsigned long flags;
/*
@@ -600,9 +600,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
if (!(iocb->ki_flags & IOCB_AIO_RW))
return;
+ req = container_of(iocb, struct aio_kiocb, rw);
+
if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
return;
+ ctx = req->ki_ctx;
+
spin_lock_irqsave(&ctx->ctx_lock, flags);
list_add_tail(&req->ki_list, &ctx->active_reqs);
req->ki_cancel = cancel;
Currently arm64's switch_mm() doesn't always have an smp_mb()
which the core scheduler code has depended upon since commit:
commit 223baf9d17f25 ("sched: Fix performance regression introduced by mm_cid")
If switch_mm() doesn't call smp_mb(), sched_mm_cid_remote_clear()
can unset the activly used cid when it fails to observe active task after it
sets lazy_put.
By adding an smp_mb() in arm64's check_and_switch_context(),
Guarantee to observe active task after sched_mm_cid_remote_clear()
success to set lazy_put.
Signed-off-by: levi.yun <yeoreum.yun(a)arm.com>
Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid")
Cc: <stable(a)vger.kernel.org> # 6.4.x
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Aaron Lu <aaron.lu(a)intel.com>
---
I'm really sorry if you got this multiple times.
I had some problems with the SMTP server...
arch/arm64/mm/context.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 188197590fc9..7a9e8e6647a0 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -268,6 +268,11 @@ void check_and_switch_context(struct mm_struct *mm)
*/
if (!system_uses_ttbr0_pan())
cpu_switch_mm(mm->pgd, mm);
+
+ /*
+ * See the comments on switch_mm_cid describing user -> user transition.
+ */
+ smp_mb();
}
unsigned long arm64_mm_context_get(struct mm_struct *mm)
--
LEVI:{C3F47F37-75D8-414A-A8BA-3980EC8A46D7}
Patch "fs/aio: Make io_cancel() generate completions again" is based on the
assumption that calling kiocb->ki_cancel() does not complete R/W requests.
This is incorrect: the two drivers that call kiocb_set_cancel_fn() callers
set a cancellation function that calls usb_ep_dequeue(). According to its
documentation, usb_ep_dequeue() calls the completion routine with status
-ECONNRESET. Hence this revert.
Cc: Benjamin LaHaise <ben(a)communityfibre.ca>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Avi Kivity <avi(a)scylladb.com>
Cc: Sandeep Dhavale <dhavale(a)google.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+b91eb2ed18f599dd3c31(a)syzkaller.appspotmail.com
Fixes: 54cbc058d86b ("fs/aio: Make io_cancel() generate completions again")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
fs/aio.c | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 28223f511931..da18dbcfcb22 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2165,11 +2165,14 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
#endif
/* sys_io_cancel:
- * Attempts to cancel an iocb previously passed to io_submit(). If the
- * operation is successfully cancelled 0 is returned. May fail with
- * -EFAULT if any of the data structures pointed to are invalid. May
- * fail with -EINVAL if aio_context specified by ctx_id is invalid. Will
- * fail with -ENOSYS if not implemented.
+ * Attempts to cancel an iocb previously passed to io_submit. If
+ * the operation is successfully cancelled, the resulting event is
+ * copied into the memory pointed to by result without being placed
+ * into the completion queue and 0 is returned. May fail with
+ * -EFAULT if any of the data structures pointed to are invalid.
+ * May fail with -EINVAL if aio_context specified by ctx_id is
+ * invalid. May fail with -EAGAIN if the iocb specified was not
+ * cancelled. Will fail with -ENOSYS if not implemented.
*/
SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
@@ -2200,12 +2203,14 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
}
spin_unlock_irq(&ctx->ctx_lock);
- /*
- * The result argument is no longer used - the io_event is always
- * delivered via the ring buffer.
- */
- if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
- aio_complete_rw(&kiocb->rw, -EINTR);
+ if (!ret) {
+ /*
+ * The result argument is no longer used - the io_event is
+ * always delivered via the ring buffer. -EINPROGRESS indicates
+ * cancellation is progress:
+ */
+ ret = -EINPROGRESS;
+ }
percpu_ref_put(&ctx->users);
This is the start of the stable review cycle for the 5.15.151 release.
There are 84 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.151-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.15.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.15.151-rc1
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Gal Pressman <gal(a)nvidia.com>
Revert "tls: rx: move counting TlsDecryptErrors for sync"
Jakub Kicinski <kuba(a)kernel.org>
net: tls: fix async vs NIC crypto offload
Martynas Pumputis <m(a)lambda.lt>
bpf: Derive source IP addr via bpf_*_fib_lookup()
Louis DeLosSantos <louis.delos.devel(a)gmail.com>
bpf: Add table ID to bpf_fib_lookup BPF helper
Martin KaFai Lau <martin.lau(a)kernel.org>
bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Teach lockdep about icc_bw_lock order"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Fix locking for runpm vs reclaim"
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Kuniyuki Iwashima <kuniyu(a)amazon.com>
af_unix: Drop oob_skb ref before purging queue in GC.
Max Krummenacher <max.krummenacher(a)toradex.com>
Revert "drm/bridge: lt8912b: Register and attach our DSI device at probe"
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Abeni <pabeni(a)redhat.com>
mptcp: push at DSS boundaries
Geliang Tang <tanggeliang(a)kylinos.cn>
mptcp: add needs_id for netlink appending addr
Jean Sacren <sakiwit(a)gmail.com>
mptcp: clean up harmless false expressions
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter in v6
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter
Paolo Abeni <pabeni(a)redhat.com>
mptcp: rename timer related helper to less confusing names
Paolo Abeni <pabeni(a)redhat.com>
mptcp: process pending subflow error on close
Paolo Abeni <pabeni(a)redhat.com>
mptcp: move __mptcp_error_report in protocol.c
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Zong Li <zong.li(a)sifive.com>
riscv: add CALLER_ADDRx support
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Tadeusz Struk <tstruk(a)gigaio.com>
dmaengine: ptdma: use consistent DMA masks
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
ALSA: firewire-lib: fix to check cycle continuity
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Jiri Slaby (SUSE) <jirislaby(a)kernel.org>
fbcon: always restore the old font data in fbcon_do_set_font()
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Sabrina Dubroca <sd(a)queasysnail.net>
tls: decrement decrypt_pending if no async completion will be called
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: use async as an in-out argument
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: assume crypto always calls our callback
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: move counting TlsDecryptErrors for sync
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't track the async count
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: factor out writing ContentType to cmsg
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: wrap decryption arguments in a structure
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't report text length from the bowels of decrypt
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: drop unnecessary arguments from tls_setup_from_iter()
Jakub Kicinski <kuba(a)kernel.org>
tls: hw: rx: use return value of tls_device_decrypted() to carry status
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: refactor decrypt_skb_update()
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't issue wake ups when data is decrypted
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the decryption status in socket context
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the record type in socket context
Oleksij Rempel <linux(a)rempel-privat.de>
igb: extend PTP timestamp adjustments to i211
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Florian Westphal <fw(a)strlen.de>
netfilter: bridge: confirm multicast packets before passing them up the stack
Florian Westphal <fw(a)strlen.de>
netfilter: let reset rules clean out conntrack entries
Florian Westphal <fw(a)strlen.de>
netfilter: make function op structures const
Florian Westphal <fw(a)strlen.de>
netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook
Florian Westphal <fw(a)strlen.de>
netfilter: nfnetlink_queue: silence bogus compiler warning
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Jakub Raczynski <j.raczynski(a)samsung.com>
stmmac: Clear variable when destroying workqueue
Justin Iurman <justin.iurman(a)uliege.be>
uapi: in6: replace temporary label with rfc9486
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Jakub Kicinski <kuba(a)kernel.org>
veth: try harder when allocating queue memory
Vasily Averin <vvs(a)openvz.org>
net: enable memcg accounting for veth queues
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Jakub Kicinski <kuba(a)kernel.org>
net: veth: clear GRO when clearing XDP even when down
Doug Smythies <dsmythies(a)telus.net>
cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf call back
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: disallow timeout for anonymous sets
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/ftrace.h | 5 +
arch/riscv/include/asm/pgtable.h | 2 +-
arch/riscv/kernel/Makefile | 2 +
arch/riscv/kernel/return_address.c | 48 ++++
arch/x86/kernel/cpu/intel.c | 178 ++++++------
drivers/cpufreq/intel_pstate.c | 3 +
drivers/dma/fsl-qdma.c | 25 +-
drivers/dma/ptdma/ptdma-dmaengine.c | 2 -
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/gpu/drm/bridge/lontium-lt8912b.c | 11 +-
drivers/interconnect/core.c | 18 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 +++-
drivers/mtd/nand/spi/gigadevice.c | 6 +-
drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +-
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/net/veth.c | 40 +--
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
drivers/video/fbdev/core/fbcon.c | 8 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 +-
fs/cachefiles/bind.c | 3 +
fs/hugetlbfs/inode.c | 6 +-
include/linux/netfilter.h | 14 +-
include/net/ipv6_stubs.h | 5 +
include/net/netfilter/nf_conntrack.h | 8 +
include/net/strparser.h | 4 +
include/net/tls.h | 11 +-
include/uapi/linux/bpf.h | 37 ++-
include/uapi/linux/in6.h | 2 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/bridge/br_netfilter_hooks.c | 96 +++++++
net/bridge/netfilter/nf_conntrack_bridge.c | 30 ++
net/core/filter.c | 67 ++++-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +-
net/ipv4/netfilter/nf_reject_ipv4.c | 1 +
net/ipv6/addrconf.c | 7 +-
net/ipv6/af_inet6.c | 1 +
net/ipv6/netfilter/nf_reject_ipv6.c | 1 +
net/mptcp/diag.c | 3 +
net/mptcp/pm_netlink.c | 30 +-
net/mptcp/protocol.c | 123 +++++++--
net/mptcp/subflow.c | 36 ---
net/netfilter/core.c | 45 +--
net/netfilter/nf_conntrack_core.c | 21 +-
net/netfilter/nf_conntrack_netlink.c | 4 +-
net/netfilter/nf_conntrack_proto_tcp.c | 35 +++
net/netfilter/nf_nat_core.c | 2 +-
net/netfilter/nf_tables_api.c | 7 +
net/netfilter/nfnetlink_queue.c | 10 +-
net/netfilter/nft_compat.c | 20 ++
net/netlink/af_netlink.c | 2 +-
net/tls/tls_device.c | 6 +-
net/tls/tls_sw.c | 316 ++++++++++------------
net/unix/garbage.c | 22 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
sound/firewire/amdtp-stream.c | 2 +-
tools/include/uapi/linux/bpf.h | 37 ++-
tools/testing/selftests/net/mptcp/config | 2 +
72 files changed, 1046 insertions(+), 529 deletions(-)
Larry Finger <Larry.Finger(a)gmail.com> wrote:
> From: Nick Morrow <morrownr(a)gmail.com>
>
> Add VID/PIDs that are known to be missing for this driver.
>
> Removed /* 8811CU */ and /* 8821CU */ as they are redundant
> since the file is specific to those chips.
>
> Removed /* TOTOLINK A650UA v3 */ as the manufacturer. It has a REALTEK
> VID so it may not be specific to this adapter.
>
> Verified and tested.
>
> Cc: stable(a)vger.kernel.org
> Signed-off-by: Nick Morrow <morrownr(a)gmail.com>
> Signed-off-by: Larry Finger <Larry.Finger(a)lwfinger.net>
> Acked-by: Ping-Ke Shih <pkshih(a)realtek.com>
Patch applied to wireless-next.git, thanks.
b8a62478f3b1 wifi: rtw88: Add missing VID/PIDs for 8811CU and 8821CU
--
https://patchwork.kernel.org/project/linux-wireless/patch/4ume7mjw63u7.XlMU…https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatc…
The patch titled
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
has been added to the -mm mm-unstable branch. Its filename is
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
Date: Tue, 5 Mar 2024 15:13:49 +0000
There was previously a theoretical window where swapoff() could run and
teardown a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from a
test case. But there has been agreement based on code review that this is
possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was valid. This wasn't present in get_swap_device() so
I've added it. I couldn't find any existing get_swap_device() call sites
where this extra check would cause any false alarms.
Details of how to provoke one possible issue (thanks to David Hildenbrand
for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still references by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff to succeed after process 2 reclaimed the swap cache
but before process1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Link: https://lkml.kernel.org/r/20240305151349.3781428-1-ryan.roberts@arm.com
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
--- a/mm/swapfile.c~mm-swap-fix-race-between-free_swap_and_cache-and-swapoff
+++ a/mm/swapfile.c
@@ -1281,7 +1281,9 @@ struct swap_info_struct *get_swap_device
smp_rmb();
offset = swp_offset(entry);
if (offset >= si->max)
- goto put_out;
+ goto bad_offset;
+ if (data_race(!si->swap_map[swp_offset(entry)]))
+ goto bad_free;
return si;
bad_nofile:
@@ -1289,9 +1291,14 @@ bad_nofile:
out:
return NULL;
put_out:
- pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
percpu_ref_put(&si->users);
return NULL;
+bad_offset:
+ pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
+ goto put_out;
+bad_free:
+ pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);
+ goto put_out;
}
static unsigned char __swap_entry_free(struct swap_info_struct *p,
@@ -1609,13 +1616,14 @@ int free_swap_and_cache(swp_entry_t entr
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index c7d5566623ad..c37af50f312d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -424,8 +424,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of
mmiowb()") remove all mmiowb() in drivers, but it says:
"NOTE: mmiowb() has only ever guaranteed ordering in conjunction with
spin_unlock(). However, pairing each mmiowb() removal in this patch with
the corresponding call to spin_unlock() is not at all trivial, so there
is a small chance that this change may regress any drivers incorrectly
relying on mmiowb() to order MMIO writes between CPUs using lock-free
synchronisation."
The mmio in radeon_ring_commit() is protected by a mutex rather than a
spinlock, but in the mutex fastpath it behaves similar to spinlock. We
can add mmiowb() calls in the radeon driver but the maintainer says he
doesn't like such a workaround, and radeon is not the only example of
mutex protected mmio.
So we should extend the mmiowb tracking system from spinlock to mutex,
and maybe other locking primitives. This is not easy and error prone, so
we solve it in the architectural code, by simply defining the __io_aw()
hook as mmiowb(). And we no longer need to override queued_spin_unlock()
so use the generic definition.
Without this, we get such an error when run 'glxgears' on weak ordering
architectures such as LoongArch:
radeon 0000:04:00.0: ring 0 stalled for more than 10324msec
radeon 0000:04:00.0: ring 3 stalled for more than 10240msec
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3)
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.…
Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loon…
Cc: stable(a)vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/include/asm/Kbuild | 1 +
arch/loongarch/include/asm/io.h | 2 ++
arch/loongarch/include/asm/qspinlock.h | 18 ------------------
3 files changed, 3 insertions(+), 18 deletions(-)
delete mode 100644 arch/loongarch/include/asm/qspinlock.h
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index a97c0edbb866..2dbec7853ae8 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += early_ioremap.h
generic-y += qrwlock.h
+generic-y += qspinlock.h
generic-y += rwsem.h
generic-y += segment.h
generic-y += user.h
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index c486c2341b66..4a8adcca329b 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -71,6 +71,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
+#define __io_aw() mmiowb()
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
deleted file mode 100644
index 34f43f8ad591..000000000000
--- a/arch/loongarch/include/asm/qspinlock.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_QSPINLOCK_H
-#define _ASM_QSPINLOCK_H
-
-#include <asm-generic/qspinlock_types.h>
-
-#define queued_spin_unlock queued_spin_unlock
-
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- compiletime_assert_atomic_type(lock->locked);
- c_sync();
- WRITE_ONCE(lock->locked, 0);
-}
-
-#include <asm-generic/qspinlock.h>
-
-#endif /* _ASM_QSPINLOCK_H */
--
2.43.0
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reinstate commit 88b065943cb5 ("drm/i915/dsi: Do display on
sequence later on icl+"), for the most part. Turns out some
machines (eg. Chuwi Minibook X) really do need that updated order.
It is also the order the Windows driver uses.
However we can't just undo the revert since that would again
break Lenovo 82TQ. After staring at the VBT sequences for both
machines I've concluded that the Lenovo 82TQ sequences look
somewhat broken:
- INIT_OTP is not present at all
- what should be in INIT_OTP is found in DISPLAY_ON
- what should be in DISPLAY_ON is found in BACKLIGHT_ON
(along with the actual backlight stuff)
The Chuwi Minibook X on the other hand has a full complement
of sequences in its VBT.
So let's try to deal with the broken sequences in the
Lenovo 82TQ VBT by simply swapping the (non-existent)
INIT_OTP sequence with the DISPLAY_ON sequence. Thus we
execute DISPLAY_ON when intending to execute INIT_OTP,
and execute nothing at all when intending to execute
DISPLAY_ON. That should be 100% equivalent to the
revert, for such broken VBTs.
Cc: stable(a)vger.kernel.org
Fixes: dc524d05974f ("Revert "drm/i915/dsi: Do display on sequence later on icl+"")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/10071
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10334
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/icl_dsi.c | 3 +-
drivers/gpu/drm/i915/display/intel_bios.c | 43 +++++++++++++++++++----
2 files changed, 39 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index eda4a8b88590..ac456a2275db 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1155,7 +1155,6 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
}
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
/* ensure all panel commands dispatched before enabling transcoder */
wait_for_cmds_dispatched_to_panel(encoder);
@@ -1256,6 +1255,8 @@ static void gen11_dsi_enable(struct intel_atomic_state *state,
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
+ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
+
/* step7: enable backlight */
intel_backlight_enable(crtc_state, conn_state);
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 343726de9aa7..373291d10af9 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1955,16 +1955,12 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915,
* these devices we split the init OTP sequence into a deassert sequence and
* the actual init OTP part.
*/
-static void fixup_mipi_sequences(struct drm_i915_private *i915,
- struct intel_panel *panel)
+static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
{
u8 *init_otp;
int len;
- /* Limit this to VLV for now. */
- if (!IS_VALLEYVIEW(i915))
- return;
-
/* Limit this to v1 vid-mode sequences */
if (panel->vbt.dsi.config->is_cmd_mode ||
panel->vbt.dsi.seq_version != 1)
@@ -2000,6 +1996,41 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915,
panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
}
+/*
+ * Some machines (eg. Lenovo 82TQ) appear to have broken
+ * VBT sequences:
+ * - INIT_OTP is not present at all
+ * - what should be in INIT_OTP is in DISPLAY_ON
+ * - what should be in DISPLAY_ON is in BACKLIGHT_ON
+ * (along with the actual backlight stuff)
+ *
+ * To make those work we simply swap DISPLAY_ON and INIT_OTP.
+ *
+ * TODO: Do we need to limit this to specific machines,
+ * or examine the contents of the sequences to
+ * avoid false positives?
+ */
+static void icl_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] &&
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) {
+ drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n");
+
+ swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP],
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]);
+ }
+}
+
+static void fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (DISPLAY_VER(i915) >= 11)
+ icl_fixup_mipi_sequences(i915, panel);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_fixup_mipi_sequences(i915, panel);
+}
+
static void
parse_mipi_sequence(struct drm_i915_private *i915,
struct intel_panel *panel)
--
2.43.0
With the to-be-fixed commit, the reset_work handler cleared 'host->mrq'
outside of the spinlock protected critical section. That leaves a small
race window during execution of 'tmio_mmc_reset()' where the done_work
handler could grab a pointer to the now invalid 'host->mrq'. Both would
use it to call mmc_request_done() causing problems (see link below).
However, 'host->mrq' cannot simply be cleared earlier inside the
critical section. That would allow new mrqs to come in asynchronously
while the actual reset of the controller still needs to be done. So,
like 'tmio_mmc_set_ios()', an ERR_PTR is used to prevent new mrqs from
coming in but still avoiding concurrency between work handlers.
Reported-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Closes: https://lore.kernel.org/all/20240220061356.3001761-1-dirk.behme@de.bosch.co…
Fixes: df3ef2d3c92c ("mmc: protect the tmio_mmc driver against a theoretical race")
Signed-off-by: Wolfram Sang <wsa+renesas(a)sang-engineering.com>
Tested-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Reviewed-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Cc: stable(a)vger.kernel.org # 3.0+
---
Change since v1/RFT: added Dirk's tags and stable tag
@Ulf: this is nasty, subtle stuff. Would be awesome to have it in 6.8
already!
drivers/mmc/host/tmio_mmc_core.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index be7f18fd4836..c253d176db69 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -259,6 +259,8 @@ static void tmio_mmc_reset_work(struct work_struct *work)
else
mrq->cmd->error = -ETIMEDOUT;
+ /* No new calls yet, but disallow concurrent tmio_mmc_done_work() */
+ host->mrq = ERR_PTR(-EBUSY);
host->cmd = NULL;
host->data = NULL;
--
2.43.0
If VM_BIND is enabled on the client the legacy submission ioctl can't be
used, however if a client tries to do so regardless it will return an
error. In this case the clients mutex remained unlocked leading to a
deadlock inside nouveau_drm_postclose or any other nouveau ioctl call.
Fixes: b88baab82871 ("drm/nouveau: implement new VM_BIND uAPI")
Cc: Danilo Krummrich <dakr(a)redhat.com>
Cc: <stable(a)vger.kernel.org> # v6.6+
Signed-off-by: Karol Herbst <kherbst(a)redhat.com>
Reviewed-by: Lyude Paul <lyude(a)redhat.com>
Reviewed-by: Danilo Krummrich <dakr(a)redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240304183157.1587152-1-kher…
---
drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 49c2bcbef1299..5a887d67dc0e8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -764,7 +764,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
return -ENOMEM;
if (unlikely(nouveau_cli_uvmm(cli)))
- return -ENOSYS;
+ return nouveau_abi16_put(abi16, -ENOSYS);
list_for_each_entry(temp, &abi16->channels, head) {
if (temp->chan->chid == req->channel) {
--
2.44.0
Ring expansion checker may incorrectly assume a completely full ring
is empty, missing the need for expansion.
This is due to a special empty ring case where the dequeue ends up
ahead of the enqueue pointer. This is seen when enqueued TRBs fill up
exactly a segment, with enqueue then pointing to the end link TRB.
Once those TRBs are handled the dequeue pointer will follow the link
TRB and end up pointing to the first entry on the next segment, past
the enqueue.
This same enqueue - dequeue condition can be true if a ring is full,
with enqueue ending on that last link TRB before the dequeue pointer
on the next segment.
This can be seen when queuing several ~510 small URBs via usbfs in
one go before a single one is handled (i.e. dequeue not moved from first
entry in segment).
Expand the ring already when enqueue reaches the link TRB before the
dequeue segment, instead of expanding it when enqueue moves into the
dequeue segment.
Reported-by: Chris Yokum <linux-usb(a)mail.totalphase.com>
Closes: https://lore.kernel.org/all/949223224.833962.1709339266739.JavaMail.zimbra@…
Tested-by: Chris Yokum <linux-usb(a)mail.totalphase.com>
Fixes: f5af638f0609 ("xhci: Fix transfer ring expansion size calculation")
Cc: stable(a)vger.kernel.org # v6.5+
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index f0d8a607ff21..4f64b814d4aa 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -326,7 +326,13 @@ static unsigned int xhci_ring_expansion_needed(struct xhci_hcd *xhci, struct xhc
/* how many trbs will be queued past the enqueue segment? */
trbs_past_seg = enq_used + num_trbs - (TRBS_PER_SEGMENT - 1);
- if (trbs_past_seg <= 0)
+ /*
+ * Consider expanding the ring already if num_trbs fills the current
+ * segment (i.e. trbs_past_seg == 0), not only when num_trbs goes into
+ * the next segment. Avoids confusing full ring with special empty ring
+ * case below
+ */
+ if (trbs_past_seg < 0)
return 0;
/* Empty ring special case, enqueue stuck on link trb while dequeue advanced */
--
2.25.1
This is the start of the stable review cycle for the 5.10.212 release.
There are 42 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.212-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.10.212-rc1
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Chuanhong Guo <gch981213(a)gmail.com>
mtd: spinand: gigadevice: fix Quad IO for GD5F1GQ5UExxG
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Baokun Li <libaokun1(a)huawei.com>
ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Zong Li <zong.li(a)sifive.com>
riscv: add CALLER_ADDRx support
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Reto Schneider <reto.schneider(a)husqvarnagroup.com>
mtd: spinand: gigadevice: Support GD5F1GQ5UExxG
zhenwei pi <pizhenwei(a)bytedance.com>
crypto: virtio/akcipher - Fix stack overflow on memcpy
Hans de Goede <hdegoede(a)redhat.com>
platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/ftrace.h | 5 +
arch/riscv/include/asm/pgtable.h | 2 +-
arch/riscv/kernel/Makefile | 2 +
arch/riscv/kernel/return_address.c | 48 ++++++
arch/x86/kernel/cpu/intel.c | 178 +++++++++++----------
.../crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +-
drivers/dma/fsl-qdma.c | 25 +--
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 ++++--
drivers/mtd/nand/spi/gigadevice.c | 81 ++++++++--
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/platform/x86/touchscreen_dmi.c | 4 +-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 ++-
fs/cachefiles/bind.c | 3 +
fs/ext4/mballoc.c | 39 ++---
fs/hugetlbfs/inode.c | 6 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +++-
net/ipv6/addrconf.c | 7 +-
net/mptcp/diag.c | 3 +
net/mptcp/protocol.c | 49 ++++++
net/netfilter/nft_compat.c | 20 +++
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
39 files changed, 485 insertions(+), 196 deletions(-)
[BUG]
When using zoned devices (zbc), scrub would always report super block
errors like the following:
# btrfs scrub start -fB /mnt/btrfs/
Starting scrub on devid 1
scrub done for b7b5c759-1baa-4561-a0ca-b8d0babcde56
Scrub started: Tue Mar 5 12:49:14 2024
Status: finished
Duration: 0:00:00
Total to scrub: 288.00KiB
Rate: 288.00KiB/s
Error summary: super=2
Corrected: 0
Uncorrectable: 0
Unverified: 0
[CAUSE]
Since the very beginning of scrub, we always go with btrfs_sb_offset()
to grab the super blocks.
This is fine for regular btrfs filesystems, but for zoned btrfs, super
blocks are stored in dedicated zones with a ring buffer like structure.
This means the old btrfs_sb_offset() is not able to give the correct
bytenr for us to grabbing the super blocks, thus except the primary
super block, the rest would be garbage and cause the above false alerts.
[FIX]
Instead of btrfs_sb_offset(), go with btrfs_sb_log_location() which is
zoned friendly, to grab the correct super block location.
This would introduce new error patterns, as btrfs_sb_log_location() can
fail with extra errors.
Here for -ENOENT we just end the scrub as there are no more super
blocks.
For other errors, we record it as a super block error and exit.
Reported-by: WA AM <waautomata(a)gmail.com>
Link: https://lore.kernel.org/all/CANU2Z0EvUzfYxczLgGUiREoMndE9WdQnbaawV5Fv5gNXpt…
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: Johannes Thumshirn <Johannes.Thumshirn(a)wdc.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/scrub.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
---
Changelog:
v2:
- Use READ to replace the number 0
- Continue checking the next super block if we hit a non-ENOENT error
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c4bd0e60db59..201b547aac4c 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2788,7 +2788,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev)
{
int i;
- u64 bytenr;
u64 gen;
int ret = 0;
struct page *page;
@@ -2812,7 +2811,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
gen = btrfs_get_last_trans_committed(fs_info);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
+ u64 bytenr;
+
+ ret = btrfs_sb_log_location(scrub_dev, i, READ, &bytenr);
+ if (ret == -ENOENT)
+ break;
+ if (ret < 0) {
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.super_errors++;
+ spin_unlock(&sctx->stat_lock);
+ continue;
+ }
if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes)
break;
--
2.44.0
[BUG]
When using zoned devices (zbc), scrub would always report super block
errors like the following:
# btrfs scrub start -fB /mnt/btrfs/
Starting scrub on devid 1
scrub done for b7b5c759-1baa-4561-a0ca-b8d0babcde56
Scrub started: Tue Mar 5 12:49:14 2024
Status: finished
Duration: 0:00:00
Total to scrub: 288.00KiB
Rate: 288.00KiB/s
Error summary: super=2
Corrected: 0
Uncorrectable: 0
Unverified: 0
[CAUSE]
Since the very beginning of scrub, we always go with btrfs_sb_offset()
to grab the super blocks.
This is fine for regular btrfs filesystems, but for zoned btrfs, super
blocks are stored in dedicated zones with a ring buffer like structure.
This means the old btrfs_sb_offset() is not able to give the correct
bytenr for us to grabbing the super blocks, thus except the primary
super block, the rest would be garbage and cause the above false alerts.
[FIX]
Instead of btrfs_sb_offset(), go with btrfs_sb_log_location() which is
zoned friendly, to grab the correct super block location.
This would introduce new error patterns, as btrfs_sb_log_location() can
fail with extra errors.
Here for -ENOENT we just end the scrub as there are no more super
blocks.
For other errors, we record it as a super block error and exit.
Reported-by: WA AM <waautomata(a)gmail.com>
Link: https://lore.kernel.org/all/CANU2Z0EvUzfYxczLgGUiREoMndE9WdQnbaawV5Fv5gNXpt…
CC: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Johannes Thumshirn <Johannes.Thumshirn(a)wdc.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/scrub.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c4bd0e60db59..e1b67baa4072 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2788,7 +2788,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev)
{
int i;
- u64 bytenr;
u64 gen;
int ret = 0;
struct page *page;
@@ -2812,7 +2811,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
gen = btrfs_get_last_trans_committed(fs_info);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
+ u64 bytenr;
+
+ ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr);
+ if (ret == -ENOENT)
+ break;
+ if (ret < 0) {
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.super_errors++;
+ spin_unlock(&sctx->stat_lock);
+ break;
+ }
if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes)
break;
--
2.44.0
There are indications that ASPM L0s is not working very well on this
machine so disable it also for the NVMe and modem controllers for now.
Note that this is done as a precaution based on problems with the Wi-Fi
on the X13s as well as the NVMe, modem and Wi-Fi on the sc8280xp-crd
reference design (the NVMe controller on my X13s does not support L0s
and the machine lacks a modem).
Fixes: 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops")
Cc: stable(a)vger.kernel.org # 6.7
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index 9567b82db9a5..057e4d9d3c0f 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -695,6 +695,8 @@ keyboard@68 {
};
&pcie2a {
+ aspm-no-l0s;
+
perst-gpios = <&tlmm 143 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 145 GPIO_ACTIVE_LOW>;
@@ -714,6 +716,8 @@ &pcie2a_phy {
};
&pcie3a {
+ aspm-no-l0s;
+
perst-gpios = <&tlmm 151 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>;
--
2.43.0
There are indications that ASPM L0s is not working very well on this
machine so disable it also for the modem and Wi-Fi controllers for now.
This specifically avoids having the modem and Wi-Fi controllers bounce
in an out of L0s when not used (the modem still bounces in and out of
L1) as well as intermittent Correctable errors on the Wi-Fi link when
not used.
Fixes: 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops")
Cc: stable(a)vger.kernel.org # 6.7
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
arch/arm64/boot/dts/qcom/sc8280xp-crd.dts | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
index 7e94a68d5d9f..8fc0380f65a0 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
@@ -546,6 +546,8 @@ &pcie2a_phy {
};
&pcie3a {
+ aspm-no-l0s;
+
perst-gpios = <&tlmm 151 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>;
@@ -566,6 +568,7 @@ &pcie3a_phy {
&pcie4 {
max-link-speed = <2>;
+ aspm-no-l0s;
perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>;
--
2.43.0
Commit 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting
1.9.0 ops") started enabling ASPM unconditionally when the hardware
claims to support it. This triggers Correctable Errors for some PCIe
devices on machines like the Lenovo ThinkPad X13s, which could indicate
an incomplete driver ASPM implementation or that the hardware does in
fact not support L0s.
Add support for disabling ASPM L0s in the devicetree when it is not
supported on a particular machine and controller.
Note that only the 1.9.0 ops enable ASPM currently.
Fixes: 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops")
Cc: stable(a)vger.kernel.org # 6.7
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-qcom.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 09d485df34b9..0fb5dc06d2ef 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -273,6 +273,25 @@ static int qcom_pcie_start_link(struct dw_pcie *pci)
return 0;
}
+static void qcom_pcie_clear_aspm_l0s(struct dw_pcie *pci)
+{
+ u16 offset;
+ u32 val;
+
+ if (!of_property_read_bool(pci->dev->of_node, "aspm-no-l0s"))
+ return;
+
+ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+
+ dw_pcie_dbi_ro_wr_en(pci);
+
+ val = readl(pci->dbi_base + offset + PCI_EXP_LNKCAP);
+ val &= ~PCI_EXP_LNKCAP_ASPM_L0S;
+ writel(val, pci->dbi_base + offset + PCI_EXP_LNKCAP);
+
+ dw_pcie_dbi_ro_wr_dis(pci);
+}
+
static void qcom_pcie_clear_hpc(struct dw_pcie *pci)
{
u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
@@ -962,6 +981,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
{
+ qcom_pcie_clear_aspm_l0s(pcie->pci);
qcom_pcie_clear_hpc(pcie->pci);
return 0;
--
2.43.0
Since kernel version 5.4.217 LTS, there has been an issue with the kernel live patching feature becoming unavailable.
When compiling the sample code for kernel live patching, the following message is displayed when enabled:
livepatch: klp_check_stack: kworker/u256:6:23490 has an unreliable stack
Reproduction steps:
1.git checkout v5.4.269 -b v5.4.269
2.make defconfig
3. Set CONFIG_LIVEPATCH=y、CONFIG_SAMPLE_LIVEPATCH=m
4. make -j bzImage
5. make samples/livepatch/livepatch-sample.ko
6. qemu-system-x86_64 -kernel arch/x86_64/boot/bzImage -nographic -append "console=ttyS0" -initrd initrd.img -m 1024M
7. insmod livepatch-sample.ko
Kernel live patch cannot complete successfully.
After some debugging, the immediate cause of the patch failure is an error in stack checking. The logs are as follows:
[ 340.974853] livepatch: klp_check_stack: kworker/u256:0:23486 has an unreliable stack
[ 340.974858] livepatch: klp_check_stack: kworker/u256:1:23487 has an unreliable stack
[ 340.974863] livepatch: klp_check_stack: kworker/u256:2:23488 has an unreliable stack
[ 340.974868] livepatch: klp_check_stack: kworker/u256:5:23489 has an unreliable stack
[ 340.974872] livepatch: klp_check_stack: kworker/u256:6:23490 has an unreliable stack
......
BTW,if you use the v5.4.217 tag for testing, make sure to set CONFIG_RETPOLINE = y and CONFIG_LIVEPATCH = y, and other steps are consistent with v5.4.269
After investigation, The problem is strongly related to the commit 8afd1c7da2b0 ("x86/speculation: Change FILL_RETURN_BUFFER to work with objtool"),
which would cause incorrect ORC entries to be generated, and the v5.4.217 version can undo this commit to make kernel livepatch work normally.
It is a back-ported upstream patch with some code adjustments,from the git log, the author also mentioned no intra-function call validation support.
Based on commit 6e1f54a4985b63bc1b55a09e5e75a974c5d6719b (Linux 5.4.269), This patchset adds stack validation support for intra-function calls,
allowing the kernel live patching feature to work correctly.
Alexandre Chartre (2):
objtool: is_fentry_call() crashes if call has no destination
objtool: Add support for intra-function calls
Rui Qi (1):
x86/speculation: Support intra-function call validation
arch/x86/include/asm/nospec-branch.h | 7 ++
include/linux/frame.h | 11 ++++
.../Documentation/stack-validation.txt | 8 +++
tools/objtool/arch/x86/decode.c | 6 ++
tools/objtool/check.c | 64 +++++++++++++++++--
5 files changed, 91 insertions(+), 5 deletions(-)
--
2.39.2 (Apple Git-143)
> From: Parav Pandit <parav(a)nvidia.com>
> Sent: Tuesday, March 5, 2024 12:06 PM
>
> When the PCI device is surprise removed, requests won't complete from the
> device. These IOs are never completed and disk deletion hangs indefinitely.
>
> Fix it by aborting the IOs which the device will never complete when the VQ is
> broken.
>
> With this fix now fio completes swiftly.
> An alternative of IO timeout has been considered, however timeout can take
> very long time. For unresponsive device, quickly completing the request with
> error enables users and upper layer to react quickly.
>
> Verified with multiple device unplug cycles with pending IOs in virtio used ring
> and some pending with device.
>
> Also verified without surprise removal.
>
> Fixes: 43bb40c5b926 ("virtio_pci: Support surprise removal of virtio pci
> device")
> Cc: stable(a)vger.kernel.org
> Reported-by: lirongqing(a)baidu.com
> Closes:
> https://lore.kernel.org/virtualization/c45dd68698cd47238c55fb73ca9b4741
> @baidu.com/
> Co-developed-by: Chaitanya Kulkarni <kch(a)nvidia.com>
> Signed-off-by: Chaitanya Kulkarni <kch(a)nvidia.com>
> Signed-off-by: Parav Pandit <parav(a)nvidia.com>
> ---
Please ignore this patch, I am still debugging and verifying it.
Incorrectly it got CCed to stable.
I am sorry for the noise.
> changelog:
> v0->v1:
> - addressed comments from Ming and Michael
> - changed the flow to not depend on broken vq anymore to avoid
> the race of missing the detection
> - flow updated to quiesce request queue and device, followed by
> syncing with any ongoing interrupt handler or callbacks,
> finally finishing the requests which are not completed by device
> with error.
> ---
> drivers/block/virtio_blk.c | 46 +++++++++++++++++++++++++++++++++++-
> --
> 1 file changed, 43 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index
> 2bf14a0e2815..1956172b4b1a 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -1562,9 +1562,52 @@ static int virtblk_probe(struct virtio_device *vdev)
> return err;
> }
>
> +static bool virtblk_cancel_request(struct request *rq, void *data) {
> + struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
> +
> + vbr->in_hdr.status = VIRTIO_BLK_S_IOERR;
> + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq))
> + blk_mq_complete_request(rq);
> +
> + return true;
> +}
> +
> static void virtblk_remove(struct virtio_device *vdev) {
> struct virtio_blk *vblk = vdev->priv;
> + int i;
> +
> + /* Block upper layer to not get any new requests */
> + blk_mq_quiesce_queue(vblk->disk->queue);
> +
> + mutex_lock(&vblk->vdev_mutex);
> +
> + /* Stop all the virtqueues and configuration change notification and
> also
> + * synchronize with pending interrupt handlers.
> + */
> + virtio_reset_device(vdev);
> +
> + mutex_unlock(&vblk->vdev_mutex);
> +
> + /* Syncronize with any callback handlers for request completion */
> + for (i = 0; i < vblk->num_vqs; i++)
> + virtblk_done(vblk->vqs[i].vq);
> +
> + blk_sync_queue(vblk->disk->queue);
> +
> + /* At this point block layer and device/transport are quiet;
> + * hence, safely complete all the pending requests with error.
> + */
> + blk_mq_tagset_busy_iter(&vblk->tag_set, virtblk_cancel_request,
> vblk);
> + blk_mq_tagset_wait_completed_request(&vblk->tag_set);
> +
> + /*
> + * Unblock any pending dispatch I/Os before we destroy device. From
> + * del_gendisk() -> __blk_mark_disk_dead(disk) will set GD_DEAD flag,
> + * that will make sure any new I/O from bio_queue_enter() to fail.
> + */
> + blk_mq_unquiesce_queue(vblk->disk->queue);
>
> /* Make sure no work handler is accessing the device. */
> flush_work(&vblk->config_work);
> @@ -1574,9 +1617,6 @@ static void virtblk_remove(struct virtio_device
> *vdev)
>
> mutex_lock(&vblk->vdev_mutex);
>
> - /* Stop all the virtqueues. */
> - virtio_reset_device(vdev);
> -
> /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
> vblk->vdev = NULL;
>
> --
> 2.34.1
The quilt patch titled
Subject: init/Kconfig: lower GCC version check for -Warray-bounds
has been removed from the -mm tree. Its filename was
init-kconfig-lower-gcc-version-check-for-warray-bounds.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kees Cook <keescook(a)chromium.org>
Subject: init/Kconfig: lower GCC version check for -Warray-bounds
Date: Fri, 23 Feb 2024 09:08:27 -0800
We continue to see false positives from -Warray-bounds even in GCC 10,
which is getting reported in a few places[1] still:
security/security.c:811:2: warning: `memcpy' offset 32 is out of the bounds [0, 0] [-Warray-bounds]
Lower the GCC version check from 11 to 10.
Link: https://lkml.kernel.org/r/20240223170824.work.768-kees@kernel.org
Reported-by: Lu Yao <yaolu(a)kylinos.cn>
Closes: https://lore.kernel.org/lkml/20240117014541.8887-1-yaolu@kylinos.cn/
Link: https://lore.kernel.org/linux-next/65d84438.620a0220.7d171.81a7@mx.google.c… [1]
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reviewed-by: Paul Moore <paul(a)paul-moore.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: "Gustavo A. R. Silva" <gustavoars(a)kernel.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Marc Aur��le La France <tsi(a)tuyoix.net>
Cc: Masahiro Yamada <masahiroy(a)kernel.org>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
init/Kconfig | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/init/Kconfig~init-kconfig-lower-gcc-version-check-for-warray-bounds
+++ a/init/Kconfig
@@ -876,14 +876,14 @@ config CC_IMPLICIT_FALLTHROUGH
default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
-# Currently, disable gcc-11+ array-bounds globally.
+# Currently, disable gcc-10+ array-bounds globally.
# It's still broken in gcc-13, so no upper bound yet.
-config GCC11_NO_ARRAY_BOUNDS
+config GCC10_NO_ARRAY_BOUNDS
def_bool y
config CC_NO_ARRAY_BOUNDS
bool
- default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS
+ default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS
# Currently, disable -Wstringop-overflow for GCC globally.
config GCC_NO_STRINGOP_OVERFLOW
_
Patches currently in -mm which might be from keescook(a)chromium.org are
The quilt patch titled
Subject: mm, mmap: fix vma_merge() case 7 with vma_ops->close
has been removed from the -mm tree. Its filename was
mm-mmap-fix-vma_merge-case-7-with-vma_ops-close.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Vlastimil Babka <vbabka(a)suse.cz>
Subject: mm, mmap: fix vma_merge() case 7 with vma_ops->close
Date: Thu, 22 Feb 2024 22:59:31 +0100
When debugging issues with a workload using SysV shmem, Michal Hocko has
come up with a reproducer that shows how a series of mprotect() operations
can result in an elevated shm_nattch and thus leak of the resource.
The problem is caused by wrong assumptions in vma_merge() commit
714965ca8252 ("mm/mmap: start distinguishing if vma can be removed in
mergeability test"). The shmem vmas have a vma_ops->close callback that
decrements shm_nattch, and we remove the vma without calling it.
vma_merge() has thus historically avoided merging vma's with
vma_ops->close and commit 714965ca8252 was supposed to keep it that way.
It relaxed the checks for vma_ops->close in can_vma_merge_after() assuming
that it is never called on a vma that would be a candidate for removal.
However, the vma_merge() code does also use the result of this check in
the decision to remove a different vma in the merge case 7.
A robust solution would be to refactor vma_merge() code in a way that the
vma_ops->close check is only done for vma's that are actually going to be
removed, and not as part of the preliminary checks. That would both solve
the existing bug, and also allow additional merges that the checks
currently prevent unnecessarily in some cases.
However to fix the existing bug first with a minimized risk, and for
easier stable backports, this patch only adds a vma_ops->close check to
the buggy case 7 specifically. All other cases of vma removal are covered
by the can_vma_merge_before() check that includes the test for
vma_ops->close.
The reproducer code, adapted from Michal Hocko's code:
int main(int argc, char *argv[]) {
int segment_id;
size_t segment_size = 20 * PAGE_SIZE;
char * sh_mem;
struct shmid_ds shmid_ds;
key_t key = 0x1234;
segment_id = shmget(key, segment_size,
IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR);
sh_mem = (char *)shmat(segment_id, NULL, 0);
mprotect(sh_mem + 2*PAGE_SIZE, PAGE_SIZE, PROT_NONE);
mprotect(sh_mem + PAGE_SIZE, PAGE_SIZE, PROT_WRITE);
mprotect(sh_mem + 2*PAGE_SIZE, PAGE_SIZE, PROT_WRITE);
shmdt(sh_mem);
shmctl(segment_id, IPC_STAT, &shmid_ds);
printf("nattch after shmdt(): %lu (expected: 0)\n", shmid_ds.shm_nattch);
if (shmctl(segment_id, IPC_RMID, 0))
printf("IPCRM failed %d\n", errno);
return (shmid_ds.shm_nattch) ? 1 : 0;
}
Link: https://lkml.kernel.org/r/20240222215930.14637-2-vbabka@suse.cz
Fixes: 714965ca8252 ("mm/mmap: start distinguishing if vma can be removed in mergeability test")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Reported-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Lorenzo Stoakes <lstoakes(a)gmail.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mmap.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
--- a/mm/mmap.c~mm-mmap-fix-vma_merge-case-7-with-vma_ops-close
+++ a/mm/mmap.c
@@ -954,13 +954,21 @@ static struct vm_area_struct
} else if (merge_prev) { /* case 2 */
if (curr) {
vma_start_write(curr);
- err = dup_anon_vma(prev, curr, &anon_dup);
if (end == curr->vm_end) { /* case 7 */
+ /*
+ * can_vma_merge_after() assumed we would not be
+ * removing prev vma, so it skipped the check
+ * for vm_ops->close, but we are removing curr
+ */
+ if (curr->vm_ops && curr->vm_ops->close)
+ err = -EINVAL;
remove = curr;
} else { /* case 5 */
adjust = curr;
adj_start = (end - curr->vm_start);
}
+ if (!err)
+ err = dup_anon_vma(prev, curr, &anon_dup);
}
} else { /* merge_next */
vma_start_write(next);
_
Patches currently in -mm which might be from vbabka(a)suse.cz are
The quilt patch titled
Subject: mm: userfaultfd: fix unexpected change to src_folio when UFFDIO_MOVE fails
has been removed from the -mm tree. Its filename was
mm-userfaultfd-fix-unexpected-change-to-src_folio-when-uffdio_move-fails.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Qi Zheng <zhengqi.arch(a)bytedance.com>
Subject: mm: userfaultfd: fix unexpected change to src_folio when UFFDIO_MOVE fails
Date: Thu, 22 Feb 2024 16:08:15 +0800
After ptep_clear_flush(), if we find that src_folio is pinned we will fail
UFFDIO_MOVE and put src_folio back to src_pte entry, but the change to
src_folio->{mapping,index} is not restored in this process. This is not
what we expected, so fix it.
This can cause the rmap for that page to be invalid, possibly resulting
in memory corruption. At least swapout+migration would no longer work,
because we might fail to locate the mappings of that folio.
Link: https://lkml.kernel.org/r/20240222080815.46291-1-zhengqi.arch@bytedance.com
Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI")
Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/userfaultfd.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/userfaultfd.c~mm-userfaultfd-fix-unexpected-change-to-src_folio-when-uffdio_move-fails
+++ a/mm/userfaultfd.c
@@ -914,9 +914,6 @@ static int move_present_pte(struct mm_st
goto out;
}
- folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
orig_src_pte = ptep_clear_flush(src_vma, src_addr, src_pte);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {
@@ -925,6 +922,9 @@ static int move_present_pte(struct mm_st
goto out;
}
+ folio_move_anon_rmap(src_folio, dst_vma);
+ WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
_
Patches currently in -mm which might be from zhengqi.arch(a)bytedance.com are
mm-pgtable-correct-the-wrong-comment-about-ptdesc-__page_flags.patch
mm-pgtable-add-missing-pt_index-to-struct-ptdesc.patch
s390-supplement-for-ptdesc-conversion.patch
The quilt patch titled
Subject: mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations
has been removed from the -mm tree. Its filename was
mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Vlastimil Babka <vbabka(a)suse.cz>
Subject: mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations
Date: Wed, 21 Feb 2024 12:43:58 +0100
Sven reports an infinite loop in __alloc_pages_slowpath() for costly order
__GFP_RETRY_MAYFAIL allocations that are also GFP_NOIO. Such combination
can happen in a suspend/resume context where a GFP_KERNEL allocation can
have __GFP_IO masked out via gfp_allowed_mask.
Quoting Sven:
1. try to do a "costly" allocation (order > PAGE_ALLOC_COSTLY_ORDER)
with __GFP_RETRY_MAYFAIL set.
2. page alloc's __alloc_pages_slowpath tries to get a page from the
freelist. This fails because there is nothing free of that costly
order.
3. page alloc tries to reclaim by calling __alloc_pages_direct_reclaim,
which bails out because a zone is ready to be compacted; it pretends
to have made a single page of progress.
4. page alloc tries to compact, but this always bails out early because
__GFP_IO is not set (it's not passed by the snd allocator, and even
if it were, we are suspending so the __GFP_IO flag would be cleared
anyway).
5. page alloc believes reclaim progress was made (because of the
pretense in item 3) and so it checks whether it should retry
compaction. The compaction retry logic thinks it should try again,
because:
a) reclaim is needed because of the early bail-out in item 4
b) a zonelist is suitable for compaction
6. goto 2. indefinite stall.
(end quote)
The immediate root cause is confusing the COMPACT_SKIPPED returned from
__alloc_pages_direct_compact() (step 4) due to lack of __GFP_IO to be
indicating a lack of order-0 pages, and in step 5 evaluating that in
should_compact_retry() as a reason to retry, before incrementing and
limiting the number of retries. There are however other places that
wrongly assume that compaction can happen while we lack __GFP_IO.
To fix this, introduce gfp_compaction_allowed() to abstract the __GFP_IO
evaluation and switch the open-coded test in try_to_compact_pages() to use
it.
Also use the new helper in:
- compaction_ready(), which will make reclaim not bail out in step 3, so
there's at least one attempt to actually reclaim, even if chances are
small for a costly order
- in_reclaim_compaction() which will make should_continue_reclaim()
return false and we don't over-reclaim unnecessarily
- in __alloc_pages_slowpath() to set a local variable can_compact,
which is then used to avoid retrying reclaim/compaction for costly
allocations (step 5) if we can't compact and also to skip the early
compaction attempt that we do in some cases
Link: https://lkml.kernel.org/r/20240221114357.13655-2-vbabka@suse.cz
Fixes: 3250845d0526 ("Revert "mm, oom: prevent premature OOM killer invocation for high order request"")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Reported-by: Sven van Ashbrook <svenva(a)chromium.org>
Closes: https://lore.kernel.org/all/CAG-rBihs_xMKb3wrMO1%2B-%2Bp4fowP9oy1pa_OTkfxBz…
Tested-by: Karthikeyan Ramasubramanian <kramasub(a)chromium.org>
Cc: Brian Geffon <bgeffon(a)google.com>
Cc: Curtis Malainey <cujomalainey(a)chromium.org>
Cc: Jaroslav Kysela <perex(a)perex.cz>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Takashi Iwai <tiwai(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/gfp.h | 9 +++++++++
mm/compaction.c | 7 +------
mm/page_alloc.c | 10 ++++++----
mm/vmscan.c | 5 ++++-
4 files changed, 20 insertions(+), 11 deletions(-)
--- a/include/linux/gfp.h~mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations
+++ a/include/linux/gfp.h
@@ -353,6 +353,15 @@ static inline bool gfp_has_io_fs(gfp_t g
return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS);
}
+/*
+ * Check if the gfp flags allow compaction - GFP_NOIO is a really
+ * tricky context because the migration might require IO.
+ */
+static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
+{
+ return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
+}
+
extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_CONTIG_ALLOC
--- a/mm/compaction.c~mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations
+++ a/mm/compaction.c
@@ -2723,16 +2723,11 @@ enum compact_result try_to_compact_pages
unsigned int alloc_flags, const struct alloc_context *ac,
enum compact_priority prio, struct page **capture)
{
- int may_perform_io = (__force int)(gfp_mask & __GFP_IO);
struct zoneref *z;
struct zone *zone;
enum compact_result rc = COMPACT_SKIPPED;
- /*
- * Check if the GFP flags allow compaction - GFP_NOIO is really
- * tricky context because the migration might require IO
- */
- if (!may_perform_io)
+ if (!gfp_compaction_allowed(gfp_mask))
return COMPACT_SKIPPED;
trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
--- a/mm/page_alloc.c~mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations
+++ a/mm/page_alloc.c
@@ -4041,6 +4041,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
struct alloc_context *ac)
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+ bool can_compact = gfp_compaction_allowed(gfp_mask);
const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
struct page *page = NULL;
unsigned int alloc_flags;
@@ -4111,7 +4112,7 @@ restart:
* Don't try this for allocations that are allowed to ignore
* watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
*/
- if (can_direct_reclaim &&
+ if (can_direct_reclaim && can_compact &&
(costly_order ||
(order > 0 && ac->migratetype != MIGRATE_MOVABLE))
&& !gfp_pfmemalloc_allowed(gfp_mask)) {
@@ -4209,9 +4210,10 @@ retry:
/*
* Do not retry costly high order allocations unless they are
- * __GFP_RETRY_MAYFAIL
+ * __GFP_RETRY_MAYFAIL and we can compact
*/
- if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
+ if (costly_order && (!can_compact ||
+ !(gfp_mask & __GFP_RETRY_MAYFAIL)))
goto nopage;
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -4224,7 +4226,7 @@ retry:
* implementation of the compaction depends on the sufficient amount
* of free memory (see __compaction_suitable)
*/
- if (did_some_progress > 0 &&
+ if (did_some_progress > 0 && can_compact &&
should_compact_retry(ac, order, alloc_flags,
compact_result, &compact_priority,
&compaction_retries))
--- a/mm/vmscan.c~mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations
+++ a/mm/vmscan.c
@@ -5753,7 +5753,7 @@ static void shrink_lruvec(struct lruvec
/* Use reclaim/compaction for costly allocs or under memory pressure */
static bool in_reclaim_compaction(struct scan_control *sc)
{
- if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
+ if (gfp_compaction_allowed(sc->gfp_mask) && sc->order &&
(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
sc->priority < DEF_PRIORITY - 2))
return true;
@@ -5998,6 +5998,9 @@ static inline bool compaction_ready(stru
{
unsigned long watermark;
+ if (!gfp_compaction_allowed(sc->gfp_mask))
+ return false;
+
/* Allocation can already succeed, nothing to do */
if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
sc->reclaim_idx, 0))
_
Patches currently in -mm which might be from vbabka(a)suse.cz are
From: Maximilian Heyne <mheyne(a)amazon.de>
Commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream
shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taking. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.
This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
channel:
CPU0 CPU1
shutdown_pirq {
xen_evtchn_close(e)
__startup_pirq {
EVTCHNOP_bind_pirq
-> returns just freed evtchn e
set_evtchn_to_irq(e, irq)
}
xen_irq_info_cleanup() {
set_evtchn_to_irq(e, -1)
}
}
Assume here event channel e refers here to the same event channel
number.
After this race the evtchn_to_irq mapping for e is invalid (-1).
- __startup_pirq races with __unbind_from_irq in a similar way. Because
__startup_pirq doesn't take irq_mapping_update_lock it can grab the
evtchn that __unbind_from_irq is currently freeing and cleaning up. In
this case even though the event channel is allocated, its mapping can
be unset in evtchn_to_irq.
The fix is to first cleanup the mappings and then close the event
channel. In this way, when an event channel gets allocated it's
potential previous evtchn_to_irq mappings are guaranteed to be unset already.
This is also the reverse order of the allocation where first the event
channel is allocated and then the mappings are setup.
On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device which results in a call to shutdown_pirq.
With many nvme devices it's therefore likely to hit this race during
boot because there will be multiple calls to shutdown_pirq and
startup_pirq are running potentially in parallel.
------------[ cut here ]------------
blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
kernel BUG at drivers/xen/events/events_base.c:499!
invalid opcode: 0000 [#1] SMP PTI
CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
Workqueue: nvme-reset-wq nvme_reset_work
RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
? show_trace_log_lvl+0x1c1/0x2d9
? show_trace_log_lvl+0x1c1/0x2d9
? set_affinity_irq+0xdc/0x1c0
? __die_body.cold+0x8/0xd
? die+0x2b/0x50
? do_trap+0x90/0x110
? bind_evtchn_to_cpu+0xdf/0xf0
? do_error_trap+0x65/0x80
? bind_evtchn_to_cpu+0xdf/0xf0
? exc_invalid_op+0x4e/0x70
? bind_evtchn_to_cpu+0xdf/0xf0
? asm_exc_invalid_op+0x12/0x20
? bind_evtchn_to_cpu+0xdf/0xf0
? bind_evtchn_to_cpu+0xc5/0xf0
set_affinity_irq+0xdc/0x1c0
irq_do_set_affinity+0x1d7/0x1f0
irq_setup_affinity+0xd6/0x1a0
irq_startup+0x8a/0xf0
__setup_irq+0x639/0x6d0
? nvme_suspend+0x150/0x150
request_threaded_irq+0x10c/0x180
? nvme_suspend+0x150/0x150
pci_request_irq+0xa8/0xf0
? __blk_mq_free_request+0x74/0xa0
queue_request_irq+0x6f/0x80
nvme_create_queue+0x1af/0x200
nvme_create_io_queues+0xbd/0xf0
nvme_setup_io_queues+0x246/0x320
? nvme_irq_check+0x30/0x30
nvme_reset_work+0x1c8/0x400
process_one_work+0x1b0/0x350
worker_thread+0x49/0x310
? process_one_work+0x350/0x350
kthread+0x11b/0x140
? __kthread_bind_mask+0x60/0x60
ret_from_fork+0x22/0x30
Modules linked in:
---[ end trace a11715de1eee1873 ]---
Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
Cc: stable(a)vger.kernel.org
Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
[apanyaki: backport to v5.10-stable]
Signed-off-by: Andrew Paniakin <apanyaki(a)amazon.com>
---
Compare to upstream patch this one does not have close_evtchn flag
because there is no need to handle static event channels.
This feature was added only in 58f6259b7a08f ("xen/evtchn: Introduce new
IOCTL to bind static evtchn")
drivers/xen/events/events_base.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 24e39984914f..f608663bdfd5 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -885,8 +885,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -929,8 +929,6 @@ static void __unbind_from_irq(unsigned int irq)
if (VALID_EVTCHN(evtchn)) {
unsigned int cpu = cpu_from_irq(irq);
- xen_evtchn_close(evtchn);
-
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -943,6 +941,7 @@ static void __unbind_from_irq(unsigned int irq)
}
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
xen_free_irq(irq);
--
2.40.1
During the handoff from earlycon to the real console driver, we have
two separate drivers operating on the same device concurrently. In the
case of the 8250 driver these concurrent accesses cause problems due
to the driver's use of banked registers, controlled by LCR.DLAB. It is
possible for the setup(), config_port(), pm() and set_mctrl() callbacks
to set DLAB, which can cause the earlycon code that intends to access
TX to instead access DLL, leading to missed output and corruption on
the serial line due to unintended modifications to the baud rate.
In particular, for setup() we have:
univ8250_console_setup()
-> serial8250_console_setup()
-> uart_set_options()
-> serial8250_set_termios()
-> serial8250_do_set_termios()
-> serial8250_do_set_divisor()
For config_port() we have:
serial8250_config_port()
-> autoconfig()
For pm() we have:
serial8250_pm()
-> serial8250_do_pm()
-> serial8250_set_sleep()
For set_mctrl() we have (for some devices):
serial8250_set_mctrl()
-> omap8250_set_mctrl()
-> __omap8250_set_mctrl()
To avoid such problems, let's make it so that the console is locked
during pre-registration calls to these callbacks, which will prevent
the earlycon driver from running concurrently.
Remove the partial solution to this problem in the 8250 driver
that locked the console only during autoconfig_irq(), as this would
result in a deadlock with the new approach. The console continues
to be locked during autoconfig_irq() because it can only be called
through uart_configure_port().
Although this patch introduces more locking than strictly necessary
(and in particular it also locks during the call to rs485_config()
which is not affected by this issue as far as I can tell), it follows
the principle that it is the responsibility of the generic console
code to manage the earlycon handoff by ensuring that earlycon and real
console driver code cannot run concurrently, and not the individual
drivers.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Link: https://linux-review.googlesource.com/id/I7cf8124dcebf8618e6b2ee543fa5b2553…
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
---
drivers/tty/serial/8250/8250_port.c | 6 ------
drivers/tty/serial/serial_core.c | 12 ++++++++++++
kernel/printk/printk.c | 21 ++++++++++++++++++---
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 8ca061d3bbb9..1d65055dde27 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1329,9 +1329,6 @@ static void autoconfig_irq(struct uart_8250_port *up)
inb_p(ICP);
}
- if (uart_console(port))
- console_lock();
-
/* forget possible initially masked and pending IRQ */
probe_irq_off(probe_irq_on());
save_mcr = serial8250_in_MCR(up);
@@ -1371,9 +1368,6 @@ static void autoconfig_irq(struct uart_8250_port *up)
if (port->flags & UPF_FOURPORT)
outb_p(save_ICP, ICP);
- if (uart_console(port))
- console_unlock();
-
port->irq = (irq > 0) ? irq : 0;
}
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index d6a58a9e072a..ff85ebd3a007 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2608,7 +2608,12 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
port->type = PORT_UNKNOWN;
flags |= UART_CONFIG_TYPE;
}
+ /* Synchronize with possible boot console. */
+ if (uart_console(port))
+ console_lock();
port->ops->config_port(port, flags);
+ if (uart_console(port))
+ console_unlock();
}
if (port->type != PORT_UNKNOWN) {
@@ -2616,6 +2621,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
uart_report_port(drv, port);
+ /* Synchronize with possible boot console. */
+ if (uart_console(port))
+ console_lock();
+
/* Power up port for set_mctrl() */
uart_change_pm(state, UART_PM_STATE_ON);
@@ -2632,6 +2641,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
uart_rs485_config(port);
+ if (uart_console(port))
+ console_unlock();
+
/*
* If this driver supports console, and it hasn't been
* successfully registered yet, try to re-register it.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f2444b581e16..f51e4e5a869d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3263,6 +3263,21 @@ static int __init keep_bootcon_setup(char *str)
early_param("keep_bootcon", keep_bootcon_setup);
+static int console_call_setup(struct console *newcon, char *options)
+{
+ int err;
+
+ if (!newcon->setup)
+ return 0;
+
+ /* Synchronize with possible boot console. */
+ console_lock();
+ err = newcon->setup(newcon, options);
+ console_unlock();
+
+ return err;
+}
+
/*
* This is called by register_console() to try to match
* the newly registered console with any of the ones selected
@@ -3298,8 +3313,8 @@ static int try_enable_preferred_console(struct console *newcon,
if (_braille_register_console(newcon, c))
return 0;
- if (newcon->setup &&
- (err = newcon->setup(newcon, c->options)) != 0)
+ err = console_call_setup(newcon, c->options);
+ if (err != 0)
return err;
}
newcon->flags |= CON_ENABLED;
@@ -3325,7 +3340,7 @@ static void try_enable_default_console(struct console *newcon)
if (newcon->index < 0)
newcon->index = 0;
- if (newcon->setup && newcon->setup(newcon, NULL) != 0)
+ if (console_call_setup(newcon, NULL) != 0)
return;
newcon->flags |= CON_ENABLED;
--
2.44.0.rc1.240.g4c46232300-goog
During the handoff from earlycon to the real console driver, we have
two separate drivers operating on the same device concurrently. In the
case of the 8250 driver these concurrent accesses cause problems due
to the driver's use of banked registers, controlled by LCR.DLAB. It is
possible for the setup(), config_port(), pm() and set_mctrl() callbacks
to set DLAB, which can cause the earlycon code that intends to access
TX to instead access DLL, leading to missed output and corruption on
the serial line due to unintended modifications to the baud rate.
In particular, for setup() we have:
univ8250_console_setup()
-> serial8250_console_setup()
-> uart_set_options()
-> serial8250_set_termios()
-> serial8250_do_set_termios()
-> serial8250_do_set_divisor()
For config_port() we have:
serial8250_config_port()
-> autoconfig()
For pm() we have:
serial8250_pm()
-> serial8250_do_pm()
-> serial8250_set_sleep()
For set_mctrl() we have (for some devices):
serial8250_set_mctrl()
-> omap8250_set_mctrl()
-> __omap8250_set_mctrl()
To avoid such problems, let's make it so that the console is locked
during pre-registration calls to these callbacks, which will prevent
the earlycon driver from running concurrently.
Remove the partial solution to this problem in the 8250 driver
that locked the console only during autoconfig_irq(), as this would
result in a deadlock with the new approach. The console continues
to be locked during autoconfig_irq() because it can only be called
through uart_configure_port().
Although this patch introduces more locking than strictly necessary
(and in particular it also locks during the call to rs485_config()
which is not affected by this issue as far as I can tell), it follows
the principle that it is the responsibility of the generic console
code to manage the earlycon handoff by ensuring that earlycon and real
console driver code cannot run concurrently, and not the individual
drivers.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Link: https://linux-review.googlesource.com/id/I7cf8124dcebf8618e6b2ee543fa5b2553…
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
---
v3:
- switch to if (err)
v2:
- add some comments
drivers/tty/serial/8250/8250_port.c | 6 ------
drivers/tty/serial/serial_core.c | 12 ++++++++++++
kernel/printk/printk.c | 21 ++++++++++++++++++---
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 8ca061d3bbb9..1d65055dde27 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1329,9 +1329,6 @@ static void autoconfig_irq(struct uart_8250_port *up)
inb_p(ICP);
}
- if (uart_console(port))
- console_lock();
-
/* forget possible initially masked and pending IRQ */
probe_irq_off(probe_irq_on());
save_mcr = serial8250_in_MCR(up);
@@ -1371,9 +1368,6 @@ static void autoconfig_irq(struct uart_8250_port *up)
if (port->flags & UPF_FOURPORT)
outb_p(save_ICP, ICP);
- if (uart_console(port))
- console_unlock();
-
port->irq = (irq > 0) ? irq : 0;
}
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index d6a58a9e072a..ff85ebd3a007 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2608,7 +2608,12 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
port->type = PORT_UNKNOWN;
flags |= UART_CONFIG_TYPE;
}
+ /* Synchronize with possible boot console. */
+ if (uart_console(port))
+ console_lock();
port->ops->config_port(port, flags);
+ if (uart_console(port))
+ console_unlock();
}
if (port->type != PORT_UNKNOWN) {
@@ -2616,6 +2621,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
uart_report_port(drv, port);
+ /* Synchronize with possible boot console. */
+ if (uart_console(port))
+ console_lock();
+
/* Power up port for set_mctrl() */
uart_change_pm(state, UART_PM_STATE_ON);
@@ -2632,6 +2641,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
uart_rs485_config(port);
+ if (uart_console(port))
+ console_unlock();
+
/*
* If this driver supports console, and it hasn't been
* successfully registered yet, try to re-register it.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f2444b581e16..89f2aa2e1172 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3263,6 +3263,21 @@ static int __init keep_bootcon_setup(char *str)
early_param("keep_bootcon", keep_bootcon_setup);
+static int console_call_setup(struct console *newcon, char *options)
+{
+ int err;
+
+ if (!newcon->setup)
+ return 0;
+
+ /* Synchronize with possible boot console. */
+ console_lock();
+ err = newcon->setup(newcon, options);
+ console_unlock();
+
+ return err;
+}
+
/*
* This is called by register_console() to try to match
* the newly registered console with any of the ones selected
@@ -3298,8 +3313,8 @@ static int try_enable_preferred_console(struct console *newcon,
if (_braille_register_console(newcon, c))
return 0;
- if (newcon->setup &&
- (err = newcon->setup(newcon, c->options)) != 0)
+ err = console_call_setup(newcon, c->options);
+ if (err)
return err;
}
newcon->flags |= CON_ENABLED;
@@ -3325,7 +3340,7 @@ static void try_enable_default_console(struct console *newcon)
if (newcon->index < 0)
newcon->index = 0;
- if (newcon->setup && newcon->setup(newcon, NULL) != 0)
+ if (console_call_setup(newcon, NULL) != 0)
return;
newcon->flags |= CON_ENABLED;
--
2.44.0.278.ge034bb2e1d-goog
From: Maximilian Heyne <mheyne(a)amazon.de>
Commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream
shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taking. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.
This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
channel:
CPU0 CPU1
shutdown_pirq {
xen_evtchn_close(e)
__startup_pirq {
EVTCHNOP_bind_pirq
-> returns just freed evtchn e
set_evtchn_to_irq(e, irq)
}
xen_irq_info_cleanup() {
set_evtchn_to_irq(e, -1)
}
}
Assume here event channel e refers here to the same event channel
number.
After this race the evtchn_to_irq mapping for e is invalid (-1).
- __startup_pirq races with __unbind_from_irq in a similar way. Because
__startup_pirq doesn't take irq_mapping_update_lock it can grab the
evtchn that __unbind_from_irq is currently freeing and cleaning up. In
this case even though the event channel is allocated, its mapping can
be unset in evtchn_to_irq.
The fix is to first cleanup the mappings and then close the event
channel. In this way, when an event channel gets allocated it's
potential previous evtchn_to_irq mappings are guaranteed to be unset already.
This is also the reverse order of the allocation where first the event
channel is allocated and then the mappings are setup.
On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device which results in a call to shutdown_pirq.
With many nvme devices it's therefore likely to hit this race during
boot because there will be multiple calls to shutdown_pirq and
startup_pirq are running potentially in parallel.
------------[ cut here ]------------
blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
kernel BUG at drivers/xen/events/events_base.c:499!
invalid opcode: 0000 [#1] SMP PTI
CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
Workqueue: nvme-reset-wq nvme_reset_work
RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
? show_trace_log_lvl+0x1c1/0x2d9
? show_trace_log_lvl+0x1c1/0x2d9
? set_affinity_irq+0xdc/0x1c0
? __die_body.cold+0x8/0xd
? die+0x2b/0x50
? do_trap+0x90/0x110
? bind_evtchn_to_cpu+0xdf/0xf0
? do_error_trap+0x65/0x80
? bind_evtchn_to_cpu+0xdf/0xf0
? exc_invalid_op+0x4e/0x70
? bind_evtchn_to_cpu+0xdf/0xf0
? asm_exc_invalid_op+0x12/0x20
? bind_evtchn_to_cpu+0xdf/0xf0
? bind_evtchn_to_cpu+0xc5/0xf0
set_affinity_irq+0xdc/0x1c0
irq_do_set_affinity+0x1d7/0x1f0
irq_setup_affinity+0xd6/0x1a0
irq_startup+0x8a/0xf0
__setup_irq+0x639/0x6d0
? nvme_suspend+0x150/0x150
request_threaded_irq+0x10c/0x180
? nvme_suspend+0x150/0x150
pci_request_irq+0xa8/0xf0
? __blk_mq_free_request+0x74/0xa0
queue_request_irq+0x6f/0x80
nvme_create_queue+0x1af/0x200
nvme_create_io_queues+0xbd/0xf0
nvme_setup_io_queues+0x246/0x320
? nvme_irq_check+0x30/0x30
nvme_reset_work+0x1c8/0x400
process_one_work+0x1b0/0x350
worker_thread+0x49/0x310
? process_one_work+0x350/0x350
kthread+0x11b/0x140
? __kthread_bind_mask+0x60/0x60
ret_from_fork+0x22/0x30
Modules linked in:
---[ end trace a11715de1eee1873 ]---
Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
Cc: stable(a)vger.kernel.org
Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
[apanyaki: backport to v5.15-stable]
Signed-off-by: Andrew Paniakin <apanyaki(a)amazon.com>
---
Compare to upstream patch this one does not have close_evtchn flag
because there is no need to handle static event channels.
This feature was added only in 58f6259b7a08f ("xen/evtchn: Introduce new
IOCTL to bind static evtchn")
drivers/xen/events/events_base.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index ee691b20d4a3..04ff194fecf4 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -936,8 +936,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -981,8 +981,6 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;
- xen_evtchn_close(evtchn);
-
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -1000,6 +998,7 @@ static void __unbind_from_irq(unsigned int irq)
}
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
xen_free_irq(irq);
--
2.40.1
From: Maximilian Heyne <mheyne(a)amazon.de>
Commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream
shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taking. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.
This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
channel:
CPU0 CPU1
shutdown_pirq {
xen_evtchn_close(e)
__startup_pirq {
EVTCHNOP_bind_pirq
-> returns just freed evtchn e
set_evtchn_to_irq(e, irq)
}
xen_irq_info_cleanup() {
set_evtchn_to_irq(e, -1)
}
}
Assume here event channel e refers here to the same event channel
number.
After this race the evtchn_to_irq mapping for e is invalid (-1).
- __startup_pirq races with __unbind_from_irq in a similar way. Because
__startup_pirq doesn't take irq_mapping_update_lock it can grab the
evtchn that __unbind_from_irq is currently freeing and cleaning up. In
this case even though the event channel is allocated, its mapping can
be unset in evtchn_to_irq.
The fix is to first cleanup the mappings and then close the event
channel. In this way, when an event channel gets allocated it's
potential previous evtchn_to_irq mappings are guaranteed to be unset already.
This is also the reverse order of the allocation where first the event
channel is allocated and then the mappings are setup.
On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device which results in a call to shutdown_pirq.
With many nvme devices it's therefore likely to hit this race during
boot because there will be multiple calls to shutdown_pirq and
startup_pirq are running potentially in parallel.
------------[ cut here ]------------
blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
kernel BUG at drivers/xen/events/events_base.c:499!
invalid opcode: 0000 [#1] SMP PTI
CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
Workqueue: nvme-reset-wq nvme_reset_work
RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
? show_trace_log_lvl+0x1c1/0x2d9
? show_trace_log_lvl+0x1c1/0x2d9
? set_affinity_irq+0xdc/0x1c0
? __die_body.cold+0x8/0xd
? die+0x2b/0x50
? do_trap+0x90/0x110
? bind_evtchn_to_cpu+0xdf/0xf0
? do_error_trap+0x65/0x80
? bind_evtchn_to_cpu+0xdf/0xf0
? exc_invalid_op+0x4e/0x70
? bind_evtchn_to_cpu+0xdf/0xf0
? asm_exc_invalid_op+0x12/0x20
? bind_evtchn_to_cpu+0xdf/0xf0
? bind_evtchn_to_cpu+0xc5/0xf0
set_affinity_irq+0xdc/0x1c0
irq_do_set_affinity+0x1d7/0x1f0
irq_setup_affinity+0xd6/0x1a0
irq_startup+0x8a/0xf0
__setup_irq+0x639/0x6d0
? nvme_suspend+0x150/0x150
request_threaded_irq+0x10c/0x180
? nvme_suspend+0x150/0x150
pci_request_irq+0xa8/0xf0
? __blk_mq_free_request+0x74/0xa0
queue_request_irq+0x6f/0x80
nvme_create_queue+0x1af/0x200
nvme_create_io_queues+0xbd/0xf0
nvme_setup_io_queues+0x246/0x320
? nvme_irq_check+0x30/0x30
nvme_reset_work+0x1c8/0x400
process_one_work+0x1b0/0x350
worker_thread+0x49/0x310
? process_one_work+0x350/0x350
kthread+0x11b/0x140
? __kthread_bind_mask+0x60/0x60
ret_from_fork+0x22/0x30
Modules linked in:
---[ end trace a11715de1eee1873 ]---
Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
Cc: stable(a)vger.kernel.org
Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
---
Compare to upstream patch this one does not have close_evtchn flag
because there is no need to handle static event channels.
This feature was added only in 58f6259b7a08f ("xen/evtchn: Introduce new
IOCTL to bind static evtchn")
drivers/xen/events/events_base.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 00f8e349921d..96b96516c980 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;
- xen_evtchn_close(evtchn);
-
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq)
}
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
xen_free_irq(irq);
--
2.40.1
[BUG]
Currently btrfs can create subvolume with an invalid qgroup inherit
without triggering any error:
# mkfs.btrfs -O quota -f $dev
# mount $dev $mnt
# btrfs subvolume create -i 2/0 $mnt/subv1
# btrfs qgroup show -prce --sync $mnt
Qgroupid Referenced Exclusive Path
-------- ---------- --------- ----
0/5 16.00KiB 16.00KiB <toplevel>
0/256 16.00KiB 16.00KiB subv1
[CAUSE]
We only do a very basic size check for btrfs_qgroup_inherit structure,
but never really verify if the values are correct.
Thus in btrfs_qgroup_inherit() function, we have to skip non-existing
qgroups, and never return any error.
[FIX]
Fix the behavior and introduce extra checks:
- Introduce early check for btrfs_qgroup_inherit structure
Not only the size, but also all the qgroup ids would be verifyed.
And the timing is very early, so we can return error early.
This early check is very important for snapshot creation, as snapshot
is delayed to transaction commit.
- Drop support for btrfs_qgroup_inherit::num_ref_copies and
num_excl_copies
Those two members are used to specify to copy refr/excl numbers from
other qgroups.
This would definitely mark qgroup inconsistent, and btrfs-progs has
dropped the support for them for a long time.
It's time to drop the support for kernel.
- Verify the supported btrfs_qgroup_inherit::flags
Just in case we want to add extra flags for btrfs_qgroup_inherit.
Now above subvolume creation would fail with -ENOENT other than silently
ignore the non-existing qgroup.
CC: stable(a)vger.kernel.org
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/ioctl.c | 16 +++---------
fs/btrfs/qgroup.c | 52 ++++++++++++++++++++++++++++++++++++++
fs/btrfs/qgroup.h | 3 +++
include/uapi/linux/btrfs.h | 1 +
4 files changed, 59 insertions(+), 13 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8b80fbea1e72..c19ce2e292dc 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1382,7 +1382,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
- u64 nums;
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
if (vol_args->size < sizeof(*inherit) ||
vol_args->size > PAGE_SIZE) {
@@ -1395,19 +1395,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
goto free_args;
}
- if (inherit->num_qgroups > PAGE_SIZE ||
- inherit->num_ref_copies > PAGE_SIZE ||
- inherit->num_excl_copies > PAGE_SIZE) {
- ret = -EINVAL;
+ ret = btrfs_qgroup_check_inherit(fs_info, inherit, vol_args->size);
+ if (ret < 0)
goto free_inherit;
- }
-
- nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
- 2 * inherit->num_excl_copies;
- if (vol_args->size != struct_size(inherit, qgroups, nums)) {
- ret = -EINVAL;
- goto free_inherit;
- }
}
ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file),
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4fa83c76b37b..66968092b554 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3046,6 +3046,58 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
return ret;
}
+int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_inherit *inherit,
+ size_t size)
+{
+ if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP)
+ return -EOPNOTSUPP;
+ if (size < sizeof(*inherit) || size > PAGE_SIZE)
+ return -EINVAL;
+
+ /*
+ * In the past we allow btrfs_qgroup_inherit to specify to copy
+ * refr/excl numbers directly from other qgroups.
+ * This behavior has been disable in btrfs-progs for a very long time,
+ * but here we should also disable them for kernel, as this behavior
+ * is known to mark qgroup inconsistent, and a rescan would wipe out the
+ * change anyway.
+ *
+ * So here we just reject any btrfs_qgroup_inherit with num_ref_copies or
+ * num_excl_copies.
+ */
+ if (inherit->num_ref_copies || inherit->num_excl_copies)
+ return -EINVAL;
+
+ if (inherit->num_qgroups > PAGE_SIZE)
+ return -EINVAL;
+
+ if (size != struct_size(inherit, qgroups, inherit->num_qgroups))
+ return -EINVAL;
+
+ /*
+ * Now check all the remaining qgroups, they should all:
+ * - Exist
+ * - Be higher level qgroups.
+ */
+ for (int i = 0; i < inherit->num_qgroups; i++) {
+ struct btrfs_qgroup *qgroup;
+ u64 qgroupid = inherit->qgroups[i];
+
+ if (btrfs_qgroup_level(qgroupid) == 0)
+ return -EINVAL;
+
+ spin_lock(&fs_info->qgroup_lock);
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (!qgroup) {
+ spin_unlock(&fs_info->qgroup_lock);
+ return -ENOENT;
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+ }
+ return 0;
+}
+
static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info,
u64 inode_rootid,
struct btrfs_qgroup_inherit **inherit)
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 1f664261c064..706640be0ec2 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -350,6 +350,9 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
+int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_inherit *inherit,
+ size_t size);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
u64 objectid, u64 inode_rootid,
struct btrfs_qgroup_inherit *inherit);
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index f8bc34a6bcfa..cdf6ad872149 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -92,6 +92,7 @@ struct btrfs_qgroup_limit {
* struct btrfs_qgroup_inherit.flags
*/
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
+#define BTRFS_QGROUP_INHERIT_FLAGS_SUPP (BTRFS_QGROUP_INHERIT_SET_LIMITS)
struct btrfs_qgroup_inherit {
__u64 flags;
--
2.43.2
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x adf1bb78dab55e36d4d557aa2fb446ebcfe9e5ce
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030417-jaws-icky-a0f2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
adf1bb78dab5 ("mptcp: fix snd_wnd initialization for passive socket")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From adf1bb78dab55e36d4d557aa2fb446ebcfe9e5ce Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Fri, 23 Feb 2024 17:14:15 +0100
Subject: [PATCH] mptcp: fix snd_wnd initialization for passive socket
Such value should be inherited from the first subflow, but
passive sockets always used 'rsk_rcv_wnd'.
Fixes: 6f8a612a33e4 ("mptcp: keep track of advertised windows right edge")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-5-16…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 442fa7d9b57a..2c8f931c6d5b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3211,7 +3211,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
On Mon, Mar 4, 2024 at 3:00 PM Xiubo Li <xiubli(a)redhat.com> wrote:
>
>
> On 3/4/24 19:02, Ilya Dryomov wrote:
>
> On Mon, Mar 4, 2024 at 2:02 AM Xiubo Li <xiubli(a)redhat.com> wrote:
>
> On 3/2/24 00:16, Ilya Dryomov wrote:
>
> On Thu, Feb 29, 2024 at 5:22 AM <xiubli(a)redhat.com> wrote:
>
> From: Xiubo Li <xiubli(a)redhat.com>
>
> The osd code has remove cursor initilizing code and this will make
> the sparse read state into a infinite loop. We should initialize
> the cursor just before each sparse-read in messnger v2.
>
> Cc: stable(a)vger.kernel.org
> URL: https://tracker.ceph.com/issues/64607
> Fixes: 8e46a2d068c9 ("libceph: just wait for more data to be available on the socket")
> Reported-by: Luis Henriques <lhenriques(a)suse.de>
> Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
> ---
> net/ceph/messenger_v2.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
> index a0ca5414b333..7ae0f80100f4 100644
> --- a/net/ceph/messenger_v2.c
> +++ b/net/ceph/messenger_v2.c
> @@ -2025,6 +2025,7 @@ static int prepare_sparse_read_cont(struct ceph_connection *con)
> static int prepare_sparse_read_data(struct ceph_connection *con)
> {
> struct ceph_msg *msg = con->in_msg;
> + u64 len = con->in_msg->sparse_read_total ? : data_len(con->in_msg);
>
> Hi Xiubo,
>
> Why is sparse_read_total being tested here? Isn't this function
> supposed to be called only for sparse reads, after the state is set to
> IN_S_PREPARE_SPARSE_DATA based on a similar test:
>
> if (msg->sparse_read_total)
> con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
> else
> con->v2.in_state = IN_S_PREPARE_READ_DATA;
>
> Then the patch could be simplified and just be:
>
> diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
> index a0ca5414b333..ab3ab130a911 100644
> --- a/net/ceph/messenger_v2.c
> +++ b/net/ceph/messenger_v2.c
> @@ -2034,6 +2034,9 @@ static int prepare_sparse_read_data(struct
> ceph_connection *con)
> if (!con_secure(con))
> con->in_data_crc = -1;
>
> + ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
> + con->in_msg->sparse_read_total);
> +
> reset_in_kvecs(con);
> con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
> con->v2.data_len_remain = data_len(msg);
>
> Else where should we do the test like this ?
>
> Hi Xiubo,
>
> I suspect you copied this test from prepare_message_data() in msgr1,
> where that function is called unconditionally for all reads. In msgr2,
> prepare_sparse_read_data() is called conditionally, so the test just
> seems bogus.
>
> That said, CephFS is the only user of sparse read code, so you should
> know better at this point ;)
>
> As we know the 'sparse_read_total' it's a dedicated member and will be set only in sparse-read case.
>
> In msgr1 for all the read cases they will call "prepare_message_data()", so I just did this check in "prepare_message_data()".
Right.
>
> While for msgr2 it's a little different from msg1 and it won't call 'prepare_read_data()' for all the reads, and for sparse-read it has its own dedicated helper, which is "prepare_sparse_read_data()".
Right.
> So I just did this check in 'prepare_sparse_read_data()' instead.
This where I'm getting lost. Why do a "is this a sparse read" check in
a helper that is dedicated to sparse reads and isn't called for anything
else?
> For "prepare_read_data()" it doesn't make any sense to check "sparse_read_total".
Right, so why doesn't the same go for prepare_sparse_read_data()?
Thanks,
Ilya
v2:
- Dropped already backported patch "x86/bugs: Add asm helpers for
executing VERW".
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
- Boot tested with KASLR and KPTI enabled.
- Rebased to v6.7.8
v1: https://lore.kernel.org/r/20240226-delay-verw-backport-6-7-y-v1-0-ab25f6431…
This is the backport of recently upstreamed series that moves VERW
execution to a later point in exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW executions
may end into MDS affected CPU buffers. Moving VERW closer to ring
transition reduces the attack surface.
Patch 2/7: A conflict was resolved for the hunk
swapgs_restore_regs_and_return_to_usermode.
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
Pawan Gupta (4):
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/arch/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/nospec-branch.h | 12 ------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 20 +++++++++++++++----
11 files changed, 75 insertions(+), 45 deletions(-)
---
base-commit: d6d6c49dbf4512f1421f5e42896e2d70dc121f9a
change-id: 20240226-delay-verw-backport-6-7-y-a2cb3f26bb90
Best regards,
--
Thanks,
Pawan
From: Xiubo Li <xiubli(a)redhat.com>
The osd code has remove cursor initilizing code and this will make
the sparse read state into a infinite loop. We should initialize
the cursor just before each sparse-read in messnger v2.
Cc: stable(a)vger.kernel.org
URL: https://tracker.ceph.com/issues/64607
Fixes: 8e46a2d068c9 ("libceph: just wait for more data to be available on the socket")
Reported-by: Luis Henriques <lhenriques(a)suse.de>
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
net/ceph/messenger_v2.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index a0ca5414b333..7ae0f80100f4 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -2025,6 +2025,7 @@ static int prepare_sparse_read_cont(struct ceph_connection *con)
static int prepare_sparse_read_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
+ u64 len = con->in_msg->sparse_read_total ? : data_len(con->in_msg);
dout("%s: starting sparse read\n", __func__);
@@ -2034,6 +2035,8 @@ static int prepare_sparse_read_data(struct ceph_connection *con)
if (!con_secure(con))
con->in_data_crc = -1;
+ ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg, len);
+
reset_in_kvecs(con);
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
con->v2.data_len_remain = data_len(msg);
--
2.43.0
Here are two patches fixing issues in MPTCP diag.sh kselftest:
- Patch 1 makes sure the exit code is '1' in case of error, and not the
test ID, not to return an exit code that would be wrongly interpreted
by the ksefltests framework, e.g. '4' means 'skip'.
- Patch 2 avoids waiting for unnecessary conditions, which can cause
timeouts in some very slow environments.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Geliang Tang (1):
selftests: mptcp: diag: return KSFT_FAIL not test_cnt
Matthieu Baerts (NGI0) (1):
selftests: mptcp: diag: avoid extra waiting
tools/testing/selftests/net/mptcp/diag.sh | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
---
base-commit: 1c61728be22c1cb49c1be88693e72d8c06b1c81e
change-id: 20240301-upstream-net-20240301-selftests-mptcp-diag-exit-timeout-207d7925b7c0
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
when AOP_WRITEPAGE_ACTIVATE is returned (as NFS does when it detects
congestion) it is important that the page is redirtied.
nfs_writepage_locked() doesn't do this, so files can become corrupted as
writes can be lost.
Note that this is not needed in v6.8 as AOP_WRITEPAGE_ACTIVATE cannot be
returned. It is needed for kernels v5.18..v6.7. From 6.3 onward the patch
is different as it needs to mention "folio", not "page".
Reported-and-tested-by: Jacek Tomaka <Jacek.Tomaka(a)poczta.fm>
Fixes: 6df25e58532b ("nfs: remove reliance on bdi congestion")
Signed-off-by: NeilBrown <neilb(a)suse.de>
---
fs/nfs/write.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f41d24b54fd1..6a0606668417 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page,
int err;
if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
+ NFS_SERVER(inode)->write_congested) {
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
+ }
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0,
--
2.43.0
From: Max Krummenacher <max.krummenacher(a)toradex.com>
This reverts commit ef4a40953c8076626875ff91c41e210fcee7a6fd.
The commit was applied to make further commits apply cleanly, but the
commit depends on other commits in the same patchset. I.e. the
controlling DSI host would need a change too. Thus one would need to
backport the full patchset changing the DSI hosts and all downstream
DSI device drivers.
Revert the commit and fix up the conflicts with the backported fixes
to the lt8912b driver.
Signed-off-by: Max Krummenacher <max.krummenacher(a)toradex.com>
Conflicts:
drivers/gpu/drm/bridge/lontium-lt8912b.c
---
drivers/gpu/drm/bridge/lontium-lt8912b.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c
index 6891863ed5104..e16b0fc0cda0f 100644
--- a/drivers/gpu/drm/bridge/lontium-lt8912b.c
+++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c
@@ -571,6 +571,10 @@ static int lt8912_bridge_attach(struct drm_bridge *bridge,
if (ret)
goto error;
+ ret = lt8912_attach_dsi(lt);
+ if (ret)
+ goto error;
+
return 0;
error:
@@ -726,15 +730,8 @@ static int lt8912_probe(struct i2c_client *client,
drm_bridge_add(<->bridge);
- ret = lt8912_attach_dsi(lt);
- if (ret)
- goto err_attach;
-
return 0;
-err_attach:
- drm_bridge_remove(<->bridge);
- lt8912_free_i2c(lt);
err_i2c:
lt8912_put_dt(lt);
err_dt_parse:
--
2.42.0
Please consider the patches below for backporting to v6.1. They should
all apply cleanly in the given order.
These are prerequisites for NX compat support on x86, but the
remaining changes do not apply cleanly and will be sent as a patch
series at a later date.
By themselves, these changes not only constitute a reasonable cleanup,
they are also needed for future support of x86s [0] CPUs that are no
longer able to transition out of long mode.
Documentation/x86/boot.rst | 2 +-
arch/x86/Kconfig | 17 +
arch/x86/boot/compressed/Makefile | 8 +-
arch/x86/boot/compressed/efi_mixed.S | 383 +++++++++++++++++++
arch/x86/boot/compressed/efi_thunk_64.S | 195 ----------
arch/x86/boot/compressed/head_32.S | 25 +-
arch/x86/boot/compressed/head_64.S | 566 ++++++-----------------------
arch/x86/boot/compressed/mem_encrypt.S | 152 +++++++-
arch/x86/boot/compressed/misc.c | 34 +-
arch/x86/boot/compressed/misc.h | 2 -
arch/x86/boot/compressed/pgtable.h | 10 +-
arch/x86/boot/compressed/pgtable_64.c | 87 ++---
arch/x86/boot/header.S | 2 +-
arch/x86/boot/tools/build.c | 2 +
drivers/firmware/efi/efi.c | 22 ++
drivers/firmware/efi/libstub/alignedmem.c | 5 +-
drivers/firmware/efi/libstub/arm64-stub.c | 6 +-
drivers/firmware/efi/libstub/efistub.h | 6 +-
drivers/firmware/efi/libstub/mem.c | 3 +-
drivers/firmware/efi/libstub/randomalloc.c | 5 +-
drivers/firmware/efi/libstub/x86-stub.c | 53 ++-
drivers/firmware/efi/vars.c | 13 +-
include/linux/decompress/mm.h | 2 +-
23 files changed, 805 insertions(+), 795 deletions(-)
[0] https://www.intel.com/content/www/us/en/developer/articles/technical/envisi…
9cf42bca30e9 efi: libstub: use EFI_LOADER_CODE region when moving the
kernel in memory
cb8bda8ad443 x86/boot/compressed: Rename efi_thunk_64.S to efi-mixed.S
e2ab9eab324c x86/boot/compressed: Move 32-bit entrypoint code into .text section
5c3a85f35b58 x86/boot/compressed: Move bootargs parsing out of 32-bit
startup code
91592b5c0c2f x86/boot/compressed: Move efi32_pe_entry into .text section
73a6dec80e2a x86/boot/compressed: Move efi32_entry out of head_64.S
7f22ca396778 x86/boot/compressed: Move efi32_pe_entry() out of head_64.S
4b52016247ae x86/boot/compressed, efi: Merge multiple definitions of
image_offset into one
630f337f0c4f x86/boot/compressed: Simplify IDT/GDT preserve/restore in
the EFI thunk
6aac80a8da46 x86/boot/compressed: Avoid touching ECX in
startup32_set_idt_entry()
d73a257f7f86 x86/boot/compressed: Pull global variable reference into
startup32_load_idt()
c6355995ba47 x86/boot/compressed: Move startup32_load_idt() into .text section
9ea813be3d34 x86/boot/compressed: Move startup32_load_idt() out of head_64.S
b5d854cd4b6a x86/boot/compressed: Move startup32_check_sev_cbit() into .text
9d7eaae6a071 x86/boot/compressed: Move startup32_check_sev_cbit() out
of head_64.S
30c9ca16a527 x86/boot/compressed: Adhere to calling convention in
get_sev_encryption_bit()
61de13df9590 x86/boot/compressed: Only build mem_encrypt.S if AMD_MEM_ENCRYPT=y
bad267f9e18f efi: verify that variable services are supported
0217a40d7ba6 efi: efivars: prevent double registration
cc3fdda2876e x86/efi: Make the deprecated EFI handover protocol optional
7734a0f31e99 x86/boot: Robustify calling startup_{32,64}() from the
decompressor code
d2d7a54f69b6 x86/efistub: Branch straight to kernel entry point from C code
df9215f15206 x86/efistub: Simplify and clean up handover entry code
127920645876 x86/decompressor: Avoid magic offsets for EFI handover entrypoint
d7156b986d4c x86/efistub: Clear BSS in EFI handover protocol entrypoint
8b63cba746f8 x86/decompressor: Store boot_params pointer in callee save register
00c6b0978ec1 x86/decompressor: Assign paging related global variables earlier
e8972a76aa90 x86/decompressor: Call trampoline as a normal function
918a7a04e717 x86/decompressor: Use standard calling convention for trampoline
bd328aa01ff7 x86/decompressor: Avoid the need for a stack in the
32-bit trampoline
64ef578b6b68 x86/decompressor: Call trampoline directly from C code
f97b67a773cd x86/decompressor: Only call the trampoline when changing
paging levels
cb83cece57e1 x86/decompressor: Pass pgtable address to trampoline directly
03dda95137d3 x86/decompressor: Merge trampoline cleanup with switching code
24388292e2d7 x86/decompressor: Move global symbol references to C code
8217ad0a435f decompress: Use 8 byte alignment
An anon THP page is first added to swap cache before reclaiming it.
Initially, each tail page contains the proper swap entry value(stored in
->private field) which is filled from add_to_swap_cache(). After
migrating the THP page sitting on the swap cache, only the swap entry of
the head page is filled(see folio_migrate_mapping()).
Now when this page is tried to split(one case is when this page is again
migrated, see migrate_pages()->try_split_thp()), the tail pages
->private is not stored with proper swap entry values. When this tail
page is now try to be freed, as part of it delete_from_swap_cache() is
called which operates on the wrong swap cache index and eventually
replaces the wrong swap cache index with shadow/NULL value, frees the
page.
This leads to the state with a swap cache containing the freed page.
This issue can manifest in many forms and the most common thing observed
is the rcu stall during the swapin (see mapping_get_entry()).
On the recent kernels, this issues is indirectly getting fixed with the
series[1], to be specific[2].
When tried to back port this series, it is observed many merge
conflicts and also seems dependent on many other changes. As backporting
to LTS branches is not a trivial one, the similar change from [2] is
picked as a fix.
[1] https://lore.kernel.org/all/20230821160849.531668-1-david@redhat.com/
[2] https://lore.kernel.org/all/20230821160849.531668-5-david@redhat.com/
Closes: https://lore.kernel.org/linux-mm/69cb784f-578d-ded1-cd9f-c6db04696336@quici…
Fixes: 3417013e0d18 ("mm/migrate: Add folio_migrate_mapping()")
Cc: <stable(a)vger.kernel.org> # see patch description, applicable to <=6.1
Signed-off-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
---
mm/huge_memory.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5957794..cc5273f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2477,6 +2477,8 @@ static void __split_huge_page_tail(struct page *head, int tail,
if (!folio_test_swapcache(page_folio(head))) {
VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
page_tail->private = 0;
+ } else {
+ set_page_private(page_tail, (unsigned long)head->private + tail);
}
/* Page flags must be visible before we make the page non-compound. */
--
2.7.4
Hi Greg and Sasha,
Please apply upstream commit 5b750b22530f ("drm/amd/display: Increase
frame warning limit with KASAN or KCSAN in dml") to linux-6.1.y, as it
is needed to avoid instances of -Wframe-larger-than in allmodconfig,
which has -Werror enabled. It applies cleanly for me and it is already
in 6.6 and 6.7. The fixes tag is not entirely accurate and commit
e63e35f0164c ("drm/amd/display: Increase frame-larger-than for all
display_mode_vba files"), which was recently applied to that tree,
depends on it (I should have made that clearer in the patch).
If there are any issues, please let me know.
Cheers,
Nathan
On some pinephones the video output sometimes freezes (flips between two
frames) [1]. It seems to be that the reason for this behaviour is that
PLL-MIPI is outside its limits, and the GPU is not running at a fixed
rate.
In this patch series I propose the following changes:
1. sunxi-ng: Adhere to the following constraints given in the
Allwinner A64 Manual regarding PLL-MIPI:
* M/N <= 3
* (PLL_VIDEO0)/M >= 24MHz
* 500MHz <= clockrate <= 1400MHz
2. Remove two operating points from the A64 DTS OPPs, so that the GPU
runs at a fixed rate of 432 MHz.
Note, that when pinning the GPU to 432 MHz the issue [1] completely
disappears for me. I've searched the BSP and could not find any
indication that supports the idea of having the three OPPs. The only
frequency I found in the BPSs for A64 is 432 MHz, which has also proven
stable for me.
Another bigger change compared to the previous version is that I've
removed the patch to adapt the XBD599 panel's timings to Allwinner A64's
PLL-MIPI new constraints from this series. Mainly, because I'm currently
evaluationg other options that may or may not work. (It may work at
least until HDMI support is upstreamed.) I'll probably resend the patch
at a later point in time.
I very much appreciate your feedback!
[1] https://gitlab.com/postmarketOS/pmaports/-/issues/805
Signed-off-by: Frank Oltmanns <frank(a)oltmanns.dev>
---
Changes in v3:
- dts: Pin GPU to 432 MHz.
- nkm and a64: Move minimum and maximum rate handling to the common part
of the sunxi-ng driver.
- Removed st7703 patch from series.
- Link to v2: https://lore.kernel.org/r/20240205-pinephone-pll-fixes-v2-0-96a46a2d8c9b@ol…
Changes in v2:
- dts: Increase minimum GPU frequency to 192 MHz.
- nkm and a64: Add minimum and maximum rate for PLL-MIPI.
- nkm: Use the same approach for skipping invalid rates in
ccu_nkm_find_best() as in ccu_nkm_find_best_with_parent_adj().
- nkm: Improve names for ratio struct members and hence get rid of
describing comments.
- nkm and a64: Correct description in the commit messages: M/N <= 3
- Remove patches for nm as they were not needed.
- st7703: Rework the commit message to cover more background for the
change.
- Link to v1: https://lore.kernel.org/r/20231218-pinephone-pll-fixes-v1-0-e238b6ed6dc1@ol…
---
Frank Oltmanns (5):
clk: sunxi-ng: common: Support minimum and maximum rate
clk: sunxi-ng: a64: Set minimum and maximum rate for PLL-MIPI
clk: sunxi-ng: nkm: Support constraints on m/n ratio and parent rate
clk: sunxi-ng: a64: Add constraints on PLL-MIPI's n/m ratio and parent rate
arm64: dts: allwinner: a64: Run GPU at 432 MHz
arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 8 --------
drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 14 +++++++++-----
drivers/clk/sunxi-ng/ccu_common.c | 15 +++++++++++++++
drivers/clk/sunxi-ng/ccu_common.h | 3 +++
drivers/clk/sunxi-ng/ccu_nkm.c | 21 +++++++++++++++++++++
drivers/clk/sunxi-ng/ccu_nkm.h | 2 ++
6 files changed, 50 insertions(+), 13 deletions(-)
---
base-commit: 216c1282dde38ca87ebdf1ccacee5a0682901574
change-id: 20231218-pinephone-pll-fixes-0ccdfde273e4
Best regards,
--
Frank Oltmanns <frank(a)oltmanns.dev>
Svacer reports NULL-pointer dereference and double free issues in
do_bank_switch() in case sdw_ml_sync_bank_switch() returns an error
not on the first iteration of the list_for_each_entry() loop. These
problems are present in 5.10, 5.15 and 6.1 stable releases. These problems
have been fixed by the following upstream patch that can be cleanly
applied to 5.10, 5.15 and 6.1 branches.
Hi stable maintainers,
The following patch in mainline is listed as a fix for CVE-2023-2176:
8d037973d48c026224ab285e6a06985ccac6f7bf (RDMA/core: Refactor rdma_bind_addr)
And the following is a fix for a regression in the above patch:
0e15863015d97c1ee2cc29d599abcc7fa2dc3e95 (RDMA/core: Update CMA destination address on rdma_resolve_addr)
To my knowledge, at least back to v6.1 is vulnerable to this same bug.
Since these should apply directly to 6.1.y, can these be picked up for that branch?
Regards,
Brennan
v2:
- Runtime patch jmp instead of verw in macro CLEAR_CPU_BUFFERS due to
lack of relative addressing support in relocations in kernels <v6.5.
- Rebased to v6.1.80
- Boot tested with KASLR and KPTI enabled.
- Fixed warning:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction
- Verified VERW being executed with mitigation ON, and not being
executed with mitigation turned OFF.
- Rebased to v6.1.80.
v1: https://lore.kernel.org/r/20240226-delay-verw-backport-6-1-y-v1-0-b3a2c5b9b…
This is the backport of recently upstreamed series that moves VERW
execution to a later point in exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW executions
may end into MDS affected CPU buffers. Moving VERW closer to ring
transition reduces the attack surface.
Patch 1/6 includes a minor fix that is queued for upstream:
https://lore.kernel.org/lkml/170899674562.398.6398007479766564897.tip-bot2@…
Patch 1,2,5 and 6 needed conflict resolution.
I saw a few new warnings:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction
I tried using REACHABLE, but that did not fix the warning.
For the below warning:
vmlinux.o: warning: objtool: .altinstr_replacement+0x17: unsupported relocation in alternatives section
not sure if this is related to this series or a pre-existing warning, I
will check later without this series.
I am not too concerned because the alternative did substitute verw
correctly:
entry_SYSCALL_64:
...
0xffffffff8200013d <+253>: swapgs
0xffffffff82000140 <+256>: verw 0xffffffff82000000
0xffffffff82000148 <+264>: sysretq
0xffffffff8200014b <+267>: int3
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
To: stable(a)vger.kernel.org
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
Pawan Gupta (5):
x86/bugs: Add asm helpers for executing VERW
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry.S | 23 ++++++++++++++++++++++
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/cpufeatures.h | 2 +-
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/nospec-branch.h | 27 +++++++++++++------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 12 ++++++++----
13 files changed, 106 insertions(+), 46 deletions(-)
---
base-commit: a3eb3a74aa8c94e6c8130b55f3b031f29162868c
change-id: 20240226-delay-verw-backport-6-1-y-4b0cec84087c
Best regards,
--
Thanks,
Pawan
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b9cd26f640a308ea314ad23532de9a8592cd09d2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030448-walrus-tribunal-7b38@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
b9cd26f640a3 ("mptcp: push at DSS boundaries")
1094c6fe7280 ("mptcp: fix possible divide by zero")
8ce568ed06ce ("mptcp: drop tx skb cache")
4e14867d5e91 ("mptcp: tune re-injections for csum enabled mode")
2948d0a1e5ae ("mptcp: factor out __mptcp_retrans helper()")
eaeef1ce55ec ("mptcp: fix memory accounting on allocation error")
d489ded1a369 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b9cd26f640a308ea314ad23532de9a8592cd09d2 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Fri, 23 Feb 2024 17:14:14 +0100
Subject: [PATCH] mptcp: push at DSS boundaries
when inserting not contiguous data in the subflow write queue,
the protocol creates a new skb and prevent the TCP stack from
merging it later with already queued skbs by setting the EOR marker.
Still no push flag is explicitly set at the end of previous GSO
packet, making the aggregation on the receiver side sub-optimal -
and packetdrill self-tests less predictable.
Explicitly mark the end of not contiguous DSS with the push flag.
Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-4-16…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 948606a537da..442fa7d9b57a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1260,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
+ tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
Hi there!
Are you interested in increasing your customer base through your company's website?
We're transforming the virtual world into tangible profits by creating functional, responsive websites and profitable online stores, optimized for search engine rankings.
Whether you need a simple website or a complex web application, our team of experts utilizes advanced tools to deliver fast and user-friendly products.
Would you be interested in hearing more about what we can do for you in this regard?
Best regards
Ray Galt
There is an issue with ACPI overlay table removal specifically related
to I2C multiplexers.
Consider an ACPI SSDT Overlay that defines a PCA9548 I2C mux on an
existing I2C bus. When this table is loaded we see the creation of a
device for the overall PCA9548 chip and 8 further devices - one
i2c_adapter each for the mux channels. These are all bound to their
ACPI equivalents via an eventual invocation of acpi_bind_one().
When we unload the SSDT overlay we run into the problem. The ACPI
devices are deleted as normal via acpi_device_del_work_fn() and the
acpi_device_del_list.
However, the following warning and stack trace is output as the
deletion does not go smoothly:
------------[ cut here ]------------
kernfs: can not remove 'physical_node', no directory
WARNING: CPU: 1 PID: 11 at fs/kernfs/dir.c:1674 kernfs_remove_by_name_ns+0xb9/0xc0
Modules linked in:
CPU: 1 PID: 11 Comm: kworker/u128:0 Not tainted 6.8.0-rc6+ #1
Hardware name: congatec AG conga-B7E3/conga-B7E3, BIOS 5.13 05/16/2023
Workqueue: kacpi_hotplug acpi_device_del_work_fn
RIP: 0010:kernfs_remove_by_name_ns+0xb9/0xc0
Code: e4 00 48 89 ef e8 07 71 db ff 5b b8 fe ff ff ff 5d 41 5c 41 5d e9 a7 55 e4 00 0f 0b eb a6 48 c7 c7 f0 38 0d 9d e8 97 0a d5 ff <0f> 0b eb dc 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
RSP: 0018:ffff9f864008fb28 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff8ef90a8d4940 RCX: 0000000000000000
RDX: ffff8f000e267d10 RSI: ffff8f000e25c780 RDI: ffff8f000e25c780
RBP: ffff8ef9186f9870 R08: 0000000000013ffb R09: 00000000ffffbfff
R10: 00000000ffffbfff R11: ffff8f000e0a0000 R12: ffff9f864008fb50
R13: ffff8ef90c93dd60 R14: ffff8ef9010d0958 R15: ffff8ef9186f98c8
FS: 0000000000000000(0000) GS:ffff8f000e240000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f48f5253a08 CR3: 00000003cb82e000 CR4: 00000000003506f0
Call Trace:
<TASK>
? kernfs_remove_by_name_ns+0xb9/0xc0
? __warn+0x7c/0x130
? kernfs_remove_by_name_ns+0xb9/0xc0
? report_bug+0x171/0x1a0
? handle_bug+0x3c/0x70
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? kernfs_remove_by_name_ns+0xb9/0xc0
? kernfs_remove_by_name_ns+0xb9/0xc0
acpi_unbind_one+0x108/0x180
device_del+0x18b/0x490
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
device_unregister+0xd/0x30
i2c_del_adapter.part.0+0x1bf/0x250
i2c_mux_del_adapters+0xa1/0xe0
i2c_device_remove+0x1e/0x80
device_release_driver_internal+0x19a/0x200
bus_remove_device+0xbf/0x100
device_del+0x157/0x490
? __pfx_device_match_fwnode+0x10/0x10
? srso_return_thunk+0x5/0x5f
device_unregister+0xd/0x30
i2c_acpi_notify+0x10f/0x140
notifier_call_chain+0x58/0xd0
blocking_notifier_call_chain+0x3a/0x60
acpi_device_del_work_fn+0x85/0x1d0
process_one_work+0x134/0x2f0
worker_thread+0x2f0/0x410
? __pfx_worker_thread+0x10/0x10
kthread+0xe3/0x110
? __pfx_kthread+0x10/0x10
ret_from_fork+0x2f/0x50
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1b/0x30
</TASK>
---[ end trace 0000000000000000 ]---
...
repeated 7 more times, 1 for each channel of the mux
...
The issue is that the binding of the ACPI devices to their peer I2C
adapters is not correctly cleaned up. Digging deeper into the issue we
see that the deletion order is such that the ACPI devices matching the
mux channel i2c adapters are deleted first during the SSDT overlay
removal. For each of the channels we see a call to i2c_acpi_notify()
with ACPI_RECONFIG_DEVICE_REMOVE but, because these devices are not
actually i2c_clients, nothing is done for them.
Later on, after each of the mux channels has been dealt with, we come
to delete the i2c_client representing the PCA9548 device. This is the
call stack we see above, whereby the kernel cleans up the i2c_client
including destruction of the mux and its channel adapters. At this
point we do attempt to unbind from the ACPI peers but those peers no
longer exist and so we hit the kernfs errors.
The fix is to augment i2c_acpi_notify() to handle i2c_adapters. But,
given that the life cycle of the adapters is linked to the i2c_client,
instead of deleting the i2c_adapters during the i2c_acpi_notify(), we
just trigger unbinding of the ACPI device from the adapter device, and
allow the clean up of the adapter to continue in the way it always has.
Signed-off-by: Hamish Martin <hamish.martin(a)alliedtelesis.co.nz>
Reviewed-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)kernel.org>
Fixes: 525e6fabeae2 ("i2c / ACPI: add support for ACPI reconfigure notifications")
Cc: <stable(a)vger.kernel.org> # v4.8+
---
drivers/i2c/i2c-core-acpi.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index d6037a328669..67fa8deccef6 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -445,6 +445,11 @@ static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
return i2c_find_device_by_fwnode(acpi_fwnode_handle(adev));
}
+static struct i2c_adapter *i2c_acpi_find_adapter_by_adev(struct acpi_device *adev)
+{
+ return i2c_find_adapter_by_fwnode(acpi_fwnode_handle(adev));
+}
+
static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
void *arg)
{
@@ -471,11 +476,17 @@ static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
break;
client = i2c_acpi_find_client_by_adev(adev);
- if (!client)
- break;
+ if (client) {
+ i2c_unregister_device(client);
+ put_device(&client->dev);
+ }
+
+ adapter = i2c_acpi_find_adapter_by_adev(adev);
+ if (adapter) {
+ acpi_device_notify_remove(&adapter->dev);
+ put_device(&adapter->dev);
+ }
- i2c_unregister_device(client);
- put_device(&client->dev);
break;
}
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d5fbeff1ab812b6c473b6924bee8748469462e2c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100421-divisible-bacterium-18b5@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Sat, 16 Sep 2023 12:52:46 +0200
Subject: [PATCH] mptcp: move __mptcp_error_report in protocol.c
This will simplify the next patch ("mptcp: process pending subflow error
on close").
No functional change intended.
Cc: stable(a)vger.kernel.org # v5.12+
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a7fc16f5175d..915860027b1a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
return moved;
}
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err = sock_error(ssk);
+ int ssk_state;
+
+ if (!err)
+ continue;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+ continue;
+
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk_error_report(sk);
+ break;
+ }
+}
+
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9bf3c7bc1762..2f40c23fdb0d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
-void __mptcp_error_report(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow;
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int err = sock_error(ssk);
- int ssk_state;
-
- if (!err)
- continue;
-
- /* only propagate errors on fallen-back sockets or
- * on MPC connect
- */
- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
- continue;
-
- /* We need to propagate only transition to CLOSE state.
- * Orphaned socket will see such state change via
- * subflow_sched_work_if_closed() and that path will properly
- * destroy the msk as needed.
- */
- ssk_state = inet_sk_state_load(ssk);
- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
- WRITE_ONCE(sk->sk_err, -err);
-
- /* This barrier is coupled with smp_rmb() in mptcp_poll() */
- smp_wmb();
- sk_error_report(sk);
- break;
- }
-}
-
static void subflow_error_report(struct sock *ssk)
{
struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
After the 'Fixes' commit mentioned below, which is a partial backport,
the MPTCP worker was no longer marking the first subflow as "UNCONNECTED"
when the socket was transitioning to TCP_CLOSE state.
As a result, in v6.1, it was no longer possible to reconnect to the just
disconnected socket. Continue to do that like before, only for the first
subflow.
A few refactoring have been done around the 'msk->subflow' in later
versions, and it looks like this is not needed to do that there, but
still needed in v6.1. Without that, the 'disconnect' tests from the
mptcp_connect.sh selftest fail: they repeat the transfer 3 times by
reconnecting to the server each time.
Fixes: 7857e35ef10e ("mptcp: get rid of msk->subflow")
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Notes:
- This is specific to the 6.1 version having the partial backport.
---
net/mptcp/protocol.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cdabb00648bd2..125825db642cc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2440,6 +2440,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
__mptcp_subflow_disconnect(ssk, subflow, flags);
+ if (msk->subflow && ssk == msk->subflow->sk)
+ msk->subflow->state = SS_UNCONNECTED;
release_sock(ssk);
goto out;
--
2.43.0
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x cf7c2789822db8b5efa34f5ebcf1621bc0008d48
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030407-headway-blustery-23f1@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
cf7c2789822d ("iommufd: Fix protection fault in iommufd_test_syz_conv_iova")
bd7a282650b8 ("iommufd: Add iommufd_ctx to iommufd_put_object()")
65fe32f7a447 ("iommufd/selftest: Add nested domain allocation for mock domain")
2bdabb8e82f5 ("iommu: Pass in parent domain with user_data to domain_alloc_user op")
b5021cb264e6 ("iommufd: Share iommufd_hwpt_alloc with IOMMUFD_OBJ_HWPT_NESTED")
89db31635c87 ("iommufd: Derive iommufd_hwpt_paging from iommufd_hw_pagetable")
58d84f430dc7 ("iommufd/device: Wrap IOMMUFD_OBJ_HWPT_PAGING-only configurations")
9744a7ab62cc ("iommufd: Rename IOMMUFD_OBJ_HW_PAGETABLE to IOMMUFD_OBJ_HWPT_PAGING")
2ccabf81ddff ("iommufd: Only enforce cache coherency in iommufd_hw_pagetable_alloc")
a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP")
7adf267d66d1 ("iommufd/selftest: Test IOMMU_HWPT_SET_DIRTY_TRACKING")
266ce58989ba ("iommufd/selftest: Test IOMMU_HWPT_ALLOC_DIRTY_TRACKING")
e04b23c8d4ed ("iommufd/selftest: Expand mock_domain with dev_flags")
421a511a293f ("iommu/amd: Access/Dirty bit support in IOPTEs")
134288158a41 ("iommu/amd: Add domain_alloc_user based domain allocation")
b9a60d6f850e ("iommufd: Add IOMMU_HWPT_GET_DIRTY_BITMAP")
e2a4b2947849 ("iommufd: Add IOMMU_HWPT_SET_DIRTY_TRACKING")
5f9bdbf4c658 ("iommufd: Add a flag to enforce dirty tracking on attach")
750e2e902b71 ("iommu: Add iommu_domain ops for dirty tracking")
b5f9e63278d6 ("iommufd: Correct IOMMU_HWPT_ALLOC_NEST_PARENT description")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cf7c2789822db8b5efa34f5ebcf1621bc0008d48 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc(a)nvidia.com>
Date: Thu, 22 Feb 2024 13:23:47 -0800
Subject: [PATCH] iommufd: Fix protection fault in iommufd_test_syz_conv_iova
Syzkaller reported the following bug:
general protection fault, probably for non-canonical address 0xdffffc0000000038: 0000 [#1] SMP KASAN
KASAN: null-ptr-deref in range [0x00000000000001c0-0x00000000000001c7]
Call Trace:
lock_acquire
lock_acquire+0x1ce/0x4f0
down_read+0x93/0x4a0
iommufd_test_syz_conv_iova+0x56/0x1f0
iommufd_test_access_rw.isra.0+0x2ec/0x390
iommufd_test+0x1058/0x1e30
iommufd_fops_ioctl+0x381/0x510
vfs_ioctl
__do_sys_ioctl
__se_sys_ioctl
__x64_sys_ioctl+0x170/0x1e0
do_syscall_x64
do_syscall_64+0x71/0x140
This is because the new iommufd_access_change_ioas() sets access->ioas to
NULL during its process, so the lock might be gone in a concurrent racing
context.
Fix this by doing the same access->ioas sanity as iommufd_access_rw() and
iommufd_access_pin_pages() functions do.
Cc: stable(a)vger.kernel.org
Fixes: 9227da7816dd ("iommufd: Add iommufd_access_change_ioas(_id) helpers")
Link: https://lore.kernel.org/r/3f1932acaf1dd494d404c04364d73ce8f57f3e5e.17086366…
Reported-by: Jason Gunthorpe <jgg(a)nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc(a)nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 2bfe77bd351d..d59e199a8705 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -63,8 +63,8 @@ enum {
* In syzkaller mode the 64 bit IOVA is converted into an nth area and offset
* value. This has a much smaller randomization space and syzkaller can hit it.
*/
-static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
- u64 *iova)
+static unsigned long __iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
+ u64 *iova)
{
struct syz_layout {
__u32 nth_area;
@@ -88,6 +88,21 @@ static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
return 0;
}
+static unsigned long iommufd_test_syz_conv_iova(struct iommufd_access *access,
+ u64 *iova)
+{
+ unsigned long ret;
+
+ mutex_lock(&access->ioas_lock);
+ if (!access->ioas) {
+ mutex_unlock(&access->ioas_lock);
+ return 0;
+ }
+ ret = __iommufd_test_syz_conv_iova(&access->ioas->iopt, iova);
+ mutex_unlock(&access->ioas_lock);
+ return ret;
+}
+
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
unsigned int ioas_id, u64 *iova, u32 *flags)
{
@@ -100,7 +115,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
ioas = iommufd_get_ioas(ucmd->ictx, ioas_id);
if (IS_ERR(ioas))
return;
- *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
+ *iova = __iommufd_test_syz_conv_iova(&ioas->iopt, iova);
iommufd_put_object(ucmd->ictx, &ioas->obj);
}
@@ -1161,7 +1176,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
}
if (flags & MOCK_FLAGS_ACCESS_SYZ)
- iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
+ iova = iommufd_test_syz_conv_iova(staccess->access,
&cmd->access_pages.iova);
npages = (ALIGN(iova + length, PAGE_SIZE) -
@@ -1263,8 +1278,8 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
}
if (flags & MOCK_FLAGS_ACCESS_SYZ)
- iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
- &cmd->access_rw.iova);
+ iova = iommufd_test_syz_conv_iova(staccess->access,
+ &cmd->access_rw.iova);
rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags);
if (rc)
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 4774848fef6041716a4883217eb75f6b10eb183b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030445-rants-grading-f698@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
4774848fef60 ("riscv: Add a custom ISA extension for the [ms]envcfg CSR")
aec3353963b8 ("riscv: add ISA extension parsing for vector crypto")
0d8295ed975b ("riscv: add ISA extension parsing for scalar crypto")
e45f463a9b01 ("riscv: add ISA extension parsing for Zbc")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4774848fef6041716a4883217eb75f6b10eb183b Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland(a)sifive.com>
Date: Tue, 27 Feb 2024 22:55:34 -0800
Subject: [PATCH] riscv: Add a custom ISA extension for the [ms]envcfg CSR
The [ms]envcfg CSR was added in version 1.12 of the RISC-V privileged
ISA (aka S[ms]1p12). However, bits in this CSR are defined by several
other extensions which may be implemented separately from any particular
version of the privileged ISA (for example, some unrelated errata may
prevent an implementation from claiming conformance with Ss1p12). As a
result, Linux cannot simply use the privileged ISA version to determine
if the CSR is present. It must also check if any of these other
extensions are implemented. It also cannot probe the existence of the
CSR at runtime, because Linux does not require Sstrict, so (in the
absence of additional information) it cannot know if a CSR at that
address is [ms]envcfg or part of some non-conforming vendor extension.
Since there are several standard extensions that imply the existence of
the [ms]envcfg CSR, it becomes unwieldy to check for all of them
wherever the CSR is accessed. Instead, define a custom Xlinuxenvcfg ISA
extension bit that is implied by the other extensions and denotes that
the CSR exists as defined in the privileged ISA, containing at least one
of the fields common between menvcfg and senvcfg.
This extension does not need to be parsed from the devicetree or ISA
string because it can only be implemented as a subset of some other
standard extension.
Cc: <stable(a)vger.kernel.org> # v6.7+
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Reviewed-by: Andrew Jones <ajones(a)ventanamicro.com>
Link: https://lore.kernel.org/r/20240228065559.3434837-3-samuel.holland@sifive.com
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5340f818746b..1f2d2599c655 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,8 @@
#define RISCV_ISA_EXT_ZTSO 72
#define RISCV_ISA_EXT_ZACAS 73
+#define RISCV_ISA_EXT_XLINUXENVCFG 127
+
#define RISCV_ISA_EXT_MAX 128
#define RISCV_ISA_EXT_INVALID U32_MAX
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index c5b13f7dd482..dacffef68ce2 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -201,6 +201,16 @@ static const unsigned int riscv_zvbb_exts[] = {
RISCV_ISA_EXT_ZVKB
};
+/*
+ * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V
+ * privileged ISA, the existence of the CSRs is implied by any extension which
+ * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the
+ * existence of the CSR, and treat it as a subset of those other extensions.
+ */
+static const unsigned int riscv_xlinuxenvcfg_exts[] = {
+ RISCV_ISA_EXT_XLINUXENVCFG
+};
+
/*
* The canonical order of ISA extension names in the ISA string is defined in
* chapter 27 of the unprivileged specification.
@@ -250,8 +260,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
__RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
- __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
- __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+ __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts),
+ __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8e9f25a290ae0016353c9ea13314c95fb3207812
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030401-cushy-sterility-1d74@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8e9f25a290ae ("mmc: sdhci-xenon: fix PHY init clock stability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e9f25a290ae0016353c9ea13314c95fb3207812 Mon Sep 17 00:00:00 2001
From: Elad Nachman <enachman(a)marvell.com>
Date: Thu, 22 Feb 2024 22:09:30 +0200
Subject: [PATCH] mmc: sdhci-xenon: fix PHY init clock stability
Each time SD/mmc phy is initialized, at times, in some of
the attempts, phy fails to completes its initialization
which results into timeout error. Per the HW spec, it is
a pre-requisite to ensure a stable SD clock before a phy
initialization is attempted.
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC")
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Elad Nachman <enachman(a)marvell.com>
Link: https://lore.kernel.org/r/20240222200930.1277665-1-enachman@marvell.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 8cf3a375de65..c3096230a969 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/ktime.h>
+#include <linux/iopoll.h>
#include <linux/of_address.h>
#include "sdhci-pltfm.h"
@@ -216,6 +217,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host)
return 0;
}
+static int xenon_check_stability_internal_clk(struct sdhci_host *host)
+{
+ u32 reg;
+ int err;
+
+ err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE,
+ 1100, 20000, false, host, SDHCI_CLOCK_CONTROL);
+ if (err)
+ dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n");
+
+ return err;
+}
+
/*
* eMMC 5.0/5.1 PHY init/re-init.
* eMMC PHY init should be executed after:
@@ -232,6 +246,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+ int ret = xenon_check_stability_internal_clk(host);
+
+ if (ret)
+ return ret;
+
reg = sdhci_readl(host, phy_regs->timing_adj);
reg |= XENON_PHY_INITIALIZAION;
sdhci_writel(host, reg, phy_regs->timing_adj);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 8e9f25a290ae0016353c9ea13314c95fb3207812
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030400-debate-conclude-99e5@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
8e9f25a290ae ("mmc: sdhci-xenon: fix PHY init clock stability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e9f25a290ae0016353c9ea13314c95fb3207812 Mon Sep 17 00:00:00 2001
From: Elad Nachman <enachman(a)marvell.com>
Date: Thu, 22 Feb 2024 22:09:30 +0200
Subject: [PATCH] mmc: sdhci-xenon: fix PHY init clock stability
Each time SD/mmc phy is initialized, at times, in some of
the attempts, phy fails to completes its initialization
which results into timeout error. Per the HW spec, it is
a pre-requisite to ensure a stable SD clock before a phy
initialization is attempted.
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC")
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Elad Nachman <enachman(a)marvell.com>
Link: https://lore.kernel.org/r/20240222200930.1277665-1-enachman@marvell.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 8cf3a375de65..c3096230a969 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/ktime.h>
+#include <linux/iopoll.h>
#include <linux/of_address.h>
#include "sdhci-pltfm.h"
@@ -216,6 +217,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host)
return 0;
}
+static int xenon_check_stability_internal_clk(struct sdhci_host *host)
+{
+ u32 reg;
+ int err;
+
+ err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE,
+ 1100, 20000, false, host, SDHCI_CLOCK_CONTROL);
+ if (err)
+ dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n");
+
+ return err;
+}
+
/*
* eMMC 5.0/5.1 PHY init/re-init.
* eMMC PHY init should be executed after:
@@ -232,6 +246,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+ int ret = xenon_check_stability_internal_clk(host);
+
+ if (ret)
+ return ret;
+
reg = sdhci_readl(host, phy_regs->timing_adj);
reg |= XENON_PHY_INITIALIZAION;
sdhci_writel(host, reg, phy_regs->timing_adj);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030409-purchase-uselessly-906f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
09e23823ae9a ("mmc: sdhci-xenon: add timeout for PHY init complete")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129 Mon Sep 17 00:00:00 2001
From: Elad Nachman <enachman(a)marvell.com>
Date: Thu, 22 Feb 2024 21:17:14 +0200
Subject: [PATCH] mmc: sdhci-xenon: add timeout for PHY init complete
AC5X spec says PHY init complete bit must be polled until zero.
We see cases in which timeout can take longer than the standard
calculation on AC5X, which is expected following the spec comment above.
According to the spec, we must wait as long as it takes for that bit to
toggle on AC5X.
Cap that with 100 delay loops so we won't get stuck forever.
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC")
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Elad Nachman <enachman(a)marvell.com>
Link: https://lore.kernel.org/r/20240222191714.1216470-3-enachman@marvell.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index c3096230a969..cc9d28b75eb9 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -110,6 +110,8 @@
#define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18)
#define XENON_LOGIC_TIMING_VALUE 0x00AA8977
+#define XENON_MAX_PHY_TIMEOUT_LOOPS 100
+
/*
* List offset of PHY registers and some special register values
* in eMMC PHY 5.0 or eMMC PHY 5.1
@@ -278,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
/* get the wait time */
wait /= clock;
wait++;
- /* wait for host eMMC PHY init completes */
- udelay(wait);
- reg = sdhci_readl(host, phy_regs->timing_adj);
- reg &= XENON_PHY_INITIALIZAION;
- if (reg) {
+ /*
+ * AC5X spec says bit must be polled until zero.
+ * We see cases in which timeout can take longer
+ * than the standard calculation on AC5X, which is
+ * expected following the spec comment above.
+ * According to the spec, we must wait as long as
+ * it takes for that bit to toggle on AC5X.
+ * Cap that with 100 delay loops so we won't get
+ * stuck here forever:
+ */
+
+ ret = read_poll_timeout(sdhci_readl, reg,
+ !(reg & XENON_PHY_INITIALIZAION),
+ wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait,
+ false, host, phy_regs->timing_adj);
+ if (ret)
dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n",
- wait);
- return -ETIMEDOUT;
- }
+ wait * XENON_MAX_PHY_TIMEOUT_LOOPS);
- return 0;
+ return ret;
}
#define ARMADA_3700_SOC_PAD_1_8V 0x1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030408-hyphen-moonwalk-5e0e@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
09e23823ae9a ("mmc: sdhci-xenon: add timeout for PHY init complete")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129 Mon Sep 17 00:00:00 2001
From: Elad Nachman <enachman(a)marvell.com>
Date: Thu, 22 Feb 2024 21:17:14 +0200
Subject: [PATCH] mmc: sdhci-xenon: add timeout for PHY init complete
AC5X spec says PHY init complete bit must be polled until zero.
We see cases in which timeout can take longer than the standard
calculation on AC5X, which is expected following the spec comment above.
According to the spec, we must wait as long as it takes for that bit to
toggle on AC5X.
Cap that with 100 delay loops so we won't get stuck forever.
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC")
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Elad Nachman <enachman(a)marvell.com>
Link: https://lore.kernel.org/r/20240222191714.1216470-3-enachman@marvell.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index c3096230a969..cc9d28b75eb9 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -110,6 +110,8 @@
#define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18)
#define XENON_LOGIC_TIMING_VALUE 0x00AA8977
+#define XENON_MAX_PHY_TIMEOUT_LOOPS 100
+
/*
* List offset of PHY registers and some special register values
* in eMMC PHY 5.0 or eMMC PHY 5.1
@@ -278,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
/* get the wait time */
wait /= clock;
wait++;
- /* wait for host eMMC PHY init completes */
- udelay(wait);
- reg = sdhci_readl(host, phy_regs->timing_adj);
- reg &= XENON_PHY_INITIALIZAION;
- if (reg) {
+ /*
+ * AC5X spec says bit must be polled until zero.
+ * We see cases in which timeout can take longer
+ * than the standard calculation on AC5X, which is
+ * expected following the spec comment above.
+ * According to the spec, we must wait as long as
+ * it takes for that bit to toggle on AC5X.
+ * Cap that with 100 delay loops so we won't get
+ * stuck here forever:
+ */
+
+ ret = read_poll_timeout(sdhci_readl, reg,
+ !(reg & XENON_PHY_INITIALIZAION),
+ wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait,
+ false, host, phy_regs->timing_adj);
+ if (ret)
dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n",
- wait);
- return -ETIMEDOUT;
- }
+ wait * XENON_MAX_PHY_TIMEOUT_LOOPS);
- return 0;
+ return ret;
}
#define ARMADA_3700_SOC_PAD_1_8V 0x1
When not configured for wakeup lis3lv02d_i2c_suspend() will call
lis3lv02d_poweroff() even if the device has already been turned off
by the runtime-suspend handler and if configured for wakeup and
the device is runtime-suspended at this point then it is not turned
back on to serve as a wakeup source.
Before commit b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting
of the reg_ctrl callback"), lis3lv02d_poweroff() failed to disable
the regulators which as a side effect made calling poweroff() twice ok.
Now that poweroff() correctly disables the regulators, doing this twice
triggers a WARN() in the regulator core:
unbalanced disables for regulator-dummy
WARNING: CPU: 1 PID: 92 at drivers/regulator/core.c:2999 _regulator_disable
...
Fix lis3lv02d_i2c_suspend() to not call poweroff() a second time if
already runtime-suspended and add a poweron() call when necessary to
make wakeup work.
lis3lv02d_i2c_resume() has similar issues, with an added weirness that
it always powers on the device if it is runtime suspended, after which
the first runtime-resume will call poweron() again, causing the enabled
count for the regulator to increase by 1 every suspend/resume. These
unbalanced regulator_enable() calls cause the regulator to never
be turned off and trigger the following WARN() on driver unbind:
WARNING: CPU: 1 PID: 1724 at drivers/regulator/core.c:2396 _regulator_put
Fix this by making lis3lv02d_i2c_resume() mirror the new suspend().
Fixes: b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting of the reg_ctrl callback")
Reported-by: Paul Menzel <pmenzel(a)molgen.mpg.de>
Closes: https://lore.kernel.org/regressions/5fc6da74-af0a-4aac-b4d5-a000b39a63a5@mo…
Cc: stable(a)vger.kernel.org
Cc: regressions(a)lists.linux.dev
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
index c6eb27d46cb0..15119584473c 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
@@ -198,8 +198,14 @@ static int lis3lv02d_i2c_suspend(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct lis3lv02d *lis3 = i2c_get_clientdata(client);
- if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+ /* Turn on for wakeup if turned off by runtime suspend */
+ if (lis3->pdata && lis3->pdata->wakeup_flags) {
+ if (pm_runtime_suspended(dev))
+ lis3lv02d_poweron(lis3);
+ /* For non wakeup turn off if not already turned off by runtime suspend */
+ } else if (!pm_runtime_suspended(dev))
lis3lv02d_poweroff(lis3);
+
return 0;
}
@@ -208,13 +214,12 @@ static int lis3lv02d_i2c_resume(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct lis3lv02d *lis3 = i2c_get_clientdata(client);
- /*
- * pm_runtime documentation says that devices should always
- * be powered on at resume. Pm_runtime turns them off after system
- * wide resume is complete.
- */
- if (!lis3->pdata || !lis3->pdata->wakeup_flags ||
- pm_runtime_suspended(dev))
+ /* Turn back off if turned on for wakeup and runtime suspended*/
+ if (lis3->pdata && lis3->pdata->wakeup_flags) {
+ if (pm_runtime_suspended(dev))
+ lis3lv02d_poweroff(lis3);
+ /* For non wakeup turn back on if not runtime suspended */
+ } else if (!pm_runtime_suspended(dev))
lis3lv02d_poweron(lis3);
return 0;
--
2.43.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 51d31149a88b5c5a8d2d33f06df93f6187a25b4c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030406-ambiguity-strict-2ed2@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
51d31149a88b ("ceph: switch to corrected encoding of max_xattr_size in mdsmap")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 51d31149a88b5c5a8d2d33f06df93f6187a25b4c Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Mon, 19 Feb 2024 13:14:32 +0800
Subject: [PATCH] ceph: switch to corrected encoding of max_xattr_size in
mdsmap
The addition of bal_rank_mask with encoding version 17 was merged
into ceph.git in Oct 2022 and made it into v18.2.0 release normally.
A few months later, the much delayed addition of max_xattr_size got
merged, also with encoding version 17, placed before bal_rank_mask
in the encoding -- but it didn't make v18.2.0 release.
The way this ended up being resolved on the MDS side is that
bal_rank_mask will continue to be encoded in version 17 while
max_xattr_size is now encoded in version 18. This does mean that
older kernels will misdecode version 17, but this is also true for
v18.2.0 and v18.2.1 clients in userspace.
The best we can do is backport this adjustment -- see ceph.git
commit 78abfeaff27fee343fb664db633de5b221699a73 for details.
[ idryomov: changelog ]
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/64440
Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size")
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Patrick Donnelly <pdonnell(a)ibm.com>
Reviewed-by: Venky Shankar <vshankar(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index fae97c25ce58..8109aba66e02 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -380,10 +380,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p,
ceph_decode_skip_8(p, end, bad_ext);
/* required_client_features */
ceph_decode_skip_set(p, end, 64, bad_ext);
+ /* bal_rank_mask */
+ ceph_decode_skip_string(p, end, bad_ext);
+ }
+ if (mdsmap_ev >= 18) {
ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
- } else {
- /* This forces the usage of the (sync) SETXATTR Op */
- m->m_max_xattr_size = 0;
}
bad_ext:
doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h
index 89f1931f1ba6..1f2171dd01bf 100644
--- a/fs/ceph/mdsmap.h
+++ b/fs/ceph/mdsmap.h
@@ -27,7 +27,11 @@ struct ceph_mdsmap {
u32 m_session_timeout; /* seconds */
u32 m_session_autoclose; /* seconds */
u64 m_max_file_size;
- u64 m_max_xattr_size; /* maximum size for xattrs blob */
+ /*
+ * maximum size for xattrs blob.
+ * Zeroed by default to force the usage of the (sync) SETXATTR Op.
+ */
+ u64 m_max_xattr_size;
u32 m_max_mds; /* expected up:active mds number */
u32 m_num_active_mds; /* actual up:active mds number */
u32 possible_max_rank; /* possible max rank index */
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 51d31149a88b5c5a8d2d33f06df93f6187a25b4c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030405-life-despair-ad2e@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
51d31149a88b ("ceph: switch to corrected encoding of max_xattr_size in mdsmap")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 51d31149a88b5c5a8d2d33f06df93f6187a25b4c Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Mon, 19 Feb 2024 13:14:32 +0800
Subject: [PATCH] ceph: switch to corrected encoding of max_xattr_size in
mdsmap
The addition of bal_rank_mask with encoding version 17 was merged
into ceph.git in Oct 2022 and made it into v18.2.0 release normally.
A few months later, the much delayed addition of max_xattr_size got
merged, also with encoding version 17, placed before bal_rank_mask
in the encoding -- but it didn't make v18.2.0 release.
The way this ended up being resolved on the MDS side is that
bal_rank_mask will continue to be encoded in version 17 while
max_xattr_size is now encoded in version 18. This does mean that
older kernels will misdecode version 17, but this is also true for
v18.2.0 and v18.2.1 clients in userspace.
The best we can do is backport this adjustment -- see ceph.git
commit 78abfeaff27fee343fb664db633de5b221699a73 for details.
[ idryomov: changelog ]
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/64440
Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size")
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Patrick Donnelly <pdonnell(a)ibm.com>
Reviewed-by: Venky Shankar <vshankar(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index fae97c25ce58..8109aba66e02 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -380,10 +380,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p,
ceph_decode_skip_8(p, end, bad_ext);
/* required_client_features */
ceph_decode_skip_set(p, end, 64, bad_ext);
+ /* bal_rank_mask */
+ ceph_decode_skip_string(p, end, bad_ext);
+ }
+ if (mdsmap_ev >= 18) {
ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
- } else {
- /* This forces the usage of the (sync) SETXATTR Op */
- m->m_max_xattr_size = 0;
}
bad_ext:
doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h
index 89f1931f1ba6..1f2171dd01bf 100644
--- a/fs/ceph/mdsmap.h
+++ b/fs/ceph/mdsmap.h
@@ -27,7 +27,11 @@ struct ceph_mdsmap {
u32 m_session_timeout; /* seconds */
u32 m_session_autoclose; /* seconds */
u64 m_max_file_size;
- u64 m_max_xattr_size; /* maximum size for xattrs blob */
+ /*
+ * maximum size for xattrs blob.
+ * Zeroed by default to force the usage of the (sync) SETXATTR Op.
+ */
+ u64 m_max_xattr_size;
u32 m_max_mds; /* expected up:active mds number */
u32 m_num_active_mds; /* actual up:active mds number */
u32 possible_max_rank; /* possible max rank index */
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a79f949a5ce1d45329d63742c2a995f2b47f9852
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030434-underling-helmet-8fbe@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a79f949a5ce1d45329d63742c2a995f2b47f9852 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li(a)nxp.com>
Date: Wed, 7 Feb 2024 14:47:32 -0500
Subject: [PATCH] dmaengine: fsl-edma: correct max_segment_size setting
Correcting the previous setting of 0x3fff to the actual value of 0x7fff.
Introduced new macro 'EDMA_TCD_ITER_MASK' for improved code clarity and
utilization of FIELD_GET to obtain the accurate maximum value.
Cc: stable(a)vger.kernel.org
Fixes: e06748539432 ("dmaengine: fsl-edma: support edma memcpy")
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://lore.kernel.org/r/20240207194733.2112870-1-Frank.Li@nxp.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index bb5221158a77..f5e216b157c7 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -30,8 +30,9 @@
#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8)
#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11)
-#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0))
-#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0))
+#define EDMA_TCD_ITER_MASK GENMASK(14, 0)
+#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK)
+#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK)
#define EDMA_TCD_CSR_START BIT(0)
#define EDMA_TCD_CSR_INT_MAJOR BIT(1)
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 45cc419b1b4a..d36e28b9c767 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -10,6 +10,7 @@
*/
#include <dt-bindings/dma/fsl-edma.h>
+#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/clk.h>
@@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev)
DMAENGINE_ALIGN_32_BYTES;
/* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
- dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+ dma_set_max_seg_size(fsl_edma->dma_dev.dev,
+ FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK));
fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x a79f949a5ce1d45329d63742c2a995f2b47f9852
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030433-nickname-giblet-5b8d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a79f949a5ce1d45329d63742c2a995f2b47f9852 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li(a)nxp.com>
Date: Wed, 7 Feb 2024 14:47:32 -0500
Subject: [PATCH] dmaengine: fsl-edma: correct max_segment_size setting
Correcting the previous setting of 0x3fff to the actual value of 0x7fff.
Introduced new macro 'EDMA_TCD_ITER_MASK' for improved code clarity and
utilization of FIELD_GET to obtain the accurate maximum value.
Cc: stable(a)vger.kernel.org
Fixes: e06748539432 ("dmaengine: fsl-edma: support edma memcpy")
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://lore.kernel.org/r/20240207194733.2112870-1-Frank.Li@nxp.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index bb5221158a77..f5e216b157c7 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -30,8 +30,9 @@
#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8)
#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11)
-#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0))
-#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0))
+#define EDMA_TCD_ITER_MASK GENMASK(14, 0)
+#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK)
+#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK)
#define EDMA_TCD_CSR_START BIT(0)
#define EDMA_TCD_CSR_INT_MAJOR BIT(1)
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 45cc419b1b4a..d36e28b9c767 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -10,6 +10,7 @@
*/
#include <dt-bindings/dma/fsl-edma.h>
+#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/clk.h>
@@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev)
DMAENGINE_ALIGN_32_BYTES;
/* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
- dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+ dma_set_max_seg_size(fsl_edma->dma_dev.dev,
+ FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK));
fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x a79f949a5ce1d45329d63742c2a995f2b47f9852
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030433-wand-unicorn-9af0@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a79f949a5ce1d45329d63742c2a995f2b47f9852 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li(a)nxp.com>
Date: Wed, 7 Feb 2024 14:47:32 -0500
Subject: [PATCH] dmaengine: fsl-edma: correct max_segment_size setting
Correcting the previous setting of 0x3fff to the actual value of 0x7fff.
Introduced new macro 'EDMA_TCD_ITER_MASK' for improved code clarity and
utilization of FIELD_GET to obtain the accurate maximum value.
Cc: stable(a)vger.kernel.org
Fixes: e06748539432 ("dmaengine: fsl-edma: support edma memcpy")
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://lore.kernel.org/r/20240207194733.2112870-1-Frank.Li@nxp.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index bb5221158a77..f5e216b157c7 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -30,8 +30,9 @@
#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8)
#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11)
-#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0))
-#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0))
+#define EDMA_TCD_ITER_MASK GENMASK(14, 0)
+#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK)
+#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK)
#define EDMA_TCD_CSR_START BIT(0)
#define EDMA_TCD_CSR_INT_MAJOR BIT(1)
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 45cc419b1b4a..d36e28b9c767 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -10,6 +10,7 @@
*/
#include <dt-bindings/dma/fsl-edma.h>
+#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/clk.h>
@@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev)
DMAENGINE_ALIGN_32_BYTES;
/* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
- dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+ dma_set_max_seg_size(fsl_edma->dma_dev.dev,
+ FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK));
fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x b979f2d50a099f3402418d7ff5f26c3952fb08bb
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030455-jolly-catcall-c2e8@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
b979f2d50a09 ("soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free")
2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE")
f86955f2b1ff ("soc: qcom: pmic_glink: fix connector type to be DisplayPort")
5692aeea5bcb ("soc: qcom: pmic: Fix resource leaks in a device_for_each_child_node() loop")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b979f2d50a099f3402418d7ff5f26c3952fb08bb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Sat, 17 Feb 2024 16:02:25 +0100
Subject: [PATCH] soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free
A recent DRM series purporting to simplify support for "transparent
bridges" and handling of probe deferrals ironically exposed a
use-after-free issue on pmic_glink_altmode probe deferral.
This has manifested itself as the display subsystem occasionally failing
to initialise and NULL-pointer dereferences during boot of machines like
the Lenovo ThinkPad X13s.
Specifically, the dp-hpd bridge is currently registered before all
resources have been acquired which means that it can also be
deregistered on probe deferrals.
In the meantime there is a race window where the new aux bridge driver
(or PHY driver previously) may have looked up the dp-hpd bridge and
stored a (non-reference-counted) pointer to the bridge which is about to
be deallocated.
When the display controller is later initialised, this triggers a
use-after-free when attaching the bridges:
dp -> aux -> dp-hpd (freed)
which may, for example, result in the freed bridge failing to attach:
[drm:drm_bridge_attach [drm]] *ERROR* failed to attach bridge /soc@0/phy@88eb000 to encoder TMDS-31: -16
or a NULL-pointer dereference:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
...
Call trace:
drm_bridge_attach+0x70/0x1a8 [drm]
drm_aux_bridge_attach+0x24/0x38 [aux_bridge]
drm_bridge_attach+0x80/0x1a8 [drm]
dp_bridge_init+0xa8/0x15c [msm]
msm_dp_modeset_init+0x28/0xc4 [msm]
The DRM bridge implementation is clearly fragile and implicitly built on
the assumption that bridges may never go away. In this case, the fix is
to move the bridge registration in the pmic_glink_altmode driver to
after all resources have been looked up.
Incidentally, with the new dp-hpd bridge implementation, which registers
child devices, this is also a requirement due to a long-standing issue
in driver core that can otherwise lead to a probe deferral loop (see
commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")).
[DB: slightly fixed commit message by adding the word 'commit']
Fixes: 080b4e24852b ("soc: qcom: pmic_glink: Introduce altmode support")
Fixes: 2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE")
Cc: <stable(a)vger.kernel.org> # 6.3
Cc: Bjorn Andersson <andersson(a)kernel.org>
Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Bjorn Andersson <andersson(a)kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-4-johan+l…
diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c
index 5fcd0fdd2faa..b3808fc24c69 100644
--- a/drivers/soc/qcom/pmic_glink_altmode.c
+++ b/drivers/soc/qcom/pmic_glink_altmode.c
@@ -76,7 +76,7 @@ struct pmic_glink_altmode_port {
struct work_struct work;
- struct device *bridge;
+ struct auxiliary_device *bridge;
enum typec_orientation orientation;
u16 svid;
@@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work)
else
pmic_glink_altmode_enable_usb(altmode, alt_port);
- drm_aux_hpd_bridge_notify(alt_port->bridge,
+ drm_aux_hpd_bridge_notify(&alt_port->bridge->dev,
alt_port->hpd_state ?
connector_status_connected :
connector_status_disconnected);
@@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
alt_port->index = port;
INIT_WORK(&alt_port->work, pmic_glink_altmode_worker);
- alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode));
+ alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode));
if (IS_ERR(alt_port->bridge)) {
fwnode_handle_put(fwnode);
return PTR_ERR(alt_port->bridge);
@@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
}
}
+ for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) {
+ alt_port = &altmode->ports[port];
+ if (!alt_port->bridge)
+ continue;
+
+ ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge);
+ if (ret)
+ return ret;
+ }
+
altmode->client = devm_pmic_glink_register_client(dev,
altmode->owner_id,
pmic_glink_altmode_callback,
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x b979f2d50a099f3402418d7ff5f26c3952fb08bb
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030452-unlatch-jailer-3f13@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
b979f2d50a09 ("soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free")
2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b979f2d50a099f3402418d7ff5f26c3952fb08bb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Sat, 17 Feb 2024 16:02:25 +0100
Subject: [PATCH] soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free
A recent DRM series purporting to simplify support for "transparent
bridges" and handling of probe deferrals ironically exposed a
use-after-free issue on pmic_glink_altmode probe deferral.
This has manifested itself as the display subsystem occasionally failing
to initialise and NULL-pointer dereferences during boot of machines like
the Lenovo ThinkPad X13s.
Specifically, the dp-hpd bridge is currently registered before all
resources have been acquired which means that it can also be
deregistered on probe deferrals.
In the meantime there is a race window where the new aux bridge driver
(or PHY driver previously) may have looked up the dp-hpd bridge and
stored a (non-reference-counted) pointer to the bridge which is about to
be deallocated.
When the display controller is later initialised, this triggers a
use-after-free when attaching the bridges:
dp -> aux -> dp-hpd (freed)
which may, for example, result in the freed bridge failing to attach:
[drm:drm_bridge_attach [drm]] *ERROR* failed to attach bridge /soc@0/phy@88eb000 to encoder TMDS-31: -16
or a NULL-pointer dereference:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
...
Call trace:
drm_bridge_attach+0x70/0x1a8 [drm]
drm_aux_bridge_attach+0x24/0x38 [aux_bridge]
drm_bridge_attach+0x80/0x1a8 [drm]
dp_bridge_init+0xa8/0x15c [msm]
msm_dp_modeset_init+0x28/0xc4 [msm]
The DRM bridge implementation is clearly fragile and implicitly built on
the assumption that bridges may never go away. In this case, the fix is
to move the bridge registration in the pmic_glink_altmode driver to
after all resources have been looked up.
Incidentally, with the new dp-hpd bridge implementation, which registers
child devices, this is also a requirement due to a long-standing issue
in driver core that can otherwise lead to a probe deferral loop (see
commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")).
[DB: slightly fixed commit message by adding the word 'commit']
Fixes: 080b4e24852b ("soc: qcom: pmic_glink: Introduce altmode support")
Fixes: 2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE")
Cc: <stable(a)vger.kernel.org> # 6.3
Cc: Bjorn Andersson <andersson(a)kernel.org>
Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Bjorn Andersson <andersson(a)kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-4-johan+l…
diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c
index 5fcd0fdd2faa..b3808fc24c69 100644
--- a/drivers/soc/qcom/pmic_glink_altmode.c
+++ b/drivers/soc/qcom/pmic_glink_altmode.c
@@ -76,7 +76,7 @@ struct pmic_glink_altmode_port {
struct work_struct work;
- struct device *bridge;
+ struct auxiliary_device *bridge;
enum typec_orientation orientation;
u16 svid;
@@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work)
else
pmic_glink_altmode_enable_usb(altmode, alt_port);
- drm_aux_hpd_bridge_notify(alt_port->bridge,
+ drm_aux_hpd_bridge_notify(&alt_port->bridge->dev,
alt_port->hpd_state ?
connector_status_connected :
connector_status_disconnected);
@@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
alt_port->index = port;
INIT_WORK(&alt_port->work, pmic_glink_altmode_worker);
- alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode));
+ alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode));
if (IS_ERR(alt_port->bridge)) {
fwnode_handle_put(fwnode);
return PTR_ERR(alt_port->bridge);
@@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
}
}
+ for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) {
+ alt_port = &altmode->ports[port];
+ if (!alt_port->bridge)
+ continue;
+
+ ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge);
+ if (ret)
+ return ret;
+ }
+
altmode->client = devm_pmic_glink_register_client(dev,
altmode->owner_id,
pmic_glink_altmode_callback,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e2b54eaf28df0c978626c9736b94f003b523b451
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030455-ensure-outward-f8cc@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e2b54eaf28df ("btrfs: fix double free of anonymous device after snapshot creation failure")
e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read")
caae78e03234 ("btrfs: move common inode creation code into btrfs_create_new_inode()")
3538d68dbd97 ("btrfs: reserve correct number of items for inode creation")
5f465bf1f15a ("btrfs: factor out common part of btrfs_{mknod,create,mkdir}()")
a1fd0c35ffe3 ("btrfs: allocate inode outside of btrfs_new_inode()")
305eaac00911 ("btrfs: set inode flags earlier in btrfs_new_inode()")
6437d4583531 ("btrfs: move btrfs_get_free_objectid() call into btrfs_new_inode()")
23c24ef8e418 ("btrfs: don't pass parent objectid to btrfs_new_inode() explicitly")
75b993cf4305 ("btrfs: remove unused mnt_userns parameter from __btrfs_set_acl")
c51fa51190f9 ("btrfs: remove unnecessary set_nlink() in btrfs_create_subvol_root()")
6d831f7ef9f0 ("btrfs: remove unnecessary inode_set_bytes(0) call")
9124e15f2798 ("btrfs: remove unnecessary btrfs_i_size_write(0) calls")
81512e89f2b7 ("btrfs: get rid of btrfs_add_nondir()")
2256e901f5bd ("btrfs: fix anon_dev leak in create_subvol()")
c16218714307 ("btrfs: reserve correct number of items for rename")
1b58ae0e4d3e ("btrfs: skip transaction commit after failure to create subvolume")
33fab972497a ("btrfs: fix double free of anon_dev after failure to create subvolume")
b7ef5f3a6f37 ("btrfs: loop only once over data sizes array when inserting an item batch")
086dcbfa50d3 ("btrfs: insert items in batches when logging a directory when possible")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e2b54eaf28df0c978626c9736b94f003b523b451 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 23 Feb 2024 16:38:43 +0000
Subject: [PATCH] btrfs: fix double free of anonymous device after snapshot
creation failure
When creating a snapshot we may do a double free of an anonymous device
in case there's an error committing the transaction. The second free may
result in freeing an anonymous device number that was allocated by some
other subsystem in the kernel or another btrfs filesystem.
The steps that lead to this:
1) At ioctl.c:create_snapshot() we allocate an anonymous device number
and assign it to pending_snapshot->anon_dev;
2) Then we call btrfs_commit_transaction() and end up at
transaction.c:create_pending_snapshot();
3) There we call btrfs_get_new_fs_root() and pass it the anonymous device
number stored in pending_snapshot->anon_dev;
4) btrfs_get_new_fs_root() frees that anonymous device number because
btrfs_lookup_fs_root() returned a root - someone else did a lookup
of the new root already, which could some task doing backref walking;
5) After that some error happens in the transaction commit path, and at
ioctl.c:create_snapshot() we jump to the 'fail' label, and after
that we free again the same anonymous device number, which in the
meanwhile may have been reallocated somewhere else, because
pending_snapshot->anon_dev still has the same value as in step 1.
Recently syzbot ran into this and reported the following trace:
------------[ cut here ]------------
ida_free called for id=51 which is not allocated.
WARNING: CPU: 1 PID: 31038 at lib/idr.c:525 ida_free+0x370/0x420 lib/idr.c:525
Modules linked in:
CPU: 1 PID: 31038 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00410-gc02197fc9076 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024
RIP: 0010:ida_free+0x370/0x420 lib/idr.c:525
Code: 10 42 80 3c 28 (...)
RSP: 0018:ffffc90015a67300 EFLAGS: 00010246
RAX: be5130472f5dd000 RBX: 0000000000000033 RCX: 0000000000040000
RDX: ffffc90009a7a000 RSI: 000000000003ffff RDI: 0000000000040000
RBP: ffffc90015a673f0 R08: ffffffff81577992 R09: 1ffff92002b4cdb4
R10: dffffc0000000000 R11: fffff52002b4cdb5 R12: 0000000000000246
R13: dffffc0000000000 R14: ffffffff8e256b80 R15: 0000000000000246
FS: 00007fca3f4b46c0(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f167a17b978 CR3: 000000001ed26000 CR4: 0000000000350ef0
Call Trace:
<TASK>
btrfs_get_root_ref+0xa48/0xaf0 fs/btrfs/disk-io.c:1346
create_pending_snapshot+0xff2/0x2bc0 fs/btrfs/transaction.c:1837
create_pending_snapshots+0x195/0x1d0 fs/btrfs/transaction.c:1931
btrfs_commit_transaction+0xf1c/0x3740 fs/btrfs/transaction.c:2404
create_snapshot+0x507/0x880 fs/btrfs/ioctl.c:848
btrfs_mksubvol+0x5d0/0x750 fs/btrfs/ioctl.c:998
btrfs_mksnapshot+0xb5/0xf0 fs/btrfs/ioctl.c:1044
__btrfs_ioctl_snap_create+0x387/0x4b0 fs/btrfs/ioctl.c:1306
btrfs_ioctl_snap_create_v2+0x1ca/0x400 fs/btrfs/ioctl.c:1393
btrfs_ioctl+0xa74/0xd40
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:871 [inline]
__se_sys_ioctl+0xfe/0x170 fs/ioctl.c:857
do_syscall_64+0xfb/0x240
entry_SYSCALL_64_after_hwframe+0x6f/0x77
RIP: 0033:0x7fca3e67dda9
Code: 28 00 00 00 (...)
RSP: 002b:00007fca3f4b40c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00007fca3e7abf80 RCX: 00007fca3e67dda9
RDX: 00000000200005c0 RSI: 0000000050009417 RDI: 0000000000000003
RBP: 00007fca3e6ca47a R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 000000000000000b R14: 00007fca3e7abf80 R15: 00007fff6bf95658
</TASK>
Where we get an explicit message where we attempt to free an anonymous
device number that is not currently allocated. It happens in a different
code path from the example below, at btrfs_get_root_ref(), so this change
may not fix the case triggered by syzbot.
To fix at least the code path from the example above, change
btrfs_get_root_ref() and its callers to receive a dev_t pointer argument
for the anonymous device number, so that in case it frees the number, it
also resets it to 0, so that up in the call chain we don't attempt to do
the double free.
CC: stable(a)vger.kernel.org # 5.10+
Link: https://lore.kernel.org/linux-btrfs/000000000000f673a1061202f630@google.com/
Fixes: e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read")
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e71ef97d0a7c..c843563914ca 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
*
* @objectid: root id
* @anon_dev: preallocated anonymous block device number for new roots,
- * pass 0 for new allocation.
+ * pass NULL for a new allocation.
* @check_ref: whether to check root item references, If true, return -ENOENT
* for orphan roots
*/
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev,
+ u64 objectid, dev_t *anon_dev,
bool check_ref)
{
struct btrfs_root *root;
@@ -1342,9 +1342,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* that common but still possible. In that case, we just need
* to free the anon_dev.
*/
- if (unlikely(anon_dev)) {
- free_anon_bdev(anon_dev);
- anon_dev = 0;
+ if (unlikely(anon_dev && *anon_dev)) {
+ free_anon_bdev(*anon_dev);
+ *anon_dev = 0;
}
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
@@ -1366,7 +1366,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
goto fail;
}
- ret = btrfs_init_fs_root(root, anon_dev);
+ ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
if (ret)
goto fail;
@@ -1402,7 +1402,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
* and once again by our caller.
*/
- if (anon_dev)
+ if (anon_dev && *anon_dev)
root->anon_dev = 0;
btrfs_put_root(root);
return ERR_PTR(ret);
@@ -1418,7 +1418,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
{
- return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+ return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
}
/*
@@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
* the anonymous block device id
*
* @objectid: tree objectid
- * @anon_dev: if zero, allocate a new anonymous block device or use the
- * parameter value
+ * @anon_dev: if NULL, allocate a new anonymous block device or use the
+ * parameter value if not NULL
*/
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev)
+ u64 objectid, dev_t *anon_dev)
{
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 9413726b329b..eb3473d1c1ac 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev);
+ u64 objectid, dev_t *anon_dev);
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 objectid);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fb2323b323bf..b004e3b75311 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
free_extent_buffer(leaf);
leaf = NULL;
- new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
+ new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c52807d97efa..bf8e64c766b6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
- pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
+ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
pending->snap = NULL;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e2b54eaf28df0c978626c9736b94f003b523b451
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024030451-curtly-phoney-bfc5@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e2b54eaf28df ("btrfs: fix double free of anonymous device after snapshot creation failure")
e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read")
caae78e03234 ("btrfs: move common inode creation code into btrfs_create_new_inode()")
3538d68dbd97 ("btrfs: reserve correct number of items for inode creation")
5f465bf1f15a ("btrfs: factor out common part of btrfs_{mknod,create,mkdir}()")
a1fd0c35ffe3 ("btrfs: allocate inode outside of btrfs_new_inode()")
305eaac00911 ("btrfs: set inode flags earlier in btrfs_new_inode()")
6437d4583531 ("btrfs: move btrfs_get_free_objectid() call into btrfs_new_inode()")
23c24ef8e418 ("btrfs: don't pass parent objectid to btrfs_new_inode() explicitly")
75b993cf4305 ("btrfs: remove unused mnt_userns parameter from __btrfs_set_acl")
c51fa51190f9 ("btrfs: remove unnecessary set_nlink() in btrfs_create_subvol_root()")
6d831f7ef9f0 ("btrfs: remove unnecessary inode_set_bytes(0) call")
9124e15f2798 ("btrfs: remove unnecessary btrfs_i_size_write(0) calls")
81512e89f2b7 ("btrfs: get rid of btrfs_add_nondir()")
2256e901f5bd ("btrfs: fix anon_dev leak in create_subvol()")
c16218714307 ("btrfs: reserve correct number of items for rename")
1b58ae0e4d3e ("btrfs: skip transaction commit after failure to create subvolume")
33fab972497a ("btrfs: fix double free of anon_dev after failure to create subvolume")
b7ef5f3a6f37 ("btrfs: loop only once over data sizes array when inserting an item batch")
086dcbfa50d3 ("btrfs: insert items in batches when logging a directory when possible")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e2b54eaf28df0c978626c9736b94f003b523b451 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 23 Feb 2024 16:38:43 +0000
Subject: [PATCH] btrfs: fix double free of anonymous device after snapshot
creation failure
When creating a snapshot we may do a double free of an anonymous device
in case there's an error committing the transaction. The second free may
result in freeing an anonymous device number that was allocated by some
other subsystem in the kernel or another btrfs filesystem.
The steps that lead to this:
1) At ioctl.c:create_snapshot() we allocate an anonymous device number
and assign it to pending_snapshot->anon_dev;
2) Then we call btrfs_commit_transaction() and end up at
transaction.c:create_pending_snapshot();
3) There we call btrfs_get_new_fs_root() and pass it the anonymous device
number stored in pending_snapshot->anon_dev;
4) btrfs_get_new_fs_root() frees that anonymous device number because
btrfs_lookup_fs_root() returned a root - someone else did a lookup
of the new root already, which could some task doing backref walking;
5) After that some error happens in the transaction commit path, and at
ioctl.c:create_snapshot() we jump to the 'fail' label, and after
that we free again the same anonymous device number, which in the
meanwhile may have been reallocated somewhere else, because
pending_snapshot->anon_dev still has the same value as in step 1.
Recently syzbot ran into this and reported the following trace:
------------[ cut here ]------------
ida_free called for id=51 which is not allocated.
WARNING: CPU: 1 PID: 31038 at lib/idr.c:525 ida_free+0x370/0x420 lib/idr.c:525
Modules linked in:
CPU: 1 PID: 31038 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00410-gc02197fc9076 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024
RIP: 0010:ida_free+0x370/0x420 lib/idr.c:525
Code: 10 42 80 3c 28 (...)
RSP: 0018:ffffc90015a67300 EFLAGS: 00010246
RAX: be5130472f5dd000 RBX: 0000000000000033 RCX: 0000000000040000
RDX: ffffc90009a7a000 RSI: 000000000003ffff RDI: 0000000000040000
RBP: ffffc90015a673f0 R08: ffffffff81577992 R09: 1ffff92002b4cdb4
R10: dffffc0000000000 R11: fffff52002b4cdb5 R12: 0000000000000246
R13: dffffc0000000000 R14: ffffffff8e256b80 R15: 0000000000000246
FS: 00007fca3f4b46c0(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f167a17b978 CR3: 000000001ed26000 CR4: 0000000000350ef0
Call Trace:
<TASK>
btrfs_get_root_ref+0xa48/0xaf0 fs/btrfs/disk-io.c:1346
create_pending_snapshot+0xff2/0x2bc0 fs/btrfs/transaction.c:1837
create_pending_snapshots+0x195/0x1d0 fs/btrfs/transaction.c:1931
btrfs_commit_transaction+0xf1c/0x3740 fs/btrfs/transaction.c:2404
create_snapshot+0x507/0x880 fs/btrfs/ioctl.c:848
btrfs_mksubvol+0x5d0/0x750 fs/btrfs/ioctl.c:998
btrfs_mksnapshot+0xb5/0xf0 fs/btrfs/ioctl.c:1044
__btrfs_ioctl_snap_create+0x387/0x4b0 fs/btrfs/ioctl.c:1306
btrfs_ioctl_snap_create_v2+0x1ca/0x400 fs/btrfs/ioctl.c:1393
btrfs_ioctl+0xa74/0xd40
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:871 [inline]
__se_sys_ioctl+0xfe/0x170 fs/ioctl.c:857
do_syscall_64+0xfb/0x240
entry_SYSCALL_64_after_hwframe+0x6f/0x77
RIP: 0033:0x7fca3e67dda9
Code: 28 00 00 00 (...)
RSP: 002b:00007fca3f4b40c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00007fca3e7abf80 RCX: 00007fca3e67dda9
RDX: 00000000200005c0 RSI: 0000000050009417 RDI: 0000000000000003
RBP: 00007fca3e6ca47a R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 000000000000000b R14: 00007fca3e7abf80 R15: 00007fff6bf95658
</TASK>
Where we get an explicit message where we attempt to free an anonymous
device number that is not currently allocated. It happens in a different
code path from the example below, at btrfs_get_root_ref(), so this change
may not fix the case triggered by syzbot.
To fix at least the code path from the example above, change
btrfs_get_root_ref() and its callers to receive a dev_t pointer argument
for the anonymous device number, so that in case it frees the number, it
also resets it to 0, so that up in the call chain we don't attempt to do
the double free.
CC: stable(a)vger.kernel.org # 5.10+
Link: https://lore.kernel.org/linux-btrfs/000000000000f673a1061202f630@google.com/
Fixes: e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read")
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e71ef97d0a7c..c843563914ca 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
*
* @objectid: root id
* @anon_dev: preallocated anonymous block device number for new roots,
- * pass 0 for new allocation.
+ * pass NULL for a new allocation.
* @check_ref: whether to check root item references, If true, return -ENOENT
* for orphan roots
*/
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev,
+ u64 objectid, dev_t *anon_dev,
bool check_ref)
{
struct btrfs_root *root;
@@ -1342,9 +1342,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* that common but still possible. In that case, we just need
* to free the anon_dev.
*/
- if (unlikely(anon_dev)) {
- free_anon_bdev(anon_dev);
- anon_dev = 0;
+ if (unlikely(anon_dev && *anon_dev)) {
+ free_anon_bdev(*anon_dev);
+ *anon_dev = 0;
}
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
@@ -1366,7 +1366,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
goto fail;
}
- ret = btrfs_init_fs_root(root, anon_dev);
+ ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
if (ret)
goto fail;
@@ -1402,7 +1402,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
* and once again by our caller.
*/
- if (anon_dev)
+ if (anon_dev && *anon_dev)
root->anon_dev = 0;
btrfs_put_root(root);
return ERR_PTR(ret);
@@ -1418,7 +1418,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
{
- return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+ return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
}
/*
@@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
* the anonymous block device id
*
* @objectid: tree objectid
- * @anon_dev: if zero, allocate a new anonymous block device or use the
- * parameter value
+ * @anon_dev: if NULL, allocate a new anonymous block device or use the
+ * parameter value if not NULL
*/
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev)
+ u64 objectid, dev_t *anon_dev)
{
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 9413726b329b..eb3473d1c1ac 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev);
+ u64 objectid, dev_t *anon_dev);
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 objectid);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fb2323b323bf..b004e3b75311 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
free_extent_buffer(leaf);
leaf = NULL;
- new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
+ new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c52807d97efa..bf8e64c766b6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
- pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
+ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
pending->snap = NULL;
v2:
- Dropped already backported patch "x86/bugs: Add asm helpers for
executing VERW"
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
- Booted fine with KASLR and KPTI enabled.
- Rebased to v6.6.20
v1: https://lore.kernel.org/r/20240226-delay-verw-backport-6-6-y-v1-0-aa17b2922…
This is the backport of recently upstreamed series that moves VERW
execution to a later point in exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW executions
may end into MDS affected CPU buffers. Moving VERW closer to ring
transition reduces the attack surface.
Patch 1/6 includes a minor fix that is queued for upstream:
https://lore.kernel.org/lkml/170899674562.398.6398007479766564897.tip-bot2@…
Patch 2/6 needed a conflict to be resolved for the hunk
swapgs_restore_regs_and_return_to_usermode.
This is only compile and boot tested on qemu.
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
To: stable(a)vger.kernel.org
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
Pawan Gupta (4):
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/arch/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/nospec-branch.h | 12 ------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 20 +++++++++++++++----
11 files changed, 75 insertions(+), 45 deletions(-)
---
base-commit: 9b4a8eac17f0d840729384618b4b1e876233026c
change-id: 20240226-delay-verw-backport-6-6-y-2cda3298e600
Best regards,
--
Thanks,
Pawan
This is a note to let you know that I've just added the patch titled
iio: accel: adxl367: fix I2C FIFO data register
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 11dadb631007324c7a8bcb2650eda88ed2b9eed0 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur(a)gmail.com>
Date: Wed, 7 Feb 2024 05:36:51 +0200
Subject: iio: accel: adxl367: fix I2C FIFO data register
As specified in the datasheet, the I2C FIFO data register is
0x18, not 0x42. 0x42 was used by mistake when adapting the
ADXL372 driver.
Fix this mistake.
Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver")
Signed-off-by: Cosmin Tanislav <demonsingur(a)gmail.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://lore.kernel.org/r/20240207033657.206171-2-demonsingur@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/accel/adxl367_i2c.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c
index b595fe94f3a3..62c74bdc0d77 100644
--- a/drivers/iio/accel/adxl367_i2c.c
+++ b/drivers/iio/accel/adxl367_i2c.c
@@ -11,7 +11,7 @@
#include "adxl367.h"
-#define ADXL367_I2C_FIFO_DATA 0x42
+#define ADXL367_I2C_FIFO_DATA 0x18
struct adxl367_i2c_state {
struct regmap *regmap;
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: accel: adxl367: fix DEVID read after reset
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 1b926914bbe4e30cb32f268893ef7d82a85275b8 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur(a)gmail.com>
Date: Wed, 7 Feb 2024 05:36:50 +0200
Subject: iio: accel: adxl367: fix DEVID read after reset
regmap_read_poll_timeout() will not sleep before reading,
causing the first read to return -ENXIO on I2C, since the
chip does not respond to it while it is being reset.
The datasheet specifies that a soft reset operation has a
latency of 7.5ms.
Add a 15ms sleep between reset and reading the DEVID register,
and switch to a simple regmap_read() call.
Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver")
Signed-off-by: Cosmin Tanislav <demonsingur(a)gmail.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://lore.kernel.org/r/20240207033657.206171-1-demonsingur@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/accel/adxl367.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c
index 90b7ae6d42b7..484fe2e9fb17 100644
--- a/drivers/iio/accel/adxl367.c
+++ b/drivers/iio/accel/adxl367.c
@@ -1429,9 +1429,11 @@ static int adxl367_verify_devid(struct adxl367_state *st)
unsigned int val;
int ret;
- ret = regmap_read_poll_timeout(st->regmap, ADXL367_REG_DEVID, val,
- val == ADXL367_DEVID_AD, 1000, 10000);
+ ret = regmap_read(st->regmap, ADXL367_REG_DEVID, &val);
if (ret)
+ return dev_err_probe(st->dev, ret, "Failed to read dev id\n");
+
+ if (val != ADXL367_DEVID_AD)
return dev_err_probe(st->dev, -ENODEV,
"Invalid dev id 0x%02X, expected 0x%02X\n",
val, ADXL367_DEVID_AD);
@@ -1510,6 +1512,8 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops,
if (ret)
return ret;
+ fsleep(15000);
+
ret = adxl367_verify_devid(st);
if (ret)
return ret;
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: pressure: Fixes BMP38x and BMP390 SPI support
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From a9dd9ba323114f366eb07f1d9630822f8df6cbb2 Mon Sep 17 00:00:00 2001
From: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Date: Mon, 19 Feb 2024 20:13:59 +0100
Subject: iio: pressure: Fixes BMP38x and BMP390 SPI support
According to the datasheet of BMP38x and BMP390 devices, for an SPI
read operation the first byte that is returned needs to be dropped,
and the rest of the bytes are the actual data returned from the
sensor.
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Fixes: 8d329309184d ("iio: pressure: bmp280: Add support for BMP380 sensor family")
Signed-off-by: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Acked-by: Angel Iglesias <ang.iglesiasg(a)gmail.com>
Link: https://lore.kernel.org/r/20240219191359.18367-1-vassilisamir@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/bmp280-spi.c | 50 ++++++++++++++++++++++++++++++-
1 file changed, 49 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c
index e8a5fed07e88..a444d4b2978b 100644
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -4,6 +4,7 @@
*
* Inspired by the older BMP085 driver drivers/misc/bmp085-spi.c
*/
+#include <linux/bits.h>
#include <linux/module.h>
#include <linux/spi/spi.h>
#include <linux/err.h>
@@ -35,6 +36,34 @@ static int bmp280_regmap_spi_read(void *context, const void *reg,
return spi_write_then_read(spi, reg, reg_size, val, val_size);
}
+static int bmp380_regmap_spi_read(void *context, const void *reg,
+ size_t reg_size, void *val, size_t val_size)
+{
+ struct spi_device *spi = to_spi_device(context);
+ u8 rx_buf[4];
+ ssize_t status;
+
+ /*
+ * Maximum number of consecutive bytes read for a temperature or
+ * pressure measurement is 3.
+ */
+ if (val_size > 3)
+ return -EINVAL;
+
+ /*
+ * According to the BMP3xx datasheets, for a basic SPI read opertion,
+ * the first byte needs to be dropped and the rest are the requested
+ * data.
+ */
+ status = spi_write_then_read(spi, reg, 1, rx_buf, val_size + 1);
+ if (status)
+ return status;
+
+ memcpy(val, rx_buf + 1, val_size);
+
+ return 0;
+}
+
static struct regmap_bus bmp280_regmap_bus = {
.write = bmp280_regmap_spi_write,
.read = bmp280_regmap_spi_read,
@@ -42,10 +71,19 @@ static struct regmap_bus bmp280_regmap_bus = {
.val_format_endian_default = REGMAP_ENDIAN_BIG,
};
+static struct regmap_bus bmp380_regmap_bus = {
+ .write = bmp280_regmap_spi_write,
+ .read = bmp380_regmap_spi_read,
+ .read_flag_mask = BIT(7),
+ .reg_format_endian_default = REGMAP_ENDIAN_BIG,
+ .val_format_endian_default = REGMAP_ENDIAN_BIG,
+};
+
static int bmp280_spi_probe(struct spi_device *spi)
{
const struct spi_device_id *id = spi_get_device_id(spi);
const struct bmp280_chip_info *chip_info;
+ struct regmap_bus *bmp_regmap_bus;
struct regmap *regmap;
int ret;
@@ -58,8 +96,18 @@ static int bmp280_spi_probe(struct spi_device *spi)
chip_info = spi_get_device_match_data(spi);
+ switch (chip_info->chip_id[0]) {
+ case BMP380_CHIP_ID:
+ case BMP390_CHIP_ID:
+ bmp_regmap_bus = &bmp380_regmap_bus;
+ break;
+ default:
+ bmp_regmap_bus = &bmp280_regmap_bus;
+ break;
+ }
+
regmap = devm_regmap_init(&spi->dev,
- &bmp280_regmap_bus,
+ bmp_regmap_bus,
&spi->dev,
chip_info->regmap_config);
if (IS_ERR(regmap)) {
--
2.44.0
When userspace opens the console, we call set_termios() passing a
termios with the console's configured baud rate. Currently this causes
dw8250_set_termios() to disable and then re-enable the UART clock at
the same frequency as it was originally. This can cause corruption
of any concurrent console output. Fix it by skipping the reclocking
if we are already at the correct rate.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Link: https://linux-review.googlesource.com/id/I2e3761d239cbf29ed41412e5338f30bff…
Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms")
Cc: stable(a)vger.kernel.org
---
drivers/tty/serial/8250/8250_dw.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 2d1f350a4bea..c1d43f040c43 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -357,9 +357,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
long rate;
int ret;
- clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0) {
+ if (rate > 0 && p->uartclk != rate) {
+ clk_disable_unprepare(d->clk);
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
@@ -367,8 +367,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
+ clk_prepare_enable(d->clk);
}
- clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
--
2.44.0.rc1.240.g4c46232300-goog
This is a note to let you know that I've just added the patch titled
iio: adc: rockchip_saradc: use mask for write_enable bitfield
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From 5b4e4b72034f85f7a0cdd147d3d729c5a22c8764 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Date: Fri, 23 Feb 2024 13:45:22 +0100
Subject: iio: adc: rockchip_saradc: use mask for write_enable bitfield
Some of the registers on the SARADCv2 have bits write protected except
if another bit is set. This is usually done by having the lowest 16 bits
store the data to write and the highest 16 bits specify which of the 16
lowest bits should have their value written to the hardware block.
The write_enable mask for the channel selection was incorrect because it
was just the value shifted by 16 bits, which means it would only ever
write bits and never clear them. So e.g. if someone starts a conversion
on channel 5, the lowest 4 bits would be 0x5, then starts a conversion
on channel 0, it would still be 5.
Instead of shifting the value by 16 as the mask, let's use the OR'ing of
the appropriate masks shifted by 16.
Note that this is not an issue currently because the only SARADCv2
currently supported has a reset defined in its Device Tree, that reset
resets the SARADC controller before starting a conversion on a channel.
However, this reset is handled as optional by the probe function and
thus proper masking should be used in the event an SARADCv2 without a
reset ever makes it upstream.
Fixes: 757953f8ec69 ("iio: adc: rockchip_saradc: Add support for RK3588")
Signed-off-by: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Reviewed-by: Heiko Stuebner <heiko(a)sntech.de>
Link: https://lore.kernel.org/r/20240223-saradcv2-chan-mask-v1-2-84b06a0f623a@the…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rockchip_saradc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index 2da8d6f3241a..1c0042fbbb54 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -102,12 +102,12 @@ static void rockchip_saradc_start_v2(struct rockchip_saradc *info, int chn)
writel_relaxed(0xc, info->regs + SARADC_T_DAS_SOC);
writel_relaxed(0x20, info->regs + SARADC_T_PD_SOC);
val = FIELD_PREP(SARADC2_EN_END_INT, 1);
- val |= val << 16;
+ val |= SARADC2_EN_END_INT << 16;
writel_relaxed(val, info->regs + SARADC2_END_INT_EN);
val = FIELD_PREP(SARADC2_START, 1) |
FIELD_PREP(SARADC2_SINGLE_MODE, 1) |
FIELD_PREP(SARADC2_CONV_CHANNELS, chn);
- val |= val << 16;
+ val |= (SARADC2_START | SARADC2_SINGLE_MODE | SARADC2_CONV_CHANNELS) << 16;
writel(val, info->regs + SARADC2_CONV_CON);
}
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: adc: rockchip_saradc: fix bitmask for channels on SARADCv2
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From b0a4546df24a4f8c59b2d05ae141bd70ceccc386 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Date: Fri, 23 Feb 2024 13:45:21 +0100
Subject: iio: adc: rockchip_saradc: fix bitmask for channels on SARADCv2
The SARADCv2 on RK3588 (the only SoC currently supported that has an
SARADCv2) selects the channel through the channel_sel bitfield which is
the 4 lowest bits, therefore the mask should be GENMASK(3, 0) and not
GENMASK(15, 0).
Fixes: 757953f8ec69 ("iio: adc: rockchip_saradc: Add support for RK3588")
Signed-off-by: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Reviewed-by: Heiko Stuebner <heiko(a)sntech.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Link: https://lore.kernel.org/r/20240223-saradcv2-chan-mask-v1-1-84b06a0f623a@the…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rockchip_saradc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index dd94667a623b..2da8d6f3241a 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -52,7 +52,7 @@
#define SARADC2_START BIT(4)
#define SARADC2_SINGLE_MODE BIT(5)
-#define SARADC2_CONV_CHANNELS GENMASK(15, 0)
+#define SARADC2_CONV_CHANNELS GENMASK(3, 0)
struct rockchip_saradc;
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: adc: rockchip_saradc: use mask for write_enable bitfield
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the char-misc-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
From 5b4e4b72034f85f7a0cdd147d3d729c5a22c8764 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Date: Fri, 23 Feb 2024 13:45:22 +0100
Subject: iio: adc: rockchip_saradc: use mask for write_enable bitfield
Some of the registers on the SARADCv2 have bits write protected except
if another bit is set. This is usually done by having the lowest 16 bits
store the data to write and the highest 16 bits specify which of the 16
lowest bits should have their value written to the hardware block.
The write_enable mask for the channel selection was incorrect because it
was just the value shifted by 16 bits, which means it would only ever
write bits and never clear them. So e.g. if someone starts a conversion
on channel 5, the lowest 4 bits would be 0x5, then starts a conversion
on channel 0, it would still be 5.
Instead of shifting the value by 16 as the mask, let's use the OR'ing of
the appropriate masks shifted by 16.
Note that this is not an issue currently because the only SARADCv2
currently supported has a reset defined in its Device Tree, that reset
resets the SARADC controller before starting a conversion on a channel.
However, this reset is handled as optional by the probe function and
thus proper masking should be used in the event an SARADCv2 without a
reset ever makes it upstream.
Fixes: 757953f8ec69 ("iio: adc: rockchip_saradc: Add support for RK3588")
Signed-off-by: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Reviewed-by: Heiko Stuebner <heiko(a)sntech.de>
Link: https://lore.kernel.org/r/20240223-saradcv2-chan-mask-v1-2-84b06a0f623a@the…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rockchip_saradc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index 2da8d6f3241a..1c0042fbbb54 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -102,12 +102,12 @@ static void rockchip_saradc_start_v2(struct rockchip_saradc *info, int chn)
writel_relaxed(0xc, info->regs + SARADC_T_DAS_SOC);
writel_relaxed(0x20, info->regs + SARADC_T_PD_SOC);
val = FIELD_PREP(SARADC2_EN_END_INT, 1);
- val |= val << 16;
+ val |= SARADC2_EN_END_INT << 16;
writel_relaxed(val, info->regs + SARADC2_END_INT_EN);
val = FIELD_PREP(SARADC2_START, 1) |
FIELD_PREP(SARADC2_SINGLE_MODE, 1) |
FIELD_PREP(SARADC2_CONV_CHANNELS, chn);
- val |= val << 16;
+ val |= (SARADC2_START | SARADC2_SINGLE_MODE | SARADC2_CONV_CHANNELS) << 16;
writel(val, info->regs + SARADC2_CONV_CON);
}
--
2.44.0
This is a note to let you know that I've just added the patch titled
iio: adc: rockchip_saradc: fix bitmask for channels on SARADCv2
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the char-misc-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
From b0a4546df24a4f8c59b2d05ae141bd70ceccc386 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Date: Fri, 23 Feb 2024 13:45:21 +0100
Subject: iio: adc: rockchip_saradc: fix bitmask for channels on SARADCv2
The SARADCv2 on RK3588 (the only SoC currently supported that has an
SARADCv2) selects the channel through the channel_sel bitfield which is
the 4 lowest bits, therefore the mask should be GENMASK(3, 0) and not
GENMASK(15, 0).
Fixes: 757953f8ec69 ("iio: adc: rockchip_saradc: Add support for RK3588")
Signed-off-by: Quentin Schulz <quentin.schulz(a)theobroma-systems.com>
Reviewed-by: Heiko Stuebner <heiko(a)sntech.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Link: https://lore.kernel.org/r/20240223-saradcv2-chan-mask-v1-1-84b06a0f623a@the…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rockchip_saradc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index dd94667a623b..2da8d6f3241a 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -52,7 +52,7 @@
#define SARADC2_START BIT(4)
#define SARADC2_SINGLE_MODE BIT(5)
-#define SARADC2_CONV_CHANNELS GENMASK(15, 0)
+#define SARADC2_CONV_CHANNELS GENMASK(3, 0)
struct rockchip_saradc;
--
2.44.0
The problem seems to be in fs/ntfs3/frecord.c. Original messages were
in German, so here's my translation (original at the end of the post):
[...]
CC lib/zstd/common/zstd_common.o
CC arch/x86/kernel/cpu/feat_ctl.o
fs/ntfs3/frecord.c: In Funktion »ni_read_frame«:
fs/ntfs3/frecord.c:2460:16: Error: variable >>i_size<< is not used"
[-Werror=unused-variable]
2460 | loff_t i_size = i_size_read(&ni->vfs_inode);
| ^~~~~~
AR lib/zstd/built-in.a
[...]
cc1: All warnings are treated as errors
make[4]: *** [scripts/Makefile.build:243: fs/ntfs3/frecord.o] error 1
make[3]: *** [scripts/Makefile.build:480: fs/ntfs3] error 2
make[3]: *** Waiting for not yet finished processes....
[...]
Let me know if you need further information. Thanks.
Rainer
--
Original messages:
[...]
CC lib/zstd/common/zstd_common.o
CC arch/x86/kernel/cpu/feat_ctl.o
fs/ntfs3/frecord.c: In Funktion »ni_read_frame«:
fs/ntfs3/frecord.c:2460:16: Fehler: Variable »i_size« wird nicht
verwendet [-Werror=unused-variable]
2460 | loff_t i_size = i_size_read(&ni->vfs_inode);
| ^~~~~~
AR lib/zstd/built-in.a
CC lib/bug.o
CC fs/udf/inode.o
CC arch/x86/kernel/cpu/intel.o
CC arch/x86/kernel/process_64.o
CC kernel/events/callchain.o
CC lib/buildid.o
cc1: Alle Warnungen werden als Fehler behandelt
make[4]: *** [scripts/Makefile.build:243: fs/ntfs3/frecord.o] Fehler 1
make[3]: *** [scripts/Makefile.build:480: fs/ntfs3] Fehler 2
make[3]: *** Es wird auf noch nicht beendete Prozesse gewartet....
CC kernel/events/hw_breakpoint.o
CC lib/clz_tab.o
CC lib/cmdline.o
CC fs/udf/lowlevel.o
[...]
--
The truth always turns out to be simpler than you thought.
Richard Feynman
Unused USB ports may have bogus location data in ACPI PLD tables.
This causes port peering failures as these unused USB2 and USB3 ports
location may match.
Due to these failures the driver prints a
"usb: port power management may be unreliable" warning, and
unnecessarily blocks port power off during runtime suspend.
This was debugged on a couple DELL systems where the unused ports
all returned zeroes in their location data.
Similar bugreports exist for other systems.
Don't try to peer or match ports that have connect type set to
USB_PORT_NOT_USED.
Fixes: 3bfd659baec8 ("usb: find internal hub tier mismatch via acpi")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218465
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218486
Tested-by: Paul Menzel <pmenzel(a)molgen.mpg.de>
Link: https://lore.kernel.org/linux-usb/5406d361-f5b7-4309-b0e6-8c94408f7d75@molg…
Cc: stable(a)vger.kernel.org # v3.16+
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
v1 -> v2
- Improve commit message
- Add missing Fixes, Closes and Link tags
- send this patch separately for easier picking to usb-linus
drivers/usb/core/port.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index c628c1abc907..4d63496f98b6 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -573,7 +573,7 @@ static int match_location(struct usb_device *peer_hdev, void *p)
struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev);
struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent);
- if (!peer_hub)
+ if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED)
return 0;
hcd = bus_to_hcd(hdev->bus);
@@ -584,7 +584,8 @@ static int match_location(struct usb_device *peer_hdev, void *p)
for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) {
peer = peer_hub->ports[port1 - 1];
- if (peer && peer->location == port_dev->location) {
+ if (peer && peer->connect_type != USB_PORT_NOT_USED &&
+ peer->location == port_dev->location) {
link_peers_report(port_dev, peer);
return 1; /* done */
}
--
2.25.1
From: Christian Gmeiner <cgmeiner(a)igalia.com>
The hwdb selection logic as a feature that allows it to mark some fields
as 'don't care'. If we match with such a field we memcpy(..)
the current etnaviv_chip_identity into ident.
This step can overwrite some id values read from the GPU with the
'don't care' value.
Fix this issue by restoring the affected values after the memcpy(..).
As this is crucial for user space to know when this feature works as
expected increment the minor version too.
Fixes: 4078a1186dd3 ("drm/etnaviv: update hwdb selection logic")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Gmeiner <cgmeiner(a)igalia.com>
---
drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +-
drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 6228ce603248..9a2965741dab 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -494,7 +494,7 @@ static const struct drm_driver etnaviv_drm_driver = {
.desc = "etnaviv DRM",
.date = "20151214",
.major = 1,
- .minor = 3,
+ .minor = 4,
};
/*
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
index 67201242438b..1e38d66702f1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
@@ -265,6 +265,9 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)
{
struct etnaviv_chip_identity *ident = &gpu->identity;
+ const u32 product_id = ident->product_id;
+ const u32 customer_id = ident->customer_id;
+ const u32 eco_id = ident->eco_id;
int i;
for (i = 0; i < ARRAY_SIZE(etnaviv_chip_identities); i++) {
@@ -278,6 +281,17 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)
etnaviv_chip_identities[i].eco_id == ~0U)) {
memcpy(ident, &etnaviv_chip_identities[i],
sizeof(*ident));
+
+ /* Restore some id values if ~0U aka 'don't care' is used. */
+ if (etnaviv_chip_identities[i].product_id == ~0U)
+ ident->product_id = product_id;
+
+ if (etnaviv_chip_identities[i].customer_id == ~0U)
+ ident->customer_id = customer_id;
+
+ if (etnaviv_chip_identities[i].eco_id == ~0U)
+ ident->eco_id = eco_id;
+
return true;
}
}
--
2.44.0
From: Christian Gmeiner <cgmeiner(a)igalia.com>
The hwdb selection logic as a feature that allows it to mark some fields
as 'don't care'. If we match with such a field we memcpy(..)
the current etnaviv_chip_identity into ident.
This step can overwrite some id values read from the GPU with the
'don't care' value.
Fix this issue by restoring the affected values after the memcpy(..).
As this is crucial for user space to know when this feature works as
expected increment the minor version too.
Fixes: 4078a1186dd3 ("drm/etnaviv: update hwdb selection logic")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Gmeiner <cgmeiner(a)igalia.com>
---
V1 -> V2: Fixed patch subject line and removed not needed if clauses.
drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +-
drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 9 +++++++++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 6228ce603248..9a2965741dab 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -494,7 +494,7 @@ static const struct drm_driver etnaviv_drm_driver = {
.desc = "etnaviv DRM",
.date = "20151214",
.major = 1,
- .minor = 3,
+ .minor = 4,
};
/*
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
index 67201242438b..8665f2658d51 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
@@ -265,6 +265,9 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)
{
struct etnaviv_chip_identity *ident = &gpu->identity;
+ const u32 product_id = ident->product_id;
+ const u32 customer_id = ident->customer_id;
+ const u32 eco_id = ident->eco_id;
int i;
for (i = 0; i < ARRAY_SIZE(etnaviv_chip_identities); i++) {
@@ -278,6 +281,12 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)
etnaviv_chip_identities[i].eco_id == ~0U)) {
memcpy(ident, &etnaviv_chip_identities[i],
sizeof(*ident));
+
+ /* Restore some id values as ~0U aka 'don't care' might been used. */
+ ident->product_id = product_id;
+ ident->customer_id = customer_id;
+ ident->eco_id = eco_id;
+
return true;
}
}
--
2.44.0
psci_init_system_suspend() invokes suspend_set_ops() very early during
bootup even before kernel command line for mem_sleep_default is setup.
This leads to kernel command line mem_sleep_default=s2idle not working
as mem_sleep_current gets changed to deep via suspend_set_ops() and never
changes back to s2idle.
Set mem_sleep_current along with mem_sleep_default during kernel command
line setup as default suspend mode.
Fixes: faf7ec4a92c0 ("drivers: firmware: psci: add system suspend support")
CC: stable(a)vger.kernel.org # 5.4+
Signed-off-by: Maulik Shah <quic_mkshah(a)quicinc.com>
---
Changes in v2:
- Set mem_sleep_current during mem_sleep_default kernel command line setup
- Update commit message accordingly
- Retain Fixes: tag
- Link to v1: https://lore.kernel.org/r/20240219-suspend_ops_late_init-v1-1-6330ca9597fa@…
---
kernel/power/suspend.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 742eb26618cc..e3ae93bbcb9b 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -192,6 +192,7 @@ static int __init mem_sleep_default_setup(char *str)
if (mem_sleep_labels[state] &&
!strcmp(str, mem_sleep_labels[state])) {
mem_sleep_default = state;
+ mem_sleep_current = state;
break;
}
---
base-commit: d37e1e4c52bc60578969f391fb81f947c3e83118
change-id: 20240219-suspend_ops_late_init-27fb0b15baee
Best regards,
--
Maulik Shah <quic_mkshah(a)quicinc.com>
In case of a memory allocation failure in the volumes loop we can only
process the already allocated scan_eba and fm_eba array elements on the
error path - others are still uninitialized.
Found by Linux Verification Center (linuxtesting.org).
Fixes: 00abf3041590 ("UBI: Add self_check_eba()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
drivers/mtd/ubi/eba.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 8d1f0e05892c..6f5eadb1598d 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1557,6 +1557,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap,
GFP_KERNEL);
if (!fm_eba[i]) {
ret = -ENOMEM;
+ kfree(scan_eba[i]);
goto out_free;
}
@@ -1592,7 +1593,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap,
}
out_free:
- for (i = 0; i < num_volumes; i++) {
+ while (--i >= 0) {
if (!ubi->volumes[i])
continue;
--
2.43.2
From: Siddh Raman Pant <code(a)siddh.me>
[ Upstream commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 ]
The userspace can configure a loop using an ioctl call, wherein
a configuration of type loop_config is passed (see lo_ioctl()'s
case on line 1550 of drivers/block/loop.c). This proceeds to call
loop_configure() which in turn calls loop_set_status_from_info()
(see line 1050 of loop.c), passing &config->info which is of type
loop_info64*. This function then sets the appropriate values, like
the offset.
loop_device has lo_offset of type loff_t (see line 52 of loop.c),
which is typdef-chained to long long, whereas loop_info64 has
lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h).
The function directly copies offset from info to the device as
follows (See line 980 of loop.c):
lo->lo_offset = info->lo_offset;
This results in an overflow, which triggers a warning in iomap_iter()
due to a call to iomap_iter_done() which has:
WARN_ON_ONCE(iter->iomap.offset > iter->pos);
Thus, check for negative value during loop_set_status_from_info().
Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a2…
Reported-and-tested-by: syzbot+a8e049cd3abd342936b6(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Signed-off-by: Siddh Raman Pant <code(a)siddh.me>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Genjian Zhang <zhanggenjian(a)kylinos.cn>
---
drivers/block/loop.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 0fefd21f0c71..c1caa3e2355f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1271,6 +1271,11 @@ loop_set_status_from_info(struct loop_device *lo,
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
+
+ /* loff_t vars have been assigned __u64 */
+ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
+ return -EOVERFLOW;
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
--
2.25.1
The [ms]envcfg CSR was added in version 1.12 of the RISC-V privileged
ISA (aka S[ms]1p12). However, bits in this CSR are defined by several
other extensions which may be implemented separately from any particular
version of the privileged ISA (for example, some unrelated errata may
prevent an implementation from claiming conformance with Ss1p12). As a
result, Linux cannot simply use the privileged ISA version to determine
if the CSR is present. It must also check if any of these other
extensions are implemented. It also cannot probe the existence of the
CSR at runtime, because Linux does not require Sstrict, so (in the
absence of additional information) it cannot know if a CSR at that
address is [ms]envcfg or part of some non-conforming vendor extension.
Since there are several standard extensions that imply the existence of
the [ms]envcfg CSR, it becomes unwieldy to check for all of them
wherever the CSR is accessed. Instead, define a custom Xlinuxenvcfg ISA
extension bit that is implied by the other extensions and denotes that
the CSR exists as defined in the privileged ISA, containing at least one
of the fields common between menvcfg and senvcfg.
This extension does not need to be parsed from the devicetree or ISA
string because it can only be implemented as a subset of some other
standard extension.
Cc: <stable(a)vger.kernel.org> # v6.7+
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
---
Changes in v4:
- New patch for v4
arch/riscv/include/asm/hwcap.h | 2 ++
arch/riscv/kernel/cpufeature.c | 14 ++++++++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5340f818746b..1f2d2599c655 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,8 @@
#define RISCV_ISA_EXT_ZTSO 72
#define RISCV_ISA_EXT_ZACAS 73
+#define RISCV_ISA_EXT_XLINUXENVCFG 127
+
#define RISCV_ISA_EXT_MAX 128
#define RISCV_ISA_EXT_INVALID U32_MAX
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index c5b13f7dd482..dacffef68ce2 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -201,6 +201,16 @@ static const unsigned int riscv_zvbb_exts[] = {
RISCV_ISA_EXT_ZVKB
};
+/*
+ * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V
+ * privileged ISA, the existence of the CSRs is implied by any extension which
+ * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the
+ * existence of the CSR, and treat it as a subset of those other extensions.
+ */
+static const unsigned int riscv_xlinuxenvcfg_exts[] = {
+ RISCV_ISA_EXT_XLINUXENVCFG
+};
+
/*
* The canonical order of ISA extension names in the ISA string is defined in
* chapter 27 of the unprivileged specification.
@@ -250,8 +260,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
__RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
- __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
- __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+ __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts),
+ __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
--
2.43.1
I'm unsure if this is just an LVM bug, or a BTRFS+LVM interaction bug,
but LVM is definitely involved somehow.
Upgrading from 5.10 to 6.1, I noticed one of my filesystems was
read-only. In dmesg, I found:
BTRFS error (device dm-75): bdev /dev/mapper/lvm-brokenDisk errs: wr
0, rd 0, flush 1, corrupt 0, gen 0
BTRFS warning (device dm-75): chunk 13631488 missing 1 devices, max
tolerance is 0 for writable mount
BTRFS: error (device dm-75) in write_all_supers:4379: errno=-5 IO
failure (errors while submitting device barriers.)
BTRFS info (device dm-75: state E): forced readonly
BTRFS warning (device dm-75: state E): Skipping commit of aborted transaction.
BTRFS: error (device dm-75: state EA) in cleanup_transaction:1992:
errno=-5 IO failure
At first I suspected a btrfs error, but a scrub found no errors, and
it continued to be read-write on 5.10 kernels.
Here is my setup:
/dev/lvm/brokenDisk is a lvm-on-lvm volume. I have /dev/sd{a,b,c,d}
(of varying sizes) in a lower VG, which has three LVs, all raid1
volumes. Two of the volumes are further used as PV's for an upper VGs.
One of the upper VGs has no issues. The non-PV LV has no issue. The
remaining one, /dev/lowerVG/lvmPool, hosting nested LVM, is used as a
PV for VG "lvm", and has 3 volumes inside. Two of those volumes have
no issues (and are btrfs), but the last one is /dev/lvm/brokenDisk.
This volume is the only one that exhibits this behavior, so something
is special.
Or described as layers:
/dev/sd{a,b,c,d} => PV => VG "lowerVG"
/dev/lowerVG/single (RAID1 LV) => BTRFS, works fine
/dev/lowerVG/works (RAID1 LV) => PV => VG "workingUpper"
/dev/workingUpper/{a,b,c} => BTRFS, works fine
/dev/lowerVG/lvmPool (RAID1 LV) => PV => VG "lvm"
/dev/lvm/{a,b} => BTRFS, works fine
/dev/lvm/brokenDisk => BTRFS, Exhibits errors
After some investigation, here is what I've found:
1. This regression was introduced in 5.19. 5.18 and earlier kernels I
can keep this filesystem rw and everything works as expected, while
5.19.0 and later the filesystem is immediately ro on any write
attempt. I couldn't build rc1, but I did confirm rc2 already has this
regression.
2. Passing /dev/lvm/brokenDisk to a KVM VM as /dev/vdb with an
unaffected kernel inside the vm exhibits the ro barrier problem on
unaffected kernels.
3. Passing /dev/lowerVG/lvmPool to a KVM VM as /dev/vdb with an
affected kernel inside the VM and using LVM inside the VM exhibits
correct behavior (I can keep the filesystem rw, no barrier errors on
host or guest)
4. A discussion in IRC with BTRFS folks, and they think the BTRFS
filesystem is fine (btrfs check and btrfs scrub also agree)
5. The dmesg error can be delayed indefinitely by not writing to the
disk, or reading with noatime
6. This affects Debian, Ubuntu, NixOS, and Solus, so I'm fairly
certain it's distro-agnostic, and purely a kernel issue.
7. I can't reproduce this with other LVM-on-LVM setups, so I think the
asymmetric nature of the raid1 volume is potentially contributing
8. There are no new smart errors/failures on any of the disks, disks are healthy
9. I previously had raidintegrity=y and caching enabled. They didn't
affect the issue
#regzbot introduced v5.18..v5.19-rc2
Patrick
From: Yishai Hadas <yishaih(a)nvidia.com>
[ Upstream commit be551ee1574280ef8afbf7c271212ac3e38933ef ]
Relax DEVX access upon modify commands to be UVERBS_ACCESS_READ.
The kernel doesn't need to protect what firmware protects, or what
causes no damage to anyone but the user.
As firmware needs to protect itself from parallel access to the same
object, don't block parallel modify/query commands on the same object in
the kernel side.
This change will allow user space application to run parallel updates to
different entries in the same bulk object.
Tested-by: Tamar Mashiah <tmashiah(a)nvidia.com>
Signed-off-by: Yishai Hadas <yishaih(a)nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur(a)nvidia.com>
Link: https://lore.kernel.org/r/7407d5ed35dc427c1097699e12b49c01e1073406.17064339…
Signed-off-by: Leon Romanovsky <leon(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/mlx5/devx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 26cc7bbcdfe6a..7a3b56c150799 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2811,7 +2811,7 @@ DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
UVERBS_IDR_ANY_OBJECT,
- UVERBS_ACCESS_WRITE,
+ UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
--
2.43.0
The special casing was originally added in pre-git history; reproducing
the commit log here:
> commit a318a92567d77
> Author: Andrew Morton <akpm(a)osdl.org>
> Date: Sun Sep 21 01:42:22 2003 -0700
>
> [PATCH] Speed up direct-io hugetlbpage handling
>
> This patch short-circuits all the direct-io page dirtying logic for
> higher-order pages. Without this, we pointlessly bounce BIOs up to
> keventd all the time.
In the last twenty years, compound pages have become used for more than
just hugetlb. Rewrite these functions to operate on folios instead
of pages and remove the special case for hugetlbfs; I don't think
it's needed any more (and if it is, we can put it back in as a call
to folio_test_hugetlb()).
This was found by inspection; as far as I can tell, this bug can lead
to pages used as the destination of a direct I/O read not being marked
as dirty. If those pages are then reclaimed by the MM without being
dirtied for some other reason, they won't be written out. Then when
they're faulted back in, they will not contain the data they should.
It'll take a pretty unusual setup to produce this problem with several
races all going the wrong way.
This problem predates the folio work; it could for example have been
triggered by mmaping a THP in tmpfs and using that as the target of an
O_DIRECT read.
Fixes: 800d8c63b2e98 ("shmem: add huge pages support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
---
block/bio.c | 46 ++++++++++++++++++++++++----------------------
1 file changed, 24 insertions(+), 22 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 8672179213b9..f46d8ec71fbd 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1171,13 +1171,22 @@ EXPORT_SYMBOL(bio_add_folio);
void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
- struct bvec_iter_all iter_all;
- struct bio_vec *bvec;
+ struct folio_iter fi;
+
+ bio_for_each_folio_all(fi, bio) {
+ struct page *page;
+ size_t done = 0;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (mark_dirty && !PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
- bio_release_page(bio, bvec->bv_page);
+ if (mark_dirty) {
+ folio_lock(fi.folio);
+ folio_mark_dirty(fi.folio);
+ folio_unlock(fi.folio);
+ }
+ page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
+ do {
+ bio_release_page(bio, page++);
+ done += PAGE_SIZE;
+ } while (done < fi.length);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1455,18 +1464,12 @@ EXPORT_SYMBOL(bio_free_pages);
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
*
- * The problem is that we cannot run set_page_dirty() from interrupt context
+ * The problem is that we cannot run folio_mark_dirty() from interrupt context
* because the required locks are not interrupt-safe. So what we can do is to
* mark the pages dirty _before_ performing IO. And in interrupt context,
* check that the pages are still dirty. If so, fine. If not, redirty them
* in process context.
*
- * We special-case compound pages here: normally this means reads into hugetlb
- * pages. The logic in here doesn't really work right for compound pages
- * because the VM does not uniformly chase down the head page in all cases.
- * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
- * handle them at all. So we skip compound pages here at an early stage.
- *
* Note that this code is very hard to test under normal circumstances because
* direct-io pins the pages with get_user_pages(). This makes
* is_page_cache_freeable return false, and the VM will not clean the pages.
@@ -1482,12 +1485,12 @@ EXPORT_SYMBOL(bio_free_pages);
*/
void bio_set_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
+ bio_for_each_folio_all(fi, bio) {
+ folio_lock(fi.folio);
+ folio_mark_dirty(fi.folio);
+ folio_unlock(fi.folio);
}
}
@@ -1530,12 +1533,11 @@ static void bio_dirty_fn(struct work_struct *work)
void bio_check_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
+ struct folio_iter fi;
unsigned long flags;
- struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
+ bio_for_each_folio_all(fi, bio) {
+ if (!folio_test_dirty(fi.folio))
goto defer;
}
--
2.40.1
Resolving a frequency to an efficient one should not transgress policy->max
(which can be set for thermal reason) and policy->min. Currently there is
possibility where scaling_cur_freq can exceed scaling_max_freq when
scaling_max_freq is inefficient frequency. Add additional check to ensure
that resolving a frequency will respect policy->min/max.
Cc: <stable(a)vger.kernel.org>
Fixes: 1f39fa0dccff ("cpufreq: Introducing CPUFREQ_RELATION_E")
Signed-off-by: Shivnandan Kumar <quic_kshivnan(a)quicinc.com>
--
Changes in v2:
-rename function name from cpufreq_table_index_is_in_limits to cpufreq_is_in_limits
-remove redundant outer parenthesis in return statement
-Make comment single line
--
---
include/linux/cpufreq.h | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index afda5f24d3dd..7741244dee6e 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1021,6 +1021,19 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
efficiencies);
}
+static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy,
+ int idx)
+{
+ unsigned int freq;
+
+ if (idx < 0)
+ return false;
+
+ freq = policy->freq_table[idx].frequency;
+
+ return freq == clamp_val(freq, policy->min, policy->max);
+}
+
static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -1054,7 +1067,8 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
return 0;
}
- if (idx < 0 && efficiencies) {
+ /* Limit frequency index to honor policy->min/max */
+ if (!cpufreq_is_in_limits(policy, idx) && efficiencies) {
efficiencies = false;
goto retry;
}
--
2.25.1
On Thu, Feb 29, 2024 at 11:37:28AM +0300, Vasiliy Kovalev wrote:
[...]
> This patch fixes another problem, but a similar one, since the sequence is
> incorrect when registering subsystems.
>
> Initially, the registration sequence in the gtp module was as follows:
>
> 1) rtnl_link_register();
>
> 2) genl_register_family();
>
> 3) register_pernet_subsys();
>
> During debugging of the module, when starting the syzkaller reproducer, it
> turned out that after genl_register_family() (2),
>
> without waiting for register_pernet_subsys()(3), the /.dumpit/ event is
> triggered, in which the data of the unregistered pernet subsystem is
> accessed.
>
> That is, the bug was fixed by the commit
>
> 136cfaca2256 ("gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp()")[1]
>
> and the registration sequence became as follows:
>
> 1) rtnl_link_register();
>
> 2) register_pernet_subsys();
>
> 3) genl_register_family();
>
> However, syzkaller has discovered another problem:
>
> after registering rtnl_link_register, the .newlink event is triggered, in
> which the data of the unregistered pernet subsystem is accessed.
>
> This problem is reproducible on current stable kernels and the latest
> upstream kernel 6.8-rc6, in which the patch 136cfaca2256 [1] is applied.
>
> Therefore, the correct sequence should be as follows:
>
> 1)register_pernet_subsys();
>
> 2) rtnl_link_register();
>
> 3) genl_register_family();
>
> The proposed patch is developed on top of the commit changes [1], does not
> conflict with it and fixes the described bug.
>
> [1] https://lore.kernel.org/lkml/20240220160434.29bcaf43@kernel.org/T/#mb1f72c2…
Thanks for explaining, fix LGTM.
From: Hans de Goede <hdegoede(a)redhat.com>
[ Upstream commit 551539a8606e28cb2a130f8ef3e9834235b456c4 ]
The DMI strings used for the LattePanda board DMI quirks are very generic.
Using the dmidecode database from https://linux-hardware.org/ shows
that the chosen DMI strings also match the following 2 laptops
which also have a rt5645 codec:
Insignia NS-P11W7100 https://linux-hardware.org/?computer=E092FFF8BA04
Insignia NS-P10W8100 https://linux-hardware.org/?computer=AFB6C0BF7934
All 4 hw revisions of the LattePanda board have "S70CR" in their BIOS
version DMI strings:
DF-BI-7-S70CR100-*
DF-BI-7-S70CR110-*
DF-BI-7-S70CR200-*
LP-BS-7-S70CR700-*
See e.g. https://linux-hardware.org/?computer=D98250A817C0
Add a partial (non exact) DMI match on this string to make the LattePanda
board DMI match more precise to avoid false-positive matches.
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://msgid.link/r/20240211212736.179605-1-hdegoede@redhat.com
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
sound/soc/codecs/rt5645.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 37ad3bee66a47..352aefddc7d70 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3796,6 +3796,16 @@ static const struct dmi_system_id dmi_platform_data[] = {
DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"),
+ /*
+ * Above strings are too generic, LattePanda BIOS versions for
+ * all 4 hw revisions are:
+ * DF-BI-7-S70CR100-*
+ * DF-BI-7-S70CR110-*
+ * DF-BI-7-S70CR200-*
+ * LP-BS-7-S70CR700-*
+ * Do a partial match for S70CR to avoid false positive matches.
+ */
+ DMI_MATCH(DMI_BIOS_VERSION, "S70CR"),
},
.driver_data = (void *)&lattepanda_board_platform_data,
},
--
2.43.0
From: Filipe Manana <fdmanana(a)suse.com>
[ Upstream commit 1693d5442c458ae8d5b0d58463b873cd879569ed ]
Add a helper function to determine if a block group is being used and make
use of it at btrfs_delete_unused_bgs(). This helper will also be used in
future code changes.
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/btrfs/block-group.c | 3 +--
fs/btrfs/block-group.h | 7 +++++++
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 4ca6828586af5..a2bac7196d18b 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1330,8 +1330,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
}
spin_lock(&block_group->lock);
- if (block_group->reserved || block_group->pinned ||
- block_group->used || block_group->ro ||
+ if (btrfs_is_block_group_used(block_group) || block_group->ro ||
list_is_singular(&block_group->list)) {
/*
* We want to bail if we made new allocations or have
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index a15868d607a92..f042c1c85a255 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -211,6 +211,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
return (block_group->start + block_group->length);
}
+static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg)
+{
+ lockdep_assert_held(&bg->lock);
+
+ return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0);
+}
+
static inline bool btrfs_is_block_group_data_only(
struct btrfs_block_group *block_group)
{
--
2.43.0