From: Maud Spierings <maudspierings(a)gocontroll.com>
Throughout the various probe functions &indio_dev->dev is used before it
is initialized. This caused a kernel panic in st_sensors_power_enable
when the call to devm_regulator_bulk_get_enable() fails and then calls
dev_err_probe() with the uninitialized device.
This seems to only cause a panic with dev_err_probe(), dev_err,
dev_warn and dev_info don't seem to cause a panic, but are fixed
as well.
---
When I search for general &indio_dev->dev usage, I see quite a lot more
hits, but I am not sure if there are issues with those too.
This issue has existed for a long time it seems and therefore it is
nearly impossible to find a proper fixes tag. I would love to see it at
least backported to 6.12 as that is where I encountered it, and I
believe the patch should apply without conflicts.
The investigation into this issue can be found in this thread [1]
[1]: https://lore.kernel.org/all/AM7P189MB100986A83D2F28AF3FFAF976E39EA@AM7P189M…
Signed-off-by: Maud Spierings <maudspierings(a)gocontroll.com>
---
drivers/iio/accel/st_accel_core.c | 10 +++----
drivers/iio/common/st_sensors/st_sensors_core.c | 35 +++++++++++-----------
drivers/iio/common/st_sensors/st_sensors_trigger.c | 18 +++++------
3 files changed, 31 insertions(+), 32 deletions(-)
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 99cb661fabb2d9cc1943fa8d0a6f3becb71126e6..a7961c610ed203d039bbf298c8883031a578fb0b 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1353,6 +1353,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
union acpi_object *ont;
union acpi_object *elements;
acpi_status status;
+ struct device *parent = indio_dev->dev.parent;
int ret = -EINVAL;
unsigned int val;
int i, j;
@@ -1371,7 +1372,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
};
- adev = ACPI_COMPANION(indio_dev->dev.parent);
+ adev = ACPI_COMPANION(parent);
if (!adev)
return -ENXIO;
@@ -1380,8 +1381,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
if (status == AE_NOT_FOUND) {
return -ENXIO;
} else if (ACPI_FAILURE(status)) {
- dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n",
- status);
+ dev_warn(parent, "failed to execute _ONT: %d\n", status);
return status;
}
@@ -1457,12 +1457,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
}
ret = 0;
- dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n");
+ dev_info(parent, "computed mount matrix from ACPI\n");
out:
kfree(buffer.pointer);
if (ret)
- dev_dbg(&indio_dev->dev,
+ dev_dbg(parent,
"failed to apply ACPI orientation data: %d\n", ret);
return ret;
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 8ce1dccfea4f5aaff45d3d40f6542323dd1f0b09..11cbf561b16d41f429745abb516c137cfbb302bb 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -154,7 +154,7 @@ static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs)
return err;
st_accel_set_fullscale_error:
- dev_err(&indio_dev->dev, "failed to set new fullscale.\n");
+ dev_err(indio_dev->dev.parent, "failed to set new fullscale.\n");
return err;
}
@@ -231,7 +231,7 @@ int st_sensors_power_enable(struct iio_dev *indio_dev)
ARRAY_SIZE(regulator_names),
regulator_names);
if (err)
- return dev_err_probe(&indio_dev->dev, err,
+ return dev_err_probe(parent, err,
"unable to enable supplies\n");
return 0;
@@ -241,13 +241,14 @@ EXPORT_SYMBOL_NS(st_sensors_power_enable, "IIO_ST_SENSORS");
static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
/* Sensor does not support interrupts */
if (!sdata->sensor_settings->drdy_irq.int1.addr &&
!sdata->sensor_settings->drdy_irq.int2.addr) {
if (pdata->drdy_int_pin)
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"DRDY on pin INT%d specified, but sensor does not support interrupts\n",
pdata->drdy_int_pin);
return 0;
@@ -256,29 +257,27 @@ static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
switch (pdata->drdy_int_pin) {
case 1:
if (!sdata->sensor_settings->drdy_irq.int1.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT1 not available.\n");
+ dev_err(parent, "DRDY on INT1 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 1;
break;
case 2:
if (!sdata->sensor_settings->drdy_irq.int2.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT2 not available.\n");
+ dev_err(parent, "DRDY on INT2 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 2;
break;
default:
- dev_err(&indio_dev->dev, "DRDY on pdata not valid.\n");
+ dev_err(parent, "DRDY on pdata not valid.\n");
return -EINVAL;
}
if (pdata->open_drain) {
if (!sdata->sensor_settings->drdy_irq.int1.addr_od &&
!sdata->sensor_settings->drdy_irq.int2.addr_od)
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"open drain requested but unsupported.\n");
else
sdata->int_pin_open_drain = true;
@@ -336,6 +335,7 @@ EXPORT_SYMBOL_NS(st_sensors_dev_name_probe, "IIO_ST_SENSORS");
int st_sensors_init_sensor(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
struct st_sensors_platform_data *of_pdata;
int err = 0;
@@ -343,7 +343,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mutex_init(&sdata->odr_lock);
/* If OF/DT pdata exists, it will take precedence of anything else */
- of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata);
+ of_pdata = st_sensors_dev_probe(parent, pdata);
if (IS_ERR(of_pdata))
return PTR_ERR(of_pdata);
if (of_pdata)
@@ -370,7 +370,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
if (err < 0)
return err;
} else
- dev_info(&indio_dev->dev, "Full-scale not possible\n");
+ dev_info(parent, "Full-scale not possible\n");
err = st_sensors_set_odr(indio_dev, sdata->odr);
if (err < 0)
@@ -405,7 +405,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mask = sdata->sensor_settings->drdy_irq.int2.mask_od;
}
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"set interrupt line to open drain mode on pin %d\n",
sdata->drdy_int_pin);
err = st_sensors_write_data_with_mask(indio_dev, addr,
@@ -593,21 +593,20 @@ EXPORT_SYMBOL_NS(st_sensors_get_settings_index, "IIO_ST_SENSORS");
int st_sensors_verify_id(struct iio_dev *indio_dev)
{
struct st_sensor_data *sdata = iio_priv(indio_dev);
+ struct device *parent = indio_dev->dev.parent;
int wai, err;
if (sdata->sensor_settings->wai_addr) {
err = regmap_read(sdata->regmap,
sdata->sensor_settings->wai_addr, &wai);
if (err < 0) {
- dev_err(&indio_dev->dev,
- "failed to read Who-Am-I register.\n");
- return err;
+ return dev_err_probe(parent, err,
+ "failed to read Who-Am-I register.\n");
}
if (sdata->sensor_settings->wai != wai) {
- dev_warn(&indio_dev->dev,
- "%s: WhoAmI mismatch (0x%x).\n",
- indio_dev->name, wai);
+ dev_warn(parent, "%s: WhoAmI mismatch (0x%x).\n",
+ indio_dev->name, wai);
}
}
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index 9d4bf822a15dfcdd6c2835f6b9d7698cd3cb0b08..32c3278968089699dff5329e943d92b151b55fdf 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -127,7 +127,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig = devm_iio_trigger_alloc(parent, "%s-trigger",
indio_dev->name);
if (sdata->trig == NULL) {
- dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n");
+ dev_err(parent, "failed to allocate iio trigger.\n");
return -ENOMEM;
}
@@ -143,7 +143,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
case IRQF_TRIGGER_FALLING:
case IRQF_TRIGGER_LOW:
if (!sdata->sensor_settings->drdy_irq.addr_ihl) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"falling/low specified for IRQ but hardware supports only rising/high: will request rising/high\n");
if (irq_trig == IRQF_TRIGGER_FALLING)
irq_trig = IRQF_TRIGGER_RISING;
@@ -156,21 +156,21 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->sensor_settings->drdy_irq.mask_ihl, 1);
if (err < 0)
return err;
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts on the falling edge or active low level\n");
}
break;
case IRQF_TRIGGER_RISING:
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts on the rising edge\n");
break;
case IRQF_TRIGGER_HIGH:
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts active high level\n");
break;
default:
/* This is the most preferred mode, if possible */
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"unsupported IRQ trigger specified (%lx), enforce rising edge\n", irq_trig);
irq_trig = IRQF_TRIGGER_RISING;
}
@@ -179,7 +179,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
if (irq_trig == IRQF_TRIGGER_FALLING ||
irq_trig == IRQF_TRIGGER_RISING) {
if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"edge IRQ not supported w/o stat register.\n");
return -EOPNOTSUPP;
}
@@ -214,13 +214,13 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig->name,
sdata->trig);
if (err) {
- dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n");
+ dev_err(parent, "failed to request trigger IRQ.\n");
return err;
}
err = devm_iio_trigger_register(parent, sdata->trig);
if (err < 0) {
- dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
+ dev_err(parent, "failed to register iio trigger.\n");
return err;
}
indio_dev->trig = iio_trigger_get(sdata->trig);
---
base-commit: 7bac2c97af4078d7a627500c9bcdd5b033f97718
change-id: 20250522-st_iio_fix-1c58fdd4d420
Best regards,
--
Maud Spierings <maudspierings(a)gocontroll.com>
The function sdm845_slim_snd_hw_params() calls the functuion
snd_soc_dai_set_channel_map() but does not check its return
value. A proper implementation can be found in msm_snd_hw_params().
Add error handling for snd_soc_dai_set_channel_map(). If the
function fails and it is not a unsupported error, return the
error code immediately.
Fixes: 5caf64c633a3 ("ASoC: qcom: sdm845: add support to DB845c and Lenovo Yoga")
Cc: stable(a)vger.kernel.org # v5.6
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
sound/soc/qcom/sdm845.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c
index a479d7e5b7fb..314ff68506d9 100644
--- a/sound/soc/qcom/sdm845.c
+++ b/sound/soc/qcom/sdm845.c
@@ -91,6 +91,10 @@ static int sdm845_slim_snd_hw_params(struct snd_pcm_substream *substream,
else
ret = snd_soc_dai_set_channel_map(cpu_dai, tx_ch_cnt,
tx_ch, 0, NULL);
+ if (ret != 0 && ret != -ENOTSUPP) {
+ dev_err(rtd->dev, "failed to set cpu chan map, err:%d\n", ret);
+ return ret;
+ }
}
return 0;
--
2.42.0.windows.2
On 5/21/25 11:48, Maud Spierings wrote:
> On 5/21/25 11:29, Christian Heusel wrote:
>> On 25/05/21 10:53AM, Maud Spierings wrote:
>>> I've just experienced an Issue that I think may be a regression.
>>>
>>> I'm enabling a device which incorporates a lis2dw12 accelerometer,
>>> currently
>>> I am running 6.12 lts, so 6.12.29 as of typing this message.
>>
>> Could you check whether the latest mainline release (at the time this is
>> v6.15-rc7) is also affected? If that's not the case the bug might
>> already be fixed ^_^
>
> Unfortunately doesn't seem to be the case, still gets the panic. I also
> tried 6.12(.0), but that also has the panic, so it is definitely older
> than this lts.
>
>> Also as you said that this is a regression, what is the last revision
>> that the accelerometer worked with?
>
> Thats a difficult one to pin down, I'm moving from the nxp vendor kernel
> to mainline, the last working one that I know sure is 5.10.72 of that
> vendor kernel.
I did some more digging and the latest lts it seems to work with is
6.1.139, 6.6.91 also crashes. So it seems to be a very old regression.
>>> This is where my ability to fix thing fizzles out and so here I am
>>> asking
>>> for assistance.
>>>
>>> Kind regards,
>>> Maud
>>
>> Cheers,
>> Chris
>
> From: Parav Pandit <parav(a)nvidia.com>
> Sent: Thursday, May 22, 2025 1:19 PM
> To: Max Gurtovoy <mgurtovoy(a)nvidia.com>; Israel Rukshin
> <israelr(a)nvidia.com>
> Cc: Parav Pandit <parav(a)nvidia.com>; stable(a)vger.kernel.org; NBU-Contact-
> Li Rongqing (EXTERNAL) <lirongqing(a)baidu.com>
> Subject: [PATCH v6] virtio_blk: Fix disk deletion hang on device surprise
> removal
>
> When the PCI device is surprise removed, requests may not complete the
> device as the VQ is marked as broken. Due to this, the disk deletion hangs.
>
> Fix it by aborting the requests when the VQ is broken.
>
> With this fix now fio completes swiftly.
> An alternative of IO timeout has been considered, however when the driver
> knows about unresponsive block device, swiftly clearing them enables users
> and upper layers to react quickly.
>
> Verified with multiple device unplug iterations with pending requests in virtio
> used ring and some pending with the device.
>
> Fixes: 43bb40c5b926 ("virtio_pci: Support surprise removal of virtio pci
> device")
> Cc: stable(a)vger.kernel.org
> Reported-by: Li RongQing <lirongqing(a)baidu.com>
> Closes:
> https://lore.kernel.org/virtualization/c45dd68698cd47238c55fb73ca9b4741
> @baidu.com/
> Signed-off-by: Parav Pandit <parav(a)nvidia.com>
>
This is an internal patch, which got CCed to stable by mistake.
Please ignore this patch for stable kernels.
It is still under internal review.
I am sorry for the noise.
> ---
> v1->v2: (internal v5->v6):
> - Addressed comments from Stephan
> - fixed spelling to 'waiting'
> v1->v2: (internal v4->v5):
> - Addressed comments from MST
> - removed the vq broken check in queue_rq(s)
> ---
> drivers/block/virtio_blk.c | 85
> ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 85 insertions(+)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index
> 7cffea01d868..04f24ec20405 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -1554,6 +1554,89 @@ static int virtblk_probe(struct virtio_device *vdev)
> return err;
> }
>
> +static bool virtblk_request_cancel(struct request *rq, void *data) {
> + struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
> + struct virtio_blk *vblk = data;
> + struct virtio_blk_vq *vq;
> + unsigned long flags;
> +
> + vq = &vblk->vqs[rq->mq_hctx->queue_num];
> +
> + spin_lock_irqsave(&vq->lock, flags);
> +
> + vbr->in_hdr.status = VIRTIO_BLK_S_IOERR;
> + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq))
> + blk_mq_complete_request(rq);
> +
> + spin_unlock_irqrestore(&vq->lock, flags);
> + return true;
> +}
> +
> +static void virtblk_broken_device_cleanup(struct virtio_blk *vblk) {
> + struct request_queue *q = vblk->disk->queue;
> +
> + return;
> +
> + if (!virtqueue_is_broken(vblk->vqs[0].vq))
> + return;
> +
> + /* Start freezing the queue, so that new requests keeps waiting at the
> + * door of bio_queue_enter(). We cannot fully freeze the queue
> because
> + * freezed queue is an empty queue and there are pending requests,
> so
> + * only start freezing it.
> + */
> + blk_freeze_queue_start(q);
> +
> + /* When quiescing completes, all ongoing dispatches have completed
> + * and no new dispatch will happen towards the driver.
> + * This ensures that later when cancel is attempted, then are not
> + * getting processed by the queue_rq() or queue_rqs() handlers.
> + */
> + blk_mq_quiesce_queue(q);
> +
> + /*
> + * Synchronize with any ongoing VQ callbacks, effectively quiescing
> + * the device and preventing it from completing further requests
> + * to the block layer. Any outstanding, incomplete requests will be
> + * completed by virtblk_request_cancel().
> + */
> + virtio_synchronize_cbs(vblk->vdev);
> +
> + /* At this point, no new requests can enter the queue_rq() and
> + * completion routine will not complete any new requests either for
> the
> + * broken vq. Hence, it is safe to cancel all requests which are
> + * started.
> + */
> + blk_mq_tagset_busy_iter(&vblk->tag_set, virtblk_request_cancel,
> vblk);
> + blk_mq_tagset_wait_completed_request(&vblk->tag_set);
> +
> + /* All pending requests are cleaned up. Time to resume so that disk
> + * deletion can be smooth. Start the HW queues so that when queue is
> + * unquiesced requests can again enter the driver.
> + */
> + blk_mq_start_stopped_hw_queues(q, true);
> +
> + /* Unquiescing will trigger dispatching any pending requests to the
> + * driver which has crossed bio_queue_enter() to the driver.
> + */
> + blk_mq_unquiesce_queue(q);
> +
> + /* Wait for all pending dispatches to terminate which may have been
> + * initiated after unquiescing.
> + */
> + blk_mq_freeze_queue_wait(q);
> +
> + /* Mark the disk dead so that once queue unfreeze, the requests
> + * waiting at the door of bio_queue_enter() can be aborted right away.
> + */
> + blk_mark_disk_dead(vblk->disk);
> +
> + /* Unfreeze the queue so that any waiting requests will be aborted. */
> + blk_mq_unfreeze_queue_nomemrestore(q);
> +}
> +
> static void virtblk_remove(struct virtio_device *vdev) {
> struct virtio_blk *vblk = vdev->priv;
> @@ -1561,6 +1644,8 @@ static void virtblk_remove(struct virtio_device
> *vdev)
> /* Make sure no work handler is accessing the device. */
> flush_work(&vblk->config_work);
>
> + virtblk_broken_device_cleanup(vblk);
> +
> del_gendisk(vblk->disk);
> blk_mq_free_tag_set(&vblk->tag_set);
>
> --
> 2.34.1
Hi,
I'd like to report a regression which seems related to the latest
ITS mitigations in Linux 6.1.x:
The server in question is a Supermicro SYS-120C-TN10R with
a "Intel(R) Xeon(R) Silver 4310 CPU @ 2.10GHz" CPU, running
Debian Bookworm. The full output of /proc/cpuinfo is attached
as cpuinfo.txt
In addition to the kernel changes between 6.1.135 and 6.1.139
there is also some additional invariant, namely the Intel microcode
loaded at early boot:
On Linux 6.1.135 every works fine with both the 20250211 and
20250512 microcode releases (kern.log is attached as
6.1.135-feb-microcode.log and 6.1.135-may-microcode.log)
With 6.1.139 and the February microcode, oopses appear related
to clear_bhb_loop() (which may be related to "x86/its: Align
RETs in BHB clear sequence to avoid thunking"?). This is
captured in 6.1.139-feb-microcode.log.
With 6.1.139 and the May microcode, the system mostly
crashes on bootup (in my tests it crashed in three out of
four attempts). I've captured both the crash
(6.1.139-may-microcode-crash.log) and a working boot
(6.1.139-may-microcode-noncrash.log).
If you need any additional information, please let me know!
Cheers,
Moritz
Generally PASID support requires ACS settings that usually create
single device groups, but there are some niche cases where we can get
multi-device groups and still have working PASID support. The primary
issue is that PCI switches are not required to treat PASID tagged TLPs
specially so appropriate ACS settings are required to route all TLPs to
the host bridge if PASID is going to work properly.
pci_enable_pasid() does check that each device that will use PASID has
the proper ACS settings to achieve this routing.
However, no-PASID devices can be combined with PASID capable devices
within the same topology using non-uniform ACS settings. In this case
the no-PASID devices may not have strict route to host ACS flags and
end up being grouped with the PASID devices.
This configuration fails to allow use of the PASID within the iommu
core code which wrongly checks if the no-PASID device supports PASID.
Fix this by ignoring no-PASID devices during the PASID validation. They
will never issue a PASID TLP anyhow so they can be ignored.
Fixes: c404f55c26fc ("iommu: Validate the PASID in iommu_attach_device_pasid()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tushar Dave <tdave(a)nvidia.com>
---
changes in v4:
- rebase to 6.15-rc7
drivers/iommu/iommu.c | 43 ++++++++++++++++++++++++++++---------------
1 file changed, 28 insertions(+), 15 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 4f91a740c15f..9d728800a862 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3366,10 +3366,12 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
int ret;
for_each_group_device(group, device) {
- ret = domain->ops->set_dev_pasid(domain, device->dev,
- pasid, old);
- if (ret)
- goto err_revert;
+ if (device->dev->iommu->max_pasids > 0) {
+ ret = domain->ops->set_dev_pasid(domain, device->dev,
+ pasid, old);
+ if (ret)
+ goto err_revert;
+ }
}
return 0;
@@ -3379,15 +3381,18 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
for_each_group_device(group, device) {
if (device == last_gdev)
break;
- /*
- * If no old domain, undo the succeeded devices/pasid.
- * Otherwise, rollback the succeeded devices/pasid to the old
- * domain. And it is a driver bug to fail attaching with a
- * previously good domain.
- */
- if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+ if (device->dev->iommu->max_pasids > 0) {
+ /*
+ * If no old domain, undo the succeeded devices/pasid.
+ * Otherwise, rollback the succeeded devices/pasid to
+ * the old domain. And it is a driver bug to fail
+ * attaching with a previously good domain.
+ */
+ if (!old ||
+ WARN_ON(old->ops->set_dev_pasid(old, device->dev,
pasid, domain)))
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
return ret;
}
@@ -3398,8 +3403,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
{
struct group_device *device;
- for_each_group_device(group, device)
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ for_each_group_device(group, device) {
+ if (device->dev->iommu->max_pasids > 0)
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
/*
@@ -3440,7 +3447,13 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
mutex_lock(&group->mutex);
for_each_group_device(group, device) {
- if (pasid >= device->dev->iommu->max_pasids) {
+ /*
+ * Skip PASID validation for devices without PASID support
+ * (max_pasids = 0). These devices cannot issue transactions
+ * with PASID, so they don't affect group's PASID usage.
+ */
+ if ((device->dev->iommu->max_pasids > 0) &&
+ (pasid >= device->dev->iommu->max_pasids)) {
ret = -EINVAL;
goto out_unlock;
}
--
2.34.1
Hi! After updating to linux-6.12.29, I see lots of "fail"-messages
during boot:
May 19 23:39:09 LUX kernel: [ 4.819552] amdgpu 0000:30:00.0: amdgpu:
[drm] amdgpu: DP AUX transfer fail:4
Bisecting for drivers/gpu/drm/amd had this result:
> git bisect bad
2d63e66f7ba7b88b87e72155a33b970c81cf4664 is the first bad commit
commit 2d63e66f7ba7b88b87e72155a33b970c81cf4664 (HEAD)
Author: Wayne Lin <Wayne.Lin(a)amd.com>
Date: Sun Apr 20 19:22:14 2025 +0800
drm/amd/display: Fix wrong handling for AUX_DEFER case
commit 65924ec69b29296845c7f628112353438e63ea56 upstream.
The system (Ryzen 3 5600G, latest BIOS) is stable so far but the
error-messages are not nice to see. Thanks.
Rainer Fiebig
--
The truth always turns out to be simpler than you thought.
Richard Feynman
The patch fixes a deadlock which can be triggered by an internal
syzkaller [1] reproducer and captured by bpftrace script [2] and its log
[3] in this scenario:
Process 1 Process 2
--- ---
hugetlb_fault
mutex_lock(B) // take B
filemap_lock_hugetlb_folio
filemap_lock_folio
__filemap_get_folio
folio_lock(A) // take A
hugetlb_wp
mutex_unlock(B) // release B
... hugetlb_fault
... mutex_lock(B) // take B
filemap_lock_hugetlb_folio
filemap_lock_folio
__filemap_get_folio
folio_lock(A) // blocked
unmap_ref_private
...
mutex_lock(B) // retake and blocked
This is a ABBA deadlock involving two locks:
- Lock A: pagecache_folio lock
- Lock B: hugetlb_fault_mutex_table lock
The deadlock occurs between two processes as follows:
1. The first process (let’s call it Process 1) is handling a
copy-on-write (COW) operation on a hugepage via hugetlb_wp. Due to
insufficient reserved hugetlb pages, Process 1, owner of the reserved
hugetlb page, attempts to unmap a hugepage owned by another process
(non-owner) to satisfy the reservation. Before unmapping, Process 1
acquires lock B (hugetlb_fault_mutex_table lock) and then lock A
(pagecache_folio lock). To proceed with the unmap, it releases Lock B
but retains Lock A. After the unmap, Process 1 tries to reacquire Lock
B. However, at this point, Lock B has already been acquired by another
process.
2. The second process (Process 2) enters the hugetlb_fault handler
during the unmap operation. It successfully acquires Lock B
(hugetlb_fault_mutex_table lock) that was just released by Process 1,
but then attempts to acquire Lock A (pagecache_folio lock), which is
still held by Process 1.
As a result, Process 1 (holding Lock A) is blocked waiting for Lock B
(held by Process 2), while Process 2 (holding Lock B) is blocked waiting
for Lock A (held by Process 1), constructing a ABBA deadlock scenario.
The error message:
INFO: task repro_20250402_:13229 blocked for more than 64 seconds.
Not tainted 6.15.0-rc3+ #24
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:repro_20250402_ state:D stack:25856 pid:13229 tgid:13228 ppid:3513 task_flags:0x400040 flags:0x00004006
Call Trace:
<TASK>
__schedule+0x1755/0x4f50
schedule+0x158/0x330
schedule_preempt_disabled+0x15/0x30
__mutex_lock+0x75f/0xeb0
hugetlb_wp+0xf88/0x3440
hugetlb_fault+0x14c8/0x2c30
trace_clock_x86_tsc+0x20/0x20
do_user_addr_fault+0x61d/0x1490
exc_page_fault+0x64/0x100
asm_exc_page_fault+0x26/0x30
RIP: 0010:__put_user_4+0xd/0x20
copy_process+0x1f4a/0x3d60
kernel_clone+0x210/0x8f0
__x64_sys_clone+0x18d/0x1f0
do_syscall_64+0x6a/0x120
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x41b26d
</TASK>
INFO: task repro_20250402_:13229 is blocked on a mutex likely owned by task repro_20250402_:13250.
task:repro_20250402_ state:D stack:28288 pid:13250 tgid:13228 ppid:3513 task_flags:0x400040 flags:0x00000006
Call Trace:
<TASK>
__schedule+0x1755/0x4f50
schedule+0x158/0x330
io_schedule+0x92/0x110
folio_wait_bit_common+0x69a/0xba0
__filemap_get_folio+0x154/0xb70
hugetlb_fault+0xa50/0x2c30
trace_clock_x86_tsc+0x20/0x20
do_user_addr_fault+0xace/0x1490
exc_page_fault+0x64/0x100
asm_exc_page_fault+0x26/0x30
RIP: 0033:0x402619
</TASK>
INFO: task repro_20250402_:13250 blocked for more than 65 seconds.
Not tainted 6.15.0-rc3+ #24
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:repro_20250402_ state:D stack:28288 pid:13250 tgid:13228 ppid:3513 task_flags:0x400040 flags:0x00000006
Call Trace:
<TASK>
__schedule+0x1755/0x4f50
schedule+0x158/0x330
io_schedule+0x92/0x110
folio_wait_bit_common+0x69a/0xba0
__filemap_get_folio+0x154/0xb70
hugetlb_fault+0xa50/0x2c30
trace_clock_x86_tsc+0x20/0x20
do_user_addr_fault+0xace/0x1490
exc_page_fault+0x64/0x100
asm_exc_page_fault+0x26/0x30
RIP: 0033:0x402619
</TASK>
Showing all locks held in the system:
1 lock held by khungtaskd/35:
#0: ffffffff879a7440 (rcu_read_lock){....}-{1:3}, at: debug_show_all_locks+0x30/0x180
2 locks held by repro_20250402_/13229:
#0: ffff888017d801e0 (&mm->mmap_lock){++++}-{4:4}, at: lock_mm_and_find_vma+0x37/0x300
#1: ffff888000fec848 (&hugetlb_fault_mutex_table[i]){+.+.}-{4:4}, at: hugetlb_wp+0xf88/0x3440
3 locks held by repro_20250402_/13250:
#0: ffff8880177f3d08 (vm_lock){++++}-{0:0}, at: do_user_addr_fault+0x41b/0x1490
#1: ffff888000fec848 (&hugetlb_fault_mutex_table[i]){+.+.}-{4:4}, at: hugetlb_fault+0x3b8/0x2c30
#2: ffff8880129500e8 (&resv_map->rw_sema){++++}-{4:4}, at: hugetlb_fault+0x494/0x2c30
Link: https://drive.google.com/file/d/1DVRnIW-vSayU5J1re9Ct_br3jJQU6Vpb/view?usp=… [1]
Link: https://github.com/bboymimi/bpftracer/blob/master/scripts/hugetlb_lock_debu… [2]
Link: https://drive.google.com/file/d/1bWq2-8o-BJAuhoHWX7zAhI6ggfhVzQUI/view?usp=… [3]
Fixes: 40549ba8f8e0 ("hugetlb: use new vma_lock for pmd sharing synchronization")
Cc: stable(a)vger.kernel.org
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Florent Revest <revest(a)google.com>
Cc: Gavin Shan <gshan(a)redhat.com>
Suggested-by: Oscar Salvador <osalvador(a)suse.de>
Signed-off-by: Gavin Guo <gavinguo(a)igalia.com>
---
V1 -> V2
Suggested-by Oscar Salvador:
- Use folio_test_locked to replace the unnecessary parameter passing.
mm/hugetlb.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7ae38bfb9096..ed501f134eff 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6226,6 +6226,12 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
u32 hash;
folio_put(old_folio);
+ /*
+ * The pagecache_folio needs to be unlocked to avoid
+ * deadlock when the child unmaps the folio.
+ */
+ if (pagecache_folio)
+ folio_unlock(pagecache_folio);
/*
* Drop hugetlb_fault_mutex and vma_lock before
* unmapping. unmapping needs to hold vma_lock
@@ -6823,8 +6829,13 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
out_ptl:
spin_unlock(vmf.ptl);
+ /*
+ * hugetlb_wp() might have already unlocked pagecache_folio, so
+ * skip it if that is the case.
+ */
if (pagecache_folio) {
- folio_unlock(pagecache_folio);
+ if (folio_test_locked(pagecache_folio))
+ folio_unlock(pagecache_folio);
folio_put(pagecache_folio);
}
out_mutex:
base-commit: 4a95bc121ccdaee04c4d72f84dbfa6b880a514b6
--
2.43.0
Commit 1788cf6a91d9 ("tty: serial: switch from circ_buf to kfifo")
introduced an error in the TX DMA handling for 8250_omap.
When the OMAP_DMA_TX_KICK flag is set, the "skip_byte" is pulled from
the kfifo and emitted directly in order to start the DMA. While the
kfifo is updated, dma->tx_size is not decreased. This leads to
uart_xmit_advance() called in omap_8250_dma_tx_complete() advancing the
kfifo by one too much.
In practice, transmitting N bytes has been seen to result in the last
N-1 bytes being sent repeatedly.
This change fixes the problem by moving all of the dma setup after the
OMAP_DMA_TX_KICK handling and using kfifo_len() instead of the DMA size
for the 4-byte cutoff check. This slightly changes the behaviour at
buffer wraparound, but it still transmits the correct bytes somehow.
Now, the "skip_byte" would no longer be accounted to the stats. As
previously, dma->tx_size included also this skip byte, up->icount.tx was
updated by aforementioned uart_xmit_advance() in
omap_8250_dma_tx_complete(). Fix this by using the uart_fifo_out()
helper instead of bare kfifo_get().
Based on patch by Mans Rullgard <mans(a)mansr.com>
Fixes: 1788cf6a91d9 ("tty: serial: switch from circ_buf to kfifo")
Reported-by: Mans Rullgard <mans(a)mansr.com>
Cc: stable(a)vger.kernel.org
---
The same as for the original patch, I would appreaciate if someone
actually tests this one on a real HW too.
A patch to optimize the driver to use 2 sgls is still welcome. I will
not add it without actually having the HW.
---
drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index c9b1c689a045..bb23afdd63f2 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -1151,16 +1151,6 @@ static int omap_8250_tx_dma(struct uart_8250_port *p)
return 0;
}
- sg_init_table(&sg, 1);
- ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1,
- UART_XMIT_SIZE, dma->tx_addr);
- if (ret != 1) {
- serial8250_clear_THRI(p);
- return 0;
- }
-
- dma->tx_size = sg_dma_len(&sg);
-
if (priv->habit & OMAP_DMA_TX_KICK) {
unsigned char c;
u8 tx_lvl;
@@ -1185,18 +1175,22 @@ static int omap_8250_tx_dma(struct uart_8250_port *p)
ret = -EBUSY;
goto err;
}
- if (dma->tx_size < 4) {
+ if (kfifo_len(&tport->xmit_fifo) < 4) {
ret = -EINVAL;
goto err;
}
- if (!kfifo_get(&tport->xmit_fifo, &c)) {
+ if (!uart_fifo_out(&p->port, &c, 1)) {
ret = -EINVAL;
goto err;
}
skip_byte = c;
- /* now we need to recompute due to kfifo_get */
- kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1,
- UART_XMIT_SIZE, dma->tx_addr);
+ }
+
+ sg_init_table(&sg, 1);
+ ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, UART_XMIT_SIZE, dma->tx_addr);
+ if (ret != 1) {
+ ret = -EINVAL;
+ goto err;
}
desc = dmaengine_prep_slave_sg(dma->txchan, &sg, 1, DMA_MEM_TO_DEV,
@@ -1206,6 +1200,7 @@ static int omap_8250_tx_dma(struct uart_8250_port *p)
goto err;
}
+ dma->tx_size = sg_dma_len(&sg);
dma->tx_running = 1;
desc->callback = omap_8250_dma_tx_complete;
--
2.49.0
Commit 1788cf6a91d9 ("tty: serial: switch from circ_buf to kfifo")
introduced an error in the TX DMA handling for 8250_omap.
When the OMAP_DMA_TX_KICK flag is set, the "skip_byte" is pulled from
the kfifo and emitted directly in order to start the DMA. While the
kfifo is updated, dma->tx_size is not decreased. This leads to
uart_xmit_advance() called in omap_8250_dma_tx_complete() advancing the
kfifo by one too much.
In practice, transmitting N bytes has been seen to result in the last
N-1 bytes being sent repeatedly.
This change fixes the problem by moving all of the dma setup after the
OMAP_DMA_TX_KICK handling and using kfifo_len() instead of the DMA size
for the 4-byte cutoff check. This slightly changes the behaviour at
buffer wraparound, but it still transmits the correct bytes somehow.
Now, the "skip_byte" would no longer be accounted to the stats. As
previously, dma->tx_size included also this skip byte, up->icount.tx was
updated by aforementioned uart_xmit_advance() in
omap_8250_dma_tx_complete(). Fix this by using the uart_fifo_out()
helper instead of bare kfifo_get().
Based on patch by Mans Rullgard <mans(a)mansr.com>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby(a)kernel.org>
Fixes: 1788cf6a91d9 ("tty: serial: switch from circ_buf to kfifo")
Link: https://lore.kernel.org/all/20250506150748.3162-1-mans@mansr.com/
Reported-by: Mans Rullgard <mans(a)mansr.com>
Cc: stable(a)vger.kernel.org
---
[v2] S-O-B added
---
drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 2a0ce11f405d..72ae08d6204f 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -1173,16 +1173,6 @@ static int omap_8250_tx_dma(struct uart_8250_port *p)
return 0;
}
- sg_init_table(&sg, 1);
- ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1,
- UART_XMIT_SIZE, dma->tx_addr);
- if (ret != 1) {
- serial8250_clear_THRI(p);
- return 0;
- }
-
- dma->tx_size = sg_dma_len(&sg);
-
if (priv->habit & OMAP_DMA_TX_KICK) {
unsigned char c;
u8 tx_lvl;
@@ -1207,18 +1197,22 @@ static int omap_8250_tx_dma(struct uart_8250_port *p)
ret = -EBUSY;
goto err;
}
- if (dma->tx_size < 4) {
+ if (kfifo_len(&tport->xmit_fifo) < 4) {
ret = -EINVAL;
goto err;
}
- if (!kfifo_get(&tport->xmit_fifo, &c)) {
+ if (!uart_fifo_out(&p->port, &c, 1)) {
ret = -EINVAL;
goto err;
}
skip_byte = c;
- /* now we need to recompute due to kfifo_get */
- kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1,
- UART_XMIT_SIZE, dma->tx_addr);
+ }
+
+ sg_init_table(&sg, 1);
+ ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, UART_XMIT_SIZE, dma->tx_addr);
+ if (ret != 1) {
+ ret = -EINVAL;
+ goto err;
}
desc = dmaengine_prep_slave_sg(dma->txchan, &sg, 1, DMA_MEM_TO_DEV,
@@ -1228,6 +1222,7 @@ static int omap_8250_tx_dma(struct uart_8250_port *p)
goto err;
}
+ dma->tx_size = sg_dma_len(&sg);
dma->tx_running = 1;
desc->callback = omap_8250_dma_tx_complete;
--
2.49.0
From: Wander Lairson Costa <wander(a)redhat.com>
[ Upstream commit 0664e2c311b9fa43b33e3e81429cd0c2d7f9c638 ]
When running the following command:
while true; do
stress-ng --cyclic 30 --timeout 30s --minimize --quiet
done
a warning is eventually triggered:
WARNING: CPU: 43 PID: 2848 at kernel/sched/deadline.c:794
setup_new_dl_entity+0x13e/0x180
...
Call Trace:
<TASK>
? show_trace_log_lvl+0x1c4/0x2df
? enqueue_dl_entity+0x631/0x6e0
? setup_new_dl_entity+0x13e/0x180
? __warn+0x7e/0xd0
? report_bug+0x11a/0x1a0
? handle_bug+0x3c/0x70
? exc_invalid_op+0x14/0x70
? asm_exc_invalid_op+0x16/0x20
enqueue_dl_entity+0x631/0x6e0
enqueue_task_dl+0x7d/0x120
__do_set_cpus_allowed+0xe3/0x280
__set_cpus_allowed_ptr_locked+0x140/0x1d0
__set_cpus_allowed_ptr+0x54/0xa0
migrate_enable+0x7e/0x150
rt_spin_unlock+0x1c/0x90
group_send_sig_info+0xf7/0x1a0
? kill_pid_info+0x1f/0x1d0
kill_pid_info+0x78/0x1d0
kill_proc_info+0x5b/0x110
__x64_sys_kill+0x93/0xc0
do_syscall_64+0x5c/0xf0
entry_SYSCALL_64_after_hwframe+0x6e/0x76
RIP: 0033:0x7f0dab31f92b
This warning occurs because set_cpus_allowed dequeues and enqueues tasks
with the ENQUEUE_RESTORE flag set. If the task is boosted, the warning
is triggered. A boosted task already had its parameters set by
rt_mutex_setprio, and a new call to setup_new_dl_entity is unnecessary,
hence the WARN_ON call.
Check if we are requeueing a boosted task and avoid calling
setup_new_dl_entity if that's the case.
Fixes: 295d6d5e3736 ("sched/deadline: Fix switching to -deadline")
Signed-off-by: Wander Lairson Costa <wander(a)redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Juri Lelli <juri.lelli(a)redhat.com>
Link: https://lore.kernel.org/r/20240724142253.27145-2-wander@redhat.com
[Minor context change fixed.]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Build test passed.
---
kernel/sched/deadline.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6548bd90c5c3..e2ff343d1c42 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1516,6 +1516,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
} else if (flags & ENQUEUE_REPLENISH) {
replenish_dl_entity(dl_se);
} else if ((flags & ENQUEUE_RESTORE) &&
+ !is_dl_boosted(dl_se) &&
dl_time_before(dl_se->deadline,
rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
setup_new_dl_entity(dl_se);
--
2.34.1
The patch 099606a7b2d5 didn't cleanly apply to 5.15 due to the
significant difference in codebases.
I've tried to manually bring it back to 5.15 via some minor conflict
resolution but also invoking the newly introduced API using inverted
logic as the conditional statements present in 5.15 are the opposite of
those in 6.1 xen/swiotlib.
v2 of this patch was added and dropped due to some issues in testing.
However, after further verification this version seems to be right as
is.
I kindly request Juergen's ack specifically before this is added to
stable queue as this patch differs quite significantly compared to the
original.
Changes in v2:
Include correct upstream SHA in the commit message
Changes in v3:
Patch remains the same, however further verification and testing was
done.
Harshvardhan Jha (1):
xen/swiotlb: relax alignment requirements
drivers/xen/swiotlb-xen.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
--
2.47.1
From: Wander Lairson Costa <wander(a)redhat.com>
[ Upstream commit 0664e2c311b9fa43b33e3e81429cd0c2d7f9c638 ]
When running the following command:
while true; do
stress-ng --cyclic 30 --timeout 30s --minimize --quiet
done
a warning is eventually triggered:
WARNING: CPU: 43 PID: 2848 at kernel/sched/deadline.c:794
setup_new_dl_entity+0x13e/0x180
...
Call Trace:
<TASK>
? show_trace_log_lvl+0x1c4/0x2df
? enqueue_dl_entity+0x631/0x6e0
? setup_new_dl_entity+0x13e/0x180
? __warn+0x7e/0xd0
? report_bug+0x11a/0x1a0
? handle_bug+0x3c/0x70
? exc_invalid_op+0x14/0x70
? asm_exc_invalid_op+0x16/0x20
enqueue_dl_entity+0x631/0x6e0
enqueue_task_dl+0x7d/0x120
__do_set_cpus_allowed+0xe3/0x280
__set_cpus_allowed_ptr_locked+0x140/0x1d0
__set_cpus_allowed_ptr+0x54/0xa0
migrate_enable+0x7e/0x150
rt_spin_unlock+0x1c/0x90
group_send_sig_info+0xf7/0x1a0
? kill_pid_info+0x1f/0x1d0
kill_pid_info+0x78/0x1d0
kill_proc_info+0x5b/0x110
__x64_sys_kill+0x93/0xc0
do_syscall_64+0x5c/0xf0
entry_SYSCALL_64_after_hwframe+0x6e/0x76
RIP: 0033:0x7f0dab31f92b
This warning occurs because set_cpus_allowed dequeues and enqueues tasks
with the ENQUEUE_RESTORE flag set. If the task is boosted, the warning
is triggered. A boosted task already had its parameters set by
rt_mutex_setprio, and a new call to setup_new_dl_entity is unnecessary,
hence the WARN_ON call.
Check if we are requeueing a boosted task and avoid calling
setup_new_dl_entity if that's the case.
Fixes: 295d6d5e3736 ("sched/deadline: Fix switching to -deadline")
Signed-off-by: Wander Lairson Costa <wander(a)redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Juri Lelli <juri.lelli(a)redhat.com>
Link: https://lore.kernel.org/r/20240724142253.27145-2-wander@redhat.com
[Minor context change fixed.]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Build test passed.
---
kernel/sched/deadline.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 7f378fa0b6ed..e1371227a3bf 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1656,6 +1656,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
} else if (flags & ENQUEUE_REPLENISH) {
replenish_dl_entity(dl_se);
} else if ((flags & ENQUEUE_RESTORE) &&
+ !is_dl_boosted(dl_se) &&
dl_time_before(dl_se->deadline,
rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
setup_new_dl_entity(dl_se);
--
2.34.1
From: Wander Lairson Costa <wander(a)redhat.com>
[ Upstream commit 0664e2c311b9fa43b33e3e81429cd0c2d7f9c638 ]
When running the following command:
while true; do
stress-ng --cyclic 30 --timeout 30s --minimize --quiet
done
a warning is eventually triggered:
WARNING: CPU: 43 PID: 2848 at kernel/sched/deadline.c:794
setup_new_dl_entity+0x13e/0x180
...
Call Trace:
<TASK>
? show_trace_log_lvl+0x1c4/0x2df
? enqueue_dl_entity+0x631/0x6e0
? setup_new_dl_entity+0x13e/0x180
? __warn+0x7e/0xd0
? report_bug+0x11a/0x1a0
? handle_bug+0x3c/0x70
? exc_invalid_op+0x14/0x70
? asm_exc_invalid_op+0x16/0x20
enqueue_dl_entity+0x631/0x6e0
enqueue_task_dl+0x7d/0x120
__do_set_cpus_allowed+0xe3/0x280
__set_cpus_allowed_ptr_locked+0x140/0x1d0
__set_cpus_allowed_ptr+0x54/0xa0
migrate_enable+0x7e/0x150
rt_spin_unlock+0x1c/0x90
group_send_sig_info+0xf7/0x1a0
? kill_pid_info+0x1f/0x1d0
kill_pid_info+0x78/0x1d0
kill_proc_info+0x5b/0x110
__x64_sys_kill+0x93/0xc0
do_syscall_64+0x5c/0xf0
entry_SYSCALL_64_after_hwframe+0x6e/0x76
RIP: 0033:0x7f0dab31f92b
This warning occurs because set_cpus_allowed dequeues and enqueues tasks
with the ENQUEUE_RESTORE flag set. If the task is boosted, the warning
is triggered. A boosted task already had its parameters set by
rt_mutex_setprio, and a new call to setup_new_dl_entity is unnecessary,
hence the WARN_ON call.
Check if we are requeueing a boosted task and avoid calling
setup_new_dl_entity if that's the case.
Fixes: 295d6d5e3736 ("sched/deadline: Fix switching to -deadline")
Signed-off-by: Wander Lairson Costa <wander(a)redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Juri Lelli <juri.lelli(a)redhat.com>
Link: https://lore.kernel.org/r/20240724142253.27145-2-wander@redhat.com
[Minor context change fixed.]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Build test passed.
---
kernel/sched/deadline.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 66eb68c59f0b..5bb8915b1ca4 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1514,6 +1514,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
} else if (flags & ENQUEUE_REPLENISH) {
replenish_dl_entity(dl_se);
} else if ((flags & ENQUEUE_RESTORE) &&
+ !is_dl_boosted(dl_se) &&
dl_time_before(dl_se->deadline,
rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
setup_new_dl_entity(dl_se);
--
2.34.1