From: Suganath Prabu S <suganath-prabu.subramani(a)broadcom.com>
[ Upstream commit 4588e65cfd66fc8bbd9969ea730db39b60a36a30 ]
Avoid scanning SAS/SATA devices in channel 1 when SAS transport is
enabled, as the SAS/SATA devices are exposed through channel 0.
Signed-off-by: Suganath Prabu S <suganath-prabu.subramani(a)broadcom.com>
Signed-off-by: Ranjan Kumar <ranjan.kumar(a)broadcom.com>
Link: https://lore.kernel.org/stable/20251120071955.463475-1-suganath-prabu.subra…
Link: https://patch.msgid.link/20251120071955.463475-1-suganath-prabu.subramani@b…
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
### DEPENDENCY VERIFICATION
The `non_stl` field and `sas_transport_enabled` were both introduced in
commit c4723e68a0d81 ("scsi: mpi3mr: Enable STL on HBAs where multipath
is disabled") from August 2022 (Linux 6.0 cycle).
This means the patch is only applicable to stable kernels 6.1.y and
later (not 5.15.y).
### SUMMARY
**Bug being fixed**: Duplicate SAS/SATA device entries appearing in both
channel 0 (SAS transport) and channel 1 (generic SCSI) when SAS
transport is enabled, causing user confusion and management issues.
**Stable kernel criteria assessment**:
- ✅ **Obviously correct**: Simple conditional checks with clear logic
- ✅ **Fixes a real bug**: Duplicate device enumeration is a real user-
visible issue
- ✅ **Small and contained**: Only 4 lines of actual code change
- ✅ **No new features**: Just corrects existing device enumeration logic
- ✅ **Tested**: Merged through maintainer tree with proper sign-offs
- ✅ **Intentional stable submission**: Link to stable mailing list
present
**Risk vs Benefit**:
- **Risk**: Very low - simple conditional check; the worst case is a
device visibility issue
- **Benefit**: Fixes confusing duplicate device entries for MPI3MR users
with SAS transport
**Concerns**:
1. Requires commit c4723e68a0d81 to be present (6.1.y and later only)
2. Version bump in header should be stripped for stable backport
The explicit submission to the stable mailing list, the small surgical
nature of the fix, and the clear bug it addresses make this a valid
stable backport candidate for kernels 6.1.y and newer.
**YES**
drivers/scsi/mpi3mr/mpi3mr.h | 4 ++--
drivers/scsi/mpi3mr/mpi3mr_os.c | 4 +++-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 6742684e2990a..31d68c151b207 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -56,8 +56,8 @@ extern struct list_head mrioc_list;
extern int prot_mask;
extern atomic64_t event_counter;
-#define MPI3MR_DRIVER_VERSION "8.15.0.5.50"
-#define MPI3MR_DRIVER_RELDATE "12-August-2025"
+#define MPI3MR_DRIVER_VERSION "8.15.0.5.51"
+#define MPI3MR_DRIVER_RELDATE "18-November-2025"
#define MPI3MR_DRIVER_NAME "mpi3mr"
#define MPI3MR_DRIVER_LICENSE "GPL"
diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
index b88633e1efe27..d4ca878d08869 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
@@ -1184,6 +1184,8 @@ static void mpi3mr_update_tgtdev(struct mpi3mr_ioc *mrioc,
if (is_added == true)
tgtdev->io_throttle_enabled =
(flags & MPI3_DEVICE0_FLAGS_IO_THROTTLING_REQUIRED) ? 1 : 0;
+ if (!mrioc->sas_transport_enabled)
+ tgtdev->non_stl = 1;
switch (flags & MPI3_DEVICE0_FLAGS_MAX_WRITE_SAME_MASK) {
case MPI3_DEVICE0_FLAGS_MAX_WRITE_SAME_256_LB:
@@ -4844,7 +4846,7 @@ static int mpi3mr_target_alloc(struct scsi_target *starget)
spin_lock_irqsave(&mrioc->tgtdev_lock, flags);
if (starget->channel == mrioc->scsi_device_channel) {
tgt_dev = __mpi3mr_get_tgtdev_by_perst_id(mrioc, starget->id);
- if (tgt_dev && !tgt_dev->is_hidden) {
+ if (tgt_dev && !tgt_dev->is_hidden && tgt_dev->non_stl) {
scsi_tgt_priv_data->starget = starget;
scsi_tgt_priv_data->dev_handle = tgt_dev->dev_handle;
scsi_tgt_priv_data->perst_id = tgt_dev->perst_id;
--
2.51.0
In etm_setup_aux(), obtaining a user sink via
coresight_get_sink_by_id() increments the reference count of the sink
device. When the sink is then used for path building, the path takes
its own reference, but the initial reference from
coresight_get_sink_by_id() is never released, causing a reference
count leak. Release the initial reference once the path is built.
Found by code review.
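For context, a rough sketch of the lifecycle in question (names follow
the etm-perf code, but the surrounding logic is elided and illustrative
only):

	/* etm_setup_aux(), roughly: */
	user_sink = coresight_get_sink_by_id(id); /* takes a device reference */
	...
	/* each per-CPU path built against the sink takes its own
	 * reference, so the lookup reference above is no longer
	 * needed once the paths are in place */
out:
	if (user_sink) {
		put_device(&user_sink->dev); /* drop the lookup reference */
		user_sink = NULL;
	}
	return event_data;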
Cc: stable(a)vger.kernel.org
Fixes: 0e6c20517596 ("coresight: etm-perf: Allow an event to use different sinks")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- modified the patch as suggestions.
---
drivers/hwtracing/coresight/coresight-etm-perf.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 17afa0f4cdee..56d012ab6d3a 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -454,6 +454,11 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
goto err;
out:
+ if (user_sink) {
+ put_device(&user_sink->dev);
+ user_sink = NULL;
+ }
+
return event_data;
err:
--
2.17.1
The driver_override_show function reads the driver_override string
without holding the device_lock. However, the store function modifies
and frees the string while holding the device_lock. This creates a race
condition where the string can be freed by the store function while
being read by the show function, leading to a use-after-free.
To fix this, replace the rpmsg_string_attr macro with explicit show and
store functions. The new driver_override_store uses the standard
driver_set_override helper. Since the introduction of
driver_set_override, the comments in include/linux/rpmsg.h have stated
that this helper must be used to set or clear driver_override, but the
implementation was not updated until now.
Because driver_set_override modifies and frees the string while holding
the device_lock, the new driver_override_show now correctly holds the
device_lock during the read operation to prevent the race.
Additionally, since rpmsg_string_attr has only ever been used for
driver_override, removing the macro simplifies the code.
Fixes: 39e47767ec9b ("rpmsg: Add driver_override device attribute for rpmsg_device")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
---
I verified this with a stress test that continuously writes/reads the
attribute. It triggered KASAN and leaked bytes like a0 f4 81 9f a3 ff ff
(likely kernel pointers). Since driver_override is world-readable (0644),
this allows unprivileged users to leak kernel pointers and bypass KASLR.
Similar races were fixed in other buses (e.g., commits 9561475db680 and
91d44c1afc61). Currently, 9 of 11 buses handle this correctly; this patch
fixes one of the remaining two.
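For reference, a minimal userspace stress test along these lines might
look as follows (a sketch: the sysfs path is hypothetical and
platform-dependent, and error handling is trimmed):

#include <fcntl.h>
#include <pthread.h>
#include <unistd.h>

/* Hypothetical device path; substitute a real rpmsg device. */
#define ATTR "/sys/bus/rpmsg/devices/virtio0.test.-1.0/driver_override"

static void *writer(void *arg)
{
	for (;;) {
		int fd = open(ATTR, O_WRONLY);

		if (fd >= 0) {
			write(fd, "dummy", 5);	/* allocate a new override */
			write(fd, "\n", 1);	/* free it again */
			close(fd);
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t t;
	char buf[64];

	pthread_create(&t, NULL, writer, NULL);
	for (;;) {
		int fd = open(ATTR, O_RDONLY);

		if (fd >= 0) {
			/* without the device_lock in show(), this read
			 * races with the kfree() in store() */
			read(fd, buf, sizeof(buf));
			close(fd);
		}
	}
}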
---
drivers/rpmsg/rpmsg_core.c | 66 ++++++++++++++++----------------------
1 file changed, 27 insertions(+), 39 deletions(-)
diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index 5d661681a9b6..96964745065b 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -352,50 +352,38 @@ field##_show(struct device *dev, \
} \
static DEVICE_ATTR_RO(field);
-#define rpmsg_string_attr(field, member) \
-static ssize_t \
-field##_store(struct device *dev, struct device_attribute *attr, \
- const char *buf, size_t sz) \
-{ \
- struct rpmsg_device *rpdev = to_rpmsg_device(dev); \
- const char *old; \
- char *new; \
- \
- new = kstrndup(buf, sz, GFP_KERNEL); \
- if (!new) \
- return -ENOMEM; \
- new[strcspn(new, "\n")] = '\0'; \
- \
- device_lock(dev); \
- old = rpdev->member; \
- if (strlen(new)) { \
- rpdev->member = new; \
- } else { \
- kfree(new); \
- rpdev->member = NULL; \
- } \
- device_unlock(dev); \
- \
- kfree(old); \
- \
- return sz; \
-} \
-static ssize_t \
-field##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- struct rpmsg_device *rpdev = to_rpmsg_device(dev); \
- \
- return sprintf(buf, "%s\n", rpdev->member); \
-} \
-static DEVICE_ATTR_RW(field)
-
/* for more info, see Documentation/ABI/testing/sysfs-bus-rpmsg */
rpmsg_show_attr(name, id.name, "%s\n");
rpmsg_show_attr(src, src, "0x%x\n");
rpmsg_show_attr(dst, dst, "0x%x\n");
rpmsg_show_attr(announce, announce ? "true" : "false", "%s\n");
-rpmsg_string_attr(driver_override, driver_override);
+
+static ssize_t driver_override_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct rpmsg_device *rpdev = to_rpmsg_device(dev);
+ int ret;
+
+ ret = driver_set_override(dev, &rpdev->driver_override, buf, count);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rpmsg_device *rpdev = to_rpmsg_device(dev);
+ ssize_t len;
+
+ device_lock(dev);
+ len = sysfs_emit(buf, "%s\n", rpdev->driver_override);
+ device_unlock(dev);
+ return len;
+}
+static DEVICE_ATTR_RW(driver_override);
static ssize_t modalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
--
2.43.0
Hi Sasha,
On Sat, 13 Dec 2025 04:49:42 -0500
Sasha Levin <sashal(a)kernel.org> wrote:
> This is a note to let you know that I've just added the patch titled
>
> RAS: Report all ARM processor CPER information to userspace
>
> to the 6.18-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> ras-report-all-arm-processor-cper-information-to-use.patch
> and it can be found in the queue-6.18 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
You should also backport this patch(*):
96b010536ee0 efi/cper: align ARM CPER type with UEFI 2.9A/2.10 specs
It fixes a bug in the UEFI parser for the ARM Processor Error record:
the specs were not clear about how the error type should be reported.
The kernel implementation was assuming that the field is an enum, but
UEFI errata 2.9A makes it clear that the value is a bitmap.
So all kernels up to 6.18 are not parsing the field the expected way:
only "Cache error" was properly reported; the other three types were
reported incorrectly.
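To illustrate the difference (a sketch: the macro names here are made
up, and the bit tests follow the 2.9A errata reading of the field):

	/* Old assumption: the error type field is an enumeration. */
	switch (proc_err->type) {
	case ARM_ERR_TYPE_CACHE:	/* the only case that ever matched */
		report_cache_error();
		break;
	}

	/* Errata 2.9A: the field is a bitmap, so several types can be
	 * set at once and each bit must be tested individually. */
	if (proc_err->type & ARM_ERR_TYPE_CACHE)
		report_cache_error();
	if (proc_err->type & ARM_ERR_TYPE_TLB)
		report_tlb_error();
	if (proc_err->type & ARM_ERR_TYPE_BUS)
		report_bus_error();
	if (proc_err->type & ARM_ERR_TYPE_UARCH)
		report_uarch_error();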
(*) You could need to backport those patches as well:
a976d790f494 efi/cper: Add a new helper function to print bitmasks
8ad2c72e21ef efi/cper: Adjust infopfx size to accept an extra space
Regards,
Mauro
On 12/15/25 09:37, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> block: fix cached zone reports on devices with native zone append
>
> to the 6.18-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> block-fix-cached-zone-reports-on-devices-with-native.patch
> and it can be found in the queue-6.18 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Sasha,
This is a fix for a new feature that was queued for and is now added to 6.19. So
backporting this to stable and LTS kernels is not advisable.
--
Damien Le Moal
Western Digital Research
On 12/13/25 20:09, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> block: mq-deadline: Remove support for zone write locking
>
> to the 6.6-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> block-mq-deadline-remove-support-for-zone-write-lock.patch
> and it can be found in the queue-6.6 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Sasha,
Zone write locking in the mq-deadline scheduler was replaced with the generic
zone write plugging in the block layer in 6.10. That was not backported as that
is a new feature. So removing zone write locking in 6.6 will break support for
SMR drives and other zoned block devices. Removing it from 6.6 is thus not OK.
Please undo this.
> commit bf2022eaa2291ad1243b0711d5bd03ba4105ffbb
> Author: Damien Le Moal <dlemoal(a)kernel.org>
> Date: Mon Apr 8 10:41:21 2024 +0900
>
> block: mq-deadline: Remove support for zone write locking
>
> [ Upstream commit fde02699c242e88a71286677d27cc890a959b67f ]
>
> With the block layer generic plugging of write operations for zoned
> block devices, mq-deadline, or any other scheduler, can only ever
> see at most one write operation per zone at any time. There is thus no
> sequentiality requirements for these writes and thus no need to tightly
> control the dispatching of write requests using zone write locking.
>
> Remove all the code that implement this control in the mq-deadline
> scheduler and remove advertizing support for the
> ELEVATOR_F_ZBD_SEQ_WRITE elevator feature.
>
> Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org>
> Reviewed-by: Hannes Reinecke <hare(a)suse.de>
> Reviewed-by: Christoph Hellwig <hch(a)lst.de>
> Reviewed-by: Bart Van Assche <bvanassche(a)acm.org>
> Tested-by: Hans Holmberg <hans.holmberg(a)wdc.com>
> Tested-by: Dennis Maisenbacher <dennis.maisenbacher(a)wdc.com>
> Reviewed-by: Martin K. Petersen <martin.petersen(a)oracle.com>
> Link: https://lore.kernel.org/r/20240408014128.205141-22-dlemoal@kernel.org
> Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
> Stable-dep-of: d60055cf5270 ("block/mq-deadline: Switch back to a single dispatch list")
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index 78a8aa204c156..23638b03d7b3d 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -102,7 +102,6 @@ struct deadline_data {
> int prio_aging_expire;
>
> spinlock_t lock;
> - spinlock_t zone_lock;
> };
>
> /* Maps an I/O priority class to a deadline scheduler priority. */
> @@ -157,8 +156,7 @@ deadline_latter_request(struct request *rq)
> }
>
> /*
> - * Return the first request for which blk_rq_pos() >= @pos. For zoned devices,
> - * return the first request after the start of the zone containing @pos.
> + * Return the first request for which blk_rq_pos() >= @pos.
> */
> static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
> enum dd_data_dir data_dir, sector_t pos)
> @@ -170,14 +168,6 @@ static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
> return NULL;
>
> rq = rb_entry_rq(node);
> - /*
> - * A zoned write may have been requeued with a starting position that
> - * is below that of the most recently dispatched request. Hence, for
> - * zoned writes, start searching from the start of a zone.
> - */
> - if (blk_rq_is_seq_zoned_write(rq))
> - pos = round_down(pos, rq->q->limits.chunk_sectors);
> -
> while (node) {
> rq = rb_entry_rq(node);
> if (blk_rq_pos(rq) >= pos) {
> @@ -308,36 +298,6 @@ static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
> return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
> }
>
> -/*
> - * Check if rq has a sequential request preceding it.
> - */
> -static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq)
> -{
> - struct request *prev = deadline_earlier_request(rq);
> -
> - if (!prev)
> - return false;
> -
> - return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq);
> -}
> -
> -/*
> - * Skip all write requests that are sequential from @rq, even if we cross
> - * a zone boundary.
> - */
> -static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
> - struct request *rq)
> -{
> - sector_t pos = blk_rq_pos(rq);
> -
> - do {
> - pos += blk_rq_sectors(rq);
> - rq = deadline_latter_request(rq);
> - } while (rq && blk_rq_pos(rq) == pos);
> -
> - return rq;
> -}
> -
> /*
> * For the specified data direction, return the next request to
> * dispatch using arrival ordered lists.
> @@ -346,40 +306,10 @@ static struct request *
> deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
> enum dd_data_dir data_dir)
> {
> - struct request *rq, *rb_rq, *next;
> - unsigned long flags;
> -
> if (list_empty(&per_prio->fifo_list[data_dir]))
> return NULL;
>
> - rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
> - if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
> - return rq;
> -
> - /*
> - * Look for a write request that can be dispatched, that is one with
> - * an unlocked target zone. For some HDDs, breaking a sequential
> - * write stream can lead to lower throughput, so make sure to preserve
> - * sequential write streams, even if that stream crosses into the next
> - * zones and these zones are unlocked.
> - */
> - spin_lock_irqsave(&dd->zone_lock, flags);
> - list_for_each_entry_safe(rq, next, &per_prio->fifo_list[DD_WRITE],
> - queuelist) {
> - /* Check whether a prior request exists for the same zone. */
> - rb_rq = deadline_from_pos(per_prio, data_dir, blk_rq_pos(rq));
> - if (rb_rq && blk_rq_pos(rb_rq) < blk_rq_pos(rq))
> - rq = rb_rq;
> - if (blk_req_can_dispatch_to_zone(rq) &&
> - (blk_queue_nonrot(rq->q) ||
> - !deadline_is_seq_write(dd, rq)))
> - goto out;
> - }
> - rq = NULL;
> -out:
> - spin_unlock_irqrestore(&dd->zone_lock, flags);
> -
> - return rq;
> + return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
> }
>
> /*
> @@ -390,36 +320,8 @@ static struct request *
> deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
> enum dd_data_dir data_dir)
> {
> - struct request *rq;
> - unsigned long flags;
> -
> - rq = deadline_from_pos(per_prio, data_dir,
> - per_prio->latest_pos[data_dir]);
> - if (!rq)
> - return NULL;
> -
> - if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
> - return rq;
> -
> - /*
> - * Look for a write request that can be dispatched, that is one with
> - * an unlocked target zone. For some HDDs, breaking a sequential
> - * write stream can lead to lower throughput, so make sure to preserve
> - * sequential write streams, even if that stream crosses into the next
> - * zones and these zones are unlocked.
> - */
> - spin_lock_irqsave(&dd->zone_lock, flags);
> - while (rq) {
> - if (blk_req_can_dispatch_to_zone(rq))
> - break;
> - if (blk_queue_nonrot(rq->q))
> - rq = deadline_latter_request(rq);
> - else
> - rq = deadline_skip_seq_writes(dd, rq);
> - }
> - spin_unlock_irqrestore(&dd->zone_lock, flags);
> -
> - return rq;
> + return deadline_from_pos(per_prio, data_dir,
> + per_prio->latest_pos[data_dir]);
> }
>
> /*
> @@ -525,10 +427,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
> rq = next_rq;
> }
>
> - /*
> - * For a zoned block device, if we only have writes queued and none of
> - * them can be dispatched, rq will be NULL.
> - */
> if (!rq)
> return NULL;
>
> @@ -549,10 +447,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
> prio = ioprio_class_to_prio[ioprio_class];
> dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
> dd->per_prio[prio].stats.dispatched++;
> - /*
> - * If the request needs its target zone locked, do it.
> - */
> - blk_req_zone_write_lock(rq);
> rq->rq_flags |= RQF_STARTED;
> return rq;
> }
> @@ -736,7 +630,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
> dd->fifo_batch = fifo_batch;
> dd->prio_aging_expire = prio_aging_expire;
> spin_lock_init(&dd->lock);
> - spin_lock_init(&dd->zone_lock);
>
> /* We dispatch from request queue wide instead of hw queue */
> blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
> @@ -818,12 +711,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>
> lockdep_assert_held(&dd->lock);
>
> - /*
> - * This may be a requeue of a write request that has locked its
> - * target zone. If it is the case, this releases the zone lock.
> - */
> - blk_req_zone_write_unlock(rq);
> -
> prio = ioprio_class_to_prio[ioprio_class];
> per_prio = &dd->per_prio[prio];
> if (!rq->elv.priv[0]) {
> @@ -855,18 +742,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
> */
> rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
> insert_before = &per_prio->fifo_list[data_dir];
> -#ifdef CONFIG_BLK_DEV_ZONED
> - /*
> - * Insert zoned writes such that requests are sorted by
> - * position per zone.
> - */
> - if (blk_rq_is_seq_zoned_write(rq)) {
> - struct request *rq2 = deadline_latter_request(rq);
> -
> - if (rq2 && blk_rq_zone_no(rq2) == blk_rq_zone_no(rq))
> - insert_before = &rq2->queuelist;
> - }
> -#endif
> list_add_tail(&rq->queuelist, insert_before);
> }
> }
> @@ -901,33 +776,8 @@ static void dd_prepare_request(struct request *rq)
> rq->elv.priv[0] = NULL;
> }
>
> -static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx)
> -{
> - struct deadline_data *dd = hctx->queue->elevator->elevator_data;
> - enum dd_prio p;
> -
> - for (p = 0; p <= DD_PRIO_MAX; p++)
> - if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE]))
> - return true;
> -
> - return false;
> -}
> -
> /*
> * Callback from inside blk_mq_free_request().
> - *
> - * For zoned block devices, write unlock the target zone of
> - * completed write requests. Do this while holding the zone lock
> - * spinlock so that the zone is never unlocked while deadline_fifo_request()
> - * or deadline_next_request() are executing. This function is called for
> - * all requests, whether or not these requests complete successfully.
> - *
> - * For a zoned block device, __dd_dispatch_request() may have stopped
> - * dispatching requests if all the queued requests are write requests directed
> - * at zones that are already locked due to on-going write requests. To ensure
> - * write request dispatch progress in this case, mark the queue as needing a
> - * restart to ensure that the queue is run again after completion of the
> - * request and zones being unlocked.
> */
> static void dd_finish_request(struct request *rq)
> {
> @@ -942,21 +792,8 @@ static void dd_finish_request(struct request *rq)
> * called dd_insert_requests(). Skip requests that bypassed I/O
> * scheduling. See also blk_mq_request_bypass_insert().
> */
> - if (!rq->elv.priv[0])
> - return;
> -
> - atomic_inc(&per_prio->stats.completed);
> -
> - if (blk_queue_is_zoned(q)) {
> - unsigned long flags;
> -
> - spin_lock_irqsave(&dd->zone_lock, flags);
> - blk_req_zone_write_unlock(rq);
> - spin_unlock_irqrestore(&dd->zone_lock, flags);
> -
> - if (dd_has_write_work(rq->mq_hctx))
> - blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
> - }
> + if (rq->elv.priv[0])
> + atomic_inc(&per_prio->stats.completed);
> }
>
> static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
> @@ -1280,7 +1117,6 @@ static struct elevator_type mq_deadline = {
> .elevator_attrs = deadline_attrs,
> .elevator_name = "mq-deadline",
> .elevator_alias = "deadline",
> - .elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
> .elevator_owner = THIS_MODULE,
> };
> MODULE_ALIAS("mq-deadline-iosched");
--
Damien Le Moal
Western Digital Research
Hi,
After a stable kernel update, the hwclock command seems no longer
functional on my SPARC system with an ST M48T59Y-70PC1 RTC:
# hwclock
[...long delay...]
hwclock: select() to /dev/rtc0 to wait for clock tick timed out
On prior kernels, there is no problem:
# hwclock
2025-10-22 22:21:04.806992-04:00
I reproduced the same failure on 6.18-rc2 and bisected to this commit:
commit 795cda8338eab036013314dbc0b04aae728880ab
Author: Esben Haabendal <esben(a)geanix.com>
Date: Fri May 16 09:23:35 2025 +0200
rtc: interface: Fix long-standing race when setting alarm
This commit was backported to all current 6.x stable branches,
as well as 5.15.x, so they all have the same regression.
Reverting this commit on top of 6.18-rc2 corrects the problem.
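In case it helps with triage, here is a minimal standalone version of
the step that fails (a sketch of the RTC_UIE_ON + select() sequence
hwclock performs; it assumes /dev/rtc0 and update-interrupt support):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/select.h>
#include <linux/rtc.h>

int main(void)
{
	unsigned long data;
	struct timeval tv = { .tv_sec = 5 };
	fd_set rfds;
	int fd = open("/dev/rtc0", O_RDONLY);

	if (fd < 0 || ioctl(fd, RTC_UIE_ON, 0) < 0) {
		perror("rtc");
		return 1;
	}

	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);
	/* good kernels return within ~1s; the regressed ones time out */
	if (select(fd + 1, &rfds, NULL, NULL, &tv) <= 0)
		puts("timed out waiting for the update interrupt");
	else if (read(fd, &data, sizeof(data)) > 0)
		puts("got clock tick");

	ioctl(fd, RTC_UIE_OFF, 0);
	close(fd);
	return 0;
}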
Let me know if you need any more info!
Thanks,
Nick
When of_find_net_device_by_node() successfully acquires a reference to
a network device but the subsequent call to dsa_port_parse_cpu()
fails, dsa_port_parse_of() returns without releasing the reference
count on the network device.
of_find_net_device_by_node() increments the reference count of the
returned structure, which should be balanced with a corresponding
put_device() when the reference is no longer needed.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: deff710703d8 ("net: dsa: Allow default tag protocol to be overridden from DT")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- simplified the patch as suggestions;
- modified the Fixes tag as suggestions.
---
net/dsa/dsa.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index a20efabe778f..31b409a47491 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1247,6 +1247,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
const char *name = of_get_property(dn, "label", NULL);
bool link = of_property_read_bool(dn, "link");
+ int err = 0;
dp->dn = dn;
@@ -1260,7 +1261,11 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
return -EPROBE_DEFER;
user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL);
- return dsa_port_parse_cpu(dp, conduit, user_protocol);
+ err = dsa_port_parse_cpu(dp, conduit, user_protocol);
+ if (err)
+ put_device(conduit);
+
+ return err;
}
if (link)
--
2.17.1
When the filesystem is being mounted, the kernel panics while the slot
map allocation for the local node is being written to disk. This
happens when the slot map buffer head block number, which should be
greater than or equal to `OCFS2_SUPER_BLOCK_BLKNO` (which evaluates to
2), is smaller than that, indicating on-disk metadata corruption. This
trips the BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO) check in
ocfs2_write_block(), causing the kernel to panic.
Fix this by adding a check in ocfs2_update_disk_slot(), right before
the call to ocfs2_write_block(): if `bh->b_blocknr` is less than
`OCFS2_SUPER_BLOCK_BLKNO`, call ocfs2_error(), which logs the error for
debugging purposes, and return its value to the caller of
ocfs2_update_disk_slot(), i.e. ocfs2_find_slot(). If ocfs2_error()
returns zero, return -EIO instead.
Reported-by: syzbot+c818e5c4559444f88aa0(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c818e5c4559444f88aa0
Tested-by: syzbot+c818e5c4559444f88aa0(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Prithvi Tambewagh <activprithvi(a)gmail.com>
---
v1->v2:
- Remove usage of le16_to_cpu() from ocfs2_error()
- Cast bh->b_blocknr to unsigned long long
- Remove type casting for OCFS2_SUPER_BLOCK_BLKNO
- Fix Sparse warnings reported in v1 by kernel test robot
- Update title from 'ocfs2: Fix kernel BUG in ocfs2_write_block' to
'ocfs2: fix kernel BUG in ocfs2_write_block'
v1 link: https://lore.kernel.org/all/20251206154819.175479-1-activprithvi@gmail.com/…
fs/ocfs2/slot_map.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index e544c704b583..e916a2e8f92d 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -193,6 +193,16 @@ static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
else
ocfs2_update_disk_slot_old(si, slot_num, &bh);
spin_unlock(&osb->osb_lock);
+ if (bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO) {
+ status = ocfs2_error(osb->sb,
+ "Invalid Slot Map Buffer Head "
+ "Block Number : %llu, Should be >= %d",
+ (unsigned long long)bh->b_blocknr,
+ OCFS2_SUPER_BLOCK_BLKNO);
+ if (!status)
+ return -EIO;
+ return status;
+ }
status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
if (status < 0)
base-commit: 24172e0d79900908cf5ebf366600616d29c9b417
--
2.43.0
kmb_probe() obtains a reference to a platform device via
of_find_device_by_node(). This call increases the reference count of
the returned device, which should be dropped by calling put_device()
when the device is no longer needed. However, the code fails to call
put_device() in several error handling paths and in the normal device
removal path. This can result in reference count leaks that prevent
proper cleanup of the platform device when the driver is unloaded or
during error recovery.
Add put_device() in all code paths where dsi_pdev is no longer needed,
including error paths and the normal removal path.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/gpu/drm/kmb/kmb_drv.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
index 7c2eb1152fc2..9733337abe92 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.c
+++ b/drivers/gpu/drm/kmb/kmb_drv.c
@@ -474,6 +474,8 @@ static void kmb_remove(struct platform_device *pdev)
/* Unregister DSI host */
kmb_dsi_host_unregister(kmb->kmb_dsi);
+ if (kmb->kmb_dsi && kmb->kmb_dsi->pdev)
+ put_device(&kmb->kmb_dsi->pdev->dev);
drm_atomic_helper_shutdown(drm);
}
@@ -518,17 +520,20 @@ static int kmb_probe(struct platform_device *pdev)
ret = kmb_dsi_host_bridge_init(get_device(&dsi_pdev->dev));
if (ret == -EPROBE_DEFER) {
- return -EPROBE_DEFER;
+ ret = -EPROBE_DEFER;
+ goto err_free2;
} else if (ret) {
DRM_ERROR("probe failed to initialize DSI host bridge\n");
- return ret;
+ goto err_free2;
}
/* Create DRM device */
kmb = devm_drm_dev_alloc(dev, &kmb_driver,
struct kmb_drm_private, drm);
- if (IS_ERR(kmb))
- return PTR_ERR(kmb);
+ if (IS_ERR(kmb)) {
+ ret = PTR_ERR(kmb);
+ goto err_free2;
+ }
dev_set_drvdata(dev, &kmb->drm);
@@ -577,7 +582,8 @@ static int kmb_probe(struct platform_device *pdev)
err_free1:
dev_set_drvdata(dev, NULL);
kmb_dsi_host_unregister(kmb->kmb_dsi);
-
+err_free2:
+ put_device(&dsi_pdev->dev);
return ret;
}
--
2.17.1
Changing the enable/disable sequence in commit c9b1150a68d9
("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable")
has caused regressions on multiple platforms: R-Car, MCDE, Rockchip.
This is an alternate series to Linus' series:
https://lore.kernel.org/all/20251202-mcde-drm-regression-thirdfix-v6-0-f1bf…
This series first reverts the original commit and reverts a fix for
mediatek which is no longer needed. It then exposes helper functions
from DRM core, and finally implements the new sequence only in the tidss
driver.
There is one more fix in upstream for the original commit, commit
5d91394f2361 ("drm/exynos: fimd: Guard display clock control with
runtime PM calls"), but I have not reverted that one as it looks like a
valid patch in its own right.
I added Cc stable v6.17+ to all patches, but I didn't add Fixes tags,
as I wasn't sure what they should point to. Perhaps I could add
Fixes: <original commit> to all of them.
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
---
Linus Walleij (1):
drm/atomic-helper: Export and namespace some functions
Tomi Valkeinen (3):
Revert "drm/atomic-helper: Re-order bridge chain pre-enable and post-disable"
Revert "drm/mediatek: dsi: Fix DSI host and panel bridge pre-enable order"
drm/tidss: Fix enable/disable order
drivers/gpu/drm/drm_atomic_helper.c | 122 ++++++++++++++----
drivers/gpu/drm/mediatek/mtk_dsi.c | 6 -
drivers/gpu/drm/tidss/tidss_kms.c | 30 ++++-
include/drm/drm_atomic_helper.h | 22 ++++
include/drm/drm_bridge.h | 249 ++++++++++--------------------------
5 files changed, 214 insertions(+), 215 deletions(-)
---
base-commit: 88e721ab978a86426aa08da520de77430fa7bb84
change-id: 20251205-drm-seq-fix-b4ed1f56604b
Best regards,
--
Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
The following commit has been merged into the x86/boot branch of tip:
Commit-ID: adbf61cc47cb72b102682e690ad323e1eda652c2
Gitweb: https://git.kernel.org/tip/adbf61cc47cb72b102682e690ad323e1eda652c2
Author: Yazen Ghannam <yazen.ghannam(a)amd.com>
AuthorDate: Tue, 11 Nov 2025 14:53:57
Committer: Ingo Molnar <mingo(a)kernel.org>
CommitterDate: Sun, 14 Dec 2025 09:19:03 +01:00
x86/acpi/boot: Correct acpi_is_processor_usable() check again
ACPI v6.3 defined a new "Online Capable" MADT LAPIC flag. This bit is
used in conjunction with the "Enabled" MADT LAPIC flag to determine if
a CPU can be enabled/hotplugged by the OS after boot.
Before the new bit was defined, the "Enabled" bit was explicitly
described like this (ACPI v6.0 wording provided):
"If zero, this processor is unusable, and the operating system
support will not attempt to use it"
This means that CPU hotplug (based on MADT) is not possible. Many BIOS
implementations follow this guidance. They may include LAPIC entries in
MADT for unavailable CPUs, but since these entries are marked with
"Enabled=0" it is expected that the OS will completely ignore these
entries.
However, QEMU will do the same (include entries with "Enabled=0") for
the purpose of allowing CPU hotplug within the guest.
Comment from QEMU function pc_madt_cpu_entry():
/* ACPI spec says that LAPIC entry for non present
* CPU may be omitted from MADT or it must be marked
* as disabled. However omitting non present CPU from
* MADT breaks hotplug on linux. So possible CPUs
* should be put in MADT but kept disabled.
*/
Recent Linux topology changes broke the QEMU use case. A subsequent fix
for the QEMU use case then broke bare-metal topology enumeration.
Rework the Linux MADT LAPIC flags check to allow the QEMU use case only
for guests and to maintain the ACPI spec behavior for bare metal.
Remove an unnecessary check added to fix a bare metal case introduced by
the QEMU "fix".
[ bp: Change logic as Michal suggested. ]
[ mingo: Removed misapplied -stable tag. ]
Fixes: fed8d8773b8e ("x86/acpi/boot: Correct acpi_is_processor_usable() check")
Fixes: f0551af02130 ("x86/topology: Ignore non-present APIC IDs in a present package")
Closes: https://lore.kernel.org/r/20251024204658.3da9bf3f.michal.pecio@gmail.com
Reported-by: Michal Pecio <michal.pecio(a)gmail.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Tested-by: Michal Pecio <michal.pecio(a)gmail.com>
Tested-by: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
Link: https://lore.kernel.org/20251111145357.4031846-1-yazen.ghannam@amd.com
Cc: stable(a)vger.kernel.org
---
arch/x86/kernel/acpi/boot.c | 12 ++++++++----
arch/x86/kernel/cpu/topology.c | 15 ---------------
2 files changed, 8 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 9fa321a..d6138b2 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -35,6 +35,7 @@
#include <asm/smp.h>
#include <asm/i8259.h>
#include <asm/setup.h>
+#include <asm/hypervisor.h>
#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
@@ -164,11 +165,14 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags)
if (lapic_flags & ACPI_MADT_ENABLED)
return true;
- if (!acpi_support_online_capable ||
- (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
- return true;
+ if (acpi_support_online_capable)
+ return lapic_flags & ACPI_MADT_ONLINE_CAPABLE;
- return false;
+ /*
+ * QEMU expects legacy "Enabled=0" LAPIC entries to be counted as usable
+ * in order to support CPU hotplug in guests.
+ */
+ return !hypervisor_is_type(X86_HYPER_NATIVE);
}
static int __init
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index f55ea3c..23190a7 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,6 @@
#include <xen/xen.h>
#include <asm/apic.h>
-#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
@@ -236,20 +235,6 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
cpuid_to_apicid[cpu] = apic_id;
topo_set_cpuids(cpu, apic_id, acpi_id);
} else {
- u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);
-
- /*
- * Check for present APICs in the same package when running
- * on bare metal. Allow the bogosity in a guest.
- */
- if (hypervisor_is_type(X86_HYPER_NATIVE) &&
- topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
- pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
- apic_id);
- topo_info.nr_rejected_cpus++;
- return;
- }
-
topo_info.nr_disabled_cpus++;
}
The struct ip_tunnel_info has a flexible array member named
options that is protected by a counted_by(options_len)
attribute.
The compiler will use this information to enforce runtime bounds
checking deployed by FORTIFY_SOURCE string helpers.
As laid out in the GCC documentation, the counter must be
initialized before the first reference to the flexible array
member.
In the normal case the ip_tunnel_info_opts_set() helper is used,
which initializes options_len properly; however, the GRE ERSPAN code
does a partial update, which prevents use of the helper function.
Before this change, handling ERSPAN traffic in GRE tunnels would
cause a kernel panic when the kernel is compiled with GCC 15+ and
has FORTIFY_SOURCE configured:
memcpy: detected buffer overflow: 4 byte write of buffer size 0
Call Trace:
<IRQ>
__fortify_panic+0xd/0xf
erspan_rcv.cold+0x68/0x83
? ip_route_input_slow+0x816/0x9d0
gre_rcv+0x1b2/0x1c0
gre_rcv+0x8e/0x100
? raw_v4_input+0x2a0/0x2b0
ip_protocol_deliver_rcu+0x1ea/0x210
ip_local_deliver_finish+0x86/0x110
ip_local_deliver+0x65/0x110
? ip_rcv_finish_core+0xd6/0x360
ip_rcv+0x186/0x1a0
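A standalone userspace illustration of the ordering requirement (a
sketch, not the kernel code; assumes GCC 15+ with fortified string
helpers, e.g. glibc built with -O2 -D_FORTIFY_SOURCE=3, for the check
to be armed):

#include <stdlib.h>
#include <string.h>

struct blob {
	unsigned int len;
	char data[] __attribute__((counted_by(len)));
};

int main(void)
{
	struct blob *b = calloc(1, sizeof(*b) + 4);

	if (!b)
		return 1;

	/* Wrong order: the bounds of b->data are derived from b->len,
	 * which is still 0 here, so a fortified memcpy() sees a 4-byte
	 * write into a 0-byte buffer and aborts:
	 *
	 *	memcpy(b->data, "abcd", 4);
	 */

	b->len = 4;			/* initialize the counter first ... */
	memcpy(b->data, "abcd", 4);	/* ... then touch the array */

	free(b);
	return 0;
}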
Link: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-co…
Reported-at: https://launchpad.net/bugs/2129580
Fixes: bb5e62f2d547 ("net: Add options as a flexible array to struct ip_tunnel_info")
Signed-off-by: Frode Nordahl <fnordahl(a)ubuntu.com>
---
net/ipv4/ip_gre.c | 18 ++++++++++++++++--
net/ipv6/ip6_gre.c | 18 ++++++++++++++++--
2 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 761a53c6a89a..285a656c9e41 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -330,6 +330,22 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (!tun_dst)
return PACKET_REJECT;
+ /* The struct ip_tunnel_info has a flexible array member named
+ * options that is protected by a counted_by(options_len)
+ * attribute.
+ *
+ * The compiler will use this information to enforce runtime bounds
+ * checking deployed by FORTIFY_SOURCE string helpers.
+ *
+ * As laid out in the GCC documentation, the counter must be
+ * initialized before the first reference to the flexible array
+ * member.
+ *
+ * Link: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-co…
+ */
+ info = &tun_dst->u.tun_info;
+ info->options_len = sizeof(*md);
+
/* skb can be uncloned in __iptunnel_pull_header, so
* old pkt_md is no longer valid and we need to reset
* it
@@ -344,10 +360,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
ERSPAN_V2_MDSIZE);
- info = &tun_dst->u.tun_info;
__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
info->key.tun_flags);
- info->options_len = sizeof(*md);
}
skb_reset_mac_header(skb);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c82a75510c0e..eb840a11b93b 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -535,6 +535,22 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (!tun_dst)
return PACKET_REJECT;
+ /* The struct ip_tunnel_info has a flexible array member named
+ * options that is protected by a counted_by(options_len)
+ * attribute.
+ *
+ * The compiler will use this information to enforce runtime bounds
+ * checking deployed by FORTIFY_SOURCE string helpers.
+ *
+ * As laid out in the GCC documentation, the counter must be
+ * initialized before the first reference to the flexible array
+ * member.
+ *
+ * Link: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-co…
+ */
+ info = &tun_dst->u.tun_info;
+ info->options_len = sizeof(*md);
+
/* skb can be uncloned in __iptunnel_pull_header, so
* old pkt_md is no longer valid and we need to reset
* it
@@ -543,7 +559,6 @@ static int ip6erspan_rcv(struct sk_buff *skb,
skb_network_header_len(skb);
pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
sizeof(*ershdr));
- info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
md->version = ver;
md2 = &md->u.md2;
@@ -551,7 +566,6 @@ static int ip6erspan_rcv(struct sk_buff *skb,
ERSPAN_V2_MDSIZE);
__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
info->key.tun_flags);
- info->options_len = sizeof(*md);
ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
--
2.43.0
synaptics_i2c_irq() schedules touch->dwork via mod_delayed_work().
The delayed work performs I2C transactions and may still be running
(or get queued) when the device is removed.
synaptics_i2c_remove() currently frees 'touch' without canceling
touch->dwork. If removal happens while the work is pending/running,
the work handler may dereference freed memory, leading to a potential
use-after-free.
Cancel the delayed work synchronously before unregistering/freeing
the device.
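The resulting teardown order (a sketch; the polling-mode conditional
around free_irq() is omitted):

	free_irq(client->irq, touch);		 /* no new work can be queued */
	cancel_delayed_work_sync(&touch->dwork); /* wait out in-flight work */
	input_unregister_device(touch->input);
	kfree(touch);				 /* safe: no remaining users */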
Fixes: eef3e4cab72e ("Input: add driver for Synaptics I2C touchpad")
Reported-by: Minseong Kim <ii4gsp(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Minseong Kim <ii4gsp(a)gmail.com>
---
drivers/input/mouse/synaptics_i2c.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index a0d707e47d93..fe30bf9aea3a 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -593,6 +593,8 @@ static void synaptics_i2c_remove(struct i2c_client *client)
if (!polling_req)
free_irq(client->irq, touch);
+ cancel_delayed_work_sync(&touch->dwork);
+
input_unregister_device(touch->input);
kfree(touch);
}
--
2.39.5
KVM currently fails a nested VMRUN and injects VMEXIT_INVALID (aka
SVM_EXIT_ERR) if L1 sets NP_ENABLE and the host does not support NPTs.
On first glance, it seems like the check should actually be for
guest_cpu_cap_has(X86_FEATURE_NPT) instead, as it is possible for the
host to support NPTs but the guest CPUID to not advertise it.
However, the consistency check is not architectural to begin with. The
APM does not mention VMEXIT_INVALID if NP_ENABLE is set on a processor
that does not have X86_FEATURE_NPT. Hence, NP_ENABLE should be ignored
if X86_FEATURE_NPT is not available for L1. Apart from the consistency
check, this is currently the case because NP_ENABLE is actually copied
from VMCB01 to VMCB02, not from VMCB12.
On the other hand, the APM does mention two other consistency checks for
NP_ENABLE, both of which are missing (paraphrased):
In Volume #2, 15.25.3 (24593—Rev. 3.42—March 2024):
If VMRUN is executed with hCR0.PG cleared to zero and NP_ENABLE set to
1, VMRUN terminates with #VMEXIT(VMEXIT_INVALID)
In Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024):
When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the
following conditions are considered illegal state combinations, in
addition to those mentioned in “Canonicalization and Consistency
Checks”:
• Any MBZ bit of nCR3 is set.
• Any G_PAT.PA field has an unsupported type encoding or any
reserved field in G_PAT has a nonzero value.
Replace the existing consistency check with consistency checks on
hCR0.PG and nCR3. Only perform the consistency checks if L1 has
X86_FEATURE_NPT and NP_ENABLE is set in VMCB12. The G_PAT consistency
check will be addressed separately.
As it is now possible for an L1 to run L2 with NP_ENABLE set but
ignored, also check that L1 has X86_FEATURE_NPT in nested_npt_enabled().
Pass L1's CR0 to __nested_vmcb_check_controls(). In
nested_vmcb_check_controls(), L1's CR0 is available through
kvm_read_cr0(), as vcpu->arch.cr0 is not updated to L2's CR0 until later
through nested_vmcb02_prepare_save() -> svm_set_cr0().
In svm_set_nested_state(), L1's CR0 is available in the captured save
area, as svm_get_nested_state() captures L1's save area when running L2,
and L1's CR0 is stashed in VMCB01 on nested VMRUN (in
nested_svm_vmrun()).
Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
---
arch/x86/kvm/svm/nested.c | 21 ++++++++++++++++-----
arch/x86/kvm/svm/svm.h | 3 ++-
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 74211c5c68026..87bcc5eff96e8 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -325,7 +325,8 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
}
static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- struct vmcb_ctrl_area_cached *control)
+ struct vmcb_ctrl_area_cached *control,
+ unsigned long l1_cr0)
{
if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
return false;
@@ -333,8 +334,12 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
if (CC(control->asid == 0))
return false;
- if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
- return false;
+ if (nested_npt_enabled(to_svm(vcpu))) {
+ if (CC(!kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
+ return false;
+ if (CC(!(l1_cr0 & X86_CR0_PG)))
+ return false;
+ }
if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
MSRPM_SIZE)))
@@ -400,7 +405,12 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
- return __nested_vmcb_check_controls(vcpu, ctl);
+ /*
+ * Make sure we did not enter guest mode yet, in which case
+ * kvm_read_cr0() could return L2's CR0.
+ */
+ WARN_ON_ONCE(is_guest_mode(vcpu));
+ return __nested_vmcb_check_controls(vcpu, ctl, kvm_read_cr0(vcpu));
}
static
@@ -1831,7 +1841,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
ret = -EINVAL;
__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
- if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
+ /* 'save' contains L1 state saved from before VMRUN */
+ if (!__nested_vmcb_check_controls(vcpu, &ctl_cached, save->cr0))
goto out_free;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f6fb70ddf7272..3e805a43ffcdb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -552,7 +552,8 @@ static inline bool gif_set(struct vcpu_svm *svm)
static inline bool nested_npt_enabled(struct vcpu_svm *svm)
{
- return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
+ return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_NPT) &&
+ svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
--
2.51.2.1041.gc1ab5b90ca-goog