The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 192cdb1c907fd8df2d764c5bb17496e415e59391
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012737-lavish-haziness-80cc@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
192cdb1c907f ("cpufreq: intel_pstate: Refine computation of P-state for given frequency")
458b03f81afb ("cpufreq: intel_pstate: Drop redundant intel_pstate_get_hwp_cap() call")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 192cdb1c907fd8df2d764c5bb17496e415e59391 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Mon, 22 Jan 2024 15:18:11 +0100
Subject: [PATCH] cpufreq: intel_pstate: Refine computation of P-state for
given frequency
On systems using HWP, if a given frequency is equal to the maximum turbo
frequency or the maximum non-turbo frequency, the HWP performance level
corresponding to it is already known and can be used directly without
any computation.
Accordingly, adjust the code to use the known HWP performance levels in
the cases mentioned above.
This also helps to avoid limiting CPU capacity artificially in some
cases when the BIOS produces the HWP_CAP numbers using a different
E-core-to-P-core performance scaling factor than expected by the kernel.
Fixes: f5c8cf2a4992 ("cpufreq: intel_pstate: hybrid: Use known scaling factor for P-cores")
Cc: 6.1+ <stable(a)vger.kernel.org> # 6.1+
Tested-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2ca70b0b5fdc..ca94e60e705a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -529,6 +529,30 @@ static int intel_pstate_cppc_get_scaling(int cpu)
}
#endif /* CONFIG_ACPI_CPPC_LIB */
+static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq,
+ unsigned int relation)
+{
+ if (freq == cpu->pstate.turbo_freq)
+ return cpu->pstate.turbo_pstate;
+
+ if (freq == cpu->pstate.max_freq)
+ return cpu->pstate.max_pstate;
+
+ switch (relation) {
+ case CPUFREQ_RELATION_H:
+ return freq / cpu->pstate.scaling;
+ case CPUFREQ_RELATION_C:
+ return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling);
+ }
+
+ return DIV_ROUND_UP(freq, cpu->pstate.scaling);
+}
+
+static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq)
+{
+ return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L);
+}
+
/**
* intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
* @cpu: Target CPU.
@@ -546,6 +570,7 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu);
int scaling = cpu->pstate.scaling;
+ int freq;
pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
@@ -559,16 +584,16 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
perf_ctl_scaling);
- cpu->pstate.max_pstate_physical =
- DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
- scaling);
+ freq = perf_ctl_max_phys * perf_ctl_scaling;
+ cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq);
- cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
+ freq = cpu->pstate.min_pstate * perf_ctl_scaling;
+ cpu->pstate.min_freq = freq;
/*
* Cast the min P-state value retrieved via pstate_funcs.get_min() to
* the effective range of HWP performance levels.
*/
- cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling);
+ cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq);
}
static inline void update_turbo_state(void)
@@ -2528,13 +2553,12 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
* abstract values to represent performance rather than pure ratios.
*/
if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) {
- int scaling = cpu->pstate.scaling;
int freq;
freq = max_policy_perf * perf_ctl_scaling;
- max_policy_perf = DIV_ROUND_UP(freq, scaling);
+ max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq);
freq = min_policy_perf * perf_ctl_scaling;
- min_policy_perf = DIV_ROUND_UP(freq, scaling);
+ min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq);
}
pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
@@ -2908,18 +2932,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
cpufreq_freq_transition_begin(policy, &freqs);
- switch (relation) {
- case CPUFREQ_RELATION_L:
- target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
- break;
- case CPUFREQ_RELATION_H:
- target_pstate = freqs.new / cpu->pstate.scaling;
- break;
- default:
- target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
- break;
- }
-
+ target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation);
target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false);
freqs.new = target_pstate * cpu->pstate.scaling;
@@ -2937,7 +2950,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
update_turbo_state();
- target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
+ target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq);
target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d8d222e09dab84a17bb65dda4b94d01c565f5327
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012736-provable-risotto-2228@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d8d222e09dab84a17bb65dda4b94d01c565f5327 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner(a)redhat.com>
Date: Tue, 16 Jan 2024 15:33:07 +1100
Subject: [PATCH] xfs: read only mounts with fsopen mount API are busted
Recently xfs/513 started failing on my test machines testing "-o
ro,norecovery" mount options. This was being emitted in dmesg:
[ 9906.932724] XFS (pmem0): no-recovery mounts must be read-only.
Turns out, readonly mounts with the fsopen()/fsconfig() mount API
have been busted since day zero. It's only taken 5 years for debian
unstable to start using this "new" mount API, and shortly after this
I noticed xfs/513 had started to fail as per above.
The syscall trace is:
fsopen("xfs", FSOPEN_CLOEXEC) = 3
mount_setattr(-1, NULL, 0, NULL, 0) = -1 EINVAL (Invalid argument)
.....
fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/pmem0", 0) = 0
fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0
fsconfig(3, FSCONFIG_SET_FLAG, "norecovery", NULL, 0) = 0
fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = -1 EINVAL (Invalid argument)
close(3) = 0
Showing that the actual mount instantiation (FSCONFIG_CMD_CREATE) is
what threw out the error.
During mount instantiation, we call xfs_fs_validate_params() which
does:
/* No recovery flag requires a read-only mount */
if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
xfs_warn(mp, "no-recovery mounts must be read-only.");
return -EINVAL;
}
and xfs_is_readonly() checks internal mount flags for read only
state. This state is set in xfs_init_fs_context() from the
context superblock flag state:
/*
* Copy binary VFS mount flags we are interested in.
*/
if (fc->sb_flags & SB_RDONLY)
set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
With the old mount API, all of the VFS specific superblock flags
had already been parsed and set before xfs_init_fs_context() is
called, so this all works fine.
However, in the brave new fsopen/fsconfig world,
xfs_init_fs_context() is called from fsopen() context, before any
VFS superblock have been set or parsed. Hence if we use fsopen(),
the internal XFS readonly state is *never set*. Hence anything that
depends on xfs_is_readonly() actually returning true for read only
mounts is broken if fsopen() has been used to mount the filesystem.
Fix this by moving this internal state initialisation to
xfs_fs_fill_super() before we attempt to validate the parameters
that have been set prior to the FSCONFIG_CMD_CREATE call being made.
Signed-off-by: Dave Chinner <dchinner(a)redhat.com>
Fixes: 73e5fff98b64 ("xfs: switch to use the new mount-api")
cc: stable(a)vger.kernel.org
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Chandan Babu R <chandanbabu(a)kernel.org>
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index aff20ddd4a9f..5a2512d20bd0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1496,6 +1496,18 @@ xfs_fs_fill_super(
mp->m_super = sb;
+ /*
+ * Copy VFS mount flags from the context now that all parameter parsing
+ * is guaranteed to have been completed by either the old mount API or
+ * the newer fsopen/fsconfig API.
+ */
+ if (fc->sb_flags & SB_RDONLY)
+ set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+ if (fc->sb_flags & SB_DIRSYNC)
+ mp->m_features |= XFS_FEAT_DIRSYNC;
+ if (fc->sb_flags & SB_SYNCHRONOUS)
+ mp->m_features |= XFS_FEAT_WSYNC;
+
error = xfs_fs_validate_params(mp);
if (error)
return error;
@@ -1965,6 +1977,11 @@ static const struct fs_context_operations xfs_context_ops = {
.free = xfs_fs_free,
};
+/*
+ * WARNING: do not initialise any parameters in this function that depend on
+ * mount option parsing having already been performed as this can be called from
+ * fsopen() before any parameters have been set.
+ */
static int xfs_init_fs_context(
struct fs_context *fc)
{
@@ -1996,16 +2013,6 @@ static int xfs_init_fs_context(
mp->m_logbsize = -1;
mp->m_allocsize_log = 16; /* 64k */
- /*
- * Copy binary VFS mount flags we are interested in.
- */
- if (fc->sb_flags & SB_RDONLY)
- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
- if (fc->sb_flags & SB_DIRSYNC)
- mp->m_features |= XFS_FEAT_DIRSYNC;
- if (fc->sb_flags & SB_SYNCHRONOUS)
- mp->m_features |= XFS_FEAT_WSYNC;
-
fc->s_fs_info = mp;
fc->ops = &xfs_context_ops;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 437a310b22244d4e0b78665c3042e5d1c0f45306
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012719-remarry-magical-0c2e@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
437a310b2224 ("firmware: arm_scmi: Check mailbox/SMT channel for consistency")
13fba878ccdd ("firmware: arm_scmi: Add priv parameter to scmi_rx_callback")
e9b21c96181c ("firmware: arm_scmi: Make .clear_channel optional")
ed7c04c1fea3 ("firmware: arm_scmi: Handle concurrent and out-of-order messages")
9ca5a1838e59 ("firmware: arm_scmi: Introduce monotonically increasing tokens")
3669032514be ("firmware: arm_scmi: Remove scmi_dump_header_dbg() helper")
e30d91d4ffda ("firmware: arm_scmi: Move reinit_completion from scmi_xfer_get to do_xfer")
0cb7af474e0d ("firmware: arm_scmi: Reset Rx buffer to max size during async commands")
d4f9dddd21f3 ("firmware: arm_scmi: Add dynamic scmi devices creation")
f5800e0bf6f9 ("firmware: arm_scmi: Add protocol modularization support")
a02d7c93c1f3 ("firmware: arm_scmi: Make notify_priv really private")
9162afa2ae99 ("firmware: arm_scmi: Cleanup unused core transfer helper wrappers")
51fe1b154e2f ("firmware: arm_scmi: Cleanup legacy protocol init code")
fe4894d968f4 ("firmware: arm_scmi: Port voltage protocol to new protocols interface")
b46d852718c1 ("firmware: arm_scmi: Port systempower protocol to new protocols interface")
9694a7f62359 ("firmware: arm_scmi: Port sensor protocol to new protocols interface")
7e0293442238 ("firmware: arm_scmi: Port reset protocol to new protocols interface")
887281c7519d ("firmware: arm_scmi: Port clock protocol to new protocols interface")
9bc8069c8567 ("firmware: arm_scmi: Port power protocol to new protocols interface")
1fec5e6b5233 ("firmware: arm_scmi: Port perf protocol to new protocols interface")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 437a310b22244d4e0b78665c3042e5d1c0f45306 Mon Sep 17 00:00:00 2001
From: Cristian Marussi <cristian.marussi(a)arm.com>
Date: Wed, 20 Dec 2023 17:21:12 +0000
Subject: [PATCH] firmware: arm_scmi: Check mailbox/SMT channel for consistency
On reception of a completion interrupt the shared memory area is accessed
to retrieve the message header at first and then, if the message sequence
number identifies a transaction which is still pending, the related
payload is fetched too.
When an SCMI command times out the channel ownership remains with the
platform until eventually a late reply is received and, as a consequence,
any further transmission attempt remains pending, waiting for the channel
to be relinquished by the platform.
Once that late reply is received the channel ownership is given back
to the agent and any pending request is then allowed to proceed and
overwrite the SMT area of the just delivered late reply; then the wait
for the reply to the new request starts.
It has been observed that the spurious IRQ related to the late reply can
be wrongly associated with the freshly enqueued request: when that happens
the SCMI stack in-flight lookup procedure is fooled by the fact that the
message header now present in the SMT area is related to the new pending
transaction, even though the real reply has still to arrive.
This race-condition on the A2P channel can be detected by looking at the
channel status bits: a genuine reply from the platform will have set the
channel free bit before triggering the completion IRQ.
Add a consistency check to validate such condition in the A2P ISR.
Reported-by: Xinglong Yang <xinglong.yang(a)cixtech.com>
Closes: https://lore.kernel.org/all/PUZPR06MB54981E6FA00D82BFDBB864FBF08DA@PUZPR06M…
Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type")
Cc: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Cristian Marussi <cristian.marussi(a)arm.com>
Tested-by: Xinglong Yang <xinglong.yang(a)cixtech.com>
Link: https://lore.kernel.org/r/20231220172112.763539-1-cristian.marussi@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla(a)arm.com>
diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h
index c46dc5215af7..00b165d1f502 100644
--- a/drivers/firmware/arm_scmi/common.h
+++ b/drivers/firmware/arm_scmi/common.h
@@ -314,6 +314,7 @@ void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem,
void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem);
bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem,
struct scmi_xfer *xfer);
+bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem);
/* declarations for message passing transports */
struct scmi_msg_payld;
diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c
index 19246ed1f01f..b8d470417e8f 100644
--- a/drivers/firmware/arm_scmi/mailbox.c
+++ b/drivers/firmware/arm_scmi/mailbox.c
@@ -45,6 +45,20 @@ static void rx_callback(struct mbox_client *cl, void *m)
{
struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl);
+ /*
+ * An A2P IRQ is NOT valid when received while the platform still has
+ * the ownership of the channel, because the platform at first releases
+ * the SMT channel and then sends the completion interrupt.
+ *
+ * This addresses a possible race condition in which a spurious IRQ from
+ * a previous timed-out reply which arrived late could be wrongly
+ * associated with the next pending transaction.
+ */
+ if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) {
+ dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n");
+ return;
+ }
+
scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL);
}
diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c
index 87b4f4d35f06..517d52fb3bcb 100644
--- a/drivers/firmware/arm_scmi/shmem.c
+++ b/drivers/firmware/arm_scmi/shmem.c
@@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem,
(SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR |
SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
}
+
+bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem)
+{
+ return (ioread32(&shmem->channel_status) &
+ SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
+}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x edcf9725150e42beeca42d085149f4c88fa97afd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012712-spherical-huntsman-cd8b@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
043862b09cc0 ("NFSD: Add documenting comment for nfsd4_release_lockowner()")
bd8fdb6e545f ("NFSD: Modernize nfsd4_release_lockowner()")
ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
eb82dd393744 ("nfsd: convert fi_deleg_file and ls_file fields to nfsd_file")
fd4f83fd7dfb ("nfsd: convert nfs4_file->fi_fds array to use nfsd_files")
0c4b62b042fe ("nfsd4: show layout stateids")
16d36e099980 ("nfsd: show lock and deleg stateids")
78599c42ae3c ("nfsd4: add file to display list of client's opens")
97ad4031e295 ("nfsd4: add a client info file")
bf5ed3e3bb84 ("nfsd: make client/ directory names small ints")
e8a79fb14f6b ("nfsd: add nfsd/clients directory")
59f8e91b75ec ("nfsd4: use reference count to free client")
14ed14cc7c06 ("nfsd: rename cl_refcount")
2c830dd7209b ("nfsd: persist nfsd filesystem across mounts")
3ba75830ce17 ("nfsd4: drc containerization")
e333f3bbefe3 ("nfsd: Allow containers to set supported nfs versions")
029be5d03357 ("nfsd: Add custom rpcbind callbacks for knfsd")
642ee6b209c2 ("SUNRPC: Allow further customisation of RPC program registration")
8e5b67731d08 ("SUNRPC: Add a callback to initialise server requests")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From edcf9725150e42beeca42d085149f4c88fa97afd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb(a)suse.de>
Date: Mon, 22 Jan 2024 14:58:16 +1100
Subject: [PATCH] nfsd: fix RELEASE_LOCKOWNER
The test on so_count in nfsd4_release_lockowner() is nonsense and
harmful. Revert to using check_for_locks(), changing that to not sleep.
First: harmful.
As is documented in the kdoc comment for nfsd4_release_lockowner(), the
test on so_count can transiently return a false positive resulting in a
return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is
clearly a protocol violation and with the Linux NFS client it can cause
incorrect behaviour.
If RELEASE_LOCKOWNER is sent while some other thread is still
processing a LOCK request which failed because, at the time that request
was received, the given owner held a conflicting lock, then the nfsd
thread processing that LOCK request can hold a reference (conflock) to
the lock owner that causes nfsd4_release_lockowner() to return an
incorrect error.
The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
it knows that the error is impossible. It assumes the lock owner was in
fact released so it feels free to use the same lock owner identifier in
some later locking request.
When it does reuse a lock owner identifier for which a previous RELEASE
failed, it will naturally use a lock_seqid of zero. However the server,
which didn't release the lock owner, will expect a larger lock_seqid and
so will respond with NFS4ERR_BAD_SEQID.
So clearly it is harmful to allow a false positive, which testing
so_count allows.
The test is nonsense because ... well... it doesn't mean anything.
so_count is the sum of three different counts.
1/ the set of states listed on so_stateids
2/ the set of active vfs locks owned by any of those states
3/ various transient counts such as for conflicting locks.
When it is tested against '2' it is clear that one of these is the
transient reference obtained by find_lockowner_str_locked(). It is not
clear what the other one is expected to be.
In practice, the count is often 2 because there is precisely one state
on so_stateids. If there were more, this would fail.
In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in
all the lock states being removed, and so the lockowner being discarded
(it is removed when there are no more references which usually happens
when the lock state is discarded). When nfsd4_release_lockowner() finds
that the lock owner doesn't exist, it returns success.
The other case shows an so_count of '2' and precisely one state listed
in so_stateid. It appears that the Linux client uses a separate lock
owner for each file resulting in one lock state per lock owner, so this
test on '2' is safe. For another client it might not be safe.
So this patch changes check_for_locks() to use the (newish)
find_any_file_locked() so that it doesn't take a reference on the
nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With
this check is it safe to restore the use of check_for_locks() rather
than testing so_count against the mysterious '2'.
Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
Signed-off-by: NeilBrown <neilb(a)suse.de>
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Cc: stable(a)vger.kernel.org # v6.2+
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2fa54cfd4882..6dc6340e2852 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7911,14 +7911,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock *fl;
int status = false;
- struct nfsd_file *nf = find_any_file(fp);
+ struct nfsd_file *nf;
struct inode *inode;
struct file_lock_context *flctx;
+ spin_lock(&fp->fi_lock);
+ nf = find_any_file_locked(fp);
if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
- return status;
+ goto out;
}
inode = file_inode(nf->nf_file);
@@ -7934,7 +7936,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
spin_unlock(&flctx->flc_lock);
}
- nfsd_file_put(nf);
+out:
+ spin_unlock(&fp->fi_lock);
return status;
}
@@ -7944,10 +7947,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
* @cstate: NFSv4 COMPOUND state
* @u: RELEASE_LOCKOWNER arguments
*
- * The lockowner's so_count is bumped when a lock record is added
- * or when copying a conflicting lock. The latter case is brief,
- * but can lead to fleeting false positives when looking for
- * locks-in-use.
+ * Check if theree are any locks still held and if not - free the lockowner
+ * and any lock state that is owned.
*
* Return values:
* %nfs_ok: lockowner released or not found
@@ -7983,10 +7984,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
spin_unlock(&clp->cl_lock);
return nfs_ok;
}
- if (atomic_read(&lo->lo_owner.so_count) != 2) {
- spin_unlock(&clp->cl_lock);
- nfs4_put_stateowner(&lo->lo_owner);
- return nfserr_locks_held;
+
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ if (check_for_locks(stp->st_stid.sc_file, lo)) {
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_stateowner(&lo->lo_owner);
+ return nfserr_locks_held;
+ }
}
unhash_lockowner_locked(lo);
while (!list_empty(&lo->lo_owner.so_stateids)) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x edcf9725150e42beeca42d085149f4c88fa97afd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012708-unease-remedy-8d5e@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
043862b09cc0 ("NFSD: Add documenting comment for nfsd4_release_lockowner()")
bd8fdb6e545f ("NFSD: Modernize nfsd4_release_lockowner()")
ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From edcf9725150e42beeca42d085149f4c88fa97afd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb(a)suse.de>
Date: Mon, 22 Jan 2024 14:58:16 +1100
Subject: [PATCH] nfsd: fix RELEASE_LOCKOWNER
The test on so_count in nfsd4_release_lockowner() is nonsense and
harmful. Revert to using check_for_locks(), changing that to not sleep.
First: harmful.
As is documented in the kdoc comment for nfsd4_release_lockowner(), the
test on so_count can transiently return a false positive resulting in a
return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is
clearly a protocol violation and with the Linux NFS client it can cause
incorrect behaviour.
If RELEASE_LOCKOWNER is sent while some other thread is still
processing a LOCK request which failed because, at the time that request
was received, the given owner held a conflicting lock, then the nfsd
thread processing that LOCK request can hold a reference (conflock) to
the lock owner that causes nfsd4_release_lockowner() to return an
incorrect error.
The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
it knows that the error is impossible. It assumes the lock owner was in
fact released so it feels free to use the same lock owner identifier in
some later locking request.
When it does reuse a lock owner identifier for which a previous RELEASE
failed, it will naturally use a lock_seqid of zero. However the server,
which didn't release the lock owner, will expect a larger lock_seqid and
so will respond with NFS4ERR_BAD_SEQID.
So clearly it is harmful to allow a false positive, which testing
so_count allows.
The test is nonsense because ... well... it doesn't mean anything.
so_count is the sum of three different counts.
1/ the set of states listed on so_stateids
2/ the set of active vfs locks owned by any of those states
3/ various transient counts such as for conflicting locks.
When it is tested against '2' it is clear that one of these is the
transient reference obtained by find_lockowner_str_locked(). It is not
clear what the other one is expected to be.
In practice, the count is often 2 because there is precisely one state
on so_stateids. If there were more, this would fail.
In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in
all the lock states being removed, and so the lockowner being discarded
(it is removed when there are no more references which usually happens
when the lock state is discarded). When nfsd4_release_lockowner() finds
that the lock owner doesn't exist, it returns success.
The other case shows an so_count of '2' and precisely one state listed
in so_stateid. It appears that the Linux client uses a separate lock
owner for each file resulting in one lock state per lock owner, so this
test on '2' is safe. For another client it might not be safe.
So this patch changes check_for_locks() to use the (newish)
find_any_file_locked() so that it doesn't take a reference on the
nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With
this check is it safe to restore the use of check_for_locks() rather
than testing so_count against the mysterious '2'.
Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
Signed-off-by: NeilBrown <neilb(a)suse.de>
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Cc: stable(a)vger.kernel.org # v6.2+
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2fa54cfd4882..6dc6340e2852 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7911,14 +7911,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock *fl;
int status = false;
- struct nfsd_file *nf = find_any_file(fp);
+ struct nfsd_file *nf;
struct inode *inode;
struct file_lock_context *flctx;
+ spin_lock(&fp->fi_lock);
+ nf = find_any_file_locked(fp);
if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
- return status;
+ goto out;
}
inode = file_inode(nf->nf_file);
@@ -7934,7 +7936,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
spin_unlock(&flctx->flc_lock);
}
- nfsd_file_put(nf);
+out:
+ spin_unlock(&fp->fi_lock);
return status;
}
@@ -7944,10 +7947,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
* @cstate: NFSv4 COMPOUND state
* @u: RELEASE_LOCKOWNER arguments
*
- * The lockowner's so_count is bumped when a lock record is added
- * or when copying a conflicting lock. The latter case is brief,
- * but can lead to fleeting false positives when looking for
- * locks-in-use.
+ * Check if theree are any locks still held and if not - free the lockowner
+ * and any lock state that is owned.
*
* Return values:
* %nfs_ok: lockowner released or not found
@@ -7983,10 +7984,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
spin_unlock(&clp->cl_lock);
return nfs_ok;
}
- if (atomic_read(&lo->lo_owner.so_count) != 2) {
- spin_unlock(&clp->cl_lock);
- nfs4_put_stateowner(&lo->lo_owner);
- return nfserr_locks_held;
+
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ if (check_for_locks(stp->st_stid.sc_file, lo)) {
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_stateowner(&lo->lo_owner);
+ return nfserr_locks_held;
+ }
}
unhash_lockowner_locked(lo);
while (!list_empty(&lo->lo_owner.so_stateids)) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x edcf9725150e42beeca42d085149f4c88fa97afd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012705-lid-broadband-fb10@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
043862b09cc0 ("NFSD: Add documenting comment for nfsd4_release_lockowner()")
bd8fdb6e545f ("NFSD: Modernize nfsd4_release_lockowner()")
ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From edcf9725150e42beeca42d085149f4c88fa97afd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb(a)suse.de>
Date: Mon, 22 Jan 2024 14:58:16 +1100
Subject: [PATCH] nfsd: fix RELEASE_LOCKOWNER
The test on so_count in nfsd4_release_lockowner() is nonsense and
harmful. Revert to using check_for_locks(), changing that to not sleep.
First: harmful.
As is documented in the kdoc comment for nfsd4_release_lockowner(), the
test on so_count can transiently return a false positive resulting in a
return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is
clearly a protocol violation and with the Linux NFS client it can cause
incorrect behaviour.
If RELEASE_LOCKOWNER is sent while some other thread is still
processing a LOCK request which failed because, at the time that request
was received, the given owner held a conflicting lock, then the nfsd
thread processing that LOCK request can hold a reference (conflock) to
the lock owner that causes nfsd4_release_lockowner() to return an
incorrect error.
The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
it knows that the error is impossible. It assumes the lock owner was in
fact released so it feels free to use the same lock owner identifier in
some later locking request.
When it does reuse a lock owner identifier for which a previous RELEASE
failed, it will naturally use a lock_seqid of zero. However the server,
which didn't release the lock owner, will expect a larger lock_seqid and
so will respond with NFS4ERR_BAD_SEQID.
So clearly it is harmful to allow a false positive, which testing
so_count allows.
The test is nonsense because ... well... it doesn't mean anything.
so_count is the sum of three different counts.
1/ the set of states listed on so_stateids
2/ the set of active vfs locks owned by any of those states
3/ various transient counts such as for conflicting locks.
When it is tested against '2' it is clear that one of these is the
transient reference obtained by find_lockowner_str_locked(). It is not
clear what the other one is expected to be.
In practice, the count is often 2 because there is precisely one state
on so_stateids. If there were more, this would fail.
In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in
all the lock states being removed, and so the lockowner being discarded
(it is removed when there are no more references which usually happens
when the lock state is discarded). When nfsd4_release_lockowner() finds
that the lock owner doesn't exist, it returns success.
The other case shows an so_count of '2' and precisely one state listed
in so_stateid. It appears that the Linux client uses a separate lock
owner for each file resulting in one lock state per lock owner, so this
test on '2' is safe. For another client it might not be safe.
So this patch changes check_for_locks() to use the (newish)
find_any_file_locked() so that it doesn't take a reference on the
nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With
this check is it safe to restore the use of check_for_locks() rather
than testing so_count against the mysterious '2'.
Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
Signed-off-by: NeilBrown <neilb(a)suse.de>
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Cc: stable(a)vger.kernel.org # v6.2+
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2fa54cfd4882..6dc6340e2852 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7911,14 +7911,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock *fl;
int status = false;
- struct nfsd_file *nf = find_any_file(fp);
+ struct nfsd_file *nf;
struct inode *inode;
struct file_lock_context *flctx;
+ spin_lock(&fp->fi_lock);
+ nf = find_any_file_locked(fp);
if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
- return status;
+ goto out;
}
inode = file_inode(nf->nf_file);
@@ -7934,7 +7936,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
spin_unlock(&flctx->flc_lock);
}
- nfsd_file_put(nf);
+out:
+ spin_unlock(&fp->fi_lock);
return status;
}
@@ -7944,10 +7947,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
* @cstate: NFSv4 COMPOUND state
* @u: RELEASE_LOCKOWNER arguments
*
- * The lockowner's so_count is bumped when a lock record is added
- * or when copying a conflicting lock. The latter case is brief,
- * but can lead to fleeting false positives when looking for
- * locks-in-use.
+ * Check if theree are any locks still held and if not - free the lockowner
+ * and any lock state that is owned.
*
* Return values:
* %nfs_ok: lockowner released or not found
@@ -7983,10 +7984,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
spin_unlock(&clp->cl_lock);
return nfs_ok;
}
- if (atomic_read(&lo->lo_owner.so_count) != 2) {
- spin_unlock(&clp->cl_lock);
- nfs4_put_stateowner(&lo->lo_owner);
- return nfserr_locks_held;
+
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ if (check_for_locks(stp->st_stid.sc_file, lo)) {
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_stateowner(&lo->lo_owner);
+ return nfserr_locks_held;
+ }
}
unhash_lockowner_locked(lo);
while (!list_empty(&lo->lo_owner.so_stateids)) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x edcf9725150e42beeca42d085149f4c88fa97afd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012701-catfight-regular-bc98@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
043862b09cc0 ("NFSD: Add documenting comment for nfsd4_release_lockowner()")
bd8fdb6e545f ("NFSD: Modernize nfsd4_release_lockowner()")
ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From edcf9725150e42beeca42d085149f4c88fa97afd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb(a)suse.de>
Date: Mon, 22 Jan 2024 14:58:16 +1100
Subject: [PATCH] nfsd: fix RELEASE_LOCKOWNER
The test on so_count in nfsd4_release_lockowner() is nonsense and
harmful. Revert to using check_for_locks(), changing that to not sleep.
First: harmful.
As is documented in the kdoc comment for nfsd4_release_lockowner(), the
test on so_count can transiently return a false positive resulting in a
return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is
clearly a protocol violation and with the Linux NFS client it can cause
incorrect behaviour.
If RELEASE_LOCKOWNER is sent while some other thread is still
processing a LOCK request which failed because, at the time that request
was received, the given owner held a conflicting lock, then the nfsd
thread processing that LOCK request can hold a reference (conflock) to
the lock owner that causes nfsd4_release_lockowner() to return an
incorrect error.
The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
it knows that the error is impossible. It assumes the lock owner was in
fact released so it feels free to use the same lock owner identifier in
some later locking request.
When it does reuse a lock owner identifier for which a previous RELEASE
failed, it will naturally use a lock_seqid of zero. However the server,
which didn't release the lock owner, will expect a larger lock_seqid and
so will respond with NFS4ERR_BAD_SEQID.
So clearly it is harmful to allow a false positive, which testing
so_count allows.
The test is nonsense because ... well... it doesn't mean anything.
so_count is the sum of three different counts.
1/ the set of states listed on so_stateids
2/ the set of active vfs locks owned by any of those states
3/ various transient counts such as for conflicting locks.
When it is tested against '2' it is clear that one of these is the
transient reference obtained by find_lockowner_str_locked(). It is not
clear what the other one is expected to be.
In practice, the count is often 2 because there is precisely one state
on so_stateids. If there were more, this would fail.
In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in
all the lock states being removed, and so the lockowner being discarded
(it is removed when there are no more references which usually happens
when the lock state is discarded). When nfsd4_release_lockowner() finds
that the lock owner doesn't exist, it returns success.
The other case shows an so_count of '2' and precisely one state listed
in so_stateid. It appears that the Linux client uses a separate lock
owner for each file resulting in one lock state per lock owner, so this
test on '2' is safe. For another client it might not be safe.
So this patch changes check_for_locks() to use the (newish)
find_any_file_locked() so that it doesn't take a reference on the
nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With
this check is it safe to restore the use of check_for_locks() rather
than testing so_count against the mysterious '2'.
Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
Signed-off-by: NeilBrown <neilb(a)suse.de>
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Cc: stable(a)vger.kernel.org # v6.2+
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2fa54cfd4882..6dc6340e2852 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7911,14 +7911,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock *fl;
int status = false;
- struct nfsd_file *nf = find_any_file(fp);
+ struct nfsd_file *nf;
struct inode *inode;
struct file_lock_context *flctx;
+ spin_lock(&fp->fi_lock);
+ nf = find_any_file_locked(fp);
if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
- return status;
+ goto out;
}
inode = file_inode(nf->nf_file);
@@ -7934,7 +7936,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
spin_unlock(&flctx->flc_lock);
}
- nfsd_file_put(nf);
+out:
+ spin_unlock(&fp->fi_lock);
return status;
}
@@ -7944,10 +7947,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
* @cstate: NFSv4 COMPOUND state
* @u: RELEASE_LOCKOWNER arguments
*
- * The lockowner's so_count is bumped when a lock record is added
- * or when copying a conflicting lock. The latter case is brief,
- * but can lead to fleeting false positives when looking for
- * locks-in-use.
+ * Check if theree are any locks still held and if not - free the lockowner
+ * and any lock state that is owned.
*
* Return values:
* %nfs_ok: lockowner released or not found
@@ -7983,10 +7984,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
spin_unlock(&clp->cl_lock);
return nfs_ok;
}
- if (atomic_read(&lo->lo_owner.so_count) != 2) {
- spin_unlock(&clp->cl_lock);
- nfs4_put_stateowner(&lo->lo_owner);
- return nfserr_locks_held;
+
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ if (check_for_locks(stp->st_stid.sc_file, lo)) {
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_stateowner(&lo->lo_owner);
+ return nfserr_locks_held;
+ }
}
unhash_lockowner_locked(lo);
while (!list_empty(&lo->lo_owner.so_stateids)) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 02444f2ac26eae6385a65fcd66915084d15dffba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012707-pushover-sherry-f45f@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
02444f2ac26e ("btrfs: zoned: optimize hint byte for zoned allocator")
b271fee9a41c ("btrfs: zoned: factor out prepare_allocation_zoned()")
c2707a255623 ("btrfs: zoned: add a dedicated data relocation block group")
be1a1d7a5d24 ("btrfs: zoned: finish fully written block group")
a85f05e59bc1 ("btrfs: zoned: avoid chunk allocation if active block group has enough space")
a12b0dc0aa4d ("btrfs: move ffe_ctl one level up")
2e654e4bb9ac ("btrfs: zoned: activate block group on allocation")
afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
dafc340dbd10 ("btrfs: zoned: introduce physical_map to btrfs_block_group")
98173255bddd ("btrfs: zoned: calculate free space from zone capacity")
8eae532be753 ("btrfs: zoned: load zone capacity information from devices")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 02444f2ac26eae6385a65fcd66915084d15dffba Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota(a)wdc.com>
Date: Tue, 19 Dec 2023 01:02:29 +0900
Subject: [PATCH] btrfs: zoned: optimize hint byte for zoned allocator
Writing sequentially to a huge file on btrfs on a SMR HDD revealed a
decline of the performance (220 MiB/s to 30 MiB/s after 500 minutes).
The performance goes down because of increased latency of the extent
allocation, which is induced by a traversing of a lot of full block groups.
So, this patch optimizes the ffe_ctl->hint_byte by choosing a block group
with sufficient size from the active block group list, which does not
contain full block groups.
After applying the patch, the performance is maintained well.
Fixes: 2eda57089ea3 ("btrfs: zoned: implement sequential extent allocation")
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d260b970bec7..6d680031211a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4311,6 +4311,24 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
if (fs_info->data_reloc_bg)
ffe_ctl->hint_byte = fs_info->data_reloc_bg;
spin_unlock(&fs_info->relocation_bg_lock);
+ } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
+ struct btrfs_block_group *block_group;
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+ /*
+ * No lock is OK here because avail is monotinically
+ * decreasing, and this is just a hint.
+ */
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+ }
+ }
+ spin_unlock(&fs_info->zone_active_bgs_lock);
}
return 0;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 02444f2ac26eae6385a65fcd66915084d15dffba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012705-replace-mumble-78cc@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
02444f2ac26e ("btrfs: zoned: optimize hint byte for zoned allocator")
b271fee9a41c ("btrfs: zoned: factor out prepare_allocation_zoned()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 02444f2ac26eae6385a65fcd66915084d15dffba Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota(a)wdc.com>
Date: Tue, 19 Dec 2023 01:02:29 +0900
Subject: [PATCH] btrfs: zoned: optimize hint byte for zoned allocator
Writing sequentially to a huge file on btrfs on a SMR HDD revealed a
decline of the performance (220 MiB/s to 30 MiB/s after 500 minutes).
The performance goes down because of increased latency of the extent
allocation, which is induced by a traversing of a lot of full block groups.
So, this patch optimizes the ffe_ctl->hint_byte by choosing a block group
with sufficient size from the active block group list, which does not
contain full block groups.
After applying the patch, the performance is maintained well.
Fixes: 2eda57089ea3 ("btrfs: zoned: implement sequential extent allocation")
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d260b970bec7..6d680031211a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4311,6 +4311,24 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
if (fs_info->data_reloc_bg)
ffe_ctl->hint_byte = fs_info->data_reloc_bg;
spin_unlock(&fs_info->relocation_bg_lock);
+ } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
+ struct btrfs_block_group *block_group;
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+ /*
+ * No lock is OK here because avail is monotinically
+ * decreasing, and this is just a hint.
+ */
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+ }
+ }
+ spin_unlock(&fs_info->zone_active_bgs_lock);
}
return 0;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 02444f2ac26eae6385a65fcd66915084d15dffba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012704-outclass-seventy-a7bb@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
02444f2ac26e ("btrfs: zoned: optimize hint byte for zoned allocator")
b271fee9a41c ("btrfs: zoned: factor out prepare_allocation_zoned()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 02444f2ac26eae6385a65fcd66915084d15dffba Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota(a)wdc.com>
Date: Tue, 19 Dec 2023 01:02:29 +0900
Subject: [PATCH] btrfs: zoned: optimize hint byte for zoned allocator
Writing sequentially to a huge file on btrfs on a SMR HDD revealed a
decline of the performance (220 MiB/s to 30 MiB/s after 500 minutes).
The performance goes down because of increased latency of the extent
allocation, which is induced by a traversing of a lot of full block groups.
So, this patch optimizes the ffe_ctl->hint_byte by choosing a block group
with sufficient size from the active block group list, which does not
contain full block groups.
After applying the patch, the performance is maintained well.
Fixes: 2eda57089ea3 ("btrfs: zoned: implement sequential extent allocation")
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d260b970bec7..6d680031211a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4311,6 +4311,24 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
if (fs_info->data_reloc_bg)
ffe_ctl->hint_byte = fs_info->data_reloc_bg;
spin_unlock(&fs_info->relocation_bg_lock);
+ } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
+ struct btrfs_block_group *block_group;
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+ /*
+ * No lock is OK here because avail is monotinically
+ * decreasing, and this is just a hint.
+ */
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+ }
+ }
+ spin_unlock(&fs_info->zone_active_bgs_lock);
}
return 0;
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 02444f2ac26eae6385a65fcd66915084d15dffba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012702-algorithm-mongoose-277c@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
02444f2ac26e ("btrfs: zoned: optimize hint byte for zoned allocator")
b271fee9a41c ("btrfs: zoned: factor out prepare_allocation_zoned()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 02444f2ac26eae6385a65fcd66915084d15dffba Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota(a)wdc.com>
Date: Tue, 19 Dec 2023 01:02:29 +0900
Subject: [PATCH] btrfs: zoned: optimize hint byte for zoned allocator
Writing sequentially to a huge file on btrfs on a SMR HDD revealed a
decline of the performance (220 MiB/s to 30 MiB/s after 500 minutes).
The performance goes down because of increased latency of the extent
allocation, which is induced by a traversing of a lot of full block groups.
So, this patch optimizes the ffe_ctl->hint_byte by choosing a block group
with sufficient size from the active block group list, which does not
contain full block groups.
After applying the patch, the performance is maintained well.
Fixes: 2eda57089ea3 ("btrfs: zoned: implement sequential extent allocation")
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d260b970bec7..6d680031211a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4311,6 +4311,24 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
if (fs_info->data_reloc_bg)
ffe_ctl->hint_byte = fs_info->data_reloc_bg;
spin_unlock(&fs_info->relocation_bg_lock);
+ } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
+ struct btrfs_block_group *block_group;
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+ /*
+ * No lock is OK here because avail is monotinically
+ * decreasing, and this is just a hint.
+ */
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+ }
+ }
+ spin_unlock(&fs_info->zone_active_bgs_lock);
}
return 0;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 7081929ab2572920e94d70be3d332e5c9f97095a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012750-shore-gradually-d4bb@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
7081929ab257 ("btrfs: don't abort filesystem when attempting to snapshot deleted subvolume")
813febdbe6c9 ("btrfs: disable snapshot creation/deletion for extent tree v2")
4467af880929 ("btrfs: remove root argument from btrfs_unlink_inode()")
bd54f381a12a ("btrfs: do not pin logs too early during renames")
9a56fcd15a9c ("btrfs: make btrfs_update_inode take btrfs_inode")
76aea5379678 ("btrfs: make btrfs_inode_safe_disk_i_size_write take btrfs_inode")
2766ff61762c ("btrfs: update the number of bytes used by an inode atomically")
5893dfb98f25 ("btrfs: refactor btrfs_drop_extents() to make it easier to extend")
ac5887c8e013 ("btrfs: locking: remove all the blocking helpers")
a14b78ad06ab ("btrfs: introduce btrfs_inode_lock()/unlock()")
b8d8e1fd570a ("btrfs: introduce btrfs_write_check()")
c86537a42f86 ("btrfs: check FS error state bit early during write")
5e8b9ef30392 ("btrfs: move pos increment and pagecache extension to btrfs_buffered_write")
4e4cabece9f9 ("btrfs: split btrfs_direct_IO to read and write")
196d59ab9ccc ("btrfs: switch extent buffer tree lock to rw_semaphore")
0425e7badbdc ("btrfs: don't fallback to buffered read if we don't need to")
3c38c877fcb9 ("btrfs: sink inode argument in insert_ordered_extent_file_extent")
fc0d82e103c7 ("btrfs: sink total_data parameter in setup_items_for_insert")
3dc9dc8969dc ("btrfs: eliminate total_size parameter from setup_items_for_insert")
0cbb5bdfea26 ("btrfs: rename btrfs_insert_clone_extent() to a more generic name")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7081929ab2572920e94d70be3d332e5c9f97095a Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Thu, 4 Jan 2024 11:48:46 -0800
Subject: [PATCH] btrfs: don't abort filesystem when attempting to snapshot
deleted subvolume
If the source file descriptor to the snapshot ioctl refers to a deleted
subvolume, we get the following abort:
BTRFS: Transaction aborted (error -2)
WARNING: CPU: 0 PID: 833 at fs/btrfs/transaction.c:1875 create_pending_snapshot+0x1040/0x1190 [btrfs]
Modules linked in: pata_acpi btrfs ata_piix libata scsi_mod virtio_net blake2b_generic xor net_failover virtio_rng failover scsi_common rng_core raid6_pq libcrc32c
CPU: 0 PID: 833 Comm: t_snapshot_dele Not tainted 6.7.0-rc6 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014
RIP: 0010:create_pending_snapshot+0x1040/0x1190 [btrfs]
RSP: 0018:ffffa09c01337af8 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff9982053e7c78 RCX: 0000000000000027
RDX: ffff99827dc20848 RSI: 0000000000000001 RDI: ffff99827dc20840
RBP: ffffa09c01337c00 R08: 0000000000000000 R09: ffffa09c01337998
R10: 0000000000000003 R11: ffffffffb96da248 R12: fffffffffffffffe
R13: ffff99820535bb28 R14: ffff99820b7bd000 R15: ffff99820381ea80
FS: 00007fe20aadabc0(0000) GS:ffff99827dc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000559a120b502f CR3: 00000000055b6000 CR4: 00000000000006f0
Call Trace:
<TASK>
? create_pending_snapshot+0x1040/0x1190 [btrfs]
? __warn+0x81/0x130
? create_pending_snapshot+0x1040/0x1190 [btrfs]
? report_bug+0x171/0x1a0
? handle_bug+0x3a/0x70
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? create_pending_snapshot+0x1040/0x1190 [btrfs]
? create_pending_snapshot+0x1040/0x1190 [btrfs]
create_pending_snapshots+0x92/0xc0 [btrfs]
btrfs_commit_transaction+0x66b/0xf40 [btrfs]
btrfs_mksubvol+0x301/0x4d0 [btrfs]
btrfs_mksnapshot+0x80/0xb0 [btrfs]
__btrfs_ioctl_snap_create+0x1c2/0x1d0 [btrfs]
btrfs_ioctl_snap_create_v2+0xc4/0x150 [btrfs]
btrfs_ioctl+0x8a6/0x2650 [btrfs]
? kmem_cache_free+0x22/0x340
? do_sys_openat2+0x97/0xe0
__x64_sys_ioctl+0x97/0xd0
do_syscall_64+0x46/0xf0
entry_SYSCALL_64_after_hwframe+0x6e/0x76
RIP: 0033:0x7fe20abe83af
RSP: 002b:00007ffe6eff1360 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fe20abe83af
RDX: 00007ffe6eff23c0 RSI: 0000000050009417 RDI: 0000000000000003
RBP: 0000000000000003 R08: 0000000000000000 R09: 00007fe20ad16cd0
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007ffe6eff13c0 R14: 00007fe20ad45000 R15: 0000559a120b6d58
</TASK>
---[ end trace 0000000000000000 ]---
BTRFS: error (device vdc: state A) in create_pending_snapshot:1875: errno=-2 No such entry
BTRFS info (device vdc: state EA): forced readonly
BTRFS warning (device vdc: state EA): Skipping commit of aborted transaction.
BTRFS: error (device vdc: state EA) in cleanup_transaction:2055: errno=-2 No such entry
This happens because create_pending_snapshot() initializes the new root
item as a copy of the source root item. This includes the refs field,
which is 0 for a deleted subvolume. The call to btrfs_insert_root()
therefore inserts a root with refs == 0. btrfs_get_new_fs_root() then
finds the root and returns -ENOENT if refs == 0, which causes
create_pending_snapshot() to abort.
Fix it by checking the source root's refs before attempting the
snapshot, but after locking subvol_sem to avoid racing with deletion.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4e50b62db2a8..fea5d37528b8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -790,6 +790,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
return -EOPNOTSUPP;
}
+ if (btrfs_root_refs(&root->root_item) == 0)
+ return -ENOENT;
+
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 7081929ab2572920e94d70be3d332e5c9f97095a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012747-marshland-overcoat-1e01@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
7081929ab257 ("btrfs: don't abort filesystem when attempting to snapshot deleted subvolume")
813febdbe6c9 ("btrfs: disable snapshot creation/deletion for extent tree v2")
4467af880929 ("btrfs: remove root argument from btrfs_unlink_inode()")
bd54f381a12a ("btrfs: do not pin logs too early during renames")
9a56fcd15a9c ("btrfs: make btrfs_update_inode take btrfs_inode")
76aea5379678 ("btrfs: make btrfs_inode_safe_disk_i_size_write take btrfs_inode")
2766ff61762c ("btrfs: update the number of bytes used by an inode atomically")
5893dfb98f25 ("btrfs: refactor btrfs_drop_extents() to make it easier to extend")
ac5887c8e013 ("btrfs: locking: remove all the blocking helpers")
a14b78ad06ab ("btrfs: introduce btrfs_inode_lock()/unlock()")
b8d8e1fd570a ("btrfs: introduce btrfs_write_check()")
c86537a42f86 ("btrfs: check FS error state bit early during write")
5e8b9ef30392 ("btrfs: move pos increment and pagecache extension to btrfs_buffered_write")
4e4cabece9f9 ("btrfs: split btrfs_direct_IO to read and write")
196d59ab9ccc ("btrfs: switch extent buffer tree lock to rw_semaphore")
0425e7badbdc ("btrfs: don't fallback to buffered read if we don't need to")
3c38c877fcb9 ("btrfs: sink inode argument in insert_ordered_extent_file_extent")
fc0d82e103c7 ("btrfs: sink total_data parameter in setup_items_for_insert")
3dc9dc8969dc ("btrfs: eliminate total_size parameter from setup_items_for_insert")
0cbb5bdfea26 ("btrfs: rename btrfs_insert_clone_extent() to a more generic name")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7081929ab2572920e94d70be3d332e5c9f97095a Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Thu, 4 Jan 2024 11:48:46 -0800
Subject: [PATCH] btrfs: don't abort filesystem when attempting to snapshot
deleted subvolume
If the source file descriptor to the snapshot ioctl refers to a deleted
subvolume, we get the following abort:
BTRFS: Transaction aborted (error -2)
WARNING: CPU: 0 PID: 833 at fs/btrfs/transaction.c:1875 create_pending_snapshot+0x1040/0x1190 [btrfs]
Modules linked in: pata_acpi btrfs ata_piix libata scsi_mod virtio_net blake2b_generic xor net_failover virtio_rng failover scsi_common rng_core raid6_pq libcrc32c
CPU: 0 PID: 833 Comm: t_snapshot_dele Not tainted 6.7.0-rc6 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014
RIP: 0010:create_pending_snapshot+0x1040/0x1190 [btrfs]
RSP: 0018:ffffa09c01337af8 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff9982053e7c78 RCX: 0000000000000027
RDX: ffff99827dc20848 RSI: 0000000000000001 RDI: ffff99827dc20840
RBP: ffffa09c01337c00 R08: 0000000000000000 R09: ffffa09c01337998
R10: 0000000000000003 R11: ffffffffb96da248 R12: fffffffffffffffe
R13: ffff99820535bb28 R14: ffff99820b7bd000 R15: ffff99820381ea80
FS: 00007fe20aadabc0(0000) GS:ffff99827dc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000559a120b502f CR3: 00000000055b6000 CR4: 00000000000006f0
Call Trace:
<TASK>
? create_pending_snapshot+0x1040/0x1190 [btrfs]
? __warn+0x81/0x130
? create_pending_snapshot+0x1040/0x1190 [btrfs]
? report_bug+0x171/0x1a0
? handle_bug+0x3a/0x70
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? create_pending_snapshot+0x1040/0x1190 [btrfs]
? create_pending_snapshot+0x1040/0x1190 [btrfs]
create_pending_snapshots+0x92/0xc0 [btrfs]
btrfs_commit_transaction+0x66b/0xf40 [btrfs]
btrfs_mksubvol+0x301/0x4d0 [btrfs]
btrfs_mksnapshot+0x80/0xb0 [btrfs]
__btrfs_ioctl_snap_create+0x1c2/0x1d0 [btrfs]
btrfs_ioctl_snap_create_v2+0xc4/0x150 [btrfs]
btrfs_ioctl+0x8a6/0x2650 [btrfs]
? kmem_cache_free+0x22/0x340
? do_sys_openat2+0x97/0xe0
__x64_sys_ioctl+0x97/0xd0
do_syscall_64+0x46/0xf0
entry_SYSCALL_64_after_hwframe+0x6e/0x76
RIP: 0033:0x7fe20abe83af
RSP: 002b:00007ffe6eff1360 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fe20abe83af
RDX: 00007ffe6eff23c0 RSI: 0000000050009417 RDI: 0000000000000003
RBP: 0000000000000003 R08: 0000000000000000 R09: 00007fe20ad16cd0
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007ffe6eff13c0 R14: 00007fe20ad45000 R15: 0000559a120b6d58
</TASK>
---[ end trace 0000000000000000 ]---
BTRFS: error (device vdc: state A) in create_pending_snapshot:1875: errno=-2 No such entry
BTRFS info (device vdc: state EA): forced readonly
BTRFS warning (device vdc: state EA): Skipping commit of aborted transaction.
BTRFS: error (device vdc: state EA) in cleanup_transaction:2055: errno=-2 No such entry
This happens because create_pending_snapshot() initializes the new root
item as a copy of the source root item. This includes the refs field,
which is 0 for a deleted subvolume. The call to btrfs_insert_root()
therefore inserts a root with refs == 0. btrfs_get_new_fs_root() then
finds the root and returns -ENOENT if refs == 0, which causes
create_pending_snapshot() to abort.
Fix it by checking the source root's refs before attempting the
snapshot, but after locking subvol_sem to avoid racing with deletion.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4e50b62db2a8..fea5d37528b8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -790,6 +790,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
return -EOPNOTSUPP;
}
+ if (btrfs_root_refs(&root->root_item) == 0)
+ return -ENOENT;
+
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 3324d0547861b16cf436d54abba7052e0c8aa9de
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012738-abdomen-crimson-3018@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
3324d0547861 ("btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted")
60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted")
dd0734f2a866 ("btrfs: fix race between swap file activation and snapshot creation")
ee0d904fd9c5 ("btrfs: remove err variable from btrfs_delete_subvolume")
c3e1f96c37d0 ("btrfs: enumerate the type of exclusive operation in progress")
e85fde5162bf ("btrfs: qgroup: fix qgroup meta rsv leak for subvolume operations")
adca4d945c8d ("btrfs: qgroup: remove ASYNC_COMMIT mechanism in favor of reserve retry-after-EDQUOT")
c11fbb6ed0dd ("btrfs: reduce lock contention when creating snapshot")
63f018be577f ("btrfs: Remove __ prefix from btrfs_block_rsv_release")
dcc3eb9638c3 ("btrfs: convert snapshot/nocow exlcusion to drew lock")
0024652895e3 ("btrfs: rename btrfs_put_fs_root and btrfs_grab_fs_root")
bd647ce385ec ("btrfs: add a leak check for roots")
8260edba67a2 ("btrfs: make the init of static elements in fs_info separate")
ae18c37ad5a1 ("btrfs: move fs_info init work into it's own helper function")
141386e1a5d6 ("btrfs: free more things in btrfs_free_fs_info")
bc44d7c4b2b1 ("btrfs: push btrfs_grab_fs_root into btrfs_get_fs_root")
81f096edf047 ("btrfs: use btrfs_put_fs_root to free roots always")
0d4b0463011d ("btrfs: export and rename free_fs_info")
fbb0ce40d606 ("btrfs: hold a ref on the root in btrfs_check_uuid_tree_entry")
ca2037fba6af ("btrfs: hold a ref on the root in btrfs_recover_log_trees")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3324d0547861b16cf436d54abba7052e0c8aa9de Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Thu, 4 Jan 2024 11:48:47 -0800
Subject: [PATCH] btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot
of subvolume being deleted
Sweet Tea spotted a race between subvolume deletion and snapshotting
that can result in the root item for the snapshot having the
BTRFS_ROOT_SUBVOL_DEAD flag set. The race is:
Thread 1 | Thread 2
----------------------------------------------|----------
btrfs_delete_subvolume |
btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)|
|btrfs_mksubvol
| down_read(subvol_sem)
| create_snapshot
| ...
| create_pending_snapshot
| copy root item from source
down_write(subvol_sem) |
This flag is only checked in send and swap activate, which this would
cause to fail mysteriously.
create_snapshot() now checks the root refs to reject a deleted
subvolume, so we can fix this by locking subvol_sem earlier so that the
BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically.
CC: stable(a)vger.kernel.org # 4.14+
Reported-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3e39610cc95..7bcc1c03437a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
u64 root_flags;
int ret;
+ down_write(&fs_info->subvol_sem);
+
/*
* Don't allow to delete a subvolume with send in progress. This is
* inside the inode lock so the error handling that has to drop the bit
@@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
btrfs_warn(fs_info,
"attempt to delete subvolume %llu during send",
dest->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
if (atomic_read(&dest->nr_swapfiles)) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
root->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- down_write(&fs_info->subvol_sem);
-
ret = may_destroy_subvol(dest);
if (ret)
- goto out_up_write;
+ goto out_undead;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
@@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
*/
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
- goto out_up_write;
+ goto out_undead;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -4563,15 +4565,17 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
-out_up_write:
- up_write(&fs_info->subvol_sem);
+out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- } else {
+ }
+out_up_write:
+ up_write(&fs_info->subvol_sem);
+ if (!ret) {
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 3324d0547861b16cf436d54abba7052e0c8aa9de
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012736-narrow-thievish-cc02@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
3324d0547861 ("btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted")
60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted")
dd0734f2a866 ("btrfs: fix race between swap file activation and snapshot creation")
ee0d904fd9c5 ("btrfs: remove err variable from btrfs_delete_subvolume")
c3e1f96c37d0 ("btrfs: enumerate the type of exclusive operation in progress")
e85fde5162bf ("btrfs: qgroup: fix qgroup meta rsv leak for subvolume operations")
adca4d945c8d ("btrfs: qgroup: remove ASYNC_COMMIT mechanism in favor of reserve retry-after-EDQUOT")
c11fbb6ed0dd ("btrfs: reduce lock contention when creating snapshot")
63f018be577f ("btrfs: Remove __ prefix from btrfs_block_rsv_release")
dcc3eb9638c3 ("btrfs: convert snapshot/nocow exlcusion to drew lock")
0024652895e3 ("btrfs: rename btrfs_put_fs_root and btrfs_grab_fs_root")
bd647ce385ec ("btrfs: add a leak check for roots")
8260edba67a2 ("btrfs: make the init of static elements in fs_info separate")
ae18c37ad5a1 ("btrfs: move fs_info init work into it's own helper function")
141386e1a5d6 ("btrfs: free more things in btrfs_free_fs_info")
bc44d7c4b2b1 ("btrfs: push btrfs_grab_fs_root into btrfs_get_fs_root")
81f096edf047 ("btrfs: use btrfs_put_fs_root to free roots always")
0d4b0463011d ("btrfs: export and rename free_fs_info")
fbb0ce40d606 ("btrfs: hold a ref on the root in btrfs_check_uuid_tree_entry")
ca2037fba6af ("btrfs: hold a ref on the root in btrfs_recover_log_trees")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3324d0547861b16cf436d54abba7052e0c8aa9de Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Thu, 4 Jan 2024 11:48:47 -0800
Subject: [PATCH] btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot
of subvolume being deleted
Sweet Tea spotted a race between subvolume deletion and snapshotting
that can result in the root item for the snapshot having the
BTRFS_ROOT_SUBVOL_DEAD flag set. The race is:
Thread 1 | Thread 2
----------------------------------------------|----------
btrfs_delete_subvolume |
btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)|
|btrfs_mksubvol
| down_read(subvol_sem)
| create_snapshot
| ...
| create_pending_snapshot
| copy root item from source
down_write(subvol_sem) |
This flag is only checked in send and swap activate, which this would
cause to fail mysteriously.
create_snapshot() now checks the root refs to reject a deleted
subvolume, so we can fix this by locking subvol_sem earlier so that the
BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically.
CC: stable(a)vger.kernel.org # 4.14+
Reported-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3e39610cc95..7bcc1c03437a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
u64 root_flags;
int ret;
+ down_write(&fs_info->subvol_sem);
+
/*
* Don't allow to delete a subvolume with send in progress. This is
* inside the inode lock so the error handling that has to drop the bit
@@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
btrfs_warn(fs_info,
"attempt to delete subvolume %llu during send",
dest->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
if (atomic_read(&dest->nr_swapfiles)) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
root->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- down_write(&fs_info->subvol_sem);
-
ret = may_destroy_subvol(dest);
if (ret)
- goto out_up_write;
+ goto out_undead;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
@@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
*/
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
- goto out_up_write;
+ goto out_undead;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -4563,15 +4565,17 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
-out_up_write:
- up_write(&fs_info->subvol_sem);
+out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- } else {
+ }
+out_up_write:
+ up_write(&fs_info->subvol_sem);
+ if (!ret) {
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 3324d0547861b16cf436d54abba7052e0c8aa9de
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012735-gentile-overture-2afa@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
3324d0547861 ("btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted")
60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted")
dd0734f2a866 ("btrfs: fix race between swap file activation and snapshot creation")
ee0d904fd9c5 ("btrfs: remove err variable from btrfs_delete_subvolume")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3324d0547861b16cf436d54abba7052e0c8aa9de Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Thu, 4 Jan 2024 11:48:47 -0800
Subject: [PATCH] btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot
of subvolume being deleted
Sweet Tea spotted a race between subvolume deletion and snapshotting
that can result in the root item for the snapshot having the
BTRFS_ROOT_SUBVOL_DEAD flag set. The race is:
Thread 1 | Thread 2
----------------------------------------------|----------
btrfs_delete_subvolume |
btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)|
|btrfs_mksubvol
| down_read(subvol_sem)
| create_snapshot
| ...
| create_pending_snapshot
| copy root item from source
down_write(subvol_sem) |
This flag is only checked in send and swap activate, which this would
cause to fail mysteriously.
create_snapshot() now checks the root refs to reject a deleted
subvolume, so we can fix this by locking subvol_sem earlier so that the
BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically.
CC: stable(a)vger.kernel.org # 4.14+
Reported-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3e39610cc95..7bcc1c03437a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
u64 root_flags;
int ret;
+ down_write(&fs_info->subvol_sem);
+
/*
* Don't allow to delete a subvolume with send in progress. This is
* inside the inode lock so the error handling that has to drop the bit
@@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
btrfs_warn(fs_info,
"attempt to delete subvolume %llu during send",
dest->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
if (atomic_read(&dest->nr_swapfiles)) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
root->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- down_write(&fs_info->subvol_sem);
-
ret = may_destroy_subvol(dest);
if (ret)
- goto out_up_write;
+ goto out_undead;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
@@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
*/
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
- goto out_up_write;
+ goto out_undead;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -4563,15 +4565,17 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
-out_up_write:
- up_write(&fs_info->subvol_sem);
+out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- } else {
+ }
+out_up_write:
+ up_write(&fs_info->subvol_sem);
+ if (!ret) {
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3324d0547861b16cf436d54abba7052e0c8aa9de
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012733-expert-landlady-ce9c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
3324d0547861 ("btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted")
60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3324d0547861b16cf436d54abba7052e0c8aa9de Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov(a)fb.com>
Date: Thu, 4 Jan 2024 11:48:47 -0800
Subject: [PATCH] btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot
of subvolume being deleted
Sweet Tea spotted a race between subvolume deletion and snapshotting
that can result in the root item for the snapshot having the
BTRFS_ROOT_SUBVOL_DEAD flag set. The race is:
Thread 1 | Thread 2
----------------------------------------------|----------
btrfs_delete_subvolume |
btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)|
|btrfs_mksubvol
| down_read(subvol_sem)
| create_snapshot
| ...
| create_pending_snapshot
| copy root item from source
down_write(subvol_sem) |
This flag is only checked in send and swap activate, which this would
cause to fail mysteriously.
create_snapshot() now checks the root refs to reject a deleted
subvolume, so we can fix this by locking subvol_sem earlier so that the
BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically.
CC: stable(a)vger.kernel.org # 4.14+
Reported-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me>
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3e39610cc95..7bcc1c03437a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
u64 root_flags;
int ret;
+ down_write(&fs_info->subvol_sem);
+
/*
* Don't allow to delete a subvolume with send in progress. This is
* inside the inode lock so the error handling that has to drop the bit
@@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
btrfs_warn(fs_info,
"attempt to delete subvolume %llu during send",
dest->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
if (atomic_read(&dest->nr_swapfiles)) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
root->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- down_write(&fs_info->subvol_sem);
-
ret = may_destroy_subvol(dest);
if (ret)
- goto out_up_write;
+ goto out_undead;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
@@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
*/
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
- goto out_up_write;
+ goto out_undead;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -4563,15 +4565,17 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
-out_up_write:
- up_write(&fs_info->subvol_sem);
+out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- } else {
+ }
+out_up_write:
+ up_write(&fs_info->subvol_sem);
+ if (!ret) {
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x f546c4282673497a06ecb6190b50ae7f6c85b02f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012740-mating-boxing-dd93@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
f546c4282673 ("btrfs: scrub: avoid use-after-free when chunk length is not 64K aligned")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f546c4282673497a06ecb6190b50ae7f6c85b02f Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Wed, 17 Jan 2024 11:02:25 +1030
Subject: [PATCH] btrfs: scrub: avoid use-after-free when chunk length is not
64K aligned
[BUG]
There is a bug report that, on a ext4-converted btrfs, scrub leads to
various problems, including:
- "unable to find chunk map" errors
BTRFS info (device vdb): scrub: started on devid 1
BTRFS critical (device vdb): unable to find chunk map for logical 2214744064 length 4096
BTRFS critical (device vdb): unable to find chunk map for logical 2214744064 length 45056
This would lead to unrepariable errors.
- Use-after-free KASAN reports:
==================================================================
BUG: KASAN: slab-use-after-free in __blk_rq_map_sg+0x18f/0x7c0
Read of size 8 at addr ffff8881013c9040 by task btrfs/909
CPU: 0 PID: 909 Comm: btrfs Not tainted 6.7.0-x64v3-dbg #11 c50636e9419a8354555555245df535e380563b2b
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 2023.11-2 12/24/2023
Call Trace:
<TASK>
dump_stack_lvl+0x43/0x60
print_report+0xcf/0x640
kasan_report+0xa6/0xd0
__blk_rq_map_sg+0x18f/0x7c0
virtblk_prep_rq.isra.0+0x215/0x6a0 [virtio_blk 19a65eeee9ae6fcf02edfad39bb9ddee07dcdaff]
virtio_queue_rqs+0xc4/0x310 [virtio_blk 19a65eeee9ae6fcf02edfad39bb9ddee07dcdaff]
blk_mq_flush_plug_list.part.0+0x780/0x860
__blk_flush_plug+0x1ba/0x220
blk_finish_plug+0x3b/0x60
submit_initial_group_read+0x10a/0x290 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965]
flush_scrub_stripes+0x38e/0x430 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965]
scrub_stripe+0x82a/0xae0 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965]
scrub_chunk+0x178/0x200 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965]
scrub_enumerate_chunks+0x4bc/0xa30 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965]
btrfs_scrub_dev+0x398/0x810 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965]
btrfs_ioctl+0x4b9/0x3020 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965]
__x64_sys_ioctl+0xbd/0x100
do_syscall_64+0x5d/0xe0
entry_SYSCALL_64_after_hwframe+0x63/0x6b
RIP: 0033:0x7f47e5e0952b
- Crash, mostly due to above use-after-free
[CAUSE]
The converted fs has the following data chunk layout:
item 2 key (FIRST_CHUNK_TREE CHUNK_ITEM 2214658048) itemoff 16025 itemsize 80
length 86016 owner 2 stripe_len 65536 type DATA|single
For above logical bytenr 2214744064, it's at the chunk end
(2214658048 + 86016 = 2214744064).
This means btrfs_submit_bio() would split the bio, and trigger endio
function for both of the two halves.
However scrub_submit_initial_read() would only expect the endio function
to be called once, not any more.
This means the first endio function would already free the bbio::bio,
leaving the bvec freed, thus the 2nd endio call would lead to
use-after-free.
[FIX]
- Make sure scrub_read_endio() only updates bits in its range
Since we may read less than 64K at the end of the chunk, we should not
touch the bits beyond chunk boundary.
- Make sure scrub_submit_initial_read() only to read the chunk range
This is done by calculating the real number of sectors we need to
read, and add sector-by-sector to the bio.
Thankfully the scrub read repair path won't need extra fixes:
- scrub_stripe_submit_repair_read()
With above fixes, we won't update error bit for range beyond chunk,
thus scrub_stripe_submit_repair_read() should never submit any read
beyond the chunk.
Reported-by: Rongrong <i(a)rong.moe>
Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure")
Tested-by: Rongrong <i(a)rong.moe>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a01807cbd4d4..2d81b1a18a04 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1098,12 +1098,22 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
static void scrub_read_endio(struct btrfs_bio *bbio)
{
struct scrub_stripe *stripe = bbio->private;
+ struct bio_vec *bvec;
+ int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio));
+ int num_sectors;
+ u32 bio_size = 0;
+ int i;
+
+ ASSERT(sector_nr < stripe->nr_sectors);
+ bio_for_each_bvec_all(bvec, &bbio->bio, i)
+ bio_size += bvec->bv_len;
+ num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits;
if (bbio->bio.bi_status) {
- bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
- bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors);
+ bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors);
+ bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors);
} else {
- bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
+ bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors);
}
bio_put(&bbio->bio);
if (atomic_dec_and_test(&stripe->pending_io)) {
@@ -1701,6 +1711,9 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_bio *bbio;
+ unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start +
+ stripe->bg->length - stripe->logical) >>
+ fs_info->sectorsize_bits;
int mirror = stripe->mirror_num;
ASSERT(stripe->bg);
@@ -1715,14 +1728,16 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
scrub_read_endio, stripe);
- /* Read the whole stripe. */
bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
- for (int i = 0; i < BTRFS_STRIPE_LEN >> PAGE_SHIFT; i++) {
+ /* Read the whole range inside the chunk boundary. */
+ for (unsigned int cur = 0; cur < nr_sectors; cur++) {
+ struct page *page = scrub_stripe_get_page(stripe, cur);
+ unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur);
int ret;
- ret = bio_add_page(&bbio->bio, stripe->pages[i], PAGE_SIZE, 0);
+ ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
/* We should have allocated enough bio vectors. */
- ASSERT(ret == PAGE_SIZE);
+ ASSERT(ret == fs_info->sectorsize);
}
atomic_inc(&stripe->pending_io);
From: Filipe Manana <fdmanana(a)suse.com>
Here follows the backport of some directory related fixes for the stable
6.1 tree. I tested these on top of 6.1.75.
These were recently requested by a user for 5.15 stable:
https://lore.kernel.org/linux-btrfs/20240124225522.GA2614102@lxhi-087/
This request is to backport the same patches to 6.1, while the request
for 5.15 stabe is at:
https://lore.kernel.org/linux-btrfs/cover.1706183427.git.fdmanana@suse.com/
Filipe Manana (4):
btrfs: fix infinite directory reads
btrfs: set last dir index to the current last index when opening dir
btrfs: refresh dir last index during a rewinddir(3) call
btrfs: fix race between reading a directory and adding entries to it
fs/btrfs/ctree.h | 1 +
fs/btrfs/delayed-inode.c | 5 +-
fs/btrfs/delayed-inode.h | 1 +
fs/btrfs/inode.c | 150 +++++++++++++++++++++++++--------------
4 files changed, 102 insertions(+), 55 deletions(-)
--
2.40.1
Hi all,
This series resolves two independent but related issues that were
recently exposed by two LLVM changes.
https://github.com/llvm/llvm-project/commit/ec92d74a0ef89b9dd46aee6ec8aca6b…
exposes that '-no-pie' is not getting added to the linker flags with
clang, resulting in building objects with '-fno-PIE' that are linked
with '-pie', to which the linker rightfully errors with:
/usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE
/usr/sbin/ld: failed to set dynamic section sizes: bad value
https://github.com/llvm/llvm-project/commit/4bf8a688956a759b7b6b8d94f42d25c…
adds '.ltext' (and '.ltext.*' with '-ffunction-sections') when using
'-mcmodel=large' (which UML does), which causes a segmentation fault
with modpost.
I have tested these patches with all supported versions of clang,
noticing no regressions.
---
Nathan Chancellor (2):
um: Fix adding '-no-pie' for clang
modpost: Add '.ltext' and '.ltext.*' to TEXT_SECTIONS
arch/um/Makefile | 4 +++-
scripts/mod/modpost.c | 3 ++-
2 files changed, 5 insertions(+), 2 deletions(-)
---
base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a
change-id: 20240118-fix-uml-clang-18-e365b0503a29
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
The cleanup can be dispatched while the atomic update is still active,
which means that the memory acquired in the atomic update needs to
not be invalidated by the cleanup. The buffer objects in vmw_plane_state
instead of using the builtin map_and_cache were trying to handle
the lifetime of the mapped memory themselves, leading to crashes.
Use the map_and_cache instead of trying to manage the lifetime of the
buffer objects held by the vmw_plane_state.
Fixes kernel oops'es in IGT's kms_cursor_legacy forked-bo.
Signed-off-by: Zack Rusin <zack.rusin(a)broadcom.com>
Fixes: bb6780aa5a1d ("drm/vmwgfx: Diff cursors when using cmds")
Cc: <stable(a)vger.kernel.org> # v6.2+
---
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 13 +------------
1 file changed, 1 insertion(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index e2bfaf4522a6..cd4925346ed4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -185,13 +185,12 @@ static u32 vmw_du_cursor_mob_size(u32 w, u32 h)
*/
static u32 *vmw_du_cursor_plane_acquire_image(struct vmw_plane_state *vps)
{
- bool is_iomem;
if (vps->surf) {
if (vps->surf_mapped)
return vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo);
return vps->surf->snooper.image;
} else if (vps->bo)
- return ttm_kmap_obj_virtual(&vps->bo->map, &is_iomem);
+ return vmw_bo_map_and_cache(vps->bo);
return NULL;
}
@@ -653,22 +652,12 @@ vmw_du_cursor_plane_cleanup_fb(struct drm_plane *plane,
{
struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane);
struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
- bool is_iomem;
if (vps->surf_mapped) {
vmw_bo_unmap(vps->surf->res.guest_memory_bo);
vps->surf_mapped = false;
}
- if (vps->bo && ttm_kmap_obj_virtual(&vps->bo->map, &is_iomem)) {
- const int ret = ttm_bo_reserve(&vps->bo->tbo, true, false, NULL);
-
- if (likely(ret == 0)) {
- ttm_bo_kunmap(&vps->bo->map);
- ttm_bo_unreserve(&vps->bo->tbo);
- }
- }
-
vmw_du_cursor_plane_unmap_cm(vps);
vmw_du_put_cursor_mob(vcp, vps);
--
2.40.1
RISC-V PLIC cannot EOI disabled interrupts, as explained in the
description of Interrupt Completion in the PLIC spec:
"The PLIC signals it has completed executing an interrupt handler by
writing the interrupt ID it received from the claim to the claim/complete
register. The PLIC does not check whether the completion ID is the same
as the last claim ID for that target. If the completion ID does not match
an interrupt source that *is currently enabled* for the target, the
completion is silently ignored."
Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked")
ensured that by enabling the interrupt if needed before EOI.
Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask
operations") removed the interrupt enabling code from the previous
commit, because it assumes that interrupt should be enabled at the point
of EOI. However, this is incorrect: there is a small window after a hart
claiming an interrupt and before irq_desc->lock getting acquired,
interrupt can be disabled during this window. Thus, EOI can be invoked
while the interrupt is disabled, effectively nullify this EOI.
Make sure that interrupt is really enabled before EOI.
Fixes: a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Nam Cao <namcao(a)linutronix.de>
---
drivers/irqchip/irq-sifive-plic.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 5b7bc4fd9517..0857a516c35b 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -148,7 +148,13 @@ static void plic_irq_eoi(struct irq_data *d)
{
struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
- writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+ if (irqd_irq_disabled(d)) {
+ plic_toggle(handler, d->hwirq, 1);
+ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+ plic_toggle(handler, d->hwirq, 0);
+ } else {
+ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+ }
}
#ifdef CONFIG_SMP
--
2.39.2
Hi Maxim and Paolo,
This is the linux-stable backport request regarding the below patch.
KVM: x86: smm: preserve interrupt shadow in SMRAM
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
According to the below link, there may be a backport to stable kernels, while I
do not see it in the stable kernels.
https://gitlab.com/qemu-project/qemu/-/issues/1198
Would you mind sharing if there is already any existing backport, or please let
me know if I can send the backport to the linux-stable?
There are many conflicts unless we backport the entire patchset, e.g.,: I
choose 0x7f1a/0x7ecb for 32-bit/64-bit int_shadow in the smram.
--------------------------------
From 90f492c865a4b7ca6187a4fc9eebe451f3d6c17e Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk(a)redhat.com>
Date: Fri, 26 Jan 2024 14:03:59 -0800
Subject: [PATCH linux-5.15.y 1/1] KVM: x86: smm: preserve interrupt shadow in SMRAM
[ Upstream commit fb28875fd7da184079150295da7ee8d80a70917e ]
When #SMI is asserted, the CPU can be in interrupt shadow due to sti or
mov ss.
It is not mandatory in Intel/AMD prm to have the #SMI blocked during the
shadow, and on top of that, since neither SVM nor VMX has true support
for SMI window, waiting for one instruction would mean single stepping
the guest.
Instead, allow #SMI in this case, but both reset the interrupt window and
stash its value in SMRAM to restore it on exit from SMM.
This fixes rare failures seen mostly on windows guests on VMX, when #SMI
falls on the sti instruction which mainfest in VM entry failure due
to EFLAGS.IF not being set, but STI interrupt window still being set
in the VMCS.
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Message-Id: <20221025124741.228045-24-mlevitsk(a)redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
Backport fb28875fd7da184079150295da7ee8d80a70917e from a big patchset
merge:
[PATCH RESEND v4 00/23] SMM emulation and interrupt shadow fixes
https://lore.kernel.org/all/20221025124741.228045-1-mlevitsk@redhat.com/
Since only the last patch is backported, there are many conflicts.
The core idea of the patch:
- Save the interruptibility before entering SMM.
- Load the interruptibility after leaving SMM.
Although the real offsets in smram buffer are the same, the bugfix and the
UEK5 use different offsets in the function calls. Here are some examples.
32-bit:
bugfix UEK6
smbase -> 0xFEF8 -> 0x7ef8
cr4 -> 0xFF14 -> 0x7f14
int_shadow -> 0xFF1A -> n/a
eip -> 0xFFF0 -> 0x7ff0
cr0 -> 0xFFFC -> 0x7ffc
64-bit:
bugfix UEK6
int_shadow -> 0xFECB -> n/a
efer -> 0xFEd0 -> 0x7ed0
smbase -> 0xFF00 -> 0x7f00
cr4 -> 0xFF48 -> 0x7f48
cr0 -> 0xFF58 -> 0x7f58
rip -> 0xFF78 -> 0x7f78
Therefore, we choose the below offsets for int_shadow:
32-bit: int_shadow = 0x7f1a
64-bit: int_shadow = 0x7ecb
Signed-off-by: Dongli Zhang <dongli.zhang(a)oracle.com>
---
arch/x86/kvm/emulate.c | 15 +++++++++++++--
arch/x86/kvm/x86.c | 6 ++++++
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 98b25a7..00df781b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2438,7 +2438,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
struct desc_ptr dt;
u16 selector;
u32 val, cr0, cr3, cr4;
- int i;
+ int i, r;
cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
@@ -2488,7 +2488,15 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
- return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
+ static_call(kvm_x86_set_interrupt_shadow)(ctxt->vcpu, 0);
+ ctxt->interruptibility = GET_SMSTATE(u8, smstate, 0x7f1a);
+
+ return r;
}
#ifdef CONFIG_X86_64
@@ -2559,6 +2567,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
return r;
}
+ static_call(kvm_x86_set_interrupt_shadow)(ctxt->vcpu, 0);
+ ctxt->interruptibility = GET_SMSTATE(u8, smstate, 0x7ecb);
+
return X86EMUL_CONTINUE;
}
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aa6f700..6b30d40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9400,6 +9400,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
/* revision id */
put_smstate(u32, buf, 0x7efc, 0x00020000);
put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+
+ put_smstate(u8, buf, 0x7f1a, static_call(kvm_x86_get_interrupt_shadow)(vcpu));
}
#ifdef CONFIG_X86_64
@@ -9454,6 +9456,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
for (i = 0; i < 6; i++)
enter_smm_save_seg_64(vcpu, buf, i);
+
+ put_smstate(u8, buf, 0x7ecb, static_call(kvm_x86_get_interrupt_shadow)(vcpu));
}
#endif
@@ -9490,6 +9494,8 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0x8000);
+ static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+
cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
static_call(kvm_x86_set_cr0)(vcpu, cr0);
vcpu->arch.cr0 = cr0;
--
1.8.3.1
--------------------------------
Thank you very much!
Dongli Zhang
From: Wayne Lin <wayne.lin(a)amd.com>
link_rate sometime will be changed when DP MST connector hotplug, so
pbn_div also need be updated; otherwise, it will mismatch with
link_rate, causes no output in external monitor.
This is a backport of
commit 9cdef4f72037 ("drm/amd/display: pbn_div need be updated for hotplug event")
to 6.1. This fixes a display light up failure on some docking stations.
Cc: stable(a)vger.kernel.org
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Reviewed-by: Jerry Zuo <jerry.zuo(a)amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira(a)amd.com>
Signed-off-by: Wade Wang <wade.wang(a)hp.com>
Signed-off-by: Wayne Lin <wayne.lin(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
(cherry picked from commit 9cdef4f720376ef0fb0febce1ed2377c19e531f9)
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 13e0b521e3db..f02e509d5fac 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6677,8 +6677,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
if (IS_ERR(mst_state))
return PTR_ERR(mst_state);
- if (!mst_state->pbn_div)
- mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_port->dc_link);
+ mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_port->dc_link);
if (!state->duplicated) {
int max_bpc = conn_state->max_requested_bpc;
--
2.42.0
From: "Maciej S. Szmigiero" <maciej.szmigiero(a)oracle.com>
The stable kernel version backport of the patch disabling XSAVES on AMD
Zen family 0x17 applied this change to the wrong function (init_amd_k6()),
one which isn't called for Zen CPUs.
Move the erratum to the init_amd_zn() function instead.
Add an explicit family 0x17 check to the erratum so nothing will break if
someone naively makes this kernel version call init_amd_zn() also for
family 0x19 in the future (as the current upstream code does).
Fixes: f028a7db9824 ("x86/CPU/AMD: Disable XSAVES on AMD family 0x17")
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero(a)oracle.com>
---
arch/x86/kernel/cpu/amd.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 84667781c41d..5b75a4ff6802 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -271,15 +271,6 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
#endif
- /*
- * Work around Erratum 1386. The XSAVES instruction malfunctions in
- * certain circumstances on Zen1/2 uarch, and not all parts have had
- * updated microcode at the time of writing (March 2023).
- *
- * Affected parts all have no supervisor XSAVE states, meaning that
- * the XSAVEC instruction (which works fine) is equivalent.
- */
- clear_cpu_cap(c, X86_FEATURE_XSAVES);
}
static void init_amd_k7(struct cpuinfo_x86 *c)
@@ -979,6 +970,17 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
set_cpu_cap(c, X86_FEATURE_BTC_NO);
}
+
+ /*
+ * Work around Erratum 1386. The XSAVES instruction malfunctions in
+ * certain circumstances on Zen1/2 uarch, and not all parts have had
+ * updated microcode at the time of writing (March 2023).
+ *
+ * Affected parts all have no supervisor XSAVE states, meaning that
+ * the XSAVEC instruction (which works fine) is equivalent.
+ */
+ if (c->x86 == 0x17)
+ clear_cpu_cap(c, X86_FEATURE_XSAVES);
}
static bool cpu_has_zenbleed_microcode(void)
This is a backport of two upstream patch-sets:
1. "exact states comparison for iterator convergence checks"
https://lore.kernel.org/all/20231024000917.12153-1-eddyz87@gmail.com/
2. "verify callbacks as if they are called unknown number of times"
https://lore.kernel.org/all/20231121020701.26440-1-eddyz87@gmail.com/
Both patch-sets fix BPF verifier logic related to handling loops:
for bpf iterators, and for helper functions that accept callback
functions.
The backport of (2) was requested as a response to bug report by
Mateusz Gienieczko <mat.gienieczko(a)tum.de>.
The (1) is a dependency of (2).
The patch-set was tested by running BPF verifier selftests on my local
qemu-based setup.
Most of the commits could be cherry-picked but three required merging:
| Action | Upstream commit |
|--------+-------------------------------------------------------------------------------------------------|
| pick | 3c4e420cb653 ("bpf: move explored_state() closer to the beginning of verifier.c ") |
| pick | 4c97259abc9b ("bpf: extract same_callsites() as utility function ") |
| merge | 2793a8b015f7 ("bpf: exact states comparison for iterator convergence checks ") |
| pick | 389ede06c297 ("selftests/bpf: tests with delayed read/precision makrs in loop body ") |
| pick | 2a0992829ea3 ("bpf: correct loop detection for iterators convergence ") |
| pick | 64870feebecb ("selftests/bpf: test if state loops are detected in a tricky case ") |
| pick | b4d8239534fd ("bpf: print full verifier states on infinite loop detection ") |
| drop | dedd6c894110 ("Merge branch 'exact-states-comparison-for-iterator-convergence-checks' ") |
|--------+-------------------------------------------------------------------------------------------------|
| pick | 977bc146d4eb ("selftests/bpf: track tcp payload offset as scalar in xdp_synproxy ") |
| pick | 87eb0152bcc1 ("selftests/bpf: track string payload offset as scalar in strobemeta ") |
| pick | 683b96f9606a ("bpf: extract __check_reg_arg() utility function ") |
| pick | 58124a98cb8e ("bpf: extract setup_func_entry() utility function ") |
| merge | ab5cfac139ab ("bpf: verify callbacks as if they are called unknown number of times ") |
| pick | 958465e217db ("selftests/bpf: tests for iterating callbacks ") |
| pick | cafe2c21508a ("bpf: widening for callback iterators ") |
| pick | 9f3330aa644d ("selftests/bpf: test widening for iterating callbacks ") |
| merge | bb124da69c47 ("bpf: keep track of max number of bpf_loop callback iterations ") |
| pick | 57e2a52deeb1 ("selftests/bpf: check if max number of bpf_loop iterations is tracked ") |
| drop | acb12c859ac7 ("Merge branch 'verify-callbacks-as-if-they-are-called-unknown-number-of-times' ") |
Note:
I don't know how deal with merge commits, so I just dropped those.
These commits are empty but contain cover letters for both series,
so it might be useful to pick those (how?).
Eduard Zingerman (17):
bpf: move explored_state() closer to the beginning of verifier.c
bpf: extract same_callsites() as utility function
bpf: exact states comparison for iterator convergence checks
selftests/bpf: tests with delayed read/precision makrs in loop body
bpf: correct loop detection for iterators convergence
selftests/bpf: test if state loops are detected in a tricky case
bpf: print full verifier states on infinite loop detection
selftests/bpf: track tcp payload offset as scalar in xdp_synproxy
selftests/bpf: track string payload offset as scalar in strobemeta
bpf: extract __check_reg_arg() utility function
bpf: extract setup_func_entry() utility function
bpf: verify callbacks as if they are called unknown number of times
selftests/bpf: tests for iterating callbacks
bpf: widening for callback iterators
selftests/bpf: test widening for iterating callbacks
bpf: keep track of max number of bpf_loop callback iterations
selftests/bpf: check if max number of bpf_loop iterations is tracked
include/linux/bpf_verifier.h | 32 +
kernel/bpf/verifier.c | 875 ++++++++++++++----
.../selftests/bpf/prog_tests/verifier.c | 2 +
tools/testing/selftests/bpf/progs/cb_refs.c | 1 +
tools/testing/selftests/bpf/progs/iters.c | 695 ++++++++++++++
.../testing/selftests/bpf/progs/strobemeta.h | 78 +-
.../bpf/progs/verifier_iterating_callbacks.c | 242 +++++
.../bpf/progs/verifier_subprog_precision.c | 86 +-
.../selftests/bpf/progs/xdp_synproxy_kern.c | 84 +-
9 files changed, 1830 insertions(+), 265 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
--
2.43.0
This commit is for linux-4.19.y only, it has no direct upstream
equivalent.
Prior to commit 5f2fb52fac15 ("kbuild: rename hostprogs-y/always to
hostprogs/always-y"), always-y did not exist, making the backport of
mainline commit 1b1e38002648 ("powerpc: add crtsavres.o to always-y
instead of extra-y") to linux-4.19.y as commit b7b85ec5ec15 ("powerpc:
add crtsavres.o to always-y instead of extra-y") incorrect, breaking the
build with linkers that need crtsavres.o:
ld.lld: error: cannot open arch/powerpc/lib/crtsavres.o: No such file or directory
Backporting the aforementioned kbuild commit is not suitable for stable
due to its size and number of conflicts, so transform the always-y usage
to an equivalent form using always, which resolves the build issues.
Fixes: b7b85ec5ec15 ("powerpc: add crtsavres.o to always-y instead of extra-y")
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
arch/powerpc/lib/Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 6f1e57182876..f0aa6fc8c6b2 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -21,8 +21,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o
# 64-bit linker creates .sfpr on demand for final link (vmlinux),
# so it is only needed for modules, and only for older linkers which
# do not support --save-restore-funcs
-ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
-always-$(CONFIG_PPC64) += crtsavres.o
+ifeq ($(call ld-ifversion, -lt, 225000000, y)$(CONFIG_PPC64),yy)
+always += crtsavres.o
endif
obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
---
base-commit: b060cfd3f707ad3c8ae8322e1b149ba7e2cf33e0
change-id: 20240126-4-19-fix-lib-powerpc-backport-6f4a823adf1a
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
These patches are a follow up fixes for CVE-2021-4037 &
CVE-2018-13405, LTP test creat09.c & openat04.c reproduces the
privilege escalation in v5.4, the two patches solves this and they are
already backported to the other stable kernels.
Hi stable team, JFYI, yesterdays mainline commit 556857aa1d0855 ("wifi:
ath11k: rely on mac80211 debugfs handling for vif") from Benjamin
contains no stable tag, but a Fixes: tag for a 6.7 commit. So it guess
you will pick it up anyway. Might be worth picking up rather sooner than
later, as it fixes a regression that according to Kalle causes ath11k to
crash during suspend:
https://lore.kernel.org/all/874jfjiolh.fsf@kernel.org/https://bugzilla.kernel.org/show_bug.cgi?id=218364
Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.
From: JaimeLiao <jaimeliao(a)mxic.com.tw>
Support for MX35LF{2,4}GE4AD chips was added in mainline through
upstream commit 5ece78de88739b4c68263e9f2582380c1fd8314f.
The patch was later adapted to 5.4.y and backported through
stable commit 85258ae3070848d9d0f6fbee385be2db80e8cf26.
Fix the backport mentioned right above as it is wrong: the bigger chip
features 4kiB pages and not 2kiB pages.
Fixes: 85258ae30708 ("mtd: spinand: macronix: Add support for MX35LFxGE4AD")
Cc: stable(a)vger.kernel.org # v5.4.y
Cc: Miquel Raynal <miquel.raynal(a)bootlin.com>
Signed-off-by: JaimeLiao <jaimeliao(a)mxic.com.tw>
---
Hello,
This is my third attempt to fix a stable kernel. This patch is not a
backport from Linus' tree per-se, but a fix of a backport. The original
mainline commit is fine but the backported one is not, we need to fix
the backported commit in the 5.4.y stable kernel, and this is what I am
attempting to do. Let me know if further explanations are needed.
Regards,
Jaime
---
drivers/mtd/nand/spi/macronix.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c
index bbb1d68bce4a..f18c6cfe8ff5 100644
--- a/drivers/mtd/nand/spi/macronix.c
+++ b/drivers/mtd/nand/spi/macronix.c
@@ -125,7 +125,7 @@ static const struct spinand_info macronix_spinand_table[] = {
SPINAND_HAS_QE_BIT,
SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, NULL)),
SPINAND_INFO("MX35LF4GE4AD", 0x37,
- NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
+ NAND_MEMORG(1, 4096, 128, 64, 2048, 40, 1, 1, 1),
NAND_ECCREQ(8, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
--
2.25.1
Could this patch be backported to stable? I have seen similar OOMs with
reserved_highatomic:4096KB
Upstream commit: 04c8716f7b0075def05dc05646e2408f318167d2
Jocke
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 1b151e2435fc3a9b10c8946c6aebe9f3e1938c55
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012229-dealer-luster-6ff4@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
1b151e2435fc ("block: Remove special-casing of compound pages")
fd363244e883 ("block: Add BIO_PAGE_PINNED and associated infrastructure")
e51bab4e2058 ("block: Replace BIO_NO_PAGE_REF with BIO_PAGE_REFFED with inverted logic")
a450f49708ea ("iomap: Don't get an reference on ZERO_PAGE for direct I/O block zeroing")
7ee4ccf57484 ("block: set FOLL_PCI_P2PDMA in bio_map_user_iov()")
80bd4a7aab4c ("blk-mq: move the srcu_struct used for quiescing to the tagset")
e88811bc43b9 ("block: use on-stack page vec for <= UIO_FASTIOV")
480cb846c27b ("block: convert to advancing variants of iov_iter_get_pages{,_alloc}()")
e97424fd4472 ("block: fix leaking page ref on truncated direct io")
34cdb8c825f2 ("block: ensure bio_iov_add_page can't fail")
325347d965e7 ("block: ensure iov_iter advances for added pages")
46754bd05605 ("block: move ->bio_split to the gendisk")
5a97806f7dc0 ("block: change the blk_queue_split calling convention")
8374cfe647a1 ("Merge tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1b151e2435fc3a9b10c8946c6aebe9f3e1938c55 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Date: Mon, 14 Aug 2023 15:41:00 +0100
Subject: [PATCH] block: Remove special-casing of compound pages
The special casing was originally added in pre-git history; reproducing
the commit log here:
> commit a318a92567d77
> Author: Andrew Morton <akpm(a)osdl.org>
> Date: Sun Sep 21 01:42:22 2003 -0700
>
> [PATCH] Speed up direct-io hugetlbpage handling
>
> This patch short-circuits all the direct-io page dirtying logic for
> higher-order pages. Without this, we pointlessly bounce BIOs up to
> keventd all the time.
In the last twenty years, compound pages have become used for more than
just hugetlb. Rewrite these functions to operate on folios instead
of pages and remove the special case for hugetlbfs; I don't think
it's needed any more (and if it is, we can put it back in as a call
to folio_test_hugetlb()).
This was found by inspection; as far as I can tell, this bug can lead
to pages used as the destination of a direct I/O read not being marked
as dirty. If those pages are then reclaimed by the MM without being
dirtied for some other reason, they won't be written out. Then when
they're faulted back in, they will not contain the data they should.
It'll take a pretty unusual setup to produce this problem with several
races all going the wrong way.
This problem predates the folio work; it could for example have been
triggered by mmaping a THP in tmpfs and using that as the target of an
O_DIRECT read.
Fixes: 800d8c63b2e98 ("shmem: add huge pages support")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bio.c b/block/bio.c
index 816d412c06e9..5eba53ca953b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1145,13 +1145,22 @@ EXPORT_SYMBOL(bio_add_folio);
void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
- struct bvec_iter_all iter_all;
- struct bio_vec *bvec;
+ struct folio_iter fi;
+
+ bio_for_each_folio_all(fi, bio) {
+ struct page *page;
+ size_t done = 0;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (mark_dirty && !PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
- bio_release_page(bio, bvec->bv_page);
+ if (mark_dirty) {
+ folio_lock(fi.folio);
+ folio_mark_dirty(fi.folio);
+ folio_unlock(fi.folio);
+ }
+ page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
+ do {
+ bio_release_page(bio, page++);
+ done += PAGE_SIZE;
+ } while (done < fi.length);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1439,18 +1448,12 @@ EXPORT_SYMBOL(bio_free_pages);
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
*
- * The problem is that we cannot run set_page_dirty() from interrupt context
+ * The problem is that we cannot run folio_mark_dirty() from interrupt context
* because the required locks are not interrupt-safe. So what we can do is to
* mark the pages dirty _before_ performing IO. And in interrupt context,
* check that the pages are still dirty. If so, fine. If not, redirty them
* in process context.
*
- * We special-case compound pages here: normally this means reads into hugetlb
- * pages. The logic in here doesn't really work right for compound pages
- * because the VM does not uniformly chase down the head page in all cases.
- * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
- * handle them at all. So we skip compound pages here at an early stage.
- *
* Note that this code is very hard to test under normal circumstances because
* direct-io pins the pages with get_user_pages(). This makes
* is_page_cache_freeable return false, and the VM will not clean the pages.
@@ -1466,12 +1469,12 @@ EXPORT_SYMBOL(bio_free_pages);
*/
void bio_set_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
+ bio_for_each_folio_all(fi, bio) {
+ folio_lock(fi.folio);
+ folio_mark_dirty(fi.folio);
+ folio_unlock(fi.folio);
}
}
EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
@@ -1515,12 +1518,11 @@ static void bio_dirty_fn(struct work_struct *work)
void bio_check_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
+ struct folio_iter fi;
unsigned long flags;
- struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
+ bio_for_each_folio_all(fi, bio) {
+ if (!folio_test_dirty(fi.folio))
goto defer;
}
Hi,
I would like the following patch to be applied to the stable
kernel 6.6:
3837a0379533 serial: sc16is7xx: improve regmap debugfs by using one
regmap per port
As noted in
https://lore.kernel.org/all/20231211171353.2901416-1-hugo@hugovil.com/raw :
---------------------
I did not originally add a "Cc: stable" tag for the above mentioned
commit, as it was intended only to improve debugging using debugfs. But
since then, I have been able to confirm that it also fixes a long
standing bug in our system where the Tx interrupt are no longer enabled
at some point when transmitting large RS-485 paquets that completely
fill the FIFO and thus require multiple and subsequent writes to the
FIFO once space in it becomes available. I have been investigating why,
but so far I haven't found the exact cause, altough I suspect it has
something to do with regmap caching...
---------------------
It applies cleanly and was tested on this kernel using a
custom board with a Variscite IMX8MN NANO SOM, a NewHaven LCD, and two
SC16IS752 on a SPI bus. The four UARTs are using RS-485 mode.
Thank you,
Hugo Villeneuve
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x d5078509c8b06c5c472a60232815e41af81c6446
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012629-unrobed-other-c41a@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
d5078509c8b0 ("serial: sc16is7xx: improve do/while loop in sc16is7xx_irq()")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d5078509c8b06c5c472a60232815e41af81c6446 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:12 -0500
Subject: [PATCH] serial: sc16is7xx: improve do/while loop in sc16is7xx_irq()
Simplify and improve readability by replacing while(1) loop with
do {} while, and by using the keep_polling variable as the exit
condition, making it more explicit.
Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 44a11c89c949..8d257208cbf3 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -783,17 +783,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
static irqreturn_t sc16is7xx_irq(int irq, void *dev_id)
{
+ bool keep_polling;
+
struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id;
- while (1) {
- bool keep_polling = false;
+ do {
int i;
+ keep_polling = false;
+
for (i = 0; i < s->devtype->nr_uart; ++i)
keep_polling |= sc16is7xx_port_irq(s, i);
- if (!keep_polling)
- break;
- }
+ } while (keep_polling);
return IRQ_HANDLED;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x d5078509c8b06c5c472a60232815e41af81c6446
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012628-railcar-scrutiny-38b1@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
d5078509c8b0 ("serial: sc16is7xx: improve do/while loop in sc16is7xx_irq()")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d5078509c8b06c5c472a60232815e41af81c6446 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:12 -0500
Subject: [PATCH] serial: sc16is7xx: improve do/while loop in sc16is7xx_irq()
Simplify and improve readability by replacing while(1) loop with
do {} while, and by using the keep_polling variable as the exit
condition, making it more explicit.
Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 44a11c89c949..8d257208cbf3 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -783,17 +783,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
static irqreturn_t sc16is7xx_irq(int irq, void *dev_id)
{
+ bool keep_polling;
+
struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id;
- while (1) {
- bool keep_polling = false;
+ do {
int i;
+ keep_polling = false;
+
for (i = 0; i < s->devtype->nr_uart; ++i)
keep_polling |= sc16is7xx_port_irq(s, i);
- if (!keep_polling)
- break;
- }
+ } while (keep_polling);
return IRQ_HANDLED;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d5078509c8b06c5c472a60232815e41af81c6446
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012627-swimsuit-verbose-c14d@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
d5078509c8b0 ("serial: sc16is7xx: improve do/while loop in sc16is7xx_irq()")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d5078509c8b06c5c472a60232815e41af81c6446 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:12 -0500
Subject: [PATCH] serial: sc16is7xx: improve do/while loop in sc16is7xx_irq()
Simplify and improve readability by replacing while(1) loop with
do {} while, and by using the keep_polling variable as the exit
condition, making it more explicit.
Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 44a11c89c949..8d257208cbf3 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -783,17 +783,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
static irqreturn_t sc16is7xx_irq(int irq, void *dev_id)
{
+ bool keep_polling;
+
struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id;
- while (1) {
- bool keep_polling = false;
+ do {
int i;
+ keep_polling = false;
+
for (i = 0; i < s->devtype->nr_uart; ++i)
keep_polling |= sc16is7xx_port_irq(s, i);
- if (!keep_polling)
- break;
- }
+ } while (keep_polling);
return IRQ_HANDLED;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d5078509c8b06c5c472a60232815e41af81c6446
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012626-mutilator-saline-742b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
d5078509c8b0 ("serial: sc16is7xx: improve do/while loop in sc16is7xx_irq()")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d5078509c8b06c5c472a60232815e41af81c6446 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:12 -0500
Subject: [PATCH] serial: sc16is7xx: improve do/while loop in sc16is7xx_irq()
Simplify and improve readability by replacing while(1) loop with
do {} while, and by using the keep_polling variable as the exit
condition, making it more explicit.
Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 44a11c89c949..8d257208cbf3 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -783,17 +783,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
static irqreturn_t sc16is7xx_irq(int irq, void *dev_id)
{
+ bool keep_polling;
+
struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id;
- while (1) {
- bool keep_polling = false;
+ do {
int i;
+ keep_polling = false;
+
for (i = 0; i < s->devtype->nr_uart; ++i)
keep_polling |= sc16is7xx_port_irq(s, i);
- if (!keep_polling)
- break;
- }
+ } while (keep_polling);
return IRQ_HANDLED;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x ed647256e8f226241ecff7baaecdb8632ffc7ec1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012611-quickness-simmering-9ae2@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
ed647256e8f2 ("serial: sc16is7xx: remove obsolete loop in sc16is7xx_port_irq()")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
e045e18dbf3e ("Merge 6.7-rc5 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ed647256e8f226241ecff7baaecdb8632ffc7ec1 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:11 -0500
Subject: [PATCH] serial: sc16is7xx: remove obsolete loop in
sc16is7xx_port_irq()
Commit 834449872105 ("sc16is7xx: Fix for multi-channel stall") changed
sc16is7xx_port_irq() from looping multiple times when there was still
interrupts to serve. It simply changed the do {} while(1) loop to a
do {} while(0) loop, which makes the loop itself now obsolete.
Clean the code by removing this obsolete do {} while(0) loop.
Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-5-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index ced2446909a2..44a11c89c949 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -725,58 +725,55 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one)
static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
{
bool rc = true;
+ unsigned int iir, rxlen;
struct uart_port *port = &s->p[portno].port;
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
mutex_lock(&one->efr_lock);
- do {
- unsigned int iir, rxlen;
+ iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG);
+ if (iir & SC16IS7XX_IIR_NO_INT_BIT) {
+ rc = false;
+ goto out_port_irq;
+ }
- iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG);
- if (iir & SC16IS7XX_IIR_NO_INT_BIT) {
- rc = false;
- goto out_port_irq;
- }
+ iir &= SC16IS7XX_IIR_ID_MASK;
- iir &= SC16IS7XX_IIR_ID_MASK;
-
- switch (iir) {
- case SC16IS7XX_IIR_RDI_SRC:
- case SC16IS7XX_IIR_RLSE_SRC:
- case SC16IS7XX_IIR_RTOI_SRC:
- case SC16IS7XX_IIR_XOFFI_SRC:
- rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
-
- /*
- * There is a silicon bug that makes the chip report a
- * time-out interrupt but no data in the FIFO. This is
- * described in errata section 18.1.4.
- *
- * When this happens, read one byte from the FIFO to
- * clear the interrupt.
- */
- if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
- rxlen = 1;
-
- if (rxlen)
- sc16is7xx_handle_rx(port, rxlen, iir);
- break;
+ switch (iir) {
+ case SC16IS7XX_IIR_RDI_SRC:
+ case SC16IS7XX_IIR_RLSE_SRC:
+ case SC16IS7XX_IIR_RTOI_SRC:
+ case SC16IS7XX_IIR_XOFFI_SRC:
+ rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
+
+ /*
+ * There is a silicon bug that makes the chip report a
+ * time-out interrupt but no data in the FIFO. This is
+ * described in errata section 18.1.4.
+ *
+ * When this happens, read one byte from the FIFO to
+ * clear the interrupt.
+ */
+ if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
+ rxlen = 1;
+
+ if (rxlen)
+ sc16is7xx_handle_rx(port, rxlen, iir);
+ break;
/* CTSRTS interrupt comes only when CTS goes inactive */
- case SC16IS7XX_IIR_CTSRTS_SRC:
- case SC16IS7XX_IIR_MSI_SRC:
- sc16is7xx_update_mlines(one);
- break;
- case SC16IS7XX_IIR_THRI_SRC:
- sc16is7xx_handle_tx(port);
- break;
- default:
- dev_err_ratelimited(port->dev,
- "ttySC%i: Unexpected interrupt: %x",
- port->line, iir);
- break;
- }
- } while (0);
+ case SC16IS7XX_IIR_CTSRTS_SRC:
+ case SC16IS7XX_IIR_MSI_SRC:
+ sc16is7xx_update_mlines(one);
+ break;
+ case SC16IS7XX_IIR_THRI_SRC:
+ sc16is7xx_handle_tx(port);
+ break;
+ default:
+ dev_err_ratelimited(port->dev,
+ "ttySC%i: Unexpected interrupt: %x",
+ port->line, iir);
+ break;
+ }
out_port_irq:
mutex_unlock(&one->efr_lock);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x ed647256e8f226241ecff7baaecdb8632ffc7ec1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012610-unjustly-underdone-ddf3@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
ed647256e8f2 ("serial: sc16is7xx: remove obsolete loop in sc16is7xx_port_irq()")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
e045e18dbf3e ("Merge 6.7-rc5 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ed647256e8f226241ecff7baaecdb8632ffc7ec1 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:11 -0500
Subject: [PATCH] serial: sc16is7xx: remove obsolete loop in
sc16is7xx_port_irq()
Commit 834449872105 ("sc16is7xx: Fix for multi-channel stall") changed
sc16is7xx_port_irq() from looping multiple times when there was still
interrupts to serve. It simply changed the do {} while(1) loop to a
do {} while(0) loop, which makes the loop itself now obsolete.
Clean the code by removing this obsolete do {} while(0) loop.
Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-5-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index ced2446909a2..44a11c89c949 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -725,58 +725,55 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one)
static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
{
bool rc = true;
+ unsigned int iir, rxlen;
struct uart_port *port = &s->p[portno].port;
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
mutex_lock(&one->efr_lock);
- do {
- unsigned int iir, rxlen;
+ iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG);
+ if (iir & SC16IS7XX_IIR_NO_INT_BIT) {
+ rc = false;
+ goto out_port_irq;
+ }
- iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG);
- if (iir & SC16IS7XX_IIR_NO_INT_BIT) {
- rc = false;
- goto out_port_irq;
- }
+ iir &= SC16IS7XX_IIR_ID_MASK;
- iir &= SC16IS7XX_IIR_ID_MASK;
-
- switch (iir) {
- case SC16IS7XX_IIR_RDI_SRC:
- case SC16IS7XX_IIR_RLSE_SRC:
- case SC16IS7XX_IIR_RTOI_SRC:
- case SC16IS7XX_IIR_XOFFI_SRC:
- rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
-
- /*
- * There is a silicon bug that makes the chip report a
- * time-out interrupt but no data in the FIFO. This is
- * described in errata section 18.1.4.
- *
- * When this happens, read one byte from the FIFO to
- * clear the interrupt.
- */
- if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
- rxlen = 1;
-
- if (rxlen)
- sc16is7xx_handle_rx(port, rxlen, iir);
- break;
+ switch (iir) {
+ case SC16IS7XX_IIR_RDI_SRC:
+ case SC16IS7XX_IIR_RLSE_SRC:
+ case SC16IS7XX_IIR_RTOI_SRC:
+ case SC16IS7XX_IIR_XOFFI_SRC:
+ rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
+
+ /*
+ * There is a silicon bug that makes the chip report a
+ * time-out interrupt but no data in the FIFO. This is
+ * described in errata section 18.1.4.
+ *
+ * When this happens, read one byte from the FIFO to
+ * clear the interrupt.
+ */
+ if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
+ rxlen = 1;
+
+ if (rxlen)
+ sc16is7xx_handle_rx(port, rxlen, iir);
+ break;
/* CTSRTS interrupt comes only when CTS goes inactive */
- case SC16IS7XX_IIR_CTSRTS_SRC:
- case SC16IS7XX_IIR_MSI_SRC:
- sc16is7xx_update_mlines(one);
- break;
- case SC16IS7XX_IIR_THRI_SRC:
- sc16is7xx_handle_tx(port);
- break;
- default:
- dev_err_ratelimited(port->dev,
- "ttySC%i: Unexpected interrupt: %x",
- port->line, iir);
- break;
- }
- } while (0);
+ case SC16IS7XX_IIR_CTSRTS_SRC:
+ case SC16IS7XX_IIR_MSI_SRC:
+ sc16is7xx_update_mlines(one);
+ break;
+ case SC16IS7XX_IIR_THRI_SRC:
+ sc16is7xx_handle_tx(port);
+ break;
+ default:
+ dev_err_ratelimited(port->dev,
+ "ttySC%i: Unexpected interrupt: %x",
+ port->line, iir);
+ break;
+ }
out_port_irq:
mutex_unlock(&one->efr_lock);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x ed647256e8f226241ecff7baaecdb8632ffc7ec1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012609-phonebook-scrimmage-36f9@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
ed647256e8f2 ("serial: sc16is7xx: remove obsolete loop in sc16is7xx_port_irq()")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
e045e18dbf3e ("Merge 6.7-rc5 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ed647256e8f226241ecff7baaecdb8632ffc7ec1 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:11 -0500
Subject: [PATCH] serial: sc16is7xx: remove obsolete loop in
sc16is7xx_port_irq()
Commit 834449872105 ("sc16is7xx: Fix for multi-channel stall") changed
sc16is7xx_port_irq() from looping multiple times when there was still
interrupts to serve. It simply changed the do {} while(1) loop to a
do {} while(0) loop, which makes the loop itself now obsolete.
Clean the code by removing this obsolete do {} while(0) loop.
Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-5-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index ced2446909a2..44a11c89c949 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -725,58 +725,55 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one)
static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
{
bool rc = true;
+ unsigned int iir, rxlen;
struct uart_port *port = &s->p[portno].port;
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
mutex_lock(&one->efr_lock);
- do {
- unsigned int iir, rxlen;
+ iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG);
+ if (iir & SC16IS7XX_IIR_NO_INT_BIT) {
+ rc = false;
+ goto out_port_irq;
+ }
- iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG);
- if (iir & SC16IS7XX_IIR_NO_INT_BIT) {
- rc = false;
- goto out_port_irq;
- }
+ iir &= SC16IS7XX_IIR_ID_MASK;
- iir &= SC16IS7XX_IIR_ID_MASK;
-
- switch (iir) {
- case SC16IS7XX_IIR_RDI_SRC:
- case SC16IS7XX_IIR_RLSE_SRC:
- case SC16IS7XX_IIR_RTOI_SRC:
- case SC16IS7XX_IIR_XOFFI_SRC:
- rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
-
- /*
- * There is a silicon bug that makes the chip report a
- * time-out interrupt but no data in the FIFO. This is
- * described in errata section 18.1.4.
- *
- * When this happens, read one byte from the FIFO to
- * clear the interrupt.
- */
- if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
- rxlen = 1;
-
- if (rxlen)
- sc16is7xx_handle_rx(port, rxlen, iir);
- break;
+ switch (iir) {
+ case SC16IS7XX_IIR_RDI_SRC:
+ case SC16IS7XX_IIR_RLSE_SRC:
+ case SC16IS7XX_IIR_RTOI_SRC:
+ case SC16IS7XX_IIR_XOFFI_SRC:
+ rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
+
+ /*
+ * There is a silicon bug that makes the chip report a
+ * time-out interrupt but no data in the FIFO. This is
+ * described in errata section 18.1.4.
+ *
+ * When this happens, read one byte from the FIFO to
+ * clear the interrupt.
+ */
+ if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
+ rxlen = 1;
+
+ if (rxlen)
+ sc16is7xx_handle_rx(port, rxlen, iir);
+ break;
/* CTSRTS interrupt comes only when CTS goes inactive */
- case SC16IS7XX_IIR_CTSRTS_SRC:
- case SC16IS7XX_IIR_MSI_SRC:
- sc16is7xx_update_mlines(one);
- break;
- case SC16IS7XX_IIR_THRI_SRC:
- sc16is7xx_handle_tx(port);
- break;
- default:
- dev_err_ratelimited(port->dev,
- "ttySC%i: Unexpected interrupt: %x",
- port->line, iir);
- break;
- }
- } while (0);
+ case SC16IS7XX_IIR_CTSRTS_SRC:
+ case SC16IS7XX_IIR_MSI_SRC:
+ sc16is7xx_update_mlines(one);
+ break;
+ case SC16IS7XX_IIR_THRI_SRC:
+ sc16is7xx_handle_tx(port);
+ break;
+ default:
+ dev_err_ratelimited(port->dev,
+ "ttySC%i: Unexpected interrupt: %x",
+ port->line, iir);
+ break;
+ }
out_port_irq:
mutex_unlock(&one->efr_lock);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x ed647256e8f226241ecff7baaecdb8632ffc7ec1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012608-velcro-clanking-b6f7@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
ed647256e8f2 ("serial: sc16is7xx: remove obsolete loop in sc16is7xx_port_irq()")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
e045e18dbf3e ("Merge 6.7-rc5 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ed647256e8f226241ecff7baaecdb8632ffc7ec1 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:11 -0500
Subject: [PATCH] serial: sc16is7xx: remove obsolete loop in
sc16is7xx_port_irq()
Commit 834449872105 ("sc16is7xx: Fix for multi-channel stall") changed
sc16is7xx_port_irq() from looping multiple times when there was still
interrupts to serve. It simply changed the do {} while(1) loop to a
do {} while(0) loop, which makes the loop itself now obsolete.
Clean the code by removing this obsolete do {} while(0) loop.
Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-5-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index ced2446909a2..44a11c89c949 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -725,58 +725,55 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one)
static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
{
bool rc = true;
+ unsigned int iir, rxlen;
struct uart_port *port = &s->p[portno].port;
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
mutex_lock(&one->efr_lock);
- do {
- unsigned int iir, rxlen;
+ iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG);
+ if (iir & SC16IS7XX_IIR_NO_INT_BIT) {
+ rc = false;
+ goto out_port_irq;
+ }
- iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG);
- if (iir & SC16IS7XX_IIR_NO_INT_BIT) {
- rc = false;
- goto out_port_irq;
- }
+ iir &= SC16IS7XX_IIR_ID_MASK;
- iir &= SC16IS7XX_IIR_ID_MASK;
-
- switch (iir) {
- case SC16IS7XX_IIR_RDI_SRC:
- case SC16IS7XX_IIR_RLSE_SRC:
- case SC16IS7XX_IIR_RTOI_SRC:
- case SC16IS7XX_IIR_XOFFI_SRC:
- rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
-
- /*
- * There is a silicon bug that makes the chip report a
- * time-out interrupt but no data in the FIFO. This is
- * described in errata section 18.1.4.
- *
- * When this happens, read one byte from the FIFO to
- * clear the interrupt.
- */
- if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
- rxlen = 1;
-
- if (rxlen)
- sc16is7xx_handle_rx(port, rxlen, iir);
- break;
+ switch (iir) {
+ case SC16IS7XX_IIR_RDI_SRC:
+ case SC16IS7XX_IIR_RLSE_SRC:
+ case SC16IS7XX_IIR_RTOI_SRC:
+ case SC16IS7XX_IIR_XOFFI_SRC:
+ rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
+
+ /*
+ * There is a silicon bug that makes the chip report a
+ * time-out interrupt but no data in the FIFO. This is
+ * described in errata section 18.1.4.
+ *
+ * When this happens, read one byte from the FIFO to
+ * clear the interrupt.
+ */
+ if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
+ rxlen = 1;
+
+ if (rxlen)
+ sc16is7xx_handle_rx(port, rxlen, iir);
+ break;
/* CTSRTS interrupt comes only when CTS goes inactive */
- case SC16IS7XX_IIR_CTSRTS_SRC:
- case SC16IS7XX_IIR_MSI_SRC:
- sc16is7xx_update_mlines(one);
- break;
- case SC16IS7XX_IIR_THRI_SRC:
- sc16is7xx_handle_tx(port);
- break;
- default:
- dev_err_ratelimited(port->dev,
- "ttySC%i: Unexpected interrupt: %x",
- port->line, iir);
- break;
- }
- } while (0);
+ case SC16IS7XX_IIR_CTSRTS_SRC:
+ case SC16IS7XX_IIR_MSI_SRC:
+ sc16is7xx_update_mlines(one);
+ break;
+ case SC16IS7XX_IIR_THRI_SRC:
+ sc16is7xx_handle_tx(port);
+ break;
+ default:
+ dev_err_ratelimited(port->dev,
+ "ttySC%i: Unexpected interrupt: %x",
+ port->line, iir);
+ break;
+ }
out_port_irq:
mutex_unlock(&one->efr_lock);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8a1060ce974919f2a79807527ad82ac39336eda2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012653-unframed-creation-e7d8@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8a1060ce9749 ("serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in case of probe error")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
41a308cbedb2 ("serial: sc16is7xx: remove unused line structure member")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8a1060ce974919f2a79807527ad82ac39336eda2 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:08 -0500
Subject: [PATCH] serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in
case of probe error
If an error occurs during probing, the sc16is7xx_lines bitfield may be left
in a state that doesn't represent the correct state of lines allocation.
For example, in a system with two SC16 devices, if an error occurs only
during probing of channel (port) B of the second device, sc16is7xx_lines
final state will be 00001011b instead of the expected 00000011b.
This is caused in part because of the "i--" in the for/loop located in
the out_ports: error path.
Fix this by checking the return value of uart_add_one_port() and set line
allocation bit only if this was successful. This allows the refactor of
the obfuscated for(i--...) loop in the error path, and properly call
uart_remove_one_port() only when needed, and properly unset line allocation
bits.
Also use same mechanism in remove() when calling uart_remove_one_port().
Fixes: c64349722d14 ("sc16is7xx: support multiple devices")
Cc: <stable(a)vger.kernel.org>
Cc: Yury Norov <yury.norov(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-2-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index e40e4a99277e..17b90f971f96 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -407,19 +407,6 @@ static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
regmap_update_bits(one->regmap, reg, mask, val);
}
-static int sc16is7xx_alloc_line(void)
-{
- int i;
-
- BUILD_BUG_ON(SC16IS7XX_MAX_DEVS > BITS_PER_LONG);
-
- for (i = 0; i < SC16IS7XX_MAX_DEVS; i++)
- if (!test_and_set_bit(i, &sc16is7xx_lines))
- break;
-
- return i;
-}
-
static void sc16is7xx_power(struct uart_port *port, int on)
{
sc16is7xx_port_update(port, SC16IS7XX_IER_REG,
@@ -1550,6 +1537,13 @@ static int sc16is7xx_probe(struct device *dev,
SC16IS7XX_IOCONTROL_SRESET_BIT);
for (i = 0; i < devtype->nr_uart; ++i) {
+ s->p[i].port.line = find_first_zero_bit(&sc16is7xx_lines,
+ SC16IS7XX_MAX_DEVS);
+ if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) {
+ ret = -ERANGE;
+ goto out_ports;
+ }
+
/* Initialize port data */
s->p[i].port.dev = dev;
s->p[i].port.irq = irq;
@@ -1569,14 +1563,8 @@ static int sc16is7xx_probe(struct device *dev,
s->p[i].port.rs485_supported = sc16is7xx_rs485_supported;
s->p[i].port.ops = &sc16is7xx_ops;
s->p[i].old_mctrl = 0;
- s->p[i].port.line = sc16is7xx_alloc_line();
s->p[i].regmap = regmaps[i];
- if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) {
- ret = -ENOMEM;
- goto out_ports;
- }
-
mutex_init(&s->p[i].efr_lock);
ret = uart_get_rs485_mode(&s->p[i].port);
@@ -1594,8 +1582,13 @@ static int sc16is7xx_probe(struct device *dev,
kthread_init_work(&s->p[i].tx_work, sc16is7xx_tx_proc);
kthread_init_work(&s->p[i].reg_work, sc16is7xx_reg_proc);
kthread_init_delayed_work(&s->p[i].ms_work, sc16is7xx_ms_proc);
+
/* Register port */
- uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+ ret = uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+ if (ret)
+ goto out_ports;
+
+ set_bit(s->p[i].port.line, &sc16is7xx_lines);
/* Enable EFR */
sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG,
@@ -1653,10 +1646,9 @@ static int sc16is7xx_probe(struct device *dev,
#endif
out_ports:
- for (i--; i >= 0; i--) {
- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
- }
+ for (i = 0; i < devtype->nr_uart; i++)
+ if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines))
+ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
kthread_stop(s->kworker_task);
@@ -1678,8 +1670,8 @@ static void sc16is7xx_remove(struct device *dev)
for (i = 0; i < s->devtype->nr_uart; i++) {
kthread_cancel_delayed_work_sync(&s->p[i].ms_work);
- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
+ if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines))
+ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
sc16is7xx_power(&s->p[i].port, 0);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 8a1060ce974919f2a79807527ad82ac39336eda2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012652-underline-preformed-f7c0@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
8a1060ce9749 ("serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in case of probe error")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
41a308cbedb2 ("serial: sc16is7xx: remove unused line structure member")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8a1060ce974919f2a79807527ad82ac39336eda2 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:08 -0500
Subject: [PATCH] serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in
case of probe error
If an error occurs during probing, the sc16is7xx_lines bitfield may be left
in a state that doesn't represent the correct state of lines allocation.
For example, in a system with two SC16 devices, if an error occurs only
during probing of channel (port) B of the second device, sc16is7xx_lines
final state will be 00001011b instead of the expected 00000011b.
This is caused in part because of the "i--" in the for/loop located in
the out_ports: error path.
Fix this by checking the return value of uart_add_one_port() and set line
allocation bit only if this was successful. This allows the refactor of
the obfuscated for(i--...) loop in the error path, and properly call
uart_remove_one_port() only when needed, and properly unset line allocation
bits.
Also use same mechanism in remove() when calling uart_remove_one_port().
Fixes: c64349722d14 ("sc16is7xx: support multiple devices")
Cc: <stable(a)vger.kernel.org>
Cc: Yury Norov <yury.norov(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-2-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index e40e4a99277e..17b90f971f96 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -407,19 +407,6 @@ static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
regmap_update_bits(one->regmap, reg, mask, val);
}
-static int sc16is7xx_alloc_line(void)
-{
- int i;
-
- BUILD_BUG_ON(SC16IS7XX_MAX_DEVS > BITS_PER_LONG);
-
- for (i = 0; i < SC16IS7XX_MAX_DEVS; i++)
- if (!test_and_set_bit(i, &sc16is7xx_lines))
- break;
-
- return i;
-}
-
static void sc16is7xx_power(struct uart_port *port, int on)
{
sc16is7xx_port_update(port, SC16IS7XX_IER_REG,
@@ -1550,6 +1537,13 @@ static int sc16is7xx_probe(struct device *dev,
SC16IS7XX_IOCONTROL_SRESET_BIT);
for (i = 0; i < devtype->nr_uart; ++i) {
+ s->p[i].port.line = find_first_zero_bit(&sc16is7xx_lines,
+ SC16IS7XX_MAX_DEVS);
+ if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) {
+ ret = -ERANGE;
+ goto out_ports;
+ }
+
/* Initialize port data */
s->p[i].port.dev = dev;
s->p[i].port.irq = irq;
@@ -1569,14 +1563,8 @@ static int sc16is7xx_probe(struct device *dev,
s->p[i].port.rs485_supported = sc16is7xx_rs485_supported;
s->p[i].port.ops = &sc16is7xx_ops;
s->p[i].old_mctrl = 0;
- s->p[i].port.line = sc16is7xx_alloc_line();
s->p[i].regmap = regmaps[i];
- if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) {
- ret = -ENOMEM;
- goto out_ports;
- }
-
mutex_init(&s->p[i].efr_lock);
ret = uart_get_rs485_mode(&s->p[i].port);
@@ -1594,8 +1582,13 @@ static int sc16is7xx_probe(struct device *dev,
kthread_init_work(&s->p[i].tx_work, sc16is7xx_tx_proc);
kthread_init_work(&s->p[i].reg_work, sc16is7xx_reg_proc);
kthread_init_delayed_work(&s->p[i].ms_work, sc16is7xx_ms_proc);
+
/* Register port */
- uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+ ret = uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+ if (ret)
+ goto out_ports;
+
+ set_bit(s->p[i].port.line, &sc16is7xx_lines);
/* Enable EFR */
sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG,
@@ -1653,10 +1646,9 @@ static int sc16is7xx_probe(struct device *dev,
#endif
out_ports:
- for (i--; i >= 0; i--) {
- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
- }
+ for (i = 0; i < devtype->nr_uart; i++)
+ if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines))
+ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
kthread_stop(s->kworker_task);
@@ -1678,8 +1670,8 @@ static void sc16is7xx_remove(struct device *dev)
for (i = 0; i < s->devtype->nr_uart; i++) {
kthread_cancel_delayed_work_sync(&s->p[i].ms_work);
- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
+ if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines))
+ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
sc16is7xx_power(&s->p[i].port, 0);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 8a1060ce974919f2a79807527ad82ac39336eda2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012650-culture-bonus-603d@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
8a1060ce9749 ("serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in case of probe error")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
41a308cbedb2 ("serial: sc16is7xx: remove unused line structure member")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8a1060ce974919f2a79807527ad82ac39336eda2 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:08 -0500
Subject: [PATCH] serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in
case of probe error
If an error occurs during probing, the sc16is7xx_lines bitfield may be left
in a state that doesn't represent the correct state of lines allocation.
For example, in a system with two SC16 devices, if an error occurs only
during probing of channel (port) B of the second device, sc16is7xx_lines
final state will be 00001011b instead of the expected 00000011b.
This is caused in part because of the "i--" in the for/loop located in
the out_ports: error path.
Fix this by checking the return value of uart_add_one_port() and set line
allocation bit only if this was successful. This allows the refactor of
the obfuscated for(i--...) loop in the error path, and properly call
uart_remove_one_port() only when needed, and properly unset line allocation
bits.
Also use same mechanism in remove() when calling uart_remove_one_port().
Fixes: c64349722d14 ("sc16is7xx: support multiple devices")
Cc: <stable(a)vger.kernel.org>
Cc: Yury Norov <yury.norov(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-2-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index e40e4a99277e..17b90f971f96 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -407,19 +407,6 @@ static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
regmap_update_bits(one->regmap, reg, mask, val);
}
-static int sc16is7xx_alloc_line(void)
-{
- int i;
-
- BUILD_BUG_ON(SC16IS7XX_MAX_DEVS > BITS_PER_LONG);
-
- for (i = 0; i < SC16IS7XX_MAX_DEVS; i++)
- if (!test_and_set_bit(i, &sc16is7xx_lines))
- break;
-
- return i;
-}
-
static void sc16is7xx_power(struct uart_port *port, int on)
{
sc16is7xx_port_update(port, SC16IS7XX_IER_REG,
@@ -1550,6 +1537,13 @@ static int sc16is7xx_probe(struct device *dev,
SC16IS7XX_IOCONTROL_SRESET_BIT);
for (i = 0; i < devtype->nr_uart; ++i) {
+ s->p[i].port.line = find_first_zero_bit(&sc16is7xx_lines,
+ SC16IS7XX_MAX_DEVS);
+ if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) {
+ ret = -ERANGE;
+ goto out_ports;
+ }
+
/* Initialize port data */
s->p[i].port.dev = dev;
s->p[i].port.irq = irq;
@@ -1569,14 +1563,8 @@ static int sc16is7xx_probe(struct device *dev,
s->p[i].port.rs485_supported = sc16is7xx_rs485_supported;
s->p[i].port.ops = &sc16is7xx_ops;
s->p[i].old_mctrl = 0;
- s->p[i].port.line = sc16is7xx_alloc_line();
s->p[i].regmap = regmaps[i];
- if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) {
- ret = -ENOMEM;
- goto out_ports;
- }
-
mutex_init(&s->p[i].efr_lock);
ret = uart_get_rs485_mode(&s->p[i].port);
@@ -1594,8 +1582,13 @@ static int sc16is7xx_probe(struct device *dev,
kthread_init_work(&s->p[i].tx_work, sc16is7xx_tx_proc);
kthread_init_work(&s->p[i].reg_work, sc16is7xx_reg_proc);
kthread_init_delayed_work(&s->p[i].ms_work, sc16is7xx_ms_proc);
+
/* Register port */
- uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+ ret = uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+ if (ret)
+ goto out_ports;
+
+ set_bit(s->p[i].port.line, &sc16is7xx_lines);
/* Enable EFR */
sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG,
@@ -1653,10 +1646,9 @@ static int sc16is7xx_probe(struct device *dev,
#endif
out_ports:
- for (i--; i >= 0; i--) {
- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
- }
+ for (i = 0; i < devtype->nr_uart; i++)
+ if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines))
+ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
kthread_stop(s->kworker_task);
@@ -1678,8 +1670,8 @@ static void sc16is7xx_remove(struct device *dev)
for (i = 0; i < s->devtype->nr_uart; i++) {
kthread_cancel_delayed_work_sync(&s->p[i].ms_work);
- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
+ if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines))
+ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
sc16is7xx_power(&s->p[i].port, 0);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 8a1060ce974919f2a79807527ad82ac39336eda2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012649-autistic-cusp-6331@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
8a1060ce9749 ("serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in case of probe error")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
41a308cbedb2 ("serial: sc16is7xx: remove unused line structure member")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8a1060ce974919f2a79807527ad82ac39336eda2 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Thu, 21 Dec 2023 18:18:08 -0500
Subject: [PATCH] serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in
case of probe error
If an error occurs during probing, the sc16is7xx_lines bitfield may be left
in a state that doesn't represent the correct state of lines allocation.
For example, in a system with two SC16 devices, if an error occurs only
during probing of channel (port) B of the second device, sc16is7xx_lines
final state will be 00001011b instead of the expected 00000011b.
This is caused in part because of the "i--" in the for/loop located in
the out_ports: error path.
Fix this by checking the return value of uart_add_one_port() and set line
allocation bit only if this was successful. This allows the refactor of
the obfuscated for(i--...) loop in the error path, and properly call
uart_remove_one_port() only when needed, and properly unset line allocation
bits.
Also use same mechanism in remove() when calling uart_remove_one_port().
Fixes: c64349722d14 ("sc16is7xx: support multiple devices")
Cc: <stable(a)vger.kernel.org>
Cc: Yury Norov <yury.norov(a)gmail.com>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231221231823.2327894-2-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index e40e4a99277e..17b90f971f96 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -407,19 +407,6 @@ static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
regmap_update_bits(one->regmap, reg, mask, val);
}
-static int sc16is7xx_alloc_line(void)
-{
- int i;
-
- BUILD_BUG_ON(SC16IS7XX_MAX_DEVS > BITS_PER_LONG);
-
- for (i = 0; i < SC16IS7XX_MAX_DEVS; i++)
- if (!test_and_set_bit(i, &sc16is7xx_lines))
- break;
-
- return i;
-}
-
static void sc16is7xx_power(struct uart_port *port, int on)
{
sc16is7xx_port_update(port, SC16IS7XX_IER_REG,
@@ -1550,6 +1537,13 @@ static int sc16is7xx_probe(struct device *dev,
SC16IS7XX_IOCONTROL_SRESET_BIT);
for (i = 0; i < devtype->nr_uart; ++i) {
+ s->p[i].port.line = find_first_zero_bit(&sc16is7xx_lines,
+ SC16IS7XX_MAX_DEVS);
+ if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) {
+ ret = -ERANGE;
+ goto out_ports;
+ }
+
/* Initialize port data */
s->p[i].port.dev = dev;
s->p[i].port.irq = irq;
@@ -1569,14 +1563,8 @@ static int sc16is7xx_probe(struct device *dev,
s->p[i].port.rs485_supported = sc16is7xx_rs485_supported;
s->p[i].port.ops = &sc16is7xx_ops;
s->p[i].old_mctrl = 0;
- s->p[i].port.line = sc16is7xx_alloc_line();
s->p[i].regmap = regmaps[i];
- if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) {
- ret = -ENOMEM;
- goto out_ports;
- }
-
mutex_init(&s->p[i].efr_lock);
ret = uart_get_rs485_mode(&s->p[i].port);
@@ -1594,8 +1582,13 @@ static int sc16is7xx_probe(struct device *dev,
kthread_init_work(&s->p[i].tx_work, sc16is7xx_tx_proc);
kthread_init_work(&s->p[i].reg_work, sc16is7xx_reg_proc);
kthread_init_delayed_work(&s->p[i].ms_work, sc16is7xx_ms_proc);
+
/* Register port */
- uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+ ret = uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+ if (ret)
+ goto out_ports;
+
+ set_bit(s->p[i].port.line, &sc16is7xx_lines);
/* Enable EFR */
sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG,
@@ -1653,10 +1646,9 @@ static int sc16is7xx_probe(struct device *dev,
#endif
out_ports:
- for (i--; i >= 0; i--) {
- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
- }
+ for (i = 0; i < devtype->nr_uart; i++)
+ if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines))
+ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
kthread_stop(s->kworker_task);
@@ -1678,8 +1670,8 @@ static void sc16is7xx_remove(struct device *dev)
for (i = 0; i < s->devtype->nr_uart; i++) {
kthread_cancel_delayed_work_sync(&s->p[i].ms_work);
- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
+ if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines))
+ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
sc16is7xx_power(&s->p[i].port, 0);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 9915753037eba7135b209fef4f2afeca841af816
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012623-gray-panorama-6f95@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
9915753037eb ("serial: sc16is7xx: fix unconditional activation of THRI interrupt")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b465848be8a6 ("serial: sc16is7xx: Use port lock wrappers")
b4a778303ea0 ("serial: sc16is7xx: add missing support for rs485 devicetree properties")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9915753037eba7135b209fef4f2afeca841af816 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Mon, 11 Dec 2023 12:13:53 -0500
Subject: [PATCH] serial: sc16is7xx: fix unconditional activation of THRI
interrupt
Commit cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop") changed
behavior to unconditionnaly set the THRI interrupt in sc16is7xx_tx_proc().
For example when sending a 65 bytes message, and assuming the Tx FIFO is
initially empty, sc16is7xx_handle_tx() will write the first 64 bytes of the
message to the FIFO and sc16is7xx_tx_proc() will then activate THRI. When
the THRI IRQ is fired, the driver will write the remaining byte of the
message to the FIFO, and disable THRI by calling sc16is7xx_stop_tx().
When sending a 2 bytes message, sc16is7xx_handle_tx() will write the 2
bytes of the message to the FIFO and call sc16is7xx_stop_tx(), disabling
THRI. After sc16is7xx_handle_tx() exits, control returns to
sc16is7xx_tx_proc() which will unconditionally set THRI. When the THRI IRQ
is fired, the driver simply acknowledges the interrupt and does nothing
more, since all the data has already been written to the FIFO. This results
in 2 register writes and 4 register reads all for nothing and taking
precious cycles from the I2C/SPI bus.
Fix this by enabling the THRI interrupt only when we fill the Tx FIFO to
its maximum capacity and there are remaining bytes to send in the message.
Fixes: cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231211171353.2901416-7-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 7e4b9b52841d..e40e4a99277e 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -687,6 +687,8 @@ static void sc16is7xx_handle_tx(struct uart_port *port)
if (uart_circ_empty(xmit))
sc16is7xx_stop_tx(port);
+ else
+ sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT);
uart_port_unlock_irqrestore(port, flags);
}
@@ -815,7 +817,6 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws)
{
struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port);
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
- unsigned long flags;
if ((port->rs485.flags & SER_RS485_ENABLED) &&
(port->rs485.delay_rts_before_send > 0))
@@ -824,10 +825,6 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws)
mutex_lock(&one->efr_lock);
sc16is7xx_handle_tx(port);
mutex_unlock(&one->efr_lock);
-
- uart_port_lock_irqsave(port, &flags);
- sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT);
- uart_port_unlock_irqrestore(port, flags);
}
static void sc16is7xx_reconf_rs485(struct uart_port *port)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 9915753037eba7135b209fef4f2afeca841af816
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012621-imaging-challenge-bb29@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
9915753037eb ("serial: sc16is7xx: fix unconditional activation of THRI interrupt")
4409df5866b7 ("serial: sc16is7xx: change EFR lock to operate on each channels")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
b465848be8a6 ("serial: sc16is7xx: Use port lock wrappers")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9915753037eba7135b209fef4f2afeca841af816 Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Mon, 11 Dec 2023 12:13:53 -0500
Subject: [PATCH] serial: sc16is7xx: fix unconditional activation of THRI
interrupt
Commit cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop") changed
behavior to unconditionnaly set the THRI interrupt in sc16is7xx_tx_proc().
For example when sending a 65 bytes message, and assuming the Tx FIFO is
initially empty, sc16is7xx_handle_tx() will write the first 64 bytes of the
message to the FIFO and sc16is7xx_tx_proc() will then activate THRI. When
the THRI IRQ is fired, the driver will write the remaining byte of the
message to the FIFO, and disable THRI by calling sc16is7xx_stop_tx().
When sending a 2 bytes message, sc16is7xx_handle_tx() will write the 2
bytes of the message to the FIFO and call sc16is7xx_stop_tx(), disabling
THRI. After sc16is7xx_handle_tx() exits, control returns to
sc16is7xx_tx_proc() which will unconditionally set THRI. When the THRI IRQ
is fired, the driver simply acknowledges the interrupt and does nothing
more, since all the data has already been written to the FIFO. This results
in 2 register writes and 4 register reads all for nothing and taking
precious cycles from the I2C/SPI bus.
Fix this by enabling the THRI interrupt only when we fill the Tx FIFO to
its maximum capacity and there are remaining bytes to send in the message.
Fixes: cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231211171353.2901416-7-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 7e4b9b52841d..e40e4a99277e 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -687,6 +687,8 @@ static void sc16is7xx_handle_tx(struct uart_port *port)
if (uart_circ_empty(xmit))
sc16is7xx_stop_tx(port);
+ else
+ sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT);
uart_port_unlock_irqrestore(port, flags);
}
@@ -815,7 +817,6 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws)
{
struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port);
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
- unsigned long flags;
if ((port->rs485.flags & SER_RS485_ENABLED) &&
(port->rs485.delay_rts_before_send > 0))
@@ -824,10 +825,6 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws)
mutex_lock(&one->efr_lock);
sc16is7xx_handle_tx(port);
mutex_unlock(&one->efr_lock);
-
- uart_port_lock_irqsave(port, &flags);
- sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT);
- uart_port_unlock_irqrestore(port, flags);
}
static void sc16is7xx_reconf_rs485(struct uart_port *port)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x dbf4ab821804df071c8b566d9813083125e6d97b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012610-grumbly-purge-ae51@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
dbf4ab821804 ("serial: sc16is7xx: convert from _raw_ to _noinc_ regmap functions for FIFO")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dbf4ab821804df071c8b566d9813083125e6d97b Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Mon, 11 Dec 2023 12:13:52 -0500
Subject: [PATCH] serial: sc16is7xx: convert from _raw_ to _noinc_ regmap
functions for FIFO
The SC16IS7XX IC supports a burst mode to access the FIFOs where the
initial register address is sent ($00), followed by all the FIFO data
without having to resend the register address each time. In this mode, the
IC doesn't increment the register address for each R/W byte.
The regmap_raw_read() and regmap_raw_write() are functions which can
perform IO over multiple registers. They are currently used to read/write
from/to the FIFO, and although they operate correctly in this burst mode on
the SPI bus, they would corrupt the regmap cache if it was not disabled
manually. The reason is that when the R/W size is more than 1 byte, these
functions assume that the register address is incremented and handle the
cache accordingly.
Convert FIFO R/W functions to use the regmap _noinc_ versions in order to
remove the manual cache control which was a workaround when using the
_raw_ versions. FIFO registers are properly declared as volatile so
cache will not be used/updated for FIFO accesses.
Fixes: dfeae619d781 ("serial: sc16is7xx")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231211171353.2901416-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 0bda9b74d096..7e4b9b52841d 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -381,9 +381,7 @@ static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen)
struct sc16is7xx_port *s = dev_get_drvdata(port->dev);
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
- regcache_cache_bypass(one->regmap, true);
- regmap_raw_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen);
- regcache_cache_bypass(one->regmap, false);
+ regmap_noinc_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen);
}
static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
@@ -398,9 +396,7 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
if (unlikely(!to_send))
return;
- regcache_cache_bypass(one->regmap, true);
- regmap_raw_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send);
- regcache_cache_bypass(one->regmap, false);
+ regmap_noinc_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send);
}
static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
@@ -492,6 +488,11 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg)
return false;
}
+static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg)
+{
+ return reg == SC16IS7XX_RHR_REG;
+}
+
static int sc16is7xx_set_baud(struct uart_port *port, int baud)
{
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
@@ -1709,6 +1710,10 @@ static struct regmap_config regcfg = {
.cache_type = REGCACHE_RBTREE,
.volatile_reg = sc16is7xx_regmap_volatile,
.precious_reg = sc16is7xx_regmap_precious,
+ .writeable_noinc_reg = sc16is7xx_regmap_noinc,
+ .readable_noinc_reg = sc16is7xx_regmap_noinc,
+ .max_raw_read = SC16IS7XX_FIFO_SIZE,
+ .max_raw_write = SC16IS7XX_FIFO_SIZE,
.max_register = SC16IS7XX_EFCR_REG,
};
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x dbf4ab821804df071c8b566d9813083125e6d97b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012608-refocus-gender-343a@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
dbf4ab821804 ("serial: sc16is7xx: convert from _raw_ to _noinc_ regmap functions for FIFO")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dbf4ab821804df071c8b566d9813083125e6d97b Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Mon, 11 Dec 2023 12:13:52 -0500
Subject: [PATCH] serial: sc16is7xx: convert from _raw_ to _noinc_ regmap
functions for FIFO
The SC16IS7XX IC supports a burst mode to access the FIFOs where the
initial register address is sent ($00), followed by all the FIFO data
without having to resend the register address each time. In this mode, the
IC doesn't increment the register address for each R/W byte.
The regmap_raw_read() and regmap_raw_write() are functions which can
perform IO over multiple registers. They are currently used to read/write
from/to the FIFO, and although they operate correctly in this burst mode on
the SPI bus, they would corrupt the regmap cache if it was not disabled
manually. The reason is that when the R/W size is more than 1 byte, these
functions assume that the register address is incremented and handle the
cache accordingly.
Convert FIFO R/W functions to use the regmap _noinc_ versions in order to
remove the manual cache control which was a workaround when using the
_raw_ versions. FIFO registers are properly declared as volatile so
cache will not be used/updated for FIFO accesses.
Fixes: dfeae619d781 ("serial: sc16is7xx")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231211171353.2901416-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 0bda9b74d096..7e4b9b52841d 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -381,9 +381,7 @@ static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen)
struct sc16is7xx_port *s = dev_get_drvdata(port->dev);
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
- regcache_cache_bypass(one->regmap, true);
- regmap_raw_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen);
- regcache_cache_bypass(one->regmap, false);
+ regmap_noinc_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen);
}
static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
@@ -398,9 +396,7 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
if (unlikely(!to_send))
return;
- regcache_cache_bypass(one->regmap, true);
- regmap_raw_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send);
- regcache_cache_bypass(one->regmap, false);
+ regmap_noinc_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send);
}
static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
@@ -492,6 +488,11 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg)
return false;
}
+static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg)
+{
+ return reg == SC16IS7XX_RHR_REG;
+}
+
static int sc16is7xx_set_baud(struct uart_port *port, int baud)
{
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
@@ -1709,6 +1710,10 @@ static struct regmap_config regcfg = {
.cache_type = REGCACHE_RBTREE,
.volatile_reg = sc16is7xx_regmap_volatile,
.precious_reg = sc16is7xx_regmap_precious,
+ .writeable_noinc_reg = sc16is7xx_regmap_noinc,
+ .readable_noinc_reg = sc16is7xx_regmap_noinc,
+ .max_raw_read = SC16IS7XX_FIFO_SIZE,
+ .max_raw_write = SC16IS7XX_FIFO_SIZE,
.max_register = SC16IS7XX_EFCR_REG,
};
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x dbf4ab821804df071c8b566d9813083125e6d97b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012606-coroner-legwork-d56e@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
dbf4ab821804 ("serial: sc16is7xx: convert from _raw_ to _noinc_ regmap functions for FIFO")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dbf4ab821804df071c8b566d9813083125e6d97b Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Mon, 11 Dec 2023 12:13:52 -0500
Subject: [PATCH] serial: sc16is7xx: convert from _raw_ to _noinc_ regmap
functions for FIFO
The SC16IS7XX IC supports a burst mode to access the FIFOs where the
initial register address is sent ($00), followed by all the FIFO data
without having to resend the register address each time. In this mode, the
IC doesn't increment the register address for each R/W byte.
The regmap_raw_read() and regmap_raw_write() are functions which can
perform IO over multiple registers. They are currently used to read/write
from/to the FIFO, and although they operate correctly in this burst mode on
the SPI bus, they would corrupt the regmap cache if it was not disabled
manually. The reason is that when the R/W size is more than 1 byte, these
functions assume that the register address is incremented and handle the
cache accordingly.
Convert FIFO R/W functions to use the regmap _noinc_ versions in order to
remove the manual cache control which was a workaround when using the
_raw_ versions. FIFO registers are properly declared as volatile so
cache will not be used/updated for FIFO accesses.
Fixes: dfeae619d781 ("serial: sc16is7xx")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231211171353.2901416-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 0bda9b74d096..7e4b9b52841d 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -381,9 +381,7 @@ static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen)
struct sc16is7xx_port *s = dev_get_drvdata(port->dev);
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
- regcache_cache_bypass(one->regmap, true);
- regmap_raw_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen);
- regcache_cache_bypass(one->regmap, false);
+ regmap_noinc_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen);
}
static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
@@ -398,9 +396,7 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
if (unlikely(!to_send))
return;
- regcache_cache_bypass(one->regmap, true);
- regmap_raw_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send);
- regcache_cache_bypass(one->regmap, false);
+ regmap_noinc_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send);
}
static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
@@ -492,6 +488,11 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg)
return false;
}
+static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg)
+{
+ return reg == SC16IS7XX_RHR_REG;
+}
+
static int sc16is7xx_set_baud(struct uart_port *port, int baud)
{
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
@@ -1709,6 +1710,10 @@ static struct regmap_config regcfg = {
.cache_type = REGCACHE_RBTREE,
.volatile_reg = sc16is7xx_regmap_volatile,
.precious_reg = sc16is7xx_regmap_precious,
+ .writeable_noinc_reg = sc16is7xx_regmap_noinc,
+ .readable_noinc_reg = sc16is7xx_regmap_noinc,
+ .max_raw_read = SC16IS7XX_FIFO_SIZE,
+ .max_raw_write = SC16IS7XX_FIFO_SIZE,
.max_register = SC16IS7XX_EFCR_REG,
};
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x dbf4ab821804df071c8b566d9813083125e6d97b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012605-emphases-explore-4dff@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
dbf4ab821804 ("serial: sc16is7xx: convert from _raw_ to _noinc_ regmap functions for FIFO")
3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port")
049994292834 ("serial: sc16is7xx: fix regression with GPIO configuration")
dabc54a45711 ("serial: sc16is7xx: remove obsolete out_thread label")
c8f71b49ee4d ("serial: sc16is7xx: setup GPIO controller later in probe")
267913ecf737 ("serial: sc16is7xx: Fill in rs485_supported")
21144bab4f11 ("sc16is7xx: Handle modem status lines")
cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop")
d4ab5487cc77 ("Merge 5.17-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dbf4ab821804df071c8b566d9813083125e6d97b Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Date: Mon, 11 Dec 2023 12:13:52 -0500
Subject: [PATCH] serial: sc16is7xx: convert from _raw_ to _noinc_ regmap
functions for FIFO
The SC16IS7XX IC supports a burst mode to access the FIFOs where the
initial register address is sent ($00), followed by all the FIFO data
without having to resend the register address each time. In this mode, the
IC doesn't increment the register address for each R/W byte.
The regmap_raw_read() and regmap_raw_write() are functions which can
perform IO over multiple registers. They are currently used to read/write
from/to the FIFO, and although they operate correctly in this burst mode on
the SPI bus, they would corrupt the regmap cache if it was not disabled
manually. The reason is that when the R/W size is more than 1 byte, these
functions assume that the register address is incremented and handle the
cache accordingly.
Convert FIFO R/W functions to use the regmap _noinc_ versions in order to
remove the manual cache control which was a workaround when using the
_raw_ versions. FIFO registers are properly declared as volatile so
cache will not be used/updated for FIFO accesses.
Fixes: dfeae619d781 ("serial: sc16is7xx")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231211171353.2901416-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 0bda9b74d096..7e4b9b52841d 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -381,9 +381,7 @@ static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen)
struct sc16is7xx_port *s = dev_get_drvdata(port->dev);
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
- regcache_cache_bypass(one->regmap, true);
- regmap_raw_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen);
- regcache_cache_bypass(one->regmap, false);
+ regmap_noinc_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen);
}
static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
@@ -398,9 +396,7 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
if (unlikely(!to_send))
return;
- regcache_cache_bypass(one->regmap, true);
- regmap_raw_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send);
- regcache_cache_bypass(one->regmap, false);
+ regmap_noinc_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send);
}
static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
@@ -492,6 +488,11 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg)
return false;
}
+static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg)
+{
+ return reg == SC16IS7XX_RHR_REG;
+}
+
static int sc16is7xx_set_baud(struct uart_port *port, int baud)
{
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
@@ -1709,6 +1710,10 @@ static struct regmap_config regcfg = {
.cache_type = REGCACHE_RBTREE,
.volatile_reg = sc16is7xx_regmap_volatile,
.precious_reg = sc16is7xx_regmap_precious,
+ .writeable_noinc_reg = sc16is7xx_regmap_noinc,
+ .readable_noinc_reg = sc16is7xx_regmap_noinc,
+ .max_raw_read = SC16IS7XX_FIFO_SIZE,
+ .max_raw_write = SC16IS7XX_FIFO_SIZE,
.max_register = SC16IS7XX_EFCR_REG,
};
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5ec8e8ea8b7783fab150cf86404fc38cb4db8800
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012608-bonanza-edition-993c@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage")
f1dc0db296bd ("mm: use __pfn_to_section() instead of open coding it")
0a9f9f623166 ("mm/sparse.c: only use subsection map in VMEMMAP case")
37bc15020a96 ("mm/sparse.c: introduce a new function clear_subsection_map()")
5d87255cadde ("mm/sparse.c: introduce new function fill_subsection_map()")
b943f045a9af ("mm/sparse: fix kernel crash with pfn_section_valid check")
d41e2f3bd546 ("mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case")
1f503443e7df ("mm/sparse.c: reset section's mem_map when fully deactivated")
8068df3b6037 ("mm/memory_hotplug: don't free usage map when removing a re-added early section")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5ec8e8ea8b7783fab150cf86404fc38cb4db8800 Mon Sep 17 00:00:00 2001
From: Charan Teja Kalla <quic_charante(a)quicinc.com>
Date: Fri, 13 Oct 2023 18:34:27 +0530
Subject: [PATCH] mm/sparsemem: fix race in accessing memory_section->usage
The below race is observed on a PFN which falls into the device memory
region with the system memory configuration where PFN's are such that
[ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL]. Since normal zone start and end
pfn contains the device memory PFN's as well, the compaction triggered
will try on the device memory PFN's too though they end up in NOP(because
pfn_to_online_page() returns NULL for ZONE_DEVICE memory sections). When
from other core, the section mappings are being removed for the
ZONE_DEVICE region, that the PFN in question belongs to, on which
compaction is currently being operated is resulting into the kernel crash
with CONFIG_SPASEMEM_VMEMAP enabled. The crash logs can be seen at [1].
compact_zone() memunmap_pages
------------- ---------------
__pageblock_pfn_to_page
......
(a)pfn_valid():
valid_section()//return true
(b)__remove_pages()->
sparse_remove_section()->
section_deactivate():
[Free the array ms->usage and set
ms->usage = NULL]
pfn_section_valid()
[Access ms->usage which
is NULL]
NOTE: From the above it can be said that the race is reduced to between
the pfn_valid()/pfn_section_valid() and the section deactivate with
SPASEMEM_VMEMAP enabled.
The commit b943f045a9af("mm/sparse: fix kernel crash with
pfn_section_valid check") tried to address the same problem by clearing
the SECTION_HAS_MEM_MAP with the expectation of valid_section() returns
false thus ms->usage is not accessed.
Fix this issue by the below steps:
a) Clear SECTION_HAS_MEM_MAP before freeing the ->usage.
b) RCU protected read side critical section will either return NULL
when SECTION_HAS_MEM_MAP is cleared or can successfully access ->usage.
c) Free the ->usage with kfree_rcu() and set ms->usage = NULL. No
attempt will be made to access ->usage after this as the
SECTION_HAS_MEM_MAP is cleared thus valid_section() return false.
Thanks to David/Pavan for their inputs on this patch.
[1] https://lore.kernel.org/linux-mm/994410bb-89aa-d987-1f50-f514903c55aa@quici…
On Snapdragon SoC, with the mentioned memory configuration of PFN's as
[ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL], we are able to see bunch of
issues daily while testing on a device farm.
For this particular issue below is the log. Though the below log is
not directly pointing to the pfn_section_valid(){ ms->usage;}, when we
loaded this dump on T32 lauterbach tool, it is pointing.
[ 540.578056] Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000000
[ 540.578068] Mem abort info:
[ 540.578070] ESR = 0x0000000096000005
[ 540.578073] EC = 0x25: DABT (current EL), IL = 32 bits
[ 540.578077] SET = 0, FnV = 0
[ 540.578080] EA = 0, S1PTW = 0
[ 540.578082] FSC = 0x05: level 1 translation fault
[ 540.578085] Data abort info:
[ 540.578086] ISV = 0, ISS = 0x00000005
[ 540.578088] CM = 0, WnR = 0
[ 540.579431] pstate: 82400005 (Nzcv daif +PAN -UAO +TCO -DIT -SSBSBTYPE=--)
[ 540.579436] pc : __pageblock_pfn_to_page+0x6c/0x14c
[ 540.579454] lr : compact_zone+0x994/0x1058
[ 540.579460] sp : ffffffc03579b510
[ 540.579463] x29: ffffffc03579b510 x28: 0000000000235800 x27:000000000000000c
[ 540.579470] x26: 0000000000235c00 x25: 0000000000000068 x24:ffffffc03579b640
[ 540.579477] x23: 0000000000000001 x22: ffffffc03579b660 x21:0000000000000000
[ 540.579483] x20: 0000000000235bff x19: ffffffdebf7e3940 x18:ffffffdebf66d140
[ 540.579489] x17: 00000000739ba063 x16: 00000000739ba063 x15:00000000009f4bff
[ 540.579495] x14: 0000008000000000 x13: 0000000000000000 x12:0000000000000001
[ 540.579501] x11: 0000000000000000 x10: 0000000000000000 x9 :ffffff897d2cd440
[ 540.579507] x8 : 0000000000000000 x7 : 0000000000000000 x6 :ffffffc03579b5b4
[ 540.579512] x5 : 0000000000027f25 x4 : ffffffc03579b5b8 x3 :0000000000000001
[ 540.579518] x2 : ffffffdebf7e3940 x1 : 0000000000235c00 x0 :0000000000235800
[ 540.579524] Call trace:
[ 540.579527] __pageblock_pfn_to_page+0x6c/0x14c
[ 540.579533] compact_zone+0x994/0x1058
[ 540.579536] try_to_compact_pages+0x128/0x378
[ 540.579540] __alloc_pages_direct_compact+0x80/0x2b0
[ 540.579544] __alloc_pages_slowpath+0x5c0/0xe10
[ 540.579547] __alloc_pages+0x250/0x2d0
[ 540.579550] __iommu_dma_alloc_noncontiguous+0x13c/0x3fc
[ 540.579561] iommu_dma_alloc+0xa0/0x320
[ 540.579565] dma_alloc_attrs+0xd4/0x108
[quic_charante(a)quicinc.com: use kfree_rcu() in place of synchronize_rcu(), per David]
Link: https://lkml.kernel.org/r/1698403778-20938-1-git-send-email-quic_charante@q…
Link: https://lkml.kernel.org/r/1697202267-23600-1-git-send-email-quic_charante@q…
Fixes: f46edbd1b151 ("mm/sparsemem: add helpers track active portions of a section at boot")
Signed-off-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ec73582e7d27..2efd3be484fd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1799,6 +1799,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
struct mem_section_usage {
+ struct rcu_head rcu;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
@@ -1992,7 +1993,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
int idx = subsection_map_index(pfn);
- return test_bit(idx, ms->usage->subsection_map);
+ return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
@@ -2016,6 +2017,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
static inline int pfn_valid(unsigned long pfn)
{
struct mem_section *ms;
+ int ret;
/*
* Ensure the upper PAGE_SHIFT bits are clear in the
@@ -2029,13 +2031,19 @@ static inline int pfn_valid(unsigned long pfn)
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __pfn_to_section(pfn);
- if (!valid_section(ms))
+ rcu_read_lock();
+ if (!valid_section(ms)) {
+ rcu_read_unlock();
return 0;
+ }
/*
* Traditionally early sections always returned pfn_valid() for
* the entire section-sized span.
*/
- return early_section(ms) || pfn_section_valid(ms, pfn);
+ ret = early_section(ms) || pfn_section_valid(ms, pfn);
+ rcu_read_unlock();
+
+ return ret;
}
#endif
diff --git a/mm/sparse.c b/mm/sparse.c
index 77d91e565045..338cf946dee8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -791,6 +791,13 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
if (empty) {
unsigned long section_nr = pfn_to_section_nr(pfn);
+ /*
+ * Mark the section invalid so that valid_section()
+ * return false. This prevents code from dereferencing
+ * ms->usage array.
+ */
+ ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
+
/*
* When removing an early section, the usage map is kept (as the
* usage maps of other sections fall into the same page). It
@@ -799,16 +806,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
* was allocated during boot.
*/
if (!PageReserved(virt_to_page(ms->usage))) {
- kfree(ms->usage);
- ms->usage = NULL;
+ kfree_rcu(ms->usage, rcu);
+ WRITE_ONCE(ms->usage, NULL);
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- /*
- * Mark the section invalid so that valid_section()
- * return false. This prevents code from dereferencing
- * ms->usage array.
- */
- ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
}
/*
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5ec8e8ea8b7783fab150cf86404fc38cb4db8800
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012604-protegee-reassign-85ae@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage")
f1dc0db296bd ("mm: use __pfn_to_section() instead of open coding it")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5ec8e8ea8b7783fab150cf86404fc38cb4db8800 Mon Sep 17 00:00:00 2001
From: Charan Teja Kalla <quic_charante(a)quicinc.com>
Date: Fri, 13 Oct 2023 18:34:27 +0530
Subject: [PATCH] mm/sparsemem: fix race in accessing memory_section->usage
The below race is observed on a PFN which falls into the device memory
region with the system memory configuration where PFN's are such that
[ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL]. Since normal zone start and end
pfn contains the device memory PFN's as well, the compaction triggered
will try on the device memory PFN's too though they end up in NOP(because
pfn_to_online_page() returns NULL for ZONE_DEVICE memory sections). When
from other core, the section mappings are being removed for the
ZONE_DEVICE region, that the PFN in question belongs to, on which
compaction is currently being operated is resulting into the kernel crash
with CONFIG_SPASEMEM_VMEMAP enabled. The crash logs can be seen at [1].
compact_zone() memunmap_pages
------------- ---------------
__pageblock_pfn_to_page
......
(a)pfn_valid():
valid_section()//return true
(b)__remove_pages()->
sparse_remove_section()->
section_deactivate():
[Free the array ms->usage and set
ms->usage = NULL]
pfn_section_valid()
[Access ms->usage which
is NULL]
NOTE: From the above it can be said that the race is reduced to between
the pfn_valid()/pfn_section_valid() and the section deactivate with
SPASEMEM_VMEMAP enabled.
The commit b943f045a9af("mm/sparse: fix kernel crash with
pfn_section_valid check") tried to address the same problem by clearing
the SECTION_HAS_MEM_MAP with the expectation of valid_section() returns
false thus ms->usage is not accessed.
Fix this issue by the below steps:
a) Clear SECTION_HAS_MEM_MAP before freeing the ->usage.
b) RCU protected read side critical section will either return NULL
when SECTION_HAS_MEM_MAP is cleared or can successfully access ->usage.
c) Free the ->usage with kfree_rcu() and set ms->usage = NULL. No
attempt will be made to access ->usage after this as the
SECTION_HAS_MEM_MAP is cleared thus valid_section() return false.
Thanks to David/Pavan for their inputs on this patch.
[1] https://lore.kernel.org/linux-mm/994410bb-89aa-d987-1f50-f514903c55aa@quici…
On Snapdragon SoC, with the mentioned memory configuration of PFN's as
[ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL], we are able to see bunch of
issues daily while testing on a device farm.
For this particular issue below is the log. Though the below log is
not directly pointing to the pfn_section_valid(){ ms->usage;}, when we
loaded this dump on T32 lauterbach tool, it is pointing.
[ 540.578056] Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000000
[ 540.578068] Mem abort info:
[ 540.578070] ESR = 0x0000000096000005
[ 540.578073] EC = 0x25: DABT (current EL), IL = 32 bits
[ 540.578077] SET = 0, FnV = 0
[ 540.578080] EA = 0, S1PTW = 0
[ 540.578082] FSC = 0x05: level 1 translation fault
[ 540.578085] Data abort info:
[ 540.578086] ISV = 0, ISS = 0x00000005
[ 540.578088] CM = 0, WnR = 0
[ 540.579431] pstate: 82400005 (Nzcv daif +PAN -UAO +TCO -DIT -SSBSBTYPE=--)
[ 540.579436] pc : __pageblock_pfn_to_page+0x6c/0x14c
[ 540.579454] lr : compact_zone+0x994/0x1058
[ 540.579460] sp : ffffffc03579b510
[ 540.579463] x29: ffffffc03579b510 x28: 0000000000235800 x27:000000000000000c
[ 540.579470] x26: 0000000000235c00 x25: 0000000000000068 x24:ffffffc03579b640
[ 540.579477] x23: 0000000000000001 x22: ffffffc03579b660 x21:0000000000000000
[ 540.579483] x20: 0000000000235bff x19: ffffffdebf7e3940 x18:ffffffdebf66d140
[ 540.579489] x17: 00000000739ba063 x16: 00000000739ba063 x15:00000000009f4bff
[ 540.579495] x14: 0000008000000000 x13: 0000000000000000 x12:0000000000000001
[ 540.579501] x11: 0000000000000000 x10: 0000000000000000 x9 :ffffff897d2cd440
[ 540.579507] x8 : 0000000000000000 x7 : 0000000000000000 x6 :ffffffc03579b5b4
[ 540.579512] x5 : 0000000000027f25 x4 : ffffffc03579b5b8 x3 :0000000000000001
[ 540.579518] x2 : ffffffdebf7e3940 x1 : 0000000000235c00 x0 :0000000000235800
[ 540.579524] Call trace:
[ 540.579527] __pageblock_pfn_to_page+0x6c/0x14c
[ 540.579533] compact_zone+0x994/0x1058
[ 540.579536] try_to_compact_pages+0x128/0x378
[ 540.579540] __alloc_pages_direct_compact+0x80/0x2b0
[ 540.579544] __alloc_pages_slowpath+0x5c0/0xe10
[ 540.579547] __alloc_pages+0x250/0x2d0
[ 540.579550] __iommu_dma_alloc_noncontiguous+0x13c/0x3fc
[ 540.579561] iommu_dma_alloc+0xa0/0x320
[ 540.579565] dma_alloc_attrs+0xd4/0x108
[quic_charante(a)quicinc.com: use kfree_rcu() in place of synchronize_rcu(), per David]
Link: https://lkml.kernel.org/r/1698403778-20938-1-git-send-email-quic_charante@q…
Link: https://lkml.kernel.org/r/1697202267-23600-1-git-send-email-quic_charante@q…
Fixes: f46edbd1b151 ("mm/sparsemem: add helpers track active portions of a section at boot")
Signed-off-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ec73582e7d27..2efd3be484fd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1799,6 +1799,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
struct mem_section_usage {
+ struct rcu_head rcu;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
@@ -1992,7 +1993,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
int idx = subsection_map_index(pfn);
- return test_bit(idx, ms->usage->subsection_map);
+ return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
@@ -2016,6 +2017,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
static inline int pfn_valid(unsigned long pfn)
{
struct mem_section *ms;
+ int ret;
/*
* Ensure the upper PAGE_SHIFT bits are clear in the
@@ -2029,13 +2031,19 @@ static inline int pfn_valid(unsigned long pfn)
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __pfn_to_section(pfn);
- if (!valid_section(ms))
+ rcu_read_lock();
+ if (!valid_section(ms)) {
+ rcu_read_unlock();
return 0;
+ }
/*
* Traditionally early sections always returned pfn_valid() for
* the entire section-sized span.
*/
- return early_section(ms) || pfn_section_valid(ms, pfn);
+ ret = early_section(ms) || pfn_section_valid(ms, pfn);
+ rcu_read_unlock();
+
+ return ret;
}
#endif
diff --git a/mm/sparse.c b/mm/sparse.c
index 77d91e565045..338cf946dee8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -791,6 +791,13 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
if (empty) {
unsigned long section_nr = pfn_to_section_nr(pfn);
+ /*
+ * Mark the section invalid so that valid_section()
+ * return false. This prevents code from dereferencing
+ * ms->usage array.
+ */
+ ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
+
/*
* When removing an early section, the usage map is kept (as the
* usage maps of other sections fall into the same page). It
@@ -799,16 +806,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
* was allocated during boot.
*/
if (!PageReserved(virt_to_page(ms->usage))) {
- kfree(ms->usage);
- ms->usage = NULL;
+ kfree_rcu(ms->usage, rcu);
+ WRITE_ONCE(ms->usage, NULL);
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- /*
- * Mark the section invalid so that valid_section()
- * return false. This prevents code from dereferencing
- * ms->usage array.
- */
- ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
}
/*
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5ec8e8ea8b7783fab150cf86404fc38cb4db8800
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012600-swaddling-filled-c787@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage")
f1dc0db296bd ("mm: use __pfn_to_section() instead of open coding it")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5ec8e8ea8b7783fab150cf86404fc38cb4db8800 Mon Sep 17 00:00:00 2001
From: Charan Teja Kalla <quic_charante(a)quicinc.com>
Date: Fri, 13 Oct 2023 18:34:27 +0530
Subject: [PATCH] mm/sparsemem: fix race in accessing memory_section->usage
The below race is observed on a PFN which falls into the device memory
region with the system memory configuration where PFN's are such that
[ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL]. Since normal zone start and end
pfn contains the device memory PFN's as well, the compaction triggered
will try on the device memory PFN's too though they end up in NOP(because
pfn_to_online_page() returns NULL for ZONE_DEVICE memory sections). When
from other core, the section mappings are being removed for the
ZONE_DEVICE region, that the PFN in question belongs to, on which
compaction is currently being operated is resulting into the kernel crash
with CONFIG_SPASEMEM_VMEMAP enabled. The crash logs can be seen at [1].
compact_zone() memunmap_pages
------------- ---------------
__pageblock_pfn_to_page
......
(a)pfn_valid():
valid_section()//return true
(b)__remove_pages()->
sparse_remove_section()->
section_deactivate():
[Free the array ms->usage and set
ms->usage = NULL]
pfn_section_valid()
[Access ms->usage which
is NULL]
NOTE: From the above it can be said that the race is reduced to between
the pfn_valid()/pfn_section_valid() and the section deactivate with
SPASEMEM_VMEMAP enabled.
The commit b943f045a9af("mm/sparse: fix kernel crash with
pfn_section_valid check") tried to address the same problem by clearing
the SECTION_HAS_MEM_MAP with the expectation of valid_section() returns
false thus ms->usage is not accessed.
Fix this issue by the below steps:
a) Clear SECTION_HAS_MEM_MAP before freeing the ->usage.
b) RCU protected read side critical section will either return NULL
when SECTION_HAS_MEM_MAP is cleared or can successfully access ->usage.
c) Free the ->usage with kfree_rcu() and set ms->usage = NULL. No
attempt will be made to access ->usage after this as the
SECTION_HAS_MEM_MAP is cleared thus valid_section() return false.
Thanks to David/Pavan for their inputs on this patch.
[1] https://lore.kernel.org/linux-mm/994410bb-89aa-d987-1f50-f514903c55aa@quici…
On Snapdragon SoC, with the mentioned memory configuration of PFN's as
[ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL], we are able to see bunch of
issues daily while testing on a device farm.
For this particular issue below is the log. Though the below log is
not directly pointing to the pfn_section_valid(){ ms->usage;}, when we
loaded this dump on T32 lauterbach tool, it is pointing.
[ 540.578056] Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000000
[ 540.578068] Mem abort info:
[ 540.578070] ESR = 0x0000000096000005
[ 540.578073] EC = 0x25: DABT (current EL), IL = 32 bits
[ 540.578077] SET = 0, FnV = 0
[ 540.578080] EA = 0, S1PTW = 0
[ 540.578082] FSC = 0x05: level 1 translation fault
[ 540.578085] Data abort info:
[ 540.578086] ISV = 0, ISS = 0x00000005
[ 540.578088] CM = 0, WnR = 0
[ 540.579431] pstate: 82400005 (Nzcv daif +PAN -UAO +TCO -DIT -SSBSBTYPE=--)
[ 540.579436] pc : __pageblock_pfn_to_page+0x6c/0x14c
[ 540.579454] lr : compact_zone+0x994/0x1058
[ 540.579460] sp : ffffffc03579b510
[ 540.579463] x29: ffffffc03579b510 x28: 0000000000235800 x27:000000000000000c
[ 540.579470] x26: 0000000000235c00 x25: 0000000000000068 x24:ffffffc03579b640
[ 540.579477] x23: 0000000000000001 x22: ffffffc03579b660 x21:0000000000000000
[ 540.579483] x20: 0000000000235bff x19: ffffffdebf7e3940 x18:ffffffdebf66d140
[ 540.579489] x17: 00000000739ba063 x16: 00000000739ba063 x15:00000000009f4bff
[ 540.579495] x14: 0000008000000000 x13: 0000000000000000 x12:0000000000000001
[ 540.579501] x11: 0000000000000000 x10: 0000000000000000 x9 :ffffff897d2cd440
[ 540.579507] x8 : 0000000000000000 x7 : 0000000000000000 x6 :ffffffc03579b5b4
[ 540.579512] x5 : 0000000000027f25 x4 : ffffffc03579b5b8 x3 :0000000000000001
[ 540.579518] x2 : ffffffdebf7e3940 x1 : 0000000000235c00 x0 :0000000000235800
[ 540.579524] Call trace:
[ 540.579527] __pageblock_pfn_to_page+0x6c/0x14c
[ 540.579533] compact_zone+0x994/0x1058
[ 540.579536] try_to_compact_pages+0x128/0x378
[ 540.579540] __alloc_pages_direct_compact+0x80/0x2b0
[ 540.579544] __alloc_pages_slowpath+0x5c0/0xe10
[ 540.579547] __alloc_pages+0x250/0x2d0
[ 540.579550] __iommu_dma_alloc_noncontiguous+0x13c/0x3fc
[ 540.579561] iommu_dma_alloc+0xa0/0x320
[ 540.579565] dma_alloc_attrs+0xd4/0x108
[quic_charante(a)quicinc.com: use kfree_rcu() in place of synchronize_rcu(), per David]
Link: https://lkml.kernel.org/r/1698403778-20938-1-git-send-email-quic_charante@q…
Link: https://lkml.kernel.org/r/1697202267-23600-1-git-send-email-quic_charante@q…
Fixes: f46edbd1b151 ("mm/sparsemem: add helpers track active portions of a section at boot")
Signed-off-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ec73582e7d27..2efd3be484fd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1799,6 +1799,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
struct mem_section_usage {
+ struct rcu_head rcu;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
@@ -1992,7 +1993,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
int idx = subsection_map_index(pfn);
- return test_bit(idx, ms->usage->subsection_map);
+ return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
@@ -2016,6 +2017,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
static inline int pfn_valid(unsigned long pfn)
{
struct mem_section *ms;
+ int ret;
/*
* Ensure the upper PAGE_SHIFT bits are clear in the
@@ -2029,13 +2031,19 @@ static inline int pfn_valid(unsigned long pfn)
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __pfn_to_section(pfn);
- if (!valid_section(ms))
+ rcu_read_lock();
+ if (!valid_section(ms)) {
+ rcu_read_unlock();
return 0;
+ }
/*
* Traditionally early sections always returned pfn_valid() for
* the entire section-sized span.
*/
- return early_section(ms) || pfn_section_valid(ms, pfn);
+ ret = early_section(ms) || pfn_section_valid(ms, pfn);
+ rcu_read_unlock();
+
+ return ret;
}
#endif
diff --git a/mm/sparse.c b/mm/sparse.c
index 77d91e565045..338cf946dee8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -791,6 +791,13 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
if (empty) {
unsigned long section_nr = pfn_to_section_nr(pfn);
+ /*
+ * Mark the section invalid so that valid_section()
+ * return false. This prevents code from dereferencing
+ * ms->usage array.
+ */
+ ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
+
/*
* When removing an early section, the usage map is kept (as the
* usage maps of other sections fall into the same page). It
@@ -799,16 +806,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
* was allocated during boot.
*/
if (!PageReserved(virt_to_page(ms->usage))) {
- kfree(ms->usage);
- ms->usage = NULL;
+ kfree_rcu(ms->usage, rcu);
+ WRITE_ONCE(ms->usage, NULL);
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- /*
- * Mark the section invalid so that valid_section()
- * return false. This prevents code from dereferencing
- * ms->usage array.
- */
- ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
}
/*
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x d1adb25df7111de83b64655a80b5a135adbded61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012640-racing-flannels-8384@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
d1adb25df711 ("mm: migrate: fix getting incorrect page mapping during page migration")
eebb3dabbb5c ("mm: migrate: record the mlocked page status to remove unnecessary lru drain")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1adb25df7111de83b64655a80b5a135adbded61 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Date: Fri, 15 Dec 2023 20:07:52 +0800
Subject: [PATCH] mm: migrate: fix getting incorrect page mapping during page
migration
When running stress-ng testing, we found below kernel crash after a few hours:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
pc : dentry_name+0xd8/0x224
lr : pointer+0x22c/0x370
sp : ffff800025f134c0
......
Call trace:
dentry_name+0xd8/0x224
pointer+0x22c/0x370
vsnprintf+0x1ec/0x730
vscnprintf+0x2c/0x60
vprintk_store+0x70/0x234
vprintk_emit+0xe0/0x24c
vprintk_default+0x3c/0x44
vprintk_func+0x84/0x2d0
printk+0x64/0x88
__dump_page+0x52c/0x530
dump_page+0x14/0x20
set_migratetype_isolate+0x110/0x224
start_isolate_page_range+0xc4/0x20c
offline_pages+0x124/0x474
memory_block_offline+0x44/0xf4
memory_subsys_offline+0x3c/0x70
device_offline+0xf0/0x120
......
After analyzing the vmcore, I found this issue is caused by page migration.
The scenario is that, one thread is doing page migration, and we will use the
target page's ->mapping field to save 'anon_vma' pointer between page unmap and
page move, and now the target page is locked and refcount is 1.
Currently, there is another stress-ng thread performing memory hotplug,
attempting to offline the target page that is being migrated. It discovers that
the refcount of this target page is 1, preventing the offline operation, thus
proceeding to dump the page. However, page_mapping() of the target page may
return an incorrect file mapping to crash the system in dump_mapping(), since
the target page->mapping only saves 'anon_vma' pointer without setting
PAGE_MAPPING_ANON flag.
There are seveval ways to fix this issue:
(1) Setting the PAGE_MAPPING_ANON flag for target page's ->mapping when saving
'anon_vma', but this can confuse PageAnon() for PFN walkers, since the target
page has not built mappings yet.
(2) Getting the page lock to call page_mapping() in __dump_page() to avoid crashing
the system, however, there are still some PFN walkers that call page_mapping()
without holding the page lock, such as compaction.
(3) Using target page->private field to save the 'anon_vma' pointer and 2 bits
page state, just as page->mapping records an anonymous page, which can remove
the page_mapping() impact for PFN walkers and also seems a simple way.
So I choose option 3 to fix this issue, and this can also fix other potential
issues for PFN walkers, such as compaction.
Link: https://lkml.kernel.org/r/e60b17a88afc38cb32f84c3e30837ec70b343d2b.17026417…
Fixes: 64c8902ed441 ("migrate_pages: split unmap_and_move() to _unmap() and _move()")
Signed-off-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Xu Yu <xuyu(a)linux.alibaba.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/migrate.c b/mm/migrate.c
index 397f2a6e34cb..bad3039d165e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1025,38 +1025,31 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
}
/*
- * To record some information during migration, we use some unused
- * fields (mapping and private) of struct folio of the newly allocated
- * destination folio. This is safe because nobody is using them
- * except us.
+ * To record some information during migration, we use unused private
+ * field of struct folio of the newly allocated destination folio.
+ * This is safe because nobody is using it except us.
*/
-union migration_ptr {
- struct anon_vma *anon_vma;
- struct address_space *mapping;
-};
-
enum {
PAGE_WAS_MAPPED = BIT(0),
PAGE_WAS_MLOCKED = BIT(1),
+ PAGE_OLD_STATES = PAGE_WAS_MAPPED | PAGE_WAS_MLOCKED,
};
static void __migrate_folio_record(struct folio *dst,
- unsigned long old_page_state,
+ int old_page_state,
struct anon_vma *anon_vma)
{
- union migration_ptr ptr = { .anon_vma = anon_vma };
- dst->mapping = ptr.mapping;
- dst->private = (void *)old_page_state;
+ dst->private = (void *)anon_vma + old_page_state;
}
static void __migrate_folio_extract(struct folio *dst,
int *old_page_state,
struct anon_vma **anon_vmap)
{
- union migration_ptr ptr = { .mapping = dst->mapping };
- *anon_vmap = ptr.anon_vma;
- *old_page_state = (unsigned long)dst->private;
- dst->mapping = NULL;
+ unsigned long private = (unsigned long)dst->private;
+
+ *anon_vmap = (struct anon_vma *)(private & ~PAGE_OLD_STATES);
+ *old_page_state = private & PAGE_OLD_STATES;
dst->private = NULL;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 00bcfcd47a52f50f07a2e88d730d7931384cb073
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012616-armrest-racoon-14ea@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
00bcfcd47a52 ("selftests: mm: hugepage-vmemmap fails on 64K page size systems")
0183d777c29a ("selftests: mm: remove duplicate unneeded defines")
af605d26a8f2 ("selftests/mm: merge util.h into vm_util.h")
baa489fabd01 ("selftests/vm: rename selftests/vm to selftests/mm")
799fb82aa132 ("tools/vm: rename tools/vm to tools/mm")
93fb70aa5904 ("selftests/vm: add KSM unmerge tests")
7aca5ca15493 ("selftests/vm: anon_cow: prepare for non-anonymous COW tests")
65f199b2b40d ("vmalloc: add reviewers for vmalloc code")
e487ebbd1298 ("selftests/vm: anon_cow: add liburing test cases")
f4b5fd6946e2 ("selftests/vm: anon_cow: THP tests")
a905e82ae44b ("selftests/vm: factor out pagemap_is_populated() into vm_util")
69c66add5663 ("selftests/vm: anon_cow: test COW handling of anonymous memory")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 00bcfcd47a52f50f07a2e88d730d7931384cb073 Mon Sep 17 00:00:00 2001
From: Donet Tom <donettom(a)linux.vnet.ibm.com>
Date: Wed, 10 Jan 2024 14:03:35 +0530
Subject: [PATCH] selftests: mm: hugepage-vmemmap fails on 64K page size
systems
The kernel sefltest mm/hugepage-vmemmap fails on architectures which has
different page size other than 4K. In hugepage-vmemmap page size used is
4k so the pfn calculation will go wrong on systems which has different
page size .The length of MAP_HUGETLB memory must be hugepage aligned but
in hugepage-vmemmap map length is 2M so this will not get aligned if the
system has differnet hugepage size.
Added psize() to get the page size and default_huge_page_size() to
get the default hugepage size at run time, hugepage-vmemmap test pass
on powerpc with 64K page size and x86 with 4K page size.
Result on powerpc without patch (page size 64K)
*# ./hugepage-vmemmap
Returned address is 0x7effff000000 whose pfn is 0
Head page flags (100000000) is invalid
check_page_flags: Invalid argument
*#
Result on powerpc with patch (page size 64K)
*# ./hugepage-vmemmap
Returned address is 0x7effff000000 whose pfn is 600
*#
Result on x86 with patch (page size 4K)
*# ./hugepage-vmemmap
Returned address is 0x7fc7c2c00000 whose pfn is 1dac00
*#
Link: https://lkml.kernel.org/r/3b3a3ae37ba21218481c482a872bbf7526031600.17048657…
Fixes: b147c89cd429 ("selftests: vm: add a hugetlb test case")
Signed-off-by: Donet Tom <donettom(a)linux.vnet.ibm.com>
Reported-by: Geetika Moolchandani <geetika(a)linux.ibm.com>
Tested-by: Geetika Moolchandani <geetika(a)linux.ibm.com>
Acked-by: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c
index 5b354c209e93..894d28c3dd47 100644
--- a/tools/testing/selftests/mm/hugepage-vmemmap.c
+++ b/tools/testing/selftests/mm/hugepage-vmemmap.c
@@ -10,10 +10,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
-
-#define MAP_LENGTH (2UL * 1024 * 1024)
-
-#define PAGE_SIZE 4096
+#include "vm_util.h"
#define PAGE_COMPOUND_HEAD (1UL << 15)
#define PAGE_COMPOUND_TAIL (1UL << 16)
@@ -39,6 +36,9 @@
#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
#endif
+static size_t pagesize;
+static size_t maplength;
+
static void write_bytes(char *addr, size_t length)
{
unsigned long i;
@@ -56,7 +56,7 @@ static unsigned long virt_to_pfn(void *addr)
if (fd < 0)
return -1UL;
- lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET);
+ lseek(fd, (unsigned long)addr / pagesize * sizeof(pagemap), SEEK_SET);
read(fd, &pagemap, sizeof(pagemap));
close(fd);
@@ -86,7 +86,7 @@ static int check_page_flags(unsigned long pfn)
* this also verifies kernel has correctly set the fake page_head to tail
* while hugetlb_free_vmemmap is enabled.
*/
- for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) {
+ for (i = 1; i < maplength / pagesize; i++) {
read(fd, &pageflags, sizeof(pageflags));
if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
(pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
@@ -106,18 +106,25 @@ int main(int argc, char **argv)
void *addr;
unsigned long pfn;
- addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
+ pagesize = psize();
+ maplength = default_huge_page_size();
+ if (!maplength) {
+ printf("Unable to determine huge page size\n");
+ exit(1);
+ }
+
+ addr = mmap(MAP_ADDR, maplength, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
if (addr == MAP_FAILED) {
perror("mmap");
exit(1);
}
/* Trigger allocation of HugeTLB page. */
- write_bytes(addr, MAP_LENGTH);
+ write_bytes(addr, maplength);
pfn = virt_to_pfn(addr);
if (pfn == -1UL) {
- munmap(addr, MAP_LENGTH);
+ munmap(addr, maplength);
perror("virt_to_pfn");
exit(1);
}
@@ -125,13 +132,13 @@ int main(int argc, char **argv)
printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
if (check_page_flags(pfn) < 0) {
- munmap(addr, MAP_LENGTH);
+ munmap(addr, maplength);
perror("check_page_flags");
exit(1);
}
/* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
- if (munmap(addr, MAP_LENGTH)) {
+ if (munmap(addr, maplength)) {
perror("munmap");
exit(1);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e95fa7404716f6e25021e66067271a4ad8eb1486
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012650-unfrozen-jukebox-4be8@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e95fa7404716 ("thermal: gov_power_allocator: avoid inability to reset a cdev")
94be1d27aa8d ("thermal: gov_power_allocator: Use trip pointers instead of trip indices")
2c7b4bfadef0 ("thermal: core: Store trip pointer in struct thermal_instance")
790930f44289 ("thermal: core: Introduce thermal_cooling_device_update()")
c43198af05cf ("thermal: core: Introduce thermal_cooling_device_present()")
5b8de18ee902 ("thermal/core: Move the thermal trip code to a dedicated file")
e6ec64f85237 ("thermal/drivers/qcom: Fix set_trip_temp() deadlock")
1fa86b0a3692 ("thermal/drivers/qcom: Use generic thermal_zone_get_trip() function")
7f725a23f2b7 ("thermal/core/governors: Use thermal_zone_get_trip() instead of ops functions")
2e38a2a981b2 ("thermal/core: Add a generic thermal_zone_set_trip() function")
7c3d5c20dc16 ("thermal/core: Add a generic thermal_zone_get_trip() function")
91b3aafc2238 ("thermal/core: Remove thermal_zone_set_trips()")
05eeee2b51b4 ("thermal/core: Protect sysfs accesses to thermal operations with thermal zone mutex")
1c439dec359c ("thermal/core: Introduce locked version of thermal_zone_device_update")
a365105c685c ("thermal: sysfs: Reuse cdev->max_state")
c408b3d1d9bb ("thermal: Validate new state in cur_state_store()")
597f500fde76 ("thermal/core: Add a check before calling set_trip_temp()")
a930da9bf583 ("thermal/core: Move the mutex inside the thermal_zone_device_update() function")
670a5e356cb6 ("thermal/core: Move the thermal zone lock out of the governors")
63561fe36b09 ("thermal/governors: Group the thermal zone lock inside the throttle function")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e95fa7404716f6e25021e66067271a4ad8eb1486 Mon Sep 17 00:00:00 2001
From: Di Shen <di.shen(a)unisoc.com>
Date: Wed, 10 Jan 2024 19:55:26 +0800
Subject: [PATCH] thermal: gov_power_allocator: avoid inability to reset a cdev
Commit 0952177f2a1f ("thermal/core/power_allocator: Update once
cooling devices when temp is low") adds an update flag to avoid
triggering a thermal event when there is no need, and the thermal
cdev is updated once when the temperature is low.
But when the trips are writable, and switch_on_temp is set to be a
higher value, the cooling device state may not be reset to 0,
because last_temperature is smaller than switch_on_temp.
For example:
First:
switch_on_temp=70 control_temp=85;
Then userspace change the trip_temp:
switch_on_temp=45 control_temp=55 cur_temp=54
Then userspace reset the trip_temp:
switch_on_temp=70 control_temp=85 cur_temp=57 last_temp=54
At this time, the cooling device state should be reset to 0.
However, because cur_temp(57) < switch_on_temp(70)
last_temp(54) < switch_on_temp(70) ----> update = false,
update is false, the cooling device state can not be reset.
Using the observation that tz->passive can also be regarded as the
temperature status, set the update flag to the tz->passive value.
When the temperature drops below switch_on for the first time, the
states of cooling devices can be reset once, and tz->passive is updated
to 0. In the next round, because tz->passive is 0, cdev->state will not
be updated.
By using the tz->passive value as the "update" flag, the issue above
can be solved, and the cooling devices can be updated only once when the
temperature is low.
Fixes: 0952177f2a1f ("thermal/core/power_allocator: Update once cooling devices when temp is low")
Cc: 5.13+ <stable(a)vger.kernel.org> # 5.13+
Suggested-by: Wei Wang <wvw(a)google.com>
Signed-off-by: Di Shen <di.shen(a)unisoc.com>
Reviewed-by: Lukasz Luba <lukasz.luba(a)arm.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 7b6aa265ff6a..81e061f183ad 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -762,7 +762,7 @@ static int power_allocator_throttle(struct thermal_zone_device *tz,
trip = params->trip_switch_on;
if (trip && tz->temperature < trip->temperature) {
- update = tz->last_temperature >= trip->temperature;
+ update = tz->passive;
tz->passive = 0;
reset_pid_controller(params);
allow_maximum_power(tz, update);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x e95fa7404716f6e25021e66067271a4ad8eb1486
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012648-impending-gala-16f2@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
e95fa7404716 ("thermal: gov_power_allocator: avoid inability to reset a cdev")
94be1d27aa8d ("thermal: gov_power_allocator: Use trip pointers instead of trip indices")
2c7b4bfadef0 ("thermal: core: Store trip pointer in struct thermal_instance")
790930f44289 ("thermal: core: Introduce thermal_cooling_device_update()")
c43198af05cf ("thermal: core: Introduce thermal_cooling_device_present()")
5b8de18ee902 ("thermal/core: Move the thermal trip code to a dedicated file")
e6ec64f85237 ("thermal/drivers/qcom: Fix set_trip_temp() deadlock")
1fa86b0a3692 ("thermal/drivers/qcom: Use generic thermal_zone_get_trip() function")
7f725a23f2b7 ("thermal/core/governors: Use thermal_zone_get_trip() instead of ops functions")
2e38a2a981b2 ("thermal/core: Add a generic thermal_zone_set_trip() function")
7c3d5c20dc16 ("thermal/core: Add a generic thermal_zone_get_trip() function")
91b3aafc2238 ("thermal/core: Remove thermal_zone_set_trips()")
05eeee2b51b4 ("thermal/core: Protect sysfs accesses to thermal operations with thermal zone mutex")
1c439dec359c ("thermal/core: Introduce locked version of thermal_zone_device_update")
a365105c685c ("thermal: sysfs: Reuse cdev->max_state")
c408b3d1d9bb ("thermal: Validate new state in cur_state_store()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e95fa7404716f6e25021e66067271a4ad8eb1486 Mon Sep 17 00:00:00 2001
From: Di Shen <di.shen(a)unisoc.com>
Date: Wed, 10 Jan 2024 19:55:26 +0800
Subject: [PATCH] thermal: gov_power_allocator: avoid inability to reset a cdev
Commit 0952177f2a1f ("thermal/core/power_allocator: Update once
cooling devices when temp is low") adds an update flag to avoid
triggering a thermal event when there is no need, and the thermal
cdev is updated once when the temperature is low.
But when the trips are writable, and switch_on_temp is set to be a
higher value, the cooling device state may not be reset to 0,
because last_temperature is smaller than switch_on_temp.
For example:
First:
switch_on_temp=70 control_temp=85;
Then userspace change the trip_temp:
switch_on_temp=45 control_temp=55 cur_temp=54
Then userspace reset the trip_temp:
switch_on_temp=70 control_temp=85 cur_temp=57 last_temp=54
At this time, the cooling device state should be reset to 0.
However, because cur_temp(57) < switch_on_temp(70)
last_temp(54) < switch_on_temp(70) ----> update = false,
update is false, the cooling device state can not be reset.
Using the observation that tz->passive can also be regarded as the
temperature status, set the update flag to the tz->passive value.
When the temperature drops below switch_on for the first time, the
states of cooling devices can be reset once, and tz->passive is updated
to 0. In the next round, because tz->passive is 0, cdev->state will not
be updated.
By using the tz->passive value as the "update" flag, the issue above
can be solved, and the cooling devices can be updated only once when the
temperature is low.
Fixes: 0952177f2a1f ("thermal/core/power_allocator: Update once cooling devices when temp is low")
Cc: 5.13+ <stable(a)vger.kernel.org> # 5.13+
Suggested-by: Wei Wang <wvw(a)google.com>
Signed-off-by: Di Shen <di.shen(a)unisoc.com>
Reviewed-by: Lukasz Luba <lukasz.luba(a)arm.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 7b6aa265ff6a..81e061f183ad 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -762,7 +762,7 @@ static int power_allocator_throttle(struct thermal_zone_device *tz,
trip = params->trip_switch_on;
if (trip && tz->temperature < trip->temperature) {
- update = tz->last_temperature >= trip->temperature;
+ update = tz->passive;
tz->passive = 0;
reset_pid_controller(params);
allow_maximum_power(tz, update);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x e95fa7404716f6e25021e66067271a4ad8eb1486
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012646-bulb-spur-7ae9@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
e95fa7404716 ("thermal: gov_power_allocator: avoid inability to reset a cdev")
94be1d27aa8d ("thermal: gov_power_allocator: Use trip pointers instead of trip indices")
2c7b4bfadef0 ("thermal: core: Store trip pointer in struct thermal_instance")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e95fa7404716f6e25021e66067271a4ad8eb1486 Mon Sep 17 00:00:00 2001
From: Di Shen <di.shen(a)unisoc.com>
Date: Wed, 10 Jan 2024 19:55:26 +0800
Subject: [PATCH] thermal: gov_power_allocator: avoid inability to reset a cdev
Commit 0952177f2a1f ("thermal/core/power_allocator: Update once
cooling devices when temp is low") adds an update flag to avoid
triggering a thermal event when there is no need, and the thermal
cdev is updated once when the temperature is low.
But when the trips are writable, and switch_on_temp is set to be a
higher value, the cooling device state may not be reset to 0,
because last_temperature is smaller than switch_on_temp.
For example:
First:
switch_on_temp=70 control_temp=85;
Then userspace change the trip_temp:
switch_on_temp=45 control_temp=55 cur_temp=54
Then userspace reset the trip_temp:
switch_on_temp=70 control_temp=85 cur_temp=57 last_temp=54
At this time, the cooling device state should be reset to 0.
However, because cur_temp(57) < switch_on_temp(70)
last_temp(54) < switch_on_temp(70) ----> update = false,
update is false, the cooling device state can not be reset.
Using the observation that tz->passive can also be regarded as the
temperature status, set the update flag to the tz->passive value.
When the temperature drops below switch_on for the first time, the
states of cooling devices can be reset once, and tz->passive is updated
to 0. In the next round, because tz->passive is 0, cdev->state will not
be updated.
By using the tz->passive value as the "update" flag, the issue above
can be solved, and the cooling devices can be updated only once when the
temperature is low.
Fixes: 0952177f2a1f ("thermal/core/power_allocator: Update once cooling devices when temp is low")
Cc: 5.13+ <stable(a)vger.kernel.org> # 5.13+
Suggested-by: Wei Wang <wvw(a)google.com>
Signed-off-by: Di Shen <di.shen(a)unisoc.com>
Reviewed-by: Lukasz Luba <lukasz.luba(a)arm.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 7b6aa265ff6a..81e061f183ad 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -762,7 +762,7 @@ static int power_allocator_throttle(struct thermal_zone_device *tz,
trip = params->trip_switch_on;
if (trip && tz->temperature < trip->temperature) {
- update = tz->last_temperature >= trip->temperature;
+ update = tz->passive;
tz->passive = 0;
reset_pid_controller(params);
allow_maximum_power(tz, update);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 832dd634bd1b4e3bbe9f10b9c9ba5db6f6f2b97f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012633-expenses-vastly-81bc@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
832dd634bd1b ("arm64: entry: fix ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD")
546b7cde9b1d ("arm64: Rename ARM64_WORKAROUND_2966298")
471470bc7052 ("arm64: errata: Add Cortex-A520 speculative unprivileged load workaround")
cce8365fc47b ("arm64: errata: Group all Cortex-A510 errata together")
e8069f5a8e3b ("Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 832dd634bd1b4e3bbe9f10b9c9ba5db6f6f2b97f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland(a)arm.com>
Date: Tue, 16 Jan 2024 11:02:20 +0000
Subject: [PATCH] arm64: entry: fix ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
Currently the ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD workaround isn't
quite right, as it is supposed to be applied after the last explicit
memory access, but is immediately followed by an LDR.
The ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD workaround is used to
handle Cortex-A520 erratum 2966298 and Cortex-A510 erratum 3117295,
which are described in:
* https://developer.arm.com/documentation/SDEN2444153/0600/?lang=en
* https://developer.arm.com/documentation/SDEN1873361/1600/?lang=en
In both cases the workaround is described as:
| If pagetable isolation is disabled, the context switch logic in the
| kernel can be updated to execute the following sequence on affected
| cores before exiting to EL0, and after all explicit memory accesses:
|
| 1. A non-shareable TLBI to any context and/or address, including
| unused contexts or addresses, such as a `TLBI VALE1 Xzr`.
|
| 2. A DSB NSH to guarantee completion of the TLBI.
The important part being that the TLBI+DSB must be placed "after all
explicit memory accesses".
Unfortunately, as-implemented, the TLBI+DSB is immediately followed by
an LDR, as we have:
| alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
| tlbi vale1, xzr
| dsb nsh
| alternative_else_nop_endif
| alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
| ldr lr, [sp, #S_LR]
| add sp, sp, #PT_REGS_SIZE // restore sp
| eret
| alternative_else_nop_endif
|
| [ ... KPTI exception return path ... ]
This patch fixes this by reworking the logic to place the TLBI+DSB
immediately before the ERET, after all explicit memory accesses.
The ERET is currently in a separate alternative block, and alternatives
cannot be nested. To account for this, the alternative block for
ARM64_UNMAP_KERNEL_AT_EL0 is replaced with a single alternative branch
to skip the KPTI logic, with the new shape of the logic being:
| alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0
| [ ... KPTI exception return path ... ]
| .L_skip_tramp_exit_\@:
|
| ldr lr, [sp, #S_LR]
| add sp, sp, #PT_REGS_SIZE // restore sp
|
| alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
| tlbi vale1, xzr
| dsb nsh
| alternative_else_nop_endif
| eret
The new structure means that the workaround is only applied when KPTI is
not in use; this is fine as noted in the documented implications of the
erratum:
| Pagetable isolation between EL0 and higher level ELs prevents the
| issue from occurring.
... and as per the workaround description quoted above, the workaround
is only necessary "If pagetable isolation is disabled".
Fixes: 471470bc7052 ("arm64: errata: Add Cortex-A520 speculative unprivileged load workaround")
Signed-off-by: Mark Rutland <mark.rutland(a)arm.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Cc: Rob Herring <robh(a)kernel.org>
Cc: Will Deacon <will(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240116110221.420467-2-mark.rutland@arm.com
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 544ab46649f3..7fcbee0f6c0e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -428,16 +428,9 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]
.if \el == 0
-alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
- tlbi vale1, xzr
- dsb nsh
-alternative_else_nop_endif
-alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
- ldr lr, [sp, #S_LR]
- add sp, sp, #PT_REGS_SIZE // restore sp
- eret
-alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0
+
msr far_el1, x29
ldr_this_cpu x30, this_cpu_vector, x29
@@ -446,7 +439,18 @@ alternative_else_nop_endif
ldr lr, [sp, #S_LR] // restore x30
add sp, sp, #PT_REGS_SIZE // restore sp
br x29
+
+.L_skip_tramp_exit_\@:
#endif
+ ldr lr, [sp, #S_LR]
+ add sp, sp, #PT_REGS_SIZE // restore sp
+
+ /* This must be after the last explicit memory access */
+alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ tlbi vale1, xzr
+ dsb nsh
+alternative_else_nop_endif
+ eret
.else
ldr lr, [sp, #S_LR]
add sp, sp, #PT_REGS_SIZE // restore sp
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x f827bcdafa2a2ac21c91e47f587e8d0c76195409
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012602-carbon-unsworn-d737@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
f827bcdafa2a ("arm64: errata: Add Cortex-A510 speculative unprivileged load workaround")
546b7cde9b1d ("arm64: Rename ARM64_WORKAROUND_2966298")
471470bc7052 ("arm64: errata: Add Cortex-A520 speculative unprivileged load workaround")
cce8365fc47b ("arm64: errata: Group all Cortex-A510 errata together")
6df696cd9bc1 ("arm64: errata: Mitigate Ampere1 erratum AC03_CPU_38 at stage-2")
52b603628a2c ("Merge branch kvm-arm64/parallel-access-faults into kvmarm/next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f827bcdafa2a2ac21c91e47f587e8d0c76195409 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh(a)kernel.org>
Date: Wed, 10 Jan 2024 11:29:21 -0600
Subject: [PATCH] arm64: errata: Add Cortex-A510 speculative unprivileged load
workaround
Implement the workaround for ARM Cortex-A510 erratum 3117295. On an
affected Cortex-A510 core, a speculatively executed unprivileged load
might leak data from a privileged load via a cache side channel. The
issue only exists for loads within a translation regime with the same
translation (e.g. same ASID and VMID). Therefore, the issue only affects
the return to EL0.
The erratum and workaround are the same as ARM Cortex-A520 erratum
2966298, so reuse the existing workaround.
Cc: stable(a)vger.kernel.org
Signed-off-by: Rob Herring <robh(a)kernel.org>
Reviewed-by: Mark Rutland <mark.rutland(a)arm.com>
Link: https://lore.kernel.org/r/20240110-arm-errata-a510-v1-2-d02bc51aeeee@kernel…
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index f47f63bcf67c..7acd64c61f50 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -71,6 +71,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #3117295 | ARM64_ERRATUM_3117295 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 96f31e235a1a..bfd275249366 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1054,6 +1054,20 @@ config ARM64_ERRATUM_2966298
If unsure, say Y.
+config ARM64_ERRATUM_3117295
+ bool "Cortex-A510: 3117295: workaround for speculatively executed unprivileged load"
+ select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 3117295.
+
+ On an affected Cortex-A510 core, a speculatively executed unprivileged
+ load might leak data from a privileged level via a cache side channel.
+
+ Work around this problem by executing a TLBI before returning to EL0.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index cb5e0622168d..967c7c7a4e7d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -416,6 +416,19 @@ static struct midr_range broken_aarch32_aes[] = {
};
#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+static const struct midr_range erratum_spec_unpriv_load_list[] = {
+#ifdef CONFIG_ARM64_ERRATUM_3117295
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2966298
+ /* Cortex-A520 r0p0 to r0p1 */
+ MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+#endif
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -715,10 +728,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
#endif
#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
{
- .desc = "ARM erratum 2966298",
+ .desc = "ARM errata 2966298, 3117295",
.capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD,
/* Cortex-A520 r0p0 - r0p1 */
- ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+ ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list),
},
#endif
#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 206c857dd17d4d026de85866f1b5f0969f2a109e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012658-recycler-purity-f7c1@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
206c857dd17d ("media: mtk-jpeg: Fix use after free bug due to error path handling in mtk_jpeg_dec_device_run")
2023a9981111 ("media: platform: rename mediatek/mtk-jpeg/ to mediatek/jpeg/")
63fe3d27b226 ("media: platform/*/Kconfig: make manufacturer menus more uniform")
f2ab6d3e8c48 ("media: platform: Create vendor/{Makefile,Kconfig} files")
8148baabd1c4 ("media: platform: re-structure TI drivers")
012e3ca3cb4d ("media: platform: rename omap/ to ti/omap/")
ceafdaac46ea ("media: platform: rename omap3isp/ to ti/omap3isp/")
d24a170bde65 ("media: platform: rename davinci/ to ti/davinci/")
407965e2348e ("media: platform: rename am437x/ to ti/am437x/")
e7b8153e2a4f ("media: platform: place stm32/ and sti/ under st/ dir")
43ecec16c4fa ("media: platform: rename s5p-mfc/ to samsung/s5p-mfc/")
f4104b7851a8 ("media: platform: rename s5p-jpeg/ to samsung/s5p-jpeg/")
a7f3b2d32dab ("media: platform: rename s5p-g2d/ to samsung/s5p-g2d/")
c1024049033f ("media: platform: rename s3c-camif/ to samsung/s3c-camif/")
3bae07d4b44c ("media: platform: rename exynos-gsc/ to samsung/exynos-gsc/")
238c84f71120 ("media: platform: rename exynos4-is/ to samsung/exynos4-is/")
9b18ef7c9ff4 ("media: platform: rename tegra/vde/ to nvidia/tegra-vde/")
574476a7d05d ("media: platform: rename mtk-vpu/ to mediatek/mtk-vpu/")
728dc4075acc ("media: platform: rename mtk-vcodec/ to mediatek/mtk-vcodec/")
1cb72963fa1e ("media: platform: rename mtk-mdp/ to mediatek/mtk-mdp/")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 206c857dd17d4d026de85866f1b5f0969f2a109e Mon Sep 17 00:00:00 2001
From: Zheng Wang <zyytlz.wz(a)163.com>
Date: Mon, 6 Nov 2023 15:48:10 +0100
Subject: [PATCH] media: mtk-jpeg: Fix use after free bug due to error path
handling in mtk_jpeg_dec_device_run
In mtk_jpeg_probe, &jpeg->job_timeout_work is bound with
mtk_jpeg_job_timeout_work.
In mtk_jpeg_dec_device_run, if error happens in
mtk_jpeg_set_dec_dst, it will finally start the worker while
mark the job as finished by invoking v4l2_m2m_job_finish.
There are two methods to trigger the bug. If we remove the
module, it which will call mtk_jpeg_remove to make cleanup.
The possible sequence is as follows, which will cause a
use-after-free bug.
CPU0 CPU1
mtk_jpeg_dec_... |
start worker |
|mtk_jpeg_job_timeout_work
mtk_jpeg_remove |
v4l2_m2m_release |
kfree(m2m_dev); |
|
| v4l2_m2m_get_curr_priv
| m2m_dev->curr_ctx //use
If we close the file descriptor, which will call mtk_jpeg_release,
it will have a similar sequence.
Fix this bug by starting timeout worker only if started jpegdec worker
successfully. Then v4l2_m2m_job_finish will only be called in
either mtk_jpeg_job_timeout_work or mtk_jpeg_dec_device_run.
Fixes: b2f0d2724ba4 ("[media] vcodec: mediatek: Add Mediatek JPEG Decoder Driver")
Signed-off-by: Zheng Wang <zyytlz.wz(a)163.com>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko(a)collabora.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
index 7c2e6a2f6c40..63165f05e123 100644
--- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
@@ -1020,13 +1020,13 @@ static void mtk_jpeg_dec_device_run(void *priv)
if (ret < 0)
goto dec_end;
- schedule_delayed_work(&jpeg->job_timeout_work,
- msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
-
mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs);
if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, &dst_buf->vb2_buf, &fb))
goto dec_end;
+ schedule_delayed_work(&jpeg->job_timeout_work,
+ msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
+
spin_lock_irqsave(&jpeg->hw_lock, flags);
mtk_jpeg_dec_reset(jpeg->reg_base);
mtk_jpeg_dec_set_config(jpeg->reg_base,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 206c857dd17d4d026de85866f1b5f0969f2a109e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012656-unaligned-conductor-4ef5@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
206c857dd17d ("media: mtk-jpeg: Fix use after free bug due to error path handling in mtk_jpeg_dec_device_run")
2023a9981111 ("media: platform: rename mediatek/mtk-jpeg/ to mediatek/jpeg/")
63fe3d27b226 ("media: platform/*/Kconfig: make manufacturer menus more uniform")
f2ab6d3e8c48 ("media: platform: Create vendor/{Makefile,Kconfig} files")
8148baabd1c4 ("media: platform: re-structure TI drivers")
012e3ca3cb4d ("media: platform: rename omap/ to ti/omap/")
ceafdaac46ea ("media: platform: rename omap3isp/ to ti/omap3isp/")
d24a170bde65 ("media: platform: rename davinci/ to ti/davinci/")
407965e2348e ("media: platform: rename am437x/ to ti/am437x/")
e7b8153e2a4f ("media: platform: place stm32/ and sti/ under st/ dir")
43ecec16c4fa ("media: platform: rename s5p-mfc/ to samsung/s5p-mfc/")
f4104b7851a8 ("media: platform: rename s5p-jpeg/ to samsung/s5p-jpeg/")
a7f3b2d32dab ("media: platform: rename s5p-g2d/ to samsung/s5p-g2d/")
c1024049033f ("media: platform: rename s3c-camif/ to samsung/s3c-camif/")
3bae07d4b44c ("media: platform: rename exynos-gsc/ to samsung/exynos-gsc/")
238c84f71120 ("media: platform: rename exynos4-is/ to samsung/exynos4-is/")
9b18ef7c9ff4 ("media: platform: rename tegra/vde/ to nvidia/tegra-vde/")
574476a7d05d ("media: platform: rename mtk-vpu/ to mediatek/mtk-vpu/")
728dc4075acc ("media: platform: rename mtk-vcodec/ to mediatek/mtk-vcodec/")
1cb72963fa1e ("media: platform: rename mtk-mdp/ to mediatek/mtk-mdp/")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 206c857dd17d4d026de85866f1b5f0969f2a109e Mon Sep 17 00:00:00 2001
From: Zheng Wang <zyytlz.wz(a)163.com>
Date: Mon, 6 Nov 2023 15:48:10 +0100
Subject: [PATCH] media: mtk-jpeg: Fix use after free bug due to error path
handling in mtk_jpeg_dec_device_run
In mtk_jpeg_probe, &jpeg->job_timeout_work is bound with
mtk_jpeg_job_timeout_work.
In mtk_jpeg_dec_device_run, if error happens in
mtk_jpeg_set_dec_dst, it will finally start the worker while
mark the job as finished by invoking v4l2_m2m_job_finish.
There are two methods to trigger the bug. If we remove the
module, it which will call mtk_jpeg_remove to make cleanup.
The possible sequence is as follows, which will cause a
use-after-free bug.
CPU0 CPU1
mtk_jpeg_dec_... |
start worker |
|mtk_jpeg_job_timeout_work
mtk_jpeg_remove |
v4l2_m2m_release |
kfree(m2m_dev); |
|
| v4l2_m2m_get_curr_priv
| m2m_dev->curr_ctx //use
If we close the file descriptor, which will call mtk_jpeg_release,
it will have a similar sequence.
Fix this bug by starting timeout worker only if started jpegdec worker
successfully. Then v4l2_m2m_job_finish will only be called in
either mtk_jpeg_job_timeout_work or mtk_jpeg_dec_device_run.
Fixes: b2f0d2724ba4 ("[media] vcodec: mediatek: Add Mediatek JPEG Decoder Driver")
Signed-off-by: Zheng Wang <zyytlz.wz(a)163.com>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko(a)collabora.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
index 7c2e6a2f6c40..63165f05e123 100644
--- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
@@ -1020,13 +1020,13 @@ static void mtk_jpeg_dec_device_run(void *priv)
if (ret < 0)
goto dec_end;
- schedule_delayed_work(&jpeg->job_timeout_work,
- msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
-
mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs);
if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, &dst_buf->vb2_buf, &fb))
goto dec_end;
+ schedule_delayed_work(&jpeg->job_timeout_work,
+ msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
+
spin_lock_irqsave(&jpeg->hw_lock, flags);
mtk_jpeg_dec_reset(jpeg->reg_base);
mtk_jpeg_dec_set_config(jpeg->reg_base,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 206c857dd17d4d026de85866f1b5f0969f2a109e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012655-hasty-voyage-a475@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
206c857dd17d ("media: mtk-jpeg: Fix use after free bug due to error path handling in mtk_jpeg_dec_device_run")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 206c857dd17d4d026de85866f1b5f0969f2a109e Mon Sep 17 00:00:00 2001
From: Zheng Wang <zyytlz.wz(a)163.com>
Date: Mon, 6 Nov 2023 15:48:10 +0100
Subject: [PATCH] media: mtk-jpeg: Fix use after free bug due to error path
handling in mtk_jpeg_dec_device_run
In mtk_jpeg_probe, &jpeg->job_timeout_work is bound with
mtk_jpeg_job_timeout_work.
In mtk_jpeg_dec_device_run, if error happens in
mtk_jpeg_set_dec_dst, it will finally start the worker while
mark the job as finished by invoking v4l2_m2m_job_finish.
There are two methods to trigger the bug. If we remove the
module, it which will call mtk_jpeg_remove to make cleanup.
The possible sequence is as follows, which will cause a
use-after-free bug.
CPU0 CPU1
mtk_jpeg_dec_... |
start worker |
|mtk_jpeg_job_timeout_work
mtk_jpeg_remove |
v4l2_m2m_release |
kfree(m2m_dev); |
|
| v4l2_m2m_get_curr_priv
| m2m_dev->curr_ctx //use
If we close the file descriptor, which will call mtk_jpeg_release,
it will have a similar sequence.
Fix this bug by starting timeout worker only if started jpegdec worker
successfully. Then v4l2_m2m_job_finish will only be called in
either mtk_jpeg_job_timeout_work or mtk_jpeg_dec_device_run.
Fixes: b2f0d2724ba4 ("[media] vcodec: mediatek: Add Mediatek JPEG Decoder Driver")
Signed-off-by: Zheng Wang <zyytlz.wz(a)163.com>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko(a)collabora.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
index 7c2e6a2f6c40..63165f05e123 100644
--- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
@@ -1020,13 +1020,13 @@ static void mtk_jpeg_dec_device_run(void *priv)
if (ret < 0)
goto dec_end;
- schedule_delayed_work(&jpeg->job_timeout_work,
- msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
-
mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs);
if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, &dst_buf->vb2_buf, &fb))
goto dec_end;
+ schedule_delayed_work(&jpeg->job_timeout_work,
+ msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
+
spin_lock_irqsave(&jpeg->hw_lock, flags);
mtk_jpeg_dec_reset(jpeg->reg_base);
mtk_jpeg_dec_set_config(jpeg->reg_base,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 206c857dd17d4d026de85866f1b5f0969f2a109e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012653-marital-palpitate-7d96@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
206c857dd17d ("media: mtk-jpeg: Fix use after free bug due to error path handling in mtk_jpeg_dec_device_run")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 206c857dd17d4d026de85866f1b5f0969f2a109e Mon Sep 17 00:00:00 2001
From: Zheng Wang <zyytlz.wz(a)163.com>
Date: Mon, 6 Nov 2023 15:48:10 +0100
Subject: [PATCH] media: mtk-jpeg: Fix use after free bug due to error path
handling in mtk_jpeg_dec_device_run
In mtk_jpeg_probe, &jpeg->job_timeout_work is bound with
mtk_jpeg_job_timeout_work.
In mtk_jpeg_dec_device_run, if error happens in
mtk_jpeg_set_dec_dst, it will finally start the worker while
mark the job as finished by invoking v4l2_m2m_job_finish.
There are two methods to trigger the bug. If we remove the
module, it which will call mtk_jpeg_remove to make cleanup.
The possible sequence is as follows, which will cause a
use-after-free bug.
CPU0 CPU1
mtk_jpeg_dec_... |
start worker |
|mtk_jpeg_job_timeout_work
mtk_jpeg_remove |
v4l2_m2m_release |
kfree(m2m_dev); |
|
| v4l2_m2m_get_curr_priv
| m2m_dev->curr_ctx //use
If we close the file descriptor, which will call mtk_jpeg_release,
it will have a similar sequence.
Fix this bug by starting timeout worker only if started jpegdec worker
successfully. Then v4l2_m2m_job_finish will only be called in
either mtk_jpeg_job_timeout_work or mtk_jpeg_dec_device_run.
Fixes: b2f0d2724ba4 ("[media] vcodec: mediatek: Add Mediatek JPEG Decoder Driver")
Signed-off-by: Zheng Wang <zyytlz.wz(a)163.com>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko(a)collabora.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
index 7c2e6a2f6c40..63165f05e123 100644
--- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
@@ -1020,13 +1020,13 @@ static void mtk_jpeg_dec_device_run(void *priv)
if (ret < 0)
goto dec_end;
- schedule_delayed_work(&jpeg->job_timeout_work,
- msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
-
mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs);
if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, &dst_buf->vb2_buf, &fb))
goto dec_end;
+ schedule_delayed_work(&jpeg->job_timeout_work,
+ msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
+
spin_lock_irqsave(&jpeg->hw_lock, flags);
mtk_jpeg_dec_reset(jpeg->reg_base);
mtk_jpeg_dec_set_config(jpeg->reg_base,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0da611a8702101814257a7c03f6caf0574c83b98
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012646-blazing-family-fd71@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
0da611a87021 ("dma-buf: add dma_fence_timestamp helper")
f781f661e8c9 ("dma-buf: keep the signaling time of merged fences v3")
bafaf67c42f4 ("Revert "drm/sched: Use parent fence instead of finished"")
e4dc45b1848b ("drm/sched: Use parent fence instead of finished")
c85d00d4fd8b ("dma-buf: set signaling bit for the stub fence")
bbd60fee2d21 ("dma-buf: revert "return only unsignaled fences in dma_fence_unwrap_for_each v3"")
245a4a7b531c ("dma-buf: generalize dma_fence unwrap & merging v3")
01357a5a45ed ("dma-buf: cleanup dma_fence_unwrap implementation")
f778f405faa2 ("dma-buf/sync_file: cleanup fence merging a bit")
21d139d73f77 ("dma-buf/sync-file: fix logic error in new fence merge code")
519f490db07e ("dma-buf/sync-file: fix warning about fence containers")
64a8f92fd783 ("dma-buf: add dma_fence_unwrap v2")
caaf2ae712b7 ("dma-buf: Add dma_fence_array_for_each (v2)")
1d51775cd3f5 ("dma-buf: add dma_resv selftest v4")
bcf26654a38f ("drm/sched: fix the bug of time out calculation(v4)")
992c238188a8 ("dma-buf: nuke seqno-fence")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0da611a8702101814257a7c03f6caf0574c83b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig(a)amd.com>
Date: Fri, 8 Sep 2023 10:27:23 +0200
Subject: [PATCH] dma-buf: add dma_fence_timestamp helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a fence signals there is a very small race window where the timestamp
isn't updated yet. sync_file solves this by busy waiting for the
timestamp to appear, but on other ocassions didn't handled this
correctly.
Provide a dma_fence_timestamp() helper function for this and use it in
all appropriate cases.
Another alternative would be to grab the spinlock when that happens.
v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case
the accurate timestamp is needed and/or the timestamp is not based on
ktime (e.g. hw timestamp)
v3 chk: drop the parameter again for unified handling
Signed-off-by: Yunxiang Li <Yunxiang.Li(a)amd.com>
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2")
Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com>
CC: stable(a)vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christi…
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index c625bb2b5d56..628af51c81af 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
if (!dma_fence_is_signaled(tmp)) {
++count;
- } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
- &tmp->flags)) {
- if (ktime_after(tmp->timestamp, timestamp))
- timestamp = tmp->timestamp;
} else {
- /*
- * Use the current time if the fence is
- * currently signaling.
- */
- timestamp = ktime_get();
+ ktime_t t = dma_fence_timestamp(tmp);
+
+ if (ktime_after(t, timestamp))
+ timestamp = t;
}
}
}
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86ce..2e9a316c596a 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence,
sizeof(info->driver_name));
info->status = dma_fence_get_status(fence);
- while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
- !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
- cpu_relax();
info->timestamp_ns =
- test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ?
- ktime_to_ns(fence->timestamp) :
- ktime_set(0, 0);
+ dma_fence_is_signaled(fence) ?
+ ktime_to_ns(dma_fence_timestamp(fence)) :
+ ktime_set(0, 0);
return info->status;
}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index fd755e953487..99797a8c836a 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -935,7 +935,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (next) {
next->s_fence->scheduled.timestamp =
- job->s_fence->finished.timestamp;
+ dma_fence_timestamp(&job->s_fence->finished);
/* start TO timer for next job */
drm_sched_start_timeout(sched);
}
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 0d678e9a7b24..ebe78bd3d121 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence,
fence->error = error;
}
+/**
+ * dma_fence_timestamp - helper to get the completion timestamp of a fence
+ * @fence: fence to get the timestamp from.
+ *
+ * After a fence is signaled the timestamp is updated with the signaling time,
+ * but setting the timestamp can race with tasks waiting for the signaling. This
+ * helper busy waits for the correct timestamp to appear.
+ */
+static inline ktime_t dma_fence_timestamp(struct dma_fence *fence)
+{
+ if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
+ return ktime_get();
+
+ while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
+ cpu_relax();
+
+ return fence->timestamp;
+}
+
signed long dma_fence_wait_timeout(struct dma_fence *,
bool intr, signed long timeout);
signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0da611a8702101814257a7c03f6caf0574c83b98
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012644-unsolved-juicy-0a87@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
0da611a87021 ("dma-buf: add dma_fence_timestamp helper")
f781f661e8c9 ("dma-buf: keep the signaling time of merged fences v3")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0da611a8702101814257a7c03f6caf0574c83b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig(a)amd.com>
Date: Fri, 8 Sep 2023 10:27:23 +0200
Subject: [PATCH] dma-buf: add dma_fence_timestamp helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a fence signals there is a very small race window where the timestamp
isn't updated yet. sync_file solves this by busy waiting for the
timestamp to appear, but on other ocassions didn't handled this
correctly.
Provide a dma_fence_timestamp() helper function for this and use it in
all appropriate cases.
Another alternative would be to grab the spinlock when that happens.
v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case
the accurate timestamp is needed and/or the timestamp is not based on
ktime (e.g. hw timestamp)
v3 chk: drop the parameter again for unified handling
Signed-off-by: Yunxiang Li <Yunxiang.Li(a)amd.com>
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2")
Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com>
CC: stable(a)vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christi…
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index c625bb2b5d56..628af51c81af 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
if (!dma_fence_is_signaled(tmp)) {
++count;
- } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
- &tmp->flags)) {
- if (ktime_after(tmp->timestamp, timestamp))
- timestamp = tmp->timestamp;
} else {
- /*
- * Use the current time if the fence is
- * currently signaling.
- */
- timestamp = ktime_get();
+ ktime_t t = dma_fence_timestamp(tmp);
+
+ if (ktime_after(t, timestamp))
+ timestamp = t;
}
}
}
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86ce..2e9a316c596a 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence,
sizeof(info->driver_name));
info->status = dma_fence_get_status(fence);
- while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
- !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
- cpu_relax();
info->timestamp_ns =
- test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ?
- ktime_to_ns(fence->timestamp) :
- ktime_set(0, 0);
+ dma_fence_is_signaled(fence) ?
+ ktime_to_ns(dma_fence_timestamp(fence)) :
+ ktime_set(0, 0);
return info->status;
}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index fd755e953487..99797a8c836a 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -935,7 +935,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (next) {
next->s_fence->scheduled.timestamp =
- job->s_fence->finished.timestamp;
+ dma_fence_timestamp(&job->s_fence->finished);
/* start TO timer for next job */
drm_sched_start_timeout(sched);
}
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 0d678e9a7b24..ebe78bd3d121 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence,
fence->error = error;
}
+/**
+ * dma_fence_timestamp - helper to get the completion timestamp of a fence
+ * @fence: fence to get the timestamp from.
+ *
+ * After a fence is signaled the timestamp is updated with the signaling time,
+ * but setting the timestamp can race with tasks waiting for the signaling. This
+ * helper busy waits for the correct timestamp to appear.
+ */
+static inline ktime_t dma_fence_timestamp(struct dma_fence *fence)
+{
+ if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
+ return ktime_get();
+
+ while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
+ cpu_relax();
+
+ return fence->timestamp;
+}
+
signed long dma_fence_wait_timeout(struct dma_fence *,
bool intr, signed long timeout);
signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 0da611a8702101814257a7c03f6caf0574c83b98
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012643-undecided-bride-578b@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
0da611a87021 ("dma-buf: add dma_fence_timestamp helper")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0da611a8702101814257a7c03f6caf0574c83b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig(a)amd.com>
Date: Fri, 8 Sep 2023 10:27:23 +0200
Subject: [PATCH] dma-buf: add dma_fence_timestamp helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a fence signals there is a very small race window where the timestamp
isn't updated yet. sync_file solves this by busy waiting for the
timestamp to appear, but on other ocassions didn't handled this
correctly.
Provide a dma_fence_timestamp() helper function for this and use it in
all appropriate cases.
Another alternative would be to grab the spinlock when that happens.
v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case
the accurate timestamp is needed and/or the timestamp is not based on
ktime (e.g. hw timestamp)
v3 chk: drop the parameter again for unified handling
Signed-off-by: Yunxiang Li <Yunxiang.Li(a)amd.com>
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2")
Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com>
CC: stable(a)vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christi…
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index c625bb2b5d56..628af51c81af 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
if (!dma_fence_is_signaled(tmp)) {
++count;
- } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
- &tmp->flags)) {
- if (ktime_after(tmp->timestamp, timestamp))
- timestamp = tmp->timestamp;
} else {
- /*
- * Use the current time if the fence is
- * currently signaling.
- */
- timestamp = ktime_get();
+ ktime_t t = dma_fence_timestamp(tmp);
+
+ if (ktime_after(t, timestamp))
+ timestamp = t;
}
}
}
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86ce..2e9a316c596a 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence,
sizeof(info->driver_name));
info->status = dma_fence_get_status(fence);
- while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
- !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
- cpu_relax();
info->timestamp_ns =
- test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ?
- ktime_to_ns(fence->timestamp) :
- ktime_set(0, 0);
+ dma_fence_is_signaled(fence) ?
+ ktime_to_ns(dma_fence_timestamp(fence)) :
+ ktime_set(0, 0);
return info->status;
}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index fd755e953487..99797a8c836a 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -935,7 +935,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (next) {
next->s_fence->scheduled.timestamp =
- job->s_fence->finished.timestamp;
+ dma_fence_timestamp(&job->s_fence->finished);
/* start TO timer for next job */
drm_sched_start_timeout(sched);
}
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 0d678e9a7b24..ebe78bd3d121 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence,
fence->error = error;
}
+/**
+ * dma_fence_timestamp - helper to get the completion timestamp of a fence
+ * @fence: fence to get the timestamp from.
+ *
+ * After a fence is signaled the timestamp is updated with the signaling time,
+ * but setting the timestamp can race with tasks waiting for the signaling. This
+ * helper busy waits for the correct timestamp to appear.
+ */
+static inline ktime_t dma_fence_timestamp(struct dma_fence *fence)
+{
+ if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
+ return ktime_get();
+
+ while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
+ cpu_relax();
+
+ return fence->timestamp;
+}
+
signed long dma_fence_wait_timeout(struct dma_fence *,
bool intr, signed long timeout);
signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 0da611a8702101814257a7c03f6caf0574c83b98
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012642-clump-cofounder-8820@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
0da611a87021 ("dma-buf: add dma_fence_timestamp helper")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0da611a8702101814257a7c03f6caf0574c83b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig(a)amd.com>
Date: Fri, 8 Sep 2023 10:27:23 +0200
Subject: [PATCH] dma-buf: add dma_fence_timestamp helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a fence signals there is a very small race window where the timestamp
isn't updated yet. sync_file solves this by busy waiting for the
timestamp to appear, but on other ocassions didn't handled this
correctly.
Provide a dma_fence_timestamp() helper function for this and use it in
all appropriate cases.
Another alternative would be to grab the spinlock when that happens.
v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case
the accurate timestamp is needed and/or the timestamp is not based on
ktime (e.g. hw timestamp)
v3 chk: drop the parameter again for unified handling
Signed-off-by: Yunxiang Li <Yunxiang.Li(a)amd.com>
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2")
Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com>
CC: stable(a)vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christi…
diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index c625bb2b5d56..628af51c81af 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
if (!dma_fence_is_signaled(tmp)) {
++count;
- } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
- &tmp->flags)) {
- if (ktime_after(tmp->timestamp, timestamp))
- timestamp = tmp->timestamp;
} else {
- /*
- * Use the current time if the fence is
- * currently signaling.
- */
- timestamp = ktime_get();
+ ktime_t t = dma_fence_timestamp(tmp);
+
+ if (ktime_after(t, timestamp))
+ timestamp = t;
}
}
}
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86ce..2e9a316c596a 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence,
sizeof(info->driver_name));
info->status = dma_fence_get_status(fence);
- while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
- !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
- cpu_relax();
info->timestamp_ns =
- test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ?
- ktime_to_ns(fence->timestamp) :
- ktime_set(0, 0);
+ dma_fence_is_signaled(fence) ?
+ ktime_to_ns(dma_fence_timestamp(fence)) :
+ ktime_set(0, 0);
return info->status;
}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index fd755e953487..99797a8c836a 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -935,7 +935,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (next) {
next->s_fence->scheduled.timestamp =
- job->s_fence->finished.timestamp;
+ dma_fence_timestamp(&job->s_fence->finished);
/* start TO timer for next job */
drm_sched_start_timeout(sched);
}
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 0d678e9a7b24..ebe78bd3d121 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence,
fence->error = error;
}
+/**
+ * dma_fence_timestamp - helper to get the completion timestamp of a fence
+ * @fence: fence to get the timestamp from.
+ *
+ * After a fence is signaled the timestamp is updated with the signaling time,
+ * but setting the timestamp can race with tasks waiting for the signaling. This
+ * helper busy waits for the correct timestamp to appear.
+ */
+static inline ktime_t dma_fence_timestamp(struct dma_fence *fence)
+{
+ if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
+ return ktime_get();
+
+ while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
+ cpu_relax();
+
+ return fence->timestamp;
+}
+
signed long dma_fence_wait_timeout(struct dma_fence *,
bool intr, signed long timeout);
signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x e9cdebbe23f1aa9a1caea169862f479ab3fa2773
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012608-anatomy-postage-9ce9@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
e9cdebbe23f1 ("dlm: use kernel_connect() and kernel_bind()")
dbb751ffab0b ("fs: dlm: parallelize lowcomms socket handling")
c852a6d70698 ("fs: dlm: use saved sk_error_report()")
e9dd5fd849f1 ("fs: dlm: use sock2con without checking null")
6f0b0b5d7ae7 ("fs: dlm: remove dlm_node_addrs lookup list")
c51c9cd8addc ("fs: dlm: don't put dlm_local_addrs on heap")
c3d88dfd1583 ("fs: dlm: cleanup listen sock handling")
4f567acb0b86 ("fs: dlm: remove socket shutdown handling")
1037c2a94ab5 ("fs: dlm: use listen sock as dlm running indicator")
194a3fb488f2 ("fs: dlm: relax sending to allow receiving")
f0f4bb431bd5 ("fs: dlm: retry accept() until -EAGAIN or error returns")
08ae0547e75e ("fs: dlm: fix sock release if listen fails")
dfc020f334f8 ("fs: dlm: fix grammar in lowcomms output")
3af2326ca0a1 ("fs: dlm: memory cache for writequeue_entry")
6c547f264077 ("fs: dlm: memory cache for midcomms hotpath")
be3b0400edbf ("fs: dlm: remove wq_alloc mutex")
92c446053814 ("fs: dlm: replace use of socket sk_callback_lock with sock_lock")
4c3d90570bcc ("fs: dlm: don't call kernel_getpeername() in error_report()")
b87b1883efe3 ("fs: dlm: remove double list_first_entry call")
9af5b8f0ead7 ("fs: dlm: add debugfs rawmsg send functionality")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e9cdebbe23f1aa9a1caea169862f479ab3fa2773 Mon Sep 17 00:00:00 2001
From: Jordan Rife <jrife(a)google.com>
Date: Mon, 6 Nov 2023 15:24:38 -0600
Subject: [PATCH] dlm: use kernel_connect() and kernel_bind()
Recent changes to kernel_connect() and kernel_bind() ensure that
callers are insulated from changes to the address parameter made by BPF
SOCK_ADDR hooks. This patch wraps direct calls to ops->connect() and
ops->bind() with kernel_connect() and kernel_bind() to protect callers
in such cases.
Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.cam…
Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect")
Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jordan Rife <jrife(a)google.com>
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 67f8dd8a05ef..6296c62c10fa 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1817,8 +1817,8 @@ static int dlm_tcp_bind(struct socket *sock)
memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr));
make_sockaddr(&src_addr, 0, &addr_len);
- result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
- addr_len);
+ result = kernel_bind(sock, (struct sockaddr *)&src_addr,
+ addr_len);
if (result < 0) {
/* This *may* not indicate a critical error */
log_print("could not bind for connect: %d", result);
@@ -1830,7 +1830,7 @@ static int dlm_tcp_bind(struct socket *sock)
static int dlm_tcp_connect(struct connection *con, struct socket *sock,
struct sockaddr *addr, int addr_len)
{
- return sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
+ return kernel_connect(sock, addr, addr_len, O_NONBLOCK);
}
static int dlm_tcp_listen_validate(void)
@@ -1862,8 +1862,8 @@ static int dlm_tcp_listen_bind(struct socket *sock)
/* Bind to our port */
make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
- return sock->ops->bind(sock, (struct sockaddr *)&dlm_local_addr[0],
- addr_len);
+ return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
+ addr_len);
}
static const struct dlm_proto_ops dlm_tcp_ops = {
@@ -1888,12 +1888,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
int ret;
/*
- * Make sock->ops->connect() function return in specified time,
+ * Make kernel_connect() function return in specified time,
* since O_NONBLOCK argument in connect() function does not work here,
* then, we should restore the default value of this attribute.
*/
sock_set_sndtimeo(sock->sk, 5);
- ret = sock->ops->connect(sock, addr, addr_len, 0);
+ ret = kernel_connect(sock, addr, addr_len, 0);
sock_set_sndtimeo(sock->sk, 0);
return ret;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x e9cdebbe23f1aa9a1caea169862f479ab3fa2773
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012606-bonelike-audible-92ec@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
e9cdebbe23f1 ("dlm: use kernel_connect() and kernel_bind()")
dbb751ffab0b ("fs: dlm: parallelize lowcomms socket handling")
c852a6d70698 ("fs: dlm: use saved sk_error_report()")
e9dd5fd849f1 ("fs: dlm: use sock2con without checking null")
6f0b0b5d7ae7 ("fs: dlm: remove dlm_node_addrs lookup list")
c51c9cd8addc ("fs: dlm: don't put dlm_local_addrs on heap")
c3d88dfd1583 ("fs: dlm: cleanup listen sock handling")
4f567acb0b86 ("fs: dlm: remove socket shutdown handling")
1037c2a94ab5 ("fs: dlm: use listen sock as dlm running indicator")
194a3fb488f2 ("fs: dlm: relax sending to allow receiving")
f0f4bb431bd5 ("fs: dlm: retry accept() until -EAGAIN or error returns")
08ae0547e75e ("fs: dlm: fix sock release if listen fails")
dfc020f334f8 ("fs: dlm: fix grammar in lowcomms output")
3af2326ca0a1 ("fs: dlm: memory cache for writequeue_entry")
6c547f264077 ("fs: dlm: memory cache for midcomms hotpath")
be3b0400edbf ("fs: dlm: remove wq_alloc mutex")
92c446053814 ("fs: dlm: replace use of socket sk_callback_lock with sock_lock")
4c3d90570bcc ("fs: dlm: don't call kernel_getpeername() in error_report()")
b87b1883efe3 ("fs: dlm: remove double list_first_entry call")
9af5b8f0ead7 ("fs: dlm: add debugfs rawmsg send functionality")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e9cdebbe23f1aa9a1caea169862f479ab3fa2773 Mon Sep 17 00:00:00 2001
From: Jordan Rife <jrife(a)google.com>
Date: Mon, 6 Nov 2023 15:24:38 -0600
Subject: [PATCH] dlm: use kernel_connect() and kernel_bind()
Recent changes to kernel_connect() and kernel_bind() ensure that
callers are insulated from changes to the address parameter made by BPF
SOCK_ADDR hooks. This patch wraps direct calls to ops->connect() and
ops->bind() with kernel_connect() and kernel_bind() to protect callers
in such cases.
Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.cam…
Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect")
Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jordan Rife <jrife(a)google.com>
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 67f8dd8a05ef..6296c62c10fa 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1817,8 +1817,8 @@ static int dlm_tcp_bind(struct socket *sock)
memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr));
make_sockaddr(&src_addr, 0, &addr_len);
- result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
- addr_len);
+ result = kernel_bind(sock, (struct sockaddr *)&src_addr,
+ addr_len);
if (result < 0) {
/* This *may* not indicate a critical error */
log_print("could not bind for connect: %d", result);
@@ -1830,7 +1830,7 @@ static int dlm_tcp_bind(struct socket *sock)
static int dlm_tcp_connect(struct connection *con, struct socket *sock,
struct sockaddr *addr, int addr_len)
{
- return sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
+ return kernel_connect(sock, addr, addr_len, O_NONBLOCK);
}
static int dlm_tcp_listen_validate(void)
@@ -1862,8 +1862,8 @@ static int dlm_tcp_listen_bind(struct socket *sock)
/* Bind to our port */
make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
- return sock->ops->bind(sock, (struct sockaddr *)&dlm_local_addr[0],
- addr_len);
+ return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
+ addr_len);
}
static const struct dlm_proto_ops dlm_tcp_ops = {
@@ -1888,12 +1888,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
int ret;
/*
- * Make sock->ops->connect() function return in specified time,
+ * Make kernel_connect() function return in specified time,
* since O_NONBLOCK argument in connect() function does not work here,
* then, we should restore the default value of this attribute.
*/
sock_set_sndtimeo(sock->sk, 5);
- ret = sock->ops->connect(sock, addr, addr_len, 0);
+ ret = kernel_connect(sock, addr, addr_len, 0);
sock_set_sndtimeo(sock->sk, 0);
return ret;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e9cdebbe23f1aa9a1caea169862f479ab3fa2773
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012605-unaware-variety-77f5@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e9cdebbe23f1 ("dlm: use kernel_connect() and kernel_bind()")
dbb751ffab0b ("fs: dlm: parallelize lowcomms socket handling")
c852a6d70698 ("fs: dlm: use saved sk_error_report()")
e9dd5fd849f1 ("fs: dlm: use sock2con without checking null")
6f0b0b5d7ae7 ("fs: dlm: remove dlm_node_addrs lookup list")
c51c9cd8addc ("fs: dlm: don't put dlm_local_addrs on heap")
c3d88dfd1583 ("fs: dlm: cleanup listen sock handling")
4f567acb0b86 ("fs: dlm: remove socket shutdown handling")
1037c2a94ab5 ("fs: dlm: use listen sock as dlm running indicator")
194a3fb488f2 ("fs: dlm: relax sending to allow receiving")
f0f4bb431bd5 ("fs: dlm: retry accept() until -EAGAIN or error returns")
08ae0547e75e ("fs: dlm: fix sock release if listen fails")
dfc020f334f8 ("fs: dlm: fix grammar in lowcomms output")
3af2326ca0a1 ("fs: dlm: memory cache for writequeue_entry")
6c547f264077 ("fs: dlm: memory cache for midcomms hotpath")
be3b0400edbf ("fs: dlm: remove wq_alloc mutex")
92c446053814 ("fs: dlm: replace use of socket sk_callback_lock with sock_lock")
4c3d90570bcc ("fs: dlm: don't call kernel_getpeername() in error_report()")
b87b1883efe3 ("fs: dlm: remove double list_first_entry call")
9af5b8f0ead7 ("fs: dlm: add debugfs rawmsg send functionality")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e9cdebbe23f1aa9a1caea169862f479ab3fa2773 Mon Sep 17 00:00:00 2001
From: Jordan Rife <jrife(a)google.com>
Date: Mon, 6 Nov 2023 15:24:38 -0600
Subject: [PATCH] dlm: use kernel_connect() and kernel_bind()
Recent changes to kernel_connect() and kernel_bind() ensure that
callers are insulated from changes to the address parameter made by BPF
SOCK_ADDR hooks. This patch wraps direct calls to ops->connect() and
ops->bind() with kernel_connect() and kernel_bind() to protect callers
in such cases.
Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.cam…
Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect")
Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jordan Rife <jrife(a)google.com>
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 67f8dd8a05ef..6296c62c10fa 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1817,8 +1817,8 @@ static int dlm_tcp_bind(struct socket *sock)
memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr));
make_sockaddr(&src_addr, 0, &addr_len);
- result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
- addr_len);
+ result = kernel_bind(sock, (struct sockaddr *)&src_addr,
+ addr_len);
if (result < 0) {
/* This *may* not indicate a critical error */
log_print("could not bind for connect: %d", result);
@@ -1830,7 +1830,7 @@ static int dlm_tcp_bind(struct socket *sock)
static int dlm_tcp_connect(struct connection *con, struct socket *sock,
struct sockaddr *addr, int addr_len)
{
- return sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
+ return kernel_connect(sock, addr, addr_len, O_NONBLOCK);
}
static int dlm_tcp_listen_validate(void)
@@ -1862,8 +1862,8 @@ static int dlm_tcp_listen_bind(struct socket *sock)
/* Bind to our port */
make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
- return sock->ops->bind(sock, (struct sockaddr *)&dlm_local_addr[0],
- addr_len);
+ return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
+ addr_len);
}
static const struct dlm_proto_ops dlm_tcp_ops = {
@@ -1888,12 +1888,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
int ret;
/*
- * Make sock->ops->connect() function return in specified time,
+ * Make kernel_connect() function return in specified time,
* since O_NONBLOCK argument in connect() function does not work here,
* then, we should restore the default value of this attribute.
*/
sock_set_sndtimeo(sock->sk, 5);
- ret = sock->ops->connect(sock, addr, addr_len, 0);
+ ret = kernel_connect(sock, addr, addr_len, 0);
sock_set_sndtimeo(sock->sk, 0);
return ret;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e9cdebbe23f1aa9a1caea169862f479ab3fa2773
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012604-antiviral-unhidden-5f9b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e9cdebbe23f1 ("dlm: use kernel_connect() and kernel_bind()")
dbb751ffab0b ("fs: dlm: parallelize lowcomms socket handling")
c852a6d70698 ("fs: dlm: use saved sk_error_report()")
e9dd5fd849f1 ("fs: dlm: use sock2con without checking null")
6f0b0b5d7ae7 ("fs: dlm: remove dlm_node_addrs lookup list")
c51c9cd8addc ("fs: dlm: don't put dlm_local_addrs on heap")
c3d88dfd1583 ("fs: dlm: cleanup listen sock handling")
4f567acb0b86 ("fs: dlm: remove socket shutdown handling")
1037c2a94ab5 ("fs: dlm: use listen sock as dlm running indicator")
194a3fb488f2 ("fs: dlm: relax sending to allow receiving")
f0f4bb431bd5 ("fs: dlm: retry accept() until -EAGAIN or error returns")
08ae0547e75e ("fs: dlm: fix sock release if listen fails")
dfc020f334f8 ("fs: dlm: fix grammar in lowcomms output")
3af2326ca0a1 ("fs: dlm: memory cache for writequeue_entry")
6c547f264077 ("fs: dlm: memory cache for midcomms hotpath")
be3b0400edbf ("fs: dlm: remove wq_alloc mutex")
92c446053814 ("fs: dlm: replace use of socket sk_callback_lock with sock_lock")
4c3d90570bcc ("fs: dlm: don't call kernel_getpeername() in error_report()")
b87b1883efe3 ("fs: dlm: remove double list_first_entry call")
9af5b8f0ead7 ("fs: dlm: add debugfs rawmsg send functionality")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e9cdebbe23f1aa9a1caea169862f479ab3fa2773 Mon Sep 17 00:00:00 2001
From: Jordan Rife <jrife(a)google.com>
Date: Mon, 6 Nov 2023 15:24:38 -0600
Subject: [PATCH] dlm: use kernel_connect() and kernel_bind()
Recent changes to kernel_connect() and kernel_bind() ensure that
callers are insulated from changes to the address parameter made by BPF
SOCK_ADDR hooks. This patch wraps direct calls to ops->connect() and
ops->bind() with kernel_connect() and kernel_bind() to protect callers
in such cases.
Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.cam…
Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect")
Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jordan Rife <jrife(a)google.com>
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 67f8dd8a05ef..6296c62c10fa 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1817,8 +1817,8 @@ static int dlm_tcp_bind(struct socket *sock)
memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr));
make_sockaddr(&src_addr, 0, &addr_len);
- result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
- addr_len);
+ result = kernel_bind(sock, (struct sockaddr *)&src_addr,
+ addr_len);
if (result < 0) {
/* This *may* not indicate a critical error */
log_print("could not bind for connect: %d", result);
@@ -1830,7 +1830,7 @@ static int dlm_tcp_bind(struct socket *sock)
static int dlm_tcp_connect(struct connection *con, struct socket *sock,
struct sockaddr *addr, int addr_len)
{
- return sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
+ return kernel_connect(sock, addr, addr_len, O_NONBLOCK);
}
static int dlm_tcp_listen_validate(void)
@@ -1862,8 +1862,8 @@ static int dlm_tcp_listen_bind(struct socket *sock)
/* Bind to our port */
make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
- return sock->ops->bind(sock, (struct sockaddr *)&dlm_local_addr[0],
- addr_len);
+ return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
+ addr_len);
}
static const struct dlm_proto_ops dlm_tcp_ops = {
@@ -1888,12 +1888,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
int ret;
/*
- * Make sock->ops->connect() function return in specified time,
+ * Make kernel_connect() function return in specified time,
* since O_NONBLOCK argument in connect() function does not work here,
* then, we should restore the default value of this attribute.
*/
sock_set_sndtimeo(sock->sk, 5);
- ret = sock->ops->connect(sock, addr, addr_len, 0);
+ ret = kernel_connect(sock, addr, addr_len, 0);
sock_set_sndtimeo(sock->sk, 0);
return ret;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x e9cdebbe23f1aa9a1caea169862f479ab3fa2773
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012603-demeanor-raider-e2cf@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
e9cdebbe23f1 ("dlm: use kernel_connect() and kernel_bind()")
dbb751ffab0b ("fs: dlm: parallelize lowcomms socket handling")
c852a6d70698 ("fs: dlm: use saved sk_error_report()")
e9dd5fd849f1 ("fs: dlm: use sock2con without checking null")
6f0b0b5d7ae7 ("fs: dlm: remove dlm_node_addrs lookup list")
c51c9cd8addc ("fs: dlm: don't put dlm_local_addrs on heap")
c3d88dfd1583 ("fs: dlm: cleanup listen sock handling")
4f567acb0b86 ("fs: dlm: remove socket shutdown handling")
1037c2a94ab5 ("fs: dlm: use listen sock as dlm running indicator")
194a3fb488f2 ("fs: dlm: relax sending to allow receiving")
f0f4bb431bd5 ("fs: dlm: retry accept() until -EAGAIN or error returns")
08ae0547e75e ("fs: dlm: fix sock release if listen fails")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e9cdebbe23f1aa9a1caea169862f479ab3fa2773 Mon Sep 17 00:00:00 2001
From: Jordan Rife <jrife(a)google.com>
Date: Mon, 6 Nov 2023 15:24:38 -0600
Subject: [PATCH] dlm: use kernel_connect() and kernel_bind()
Recent changes to kernel_connect() and kernel_bind() ensure that
callers are insulated from changes to the address parameter made by BPF
SOCK_ADDR hooks. This patch wraps direct calls to ops->connect() and
ops->bind() with kernel_connect() and kernel_bind() to protect callers
in such cases.
Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.cam…
Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect")
Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jordan Rife <jrife(a)google.com>
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 67f8dd8a05ef..6296c62c10fa 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1817,8 +1817,8 @@ static int dlm_tcp_bind(struct socket *sock)
memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr));
make_sockaddr(&src_addr, 0, &addr_len);
- result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
- addr_len);
+ result = kernel_bind(sock, (struct sockaddr *)&src_addr,
+ addr_len);
if (result < 0) {
/* This *may* not indicate a critical error */
log_print("could not bind for connect: %d", result);
@@ -1830,7 +1830,7 @@ static int dlm_tcp_bind(struct socket *sock)
static int dlm_tcp_connect(struct connection *con, struct socket *sock,
struct sockaddr *addr, int addr_len)
{
- return sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
+ return kernel_connect(sock, addr, addr_len, O_NONBLOCK);
}
static int dlm_tcp_listen_validate(void)
@@ -1862,8 +1862,8 @@ static int dlm_tcp_listen_bind(struct socket *sock)
/* Bind to our port */
make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
- return sock->ops->bind(sock, (struct sockaddr *)&dlm_local_addr[0],
- addr_len);
+ return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
+ addr_len);
}
static const struct dlm_proto_ops dlm_tcp_ops = {
@@ -1888,12 +1888,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
int ret;
/*
- * Make sock->ops->connect() function return in specified time,
+ * Make kernel_connect() function return in specified time,
* since O_NONBLOCK argument in connect() function does not work here,
* then, we should restore the default value of this attribute.
*/
sock_set_sndtimeo(sock->sk, 5);
- ret = sock->ops->connect(sock, addr, addr_len, 0);
+ ret = kernel_connect(sock, addr, addr_len, 0);
sock_set_sndtimeo(sock->sk, 0);
return ret;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x f1bb47a31dff6d4b34fb14e99850860ee74bb003
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012638-deferred-blunt-6572@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
f1bb47a31dff ("lsm: new security_file_ioctl_compat() hook")
98e828a0650f ("security: Refactor declaration of LSM hooks")
33c84e89abe4 ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f1bb47a31dff6d4b34fb14e99850860ee74bb003 Mon Sep 17 00:00:00 2001
From: Alfred Piccioni <alpic(a)google.com>
Date: Tue, 19 Dec 2023 10:09:09 +0100
Subject: [PATCH] lsm: new security_file_ioctl_compat() hook
Some ioctl commands do not require ioctl permission, but are routed to
other permissions such as FILE_GETATTR or FILE_SETATTR. This routing is
done by comparing the ioctl cmd to a set of 64-bit flags (FS_IOC_*).
However, if a 32-bit process is running on a 64-bit kernel, it emits
32-bit flags (FS_IOC32_*) for certain ioctl operations. These flags are
being checked erroneously, which leads to these ioctl operations being
routed to the ioctl permission, rather than the correct file
permissions.
This was also noted in a RED-PEN finding from a while back -
"/* RED-PEN how should LSM module know it's handling 32bit? */".
This patch introduces a new hook, security_file_ioctl_compat(), that is
called from the compat ioctl syscall. All current LSMs have been changed
to support this hook.
Reviewing the three places where we are currently using
security_file_ioctl(), it appears that only SELinux needs a dedicated
compat change; TOMOYO and SMACK appear to be functional without any
change.
Cc: stable(a)vger.kernel.org
Fixes: 0b24dcb7f2f7 ("Revert "selinux: simplify ioctl checking"")
Signed-off-by: Alfred Piccioni <alpic(a)google.com>
Reviewed-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
[PM: subject tweak, line length fixes, and alignment corrections]
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f5fd99d6b0d4..76cf22ac97d7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -920,8 +920,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
if (!f.file)
return -EBADF;
- /* RED-PEN how should LSM module know it's handling 32bit? */
- error = security_file_ioctl(f.file, cmd, arg);
+ error = security_file_ioctl_compat(f.file, cmd, arg);
if (error)
goto out;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index c925a0d26edf..185924c56378 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -171,6 +171,8 @@ LSM_HOOK(int, 0, file_alloc_security, struct file *file)
LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
unsigned long arg)
+LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
+ unsigned long arg)
LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
unsigned long prot, unsigned long flags)
diff --git a/include/linux/security.h b/include/linux/security.h
index 750130a7b9dd..d0eb20f90b26 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -394,6 +394,8 @@ int security_file_permission(struct file *file, int mask);
int security_file_alloc(struct file *file);
void security_file_free(struct file *file);
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int security_file_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg);
int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags);
int security_mmap_addr(unsigned long addr);
@@ -1002,6 +1004,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd,
return 0;
}
+static inline int security_file_ioctl_compat(struct file *file,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ return 0;
+}
+
static inline int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags)
{
diff --git a/security/security.c b/security/security.c
index d7b15ea67c3f..69148dfc90cd 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2732,6 +2732,24 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
EXPORT_SYMBOL_GPL(security_file_ioctl);
+/**
+ * security_file_ioctl_compat() - Check if an ioctl is allowed in compat mode
+ * @file: associated file
+ * @cmd: ioctl cmd
+ * @arg: ioctl arguments
+ *
+ * Compat version of security_file_ioctl() that correctly handles 32-bit
+ * processes running on 64-bit kernels.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_file_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return call_int_hook(file_ioctl_compat, 0, file, cmd, arg);
+}
+EXPORT_SYMBOL_GPL(security_file_ioctl_compat);
+
static inline unsigned long mmap_prot(struct file *file, unsigned long prot)
{
/*
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b340425ccfae..179540441115 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3732,6 +3732,33 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
return error;
}
+static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ /*
+ * If we are in a 64-bit kernel running 32-bit userspace, we need to
+ * make sure we don't compare 32-bit flags to 64-bit flags.
+ */
+ switch (cmd) {
+ case FS_IOC32_GETFLAGS:
+ cmd = FS_IOC_GETFLAGS;
+ break;
+ case FS_IOC32_SETFLAGS:
+ cmd = FS_IOC_SETFLAGS;
+ break;
+ case FS_IOC32_GETVERSION:
+ cmd = FS_IOC_GETVERSION;
+ break;
+ case FS_IOC32_SETVERSION:
+ cmd = FS_IOC_SETVERSION;
+ break;
+ default:
+ break;
+ }
+
+ return selinux_file_ioctl(file, cmd, arg);
+}
+
static int default_noexec __ro_after_init;
static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
@@ -7122,6 +7149,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
LSM_HOOK_INIT(file_permission, selinux_file_permission),
LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
+ LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat),
LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect),
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 53336d7daa93..c126f6a16de4 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -5051,6 +5051,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = {
LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security),
LSM_HOOK_INIT(file_ioctl, smack_file_ioctl),
+ LSM_HOOK_INIT(file_ioctl_compat, smack_file_ioctl),
LSM_HOOK_INIT(file_lock, smack_file_lock),
LSM_HOOK_INIT(file_fcntl, smack_file_fcntl),
LSM_HOOK_INIT(mmap_file, smack_mmap_file),
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index e10491f155a5..3c3af149bf1c 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -574,6 +574,7 @@ static struct security_hook_list tomoyo_hooks[] __ro_after_init = {
LSM_HOOK_INIT(path_rename, tomoyo_path_rename),
LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr),
LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl),
+ LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl),
LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod),
LSM_HOOK_INIT(path_chown, tomoyo_path_chown),
LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot),
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f1bb47a31dff6d4b34fb14e99850860ee74bb003
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012636-appear-afford-1d24@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f1bb47a31dff ("lsm: new security_file_ioctl_compat() hook")
98e828a0650f ("security: Refactor declaration of LSM hooks")
33c84e89abe4 ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f1bb47a31dff6d4b34fb14e99850860ee74bb003 Mon Sep 17 00:00:00 2001
From: Alfred Piccioni <alpic(a)google.com>
Date: Tue, 19 Dec 2023 10:09:09 +0100
Subject: [PATCH] lsm: new security_file_ioctl_compat() hook
Some ioctl commands do not require ioctl permission, but are routed to
other permissions such as FILE_GETATTR or FILE_SETATTR. This routing is
done by comparing the ioctl cmd to a set of 64-bit flags (FS_IOC_*).
However, if a 32-bit process is running on a 64-bit kernel, it emits
32-bit flags (FS_IOC32_*) for certain ioctl operations. These flags are
being checked erroneously, which leads to these ioctl operations being
routed to the ioctl permission, rather than the correct file
permissions.
This was also noted in a RED-PEN finding from a while back -
"/* RED-PEN how should LSM module know it's handling 32bit? */".
This patch introduces a new hook, security_file_ioctl_compat(), that is
called from the compat ioctl syscall. All current LSMs have been changed
to support this hook.
Reviewing the three places where we are currently using
security_file_ioctl(), it appears that only SELinux needs a dedicated
compat change; TOMOYO and SMACK appear to be functional without any
change.
Cc: stable(a)vger.kernel.org
Fixes: 0b24dcb7f2f7 ("Revert "selinux: simplify ioctl checking"")
Signed-off-by: Alfred Piccioni <alpic(a)google.com>
Reviewed-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
[PM: subject tweak, line length fixes, and alignment corrections]
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f5fd99d6b0d4..76cf22ac97d7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -920,8 +920,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
if (!f.file)
return -EBADF;
- /* RED-PEN how should LSM module know it's handling 32bit? */
- error = security_file_ioctl(f.file, cmd, arg);
+ error = security_file_ioctl_compat(f.file, cmd, arg);
if (error)
goto out;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index c925a0d26edf..185924c56378 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -171,6 +171,8 @@ LSM_HOOK(int, 0, file_alloc_security, struct file *file)
LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
unsigned long arg)
+LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
+ unsigned long arg)
LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
unsigned long prot, unsigned long flags)
diff --git a/include/linux/security.h b/include/linux/security.h
index 750130a7b9dd..d0eb20f90b26 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -394,6 +394,8 @@ int security_file_permission(struct file *file, int mask);
int security_file_alloc(struct file *file);
void security_file_free(struct file *file);
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int security_file_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg);
int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags);
int security_mmap_addr(unsigned long addr);
@@ -1002,6 +1004,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd,
return 0;
}
+static inline int security_file_ioctl_compat(struct file *file,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ return 0;
+}
+
static inline int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags)
{
diff --git a/security/security.c b/security/security.c
index d7b15ea67c3f..69148dfc90cd 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2732,6 +2732,24 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
EXPORT_SYMBOL_GPL(security_file_ioctl);
+/**
+ * security_file_ioctl_compat() - Check if an ioctl is allowed in compat mode
+ * @file: associated file
+ * @cmd: ioctl cmd
+ * @arg: ioctl arguments
+ *
+ * Compat version of security_file_ioctl() that correctly handles 32-bit
+ * processes running on 64-bit kernels.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_file_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return call_int_hook(file_ioctl_compat, 0, file, cmd, arg);
+}
+EXPORT_SYMBOL_GPL(security_file_ioctl_compat);
+
static inline unsigned long mmap_prot(struct file *file, unsigned long prot)
{
/*
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b340425ccfae..179540441115 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3732,6 +3732,33 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
return error;
}
+static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ /*
+ * If we are in a 64-bit kernel running 32-bit userspace, we need to
+ * make sure we don't compare 32-bit flags to 64-bit flags.
+ */
+ switch (cmd) {
+ case FS_IOC32_GETFLAGS:
+ cmd = FS_IOC_GETFLAGS;
+ break;
+ case FS_IOC32_SETFLAGS:
+ cmd = FS_IOC_SETFLAGS;
+ break;
+ case FS_IOC32_GETVERSION:
+ cmd = FS_IOC_GETVERSION;
+ break;
+ case FS_IOC32_SETVERSION:
+ cmd = FS_IOC_SETVERSION;
+ break;
+ default:
+ break;
+ }
+
+ return selinux_file_ioctl(file, cmd, arg);
+}
+
static int default_noexec __ro_after_init;
static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
@@ -7122,6 +7149,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
LSM_HOOK_INIT(file_permission, selinux_file_permission),
LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
+ LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat),
LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect),
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 53336d7daa93..c126f6a16de4 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -5051,6 +5051,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = {
LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security),
LSM_HOOK_INIT(file_ioctl, smack_file_ioctl),
+ LSM_HOOK_INIT(file_ioctl_compat, smack_file_ioctl),
LSM_HOOK_INIT(file_lock, smack_file_lock),
LSM_HOOK_INIT(file_fcntl, smack_file_fcntl),
LSM_HOOK_INIT(mmap_file, smack_mmap_file),
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index e10491f155a5..3c3af149bf1c 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -574,6 +574,7 @@ static struct security_hook_list tomoyo_hooks[] __ro_after_init = {
LSM_HOOK_INIT(path_rename, tomoyo_path_rename),
LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr),
LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl),
+ LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl),
LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod),
LSM_HOOK_INIT(path_chown, tomoyo_path_chown),
LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot),
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 41952be6661b20f56c2c5b06c431880dd975b747
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012657-magician-flatworm-8577@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
41952be6661b ("arm64: dts: qcom: sm6375: fix USB wakeup interrupt types")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 41952be6661b20f56c2c5b06c431880dd975b747 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 20 Nov 2023 17:43:29 +0100
Subject: [PATCH] arm64: dts: qcom: sm6375: fix USB wakeup interrupt types
The DP/DM wakeup interrupts are edge triggered and which edge to trigger
on depends on use-case and whether a Low speed or Full/High speed device
is connected.
Fixes: 59d34ca97f91 ("arm64: dts: qcom: Add initial device tree for SM6375")
Cc: stable(a)vger.kernel.org # 6.2
Cc: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20231120164331.8116-10-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi
index 2fba0e7ea4e6..331bd98dbfde 100644
--- a/arch/arm64/boot/dts/qcom/sm6375.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi
@@ -1405,8 +1405,8 @@ usb_1: usb@4ef8800 {
interrupts = <GIC_SPI 302 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 93 IRQ_TYPE_EDGE_BOTH>,
+ <GIC_SPI 94 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq",
"ss_phy_irq",
"dm_hs_phy_irq",
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 41952be6661b20f56c2c5b06c431880dd975b747
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012656-surely-shamrock-7199@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
41952be6661b ("arm64: dts: qcom: sm6375: fix USB wakeup interrupt types")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 41952be6661b20f56c2c5b06c431880dd975b747 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 20 Nov 2023 17:43:29 +0100
Subject: [PATCH] arm64: dts: qcom: sm6375: fix USB wakeup interrupt types
The DP/DM wakeup interrupts are edge triggered and which edge to trigger
on depends on use-case and whether a Low speed or Full/High speed device
is connected.
Fixes: 59d34ca97f91 ("arm64: dts: qcom: Add initial device tree for SM6375")
Cc: stable(a)vger.kernel.org # 6.2
Cc: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20231120164331.8116-10-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi
index 2fba0e7ea4e6..331bd98dbfde 100644
--- a/arch/arm64/boot/dts/qcom/sm6375.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi
@@ -1405,8 +1405,8 @@ usb_1: usb@4ef8800 {
interrupts = <GIC_SPI 302 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 93 IRQ_TYPE_EDGE_BOTH>,
+ <GIC_SPI 94 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq",
"ss_phy_irq",
"dm_hs_phy_irq",
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 710dd03464e4ab5b3d329768388b165d61958577
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012644-margarine-oval-e77b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
710dd03464e4 ("ARM: dts: qcom: sdx55: fix USB SS wakeup")
de95f139394a ("ARM: dts: qcom: sdx55: fix USB DP/DM HS PHY interrupts")
d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 710dd03464e4ab5b3d329768388b165d61958577 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Wed, 13 Dec 2023 18:31:31 +0100
Subject: [PATCH] ARM: dts: qcom: sdx55: fix USB SS wakeup
The USB SS PHY interrupt needs to be provided by the PDC interrupt
controller in order to be able to wake the system up from low-power
states.
Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support")
Cc: stable(a)vger.kernel.org # 5.12
Cc: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Link: https://lore.kernel.org/r/20231213173131.29436-4-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
index 5347519f8357..e233233c74ce 100644
--- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
@@ -581,7 +581,7 @@ usb: usb@a6f8800 {
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 51 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 11 IRQ_TYPE_EDGE_BOTH>,
<&pdc 10 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 710dd03464e4ab5b3d329768388b165d61958577
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012643-unvaried-uninvited-23c2@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
710dd03464e4 ("ARM: dts: qcom: sdx55: fix USB SS wakeup")
de95f139394a ("ARM: dts: qcom: sdx55: fix USB DP/DM HS PHY interrupts")
d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 710dd03464e4ab5b3d329768388b165d61958577 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Wed, 13 Dec 2023 18:31:31 +0100
Subject: [PATCH] ARM: dts: qcom: sdx55: fix USB SS wakeup
The USB SS PHY interrupt needs to be provided by the PDC interrupt
controller in order to be able to wake the system up from low-power
states.
Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support")
Cc: stable(a)vger.kernel.org # 5.12
Cc: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Link: https://lore.kernel.org/r/20231213173131.29436-4-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
index 5347519f8357..e233233c74ce 100644
--- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
@@ -581,7 +581,7 @@ usb: usb@a6f8800 {
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 51 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 11 IRQ_TYPE_EDGE_BOTH>,
<&pdc 10 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x cc4e1da491b84ca05339a19893884cda78f74aef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012633-companion-trousers-6310@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
cc4e1da491b8 ("arm64: dts: qcom: sm8150: fix USB SS wakeup")
134de5e83177 ("arm64: dts: qcom: sm8150: fix USB DP/DM HS PHY interrupts")
54524b6987d1 ("arm64: dts: qcom: sm8150: fix USB wakeup interrupt types")
0c9dde0d2015 ("arm64: dts: qcom: sm8150: Add secondary USB and PHY nodes")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc4e1da491b84ca05339a19893884cda78f74aef Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Wed, 13 Dec 2023 18:34:03 +0100
Subject: [PATCH] arm64: dts: qcom: sm8150: fix USB SS wakeup
The USB SS PHY interrupts need to be provided by the PDC interrupt
controller in order to be able to wake the system up from low-power
states.
Fixes: 0c9dde0d2015 ("arm64: dts: qcom: sm8150: Add secondary USB and PHY nodes")
Fixes: b33d2868e8d3 ("arm64: dts: qcom: sm8150: Add USB and PHY device nodes")
Cc: stable(a)vger.kernel.org # 5.10
Cc: Jack Pham <quic_jackp(a)quicinc.com>
Cc: Jonathan Marek <jonathan(a)marek.ca>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Link: https://lore.kernel.org/r/20231213173403.29544-6-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
index 7f67d7d0cfff..81454442624e 100644
--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
@@ -3548,7 +3548,7 @@ usb_1: usb@a6f8800 {
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 8 IRQ_TYPE_EDGE_BOTH>,
<&pdc 9 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
@@ -3601,7 +3601,7 @@ usb_2: usb@a8f8800 {
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 7 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 10 IRQ_TYPE_EDGE_BOTH>,
<&pdc 11 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 134de5e831775e8b178db9b131c1d3769a766982
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012613-unsliced-alphabet-6d99@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
134de5e83177 ("arm64: dts: qcom: sm8150: fix USB DP/DM HS PHY interrupts")
54524b6987d1 ("arm64: dts: qcom: sm8150: fix USB wakeup interrupt types")
0c9dde0d2015 ("arm64: dts: qcom: sm8150: Add secondary USB and PHY nodes")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 134de5e831775e8b178db9b131c1d3769a766982 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Wed, 13 Dec 2023 18:34:02 +0100
Subject: [PATCH] arm64: dts: qcom: sm8150: fix USB DP/DM HS PHY interrupts
The USB DP/DM HS PHY interrupts need to be provided by the PDC interrupt
controller in order to be able to wake the system up from low-power
states and to be able to detect disconnect events, which requires
triggering on falling edges.
A recent commit updated the trigger type but failed to change the
interrupt provider as required. This leads to the current Linux driver
failing to probe instead of printing an error during suspend and USB
wakeup not working as intended.
Fixes: 54524b6987d1 ("arm64: dts: qcom: sm8150: fix USB wakeup interrupt types")
Fixes: 0c9dde0d2015 ("arm64: dts: qcom: sm8150: Add secondary USB and PHY nodes")
Fixes: b33d2868e8d3 ("arm64: dts: qcom: sm8150: Add USB and PHY device nodes")
Cc: stable(a)vger.kernel.org # 5.10
Cc: Jack Pham <quic_jackp(a)quicinc.com>
Cc: Jonathan Marek <jonathan(a)marek.ca>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Link: https://lore.kernel.org/r/20231213173403.29544-5-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
index 3283da81ee46..7f67d7d0cfff 100644
--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
@@ -3547,10 +3547,10 @@ usb_1: usb@a6f8800 {
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 488 IRQ_TYPE_EDGE_BOTH>,
- <GIC_SPI 489 IRQ_TYPE_EDGE_BOTH>;
+ interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 8 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 9 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
"dm_hs_phy_irq", "dp_hs_phy_irq";
@@ -3600,10 +3600,10 @@ usb_2: usb@a8f8800 {
<&gcc GCC_USB30_SEC_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 490 IRQ_TYPE_EDGE_BOTH>,
- <GIC_SPI 491 IRQ_TYPE_EDGE_BOTH>;
+ interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 10 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 11 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
"dm_hs_phy_irq", "dp_hs_phy_irq";
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x de95f139394a5ed82270f005bc441d2e7c1e51b7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012656-pending-same-af81@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
de95f139394a ("ARM: dts: qcom: sdx55: fix USB DP/DM HS PHY interrupts")
d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From de95f139394a5ed82270f005bc441d2e7c1e51b7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Wed, 13 Dec 2023 18:31:30 +0100
Subject: [PATCH] ARM: dts: qcom: sdx55: fix USB DP/DM HS PHY interrupts
The USB DP/DM HS PHY interrupts need to be provided by the PDC interrupt
controller in order to be able to wake the system up from low-power
states and to be able to detect disconnect events, which requires
triggering on falling edges.
A recent commit updated the trigger type but failed to change the
interrupt provider as required. This leads to the current Linux driver
failing to probe instead of printing an error during suspend and USB
wakeup not working as intended.
Fixes: d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types")
Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support")
Cc: stable(a)vger.kernel.org # 5.12
Cc: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Link: https://lore.kernel.org/r/20231213173131.29436-3-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
index 8934bf4ad433..5347519f8357 100644
--- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
@@ -580,10 +580,10 @@ usb: usb@a6f8800 {
<&gcc GCC_USB30_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 158 IRQ_TYPE_EDGE_BOTH>,
- <GIC_SPI 157 IRQ_TYPE_EDGE_BOTH>;
+ interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 11 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 10 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
"dm_hs_phy_irq", "dp_hs_phy_irq";
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x de95f139394a5ed82270f005bc441d2e7c1e51b7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012654-thespian-marrow-a165@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
de95f139394a ("ARM: dts: qcom: sdx55: fix USB DP/DM HS PHY interrupts")
d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From de95f139394a5ed82270f005bc441d2e7c1e51b7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Wed, 13 Dec 2023 18:31:30 +0100
Subject: [PATCH] ARM: dts: qcom: sdx55: fix USB DP/DM HS PHY interrupts
The USB DP/DM HS PHY interrupts need to be provided by the PDC interrupt
controller in order to be able to wake the system up from low-power
states and to be able to detect disconnect events, which requires
triggering on falling edges.
A recent commit updated the trigger type but failed to change the
interrupt provider as required. This leads to the current Linux driver
failing to probe instead of printing an error during suspend and USB
wakeup not working as intended.
Fixes: d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types")
Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support")
Cc: stable(a)vger.kernel.org # 5.12
Cc: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Link: https://lore.kernel.org/r/20231213173131.29436-3-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
index 8934bf4ad433..5347519f8357 100644
--- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
@@ -580,10 +580,10 @@ usb: usb@a6f8800 {
<&gcc GCC_USB30_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 158 IRQ_TYPE_EDGE_BOTH>,
- <GIC_SPI 157 IRQ_TYPE_EDGE_BOTH>;
+ interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 11 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 10 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
"dm_hs_phy_irq", "dp_hs_phy_irq";
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 9b956999bf725fd62613f719c3178fdbee6e5f47
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012626-ninja-ominous-1a86@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
9b956999bf72 ("arm64: dts: qcom: sc7180: fix USB wakeup interrupt types")
1e6e6e7a080c ("arm64: dts: qcom: sc7180: Use pdc interrupts for USB instead of GIC interrupts")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9b956999bf725fd62613f719c3178fdbee6e5f47 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 20 Nov 2023 17:43:23 +0100
Subject: [PATCH] arm64: dts: qcom: sc7180: fix USB wakeup interrupt types
The DP/DM wakeup interrupts are edge triggered and which edge to trigger
on depends on use-case and whether a Low speed or Full/High speed device
is connected.
Fixes: 0b766e7fe5a2 ("arm64: dts: qcom: sc7180: Add USB related nodes")
Cc: stable(a)vger.kernel.org # 5.10
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20231120164331.8116-4-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index f0f0709718ac..4dcaa15caef2 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -2966,8 +2966,8 @@ usb_1: usb@a6f8800 {
interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 8 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 9 IRQ_TYPE_LEVEL_HIGH>;
+ <&pdc 8 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 9 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
"dm_hs_phy_irq", "dp_hs_phy_irq";
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 84228d5e29dbc7a6be51e221000e1d122125826c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012606-illicitly-delegate-14d2@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
84228d5e29db ("ARM: dts: samsung: exynos4210-i9100: Unconditionally enable LDO12")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 84228d5e29dbc7a6be51e221000e1d122125826c Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul(a)crapouillou.net>
Date: Wed, 6 Dec 2023 23:15:54 +0100
Subject: [PATCH] ARM: dts: samsung: exynos4210-i9100: Unconditionally enable
LDO12
The kernel hangs for a good 12 seconds without any info being printed to
dmesg, very early in the boot process, if this regulator is not enabled.
Force-enable it to work around this issue, until we know more about the
underlying problem.
Signed-off-by: Paul Cercueil <paul(a)crapouillou.net>
Fixes: 8620cc2f99b7 ("ARM: dts: exynos: Add devicetree file for the Galaxy S2")
Cc: stable(a)vger.kernel.org # v5.8+
Link: https://lore.kernel.org/r/20231206221556.15348-2-paul@crapouillou.net
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/samsung/exynos4210-i9100.dts b/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
index a9ec1f6c1dea..a076a1dfe41f 100644
--- a/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
+++ b/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
@@ -527,6 +527,14 @@ vtcam_reg: LDO12 {
regulator-name = "VT_CAM_1.8V";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
+
+ /*
+ * Force-enable this regulator; otherwise the
+ * kernel hangs very early in the boot process
+ * for about 12 seconds, without apparent
+ * reason.
+ */
+ regulator-always-on;
};
vcclcd_reg: LDO13 {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 84228d5e29dbc7a6be51e221000e1d122125826c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012605-dangling-sprout-0d8a@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
84228d5e29db ("ARM: dts: samsung: exynos4210-i9100: Unconditionally enable LDO12")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 84228d5e29dbc7a6be51e221000e1d122125826c Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul(a)crapouillou.net>
Date: Wed, 6 Dec 2023 23:15:54 +0100
Subject: [PATCH] ARM: dts: samsung: exynos4210-i9100: Unconditionally enable
LDO12
The kernel hangs for a good 12 seconds without any info being printed to
dmesg, very early in the boot process, if this regulator is not enabled.
Force-enable it to work around this issue, until we know more about the
underlying problem.
Signed-off-by: Paul Cercueil <paul(a)crapouillou.net>
Fixes: 8620cc2f99b7 ("ARM: dts: exynos: Add devicetree file for the Galaxy S2")
Cc: stable(a)vger.kernel.org # v5.8+
Link: https://lore.kernel.org/r/20231206221556.15348-2-paul@crapouillou.net
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/samsung/exynos4210-i9100.dts b/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
index a9ec1f6c1dea..a076a1dfe41f 100644
--- a/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
+++ b/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
@@ -527,6 +527,14 @@ vtcam_reg: LDO12 {
regulator-name = "VT_CAM_1.8V";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
+
+ /*
+ * Force-enable this regulator; otherwise the
+ * kernel hangs very early in the boot process
+ * for about 12 seconds, without apparent
+ * reason.
+ */
+ regulator-always-on;
};
vcclcd_reg: LDO13 {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 84228d5e29dbc7a6be51e221000e1d122125826c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012604-identify-helpline-73f8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
84228d5e29db ("ARM: dts: samsung: exynos4210-i9100: Unconditionally enable LDO12")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 84228d5e29dbc7a6be51e221000e1d122125826c Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul(a)crapouillou.net>
Date: Wed, 6 Dec 2023 23:15:54 +0100
Subject: [PATCH] ARM: dts: samsung: exynos4210-i9100: Unconditionally enable
LDO12
The kernel hangs for a good 12 seconds without any info being printed to
dmesg, very early in the boot process, if this regulator is not enabled.
Force-enable it to work around this issue, until we know more about the
underlying problem.
Signed-off-by: Paul Cercueil <paul(a)crapouillou.net>
Fixes: 8620cc2f99b7 ("ARM: dts: exynos: Add devicetree file for the Galaxy S2")
Cc: stable(a)vger.kernel.org # v5.8+
Link: https://lore.kernel.org/r/20231206221556.15348-2-paul@crapouillou.net
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/samsung/exynos4210-i9100.dts b/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
index a9ec1f6c1dea..a076a1dfe41f 100644
--- a/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
+++ b/arch/arm/boot/dts/samsung/exynos4210-i9100.dts
@@ -527,6 +527,14 @@ vtcam_reg: LDO12 {
regulator-name = "VT_CAM_1.8V";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
+
+ /*
+ * Force-enable this regulator; otherwise the
+ * kernel hangs very early in the boot process
+ * for about 12 seconds, without apparent
+ * reason.
+ */
+ regulator-always-on;
};
vcclcd_reg: LDO13 {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d0ec3c4c11c3b30e1f2d344973b2a7bf0f986734
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012648-balance-makeshift-23a6@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d0ec3c4c11c3b30e1f2d344973b2a7bf0f986734 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 20 Nov 2023 17:43:21 +0100
Subject: [PATCH] ARM: dts: qcom: sdx55: fix USB wakeup interrupt types
The DP/DM wakeup interrupts are edge triggered and which edge to trigger
on depends on use-case and whether a Low speed or Full/High speed device
is connected.
Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support")
Cc: stable(a)vger.kernel.org # 5.12
Cc: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20231120164331.8116-2-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
index f604a27f50be..0fe220408888 100644
--- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
@@ -582,8 +582,8 @@ usb: usb@a6f8800 {
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 158 IRQ_TYPE_EDGE_BOTH>,
+ <GIC_SPI 157 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
"dm_hs_phy_irq", "dp_hs_phy_irq";
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d0ec3c4c11c3b30e1f2d344973b2a7bf0f986734
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012645-attic-doorstop-7a33@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d0ec3c4c11c3b30e1f2d344973b2a7bf0f986734 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 20 Nov 2023 17:43:21 +0100
Subject: [PATCH] ARM: dts: qcom: sdx55: fix USB wakeup interrupt types
The DP/DM wakeup interrupts are edge triggered and which edge to trigger
on depends on use-case and whether a Low speed or Full/High speed device
is connected.
Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support")
Cc: stable(a)vger.kernel.org # 5.12
Cc: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20231120164331.8116-2-johan+linaro@kernel.org
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
index f604a27f50be..0fe220408888 100644
--- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
@@ -582,8 +582,8 @@ usb: usb@a6f8800 {
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 158 IRQ_TYPE_EDGE_BOTH>,
+ <GIC_SPI 157 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
"dm_hs_phy_irq", "dp_hs_phy_irq";
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e95aada4cb93d42e25c30a0ef9eb2923d9711d4a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012610-lustiness-grew-ee5c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e95aada4cb93 ("pipe: wakeup wr_wait after setting max_usage")
189b0ddc2451 ("pipe: Fix missing lock in pipe_resize_ring()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e95aada4cb93d42e25c30a0ef9eb2923d9711d4a Mon Sep 17 00:00:00 2001
From: Lukas Schauer <lukas(a)schauer.dev>
Date: Fri, 1 Dec 2023 11:11:28 +0100
Subject: [PATCH] pipe: wakeup wr_wait after setting max_usage
Commit c73be61cede5 ("pipe: Add general notification queue support") a
regression was introduced that would lock up resized pipes under certain
conditions. See the reproducer in [1].
The commit resizing the pipe ring size was moved to a different
function, doing that moved the wakeup for pipe->wr_wait before actually
raising pipe->max_usage. If a pipe was full before the resize occured it
would result in the wakeup never actually triggering pipe_write.
Set @max_usage and @nr_accounted before waking writers if this isn't a
watch queue.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=212295 [1]
Link: https://lore.kernel.org/r/20231201-orchideen-modewelt-e009de4562c6@brauner
Fixes: c73be61cede5 ("pipe: Add general notification queue support")
Reviewed-by: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Lukas Schauer <lukas(a)schauer.dev>
[Christian Brauner <brauner(a)kernel.org>: rewrite to account for watch queues]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/pipe.c b/fs/pipe.c
index 226e7f66b590..8d9286a1f2e8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1324,6 +1324,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
pipe->tail = tail;
pipe->head = head;
+ if (!pipe_has_watch_queue(pipe)) {
+ pipe->max_usage = nr_slots;
+ pipe->nr_accounted = nr_slots;
+ }
+
spin_unlock_irq(&pipe->rd_wait.lock);
/* This might have made more room for writers */
@@ -1375,8 +1380,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
if (ret < 0)
goto out_revert_acct;
- pipe->max_usage = nr_slots;
- pipe->nr_accounted = nr_slots;
return pipe->max_usage * PAGE_SIZE;
out_revert_acct:
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e95aada4cb93d42e25c30a0ef9eb2923d9711d4a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012609-rejoice-hardship-fbe8@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e95aada4cb93 ("pipe: wakeup wr_wait after setting max_usage")
189b0ddc2451 ("pipe: Fix missing lock in pipe_resize_ring()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e95aada4cb93d42e25c30a0ef9eb2923d9711d4a Mon Sep 17 00:00:00 2001
From: Lukas Schauer <lukas(a)schauer.dev>
Date: Fri, 1 Dec 2023 11:11:28 +0100
Subject: [PATCH] pipe: wakeup wr_wait after setting max_usage
Commit c73be61cede5 ("pipe: Add general notification queue support") a
regression was introduced that would lock up resized pipes under certain
conditions. See the reproducer in [1].
The commit resizing the pipe ring size was moved to a different
function, doing that moved the wakeup for pipe->wr_wait before actually
raising pipe->max_usage. If a pipe was full before the resize occured it
would result in the wakeup never actually triggering pipe_write.
Set @max_usage and @nr_accounted before waking writers if this isn't a
watch queue.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=212295 [1]
Link: https://lore.kernel.org/r/20231201-orchideen-modewelt-e009de4562c6@brauner
Fixes: c73be61cede5 ("pipe: Add general notification queue support")
Reviewed-by: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Lukas Schauer <lukas(a)schauer.dev>
[Christian Brauner <brauner(a)kernel.org>: rewrite to account for watch queues]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/pipe.c b/fs/pipe.c
index 226e7f66b590..8d9286a1f2e8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1324,6 +1324,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
pipe->tail = tail;
pipe->head = head;
+ if (!pipe_has_watch_queue(pipe)) {
+ pipe->max_usage = nr_slots;
+ pipe->nr_accounted = nr_slots;
+ }
+
spin_unlock_irq(&pipe->rd_wait.lock);
/* This might have made more room for writers */
@@ -1375,8 +1380,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
if (ret < 0)
goto out_revert_acct;
- pipe->max_usage = nr_slots;
- pipe->nr_accounted = nr_slots;
return pipe->max_usage * PAGE_SIZE;
out_revert_acct:
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x e95aada4cb93d42e25c30a0ef9eb2923d9711d4a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012607-thorn-catty-17f8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
e95aada4cb93 ("pipe: wakeup wr_wait after setting max_usage")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e95aada4cb93d42e25c30a0ef9eb2923d9711d4a Mon Sep 17 00:00:00 2001
From: Lukas Schauer <lukas(a)schauer.dev>
Date: Fri, 1 Dec 2023 11:11:28 +0100
Subject: [PATCH] pipe: wakeup wr_wait after setting max_usage
Commit c73be61cede5 ("pipe: Add general notification queue support") a
regression was introduced that would lock up resized pipes under certain
conditions. See the reproducer in [1].
The commit resizing the pipe ring size was moved to a different
function, doing that moved the wakeup for pipe->wr_wait before actually
raising pipe->max_usage. If a pipe was full before the resize occured it
would result in the wakeup never actually triggering pipe_write.
Set @max_usage and @nr_accounted before waking writers if this isn't a
watch queue.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=212295 [1]
Link: https://lore.kernel.org/r/20231201-orchideen-modewelt-e009de4562c6@brauner
Fixes: c73be61cede5 ("pipe: Add general notification queue support")
Reviewed-by: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Lukas Schauer <lukas(a)schauer.dev>
[Christian Brauner <brauner(a)kernel.org>: rewrite to account for watch queues]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/pipe.c b/fs/pipe.c
index 226e7f66b590..8d9286a1f2e8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1324,6 +1324,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
pipe->tail = tail;
pipe->head = head;
+ if (!pipe_has_watch_queue(pipe)) {
+ pipe->max_usage = nr_slots;
+ pipe->nr_accounted = nr_slots;
+ }
+
spin_unlock_irq(&pipe->rd_wait.lock);
/* This might have made more room for writers */
@@ -1375,8 +1380,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
if (ret < 0)
goto out_revert_acct;
- pipe->max_usage = nr_slots;
- pipe->nr_accounted = nr_slots;
return pipe->max_usage * PAGE_SIZE;
out_revert_acct:
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x e95aada4cb93d42e25c30a0ef9eb2923d9711d4a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012606-expediter-rental-fd22@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
e95aada4cb93 ("pipe: wakeup wr_wait after setting max_usage")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e95aada4cb93d42e25c30a0ef9eb2923d9711d4a Mon Sep 17 00:00:00 2001
From: Lukas Schauer <lukas(a)schauer.dev>
Date: Fri, 1 Dec 2023 11:11:28 +0100
Subject: [PATCH] pipe: wakeup wr_wait after setting max_usage
Commit c73be61cede5 ("pipe: Add general notification queue support") a
regression was introduced that would lock up resized pipes under certain
conditions. See the reproducer in [1].
The commit resizing the pipe ring size was moved to a different
function, doing that moved the wakeup for pipe->wr_wait before actually
raising pipe->max_usage. If a pipe was full before the resize occured it
would result in the wakeup never actually triggering pipe_write.
Set @max_usage and @nr_accounted before waking writers if this isn't a
watch queue.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=212295 [1]
Link: https://lore.kernel.org/r/20231201-orchideen-modewelt-e009de4562c6@brauner
Fixes: c73be61cede5 ("pipe: Add general notification queue support")
Reviewed-by: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Lukas Schauer <lukas(a)schauer.dev>
[Christian Brauner <brauner(a)kernel.org>: rewrite to account for watch queues]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/pipe.c b/fs/pipe.c
index 226e7f66b590..8d9286a1f2e8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1324,6 +1324,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
pipe->tail = tail;
pipe->head = head;
+ if (!pipe_has_watch_queue(pipe)) {
+ pipe->max_usage = nr_slots;
+ pipe->nr_accounted = nr_slots;
+ }
+
spin_unlock_irq(&pipe->rd_wait.lock);
/* This might have made more room for writers */
@@ -1375,8 +1380,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
if (ret < 0)
goto out_revert_acct;
- pipe->max_usage = nr_slots;
- pipe->nr_accounted = nr_slots;
return pipe->max_usage * PAGE_SIZE;
out_revert_acct:
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 97566d09fd02d2ab329774bb89a2cdf2267e86d9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012623-affront-italics-53d9@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
97566d09fd02 ("thermal: intel: hfi: Add syscore callbacks for system-wide PM")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 97566d09fd02d2ab329774bb89a2cdf2267e86d9 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
Date: Tue, 9 Jan 2024 19:07:04 -0800
Subject: [PATCH] thermal: intel: hfi: Add syscore callbacks for system-wide PM
The kernel allocates a memory buffer and provides its location to the
hardware, which uses it to update the HFI table. This allocation occurs
during boot and remains constant throughout runtime.
When resuming from hibernation, the restore kernel allocates a second
memory buffer and reprograms the HFI hardware with the new location as
part of a normal boot. The location of the second memory buffer may
differ from the one allocated by the image kernel.
When the restore kernel transfers control to the image kernel, its HFI
buffer becomes invalid, potentially leading to memory corruption if the
hardware writes to it (the hardware continues to use the buffer from the
restore kernel).
It is also possible that the hardware "forgets" the address of the memory
buffer when resuming from "deep" suspend. Memory corruption may also occur
in such a scenario.
To prevent the described memory corruption, disable HFI when preparing to
suspend or hibernate. Enable it when resuming.
Add syscore callbacks to handle the package of the boot CPU (packages of
non-boot CPUs are handled via CPU offline). Syscore ops always run on the
boot CPU. Additionally, HFI only needs to be disabled during "deep" suspend
and hibernation. Syscore ops only run in these cases.
Cc: 6.1+ <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
[ rjw: Comment adjustment, subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 22445403b520..3b04c6ec4fca 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -35,7 +35,9 @@
#include <linux/processor.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/suspend.h>
#include <linux/string.h>
+#include <linux/syscore_ops.h>
#include <linux/topology.h>
#include <linux/workqueue.h>
@@ -571,6 +573,30 @@ static __init int hfi_parse_features(void)
return 0;
}
+static void hfi_do_enable(void)
+{
+ /* This code runs only on the boot CPU. */
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
+ struct hfi_instance *hfi_instance = info->hfi_instance;
+
+ /* No locking needed. There is no concurrency with CPU online. */
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
+}
+
+static int hfi_do_disable(void)
+{
+ /* No locking needed. There is no concurrency with CPU offline. */
+ hfi_disable();
+
+ return 0;
+}
+
+static struct syscore_ops hfi_pm_ops = {
+ .resume = hfi_do_enable,
+ .suspend = hfi_do_disable,
+};
+
void __init intel_hfi_init(void)
{
struct hfi_instance *hfi_instance;
@@ -602,6 +628,8 @@ void __init intel_hfi_init(void)
if (!hfi_updates_wq)
goto err_nomem;
+ register_syscore_ops(&hfi_pm_ops);
+
return;
err_nomem:
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 97566d09fd02d2ab329774bb89a2cdf2267e86d9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012622-prudishly-removing-24fc@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
97566d09fd02 ("thermal: intel: hfi: Add syscore callbacks for system-wide PM")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 97566d09fd02d2ab329774bb89a2cdf2267e86d9 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
Date: Tue, 9 Jan 2024 19:07:04 -0800
Subject: [PATCH] thermal: intel: hfi: Add syscore callbacks for system-wide PM
The kernel allocates a memory buffer and provides its location to the
hardware, which uses it to update the HFI table. This allocation occurs
during boot and remains constant throughout runtime.
When resuming from hibernation, the restore kernel allocates a second
memory buffer and reprograms the HFI hardware with the new location as
part of a normal boot. The location of the second memory buffer may
differ from the one allocated by the image kernel.
When the restore kernel transfers control to the image kernel, its HFI
buffer becomes invalid, potentially leading to memory corruption if the
hardware writes to it (the hardware continues to use the buffer from the
restore kernel).
It is also possible that the hardware "forgets" the address of the memory
buffer when resuming from "deep" suspend. Memory corruption may also occur
in such a scenario.
To prevent the described memory corruption, disable HFI when preparing to
suspend or hibernate. Enable it when resuming.
Add syscore callbacks to handle the package of the boot CPU (packages of
non-boot CPUs are handled via CPU offline). Syscore ops always run on the
boot CPU. Additionally, HFI only needs to be disabled during "deep" suspend
and hibernation. Syscore ops only run in these cases.
Cc: 6.1+ <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
[ rjw: Comment adjustment, subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 22445403b520..3b04c6ec4fca 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -35,7 +35,9 @@
#include <linux/processor.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/suspend.h>
#include <linux/string.h>
+#include <linux/syscore_ops.h>
#include <linux/topology.h>
#include <linux/workqueue.h>
@@ -571,6 +573,30 @@ static __init int hfi_parse_features(void)
return 0;
}
+static void hfi_do_enable(void)
+{
+ /* This code runs only on the boot CPU. */
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
+ struct hfi_instance *hfi_instance = info->hfi_instance;
+
+ /* No locking needed. There is no concurrency with CPU online. */
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
+}
+
+static int hfi_do_disable(void)
+{
+ /* No locking needed. There is no concurrency with CPU offline. */
+ hfi_disable();
+
+ return 0;
+}
+
+static struct syscore_ops hfi_pm_ops = {
+ .resume = hfi_do_enable,
+ .suspend = hfi_do_disable,
+};
+
void __init intel_hfi_init(void)
{
struct hfi_instance *hfi_instance;
@@ -602,6 +628,8 @@ void __init intel_hfi_init(void)
if (!hfi_updates_wq)
goto err_nomem;
+ register_syscore_ops(&hfi_pm_ops);
+
return;
err_nomem:
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 97566d09fd02d2ab329774bb89a2cdf2267e86d9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012620-dubiously-snowless-f0b1@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
97566d09fd02 ("thermal: intel: hfi: Add syscore callbacks for system-wide PM")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 97566d09fd02d2ab329774bb89a2cdf2267e86d9 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
Date: Tue, 9 Jan 2024 19:07:04 -0800
Subject: [PATCH] thermal: intel: hfi: Add syscore callbacks for system-wide PM
The kernel allocates a memory buffer and provides its location to the
hardware, which uses it to update the HFI table. This allocation occurs
during boot and remains constant throughout runtime.
When resuming from hibernation, the restore kernel allocates a second
memory buffer and reprograms the HFI hardware with the new location as
part of a normal boot. The location of the second memory buffer may
differ from the one allocated by the image kernel.
When the restore kernel transfers control to the image kernel, its HFI
buffer becomes invalid, potentially leading to memory corruption if the
hardware writes to it (the hardware continues to use the buffer from the
restore kernel).
It is also possible that the hardware "forgets" the address of the memory
buffer when resuming from "deep" suspend. Memory corruption may also occur
in such a scenario.
To prevent the described memory corruption, disable HFI when preparing to
suspend or hibernate. Enable it when resuming.
Add syscore callbacks to handle the package of the boot CPU (packages of
non-boot CPUs are handled via CPU offline). Syscore ops always run on the
boot CPU. Additionally, HFI only needs to be disabled during "deep" suspend
and hibernation. Syscore ops only run in these cases.
Cc: 6.1+ <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
[ rjw: Comment adjustment, subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 22445403b520..3b04c6ec4fca 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -35,7 +35,9 @@
#include <linux/processor.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/suspend.h>
#include <linux/string.h>
+#include <linux/syscore_ops.h>
#include <linux/topology.h>
#include <linux/workqueue.h>
@@ -571,6 +573,30 @@ static __init int hfi_parse_features(void)
return 0;
}
+static void hfi_do_enable(void)
+{
+ /* This code runs only on the boot CPU. */
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
+ struct hfi_instance *hfi_instance = info->hfi_instance;
+
+ /* No locking needed. There is no concurrency with CPU online. */
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
+}
+
+static int hfi_do_disable(void)
+{
+ /* No locking needed. There is no concurrency with CPU offline. */
+ hfi_disable();
+
+ return 0;
+}
+
+static struct syscore_ops hfi_pm_ops = {
+ .resume = hfi_do_enable,
+ .suspend = hfi_do_disable,
+};
+
void __init intel_hfi_init(void)
{
struct hfi_instance *hfi_instance;
@@ -602,6 +628,8 @@ void __init intel_hfi_init(void)
if (!hfi_updates_wq)
goto err_nomem;
+ register_syscore_ops(&hfi_pm_ops);
+
return;
err_nomem:
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x eff9704f5332a13b08fbdbe0f84059c9e7051d5f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012630-reference-stumbling-1d52@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
eff9704f5332 ("bus: mhi: host: Add alignment check for event ring read pointer")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eff9704f5332a13b08fbdbe0f84059c9e7051d5f Mon Sep 17 00:00:00 2001
From: Krishna chaitanya chundru <quic_krichai(a)quicinc.com>
Date: Tue, 31 Oct 2023 15:21:05 +0530
Subject: [PATCH] bus: mhi: host: Add alignment check for event ring read
pointer
Though we do check the event ring read pointer by "is_valid_ring_ptr"
to make sure it is in the buffer range, but there is another risk the
pointer may be not aligned. Since we are expecting event ring elements
are 128 bits(struct mhi_ring_element) aligned, an unaligned read pointer
could lead to multiple issues like DoS or ring buffer memory corruption.
So add a alignment check for event ring read pointer.
Fixes: ec32332df764 ("bus: mhi: core: Sanity check values from remote device before use")
cc: stable(a)vger.kernel.org
Signed-off-by: Krishna chaitanya chundru <quic_krichai(a)quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo(a)quicinc.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Link: https://lore.kernel.org/r/20231031-alignment_check-v2-1-1441db7c5efd@quicin…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c
index 6cf11457380b..d80975f4bba8 100644
--- a/drivers/bus/mhi/host/main.c
+++ b/drivers/bus/mhi/host/main.c
@@ -269,7 +269,8 @@ static void mhi_del_ring_element(struct mhi_controller *mhi_cntrl,
static bool is_valid_ring_ptr(struct mhi_ring *ring, dma_addr_t addr)
{
- return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len;
+ return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len &&
+ !(addr & (sizeof(struct mhi_ring_element) - 1));
}
int mhi_destroy_device(struct device *dev, void *data)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x eff9704f5332a13b08fbdbe0f84059c9e7051d5f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012629-motto-fidgeting-3df6@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
eff9704f5332 ("bus: mhi: host: Add alignment check for event ring read pointer")
a0f5a630668c ("bus: mhi: Move host MHI code to "host" directory")
ec32332df764 ("bus: mhi: core: Sanity check values from remote device before use")
855a70c12021 ("bus: mhi: Add MHI PCI support for WWAN modems")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eff9704f5332a13b08fbdbe0f84059c9e7051d5f Mon Sep 17 00:00:00 2001
From: Krishna chaitanya chundru <quic_krichai(a)quicinc.com>
Date: Tue, 31 Oct 2023 15:21:05 +0530
Subject: [PATCH] bus: mhi: host: Add alignment check for event ring read
pointer
Though we do check the event ring read pointer by "is_valid_ring_ptr"
to make sure it is in the buffer range, but there is another risk the
pointer may be not aligned. Since we are expecting event ring elements
are 128 bits(struct mhi_ring_element) aligned, an unaligned read pointer
could lead to multiple issues like DoS or ring buffer memory corruption.
So add a alignment check for event ring read pointer.
Fixes: ec32332df764 ("bus: mhi: core: Sanity check values from remote device before use")
cc: stable(a)vger.kernel.org
Signed-off-by: Krishna chaitanya chundru <quic_krichai(a)quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo(a)quicinc.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Link: https://lore.kernel.org/r/20231031-alignment_check-v2-1-1441db7c5efd@quicin…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c
index 6cf11457380b..d80975f4bba8 100644
--- a/drivers/bus/mhi/host/main.c
+++ b/drivers/bus/mhi/host/main.c
@@ -269,7 +269,8 @@ static void mhi_del_ring_element(struct mhi_controller *mhi_cntrl,
static bool is_valid_ring_ptr(struct mhi_ring *ring, dma_addr_t addr)
{
- return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len;
+ return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len &&
+ !(addr & (sizeof(struct mhi_ring_element) - 1));
}
int mhi_destroy_device(struct device *dev, void *data)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b89b6a863dd53bc70d8e52d50f9cfaef8ef5e9c9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012614-satiable-cheese-4655@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
b89b6a863dd5 ("bus: mhi: host: Add spinlock to protect WP access when queueing TREs")
a0f5a630668c ("bus: mhi: Move host MHI code to "host" directory")
4547a749be99 ("bus: mhi: core: Fix MHI runtime_pm behavior")
68731852f6e5 ("bus: mhi: core: Return EAGAIN if MHI ring is full")
a8f75cb348fd ("mhi: core: Factorize mhi queuing")
855a70c12021 ("bus: mhi: Add MHI PCI support for WWAN modems")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b89b6a863dd53bc70d8e52d50f9cfaef8ef5e9c9 Mon Sep 17 00:00:00 2001
From: Bhaumik Bhatt <bbhatt(a)codeaurora.org>
Date: Mon, 11 Dec 2023 14:42:51 +0800
Subject: [PATCH] bus: mhi: host: Add spinlock to protect WP access when
queueing TREs
Protect WP accesses such that multiple threads queueing buffers for
incoming data do not race.
Meanwhile, if CONFIG_TRACE_IRQFLAGS is enabled, irq will be enabled once
__local_bh_enable_ip is called as part of write_unlock_bh. Hence, let's
take irqsave lock after TRE is generated to avoid running write_unlock_bh
when irqsave lock is held.
Cc: stable(a)vger.kernel.org
Fixes: 189ff97cca53 ("bus: mhi: core: Add support for data transfer")
Signed-off-by: Bhaumik Bhatt <bbhatt(a)codeaurora.org>
Signed-off-by: Qiang Yu <quic_qianyu(a)quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo(a)quicinc.com>
Tested-by: Jeffrey Hugo <quic_jhugo(a)quicinc.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Link: https://lore.kernel.org/r/1702276972-41296-2-git-send-email-quic_qianyu@qui…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c
index d80975f4bba8..ad7807e4b523 100644
--- a/drivers/bus/mhi/host/main.c
+++ b/drivers/bus/mhi/host/main.c
@@ -1124,17 +1124,15 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info,
if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)))
return -EIO;
- read_lock_irqsave(&mhi_cntrl->pm_lock, flags);
-
ret = mhi_is_ring_full(mhi_cntrl, tre_ring);
- if (unlikely(ret)) {
- ret = -EAGAIN;
- goto exit_unlock;
- }
+ if (unlikely(ret))
+ return -EAGAIN;
ret = mhi_gen_tre(mhi_cntrl, mhi_chan, buf_info, mflags);
if (unlikely(ret))
- goto exit_unlock;
+ return ret;
+
+ read_lock_irqsave(&mhi_cntrl->pm_lock, flags);
/* Packet is queued, take a usage ref to exit M3 if necessary
* for host->device buffer, balanced put is done on buffer completion
@@ -1154,7 +1152,6 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info,
if (dir == DMA_FROM_DEVICE)
mhi_cntrl->runtime_put(mhi_cntrl);
-exit_unlock:
read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags);
return ret;
@@ -1206,6 +1203,9 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
int eot, eob, chain, bei;
int ret;
+ /* Protect accesses for reading and incrementing WP */
+ write_lock_bh(&mhi_chan->lock);
+
buf_ring = &mhi_chan->buf_ring;
tre_ring = &mhi_chan->tre_ring;
@@ -1223,8 +1223,10 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
if (!info->pre_mapped) {
ret = mhi_cntrl->map_single(mhi_cntrl, buf_info);
- if (ret)
+ if (ret) {
+ write_unlock_bh(&mhi_chan->lock);
return ret;
+ }
}
eob = !!(flags & MHI_EOB);
@@ -1241,6 +1243,8 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
mhi_add_ring_element(mhi_cntrl, tre_ring);
mhi_add_ring_element(mhi_cntrl, buf_ring);
+ write_unlock_bh(&mhi_chan->lock);
+
return 0;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b9bd10c43456d16abd97b717446f51afb3b88411
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012607-barley-trickster-1bf8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
b9bd10c43456 ("s390/vfio-ap: do not reset queue removed from host config")
f009cfa46655 ("s390/vfio-ap: reset queues associated with adapter for queue unbound from driver")
f848cba767e5 ("s390/vfio-ap: reset queues filtered from the guest's AP config")
774d10196e64 ("s390/vfio-ap: let on_scan_complete() callback filter matrix and update guest's APCB")
850fb7fa8c68 ("s390/vfio-ap: always filter entire AP matrix")
9261f0438835 ("s390/vfio-ap: use work struct to verify queue reset")
62aab082e999 ("s390/vfio-ap: store entire AP queue status word with the queue object")
dd174833e44e ("s390/vfio-ap: remove upper limit on wait for queue reset to complete")
c51f8c6bb5c8 ("s390/vfio-ap: allow deconfigured queue to be passed through to a guest")
411b0109daa5 ("s390/vfio-ap: wait for response code 05 to clear on queue reset")
7aa7b2a80cb7 ("s390/vfio-ap: clean up irq resources if possible")
680b7ddd7e2a ("s390/vfio-ap: no need to check the 'E' and 'I' bits in APQSW after TAPQ")
4bdf3c3956d8 ("s390/ap: provide F bit parameter for ap_rapq() and ap_zapq()")
7cb7636a1ac1 ("s390/vfio_ap: increase max wait time for reset verification")
51d4d9877087 ("s390/vfio_ap: fix handling of error response codes")
5a42b348adf9 ("s390/vfio_ap: verify ZAPQ completion after return of response code zero")
3ba41768105c ("s390/vfio_ap: use TAPQ to verify reset in progress completes")
0daf9878a799 ("s390/vfio_ap: check TAPQ response code when waiting for queue reset")
62414d901c3a ("s390/vfio-ap: verify reset complete in separate function")
08866d34c709 ("s390/vfio-ap: fix an error handling path in vfio_ap_mdev_probe_queue()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b9bd10c43456d16abd97b717446f51afb3b88411 Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak(a)linux.ibm.com>
Date: Mon, 15 Jan 2024 13:54:36 -0500
Subject: [PATCH] s390/vfio-ap: do not reset queue removed from host config
When a queue is unbound from the vfio_ap device driver, it is reset to
ensure its crypto data is not leaked when it is bound to another device
driver. If the queue is unbound due to the fact that the adapter or domain
was removed from the host's AP configuration, then attempting to reset it
will fail with response code 01 (APID not valid) getting returned from the
reset command. Let's ensure that the queue is assigned to the host's
configuration before resetting it.
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Reviewed-by: "Jason J. Herne" <jjherne(a)linux.ibm.com>
Reviewed-by: Halil Pasic <pasic(a)linux.ibm.com>
Fixes: eeb386aeb5b7 ("s390/vfio-ap: handle config changed and scan complete notification")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240115185441.31526-7-akrowiak@linux.ibm.com
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 550c936c413d..983b3b16196c 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -2215,10 +2215,10 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
q = dev_get_drvdata(&apdev->device);
get_update_locks_for_queue(q);
matrix_mdev = q->matrix_mdev;
+ apid = AP_QID_CARD(q->apqn);
+ apqi = AP_QID_QUEUE(q->apqn);
if (matrix_mdev) {
- apid = AP_QID_CARD(q->apqn);
- apqi = AP_QID_QUEUE(q->apqn);
/* If the queue is assigned to the guest's AP configuration */
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
@@ -2234,8 +2234,16 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
}
}
- vfio_ap_mdev_reset_queue(q);
- flush_work(&q->reset_work);
+ /*
+ * If the queue is not in the host's AP configuration, then resetting
+ * it will fail with response code 01, (APQN not valid); so, let's make
+ * sure it is in the host's config.
+ */
+ if (test_bit_inv(apid, (unsigned long *)matrix_dev->info.apm) &&
+ test_bit_inv(apqi, (unsigned long *)matrix_dev->info.aqm)) {
+ vfio_ap_mdev_reset_queue(q);
+ flush_work(&q->reset_work);
+ }
done:
if (matrix_mdev)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x f009cfa466558b7dfe97f167ba1875d6f9ea4c07
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012656-cider-favorable-b52e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
f009cfa46655 ("s390/vfio-ap: reset queues associated with adapter for queue unbound from driver")
f848cba767e5 ("s390/vfio-ap: reset queues filtered from the guest's AP config")
774d10196e64 ("s390/vfio-ap: let on_scan_complete() callback filter matrix and update guest's APCB")
850fb7fa8c68 ("s390/vfio-ap: always filter entire AP matrix")
9261f0438835 ("s390/vfio-ap: use work struct to verify queue reset")
62aab082e999 ("s390/vfio-ap: store entire AP queue status word with the queue object")
dd174833e44e ("s390/vfio-ap: remove upper limit on wait for queue reset to complete")
c51f8c6bb5c8 ("s390/vfio-ap: allow deconfigured queue to be passed through to a guest")
411b0109daa5 ("s390/vfio-ap: wait for response code 05 to clear on queue reset")
7aa7b2a80cb7 ("s390/vfio-ap: clean up irq resources if possible")
680b7ddd7e2a ("s390/vfio-ap: no need to check the 'E' and 'I' bits in APQSW after TAPQ")
4bdf3c3956d8 ("s390/ap: provide F bit parameter for ap_rapq() and ap_zapq()")
7cb7636a1ac1 ("s390/vfio_ap: increase max wait time for reset verification")
51d4d9877087 ("s390/vfio_ap: fix handling of error response codes")
5a42b348adf9 ("s390/vfio_ap: verify ZAPQ completion after return of response code zero")
3ba41768105c ("s390/vfio_ap: use TAPQ to verify reset in progress completes")
0daf9878a799 ("s390/vfio_ap: check TAPQ response code when waiting for queue reset")
62414d901c3a ("s390/vfio-ap: verify reset complete in separate function")
08866d34c709 ("s390/vfio-ap: fix an error handling path in vfio_ap_mdev_probe_queue()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f009cfa466558b7dfe97f167ba1875d6f9ea4c07 Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak(a)linux.ibm.com>
Date: Mon, 15 Jan 2024 13:54:35 -0500
Subject: [PATCH] s390/vfio-ap: reset queues associated with adapter for queue
unbound from driver
When a queue is unbound from the vfio_ap device driver, if that queue is
assigned to a guest's AP configuration, its associated adapter is removed
because queues are defined to a guest via a matrix of adapters and
domains; so, it is not possible to remove a single queue.
If an adapter is removed from the guest's AP configuration, all associated
queues must be reset to prevent leaking crypto data should any of them be
assigned to a different guest or device driver. The one caveat is that if
the queue is being removed because the adapter or domain has been removed
from the host's AP configuration, then an attempt to reset the queue will
fail with response code 01, AP-queue number not valid; so resetting these
queues should be skipped.
Acked-by: Halil Pasic <pasic(a)linux.ibm.com>
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Fixes: 09d31ff78793 ("s390/vfio-ap: hot plug/unplug of AP devices when probed/removed")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240115185441.31526-6-akrowiak@linux.ibm.com
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 44cd29aace8e..550c936c413d 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -939,45 +939,45 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev,
AP_MKQID(apid, apqi));
}
+static void collect_queues_to_reset(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid,
+ struct list_head *qlist)
+{
+ struct vfio_ap_queue *q;
+ unsigned long apqi;
+
+ for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS) {
+ q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi));
+ if (q)
+ list_add_tail(&q->reset_qnode, qlist);
+ }
+}
+
+static void reset_queues_for_apid(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid)
+{
+ struct list_head qlist;
+
+ INIT_LIST_HEAD(&qlist);
+ collect_queues_to_reset(matrix_mdev, apid, &qlist);
+ vfio_ap_mdev_reset_qlist(&qlist);
+}
+
static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev,
unsigned long *apm_reset)
{
- struct vfio_ap_queue *q, *tmpq;
struct list_head qlist;
- unsigned long apid, apqi;
- int apqn, ret = 0;
+ unsigned long apid;
if (bitmap_empty(apm_reset, AP_DEVICES))
return 0;
INIT_LIST_HEAD(&qlist);
- for_each_set_bit_inv(apid, apm_reset, AP_DEVICES) {
- for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm,
- AP_DOMAINS) {
- /*
- * If the domain is not in the host's AP configuration,
- * then resetting it will fail with response code 01
- * (APQN not valid).
- */
- if (!test_bit_inv(apqi,
- (unsigned long *)matrix_dev->info.aqm))
- continue;
-
- apqn = AP_MKQID(apid, apqi);
- q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
-
- if (q)
- list_add_tail(&q->reset_qnode, &qlist);
- }
- }
+ for_each_set_bit_inv(apid, apm_reset, AP_DEVICES)
+ collect_queues_to_reset(matrix_mdev, apid, &qlist);
- ret = vfio_ap_mdev_reset_qlist(&qlist);
-
- list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode)
- list_del(&q->reset_qnode);
-
- return ret;
+ return vfio_ap_mdev_reset_qlist(&qlist);
}
/**
@@ -2217,24 +2217,30 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
matrix_mdev = q->matrix_mdev;
if (matrix_mdev) {
- vfio_ap_unlink_queue_fr_mdev(q);
-
apid = AP_QID_CARD(q->apqn);
apqi = AP_QID_QUEUE(q->apqn);
-
- /*
- * If the queue is assigned to the guest's APCB, then remove
- * the adapter's APID from the APCB and hot it into the guest.
- */
+ /* If the queue is assigned to the guest's AP configuration */
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
+ /*
+ * Since the queues are defined via a matrix of adapters
+ * and domains, it is not possible to hot unplug a
+ * single queue; so, let's unplug the adapter.
+ */
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apid(matrix_mdev, apid);
+ goto done;
}
}
vfio_ap_mdev_reset_queue(q);
flush_work(&q->reset_work);
+
+done:
+ if (matrix_mdev)
+ vfio_ap_unlink_queue_fr_mdev(q);
+
dev_set_drvdata(&apdev->device, NULL);
kfree(q);
release_update_locks_for_mdev(matrix_mdev);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x f848cba767e59f8d5c54984b1d45451aae040d50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012637-gotten-threaten-c4cb@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
f848cba767e5 ("s390/vfio-ap: reset queues filtered from the guest's AP config")
774d10196e64 ("s390/vfio-ap: let on_scan_complete() callback filter matrix and update guest's APCB")
850fb7fa8c68 ("s390/vfio-ap: always filter entire AP matrix")
9261f0438835 ("s390/vfio-ap: use work struct to verify queue reset")
62aab082e999 ("s390/vfio-ap: store entire AP queue status word with the queue object")
dd174833e44e ("s390/vfio-ap: remove upper limit on wait for queue reset to complete")
c51f8c6bb5c8 ("s390/vfio-ap: allow deconfigured queue to be passed through to a guest")
411b0109daa5 ("s390/vfio-ap: wait for response code 05 to clear on queue reset")
7aa7b2a80cb7 ("s390/vfio-ap: clean up irq resources if possible")
680b7ddd7e2a ("s390/vfio-ap: no need to check the 'E' and 'I' bits in APQSW after TAPQ")
4bdf3c3956d8 ("s390/ap: provide F bit parameter for ap_rapq() and ap_zapq()")
7cb7636a1ac1 ("s390/vfio_ap: increase max wait time for reset verification")
51d4d9877087 ("s390/vfio_ap: fix handling of error response codes")
5a42b348adf9 ("s390/vfio_ap: verify ZAPQ completion after return of response code zero")
3ba41768105c ("s390/vfio_ap: use TAPQ to verify reset in progress completes")
0daf9878a799 ("s390/vfio_ap: check TAPQ response code when waiting for queue reset")
62414d901c3a ("s390/vfio-ap: verify reset complete in separate function")
08866d34c709 ("s390/vfio-ap: fix an error handling path in vfio_ap_mdev_probe_queue()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f848cba767e59f8d5c54984b1d45451aae040d50 Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak(a)linux.ibm.com>
Date: Mon, 15 Jan 2024 13:54:34 -0500
Subject: [PATCH] s390/vfio-ap: reset queues filtered from the guest's AP
config
When filtering the adapters from the configuration profile for a guest to
create or update a guest's AP configuration, if the APID of an adapter and
the APQI of a domain identify a queue device that is not bound to the
vfio_ap device driver, the APID of the adapter will be filtered because an
individual APQN can not be filtered due to the fact the APQNs are assigned
to an AP configuration as a matrix of APIDs and APQIs. Consequently, a
guest will not have access to all of the queues associated with the
filtered adapter. If the queues are subsequently made available again to
the guest, they should re-appear in a reset state; so, let's make sure all
queues associated with an adapter unplugged from the guest are reset.
In order to identify the set of queues that need to be reset, let's allow a
vfio_ap_queue object to be simultaneously stored in both a hashtable and a
list: A hashtable used to store all of the queues assigned
to a matrix mdev; and/or, a list used to store a subset of the queues that
need to be reset. For example, when an adapter is hot unplugged from a
guest, all guest queues associated with that adapter must be reset. Since
that may be a subset of those assigned to the matrix mdev, they can be
stored in a list that can be passed to the vfio_ap_mdev_reset_queues
function.
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Acked-by: Halil Pasic <pasic(a)linux.ibm.com>
Fixes: 48cae940c31d ("s390/vfio-ap: refresh guest's APCB by filtering AP resources assigned to mdev")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240115185441.31526-5-akrowiak@linux.ibm.com
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index e45d0bf7b126..44cd29aace8e 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -32,7 +32,8 @@
#define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */
-static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
+static int vfio_ap_mdev_reset_qlist(struct list_head *qlist);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
@@ -665,16 +666,23 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
* device driver.
*
* @matrix_mdev: the matrix mdev whose matrix is to be filtered.
+ * @apm_filtered: a 256-bit bitmap for storing the APIDs filtered from the
+ * guest's AP configuration that are still in the host's AP
+ * configuration.
*
* Note: If an APQN referencing a queue device that is not bound to the vfio_ap
* driver, its APID will be filtered from the guest's APCB. The matrix
* structure precludes filtering an individual APQN, so its APID will be
- * filtered.
+ * filtered. Consequently, all queues associated with the adapter that
+ * are in the host's AP configuration must be reset. If queues are
+ * subsequently made available again to the guest, they should re-appear
+ * in a reset state
*
* Return: a boolean value indicating whether the KVM guest's APCB was changed
* by the filtering or not.
*/
-static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
+static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *apm_filtered)
{
unsigned long apid, apqi, apqn;
DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
@@ -684,6 +692,7 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES);
bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS);
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
+ bitmap_clear(apm_filtered, 0, AP_DEVICES);
/*
* Copy the adapters, domains and control domains to the shadow_apcb
@@ -709,8 +718,16 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
apqn = AP_MKQID(apid, apqi);
q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
if (!q || q->reset_status.response_code) {
- clear_bit_inv(apid,
- matrix_mdev->shadow_apcb.apm);
+ clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+
+ /*
+ * If the adapter was previously plugged into
+ * the guest, let's let the caller know that
+ * the APID was filtered.
+ */
+ if (test_bit_inv(apid, prev_shadow_apm))
+ set_bit_inv(apid, apm_filtered);
+
break;
}
}
@@ -812,7 +829,7 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev)
mutex_lock(&matrix_dev->guests_lock);
mutex_lock(&matrix_dev->mdevs_lock);
- vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ vfio_ap_mdev_reset_queues(matrix_mdev);
vfio_ap_mdev_unlink_fr_queues(matrix_mdev);
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->mdevs_lock);
@@ -922,6 +939,47 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev,
AP_MKQID(apid, apqi));
}
+static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *apm_reset)
+{
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
+ unsigned long apid, apqi;
+ int apqn, ret = 0;
+
+ if (bitmap_empty(apm_reset, AP_DEVICES))
+ return 0;
+
+ INIT_LIST_HEAD(&qlist);
+
+ for_each_set_bit_inv(apid, apm_reset, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm,
+ AP_DOMAINS) {
+ /*
+ * If the domain is not in the host's AP configuration,
+ * then resetting it will fail with response code 01
+ * (APQN not valid).
+ */
+ if (!test_bit_inv(apqi,
+ (unsigned long *)matrix_dev->info.aqm))
+ continue;
+
+ apqn = AP_MKQID(apid, apqi);
+ q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
+
+ if (q)
+ list_add_tail(&q->reset_qnode, &qlist);
+ }
+ }
+
+ ret = vfio_ap_mdev_reset_qlist(&qlist);
+
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode)
+ list_del(&q->reset_qnode);
+
+ return ret;
+}
+
/**
* assign_adapter_store - parses the APID from @buf and sets the
* corresponding bit in the mediated matrix device's APM
@@ -962,6 +1020,7 @@ static ssize_t assign_adapter_store(struct device *dev,
{
int ret;
unsigned long apid;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@@ -991,8 +1050,10 @@ static ssize_t assign_adapter_store(struct device *dev,
vfio_ap_mdev_link_adapter(matrix_mdev, apid);
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
ret = count;
done:
@@ -1023,11 +1084,12 @@ static struct vfio_ap_queue
* adapter was assigned.
* @matrix_mdev: the matrix mediated device to which the adapter was assigned.
* @apid: the APID of the unassigned adapter.
- * @qtable: table for storing queues associated with unassigned adapter.
+ * @qlist: list for storing queues associated with unassigned adapter that
+ * need to be reset.
*/
static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid,
- struct ap_queue_table *qtable)
+ struct list_head *qlist)
{
unsigned long apqi;
struct vfio_ap_queue *q;
@@ -1035,11 +1097,10 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
- if (q && qtable) {
+ if (q && qlist) {
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
- hash_add(qtable->queues, &q->mdev_qnode,
- q->apqn);
+ list_add_tail(&q->reset_qnode, qlist);
}
}
}
@@ -1047,26 +1108,23 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid)
{
- int loop_cursor;
- struct vfio_ap_queue *q;
- struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
- hash_init(qtable->queues);
- vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable);
+ INIT_LIST_HEAD(&qlist);
+ vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, &qlist);
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) {
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
- vfio_ap_mdev_reset_queues(qtable);
+ vfio_ap_mdev_reset_qlist(&qlist);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) {
vfio_ap_unlink_mdev_fr_queue(q);
- hash_del(&q->mdev_qnode);
+ list_del(&q->reset_qnode);
}
-
- kfree(qtable);
}
/**
@@ -1167,6 +1225,7 @@ static ssize_t assign_domain_store(struct device *dev,
{
int ret;
unsigned long apqi;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@@ -1196,8 +1255,10 @@ static ssize_t assign_domain_store(struct device *dev,
vfio_ap_mdev_link_domain(matrix_mdev, apqi);
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
ret = count;
done:
@@ -1210,7 +1271,7 @@ static DEVICE_ATTR_WO(assign_domain);
static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
unsigned long apqi,
- struct ap_queue_table *qtable)
+ struct list_head *qlist)
{
unsigned long apid;
struct vfio_ap_queue *q;
@@ -1218,11 +1279,10 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
- if (q && qtable) {
+ if (q && qlist) {
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
- hash_add(qtable->queues, &q->mdev_qnode,
- q->apqn);
+ list_add_tail(&q->reset_qnode, qlist);
}
}
}
@@ -1230,26 +1290,23 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev,
unsigned long apqi)
{
- int loop_cursor;
- struct vfio_ap_queue *q;
- struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
- hash_init(qtable->queues);
- vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable);
+ INIT_LIST_HEAD(&qlist);
+ vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, &qlist);
if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
- vfio_ap_mdev_reset_queues(qtable);
+ vfio_ap_mdev_reset_qlist(&qlist);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) {
vfio_ap_unlink_mdev_fr_queue(q);
- hash_del(&q->mdev_qnode);
+ list_del(&q->reset_qnode);
}
-
- kfree(qtable);
}
/**
@@ -1604,7 +1661,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
get_update_locks_for_kvm(kvm);
kvm_arch_crypto_clear_masks(kvm);
- vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ vfio_ap_mdev_reset_queues(matrix_mdev);
kvm_put_kvm(kvm);
matrix_mdev->kvm = NULL;
@@ -1740,15 +1797,33 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
}
}
-static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
{
int ret = 0, loop_cursor;
struct vfio_ap_queue *q;
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
+ hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode)
vfio_ap_mdev_reset_queue(q);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) {
+ flush_work(&q->reset_work);
+
+ if (q->reset_status.response_code)
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+static int vfio_ap_mdev_reset_qlist(struct list_head *qlist)
+{
+ int ret = 0;
+ struct vfio_ap_queue *q;
+
+ list_for_each_entry(q, qlist, reset_qnode)
+ vfio_ap_mdev_reset_queue(q);
+
+ list_for_each_entry(q, qlist, reset_qnode) {
flush_work(&q->reset_work);
if (q->reset_status.response_code)
@@ -1934,7 +2009,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
ret = vfio_ap_mdev_get_device_info(arg);
break;
case VFIO_DEVICE_RESET:
- ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ ret = vfio_ap_mdev_reset_queues(matrix_mdev);
break;
case VFIO_DEVICE_GET_IRQ_INFO:
ret = vfio_ap_get_irq_info(arg);
@@ -2080,6 +2155,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
{
int ret;
struct vfio_ap_queue *q;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev;
ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group);
@@ -2112,15 +2188,17 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
!bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS))
goto done;
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
}
done:
dev_set_drvdata(&apdev->device, q);
release_update_locks_for_mdev(matrix_mdev);
- return 0;
+ return ret;
err_remove_group:
sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group);
@@ -2464,6 +2542,7 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info,
static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
{
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false;
mutex_lock(&matrix_mdev->kvm->lock);
@@ -2477,7 +2556,7 @@ static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
matrix_mdev->adm_add, AP_DOMAINS);
if (filter_adapters || filter_domains)
- do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev);
+ do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered);
if (filter_cdoms)
do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev);
@@ -2485,6 +2564,8 @@ static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
if (do_hotplug)
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+
mutex_unlock(&matrix_dev->mdevs_lock);
mutex_unlock(&matrix_mdev->kvm->lock);
}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 88aff8b81f2f..98d37aa27044 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -133,6 +133,8 @@ struct ap_matrix_mdev {
* @apqn: the APQN of the AP queue device
* @saved_isc: the guest ISC registered with the GIB interface
* @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
+ * @reset_qnode: allows the vfio_ap_queue struct to be added to a list of queues
+ * that need to be reset
* @reset_status: the status from the last reset of the queue
* @reset_work: work to wait for queue reset to complete
*/
@@ -143,6 +145,7 @@ struct vfio_ap_queue {
#define VFIO_AP_ISC_INVALID 0xff
unsigned char saved_isc;
struct hlist_node mdev_qnode;
+ struct list_head reset_qnode;
struct ap_queue_status reset_status;
struct work_struct reset_work;
};
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 7b0454cfd8edb3509619407c3b9f78a6d0dee1a5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012659-opacity-greedy-0493@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
7b0454cfd8ed ("media: ov13b10: Enable runtime PM before registering async sub-device")
1af2f618acc1 ("media: ov13b10: Support device probe in non-zero ACPI D state")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b0454cfd8edb3509619407c3b9f78a6d0dee1a5 Mon Sep 17 00:00:00 2001
From: Bingbu Cao <bingbu.cao(a)intel.com>
Date: Wed, 22 Nov 2023 17:46:08 +0800
Subject: [PATCH] media: ov13b10: Enable runtime PM before registering async
sub-device
As the sensor device maybe accessible right after its async sub-device is
registered, such as ipu-bridge will try to power up sensor by sensor's
client device's runtime PM from the async notifier callback, if runtime PM
is not enabled, it will fail.
So runtime PM should be ready before its async sub-device is registered
and accessible by others.
Fixes: 7ee850546822 ("media: Add sensor driver support for the ov13b10 camera.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bingbu Cao <bingbu.cao(a)intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
diff --git a/drivers/media/i2c/ov13b10.c b/drivers/media/i2c/ov13b10.c
index c06411d5ee2b..73c844aa5697 100644
--- a/drivers/media/i2c/ov13b10.c
+++ b/drivers/media/i2c/ov13b10.c
@@ -1554,24 +1554,27 @@ static int ov13b10_probe(struct i2c_client *client)
goto error_handler_free;
}
- ret = v4l2_async_register_subdev_sensor(&ov13b->sd);
- if (ret < 0)
- goto error_media_entity;
/*
* Device is already turned on by i2c-core with ACPI domain PM.
* Enable runtime PM and turn off the device.
*/
-
/* Set the device's state to active if it's in D0 state. */
if (full_power)
pm_runtime_set_active(&client->dev);
pm_runtime_enable(&client->dev);
pm_runtime_idle(&client->dev);
+ ret = v4l2_async_register_subdev_sensor(&ov13b->sd);
+ if (ret < 0)
+ goto error_media_entity_runtime_pm;
+
return 0;
-error_media_entity:
+error_media_entity_runtime_pm:
+ pm_runtime_disable(&client->dev);
+ if (full_power)
+ pm_runtime_set_suspended(&client->dev);
media_entity_cleanup(&ov13b->sd.entity);
error_handler_free:
@@ -1594,6 +1597,7 @@ static void ov13b10_remove(struct i2c_client *client)
ov13b10_free_controls(ov13b);
pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
}
static DEFINE_RUNTIME_DEV_PM_OPS(ov13b10_pm_ops, ov13b10_suspend,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x efa5fe19c0a9199f49e36e1f5242ed5c88da617d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012601-guidance-subtotal-5b16@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
efa5fe19c0a9 ("media: imx355: Enable runtime PM before registering async sub-device")
15786f7b564e ("media: v4l: fwnode: Rename v4l2_async_register_subdev_sensor_common")
9746b11715c3 ("media: i2c: Add imx334 camera sensor driver")
41b3e23376e9 ("media: dt-bindings: media: Add bindings for imx334")
3e90e5ad9497 ("media: Clarify v4l2-async subdevice addition API")
11c0d8fdccc5 ("media: i2c: Add support for the OV8865 image sensor")
e43ccb0a045f ("media: i2c: Add support for the OV5648 image sensor")
b24cc2a18c50 ("media: smiapp: Rename as "ccs"")
161cc847370a ("media: smiapp: Internal rename to CCS")
47ff2ff267ee ("media: smiapp: Rename register access functions")
235ac9a4b36c ("media: smiapp: Remove quirk function for writing a single 8-bit register")
42aab58f456a ("media: smiapp: Use CCS registers")
3e158e1f1ec2 ("media: smiapp: Switch to CCS limits")
ca296a11156a ("media: smiapp: Read CCS limit values")
503a88422fb0 ("media: smiapp: Use MIPI CCS version and manufacturer ID information")
e66a7c849086 ("media: smiapp: Add macros for accessing CCS registers")
cb50351be662 ("media: smiapp: Remove macros for defining registers, merge definitions")
ab47d5cd8253 ("media: smiapp: Calculate CCS limit offsets and limit buffer size")
82731a194fc1 ("media: smiapp: Use CCS register flags")
6493c4b777c2 ("media: smiapp: Import CCS definitions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From efa5fe19c0a9199f49e36e1f5242ed5c88da617d Mon Sep 17 00:00:00 2001
From: Bingbu Cao <bingbu.cao(a)intel.com>
Date: Wed, 22 Nov 2023 17:46:06 +0800
Subject: [PATCH] media: imx355: Enable runtime PM before registering async
sub-device
As the sensor device maybe accessible right after its async sub-device is
registered, such as ipu-bridge will try to power up sensor by sensor's
client device's runtime PM from the async notifier callback, if runtime PM
is not enabled, it will fail.
So runtime PM should be ready before its async sub-device is registered
and accessible by others.
Fixes: df0b5c4a7ddd ("media: add imx355 camera sensor driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bingbu Cao <bingbu.cao(a)intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
diff --git a/drivers/media/i2c/imx355.c b/drivers/media/i2c/imx355.c
index e1b1d2fc79dd..8c995c58743a 100644
--- a/drivers/media/i2c/imx355.c
+++ b/drivers/media/i2c/imx355.c
@@ -1747,10 +1747,6 @@ static int imx355_probe(struct i2c_client *client)
goto error_handler_free;
}
- ret = v4l2_async_register_subdev_sensor(&imx355->sd);
- if (ret < 0)
- goto error_media_entity;
-
/*
* Device is already turned on by i2c-core with ACPI domain PM.
* Enable runtime PM and turn off the device.
@@ -1759,9 +1755,15 @@ static int imx355_probe(struct i2c_client *client)
pm_runtime_enable(&client->dev);
pm_runtime_idle(&client->dev);
+ ret = v4l2_async_register_subdev_sensor(&imx355->sd);
+ if (ret < 0)
+ goto error_media_entity_runtime_pm;
+
return 0;
-error_media_entity:
+error_media_entity_runtime_pm:
+ pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
media_entity_cleanup(&imx355->sd.entity);
error_handler_free:
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x efa5fe19c0a9199f49e36e1f5242ed5c88da617d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012600-slain-sloping-b06b@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
efa5fe19c0a9 ("media: imx355: Enable runtime PM before registering async sub-device")
15786f7b564e ("media: v4l: fwnode: Rename v4l2_async_register_subdev_sensor_common")
9746b11715c3 ("media: i2c: Add imx334 camera sensor driver")
41b3e23376e9 ("media: dt-bindings: media: Add bindings for imx334")
3e90e5ad9497 ("media: Clarify v4l2-async subdevice addition API")
11c0d8fdccc5 ("media: i2c: Add support for the OV8865 image sensor")
e43ccb0a045f ("media: i2c: Add support for the OV5648 image sensor")
b24cc2a18c50 ("media: smiapp: Rename as "ccs"")
161cc847370a ("media: smiapp: Internal rename to CCS")
47ff2ff267ee ("media: smiapp: Rename register access functions")
235ac9a4b36c ("media: smiapp: Remove quirk function for writing a single 8-bit register")
42aab58f456a ("media: smiapp: Use CCS registers")
3e158e1f1ec2 ("media: smiapp: Switch to CCS limits")
ca296a11156a ("media: smiapp: Read CCS limit values")
503a88422fb0 ("media: smiapp: Use MIPI CCS version and manufacturer ID information")
e66a7c849086 ("media: smiapp: Add macros for accessing CCS registers")
cb50351be662 ("media: smiapp: Remove macros for defining registers, merge definitions")
ab47d5cd8253 ("media: smiapp: Calculate CCS limit offsets and limit buffer size")
82731a194fc1 ("media: smiapp: Use CCS register flags")
6493c4b777c2 ("media: smiapp: Import CCS definitions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From efa5fe19c0a9199f49e36e1f5242ed5c88da617d Mon Sep 17 00:00:00 2001
From: Bingbu Cao <bingbu.cao(a)intel.com>
Date: Wed, 22 Nov 2023 17:46:06 +0800
Subject: [PATCH] media: imx355: Enable runtime PM before registering async
sub-device
As the sensor device maybe accessible right after its async sub-device is
registered, such as ipu-bridge will try to power up sensor by sensor's
client device's runtime PM from the async notifier callback, if runtime PM
is not enabled, it will fail.
So runtime PM should be ready before its async sub-device is registered
and accessible by others.
Fixes: df0b5c4a7ddd ("media: add imx355 camera sensor driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bingbu Cao <bingbu.cao(a)intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
diff --git a/drivers/media/i2c/imx355.c b/drivers/media/i2c/imx355.c
index e1b1d2fc79dd..8c995c58743a 100644
--- a/drivers/media/i2c/imx355.c
+++ b/drivers/media/i2c/imx355.c
@@ -1747,10 +1747,6 @@ static int imx355_probe(struct i2c_client *client)
goto error_handler_free;
}
- ret = v4l2_async_register_subdev_sensor(&imx355->sd);
- if (ret < 0)
- goto error_media_entity;
-
/*
* Device is already turned on by i2c-core with ACPI domain PM.
* Enable runtime PM and turn off the device.
@@ -1759,9 +1755,15 @@ static int imx355_probe(struct i2c_client *client)
pm_runtime_enable(&client->dev);
pm_runtime_idle(&client->dev);
+ ret = v4l2_async_register_subdev_sensor(&imx355->sd);
+ if (ret < 0)
+ goto error_media_entity_runtime_pm;
+
return 0;
-error_media_entity:
+error_media_entity_runtime_pm:
+ pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
media_entity_cleanup(&imx355->sd.entity);
error_handler_free:
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x d07f951903fa9922c375b8ab1ce81b18a0034e3b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012607-flame-appear-dbc4@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
d07f951903fa ("crypto: s390/aes - Fix buffer overread in CTR mode")
6f3196b74d64 ("s390/crypto: Rework on paes implementation")
674f368a952c ("crypto: remove CRYPTO_TFM_RES_BAD_KEY_LEN")
5c925e8b10a5 ("crypto: remove CRYPTO_TFM_RES_BAD_BLOCK_LEN")
f9d89b853ec1 ("crypto: remove unused tfm result flags")
b828f905904c ("crypto: artpec6 - return correct error code for failed setkey()")
bd56cea012fc ("crypto: chelsio - fix writing tfm flags to wrong place")
e8cfed5e4e2b ("crypto: cipher - remove crt_u.cipher (struct cipher_tfm)")
c441a909c686 ("crypto: compress - remove crt_u.compress (struct compress_tfm)")
2edf86414b66 ("crypto: sun4i-ss - hide the Invalid keylen message")
d63007eb954e ("crypto: ablkcipher - remove deprecated and unused ablkcipher support")
7fe948a52287 ("crypto: qat - switch to skcipher API")
373960d794d2 ("crypto: talitos - switch to skcipher API")
ce0183cb6464 ("crypto: rockchip - switch to skcipher API")
23a6564a6b51 ("crypto: niagara2 - switch to skcipher API")
b3cde6bab4e8 ("crypto: picoxcell - switch to skcipher API")
c2609391f95b ("crypto: mediatek - switch to skcipher API")
7cea6d3e01c2 ("crypto: chelsio - switch to skcipher API")
ac0d3d130f90 ("crypto: cavium/cpt - switch to skcipher API")
a9c01cd608c4 ("crypto: bcm-spu - switch to skcipher API")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d07f951903fa9922c375b8ab1ce81b18a0034e3b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert(a)gondor.apana.org.au>
Date: Tue, 28 Nov 2023 14:22:13 +0800
Subject: [PATCH] crypto: s390/aes - Fix buffer overread in CTR mode
When processing the last block, the s390 ctr code will always read
a whole block, even if there isn't a whole block of data left. Fix
this by using the actual length left and copy it into a buffer first
for processing.
Fixes: 0200f3ecc196 ("crypto: s390 - add System z hardware support for CTR mode")
Cc: <stable(a)vger.kernel.org>
Reported-by: Guangwu Zhang <guazhang(a)redhat.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Reviewd-by: Harald Freudenberger <freude(a)de.ibm.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index c773820e4af9..c6fe5405de4a 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -597,7 +597,9 @@ static int ctr_aes_crypt(struct skcipher_request *req)
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
if (nbytes) {
- cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr,
+ memset(buf, 0, AES_BLOCK_SIZE);
+ memcpy(buf, walk.src.virt.addr, nbytes);
+ cpacf_kmctr(sctx->fc, sctx->key, buf, buf,
AES_BLOCK_SIZE, walk.iv);
memcpy(walk.dst.virt.addr, buf, nbytes);
crypto_inc(walk.iv, AES_BLOCK_SIZE);
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 8b541e44151d..55ee5567a5ea 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -693,9 +693,11 @@ static int ctr_paes_crypt(struct skcipher_request *req)
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
if (nbytes) {
+ memset(buf, 0, AES_BLOCK_SIZE);
+ memcpy(buf, walk.src.virt.addr, nbytes);
while (1) {
if (cpacf_kmctr(ctx->fc, ¶m, buf,
- walk.src.virt.addr, AES_BLOCK_SIZE,
+ buf, AES_BLOCK_SIZE,
walk.iv) == AES_BLOCK_SIZE)
break;
if (__paes_convert_key(ctx))
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x d07f951903fa9922c375b8ab1ce81b18a0034e3b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012606-embargo-jumble-fa16@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
d07f951903fa ("crypto: s390/aes - Fix buffer overread in CTR mode")
6f3196b74d64 ("s390/crypto: Rework on paes implementation")
674f368a952c ("crypto: remove CRYPTO_TFM_RES_BAD_KEY_LEN")
5c925e8b10a5 ("crypto: remove CRYPTO_TFM_RES_BAD_BLOCK_LEN")
f9d89b853ec1 ("crypto: remove unused tfm result flags")
b828f905904c ("crypto: artpec6 - return correct error code for failed setkey()")
bd56cea012fc ("crypto: chelsio - fix writing tfm flags to wrong place")
e8cfed5e4e2b ("crypto: cipher - remove crt_u.cipher (struct cipher_tfm)")
c441a909c686 ("crypto: compress - remove crt_u.compress (struct compress_tfm)")
2edf86414b66 ("crypto: sun4i-ss - hide the Invalid keylen message")
d63007eb954e ("crypto: ablkcipher - remove deprecated and unused ablkcipher support")
7fe948a52287 ("crypto: qat - switch to skcipher API")
373960d794d2 ("crypto: talitos - switch to skcipher API")
ce0183cb6464 ("crypto: rockchip - switch to skcipher API")
23a6564a6b51 ("crypto: niagara2 - switch to skcipher API")
b3cde6bab4e8 ("crypto: picoxcell - switch to skcipher API")
c2609391f95b ("crypto: mediatek - switch to skcipher API")
7cea6d3e01c2 ("crypto: chelsio - switch to skcipher API")
ac0d3d130f90 ("crypto: cavium/cpt - switch to skcipher API")
a9c01cd608c4 ("crypto: bcm-spu - switch to skcipher API")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d07f951903fa9922c375b8ab1ce81b18a0034e3b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert(a)gondor.apana.org.au>
Date: Tue, 28 Nov 2023 14:22:13 +0800
Subject: [PATCH] crypto: s390/aes - Fix buffer overread in CTR mode
When processing the last block, the s390 ctr code will always read
a whole block, even if there isn't a whole block of data left. Fix
this by using the actual length left and copy it into a buffer first
for processing.
Fixes: 0200f3ecc196 ("crypto: s390 - add System z hardware support for CTR mode")
Cc: <stable(a)vger.kernel.org>
Reported-by: Guangwu Zhang <guazhang(a)redhat.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Reviewd-by: Harald Freudenberger <freude(a)de.ibm.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index c773820e4af9..c6fe5405de4a 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -597,7 +597,9 @@ static int ctr_aes_crypt(struct skcipher_request *req)
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
if (nbytes) {
- cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr,
+ memset(buf, 0, AES_BLOCK_SIZE);
+ memcpy(buf, walk.src.virt.addr, nbytes);
+ cpacf_kmctr(sctx->fc, sctx->key, buf, buf,
AES_BLOCK_SIZE, walk.iv);
memcpy(walk.dst.virt.addr, buf, nbytes);
crypto_inc(walk.iv, AES_BLOCK_SIZE);
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 8b541e44151d..55ee5567a5ea 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -693,9 +693,11 @@ static int ctr_paes_crypt(struct skcipher_request *req)
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
if (nbytes) {
+ memset(buf, 0, AES_BLOCK_SIZE);
+ memcpy(buf, walk.src.virt.addr, nbytes);
while (1) {
if (cpacf_kmctr(ctx->fc, ¶m, buf,
- walk.src.virt.addr, AES_BLOCK_SIZE,
+ buf, AES_BLOCK_SIZE,
walk.iv) == AES_BLOCK_SIZE)
break;
if (__paes_convert_key(ctx))
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012654-lark-abdomen-caac@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
3c12466b6b7b ("erofs: fix lz4 inplace decompression")
123ec246ebe3 ("erofs: get rid of the remaining kmap_atomic()")
ab749badf9f4 ("erofs: support unaligned data decompression")
10e5f6e482e1 ("erofs: introduce z_erofs_fixup_insize")
d67aee76d418 ("erofs: tidy up z_erofs_lz4_decompress")
7e508f2ca8bb ("erofs: rename lz4_0pading to zero_padding")
eaa9172ad988 ("erofs: get rid of ->lru usage")
622ceaddb764 ("erofs: lzma compression support")
966edfb0a3dc ("erofs: rename some generic methods in decompressor")
386292919c25 ("erofs: introduce readmore decompression strategy")
8f89926290c4 ("erofs: get compression algorithms directly on mapping")
dfeab2e95a75 ("erofs: add multiple device support")
e62424651f43 ("erofs: decouple basic mount options from fs_context")
5b6e7e120e71 ("erofs: remove the fast path of per-CPU buffer decompression")
2e5fd489a4e5 ("Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang(a)kernel.org>
Date: Wed, 6 Dec 2023 12:55:34 +0800
Subject: [PATCH] erofs: fix lz4 inplace decompression
Currently EROFS can map another compressed buffer for inplace
decompression, that was used to handle the cases that some pages of
compressed data are actually not in-place I/O.
However, like most simple LZ77 algorithms, LZ4 expects the compressed
data is arranged at the end of the decompressed buffer and it
explicitly uses memmove() to handle overlapping:
__________________________________________________________
|_ direction of decompression --> ____ |_ compressed data _|
Although EROFS arranges compressed data like this, it typically maps two
individual virtual buffers so the relative order is uncertain.
Previously, it was hardly observed since LZ4 only uses memmove() for
short overlapped literals and x86/arm64 memmove implementations seem to
completely cover it up and they don't have this issue. Juhyung reported
that EROFS data corruption can be found on a new Intel x86 processor.
After some analysis, it seems that recent x86 processors with the new
FSRM feature expose this issue with "rep movsb".
Let's strictly use the decompressed buffer for lz4 inplace
decompression for now. Later, as an useful improvement, we could try
to tie up these two buffers together in the correct order.
Reported-and-tested-by: Juhyung Park <qkrwngud825(a)gmail.com>
Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR…
Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace")
Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend")
Cc: stable <stable(a)vger.kernel.org> # 5.4+
Tested-by: Yifan Zhao <zhaoyifan(a)sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao(a)linux.alibaba.com>
Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.…
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 021be5feb1bc..e0d609c3958f 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -121,11 +121,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
}
static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
- void *inpage, unsigned int *inputmargin, int *maptype,
- bool may_inplace)
+ void *inpage, void *out, unsigned int *inputmargin,
+ int *maptype, bool may_inplace)
{
struct z_erofs_decompress_req *rq = ctx->rq;
- unsigned int omargin, total, i, j;
+ unsigned int omargin, total, i;
struct page **in;
void *src, *tmp;
@@ -135,12 +135,13 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;
- for (i = 0; i < ctx->inpages; ++i) {
- DBG_BUGON(rq->in[i] == NULL);
- for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j)
- if (rq->out[j] == rq->in[i])
- goto docopy;
- }
+ for (i = 0; i < ctx->inpages; ++i)
+ if (rq->out[ctx->outpages - ctx->inpages + i] !=
+ rq->in[i])
+ goto docopy;
+ kunmap_local(inpage);
+ *maptype = 3;
+ return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT);
}
if (ctx->inpages <= 1) {
@@ -148,7 +149,6 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
return inpage;
}
kunmap_local(inpage);
- might_sleep();
src = erofs_vm_map_ram(rq->in, ctx->inpages);
if (!src)
return ERR_PTR(-ENOMEM);
@@ -204,12 +204,12 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
}
static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
- u8 *out)
+ u8 *dst)
{
struct z_erofs_decompress_req *rq = ctx->rq;
bool support_0padding = false, may_inplace = false;
unsigned int inputmargin;
- u8 *headpage, *src;
+ u8 *out, *headpage, *src;
int ret, maptype;
DBG_BUGON(*rq->in == NULL);
@@ -230,11 +230,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
}
inputmargin = rq->pageofs_in;
- src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
+ src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin,
&maptype, may_inplace);
if (IS_ERR(src))
return PTR_ERR(src);
+ out = dst + rq->pageofs_out;
/* legacy format could compress extra data in a pcluster. */
if (rq->partial_decoding || !support_0padding)
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
@@ -265,7 +266,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
- } else {
+ } else if (maptype != 3) {
DBG_BUGON(1);
return -EFAULT;
}
@@ -308,7 +309,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
}
dstmap_out:
- ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
+ ret = z_erofs_lz4_decompress_mem(&ctx, dst);
if (!dst_maptype)
kunmap_local(dst);
else if (dst_maptype == 2)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012647-disband-negotiate-1447@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
3c12466b6b7b ("erofs: fix lz4 inplace decompression")
123ec246ebe3 ("erofs: get rid of the remaining kmap_atomic()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang(a)kernel.org>
Date: Wed, 6 Dec 2023 12:55:34 +0800
Subject: [PATCH] erofs: fix lz4 inplace decompression
Currently EROFS can map another compressed buffer for inplace
decompression, that was used to handle the cases that some pages of
compressed data are actually not in-place I/O.
However, like most simple LZ77 algorithms, LZ4 expects the compressed
data is arranged at the end of the decompressed buffer and it
explicitly uses memmove() to handle overlapping:
__________________________________________________________
|_ direction of decompression --> ____ |_ compressed data _|
Although EROFS arranges compressed data like this, it typically maps two
individual virtual buffers so the relative order is uncertain.
Previously, it was hardly observed since LZ4 only uses memmove() for
short overlapped literals and x86/arm64 memmove implementations seem to
completely cover it up and they don't have this issue. Juhyung reported
that EROFS data corruption can be found on a new Intel x86 processor.
After some analysis, it seems that recent x86 processors with the new
FSRM feature expose this issue with "rep movsb".
Let's strictly use the decompressed buffer for lz4 inplace
decompression for now. Later, as an useful improvement, we could try
to tie up these two buffers together in the correct order.
Reported-and-tested-by: Juhyung Park <qkrwngud825(a)gmail.com>
Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR…
Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace")
Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend")
Cc: stable <stable(a)vger.kernel.org> # 5.4+
Tested-by: Yifan Zhao <zhaoyifan(a)sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao(a)linux.alibaba.com>
Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.…
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 021be5feb1bc..e0d609c3958f 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -121,11 +121,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
}
static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
- void *inpage, unsigned int *inputmargin, int *maptype,
- bool may_inplace)
+ void *inpage, void *out, unsigned int *inputmargin,
+ int *maptype, bool may_inplace)
{
struct z_erofs_decompress_req *rq = ctx->rq;
- unsigned int omargin, total, i, j;
+ unsigned int omargin, total, i;
struct page **in;
void *src, *tmp;
@@ -135,12 +135,13 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;
- for (i = 0; i < ctx->inpages; ++i) {
- DBG_BUGON(rq->in[i] == NULL);
- for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j)
- if (rq->out[j] == rq->in[i])
- goto docopy;
- }
+ for (i = 0; i < ctx->inpages; ++i)
+ if (rq->out[ctx->outpages - ctx->inpages + i] !=
+ rq->in[i])
+ goto docopy;
+ kunmap_local(inpage);
+ *maptype = 3;
+ return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT);
}
if (ctx->inpages <= 1) {
@@ -148,7 +149,6 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
return inpage;
}
kunmap_local(inpage);
- might_sleep();
src = erofs_vm_map_ram(rq->in, ctx->inpages);
if (!src)
return ERR_PTR(-ENOMEM);
@@ -204,12 +204,12 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
}
static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
- u8 *out)
+ u8 *dst)
{
struct z_erofs_decompress_req *rq = ctx->rq;
bool support_0padding = false, may_inplace = false;
unsigned int inputmargin;
- u8 *headpage, *src;
+ u8 *out, *headpage, *src;
int ret, maptype;
DBG_BUGON(*rq->in == NULL);
@@ -230,11 +230,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
}
inputmargin = rq->pageofs_in;
- src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
+ src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin,
&maptype, may_inplace);
if (IS_ERR(src))
return PTR_ERR(src);
+ out = dst + rq->pageofs_out;
/* legacy format could compress extra data in a pcluster. */
if (rq->partial_decoding || !support_0padding)
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
@@ -265,7 +266,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
- } else {
+ } else if (maptype != 3) {
DBG_BUGON(1);
return -EFAULT;
}
@@ -308,7 +309,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
}
dstmap_out:
- ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
+ ret = z_erofs_lz4_decompress_mem(&ctx, dst);
if (!dst_maptype)
kunmap_local(dst);
else if (dst_maptype == 2)
From: Mike Marciniszyn <mike.marciniszyn(a)intel.com>
[ Upstream commit 0a5ec366de7e94192669ba08de6ed336607fd282 ]
The SQ is shared for between kernel and used by storing the kernel page
pointer and passing that to a kmap_atomic().
This then requires that the alignment is PAGE_SIZE aligned.
Fix by adding an iWarp specific alignment check.
The patch needed to be reworked because the separate routines
present upstream are not there in older irdma drivers.
Fixes: e965ef0e7b2c ("RDMA/irdma: Split QP handler into irdma_reg_user_mr_type_qp")
Link: https://lore.kernel.org/r/20231129202143.1434-3-shiraz.saleem@intel.com
Signed-off-by: Mike Marciniszyn <mike.marciniszyn(a)intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem(a)intel.com>
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
---
drivers/infiniband/hw/irdma/verbs.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 745712e1d7de..e02f541430ad 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -2783,6 +2783,11 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
switch (req.reg_type) {
case IRDMA_MEMREG_TYPE_QP:
+ /* iWarp: Catch page not starting on OS page boundary */
+ if (!rdma_protocol_roce(&iwdev->ibdev, 1) &&
+ ib_umem_offset(iwmr->region))
+ return -EINVAL;
+
total = req.sq_pages + req.rq_pages + shadow_pgcnt;
if (total > iwmr->page_cnt) {
err = -EINVAL;
--
1.8.3.1
This commit is for linux-5.4.y only, it has no direct upstream
equivalent.
Prior to commit 5f2fb52fac15 ("kbuild: rename hostprogs-y/always to
hostprogs/always-y"), always-y did not exist, making the backport of
mainline commit 1b1e38002648 ("powerpc: add crtsavres.o to always-y
instead of extra-y") to linux-5.4.y as commit 245da9eebba0 ("powerpc:
add crtsavres.o to always-y instead of extra-y") incorrect, breaking the
build with linkers that need crtsavres.o:
ld.lld: error: cannot open arch/powerpc/lib/crtsavres.o: No such file or directory
Backporting the aforementioned kbuild commit is not suitable for stable
due to its size and number of conflicts, so transform the always-y usage
to an equivalent form using always, which resolves the build issues.
Fixes: 245da9eebba0 ("powerpc: add crtsavres.o to always-y instead of extra-y")
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
arch/powerpc/lib/Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 7c603839fe28..841e6ed30f13 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -34,8 +34,8 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
# 64-bit linker creates .sfpr on demand for final link (vmlinux),
# so it is only needed for modules, and only for older linkers which
# do not support --save-restore-funcs
-ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
-always-$(CONFIG_PPC64) += crtsavres.o
+ifeq ($(call ld-ifversion, -lt, 225000000, y)$(CONFIG_PPC64),yy)
+always += crtsavres.o
endif
obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
---
base-commit: f0602893f43a54097fcf22bd8c2f7b8e75ca643e
change-id: 20240126-5-4-fix-lib-powerpc-backport-9d577643dcfc
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
The latest releases of 6.1.y, 6.6.y and 6.7.y introduce a duplicate
commit of 'drm/amd: Enable PCIe PME from D3'.
For example on the 6.6.y branch:
commit 847e6947afd3c46623172d2eabcfc2481ee8668e
Author: Mario Limonciello <mario.limonciello(a)amd.com>
AuthorDate: Fri Nov 24 09:56:32 2023 -0600
Commit: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
CommitDate: Thu Jan 25 15:35:45 2024 -0800
drm/amd: Enable PCIe PME from D3
commit bd1f6a31e7762ebc99b97f3eda5e5ea3708fa792 upstream.
When dGPU is put into BOCO it may be in D3cold but still able send
PME on display hotplug event. For this to work it must be enabled
as wake source from D3.
When runpm is enabled use pci_wake_from_d3() to mark wakeup as
enabled by default.
Cc: stable(a)vger.kernel.org # 6.1+
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
Acked-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 2c35036e4ba2..635b58553583 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2197,6 +2197,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pci_wake_from_d3(pdev, TRUE);
+ pci_wake_from_d3(pdev, TRUE);
+
/*
* For runpm implemented via BACO, PMFW will handle the
* timing for BACO in and out:
commit 49227bea27ebcd260f0c94a3055b14bbd8605c5e
Author: Mario Limonciello <mario.limonciello(a)amd.com>
AuthorDate: Fri Nov 24 09:56:32 2023 -0600
Commit: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
CommitDate: Fri Dec 8 08:52:17 2023 +0100
drm/amd: Enable PCIe PME from D3
commit 6967741d26c87300a51b5e50d4acd104bc1a9759 upstream.
When dGPU is put into BOCO it may be in D3cold but still able send
PME on display hotplug event. For this to work it must be enabled
as wake source from D3.
When runpm is enabled use pci_wake_from_d3() to mark wakeup as
enabled by default.
Cc: stable(a)vger.kernel.org # 6.1+
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
Acked-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 81edf66dbea8..2c35036e4ba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2195,6 +2195,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
+ pci_wake_from_d3(pdev, TRUE);
+
/*
* For runpm implemented via BACO, PMFW will handle the
* timing for BACO in and out:
From: Zack Rusin <zackr(a)vmware.com>
Some drivers require the mapped tt pages to be decrypted. In an ideal
world this would have been handled by the dma layer, but the TTM page
fault handling would have to be rewritten to able to do that.
A side-effect of the TTM page fault handling is using a dma allocation
per order (via ttm_pool_alloc_page) which makes it impossible to just
trivially use dma_mmap_attrs. As a result ttm has to be very careful
about trying to make its pgprot for the mapped tt pages match what
the dma layer thinks it is. At the ttm layer it's possible to
deduce the requirement to have tt pages decrypted by checking
whether coherent dma allocations have been requested and the system
is running with confidential computing technologies.
This approach isn't ideal but keeping TTM matching DMAs expectations
for the page properties is in general fragile, unfortunately proper
fix would require a rewrite of TTM's page fault handling.
Fixes vmwgfx with SEV enabled.
Signed-off-by: Zack Rusin <zackr(a)vmware.com>
Fixes: 3bf3710e3718 ("drm/ttm: Add a generic TTM memcpy move for page-based iomem")
Cc: Christian König <christian.koenig(a)amd.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Huang Rui <ray.huang(a)amd.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: linux-kernel(a)vger.kernel.org
Cc: <stable(a)vger.kernel.org> # v5.14+
---
drivers/gpu/drm/ttm/ttm_bo_util.c | 13 +++++++++++--
drivers/gpu/drm/ttm/ttm_tt.c | 7 +++++++
include/drm/ttm/ttm_tt.h | 9 ++++++++-
3 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index fd9fd3d15101..0b3f4267130c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -294,7 +294,13 @@ pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res,
enum ttm_caching caching;
man = ttm_manager_type(bo->bdev, res->mem_type);
- caching = man->use_tt ? bo->ttm->caching : res->bus.caching;
+ if (man->use_tt) {
+ caching = bo->ttm->caching;
+ if (bo->ttm->page_flags & TTM_TT_FLAG_DECRYPTED)
+ tmp = pgprot_decrypted(tmp);
+ } else {
+ caching = res->bus.caching;
+ }
return ttm_prot_from_caching(caching, tmp);
}
@@ -337,6 +343,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
.no_wait_gpu = false
};
struct ttm_tt *ttm = bo->ttm;
+ struct ttm_resource_manager *man =
+ ttm_manager_type(bo->bdev, bo->resource->mem_type);
pgprot_t prot;
int ret;
@@ -346,7 +354,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
if (ret)
return ret;
- if (num_pages == 1 && ttm->caching == ttm_cached) {
+ if (num_pages == 1 && ttm->caching == ttm_cached &&
+ !(man->use_tt && (ttm->page_flags & TTM_TT_FLAG_DECRYPTED))) {
/*
* We're mapping a single page, and the desired
* page protection is consistent with the bo.
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index e0a77671edd6..02dcb728e29c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -81,6 +81,13 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
pr_err("Illegal buffer object type\n");
return -EINVAL;
}
+ /*
+ * When using dma_alloc_coherent with memory encryption the
+ * mapped TT pages need to be decrypted or otherwise the drivers
+ * will end up sending encrypted mem to the gpu.
+ */
+ if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+ page_flags |= TTM_TT_FLAG_DECRYPTED;
bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags);
if (unlikely(bo->ttm == NULL))
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index a4eff85b1f44..2b9d856ff388 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -79,6 +79,12 @@ struct ttm_tt {
* page_flags = TTM_TT_FLAG_EXTERNAL |
* TTM_TT_FLAG_EXTERNAL_MAPPABLE;
*
+ * TTM_TT_FLAG_DECRYPTED: The mapped ttm pages should be marked as
+ * not encrypted. The framework will try to match what the dma layer
+ * is doing, but note that it is a little fragile because ttm page
+ * fault handling abuses the DMA api a bit and dma_map_attrs can't be
+ * used to assure pgprot always matches.
+ *
* TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is
* set by TTM after ttm_tt_populate() has successfully returned, and is
* then unset when TTM calls ttm_tt_unpopulate().
@@ -87,8 +93,9 @@ struct ttm_tt {
#define TTM_TT_FLAG_ZERO_ALLOC BIT(1)
#define TTM_TT_FLAG_EXTERNAL BIT(2)
#define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3)
+#define TTM_TT_FLAG_DECRYPTED BIT(4)
-#define TTM_TT_FLAG_PRIV_POPULATED BIT(4)
+#define TTM_TT_FLAG_PRIV_POPULATED BIT(5)
uint32_t page_flags;
/** @num_pages: Number of pages in the page array. */
uint32_t num_pages;
--
2.39.2
With the dma conf being reallocated on each call to stmmac_open(), any
information in there is lost, unless we specifically handle it.
The STMMAC_TBS_EN bit is set when adding an etf qdisc, and the etf qdisc
therefore would stop working when link was set down and then back up.
Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open")
Cc: stable(a)vger.kernel.org
Signed-off-by: Esben Haabendal <esben(a)geanix.com>
---
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b334eb16da23..25519952f754 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3932,6 +3932,9 @@ static int __stmmac_open(struct net_device *dev,
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
buf_sz = dma_conf->dma_buf_sz;
+ for (int i = 0; i < MTL_MAX_TX_QUEUES; i++)
+ if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN)
+ dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs;
memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));
stmmac_reset_queues_param(priv);
--
2.43.0
The quilt patch titled
Subject: mm: thp_get_unmapped_area must honour topdown preference
has been removed from the -mm tree. Its filename was
mm-thp_get_unmapped_area-must-honour-topdown-preference.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: thp_get_unmapped_area must honour topdown preference
Date: Tue, 23 Jan 2024 17:14:20 +0000
The addition of commit efa7df3e3bb5 ("mm: align larger anonymous mappings
on THP boundaries") caused the "virtual_address_range" mm selftest to
start failing on arm64. Let's fix that regression.
There were 2 visible problems when running the test; 1) it takes much
longer to execute, and 2) the test fails. Both are related:
The (first part of the) test allocates as many 1GB anonymous blocks as it
can in the low 256TB of address space, passing NULL as the addr hint to
mmap. Before the faulty patch, all allocations were abutted and contained
in a single, merged VMA. However, after this patch, each allocation is in
its own VMA, and there is a 2M gap between each VMA. This causes the 2
problems in the test: 1) mmap becomes MUCH slower because there are so
many VMAs to check to find a new 1G gap. 2) mmap fails once it hits the
VMA limit (/proc/sys/vm/max_map_count). Hitting this limit then causes a
subsequent calloc() to fail, which causes the test to fail.
The problem is that arm64 (unlike x86) selects
ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT. But __thp_get_unmapped_area()
allocates len+2M then always aligns to the bottom of the discovered gap.
That causes the 2M hole.
Fix this by detecting cases where we can still achive the alignment goal
when moved to the top of the allocated area, if configured to prefer
top-down allocation.
While we are at it, fix thp_get_unmapped_area's use of pgoff, which should
always be zero for anonymous mappings. Prior to the faulty change, while
it was possible for user space to pass in pgoff!=0, the old
mm->get_unmapped_area() handler would not use it. thp_get_unmapped_area()
does use it, so let's explicitly zero it before calling the handler. This
should also be the correct behavior for arches that define their own
get_unmapped_area() handler.
Link: https://lkml.kernel.org/r/20240123171420.3970220-1-ryan.roberts@arm.com
Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries")
Closes: https://lore.kernel.org/linux-mm/1e8f5ac7-54ce-433a-ae53-81522b2320e1@arm.c…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 10 ++++++++--
mm/mmap.c | 6 ++++--
2 files changed, 12 insertions(+), 4 deletions(-)
--- a/mm/huge_memory.c~mm-thp_get_unmapped_area-must-honour-topdown-preference
+++ a/mm/huge_memory.c
@@ -810,7 +810,7 @@ static unsigned long __thp_get_unmapped_
{
loff_t off_end = off + len;
loff_t off_align = round_up(off, size);
- unsigned long len_pad, ret;
+ unsigned long len_pad, ret, off_sub;
if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
return 0;
@@ -839,7 +839,13 @@ static unsigned long __thp_get_unmapped_
if (ret == addr)
return addr;
- ret += (off - ret) & (size - 1);
+ off_sub = (off - ret) & (size - 1);
+
+ if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown &&
+ !off_sub)
+ return ret + size;
+
+ ret += off_sub;
return ret;
}
--- a/mm/mmap.c~mm-thp_get_unmapped_area-must-honour-topdown-preference
+++ a/mm/mmap.c
@@ -1825,15 +1825,17 @@ get_unmapped_area(struct file *file, uns
/*
* mmap_region() will call shmem_zero_setup() to create a file,
* so use shmem's get_unmapped_area in case it can be huge.
- * do_mmap() will clear pgoff, so match alignment.
*/
- pgoff = 0;
get_area = shmem_get_unmapped_area;
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
/* Ensures that larger anonymous mappings are THP aligned. */
get_area = thp_get_unmapped_area;
}
+ /* Always treat pgoff as zero for anonymous memory. */
+ if (!file)
+ pgoff = 0;
+
addr = get_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-userfaultfd-uffdio_move-implementation-should-use-ptep_get.patch
tools-mm-add-thpmaps-script-to-dump-thp-usage-info.patch
arm64-mm-make-set_ptes-robust-when-oas-cross-48-bit-boundary.patch
The quilt patch titled
Subject: mm/hugetlb: restore the reservation if needed
has been removed from the -mm tree. Its filename was
mm-hugetlb-restore-the-reservation-if-needed.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Breno Leitao <leitao(a)debian.org>
Subject: mm/hugetlb: restore the reservation if needed
Date: Wed, 17 Jan 2024 09:10:57 -0800
Currently there is a bug that a huge page could be stolen, and when the
original owner tries to fault in it, it causes a page fault.
You can achieve that by:
1) Creating a single page
echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
2) mmap() the page above with MAP_HUGETLB into (void *ptr1).
* This will mark the page as reserved
3) touch the page, which causes a page fault and allocates the page
* This will move the page out of the free list.
* It will also unreserved the page, since there is no more free
page
4) madvise(MADV_DONTNEED) the page
* This will free the page, but not mark it as reserved.
5) Allocate a secondary page with mmap(MAP_HUGETLB) into (void *ptr2).
* it should fail, but, since there is no more available page.
* But, since the page above is not reserved, this mmap() succeed.
6) Faulting at ptr1 will cause a SIGBUS
* it will try to allocate a huge page, but there is none
available
A full reproducer is in selftest. See
https://lore.kernel.org/all/20240105155419.1939484-1-leitao@debian.org/
Fix this by restoring the reserved page if necessary. If the page being
unmapped has HPAGE_RESV_OWNER set, and needs a reservation, set the
restore_reserve flag, which will move the page from free to reserved.
Link: https://lkml.kernel.org/r/20240117171058.2192286-1-leitao@debian.org
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Suggested-by: Rik van Riel <riel(a)surriel.com>
Cc: Lorenzo Stoakes <lstoakes(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 10 ++++++++++
1 file changed, 10 insertions(+)
--- a/mm/hugetlb.c~mm-hugetlb-restore-the-reservation-if-needed
+++ a/mm/hugetlb.c
@@ -5677,6 +5677,16 @@ void __unmap_hugepage_range(struct mmu_g
hugetlb_count_sub(pages_per_huge_page(h), mm);
hugetlb_remove_rmap(page_folio(page));
+ if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
+ vma_needs_reservation(h, vma, start)) {
+ /*
+ * Restore the reservation if needed, otherwise the
+ * backing page could be stolen by someone.
+ */
+ folio_set_hugetlb_restore_reserve(page_folio(page));
+ vma_add_reservation(h, vma, address);
+ }
+
spin_unlock(ptl);
tlb_remove_page_size(tlb, page, huge_page_size(h));
/*
_
Patches currently in -mm which might be from leitao(a)debian.org are
selftests-mm-new-test-that-steals-pages.patch
The quilt patch titled
Subject: selftests/mm: Update va_high_addr_switch.sh to check CPU for la57 flag
has been removed from the -mm tree. Its filename was
selftests-mm-update-va_high_addr_switchsh-to-check-cpu-for-la57-flag.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Audra Mitchell <audra(a)redhat.com>
Subject: selftests/mm: Update va_high_addr_switch.sh to check CPU for la57 flag
Date: Fri, 19 Jan 2024 15:58:01 -0500
In order for the page table level 5 to be in use, the CPU must have the
setting enabled in addition to the CONFIG option. Check for the flag to be
set to avoid false test failures on systems that do not have this cpu flag
set.
The test does a series of mmap calls including three using the
MAP_FIXED flag and specifying an address that is 1<<47 or 1<<48. These
addresses are only available if you are using level 5 page tables,
which requires both the CPU to have the capabiltiy (la57 flag) and the
kernel to be configured. Currently the test only checks for the kernel
configuration option, so this test can still report a false positive.
Here are the three failing lines:
$ ./va_high_addr_switch | grep FAILED
mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED
mmap(HIGH_ADDR, MAP_FIXED): 0xffffffffffffffff - FAILED
mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED
I thought (for about a second) refactoring the test so that these three
mmap calls will only be run on systems with the level 5 page tables
available, but the whole point of the test is to check the level 5
feature...
Link: https://lkml.kernel.org/r/20240119205801.62769-1-audra@redhat.com
Fixes: 4f2930c6718a ("selftests/vm: only run 128TBswitch with 5-level paging")
Signed-off-by: Audra Mitchell <audra(a)redhat.com>
Cc: Rafael Aquini <raquini(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Adam Sindelar <adam(a)wowsignal.io>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/va_high_addr_switch.sh | 6 ++++++
1 file changed, 6 insertions(+)
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh~selftests-mm-update-va_high_addr_switchsh-to-check-cpu-for-la57-flag
+++ a/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -29,9 +29,15 @@ check_supported_x86_64()
# See man 1 gzip under '-f'.
local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
+ local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;}
+ else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
+
if [[ "${pg_table_levels}" -lt 5 ]]; then
echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
exit $ksft_skip
+ elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
+ echo "$0: CPU does not have the necessary la57 flag to support page table level 5"
+ exit $ksft_skip
fi
}
_
Patches currently in -mm which might be from audra(a)redhat.com are
The quilt patch titled
Subject: userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb
has been removed from the -mm tree. Its filename was
userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Lokesh Gidra <lokeshgidra(a)google.com>
Subject: userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb
Date: Wed, 17 Jan 2024 14:37:29 -0800
In mfill_atomic_hugetlb(), mmap_changing isn't being checked
again if we drop mmap_lock and reacquire it. When the lock is not held,
mmap_changing could have been incremented. This is also inconsistent
with the behavior in mfill_atomic().
Link: https://lkml.kernel.org/r/20240117223729.1444522-1-lokeshgidra@google.com
Fixes: df2cc96e77011 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races")
Signed-off-by: Lokesh Gidra <lokeshgidra(a)google.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Brian Geffon <bgeffon(a)google.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Kalesh Singh <kaleshsingh(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Nicolas Geoffray <ngeoffray(a)google.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/userfaultfd.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
--- a/mm/userfaultfd.c~userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb
+++ a/mm/userfaultfd.c
@@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_ato
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
+ atomic_t *mmap_changing,
uffd_flags_t flags)
{
struct mm_struct *dst_mm = dst_vma->vm_mm;
@@ -472,6 +473,15 @@ retry:
goto out;
}
mmap_read_lock(dst_mm);
+ /*
+ * If memory mappings are changing because of non-cooperative
+ * operation (e.g. mremap) running in parallel, bail out and
+ * request the user to retry later
+ */
+ if (mmap_changing && atomic_read(mmap_changing)) {
+ err = -EAGAIN;
+ break;
+ }
dst_vma = NULL;
goto retry;
@@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(stru
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
+ atomic_t *mmap_changing,
uffd_flags_t flags);
#endif /* CONFIG_HUGETLB_PAGE */
@@ -622,8 +633,8 @@ retry:
* If this is a HUGETLB vma, pass off to appropriate routine
*/
if (is_vm_hugetlb_page(dst_vma))
- return mfill_atomic_hugetlb(dst_vma, dst_start,
- src_start, len, flags);
+ return mfill_atomic_hugetlb(dst_vma, dst_start, src_start,
+ len, mmap_changing, flags);
if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
goto out_unlock;
_
Patches currently in -mm which might be from lokeshgidra(a)google.com are
userfaultfd-fix-return-error-if-mmap_changing-is-non-zero-in-move-ioctl.patch
The quilt patch titled
Subject: selftests/mm: ksm_tests should only MADV_HUGEPAGE valid memory
has been removed from the -mm tree. Its filename was
selftests-mm-ksm_tests-should-only-madv_hugepage-valid-memory.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: selftests/mm: ksm_tests should only MADV_HUGEPAGE valid memory
Date: Mon, 22 Jan 2024 12:05:54 +0000
ksm_tests was previously mmapping a region of memory, aligning the
returned pointer to a PMD boundary, then setting MADV_HUGEPAGE, but was
setting it past the end of the mmapped area due to not taking the pointer
alignment into consideration. Fix this behaviour.
Up until commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP
boundaries"), this buggy behavior was (usually) masked because the
alignment difference was always less than PMD-size. But since the
mentioned commit, `ksm_tests -H -s 100` started failing.
Link: https://lkml.kernel.org/r/20240122120554.3108022-1-ryan.roberts@arm.com
Fixes: 325254899684 ("selftests: vm: add KSM huge pages merging time test")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Pedro Demarchi Gomes <pedrodemargomes(a)gmail.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/ksm_tests.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/tools/testing/selftests/mm/ksm_tests.c~selftests-mm-ksm_tests-should-only-madv_hugepage-valid-memory
+++ a/tools/testing/selftests/mm/ksm_tests.c
@@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int
if (map_ptr_orig == MAP_FAILED)
err(2, "initial mmap");
- if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE))
+ if (madvise(map_ptr, len, MADV_HUGEPAGE))
err(2, "MADV_HUGEPAGE");
pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-userfaultfd-uffdio_move-implementation-should-use-ptep_get.patch
tools-mm-add-thpmaps-script-to-dump-thp-usage-info.patch
arm64-mm-make-set_ptes-robust-when-oas-cross-48-bit-boundary.patch
The quilt patch titled
Subject: scs: add CONFIG_MMU dependency for vfree_atomic()
has been removed from the -mm tree. Its filename was
scs-add-config_mmu-dependency-for-vfree_atomic.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Samuel Holland <samuel.holland(a)sifive.com>
Subject: scs: add CONFIG_MMU dependency for vfree_atomic()
Date: Mon, 22 Jan 2024 09:52:01 -0800
The shadow call stack implementation fails to build without CONFIG_MMU:
ld.lld: error: undefined symbol: vfree_atomic
>>> referenced by scs.c
>>> kernel/scs.o:(scs_free) in archive vmlinux.a
Link: https://lkml.kernel.org/r/20240122175204.2371009-1-samuel.holland@sifive.com
Fixes: a2abe7cbd8fe ("scs: switch to vmapped shadow stacks")
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
Reviewed-by: Sami Tolvanen <samitolvanen(a)google.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/Kconfig | 1 +
1 file changed, 1 insertion(+)
--- a/arch/Kconfig~scs-add-config_mmu-dependency-for-vfree_atomic
+++ a/arch/Kconfig
@@ -673,6 +673,7 @@ config SHADOW_CALL_STACK
bool "Shadow Call Stack"
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
+ depends on MMU
help
This option enables the compiler's Shadow Call Stack, which
uses a shadow stack to protect function return addresses from
_
Patches currently in -mm which might be from samuel.holland(a)sifive.com are
The quilt patch titled
Subject: mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again
has been removed from the -mm tree. Its filename was
mm-writeback-fix-possible-divide-by-zero-in-wb_dirty_limits-again.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Zach O'Keefe" <zokeefe(a)google.com>
Subject: mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again
Date: Thu, 18 Jan 2024 10:19:53 -0800
(struct dirty_throttle_control *)->thresh is an unsigned long, but is
passed as the u32 divisor argument to div_u64(). On architectures where
unsigned long is 64 bytes, the argument will be implicitly truncated.
Use div64_u64() instead of div_u64() so that the value used in the "is
this a safe division" check is the same as the divisor.
Also, remove redundant cast of the numerator to u64, as that should happen
implicitly.
This would be difficult to exploit in memcg domain, given the ratio-based
arithmetic domain_drity_limits() uses, but is much easier in global
writeback domain with a BDI_CAP_STRICTLIMIT-backing device, using e.g.
vm.dirty_bytes=(1<<32)*PAGE_SIZE so that dtc->thresh == (1<<32)
Link: https://lkml.kernel.org/r/20240118181954.1415197-1-zokeefe@google.com
Fixes: f6789593d5ce ("mm/page-writeback.c: fix divide by zero in bdi_dirty_limits()")
Signed-off-by: Zach O'Keefe <zokeefe(a)google.com>
Cc: Maxim Patlasov <MPatlasov(a)parallels.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page-writeback.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/page-writeback.c~mm-writeback-fix-possible-divide-by-zero-in-wb_dirty_limits-again
+++ a/mm/page-writeback.c
@@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struc
*/
dtc->wb_thresh = __wb_calc_thresh(dtc);
dtc->wb_bg_thresh = dtc->thresh ?
- div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
+ div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
/*
* In order to avoid the stacked BDI deadlock we need
_
Patches currently in -mm which might be from zokeefe(a)google.com are
The quilt patch titled
Subject: selftests: mm: fix map_hugetlb failure on 64K page size systems
has been removed from the -mm tree. Its filename was
selftests-mm-fix-map_hugetlb-failure-on-64k-page-size-systems.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Nico Pache <npache(a)redhat.com>
Subject: selftests: mm: fix map_hugetlb failure on 64K page size systems
Date: Fri, 19 Jan 2024 06:14:29 -0700
On systems with 64k page size and 512M huge page sizes, the allocation and
test succeeds but errors out at the munmap. As the comment states, munmap
will failure if its not HUGEPAGE aligned. This is due to the length of
the mapping being 1/2 the size of the hugepage causing the munmap to not
be hugepage aligned. Fix this by making the mapping length the full
hugepage if the hugepage is larger than the length of the mapping.
Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com
Signed-off-by: Nico Pache <npache(a)redhat.com>
Cc: Donet Tom <donettom(a)linux.vnet.ibm.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Christophe Leroy <christophe.leroy(a)c-s.fr>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/map_hugetlb.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/tools/testing/selftests/mm/map_hugetlb.c~selftests-mm-fix-map_hugetlb-failure-on-64k-page-size-systems
+++ a/tools/testing/selftests/mm/map_hugetlb.c
@@ -15,6 +15,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
+#include "vm_util.h"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
@@ -58,10 +59,16 @@ int main(int argc, char **argv)
{
void *addr;
int ret;
+ size_t hugepage_size;
size_t length = LENGTH;
int flags = FLAGS;
int shift = 0;
+ hugepage_size = default_huge_page_size();
+ /* munmap with fail if the length is not page aligned */
+ if (hugepage_size > length)
+ length = hugepage_size;
+
if (argc > 1)
length = atol(argv[1]) << 20;
if (argc > 2) {
_
Patches currently in -mm which might be from npache(a)redhat.com are
selftests-mm-perform-some-system-cleanup-before-using-hugepages.patch
The quilt patch titled
Subject: selftests/mm: switch to bash from sh
has been removed from the -mm tree. Its filename was
selftests-mm-switch-to-bash-from-sh.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Subject: selftests/mm: switch to bash from sh
Date: Tue, 16 Jan 2024 14:04:54 +0500
Running charge_reserved_hugetlb.sh generates errors if sh is set to
dash:
./charge_reserved_hugetlb.sh: 9: [[: not found
./charge_reserved_hugetlb.sh: 19: [[: not found
./charge_reserved_hugetlb.sh: 27: [[: not found
./charge_reserved_hugetlb.sh: 37: [[: not found
./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected
Switch to using /bin/bash instead of /bin/sh. Make the switch for
write_hugetlb_memory.sh as well which is called from
charge_reserved_hugetlb.sh.
Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com
Signed-off-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Cc: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: David Laight <David.Laight(a)ACULAB.COM>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 2 +-
tools/testing/selftests/mm/write_hugetlb_memory.sh | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh~selftests-mm-switch-to-bash-from-sh
+++ a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Kselftest framework requirement - SKIP code is 4.
--- a/tools/testing/selftests/mm/write_hugetlb_memory.sh~selftests-mm-switch-to-bash-from-sh
+++ a/tools/testing/selftests/mm/write_hugetlb_memory.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
set -e
_
Patches currently in -mm which might be from usama.anjum(a)collabora.com are
selftests-core-include-linux-close_rangeh-for-close_range_-macros.patch
selftests-mm-hugetlb_reparenting_test-do-not-unmount.patch
selftests-mm-run_vmtests-remove-sudo-and-conform-to-tap.patch
selftests-mm-save-and-restore-nr_hugepages-value.patch
selftests-mm-protection_keys-save-restore-nr_hugepages-settings.patch
selftests-mm-run_vmtestssh-add-missing-tests.patch
The quilt patch titled
Subject: readahead: avoid multiple marked readahead pages
has been removed from the -mm tree. Its filename was
readahead-avoid-multiple-marked-readahead-pages.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jan Kara <jack(a)suse.cz>
Subject: readahead: avoid multiple marked readahead pages
Date: Thu, 4 Jan 2024 09:58:39 +0100
ra_alloc_folio() marks a page that should trigger next round of async
readahead. However it rounds up computed index to the order of page being
allocated. This can however lead to multiple consecutive pages being
marked with readahead flag. Consider situation with index == 1, mark ==
1, order == 0. We insert order 0 page at index 1 and mark it. Then we
bump order to 1, index to 2, mark (still == 1) is rounded up to 2 so page
at index 2 is marked as well. Then we bump order to 2, index is
incremented to 4, mark gets rounded to 4 so page at index 4 is marked as
well. The fact that multiple pages get marked within a single readahead
window confuses the readahead logic and results in readahead window being
trimmed back to 1. This situation is triggered in particular when maximum
readahead window size is not a power of two (in the observed case it was
768 KB) and as a result sequential read throughput suffers.
Fix the problem by rounding 'mark' down instead of up. Because the index
is naturally aligned to 'order', we are guaranteed 'rounded mark' == index
iff 'mark' is within the page we are allocating at 'index' and thus
exactly one page is marked with readahead flag as required by the
readahead code and sequential read performance is restored.
This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix
readahead with large folios"). The commit changed the rounding with the
rationale:
"... we were setting the readahead flag on the folio which contains the
last byte read from the block. This is wrong because we will trigger
readahead at the end of the read without waiting to see if a subsequent
read is going to use the pages we just read."
Although this is true, the fact is this was always the case with read
sizes not aligned to folio boundaries and large folios in the page cache
just make the situation more obvious (and frequent). Also for sequential
read workloads it is better to trigger the readahead earlier rather than
later. It is true that the difference in the rounding and thus earlier
triggering of the readahead can result in reading more for semi-random
workloads. However workloads really suffering from this seem to be rare.
In particular I have verified that the workload described in commit
b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading
random 100k blocks from a file like:
[reader]
bs=100k
rw=randread
numjobs=1
size=64g
runtime=60s
is not impacted by the rounding change and achieves ~70MB/s in both cases.
[jack(a)suse.cz: fix one more place where mark rounding was done as well]
Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz
Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz
Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Guo Xuenan <guoxuenan(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/readahead.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/readahead.c~readahead-avoid-multiple-marked-readahead-pages
+++ a/mm/readahead.c
@@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct
if (!folio)
return -ENOMEM;
- mark = round_up(mark, 1UL << order);
+ mark = round_down(mark, 1UL << order);
if (index == mark)
folio_set_readahead(folio);
err = filemap_add_folio(ractl->mapping, folio, index, gfp);
@@ -575,7 +575,7 @@ static void ondemand_readahead(struct re
* It's the expected callback index, assume sequential access.
* Ramp up sizes, and push forward the readahead window.
*/
- expected = round_up(ra->start + ra->size - ra->async_size,
+ expected = round_down(ra->start + ra->size - ra->async_size,
1UL << order);
if (index == expected || index == (ra->start + ra->size)) {
ra->start += ra->size;
_
Patches currently in -mm which might be from jack(a)suse.cz are
The quilt patch titled
Subject: fs/hugetlbfs/inode.c: mm/memory-failure.c: fix hugetlbfs hwpoison handling
has been removed from the -mm tree. Its filename was
fs-hugetlbfs-inodec-mm-memory-failurec-fix-hugetlbfs-hwpoison-handling.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Sidhartha Kumar <sidhartha.kumar(a)oracle.com>
Subject: fs/hugetlbfs/inode.c: mm/memory-failure.c: fix hugetlbfs hwpoison handling
Date: Fri, 12 Jan 2024 10:08:40 -0800
has_extra_refcount() makes the assumption that the page cache adds a ref
count of 1 and subtracts this in the extra_pins case. Commit a08c7193e4f1
(mm/filemap: remove hugetlb special casing in filemap.c) modifies
__filemap_add_folio() by calling folio_ref_add(folio, nr); for all cases
(including hugtetlb) where nr is the number of pages in the folio. We
should adjust the number of references coming from the page cache by
subtracing the number of pages rather than 1.
In hugetlbfs_read_iter(), folio_test_has_hwpoisoned() is testing the wrong
flag as, in the hugetlb case, memory-failure code calls
folio_test_set_hwpoison() to indicate poison. folio_test_hwpoison() is
the correct function to test for that flag.
After these fixes, the hugetlb hwpoison read selftest passes all cases.
Link: https://lkml.kernel.org/r/20240112180840.367006-1-sidhartha.kumar@oracle.com
Fixes: a08c7193e4f1 ("mm/filemap: remove hugetlb special casing in filemap.c")
Signed-off-by: Sidhartha Kumar <sidhartha.kumar(a)oracle.com>
Closes: https://lore.kernel.org/linux-mm/20230713001833.3778937-1-jiaqiyan@google.c…
Reported-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Tested-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Acked-by: Miaohe Lin <linmiaohe(a)huawei.com>
Acked-by: Muchun Song <muchun.song(a)linux.dev>
Cc: James Houghton <jthoughton(a)google.com>
Cc: Jiaqi Yan <jiaqiyan(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: <stable(a)vger.kernel.org> [6.7+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/hugetlbfs/inode.c | 2 +-
mm/memory-failure.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/fs/hugetlbfs/inode.c~fs-hugetlbfs-inodec-mm-memory-failurec-fix-hugetlbfs-hwpoison-handling
+++ a/fs/hugetlbfs/inode.c
@@ -340,7 +340,7 @@ static ssize_t hugetlbfs_read_iter(struc
} else {
folio_unlock(folio);
- if (!folio_test_has_hwpoisoned(folio))
+ if (!folio_test_hwpoison(folio))
want = nr;
else {
/*
--- a/mm/memory-failure.c~fs-hugetlbfs-inodec-mm-memory-failurec-fix-hugetlbfs-hwpoison-handling
+++ a/mm/memory-failure.c
@@ -982,7 +982,7 @@ static bool has_extra_refcount(struct pa
int count = page_count(p) - 1;
if (extra_pins)
- count -= 1;
+ count -= folio_nr_pages(page_folio(p));
if (count > 0) {
pr_err("%#lx: %s still referenced by %d users\n",
_
Patches currently in -mm which might be from sidhartha.kumar(a)oracle.com are
maple_tree-fix-comment-describing-mas_node_count_gfp.patch
The quilt patch titled
Subject: Revert "mm:vmscan: fix inaccurate reclaim during proactive reclaim"
has been removed from the -mm tree. Its filename was
revert-mm-vmscan-fix-inaccurate-reclaim-during-proactive-reclaim.patch
This patch was dropped because an updated version will be merged
------------------------------------------------------
From: "T.J. Mercier" <tjmercier(a)google.com>
Subject: Revert "mm:vmscan: fix inaccurate reclaim during proactive reclaim"
Date: Sun, 21 Jan 2024 21:44:12 +0000
This reverts commit 0388536ac29104a478c79b3869541524caec28eb.
Proactive reclaim on the root cgroup is 10x slower after this patch when
MGLRU is enabled, and completion times for proactive reclaim on much
smaller non-root cgroups take ~30% longer (with or without MGLRU). With
root reclaim before the patch, I observe average reclaim rates of ~70k
pages/sec before try_to_free_mem_cgroup_pages starts to fail and the
nr_retries counter starts to decrement, eventually ending the proactive
reclaim attempt. After the patch the reclaim rate is consistently ~6.6k
pages/sec due to the reduced nr_pages value causing scan aborts as soon as
SWAP_CLUSTER_MAX pages are reclaimed. The proactive reclaim doesn't
complete after several minutes because try_to_free_mem_cgroup_pages is
still capable of reclaiming pages in tiny SWAP_CLUSTER_MAX page chunks and
nr_retries is never decremented.
The docs for memory.reclaim say, "the kernel can over or under reclaim
from the target cgroup" which this patch was trying to fix. Revert it
until a less costly solution is found.
Link: https://lkml.kernel.org/r/20240121214413.833776-1-tjmercier@google.com
Fixes: 0388536ac291 ("mm:vmscan: fix inaccurate reclaim during proactive reclaim")
Signed-off-by: T.J. Mercier <tjmercier(a)google.com>
Cc: Efly Young <yangyifei03(a)kuaishou.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: T.J. Mercier <tjmercier(a)google.com>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/memcontrol.c~revert-mm-vmscan-fix-inaccurate-reclaim-during-proactive-reclaim
+++ a/mm/memcontrol.c
@@ -6977,8 +6977,8 @@ static ssize_t memory_reclaim(struct ker
lru_add_drain_all();
reclaimed = try_to_free_mem_cgroup_pages(memcg,
- min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
- GFP_KERNEL, reclaim_options);
+ nr_to_reclaim - nr_reclaimed,
+ GFP_KERNEL, reclaim_options);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
_
Patches currently in -mm which might be from tjmercier(a)google.com are
The patch titled
Subject: nilfs2: fix data corruption in dsync block recovery for small block sizes
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
nilfs2-fix-data-corruption-in-dsync-block-recovery-for-small-block-sizes.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix data corruption in dsync block recovery for small block sizes
Date: Wed, 24 Jan 2024 21:19:36 +0900
The helper function nilfs_recovery_copy_block() of
nilfs_recovery_dsync_blocks(), which recovers data from logs created by
data sync writes during a mount after an unclean shutdown, incorrectly
calculates the on-page offset when copying repair data to the file's page
cache. In environments where the block size is smaller than the page
size, this flaw can cause data corruption and leak uninitialized memory
bytes during the recovery process.
Fix these issues by correcting this byte offset calculation on the page.
Link: https://lkml.kernel.org/r/20240124121936.10575-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/recovery.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/fs/nilfs2/recovery.c~nilfs2-fix-data-corruption-in-dsync-block-recovery-for-small-block-sizes
+++ a/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_rec
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct page *page)
{
struct buffer_head *bh_org;
+ size_t from = pos & ~PAGE_MASK;
void *kaddr;
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(str
return -EIO;
kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+ memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(st
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
if (unlikely(err))
goto failed_page;
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-fix-data-corruption-in-dsync-block-recovery-for-small-block-sizes.patch
nilfs2-convert-recovery-logic-to-use-kmap_local.patch
nilfs2-convert-segment-buffer-to-use-kmap_local.patch
nilfs2-convert-nilfs_copy_buffer-to-use-kmap_local.patch
nilfs2-convert-metadata-file-common-code-to-use-kmap_local.patch
nilfs2-convert-sufile-to-use-kmap_local.patch
nilfs2-convert-persistent-object-allocator-to-use-kmap_local.patch
nilfs2-convert-dat-to-use-kmap_local.patch
nilfs2-move-nilfs_bmap_write-call-out-of-nilfs_write_inode_common.patch
nilfs2-do-not-acquire-rwsem-in-nilfs_bmap_write.patch
nilfs2-convert-ifile-to-use-kmap_local.patch
nilfs2-localize-highmem-mapping-for-checkpoint-creation-within-cpfile.patch
nilfs2-localize-highmem-mapping-for-checkpoint-finalization-within-cpfile.patch
nilfs2-localize-highmem-mapping-for-checkpoint-reading-within-cpfile.patch
nilfs2-remove-nilfs_cpfile_getput_checkpoint.patch
nilfs2-convert-cpfile-to-use-kmap_local.patch
This small series addresses two bugs in the nfp conntrack offloading
code.
The first patch is a check to prevent offloading for a case which is
currently not supported by the nfp.
The second patch fixes up parsing of layer4 mangling code so it can be
correctly offloaded. Since the masks are an inverse mask and we are
shifting it so it can be packed together with the destination we
effectively need to 'clear' the lower bits of the mask by setting it to
0xFFFF.
Changes since v1:
- Added inline comment to the second patch
- Expanded the commit message to better explain the mask setting in the
second patch.
Hui Zhou (2):
nfp: flower: add hardware offload check for post ct entry
nfp: flower: fix hardware offload for the transfer layer port
.../ethernet/netronome/nfp/flower/conntrack.c | 46 +++++++++++++++++--
1 file changed, 43 insertions(+), 3 deletions(-)
--
2.34.1
From: Filipe Manana <fdmanana(a)suse.com>
[ Upstream commit 9b378f6ad48cfa195ed868db9123c09ee7ec5ea2 ]
The readdir implementation currently processes always up to the last index
it finds. This however can result in an infinite loop if the directory has
a large number of entries such that they won't all fit in the given buffer
passed to the readdir callback, that is, dir_emit() returns a non-zero
value. Because in that case readdir() will be called again and if in the
meanwhile new directory entries were added and we still can't put all the
remaining entries in the buffer, we keep repeating this over and over.
The following C program and test script reproduce the problem:
$ cat /mnt/readdir_prog.c
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
int main(int argc, char *argv[])
{
DIR *dir = opendir(".");
struct dirent *dd;
while ((dd = readdir(dir))) {
printf("%s\n", dd->d_name);
rename(dd->d_name, "TEMPFILE");
rename("TEMPFILE", dd->d_name);
}
closedir(dir);
}
$ gcc -o /mnt/readdir_prog /mnt/readdir_prog.c
$ cat test.sh
#!/bin/bash
DEV=/dev/sdi
MNT=/mnt/sdi
mkfs.btrfs -f $DEV &> /dev/null
#mkfs.xfs -f $DEV &> /dev/null
#mkfs.ext4 -F $DEV &> /dev/null
mount $DEV $MNT
mkdir $MNT/testdir
for ((i = 1; i <= 2000; i++)); do
echo -n > $MNT/testdir/file_$i
done
cd $MNT/testdir
/mnt/readdir_prog
cd /mnt
umount $MNT
This behaviour is surprising to applications and it's unlike ext4, xfs,
tmpfs, vfat and other filesystems, which always finish. In this case where
new entries were added due to renames, some file names may be reported
more than once, but this varies according to each filesystem - for example
ext4 never reported the same file more than once while xfs reports the
first 13 file names twice.
So change our readdir implementation to track the last index number when
opendir() is called and then make readdir() never process beyond that
index number. This gives the same behaviour as ext4.
Reported-by: Rob Landley <rob(a)landley.net>
Link: https://lore.kernel.org/linux-btrfs/2c8c55ec-04c6-e0dc-9c5c-8c7924778c35@la…
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217681
CC: stable(a)vger.kernel.org # 5.15
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
[ Resolve a conflict due to member changes in 96d89923fa94 ]
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/ctree.h | 1 +
fs/btrfs/delayed-inode.c | 5 +-
fs/btrfs/delayed-inode.h | 1 +
fs/btrfs/inode.c | 131 +++++++++++++++++++++++----------------
4 files changed, 84 insertions(+), 54 deletions(-)
---
Changelog:
v2:
- Fix the upstream commit hash
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1467bf439cb4..7905f178efa3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1361,6 +1361,7 @@ struct btrfs_drop_extents_args {
struct btrfs_file_private {
void *filldir_buf;
+ u64 last_index;
};
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fd951aeaeac5..5a98c5da1225 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1513,6 +1513,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
}
bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ u64 last_index,
struct list_head *ins_list,
struct list_head *del_list)
{
@@ -1532,14 +1533,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node);
- while (item) {
+ while (item && item->key.offset <= last_index) {
refcount_inc(&item->refs);
list_add_tail(&item->readdir_list, ins_list);
item = __btrfs_next_delayed_item(item);
}
item = __btrfs_first_delayed_deletion_item(delayed_node);
- while (item) {
+ while (item && item->key.offset <= last_index) {
refcount_inc(&item->refs);
list_add_tail(&item->readdir_list, del_list);
item = __btrfs_next_delayed_item(item);
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index b2412160c5bc..a9cfce856d2e 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -123,6 +123,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);
/* Used for readdir() */
bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ u64 last_index,
struct list_head *ins_list,
struct list_head *del_list);
void btrfs_readdir_put_delayed_items(struct inode *inode,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 95af29634e55..1df374ce829b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6121,6 +6121,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
return d_splice_alias(inode, dentry);
}
+/*
+ * Find the highest existing sequence number in a directory and then set the
+ * in-memory index_cnt variable to the first free sequence number.
+ */
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_key key, found_key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ int ret;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = (u64)-1;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ /* FIXME: we should be able to handle this */
+ if (ret == 0)
+ goto out;
+ ret = 0;
+
+ if (path->slots[0] == 0) {
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+ path->slots[0]--;
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.objectid != btrfs_ino(inode) ||
+ found_key.type != BTRFS_DIR_INDEX_KEY) {
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+ inode->index_cnt = found_key.offset + 1;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+{
+ if (dir->index_cnt == (u64)-1) {
+ int ret;
+
+ ret = btrfs_inode_delayed_dir_index_count(dir);
+ if (ret) {
+ ret = btrfs_set_inode_index_count(dir);
+ if (ret)
+ return ret;
+ }
+ }
+
+ *index = dir->index_cnt;
+
+ return 0;
+}
+
/*
* All this infrastructure exists because dir_emit can fault, and we are holding
* the tree lock when doing readdir. For now just allocate a buffer and copy
@@ -6133,10 +6201,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
static int btrfs_opendir(struct inode *inode, struct file *file)
{
struct btrfs_file_private *private;
+ u64 last_index;
+ int ret;
+
+ ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
+ if (ret)
+ return ret;
private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
if (!private)
return -ENOMEM;
+ private->last_index = last_index;
private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!private->filldir_buf) {
kfree(private);
@@ -6205,7 +6280,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
- put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+ put = btrfs_readdir_get_delayed_items(inode, private->last_index,
+ &ins_list, &del_list);
again:
key.type = BTRFS_DIR_INDEX_KEY;
@@ -6238,6 +6314,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
break;
if (found_key.offset < ctx->pos)
goto next;
+ if (found_key.offset > private->last_index)
+ break;
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
goto next;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
@@ -6371,57 +6449,6 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
return dirty ? btrfs_dirty_inode(inode) : 0;
}
-/*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to reflect
- * free sequence numbers
- */
-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_key key, found_key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- int ret;
-
- key.objectid = btrfs_ino(inode);
- key.type = BTRFS_DIR_INDEX_KEY;
- key.offset = (u64)-1;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- /* FIXME: we should be able to handle this */
- if (ret == 0)
- goto out;
- ret = 0;
-
- if (path->slots[0] == 0) {
- inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
- }
-
- path->slots[0]--;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != btrfs_ino(inode) ||
- found_key.type != BTRFS_DIR_INDEX_KEY) {
- inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
- }
-
- inode->index_cnt = found_key.offset + 1;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
/*
* helper to find a free sequence number in a given directory. This current
* code is very simple, later versions will do smarter things in the btree
--
2.43.0
In zswap_writeback_entry(), after we get a folio from
__read_swap_cache_async(), we grab the tree lock again to check that the
swap entry was not invalidated and recycled. If it was, we delete the
folio we just added to the swap cache and exit.
However, __read_swap_cache_async() returns the folio locked when it is
newly allocated, which is always true for this path, and the folio is
ref'd. Make sure to unlock and put the folio before returning.
This was discovered by code inspection, probably because this path
handles a race condition that should not happen often, and the bug would
not crash the system, it will only strand the folio indefinitely.
Fixes: 04fc7816089c ("mm: fix zswap writeback race condition")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
---
mm/zswap.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mm/zswap.c b/mm/zswap.c
index 8f4a7efc2bdae..00e90b9b5417d 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1448,6 +1448,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
spin_unlock(&tree->lock);
delete_from_swap_cache(folio);
+ folio_unlock(folio);
+ folio_put(folio);
return -ENOMEM;
}
spin_unlock(&tree->lock);
--
2.43.0.429.g432eaa2c6b-goog
From: "Maciej S. Szmigiero" <maciej.szmigiero(a)oracle.com>
The stable kernel version backport of the patch disabling XSAVES on AMD
Zen family 0x17 applied this change to the wrong function (init_amd_k6()),
one which isn't called for Zen CPUs.
Move the erratum to the init_amd_zn() function instead.
Add an explicit family 0x17 check to the erratum so nothing will break if
someone naively makes this kernel version call init_amd_zn() also for
family 0x19 in the future (as the current upstream code does).
Fixes: e40c1e9da1ec ("x86/CPU/AMD: Disable XSAVES on AMD family 0x17")
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero(a)oracle.com>
---
arch/x86/kernel/cpu/amd.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b2cdf1c07e56..4e84203fc067 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -277,15 +277,6 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
#endif
- /*
- * Work around Erratum 1386. The XSAVES instruction malfunctions in
- * certain circumstances on Zen1/2 uarch, and not all parts have had
- * updated microcode at the time of writing (March 2023).
- *
- * Affected parts all have no supervisor XSAVE states, meaning that
- * the XSAVEC instruction (which works fine) is equivalent.
- */
- clear_cpu_cap(c, X86_FEATURE_XSAVES);
}
static void init_amd_k7(struct cpuinfo_x86 *c)
@@ -989,6 +980,17 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
set_cpu_cap(c, X86_FEATURE_BTC_NO);
}
+
+ /*
+ * Work around Erratum 1386. The XSAVES instruction malfunctions in
+ * certain circumstances on Zen1/2 uarch, and not all parts have had
+ * updated microcode at the time of writing (March 2023).
+ *
+ * Affected parts all have no supervisor XSAVE states, meaning that
+ * the XSAVEC instruction (which works fine) is equivalent.
+ */
+ if (c->x86 == 0x17)
+ clear_cpu_cap(c, X86_FEATURE_XSAVES);
}
static bool cpu_has_zenbleed_microcode(void)
This reverts commit bed9e27baf52a09b7ba2a3714f1e24e17ced386d.
The original set [1][2] was expected to undo a suboptimal fix in [2], and
replace it with a better fix [1]. However, as reported by Dan Moulding [2]
causes an issue with raid5 with journal device.
Revert [2] for now to close the issue. We will follow up on another issue
reported by Juxiao Bi, as [2] is expected to fix it. We believe this is a
good trade-off, because the latter issue happens less freqently.
In the meanwhile, we will NOT revert [1], as it contains the right logic.
Reported-by: Dan Moulding <dan(a)danm.net>
Closes: https://lore.kernel.org/linux-raid/20240123005700.9302-1-dan@danm.net/
Fixes: bed9e27baf52 ("Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"")
Cc: stable(a)vger.kernel.org # v5.19+
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Yu Kuai <yukuai3(a)huawei.com>
Signed-off-by: Song Liu <song(a)kernel.org>
[1] commit d6e035aad6c0 ("md: bypass block throttle for superblock update")
[2] commit bed9e27baf52 ("Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"")
---
drivers/md/raid5.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8497880135ee..2b2f03705990 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -36,6 +36,7 @@
*/
#include <linux/blkdev.h>
+#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
@@ -6773,7 +6774,18 @@ static void raid5d(struct md_thread *thread)
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
+
+ /*
+ * Waiting on MD_SB_CHANGE_PENDING below may deadlock
+ * seeing md_check_recovery() is needed to clear
+ * the flag when using mdmon.
+ */
+ continue;
}
+
+ wait_event_lock_irq(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
+ conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);
--
2.34.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a484755ab2526ebdbe042397cdd6e427eb4b1a68
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012236-pebbly-coroner-581f@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
a484755ab252 ("Revert "nSVM: Check for reserved encodings of TLB_CONTROL in nested VMCB"")
0977cfac6e76 ("KVM: nSVM: Implement support for nested VNMI")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a484755ab2526ebdbe042397cdd6e427eb4b1a68 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 18 Oct 2023 12:41:03 -0700
Subject: [PATCH] Revert "nSVM: Check for reserved encodings of TLB_CONTROL in
nested VMCB"
Revert KVM's made-up consistency check on SVM's TLB control. The APM says
that unsupported encodings are reserved, but the APM doesn't state that
VMRUN checks for a supported encoding. Unless something is called out
in "Canonicalization and Consistency Checks" or listed as MBZ (Must Be
Zero), AMD behavior is typically to let software shoot itself in the foot.
This reverts commit 174a921b6975ef959dd82ee9e8844067a62e3ec1.
Fixes: 174a921b6975 ("nSVM: Check for reserved encodings of TLB_CONTROL in nested VMCB")
Reported-by: Stefan Sterz <s.sterz(a)proxmox.com>
Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com
Cc: stable(a)vger.kernel.org
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Link: https://lore.kernel.org/r/20231018194104.1896415-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3fea8c47679e..60891b9ce25f 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -247,18 +247,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}
-static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
-{
- /* Nested FLUSHBYASID is not supported yet. */
- switch(tlb_ctl) {
- case TLB_CONTROL_DO_NOTHING:
- case TLB_CONTROL_FLUSH_ALL_ASID:
- return true;
- default:
- return false;
- }
-}
-
static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
struct vmcb_ctrl_area_cached *control)
{
@@ -278,9 +266,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
IOPM_SIZE)))
return false;
- if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
- return false;
-
if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
!vmcb12_is_intercept(control, INTERCEPT_NMI))) {
return false;
From: Michal Pecio <michal.pecio(a)gmail.com>
xHCI 4.9 explicitly forbids assuming that the xHC has released its
ownership of a multi-TRB TD when it reports an error on one of the
early TRBs. Yet the driver makes such assumption and releases the TD,
allowing the remaining TRBs to be freed or overwritten by new TDs.
The xHC should also report completion of the final TRB due to its IOC
flag being set by us, regardless of prior errors. This event cannot
be recognized if the TD has already been freed earlier, resulting in
"Transfer event TRB DMA ptr not part of current TD" error message.
Fix this by reusing the logic for processing isoc Transaction Errors.
This also handles hosts which fail to report the final completion.
Fix transfer length reporting on Babble errors. They may be caused by
device malfunction, no guarantee that the buffer has been filled.
Signed-off-by: Michal Pecio <michal.pecio(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 41be7d31a36e..f0d8a607ff21 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2394,9 +2394,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
case COMP_BANDWIDTH_OVERRUN_ERROR:
frame->status = -ECOMM;
break;
- case COMP_ISOCH_BUFFER_OVERRUN:
case COMP_BABBLE_DETECTED_ERROR:
+ sum_trbs_for_length = true;
+ fallthrough;
+ case COMP_ISOCH_BUFFER_OVERRUN:
frame->status = -EOVERFLOW;
+ if (ep_trb != td->last_trb)
+ td->error_mid_td = true;
break;
case COMP_INCOMPATIBLE_DEVICE_ERROR:
case COMP_STALL_ERROR:
--
2.25.1
The last TRB of a isoc TD might not trigger an event if there was
an error event for a TRB mid TD. This is seen on a NEC Corporation
uPD720200 USB 3.0 Host
After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1
generate events for passed TRBs with IOC flag set if it proceeds to the
next TD. This event is either a copy of the original error, or a
"success" transfer event.
If that event is missing then the driver and xHC host get out of sync as
the driver is still expecting a transfer event for that first TD, while
xHC host is already sending events for the next TD in the list.
This leads to
"Transfer event TRB DMA ptr not part of current TD" messages.
As a solution we tag the isoc TDs that get error events mid TD.
If an event doesn't match the first TD, then check if the tag is
set, and event points to the next TD.
In that case give back the fist TD and process the next TD normally
Make sure TD status and transferred length stay valid in both cases
with and without final TD completion event.
Reported-by: Michał Pecio <michal.pecio(a)gmail.com>
Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/
Tested-by: Michał Pecio <michal.pecio(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++-------
drivers/usb/host/xhci.h | 1 +
2 files changed, 61 insertions(+), 14 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 33806ae966f9..41be7d31a36e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2376,6 +2376,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
/* handle completion code */
switch (trb_comp_code) {
case COMP_SUCCESS:
+ /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
+ if (td->error_mid_td)
+ break;
if (remaining) {
frame->status = short_framestatus;
if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
@@ -2401,8 +2404,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
break;
case COMP_USB_TRANSACTION_ERROR:
frame->status = -EPROTO;
+ sum_trbs_for_length = true;
if (ep_trb != td->last_trb)
- return 0;
+ td->error_mid_td = true;
break;
case COMP_STOPPED:
sum_trbs_for_length = true;
@@ -2422,6 +2426,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
break;
}
+ if (td->urb_length_set)
+ goto finish_td;
+
if (sum_trbs_for_length)
frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) +
ep_trb_len - remaining;
@@ -2430,6 +2437,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
td->urb->actual_length += frame->actual_length;
+finish_td:
+ /* Don't give back TD yet if we encountered an error mid TD */
+ if (td->error_mid_td && ep_trb != td->last_trb) {
+ xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
+ td->urb_length_set = true;
+ return 0;
+ }
+
return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
}
@@ -2808,17 +2823,51 @@ static int handle_tx_event(struct xhci_hcd *xhci,
}
if (!ep_seg) {
- if (!ep->skip ||
- !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
- /* Some host controllers give a spurious
- * successful event after a short transfer.
- * Ignore it.
- */
- if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
- ep_ring->last_td_was_short) {
- ep_ring->last_td_was_short = false;
- goto cleanup;
+
+ if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+ skip_isoc_td(xhci, td, ep, status);
+ goto cleanup;
+ }
+
+ /*
+ * Some hosts give a spurious success event after a short
+ * transfer. Ignore it.
+ */
+ if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
+ ep_ring->last_td_was_short) {
+ ep_ring->last_td_was_short = false;
+ goto cleanup;
+ }
+
+ /*
+ * xhci 4.10.2 states isoc endpoints should continue
+ * processing the next TD if there was an error mid TD.
+ * So host like NEC don't generate an event for the last
+ * isoc TRB even if the IOC flag is set.
+ * xhci 4.9.1 states that if there are errors in mult-TRB
+ * TDs xHC should generate an error for that TRB, and if xHC
+ * proceeds to the next TD it should genete an event for
+ * any TRB with IOC flag on the way. Other host follow this.
+ * So this event might be for the next TD.
+ */
+ if (td->error_mid_td &&
+ !list_is_last(&td->td_list, &ep_ring->td_list)) {
+ struct xhci_td *td_next = list_next_entry(td, td_list);
+
+ ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb,
+ td_next->last_trb, ep_trb_dma, false);
+ if (ep_seg) {
+ /* give back previous TD, start handling new */
+ xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
+ ep_ring->dequeue = td->last_trb;
+ ep_ring->deq_seg = td->last_trb_seg;
+ inc_deq(xhci, ep_ring);
+ xhci_td_cleanup(xhci, td, ep_ring, td->status);
+ td = td_next;
}
+ }
+
+ if (!ep_seg) {
/* HC is busted, give up! */
xhci_err(xhci,
"ERROR Transfer event TRB DMA ptr not "
@@ -2830,9 +2879,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
ep_trb_dma, true);
return -ESHUTDOWN;
}
-
- skip_isoc_td(xhci, td, ep, status);
- goto cleanup;
}
if (trb_comp_code == COMP_SHORT_PACKET)
ep_ring->last_td_was_short = true;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index a5c72a634e6a..6f82d404883f 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1549,6 +1549,7 @@ struct xhci_td {
struct xhci_segment *bounce_seg;
/* actual_length of the URB has already been set */
bool urb_length_set;
+ bool error_mid_td;
unsigned int num_trbs;
};
--
2.25.1
Hi,
The interface /sys/devices/system/cpu/cpuX/cache is broken (not populated)
if CPUs have different numbers of subleaves in CPUID 4. This is the case
of Intel Meteor Lake.
This is v4 of a patchset to fix the cache sysfs interface by setting the
number of cache leaves independently for each CPU.
v1, v2, and v3 can be found here[1], here[2], and here[3].
All the tests described in [4] passed.
Changes since v3:
* Fixed another NULL-pointer dereference when checking the validity of
the last-level cache info.
* Added the Reviewed-by tags from Radu and Sudeep. Thanks!
* Rebased on v6.7-rc5.
Changes since v2:
* This version uncovered a NULL-pointer dereference in recent changes to
cacheinfo[5]. This dereference is observed when the system does not
configure cacheinfo early during boot nor makes corrections later
during CPU hotplug; as is the case in x86. Patch 1 fixes this issue.
Changes since v1:
* Dave Hansen suggested to use the existing per-CPU ci_cpu_cacheinfo
variable. Now the global variable num_cache_leaves became useless.
* While here, I noticed that init_cache_level() also became useless:
x86 does not need ci_cpu_cacheinfo::num_levels.
Thanks and BR,
Ricardo
[1]. https://lore.kernel.org/lkml/20230314231658.30169-1-ricardo.neri-calderon@l…
[2]. https://lore.kernel.org/all/20230424001956.21434-1-ricardo.neri-calderon@li…
[3]. https://lore.kernel.org/lkml/20230805012421.7002-1-ricardo.neri-calderon@li…
[4]. https://lore.kernel.org/lkml/20230912032350.GA17008@ranerica-svr.sc.intel.c…
[5]. https://lore.kernel.org/all/20230412185759.755408-1-rrendec@redhat.com/
Ricardo Neri (4):
cacheinfo: Check for null last-level cache info
cacheinfo: Allocate memory for memory if not done from the primary CPU
x86/cacheinfo: Delete global num_cache_leaves
x86/cacheinfo: Clean out init_cache_level()
arch/x86/kernel/cpu/cacheinfo.c | 49 +++++++++++++++++----------------
drivers/base/cacheinfo.c | 9 +++++-
2 files changed, 34 insertions(+), 24 deletions(-)
--
2.25.1
In edac_device_register_sysfs_main_kobj(), when dev_root is NULL,
kobject_init_and_add() is not called.
if (err) { // err = -ENODEV
edac_dbg(1, "Failed to register '.../edac/%s'\n",
edac_dev->name);
goto err_kobj_reg; // This calls kobj_put()
}
This will cause a runtime warning in kobject_put() if the above happens.
Warning:
"kobject: '%s' (%p): is not initialized, yet kobject_put() is being called."
Fix the error handling to avoid the above possible situation.
Cc: <stable(a)vger.kernel.org>
Fixes: cb4a0bec0bb9 ("EDAC/sysfs: move to use bus_get_dev_root()")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
---
This is based on static analysis and only compile tested.
v1->v2: Resend as a patchset as they are two similar bugs.
---
drivers/edac/edac_device_sysfs.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 010c26be5846..4cac14cbdb60 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -253,11 +253,13 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
/* register */
dev_root = bus_get_dev_root(edac_subsys);
- if (dev_root) {
- err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl,
- &dev_root->kobj, "%s", edac_dev->name);
- put_device(dev_root);
- }
+ if (!dev_root)
+ goto module_put;
+
+ err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl,
+ &dev_root->kobj, "%s", edac_dev->name);
+ put_device(dev_root);
+
if (err) {
edac_dbg(1, "Failed to register '.../edac/%s'\n",
edac_dev->name);
@@ -276,8 +278,8 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
/* Error exit stack */
err_kobj_reg:
kobject_put(&edac_dev->kobj);
+module_put:
module_put(edac_dev->owner);
-
err_out:
return err;
}
--
2.42.0
With the dma conf being reallocated on each call to stmmac_open(), any
information in there is lost, unless we specifically handle it.
The STMMAC_TBS_EN bit is set when adding an etf qdisc, and the etf qdisc
therefore would stop working when link was set down and then back up.
Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open")
Cc: stable(a)vger.kernel.org
Signed-off-by: Esben Haabendal <esben(a)geanix.com>
---
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b334eb16da23..25519952f754 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3932,6 +3932,9 @@ static int __stmmac_open(struct net_device *dev,
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
buf_sz = dma_conf->dma_buf_sz;
+ for (int i = 0; i < MTL_MAX_TX_QUEUES; i++)
+ if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN)
+ dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs;
memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));
stmmac_reset_queues_param(priv);
--
2.43.0
Hello Greg,
Hello Filipe,
On Sun, Aug 13, 2023 at 12:34:08PM +0100, fdmanana(a)kernel.org wrote:
> From: Filipe Manana <fdmanana(a)suse.com>
>
> The readdir implementation currently processes always up to the last index
> it finds. This however can result in an infinite loop if the directory has
> a large number of entries such that they won't all fit in the given buffer
> passed to the readdir callback, that is, dir_emit() returns a non-zero
> value. Because in that case readdir() will be called again and if in the
> meanwhile new directory entries were added and we still can't put all the
> remaining entries in the buffer, we keep repeating this over and over.
>
> The following C program and test script reproduce the problem:
This crucial fix successfully landed into vanilla v6.5 [1] and stable
v6.4.12 [2], but unfortunately not into the older stable trees.
Consequently, the fix is missing on the popular Ubuntu versions like
20.04 (KNL v5.15.x) and 22.04.3 (KNL v6.2.x). For that reason, people
still experience infinite loops when building Linux on those systems.
To overcome the issue, people fall back to workarounds [3-4].
The patch seems to apply cleanly to v6.2, but not to v5.15
(v5.15 backport attempt failed miserably).
Is there a chance for:
- Stable maintainers to accept the clean backport to v6.2?
- BTRFS experts to suggest a conflict resolution for v5.15?
[1] https:// git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9b378…
[2] https:// git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=5441532…
[3] https:// android-review.googlesource.com/c/kernel/build/+/2708835
[4] https:// android-review.googlesource.com/c/kernel/build/+/2715296
Best Regards
Eugeniu
From: Filipe Manana <fdmanana(a)suse.com>
[ Upstream commit b4c639f699349880b7918b861e1bd360442ec450 ]
The readdir implementation currently processes always up to the last index
it finds. This however can result in an infinite loop if the directory has
a large number of entries such that they won't all fit in the given buffer
passed to the readdir callback, that is, dir_emit() returns a non-zero
value. Because in that case readdir() will be called again and if in the
meanwhile new directory entries were added and we still can't put all the
remaining entries in the buffer, we keep repeating this over and over.
The following C program and test script reproduce the problem:
$ cat /mnt/readdir_prog.c
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
int main(int argc, char *argv[])
{
DIR *dir = opendir(".");
struct dirent *dd;
while ((dd = readdir(dir))) {
printf("%s\n", dd->d_name);
rename(dd->d_name, "TEMPFILE");
rename("TEMPFILE", dd->d_name);
}
closedir(dir);
}
$ gcc -o /mnt/readdir_prog /mnt/readdir_prog.c
$ cat test.sh
#!/bin/bash
DEV=/dev/sdi
MNT=/mnt/sdi
mkfs.btrfs -f $DEV &> /dev/null
#mkfs.xfs -f $DEV &> /dev/null
#mkfs.ext4 -F $DEV &> /dev/null
mount $DEV $MNT
mkdir $MNT/testdir
for ((i = 1; i <= 2000; i++)); do
echo -n > $MNT/testdir/file_$i
done
cd $MNT/testdir
/mnt/readdir_prog
cd /mnt
umount $MNT
This behaviour is surprising to applications and it's unlike ext4, xfs,
tmpfs, vfat and other filesystems, which always finish. In this case where
new entries were added due to renames, some file names may be reported
more than once, but this varies according to each filesystem - for example
ext4 never reported the same file more than once while xfs reports the
first 13 file names twice.
So change our readdir implementation to track the last index number when
opendir() is called and then make readdir() never process beyond that
index number. This gives the same behaviour as ext4.
Reported-by: Rob Landley <rob(a)landley.net>
Link: https://lore.kernel.org/linux-btrfs/2c8c55ec-04c6-e0dc-9c5c-8c7924778c35@la…
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217681
CC: stable(a)vger.kernel.org # 5.15
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
[ Resolve a conflict due to member changes in 96d89923fa94 ]
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/ctree.h | 1 +
fs/btrfs/delayed-inode.c | 5 +-
fs/btrfs/delayed-inode.h | 1 +
fs/btrfs/inode.c | 131 +++++++++++++++++++++++----------------
4 files changed, 84 insertions(+), 54 deletions(-)
---
Initially I tried to backport all the needed dependency to v5.15, but it
turns out to be a disaster, at least 3 large rework series are needed,
and the deeper I dig, more dependency series came up.
So this version is a manual backport, the conflicts are caused by two
missing dependency:
- 96d89923fa94 ("btrfs: store index number instead of key in struct btrfs_delayed_item")
This changes btrfs_delayed_item::key to index, which saves some space.
Thankfully the new @index member is just the same as @key.offset.
- 3c32c7212f16 ("btrfs: use cached state when looking for delalloc ranges with lseek")
This belongs to the series "[PATCH 0/9] btrfs: more optimizations for
lseek and fiemap", thankfully the conflicting member is only utilized
by the optimizations, we can go without that member.
And if I dig deeper with every conflict, at least the following series
are needed to be backported:
- [PATCH 0/9] btrfs: more optimizations for lseek and fiemap
- [PATCH v2 00/15] btrfs: some updates to delayed items and inode logging
- [PATCH v2 00/16] btrfs: inode creation cleanups and fixes
At this stage, a full backport with all those needed dependency looks
impractical.
Thus a manual backport is created, so far the fstests result looks fine
and generic/736 (the reproducer inspired by the bug report) passes.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1467bf439cb4..7905f178efa3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1361,6 +1361,7 @@ struct btrfs_drop_extents_args {
struct btrfs_file_private {
void *filldir_buf;
+ u64 last_index;
};
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fd951aeaeac5..5a98c5da1225 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1513,6 +1513,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
}
bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ u64 last_index,
struct list_head *ins_list,
struct list_head *del_list)
{
@@ -1532,14 +1533,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node);
- while (item) {
+ while (item && item->key.offset <= last_index) {
refcount_inc(&item->refs);
list_add_tail(&item->readdir_list, ins_list);
item = __btrfs_next_delayed_item(item);
}
item = __btrfs_first_delayed_deletion_item(delayed_node);
- while (item) {
+ while (item && item->key.offset <= last_index) {
refcount_inc(&item->refs);
list_add_tail(&item->readdir_list, del_list);
item = __btrfs_next_delayed_item(item);
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index b2412160c5bc..a9cfce856d2e 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -123,6 +123,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);
/* Used for readdir() */
bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ u64 last_index,
struct list_head *ins_list,
struct list_head *del_list);
void btrfs_readdir_put_delayed_items(struct inode *inode,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 95af29634e55..1df374ce829b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6121,6 +6121,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
return d_splice_alias(inode, dentry);
}
+/*
+ * Find the highest existing sequence number in a directory and then set the
+ * in-memory index_cnt variable to the first free sequence number.
+ */
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_key key, found_key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ int ret;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = (u64)-1;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ /* FIXME: we should be able to handle this */
+ if (ret == 0)
+ goto out;
+ ret = 0;
+
+ if (path->slots[0] == 0) {
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+ path->slots[0]--;
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.objectid != btrfs_ino(inode) ||
+ found_key.type != BTRFS_DIR_INDEX_KEY) {
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+ inode->index_cnt = found_key.offset + 1;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+{
+ if (dir->index_cnt == (u64)-1) {
+ int ret;
+
+ ret = btrfs_inode_delayed_dir_index_count(dir);
+ if (ret) {
+ ret = btrfs_set_inode_index_count(dir);
+ if (ret)
+ return ret;
+ }
+ }
+
+ *index = dir->index_cnt;
+
+ return 0;
+}
+
/*
* All this infrastructure exists because dir_emit can fault, and we are holding
* the tree lock when doing readdir. For now just allocate a buffer and copy
@@ -6133,10 +6201,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
static int btrfs_opendir(struct inode *inode, struct file *file)
{
struct btrfs_file_private *private;
+ u64 last_index;
+ int ret;
+
+ ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
+ if (ret)
+ return ret;
private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
if (!private)
return -ENOMEM;
+ private->last_index = last_index;
private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!private->filldir_buf) {
kfree(private);
@@ -6205,7 +6280,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
- put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+ put = btrfs_readdir_get_delayed_items(inode, private->last_index,
+ &ins_list, &del_list);
again:
key.type = BTRFS_DIR_INDEX_KEY;
@@ -6238,6 +6314,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
break;
if (found_key.offset < ctx->pos)
goto next;
+ if (found_key.offset > private->last_index)
+ break;
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
goto next;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
@@ -6371,57 +6449,6 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
return dirty ? btrfs_dirty_inode(inode) : 0;
}
-/*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to reflect
- * free sequence numbers
- */
-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_key key, found_key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- int ret;
-
- key.objectid = btrfs_ino(inode);
- key.type = BTRFS_DIR_INDEX_KEY;
- key.offset = (u64)-1;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- /* FIXME: we should be able to handle this */
- if (ret == 0)
- goto out;
- ret = 0;
-
- if (path->slots[0] == 0) {
- inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
- }
-
- path->slots[0]--;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != btrfs_ino(inode) ||
- found_key.type != BTRFS_DIR_INDEX_KEY) {
- inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
- }
-
- inode->index_cnt = found_key.offset + 1;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
/*
* helper to find a free sequence number in a given directory. This current
* code is very simple, later versions will do smarter things in the btree
--
2.43.0
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 9a574ea9069be30b835a3da772c039993c43369b
Gitweb: https://git.kernel.org/tip/9a574ea9069be30b835a3da772c039993c43369b
Author: Tim Chen <tim.c.chen(a)linux.intel.com>
AuthorDate: Mon, 22 Jan 2024 15:35:34 -08:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 25 Jan 2024 09:52:40 +01:00
tick/sched: Preserve number of idle sleeps across CPU hotplug events
Commit 71fee48f ("tick-sched: Fix idle and iowait sleeptime accounting vs
CPU hotplug") preserved total idle sleep time and iowait sleeptime across
CPU hotplug events.
Similar reasoning applies to the number of idle calls and idle sleeps to
get the proper average of sleep time per idle invocation.
Preserve those fields too.
Fixes: 71fee48f ("tick-sched: Fix idle and iowait sleeptime accounting vs CPU hotplug")
Signed-off-by: Tim Chen <tim.c.chen(a)linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240122233534.3094238-1-tim.c.chen@linux.intel.c…
---
kernel/time/tick-sched.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d250167..01fb50c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1577,6 +1577,7 @@ void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t idle_sleeptime, iowait_sleeptime;
+ unsigned long idle_calls, idle_sleeps;
# ifdef CONFIG_HIGH_RES_TIMERS
if (ts->sched_timer.base)
@@ -1585,9 +1586,13 @@ void tick_cancel_sched_timer(int cpu)
idle_sleeptime = ts->idle_sleeptime;
iowait_sleeptime = ts->iowait_sleeptime;
+ idle_calls = ts->idle_calls;
+ idle_sleeps = ts->idle_sleeps;
memset(ts, 0, sizeof(*ts));
ts->idle_sleeptime = idle_sleeptime;
ts->iowait_sleeptime = iowait_sleeptime;
+ ts->idle_calls = idle_calls;
+ ts->idle_sleeps = idle_sleeps;
}
#endif
From: Johannes Berg <johannes.berg(a)intel.com>
When a wiphy work is queued with timer, and then again
without a delay, it's started immediately but *also*
started again after the timer expires. This can lead,
for example, to warnings in mac80211's offchannel code
as reported by Jouni. Running the same work twice isn't
expected, of course. Fix this by deleting the timer at
this point, when queuing immediately due to delay=0.
Reported-by: Jouni Malinen <j(a)w1.fi>
Cc: stable(a)vger.kernel.org
Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics")
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
---
net/wireless/core.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 409d74c57ca0..3fb1b637352a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes(a)sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1661,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
unsigned long delay)
{
if (!delay) {
+ del_timer(&dwork->timer);
wiphy_work_queue(wiphy, &dwork->work);
return;
}
--
2.43.0
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 644649553508b9bacf0fc7a5bdc4f9e0165576a5
Gitweb: https://git.kernel.org/tip/644649553508b9bacf0fc7a5bdc4f9e0165576a5
Author: Jiri Wiesner <jwiesner(a)suse.de>
AuthorDate: Mon, 22 Jan 2024 18:23:50 +01:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 25 Jan 2024 09:13:16 +01:00
clocksource: Skip watchdog check for large watchdog intervals
There have been reports of the watchdog marking clocksources unstable on
machines with 8 NUMA nodes:
clocksource: timekeeping watchdog on CPU373:
Marking clocksource 'tsc' as unstable because the skew is too large:
clocksource: 'hpet' wd_nsec: 14523447520
clocksource: 'tsc' cs_nsec: 14524115132
The measured clocksource skew - the absolute difference between cs_nsec
and wd_nsec - was 668 microseconds:
cs_nsec - wd_nsec = 14524115132 - 14523447520 = 667612
The kernel used 200 microseconds for the uncertainty_margin of both the
clocksource and watchdog, resulting in a threshold of 400 microseconds (the
md variable). Both the cs_nsec and the wd_nsec value indicate that the
readout interval was circa 14.5 seconds. The observed behaviour is that
watchdog checks failed for large readout intervals on 8 NUMA node
machines. This indicates that the size of the skew was directly proportinal
to the length of the readout interval on those machines. The measured
clocksource skew, 668 microseconds, was evaluated against a threshold (the
md variable) that is suited for readout intervals of roughly
WATCHDOG_INTERVAL, i.e. HZ >> 1, which is 0.5 second.
The intention of 2e27e793e280 ("clocksource: Reduce clocksource-skew
threshold") was to tighten the threshold for evaluating skew and set the
lower bound for the uncertainty_margin of clocksources to twice
WATCHDOG_MAX_SKEW. Later in c37e85c135ce ("clocksource: Loosen clocksource
watchdog constraints"), the WATCHDOG_MAX_SKEW constant was increased to
125 microseconds to fit the limit of NTP, which is able to use a
clocksource that suffers from up to 500 microseconds of skew per second.
Both the TSC and the HPET use default uncertainty_margin. When the
readout interval gets stretched the default uncertainty_margin is no
longer a suitable lower bound for evaluating skew - it imposes a limit
that is far stricter than the skew with which NTP can deal.
The root causes of the skew being directly proportinal to the length of
the readout interval are:
* the inaccuracy of the shift/mult pairs of clocksources and the watchdog
* the conversion to nanoseconds is imprecise for large readout intervals
Prevent this by skipping the current watchdog check if the readout
interval exceeds 2 * WATCHDOG_INTERVAL. Considering the maximum readout
interval of 2 * WATCHDOG_INTERVAL, the current default uncertainty margin
(of the TSC and HPET) corresponds to a limit on clocksource skew of 250
ppm (microseconds of skew per second). To keep the limit imposed by NTP
(500 microseconds of skew per second) for all possible readout intervals,
the margins would have to be scaled so that the threshold value is
proportional to the length of the actual readout interval.
As for why the readout interval may get stretched: Since the watchdog is
executed in softirq context the expiration of the watchdog timer can get
severely delayed on account of a ksoftirqd thread not getting to run in a
timely manner. Surely, a system with such belated softirq execution is not
working well and the scheduling issue should be looked into but the
clocksource watchdog should be able to deal with it accordingly.
Fixes: 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold")
Suggested-by: Feng Tang <feng.tang(a)intel.com>
Signed-off-by: Jiri Wiesner <jwiesner(a)suse.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Paul E. McKenney <paulmck(a)kernel.org>
Reviewed-by: Feng Tang <feng.tang(a)intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240122172350.GA740@incl
---
kernel/time/clocksource.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c108ed8..3052b1f 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -99,6 +99,7 @@ static u64 suspend_start;
* Interval: 0.5sec.
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
+#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ))
/*
* Threshold: 0.0312s, when doubled: 0.0625s.
@@ -134,6 +135,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
+static int64_t watchdog_max_interval;
static inline void clocksource_watchdog_lock(unsigned long *flags)
{
@@ -399,8 +401,8 @@ static inline void clocksource_reset_watchdog(void)
static void clocksource_watchdog(struct timer_list *unused)
{
u64 csnow, wdnow, cslast, wdlast, delta;
+ int64_t wd_nsec, cs_nsec, interval;
int next_cpu, reset_pending;
- int64_t wd_nsec, cs_nsec;
struct clocksource *cs;
enum wd_read_status read_ret;
unsigned long extra_wait = 0;
@@ -470,6 +472,27 @@ static void clocksource_watchdog(struct timer_list *unused)
if (atomic_read(&watchdog_reset_pending))
continue;
+ /*
+ * The processing of timer softirqs can get delayed (usually
+ * on account of ksoftirqd not getting to run in a timely
+ * manner), which causes the watchdog interval to stretch.
+ * Skew detection may fail for longer watchdog intervals
+ * on account of fixed margins being used.
+ * Some clocksources, e.g. acpi_pm, cannot tolerate
+ * watchdog intervals longer than a few seconds.
+ */
+ interval = max(cs_nsec, wd_nsec);
+ if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) {
+ if (system_state > SYSTEM_SCHEDULING &&
+ interval > 2 * watchdog_max_interval) {
+ watchdog_max_interval = interval;
+ pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n",
+ cs_nsec, wd_nsec);
+ }
+ watchdog_timer.expires = jiffies;
+ continue;
+ }
+
/* Check the deviation from the watchdog clocksource. */
md = cs->uncertainty_margin + watchdog->uncertainty_margin;
if (abs(cs_nsec - wd_nsec) > md) {
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 07c30ea5861fb26a77dade8cdc787252f6122fb1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012204-tattered-oxidation-ac0c@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
07c30ea5861f ("serial: Do not hold the port lock when setting rx-during-tx GPIO")
fe2017ba24f3 ("Merge 6.6-rc6 into tty-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 07c30ea5861fb26a77dade8cdc787252f6122fb1 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Date: Wed, 3 Jan 2024 07:18:12 +0100
Subject: [PATCH] serial: Do not hold the port lock when setting rx-during-tx
GPIO
Both the imx and stm32 driver set the rx-during-tx GPIO in rs485_config().
Since this function is called with the port lock held, this can be a
problem in case that setting the GPIO line can sleep (e.g. if a GPIO
expander is used which is connected via SPI or I2C).
Avoid this issue by moving the GPIO setting outside of the port lock into
the serial core and thus making it a generic feature.
Also with commit c54d48543689 ("serial: stm32: Add support for rs485
RX_DURING_TX output GPIO") the SER_RS485_RX_DURING_TX flag is only set if a
rx-during-tx GPIO is _not_ available, which is wrong. Fix this, too.
Furthermore reset old GPIO settings in case that changing the RS485
configuration failed.
Fixes: c54d48543689 ("serial: stm32: Add support for rs485 RX_DURING_TX output GPIO")
Fixes: ca530cfa968c ("serial: imx: Add support for RS485 RX_DURING_TX output GPIO")
Cc: Shawn Guo <shawnguo(a)kernel.org>
Cc: Sascha Hauer <s.hauer(a)pengutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Link: https://lore.kernel.org/r/20240103061818.564-2-l.sanfilippo@kunbus.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index a0fcf18cbeac..8b7d9c5a7455 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -1948,10 +1948,6 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio
rs485conf->flags & SER_RS485_RX_DURING_TX)
imx_uart_start_rx(port);
- if (port->rs485_rx_during_tx_gpio)
- gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio,
- !!(rs485conf->flags & SER_RS485_RX_DURING_TX));
-
return 0;
}
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index cc866da50b81..8d381c283cec 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1407,6 +1407,16 @@ static void uart_set_rs485_termination(struct uart_port *port,
!!(rs485->flags & SER_RS485_TERMINATE_BUS));
}
+static void uart_set_rs485_rx_during_tx(struct uart_port *port,
+ const struct serial_rs485 *rs485)
+{
+ if (!(rs485->flags & SER_RS485_ENABLED))
+ return;
+
+ gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio,
+ !!(rs485->flags & SER_RS485_RX_DURING_TX));
+}
+
static int uart_rs485_config(struct uart_port *port)
{
struct serial_rs485 *rs485 = &port->rs485;
@@ -1418,12 +1428,17 @@ static int uart_rs485_config(struct uart_port *port)
uart_sanitize_serial_rs485(port, rs485);
uart_set_rs485_termination(port, rs485);
+ uart_set_rs485_rx_during_tx(port, rs485);
uart_port_lock_irqsave(port, &flags);
ret = port->rs485_config(port, NULL, rs485);
uart_port_unlock_irqrestore(port, flags);
- if (ret)
+ if (ret) {
memset(rs485, 0, sizeof(*rs485));
+ /* unset GPIOs */
+ gpiod_set_value_cansleep(port->rs485_term_gpio, 0);
+ gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, 0);
+ }
return ret;
}
@@ -1462,6 +1477,7 @@ static int uart_set_rs485_config(struct tty_struct *tty, struct uart_port *port,
return ret;
uart_sanitize_serial_rs485(port, &rs485);
uart_set_rs485_termination(port, &rs485);
+ uart_set_rs485_rx_during_tx(port, &rs485);
uart_port_lock_irqsave(port, &flags);
ret = port->rs485_config(port, &tty->termios, &rs485);
@@ -1473,8 +1489,14 @@ static int uart_set_rs485_config(struct tty_struct *tty, struct uart_port *port,
port->ops->set_mctrl(port, port->mctrl);
}
uart_port_unlock_irqrestore(port, flags);
- if (ret)
+ if (ret) {
+ /* restore old GPIO settings */
+ gpiod_set_value_cansleep(port->rs485_term_gpio,
+ !!(port->rs485.flags & SER_RS485_TERMINATE_BUS));
+ gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio,
+ !!(port->rs485.flags & SER_RS485_RX_DURING_TX));
return ret;
+ }
if (copy_to_user(rs485_user, &port->rs485, sizeof(port->rs485)))
return -EFAULT;
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 9781c143def2..794b77512740 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -226,12 +226,6 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter
stm32_usart_clr_bits(port, ofs->cr1, BIT(cfg->uart_enable_bit));
- if (port->rs485_rx_during_tx_gpio)
- gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio,
- !!(rs485conf->flags & SER_RS485_RX_DURING_TX));
- else
- rs485conf->flags |= SER_RS485_RX_DURING_TX;
-
if (rs485conf->flags & SER_RS485_ENABLED) {
cr1 = readl_relaxed(port->membase + ofs->cr1);
cr3 = readl_relaxed(port->membase + ofs->cr3);
@@ -256,6 +250,8 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter
writel_relaxed(cr3, port->membase + ofs->cr3);
writel_relaxed(cr1, port->membase + ofs->cr1);
+
+ rs485conf->flags |= SER_RS485_RX_DURING_TX;
} else {
stm32_usart_clr_bits(port, ofs->cr3,
USART_CR3_DEM | USART_CR3_DEP);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 1a33e33ca0e80d485458410f149265cdc0178cfa
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012243-scorch-bundle-08dd@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
1a33e33ca0e8 ("serial: core: set missing supported flag for RX during TX GPIO")
7cda0b9eb6eb ("serial: core: Simplify uart_get_rs485_mode()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1a33e33ca0e80d485458410f149265cdc0178cfa Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Date: Wed, 3 Jan 2024 07:18:13 +0100
Subject: [PATCH] serial: core: set missing supported flag for RX during TX
GPIO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If the RS485 feature RX-during-TX is supported by means of a GPIO set the
according supported flag. Otherwise setting this feature from userspace may
not be possible, since in uart_sanitize_serial_rs485() the passed RS485
configuration is matched against the supported features and unsupported
settings are thereby removed and thus take no effect.
Cc: <stable(a)vger.kernel.org>
Fixes: 163f080eb717 ("serial: core: Add option to output RS485 RX_DURING_TX state via GPIO")
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Link: https://lore.kernel.org/r/20240103061818.564-3-l.sanfilippo@kunbus.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 8d381c283cec..850f24cc53e5 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -3649,6 +3649,8 @@ int uart_get_rs485_mode(struct uart_port *port)
if (IS_ERR(desc))
return dev_err_probe(dev, PTR_ERR(desc), "Cannot get rs485-rx-during-tx-gpios\n");
port->rs485_rx_during_tx_gpio = desc;
+ if (port->rs485_rx_during_tx_gpio)
+ port->rs485_supported.flags |= SER_RS485_RX_DURING_TX;
return 0;
}
Page cache reads are lockless, so setting the freshly allocated page
uptodate before we've overwritten it with the data it's supposed to have
in it will allow a simultaneous reader to see old data. Move the call
to SetPageUptodate into ubifs_write_end(), which is after we copied the
new data into the page.
Fixes: 1e51764a3c2a ("UBIFS: add new flash file system")
Cc: stable(a)vger.kernel.org
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
---
fs/ubifs/file.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5029eb3390a5..d0694b83dd02 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -261,9 +261,6 @@ static int write_begin_slow(struct address_space *mapping,
return err;
}
}
-
- SetPageUptodate(page);
- ClearPageError(page);
}
if (PagePrivate(page))
@@ -463,9 +460,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
return err;
}
}
-
- SetPageUptodate(page);
- ClearPageError(page);
}
err = allocate_budget(c, page, ui, appending);
@@ -475,10 +469,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* If we skipped reading the page because we were going to
* write all of it, then it is not up to date.
*/
- if (skipped_read) {
+ if (skipped_read)
ClearPageChecked(page);
- ClearPageUptodate(page);
- }
/*
* Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the
@@ -569,6 +561,9 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
goto out;
}
+ if (len == PAGE_SIZE)
+ SetPageUptodate(page);
+
if (!PagePrivate(page)) {
attach_page_private(page, (void *)1);
atomic_long_inc(&c->dirty_pg_cnt);
--
2.43.0
For !CONFIG_SPARSE_IRQ kernel, early_irq_init() is supposed to
initialize all the desc entries in system, desc->resend_node
included.
Thus, initialize desc->resend_node for all irq_desc entries, rather
than irq_desc[0] only, which is the current implementation is about.
Fixes: bc06a9e08742 ("genirq: Use hlist for managing resend handlers")
Cc: stable(a)vger.kernel.org
Acked-by: Marc Zyngier <maz(a)kernel.org>
Signed-off-by: Dawei Li <dawei.li(a)shingroup.cn>
---
kernel/irq/irqdesc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 27ca1c866f29..371eb1711d34 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -600,7 +600,7 @@ int __init early_irq_init(void)
mutex_init(&desc[i].request_mutex);
init_waitqueue_head(&desc[i].wait_for_threads);
desc_set_defaults(i, &desc[i], node, NULL, NULL);
- irq_resend_init(desc);
+ irq_resend_init(&desc[i]);
}
return arch_early_irq_init();
}
--
2.27.0
When there is no device on the bus for a given address, the pull up
resistor on the data line results in the read returning 0xffff. The
phylib core code understands this when scanning for devices on the
bus, and a number of MDIO bus masters make use of this as a way to
indicate they cannot perform the read.
Make us of this as a minimal fix for stable where the mv88e6xxx
returns EOPNOTSUPP when the hardware does not support C45, but phylib
interprets this as a fatal error, which it should not be.
Cc: stable(a)vger.kernel.org
Reported-by: Tim Menninger <tmenninger(a)purestorage.com>
Fixes: 1a136ca2e089 ("net: mdio: scan bus based on bus capabilities for C22 and C45")
Fixes: da099a7fb13d ("net: phy: Remove probe_capabilities")
Signed-off-by: Andrew Lunn <andrew(a)lunn.ch>
---
drivers/net/dsa/mv88e6xxx/chip.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 383b3c4d6f59..614cabb5c1b0 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3659,7 +3659,7 @@ static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad,
int err;
if (!chip->info->ops->phy_read_c45)
- return -EOPNOTSUPP;
+ return 0xffff;
mv88e6xxx_reg_lock(chip);
err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val);
--
2.43.0
With the dma conf being reallocated on each call to stmmac_open(), any
information in there is lost, unless we specifically handle it.
The STMMAC_TBS_EN bit is set when adding an etf qdisc, and the etf qdisc
therefore would stop working when link was set down and then back up.
Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open")
Cc: stable(a)vger.kernel.org
Signed-off-by: Esben Haabendal <esben(a)geanix.com>
---
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a0e46369ae15..691bf3ef5e30 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3932,6 +3932,9 @@ static int __stmmac_open(struct net_device *dev,
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
buf_sz = dma_conf->dma_buf_sz;
+ for (int i = 0; i < MTL_MAX_TX_QUEUES; i++)
+ if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN)
+ dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs;
memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));
stmmac_reset_queues_param(priv);
--
2.43.0
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 56062d60f117dccfb5281869e0ab61e090baf864
Gitweb: https://git.kernel.org/tip/56062d60f117dccfb5281869e0ab61e090baf864
Author: Richard Palethorpe <rpalethorpe(a)suse.com>
AuthorDate: Wed, 10 Jan 2024 15:01:22 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Wed, 24 Jan 2024 11:49:19 +01:00
x86/entry/ia32: Ensure s32 is sign extended to s64
Presently ia32 registers stored in ptregs are unconditionally cast to
unsigned int by the ia32 stub. They are then cast to long when passed to
__se_sys*, but will not be sign extended.
This takes the sign of the syscall argument into account in the ia32
stub. It still casts to unsigned int to avoid implementation specific
behavior. However then casts to int or unsigned int as necessary. So that
the following cast to long sign extends the value.
This fixes the io_pgetevents02 LTP test when compiled with -m32. Presently
the systemcall io_pgetevents_time64() unexpectedly accepts -1 for the
maximum number of events.
It doesn't appear other systemcalls with signed arguments are effected
because they all have compat variants defined and wired up.
Fixes: ebeb8c82ffaf ("syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32")
Suggested-by: Arnd Bergmann <arnd(a)arndb.de>
Signed-off-by: Richard Palethorpe <rpalethorpe(a)suse.com>
Signed-off-by: Nikolay Borisov <nik.borisov(a)suse.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Arnd Bergmann <arnd(a)arndb.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240110130122.3836513-1-nik.borisov@suse.com
Link: https://lore.kernel.org/ltp/20210921130127.24131-1-rpalethorpe@suse.com/
---
arch/x86/include/asm/syscall_wrapper.h | 25 +++++++++++++++++++++----
include/linux/syscalls.h | 1 +-
2 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 21f9407..7e88705 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -58,12 +58,29 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
,,regs->di,,regs->si,,regs->dx \
,,regs->r10,,regs->r8,,regs->r9) \
+
+/* SYSCALL_PT_ARGS is Adapted from s390x */
+#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \
+ SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp))
+#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \
+ SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di))
+#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \
+ SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si))
+#define SYSCALL_PT_ARG3(m, t1, t2, t3) \
+ SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx))
+#define SYSCALL_PT_ARG2(m, t1, t2) \
+ SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx))
+#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx))
+#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+
+#define __SC_COMPAT_CAST(t, a) \
+ (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \
+ (unsigned int)a
+
/* Mapping of registers to parameters for syscalls on i386 */
#define SC_IA32_REGS_TO_ARGS(x, ...) \
- __MAP(x,__SC_ARGS \
- ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \
- ,,(unsigned int)regs->dx,,(unsigned int)regs->si \
- ,,(unsigned int)regs->di,,(unsigned int)regs->bp)
+ SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \
+ __MAP(x, __SC_TYPE, __VA_ARGS__)) \
#define __SYS_STUB0(abi, name) \
long __##abi##_##name(const struct pt_regs *regs); \
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cdba4d0..77eb9b0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -128,6 +128,7 @@ struct mnt_id_req;
#define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL))
#define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a
#define __SC_CAST(t, a) (__force t) a
+#define __SC_TYPE(t, a) t
#define __SC_ARGS(t, a) a
#define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long))
The patch titled
Subject: exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock)
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irqsiglock.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Oleg Nesterov <oleg(a)redhat.com>
Subject: exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock)
Date: Tue, 23 Jan 2024 16:34:00 +0100
After the recent changes nobody use siglock to read the values protected
by stats_lock, we can kill spin_lock_irq(¤t->sighand->siglock) and
update the comment.
With this patch only __exit_signal() and thread_group_start_cputime() take
stats_lock under siglock.
Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Dylan Hatch <dylanbhatch(a)google.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/exit.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
--- a/kernel/exit.c~exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irqsiglock
+++ a/kernel/exit.c
@@ -1127,17 +1127,14 @@ static int wait_task_zombie(struct wait_
* and nobody can change them.
*
* psig->stats_lock also protects us from our sub-threads
- * which can reap other children at the same time. Until
- * we change k_getrusage()-like users to rely on this lock
- * we have to take ->siglock as well.
+ * which can reap other children at the same time.
*
* We use thread_group_cputime_adjusted() to get times for
* the thread group, which consolidates times for all threads
* in the group including the group leader.
*/
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
- spin_lock_irq(¤t->sighand->siglock);
- write_seqlock(&psig->stats_lock);
+ write_seqlock_irq(&psig->stats_lock);
psig->cutime += tgutime + sig->cutime;
psig->cstime += tgstime + sig->cstime;
psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1160,8 +1157,7 @@ static int wait_task_zombie(struct wait_
psig->cmaxrss = maxrss;
task_io_accounting_add(&psig->ioac, &p->ioac);
task_io_accounting_add(&psig->ioac, &sig->ioac);
- write_sequnlock(&psig->stats_lock);
- spin_unlock_irq(¤t->sighand->siglock);
+ write_sequnlock_irq(&psig->stats_lock);
}
if (wo->wo_rusage)
_
Patches currently in -mm which might be from oleg(a)redhat.com are
getrusage-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch
getrusage-use-sig-stats_lock-rather-than-lock_task_sighand.patch
fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch
fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch
exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irqsiglock.patch
ptrace_attach-shift-sendsigstop-into-ptrace_set_stopped.patch
The patch titled
Subject: fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Oleg Nesterov <oleg(a)redhat.com>
Subject: fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand()
Date: Tue, 23 Jan 2024 16:33:55 +0100
Patch series "fs/proc: do_task_stat: use sig->stats_".
do_task_stat() has the same problem as getrusage() had before "getrusage:
use sig->stats_lock rather than lock_task_sighand()": a hard lockup. If
NR_CPUS threads call lock_task_sighand() at the same time and the process
has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS *
NR_THREADS) time.
This patch (of 3):
thread_group_cputime() does its own locking, we can safely shift
thread_group_cputime_adjusted() which does another for_each_thread loop
outside of ->siglock protected section.
Not only this removes for_each_thread() from the critical section with
irqs disabled, this removes another case when stats_lock is taken with
siglock held. We want to remove this dependency, then we can change the
users of stats_lock to not disable irqs.
Link: https://lkml.kernel.org/r/20240123153313.GA21832@redhat.com
Link: https://lkml.kernel.org/r/20240123153355.GA21854@redhat.com
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Dylan Hatch <dylanbhatch(a)google.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/array.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
--- a/fs/proc/array.c~fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand
+++ a/fs/proc/array.c
@@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file
sigemptyset(&sigign);
sigemptyset(&sigcatch);
- cutime = cstime = utime = stime = 0;
+ cutime = cstime = 0;
cgtime = gtime = 0;
if (lock_task_sighand(task, &flags)) {
@@ -546,7 +546,6 @@ static int do_task_stat(struct seq_file
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
- thread_group_cputime_adjusted(task, &utime, &stime);
gtime += sig->gtime;
if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
@@ -562,10 +561,13 @@ static int do_task_stat(struct seq_file
if (permitted && (!whole || num_threads < 2))
wchan = !task_is_running(task);
- if (!whole) {
+
+ if (whole) {
+ thread_group_cputime_adjusted(task, &utime, &stime);
+ } else {
+ task_cputime_adjusted(task, &utime, &stime);
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- task_cputime_adjusted(task, &utime, &stime);
gtime = task_gtime(task);
}
_
Patches currently in -mm which might be from oleg(a)redhat.com are
getrusage-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch
getrusage-use-sig-stats_lock-rather-than-lock_task_sighand.patch
fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch
fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch
exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irqsiglock.patch
ptrace_attach-shift-sendsigstop-into-ptrace_set_stopped.patch
The patch titled
Subject: fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Oleg Nesterov <oleg(a)redhat.com>
Subject: fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats
Date: Tue, 23 Jan 2024 16:33:57 +0100
lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call
do_task_stat() at the same time and the process has NR_THREADS, it will
spin with irqs disabled O(NR_CPUS * NR_THREADS) time.
Change do_task_stat() to use sig->stats_lock to gather the statistics
outside of ->siglock protected section, in the likely case this code will
run lockless.
Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Dylan Hatch <dylanbhatch(a)google.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/array.c | 58 +++++++++++++++++++++++++---------------------
1 file changed, 32 insertions(+), 26 deletions(-)
--- a/fs/proc/array.c~fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats
+++ a/fs/proc/array.c
@@ -477,13 +477,13 @@ static int do_task_stat(struct seq_file
int permitted;
struct mm_struct *mm;
unsigned long long start_time;
- unsigned long cmin_flt = 0, cmaj_flt = 0;
- unsigned long min_flt = 0, maj_flt = 0;
- u64 cutime, cstime, utime, stime;
- u64 cgtime, gtime;
+ unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
+ u64 cutime, cstime, cgtime, utime, stime, gtime;
unsigned long rsslim = 0;
unsigned long flags;
int exit_code = task->exit_code;
+ struct signal_struct *sig = task->signal;
+ unsigned int seq = 1;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -511,12 +511,8 @@ static int do_task_stat(struct seq_file
sigemptyset(&sigign);
sigemptyset(&sigcatch);
- cutime = cstime = 0;
- cgtime = gtime = 0;
if (lock_task_sighand(task, &flags)) {
- struct signal_struct *sig = task->signal;
-
if (sig->tty) {
struct pid *pgrp = tty_get_pgrp(sig->tty);
tty_pgrp = pid_nr_ns(pgrp, ns);
@@ -527,27 +523,9 @@ static int do_task_stat(struct seq_file
num_threads = get_nr_threads(task);
collect_sigign_sigcatch(task, &sigign, &sigcatch);
- cmin_flt = sig->cmin_flt;
- cmaj_flt = sig->cmaj_flt;
- cutime = sig->cutime;
- cstime = sig->cstime;
- cgtime = sig->cgtime;
rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
- /* add up live thread stats at the group level */
if (whole) {
- struct task_struct *t;
-
- __for_each_thread(sig, t) {
- min_flt += t->min_flt;
- maj_flt += t->maj_flt;
- gtime += task_gtime(t);
- }
-
- min_flt += sig->min_flt;
- maj_flt += sig->maj_flt;
- gtime += sig->gtime;
-
if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
exit_code = sig->group_exit_code;
}
@@ -562,6 +540,34 @@ static int do_task_stat(struct seq_file
if (permitted && (!whole || num_threads < 2))
wchan = !task_is_running(task);
+ do {
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
+ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+
+ cmin_flt = sig->cmin_flt;
+ cmaj_flt = sig->cmaj_flt;
+ cutime = sig->cutime;
+ cstime = sig->cstime;
+ cgtime = sig->cgtime;
+
+ if (whole) {
+ struct task_struct *t;
+
+ min_flt = sig->min_flt;
+ maj_flt = sig->maj_flt;
+ gtime = sig->gtime;
+
+ rcu_read_lock();
+ __for_each_thread(sig, t) {
+ min_flt += t->min_flt;
+ maj_flt += t->maj_flt;
+ gtime += task_gtime(t);
+ }
+ rcu_read_unlock();
+ }
+ } while (need_seqretry(&sig->stats_lock, seq));
+ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
if (whole) {
thread_group_cputime_adjusted(task, &utime, &stime);
} else {
_
Patches currently in -mm which might be from oleg(a)redhat.com are
getrusage-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch
getrusage-use-sig-stats_lock-rather-than-lock_task_sighand.patch
fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch
fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch
exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irqsiglock.patch
ptrace_attach-shift-sendsigstop-into-ptrace_set_stopped.patch
The patch titled
Subject: mm: thp_get_unmapped_area must honour topdown preference
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-thp_get_unmapped_area-must-honour-topdown-preference.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: thp_get_unmapped_area must honour topdown preference
Date: Tue, 23 Jan 2024 17:14:20 +0000
The addition of commit efa7df3e3bb5 ("mm: align larger anonymous mappings
on THP boundaries") caused the "virtual_address_range" mm selftest to
start failing on arm64. Let's fix that regression.
There were 2 visible problems when running the test; 1) it takes much
longer to execute, and 2) the test fails. Both are related:
The (first part of the) test allocates as many 1GB anonymous blocks as it
can in the low 256TB of address space, passing NULL as the addr hint to
mmap. Before the faulty patch, all allocations were abutted and contained
in a single, merged VMA. However, after this patch, each allocation is in
its own VMA, and there is a 2M gap between each VMA. This causes the 2
problems in the test: 1) mmap becomes MUCH slower because there are so
many VMAs to check to find a new 1G gap. 2) mmap fails once it hits the
VMA limit (/proc/sys/vm/max_map_count). Hitting this limit then causes a
subsequent calloc() to fail, which causes the test to fail.
The problem is that arm64 (unlike x86) selects
ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT. But __thp_get_unmapped_area()
allocates len+2M then always aligns to the bottom of the discovered gap.
That causes the 2M hole.
Fix this by detecting cases where we can still achive the alignment goal
when moved to the top of the allocated area, if configured to prefer
top-down allocation.
While we are at it, fix thp_get_unmapped_area's use of pgoff, which should
always be zero for anonymous mappings. Prior to the faulty change, while
it was possible for user space to pass in pgoff!=0, the old
mm->get_unmapped_area() handler would not use it. thp_get_unmapped_area()
does use it, so let's explicitly zero it before calling the handler. This
should also be the correct behavior for arches that define their own
get_unmapped_area() handler.
Link: https://lkml.kernel.org/r/20240123171420.3970220-1-ryan.roberts@arm.com
Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries")
Closes: https://lore.kernel.org/linux-mm/1e8f5ac7-54ce-433a-ae53-81522b2320e1@arm.c…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 10 ++++++++--
mm/mmap.c | 6 ++++--
2 files changed, 12 insertions(+), 4 deletions(-)
--- a/mm/huge_memory.c~mm-thp_get_unmapped_area-must-honour-topdown-preference
+++ a/mm/huge_memory.c
@@ -810,7 +810,7 @@ static unsigned long __thp_get_unmapped_
{
loff_t off_end = off + len;
loff_t off_align = round_up(off, size);
- unsigned long len_pad, ret;
+ unsigned long len_pad, ret, off_sub;
if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
return 0;
@@ -839,7 +839,13 @@ static unsigned long __thp_get_unmapped_
if (ret == addr)
return addr;
- ret += (off - ret) & (size - 1);
+ off_sub = (off - ret) & (size - 1);
+
+ if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown &&
+ !off_sub)
+ return ret + size;
+
+ ret += off_sub;
return ret;
}
--- a/mm/mmap.c~mm-thp_get_unmapped_area-must-honour-topdown-preference
+++ a/mm/mmap.c
@@ -1825,15 +1825,17 @@ get_unmapped_area(struct file *file, uns
/*
* mmap_region() will call shmem_zero_setup() to create a file,
* so use shmem's get_unmapped_area in case it can be huge.
- * do_mmap() will clear pgoff, so match alignment.
*/
- pgoff = 0;
get_area = shmem_get_unmapped_area;
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
/* Ensures that larger anonymous mappings are THP aligned. */
get_area = thp_get_unmapped_area;
}
+ /* Always treat pgoff as zero for anonymous memory. */
+ if (!file)
+ pgoff = 0;
+
addr = get_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
selftests-mm-ksm_tests-should-only-madv_hugepage-valid-memory.patch
mm-thp_get_unmapped_area-must-honour-topdown-preference.patch
tools-mm-add-thpmaps-script-to-dump-thp-usage-info.patch