Nouveau currently relies on the assumption that dma_fences will only
ever get signalled through nouveau_fence_signal(), which takes care of
removing a signalled fence from the list nouveau_fence_chan.pending.
This self-imposed rule is violated in nouveau_fence_done(), where
dma_fence_is_signaled() can signal the fence without removing it from
the list. This enables accesses to already signalled fences through the
list, which is a bug.
Furthermore, it must always be possible to use standard dma_fence
methods on a dma_fence and observe valid behavior. The canonical way of
ensuring that signalling a fence has additional effects is to add those
effects to a callback and register it on that fence.
Move the code from nouveau_fence_signal() into a dma_fence callback.
Register that callback when creating the fence.
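For illustration, a minimal sketch of that canonical pattern with the
generic dma_fence API (my_fence and my_cleanup_cb are illustrative
names, not taken from this patch):

	/* The side effects of signalling live in a callback, so they run
	 * no matter which path signals the fence. */
	static void my_cleanup_cb(struct dma_fence *f, struct dma_fence_cb *cb)
	{
		/* e.g. unlink the fence from a driver-private list */
	}

	int err = dma_fence_add_callback(&my_fence->base, &my_fence->cb,
					 my_cleanup_cb);
	if (err == -ENOENT) {
		/* Fence was already signalled: the callback was not
		 * installed, so run the cleanup directly if needed. */
	}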
Cc: <stable@vger.kernel.org> # 4.10+
Signed-off-by: Philipp Stanner <phasta@kernel.org>
---
Changes in v2:
- Remove Fixes: tag. (Danilo)
- Remove integer "drop" and call nvif_event_block() in the fence
callback. (Danilo)
---
drivers/gpu/drm/nouveau/nouveau_fence.c | 52 +++++++++++++------------
drivers/gpu/drm/nouveau/nouveau_fence.h | 1 +
2 files changed, 29 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 7cc84472cece..cf510ef9641a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -50,24 +50,24 @@ nouveau_fctx(struct nouveau_fence *fence)
return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
}
-static int
-nouveau_fence_signal(struct nouveau_fence *fence)
+static void
+nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct dma_fence_cb *cb)
{
- int drop = 0;
+ struct nouveau_fence_chan *fctx;
+ struct nouveau_fence *fence;
+
+ fence = container_of(dfence, struct nouveau_fence, base);
+ fctx = nouveau_fctx(fence);
- dma_fence_signal_locked(&fence->base);
list_del(&fence->head);
rcu_assign_pointer(fence->channel, NULL);
if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
- struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
-
if (!--fctx->notify_ref)
- drop = 1;
+ nvif_event_block(&fctx->event);
}
dma_fence_put(&fence->base);
- return drop;
}
static struct nouveau_fence *
@@ -93,8 +93,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
if (error)
dma_fence_set_error(&fence->base, error);
- if (nouveau_fence_signal(fence))
- nvif_event_block(&fctx->event);
+ dma_fence_signal_locked(&fence->base);
}
fctx->killed = 1;
spin_unlock_irqrestore(&fctx->lock, flags);
@@ -127,11 +126,10 @@ nouveau_fence_context_free(struct nouveau_fence_chan *fctx)
kref_put(&fctx->fence_ref, nouveau_fence_context_put);
}
-static int
+static void
nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
{
struct nouveau_fence *fence;
- int drop = 0;
u32 seq = fctx->read(chan);
while (!list_empty(&fctx->pending)) {
@@ -140,10 +138,8 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
if ((int)(seq - fence->base.seqno) < 0)
break;
- drop |= nouveau_fence_signal(fence);
+ dma_fence_signal_locked(&fence->base);
}
-
- return drop;
}
static void
@@ -152,7 +148,6 @@ nouveau_fence_uevent_work(struct work_struct *work)
struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
uevent_work);
unsigned long flags;
- int drop = 0;
spin_lock_irqsave(&fctx->lock, flags);
if (!list_empty(&fctx->pending)) {
@@ -161,11 +156,8 @@ nouveau_fence_uevent_work(struct work_struct *work)
fence = list_entry(fctx->pending.next, typeof(*fence), head);
chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
- if (nouveau_fence_update(chan, fctx))
- drop = 1;
+ nouveau_fence_update(chan, fctx);
}
- if (drop)
- nvif_event_block(&fctx->event);
spin_unlock_irqrestore(&fctx->lock, flags);
}
@@ -235,6 +227,19 @@ nouveau_fence_emit(struct nouveau_fence *fence)
&fctx->lock, fctx->context, ++fctx->sequence);
kref_get(&fctx->fence_ref);
+ fence->cb.func = nouveau_fence_cleanup_cb;
+ /* Adding a callback runs into __dma_fence_enable_signaling(), which will
+ * ultimately run into nouveau_fence_no_signaling(), where a WARN_ON
+ * would fire because the refcount can be dropped there.
+ *
+ * Increment the refcount here temporarily to work around that.
+ */
+ dma_fence_get(&fence->base);
+ ret = dma_fence_add_callback(&fence->base, &fence->cb, nouveau_fence_cleanup_cb);
+ dma_fence_put(&fence->base);
+ if (ret)
+ return ret;
+
ret = fctx->emit(fence);
if (!ret) {
dma_fence_get(&fence->base);
@@ -246,8 +251,7 @@ nouveau_fence_emit(struct nouveau_fence *fence)
return -ENODEV;
}
- if (nouveau_fence_update(chan, fctx))
- nvif_event_block(&fctx->event);
+ nouveau_fence_update(chan, fctx);
list_add_tail(&fence->head, &fctx->pending);
spin_unlock_irq(&fctx->lock);
@@ -270,8 +274,8 @@ nouveau_fence_done(struct nouveau_fence *fence)
spin_lock_irqsave(&fctx->lock, flags);
chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
- if (chan && nouveau_fence_update(chan, fctx))
- nvif_event_block(&fctx->event);
+ if (chan)
+ nouveau_fence_update(chan, fctx);
spin_unlock_irqrestore(&fctx->lock, flags);
}
return dma_fence_is_signaled(&fence->base);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 8bc065acfe35..e6b2df7fdc42 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -10,6 +10,7 @@ struct nouveau_bo;
struct nouveau_fence {
struct dma_fence base;
+ struct dma_fence_cb cb;
struct list_head head;
--
2.48.1
Nikolay reports [1] that accessing BIOS data (first 1MB of the physical
address space) via /dev/mem results in an SEPT violation.
The cause is ioremap() (via xlate_dev_mem_ptr()) establishing an
unencrypted mapping where the kernel had established an encrypted
mapping previously.
Teach __ioremap_check_other() that this address space shall always be
mapped as encrypted since historically it is memory-resident data, not
MMIO with side effects.
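For reference, the fix below keys off PHYS_PFN(addr) < 256: with 4 KiB
pages that is 256 * 4096 bytes = 0x100000, i.e. exactly the first 1 MiB
that devmem_is_allowed() lets /dev/mem read.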
Cc: <x86@kernel.org>
Cc: Vishal Annapurve <vannapurve@google.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Nikolay Borisov <nik.borisov@suse.com>
Closes: http://lore.kernel.org/20250318113604.297726-1-nik.borisov@suse.com [1]
Tested-by: Nikolay Borisov <nik.borisov@suse.com>
Fixes: 9aa6ea69852c ("x86/tdx: Make pages shared in ioremap()")
Cc: <stable@vger.kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
arch/x86/mm/ioremap.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 42c90b420773..9e81286a631e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -122,6 +122,10 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des
return;
}
+ /* Ensure BIOS data (see devmem_is_allowed()) is consistently mapped */
+ if (PHYS_PFN(addr) < 256)
+ desc->flags |= IORES_MAP_ENCRYPTED;
+
if (!IS_ENABLED(CONFIG_EFI))
return;
The call to read_word_at_a_time() in sized_strscpy() is problematic
with MTE because it may trigger a tag check fault when reading
across a tag granule (16-byte) boundary. To make this code
MTE-compatible, let's start using load_unaligned_zeropad()
on architectures where it is available (i.e. architectures that
define CONFIG_DCACHE_WORD_ACCESS). Because load_unaligned_zeropad()
takes care of page boundaries as well as tag granule boundaries,
also disable the code that prevents crossing page boundaries when
using load_unaligned_zeropad().
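For context, a hedged sketch of the word-at-a-time scan this patch
converts, using the helpers from <asm/word-at-a-time.h> (variable
names simplified from lib/string.c):

	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	unsigned long c, data;

	/* Pads with zeroes instead of faulting when the load runs past
	 * a mapping (or, on arm64 MTE, tag granule) boundary, so no
	 * boundary pre-check is required. */
	c = load_unaligned_zeropad(src + res);
	if (has_zero(c, &data, &constants)) {
		data = prep_zero_mask(c, data, &constants);
		data = create_zero_mask(data);
		/* find_zero(data) gives the byte offset of the NUL */
	}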
Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/If4b22e43b5a4ca49726b4bf98ada827fd…
Fixes: 94ab5b61ee16 ("kasan, arm64: enable CONFIG_KASAN_HW_TAGS")
Cc: stable@vger.kernel.org
---
v2:
- new approach
lib/string.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/lib/string.c b/lib/string.c
index eb4486ed40d25..b632c71df1a50 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -119,6 +119,7 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count)
if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
return -E2BIG;
+#ifndef CONFIG_DCACHE_WORD_ACCESS
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
/*
* If src is unaligned, don't cross a page boundary,
@@ -133,12 +134,14 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count)
/* If src or dest is unaligned, don't do word-at-a-time. */
if (((long) dest | (long) src) & (sizeof(long) - 1))
max = 0;
+#endif
#endif
/*
- * read_word_at_a_time() below may read uninitialized bytes after the
- * trailing zero and use them in comparisons. Disable this optimization
- * under KMSAN to prevent false positive reports.
+ * load_unaligned_zeropad() or read_word_at_a_time() below may read
+ * uninitialized bytes after the trailing zero and use them in
+ * comparisons. Disable this optimization under KMSAN to prevent
+ * false positive reports.
*/
if (IS_ENABLED(CONFIG_KMSAN))
max = 0;
@@ -146,7 +149,11 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count)
while (max >= sizeof(unsigned long)) {
unsigned long c, data;
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+ c = load_unaligned_zeropad(src+res);
+#else
c = read_word_at_a_time(src+res);
+#endif
if (has_zero(c, &data, &constants)) {
data = prep_zero_mask(c, data, &constants);
data = create_zero_mask(data);
--
2.49.0.472.ge94155a9ec-goog
From: Sibi Sankar <quic_sibis(a)quicinc.com>
Currently the perf and powercap protocols rely on the protocol domain
attributes, which only indicate that the domain supports fastchannels
at all, before instantiating fastchannels for all possible message-ids.
Fix this by ensuring that each message-id supports fastchannels before
initialization.
Logs:
scmi: Failed to get FC for protocol 13 [MSG_ID:6 / RES_ID:0] - ret:-95. Using regular messaging.
scmi: Failed to get FC for protocol 13 [MSG_ID:6 / RES_ID:1] - ret:-95. Using regular messaging.
scmi: Failed to get FC for protocol 13 [MSG_ID:6 / RES_ID:2] - ret:-95. Using regular messaging.
Cc: stable@vger.kernel.org
Reported-by: Johan Hovold <johan+linaro@kernel.org>
Closes: https://lore.kernel.org/lkml/ZoQjAWse2YxwyRJv@hovoldconsulting.com/
Fixes: 6f9ea4dabd2d ("firmware: arm_scmi: Generalize the fast channel support")
Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com>
[Cristian: Modified the condition checked to establish support or not]
Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
---
Since PROTOCOL_MESSAGE_ATTRIBUTES, used to check if a message_id is
supported, is a mandatory command, it should never fail; so we must bail
out not only if FC is not supported for that command but also if the
query fails as a whole. The condition checked for bailing out is
therefore modified to:
if (ret || !MSG_SUPPORTS_FASTCHANNEL(attributes)) {
Also removed the Tested-by and Reviewed-by tags since I modified the
logic.
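For reference, the guard this adds at the top of
scmi_common_fastchannel_init() (condensed from the diff below) is:

	ret = scmi_protocol_msg_check(ph, message_id, &attributes);
	if (ret || !MSG_SUPPORTS_FASTCHANNEL(attributes))
		return;	/* fall back to regular messaging */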
---
drivers/firmware/arm_scmi/driver.c | 76 +++++++++++++++------------
drivers/firmware/arm_scmi/protocols.h | 2 +
2 files changed, 45 insertions(+), 33 deletions(-)
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index bf2dc200604e..3855a9791f4a 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -1738,6 +1738,39 @@ static int scmi_common_get_max_msg_size(const struct scmi_protocol_handle *ph)
return info->desc->max_msg_size;
}
+/**
+ * scmi_protocol_msg_check - Check protocol message attributes
+ *
+ * @ph: A reference to the protocol handle.
+ * @message_id: The ID of the message to check.
+ * @attributes: A parameter to optionally return the retrieved message
+ * attributes, in case of Success.
+ *
+ * An helper to check protocol message attributes for a specific protocol
+ * and message pair.
+ *
+ * Return: 0 on SUCCESS
+ */
+static int scmi_protocol_msg_check(const struct scmi_protocol_handle *ph,
+ u32 message_id, u32 *attributes)
+{
+ int ret;
+ struct scmi_xfer *t;
+
+ ret = xfer_get_init(ph, PROTOCOL_MESSAGE_ATTRIBUTES,
+ sizeof(__le32), 0, &t);
+ if (ret)
+ return ret;
+
+ put_unaligned_le32(message_id, t->tx.buf);
+ ret = do_xfer(ph, t);
+ if (!ret && attributes)
+ *attributes = get_unaligned_le32(t->rx.buf);
+ xfer_put(ph, t);
+
+ return ret;
+}
+
/**
* struct scmi_iterator - Iterator descriptor
* @msg: A reference to the message TX buffer; filled by @prepare_message with
@@ -1879,6 +1912,7 @@ scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph,
int ret;
u32 flags;
u64 phys_addr;
+ u32 attributes;
u8 size;
void __iomem *addr;
struct scmi_xfer *t;
@@ -1887,6 +1921,15 @@ scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph,
struct scmi_msg_resp_desc_fc *resp;
const struct scmi_protocol_instance *pi = ph_to_pi(ph);
+ /* Check if the MSG_ID supports fastchannel */
+ ret = scmi_protocol_msg_check(ph, message_id, &attributes);
+ if (ret || !MSG_SUPPORTS_FASTCHANNEL(attributes)) {
+ dev_dbg(ph->dev,
+ "Skip FC init for 0x%02X/%d domain:%d - ret:%d\n",
+ pi->proto->id, message_id, domain, ret);
+ return;
+ }
+
if (!p_addr) {
ret = -EINVAL;
goto err_out;
@@ -2014,39 +2057,6 @@ static void scmi_common_fastchannel_db_ring(struct scmi_fc_db_info *db)
#endif
}
-/**
- * scmi_protocol_msg_check - Check protocol message attributes
- *
- * @ph: A reference to the protocol handle.
- * @message_id: The ID of the message to check.
- * @attributes: A parameter to optionally return the retrieved message
- * attributes, in case of Success.
- *
- * An helper to check protocol message attributes for a specific protocol
- * and message pair.
- *
- * Return: 0 on SUCCESS
- */
-static int scmi_protocol_msg_check(const struct scmi_protocol_handle *ph,
- u32 message_id, u32 *attributes)
-{
- int ret;
- struct scmi_xfer *t;
-
- ret = xfer_get_init(ph, PROTOCOL_MESSAGE_ATTRIBUTES,
- sizeof(__le32), 0, &t);
- if (ret)
- return ret;
-
- put_unaligned_le32(message_id, t->tx.buf);
- ret = do_xfer(ph, t);
- if (!ret && attributes)
- *attributes = get_unaligned_le32(t->rx.buf);
- xfer_put(ph, t);
-
- return ret;
-}
-
static const struct scmi_proto_helpers_ops helpers_ops = {
.extended_name_get = scmi_common_extended_name_get,
.get_max_msg_size = scmi_common_get_max_msg_size,
diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h
index aaee57cdcd55..d62c4469d1fd 100644
--- a/drivers/firmware/arm_scmi/protocols.h
+++ b/drivers/firmware/arm_scmi/protocols.h
@@ -31,6 +31,8 @@
#define SCMI_PROTOCOL_VENDOR_BASE 0x80
+#define MSG_SUPPORTS_FASTCHANNEL(x) ((x) & BIT(0))
+
enum scmi_common_cmd {
PROTOCOL_VERSION = 0x0,
PROTOCOL_ATTRIBUTES = 0x1,
--
2.47.0
From: Saurabh Sengar <ssengar@linux.microsoft.com>
On an x86 system under test with 1780 CPUs, topology_span_sane() takes
around 8 seconds cumulatively for all the iterations. It is an expensive
operation which sanity-checks the non-NUMA topology masks.
CPU topology does not change very frequently, hence make this check
optional for systems where the topology is trusted and a faster bootup
is needed.
Restrict this to the sched_verbose kernel cmdline option so that the
penalty can be avoided on systems that want to avoid it.
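For context, a simplified sketch of what the skipped check asserts
(condensed from topology_span_sane() in kernel/sched/topology.c; error
reporting omitted): at any non-NUMA level, the masks of two CPUs must
be either identical or disjoint:

	static bool topology_span_sane(struct sched_domain_topology_level *tl,
				       const struct cpumask *cpu_map, int cpu)
	{
		int i = cpu + 1;

		/* NUMA levels are allowed to overlap */
		if (tl->flags & SDTL_OVERLAP)
			return true;

		for_each_cpu_from(i, cpu_map) {
			if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
			    cpumask_intersects(tl->mask(cpu), tl->mask(i)))
				return false;
		}
		return true;
	}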
Cc: stable@vger.kernel.org
Fixes: ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap")
Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Co-developed-by: Naman Jain <namjain@linux.microsoft.com>
Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
Changes since v4:
https://lore.kernel.org/all/20250306055354.52915-1-namjain@linux.microsoft.…
- Rephrased print statement and moved it to sched_domain_debug.
(addressing Valentin's comments)
Changes since v3:
https://lore.kernel.org/all/20250203114738.3109-1-namjain@linux.microsoft.c…
- Minor typo correction in comment
- Added Tested-by tag from Prateek for x86
Changes since v2:
https://lore.kernel.org/all/1731922777-7121-1-git-send-email-ssengar@linux.…
- Use sched_debug() instead of using sched_debug_verbose
variable directly (addressing Prateek's comment)
Changes since v1:
https://lore.kernel.org/all/1729619853-2597-1-git-send-email-ssengar@linux.…
- Use kernel cmdline param instead of compile time flag.
Adding a link to the other patch which is under review:
https://lore.kernel.org/lkml/20241031200431.182443-1-steve.wahl@hpe.com/
The above patch tries to optimize the topology sanity check, whereas
this patch makes it optional. We believe both patches can coexist, as
even with the optimization there will still be some performance
overhead for this check.
---
kernel/sched/topology.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index c49aea8c1025..d7254c47af45 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -132,8 +132,11 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
{
int level = 0;
- if (!sched_debug_verbose)
+ if (!sched_debug_verbose) {
+ pr_info_once("%s: Scheduler topology debugging disabled, add 'sched_verbose' to the cmdline to enable it\n",
+ __func__);
return;
+ }
if (!sd) {
printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
@@ -2359,6 +2362,10 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
{
int i = cpu + 1;
+ /* Skip the topology sanity check for non-debug, as it is a time-consuming operation */
+ if (!sched_debug())
+ return true;
+
/* NUMA levels are allowed to overlap */
if (tl->flags & SDTL_OVERLAP)
return true;
base-commit: 7ec162622e66a4ff886f8f28712ea1b13069e1aa
--
2.34.1