On Sun, Mar 10, 2024 at 03:43:38PM -0400, Kent Overstreet wrote:
> The following changes since commit 2e7cdd29fc42c410eab52fffe5710bf656619222:
>
> Linux 6.7.9 (2024-03-06 14:54:01 +0000)
>
> are available in the Git repository at:
>
> https://evilpiepirate.org/git/bcachefs.git tags/bcachefs-for-v6.7-stable-20240310
>
> for you to fetch changes up to 560ceb6a4d9e3bea57c29f5f3a7a1d671dfc7983:
>
> bcachefs: Fix BTREE_ITER_FILTER_SNAPSHOTS on inodes btree (2024-03-10 14:36:57 -0400)
>
> ----------------------------------------------------------------
> bcachefs fixes for 6.7 stable
>
> "bcachefs: fix simulateously upgrading & downgrading" is the important
> one here. This fixes a really nasty bug where in a rare situation we
> wouldn't downgrade; we'd write a superblock where the version number is
> higher than the currently supported version.
>
> This caused total failure to mount multi device filesystems with the
> splitbrain checking in 6.8, since now we wouldn't be updating the member
> sequence numbers used for splitbrain checking, but the version number
> said we would be - and newer versions would attempt to kick every device
> out of the fs.
>
> ----------------------------------------------------------------
> Helge Deller (1):
> bcachefs: Fix build on parisc by avoiding __multi3()
>
> Kent Overstreet (3):
> bcachefs: check for failure to downgrade
> bcachefs: fix simulateously upgrading & downgrading
> bcachefs: Fix BTREE_ITER_FILTER_SNAPSHOTS on inodes btree
>
> Mathias Krause (1):
> bcachefs: install fd later to avoid race with close
>
> fs/bcachefs/btree_iter.c | 4 +++-
> fs/bcachefs/chardev.c | 3 +--
> fs/bcachefs/errcode.h | 1 +
> fs/bcachefs/mean_and_variance.h | 2 +-
> fs/bcachefs/super-io.c | 27 ++++++++++++++++++++++++---
> 5 files changed, 30 insertions(+), 7 deletions(-)
Qcom SoCs making use of ARM SMMU require BDF to SID translation table in
the driver to properly map the SID for the PCIe devices based on their BDF
identifier. This is currently achieved with the help of
qcom_pcie_config_sid_1_9_0() function for SoCs supporting the 1_9_0 config.
But With newer Qcom SoCs starting from SM8450, BDF to SID translation is
set to bypass mode by default in hardware. Due to this, the translation
table that is set in the qcom_pcie_config_sid_1_9_0() is essentially
unused and the default SID is used for all endpoints in SoCs starting from
SM8450.
This is a security concern and also warrants swapping the DeviceID in DT
while using the GIC ITS to handle MSIs from endpoints. The swapping is
currently done like below in DT when using GIC ITS:
/*
* MSIs for BDF (1:0.0) only works with Device ID 0x5980.
* Hence, the IDs are swapped.
*/
msi-map = <0x0 &gic_its 0x5981 0x1>,
<0x100 &gic_its 0x5980 0x1>;
Here, swapping of the DeviceIDs ensure that the endpoint with BDF (1:0.0)
gets the DeviceID 0x5980 which is associated with the default SID as per
the iommu mapping in DT. So MSIs were delivered with IDs swapped so far.
But this also means the Root Port (0:0.0) won't receive any MSIs (for PME,
AER etc...)
So let's fix these issues by clearing the BDF to SID bypass mode for all
SoCs making use of the 1_9_0 config. This allows the PCIe devices to use
the correct SID, thus avoiding the DeviceID swapping hack in DT and also
achieving the isolation between devices.
Cc: <stable(a)vger.kernel.org> # 5.11
Fixes: 4c9398822106 ("PCI: qcom: Add support for configuring BDF to SID mapping for SM8250")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
---
I will send the DT patches to fix the msi-map entries once this patch gets
merged.
---
drivers/pci/controller/dwc/pcie-qcom.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 10f2d0bb86be..84e47c6f95fe 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -53,6 +53,7 @@
#define PARF_SLV_ADDR_SPACE_SIZE 0x358
#define PARF_DEVICE_TYPE 0x1000
#define PARF_BDF_TO_SID_TABLE_N 0x2000
+#define PARF_BDF_TO_SID_CFG 0x2c00
/* ELBI registers */
#define ELBI_SYS_CTRL 0x04
@@ -120,6 +121,9 @@
/* PARF_DEVICE_TYPE register fields */
#define DEVICE_TYPE_RC 0x4
+/* PARF_BDF_TO_SID_CFG fields */
+#define BDF_TO_SID_BYPASS BIT(0)
+
/* ELBI_SYS_CTRL register fields */
#define ELBI_SYS_CTRL_LT_ENABLE BIT(0)
@@ -1008,11 +1012,17 @@ static int qcom_pcie_config_sid_1_9_0(struct qcom_pcie *pcie)
u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE];
int i, nr_map, size = 0;
u32 smmu_sid_base;
+ u32 val;
of_get_property(dev->of_node, "iommu-map", &size);
if (!size)
return 0;
+ /* Enable BDF to SID translation by disabling bypass mode (default) */
+ val = readl(pcie->parf + PARF_BDF_TO_SID_CFG);
+ val &= ~BDF_TO_SID_BYPASS;
+ writel(val, pcie->parf + PARF_BDF_TO_SID_CFG);
+
map = kzalloc(size, GFP_KERNEL);
if (!map)
return -ENOMEM;
---
base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
change-id: 20240307-pci-bdf-sid-fix-c9cd8c0023d0
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Clang enables -Wenum-enum-conversion and -Wenum-compare-conditional
under -Wenum-conversion. A recent change in Clang strengthened these
warnings and they appear frequently in common builds, primarily due to
several instances in common headers but there are quite a few drivers
that have individual instances as well.
include/linux/vmstat.h:508:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
508 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
| ~~~~~~~~~~~~~~~~~~~~~ ^
509 | item];
| ~~~~
drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:955:24: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional]
955 | flags |= is_new_rate ? IWL_MAC_BEACON_CCK
| ^ ~~~~~~~~~~~~~~~~~~
956 | : IWL_MAC_BEACON_CCK_V1;
| ~~~~~~~~~~~~~~~~~~~~~
drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:1120:21: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional]
1120 | 0) > 10 ?
| ^
1121 | IWL_MAC_BEACON_FILS :
| ~~~~~~~~~~~~~~~~~~~
1122 | IWL_MAC_BEACON_FILS_V1;
| ~~~~~~~~~~~~~~~~~~~~~~
Doing arithmetic between or returning two different types of enums could
be a bug, so each of the instance of the warning needs to be evaluated.
Unfortunately, as mentioned above, there are many instances of this
warning in many different configurations, which can break the build when
CONFIG_WERROR is enabled.
To avoid introducing new instances of the warnings while cleaning up the
disruption for the majority of users, disable these warnings for the
default build while leaving them on for W=1 builds.
Cc: stable(a)vger.kernel.org
Closes: https://github.com/ClangBuiltLinux/linux/issues/2002
Link: https://github.com/llvm/llvm-project/commit/8c2ae42b3e1c6aa7c18f873edcebff7…
Acked-by: Yonghong Song <yonghong.song(a)linux.dev>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
Changes in v2:
- Only disable the warning for the default build, leave it on for W=1 (Arnd)
- Add Yonghong's ack, as the warning is still disabled for the default
build.
- Link to v1: https://lore.kernel.org/r/20240305-disable-extra-clang-enum-warnings-v1-1-6…
---
scripts/Makefile.extrawarn | 2 ++
1 file changed, 2 insertions(+)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index a9e552a1e910..2f25a1de129d 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -132,6 +132,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict)
+KBUILD_CFLAGS += -Wno-enum-compare-conditional
+KBUILD_CFLAGS += -Wno-enum-enum-conversion
endif
endif
---
base-commit: 90d35da658da8cff0d4ecbb5113f5fac9d00eb72
change-id: 20240304-disable-extra-clang-enum-warnings-bf574c7c99fd
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The .release() function does not get called until all readers of a file
descriptor are finished.
If a thread is blocked on reading a file descriptor in ring_buffer_wait(),
and another thread closes the file descriptor, it will not wake up the
other thread as ring_buffer_wake_waiters() is called by .release(), and
that will not get called until the .read() is finished.
The issue originally showed up in trace-cmd, but the readers are actually
other processes with their own file descriptors. So calling close() would wake
up the other tasks because they are blocked on another descriptor then the
one that was closed(). But there's other wake ups that solve that issue.
When a thread is blocked on a read, it can still hang even when another
thread closed its descriptor.
This is what the .flush() callback is for. Have the .flush() wake up the
readers.
Link: https://lore.kernel.org/linux-trace-kernel/20240308202432.107909457@goodmis…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: linke li <lilinke99(a)qq.com>
Cc: Rabin Vincent <rabin(a)rab.in>
Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d16b95ca58a7..c9c898307348 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8393,6 +8393,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
return size;
}
+static int tracing_buffers_flush(struct file *file, fl_owner_t id)
+{
+ struct ftrace_buffer_info *info = file->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ iter->wait_index++;
+ /* Make sure the waiters see the new wait_index */
+ smp_wmb();
+
+ ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
+ return 0;
+}
+
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
struct ftrace_buffer_info *info = file->private_data;
@@ -8404,12 +8418,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
__trace_array_put(iter->tr);
- iter->wait_index++;
- /* Make sure the waiters see the new wait_index */
- smp_wmb();
-
- ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
-
if (info->spare)
ring_buffer_free_read_page(iter->array_buffer->buffer,
info->spare_cpu, info->spare);
@@ -8625,6 +8633,7 @@ static const struct file_operations tracing_buffers_fops = {
.read = tracing_buffers_read,
.poll = tracing_buffers_poll,
.release = tracing_buffers_release,
+ .flush = tracing_buffers_flush,
.splice_read = tracing_buffers_splice_read,
.unlocked_ioctl = tracing_buffers_ioctl,
.llseek = no_llseek,
--
2.43.0
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The "shortest_full" variable is used to keep track of the waiter that is
waiting for the smallest amount on the ring buffer before being woken up.
When a tasks waits on the ring buffer, it passes in a "full" value that is
a percentage. 0 means wake up on any data. 1-100 means wake up from 1% to
100% full buffer.
As all waiters are on the same wait queue, the wake up happens for the
waiter with the smallest percentage.
The problem is that the smallest_full on the cpu_buffer that stores the
smallest amount doesn't get reset when all the waiters are woken up. It
does get reset when the ring buffer is reset (echo > /sys/kernel/tracing/trace).
This means that tasks may be woken up more often then when they want to
be. Instead, have the shortest_full field get reset just before waking up
all the tasks. If the tasks wait again, they will update the shortest_full
before sleeping.
Also add locking around setting of shortest_full in the poll logic, and
change "work" to "rbwork" to match the variable name for rb_irq_work
structures that are used in other places.
Link: https://lore.kernel.org/linux-trace-kernel/20240308202431.948914369@goodmis…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: linke li <lilinke99(a)qq.com>
Cc: Rabin Vincent <rabin(a)rab.in>
Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ring_buffer.c | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3400f11286e3..aa332ace108b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -755,8 +755,19 @@ static void rb_wake_up_waiters(struct irq_work *work)
wake_up_all(&rbwork->waiters);
if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+ /* Only cpu_buffer sets the above flags */
+ struct ring_buffer_per_cpu *cpu_buffer =
+ container_of(rbwork, struct ring_buffer_per_cpu, irq_work);
+
+ /* Called from interrupt context */
+ raw_spin_lock(&cpu_buffer->reader_lock);
rbwork->wakeup_full = false;
rbwork->full_waiters_pending = false;
+
+ /* Waking up all waiters, they will reset the shortest full */
+ cpu_buffer->shortest_full = 0;
+ raw_spin_unlock(&cpu_buffer->reader_lock);
+
wake_up_all(&rbwork->full_waiters);
}
}
@@ -934,28 +945,33 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table, int full)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct rb_irq_work *work;
+ struct rb_irq_work *rbwork;
if (cpu == RING_BUFFER_ALL_CPUS) {
- work = &buffer->irq_work;
+ rbwork = &buffer->irq_work;
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return EPOLLERR;
cpu_buffer = buffer->buffers[cpu];
- work = &cpu_buffer->irq_work;
+ rbwork = &cpu_buffer->irq_work;
}
if (full) {
- poll_wait(filp, &work->full_waiters, poll_table);
- work->full_waiters_pending = true;
+ unsigned long flags;
+
+ poll_wait(filp, &rbwork->full_waiters, poll_table);
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ rbwork->full_waiters_pending = true;
if (!cpu_buffer->shortest_full ||
cpu_buffer->shortest_full > full)
cpu_buffer->shortest_full = full;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
} else {
- poll_wait(filp, &work->waiters, poll_table);
- work->waiters_pending = true;
+ poll_wait(filp, &rbwork->waiters, poll_table);
+ rbwork->waiters_pending = true;
}
/*
--
2.43.0
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
A task can wait on a ring buffer for when it fills up to a specific
watermark. The writer will check the minimum watermark that waiters are
waiting for and if the ring buffer is past that, it will wake up all the
waiters.
The waiters are in a wait loop, and will first check if a signal is
pending and then check if the ring buffer is at the desired level where it
should break out of the loop.
If a file that uses a ring buffer closes, and there's threads waiting on
the ring buffer, it needs to wake up those threads. To do this, a
"wait_index" was used.
Before entering the wait loop, the waiter will read the wait_index. On
wakeup, it will check if the wait_index is different than when it entered
the loop, and will exit the loop if it is. The waker will only need to
update the wait_index before waking up the waiters.
This had a couple of bugs. One trivial one and one broken by design.
The trivial bug was that the waiter checked the wait_index after the
schedule() call. It had to be checked between the prepare_to_wait() and
the schedule() which it was not.
The main bug is that the first check to set the default wait_index will
always be outside the prepare_to_wait() and the schedule(). That's because
the ring_buffer_wait() doesn't have enough context to know if it should
break out of the loop.
The loop itself is not needed, because all the callers to the
ring_buffer_wait() also has their own loop, as the callers have a better
sense of what the context is to decide whether to break out of the loop
or not.
Just have the ring_buffer_wait() block once, and if it gets woken up, exit
the function and let the callers decide what to do next.
Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNS…
Link: https://lore.kernel.org/linux-trace-kernel/20240308202431.792933613@goodmis…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: linke li <lilinke99(a)qq.com>
Cc: Rabin Vincent <rabin(a)rab.in>
Fixes: e30f53aad2202 ("tracing: Do not busy wait in buffer splice")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ring_buffer.c | 139 ++++++++++++++++++-------------------
1 file changed, 68 insertions(+), 71 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0699027b4f4c..3400f11286e3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -384,7 +384,6 @@ struct rb_irq_work {
struct irq_work work;
wait_queue_head_t waiters;
wait_queue_head_t full_waiters;
- long wait_index;
bool waiters_pending;
bool full_waiters_pending;
bool wakeup_full;
@@ -798,14 +797,40 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
rbwork = &cpu_buffer->irq_work;
}
- rbwork->wait_index++;
- /* make sure the waiters see the new index */
- smp_wmb();
-
/* This can be called in any context */
irq_work_queue(&rbwork->work);
}
+static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ bool ret = false;
+
+ /* Reads of all CPUs always waits for any data */
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ return !ring_buffer_empty(buffer);
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ if (!ring_buffer_empty_cpu(buffer, cpu)) {
+ unsigned long flags;
+ bool pagebusy;
+
+ if (!full)
+ return true;
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ ret = !pagebusy && full_hit(buffer, cpu, full);
+
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)
+ cpu_buffer->shortest_full = full;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ }
+ return ret;
+}
+
/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
@@ -821,7 +846,6 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
struct rb_irq_work *work;
- long wait_index;
int ret = 0;
/*
@@ -840,81 +864,54 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
work = &cpu_buffer->irq_work;
}
- wait_index = READ_ONCE(work->wait_index);
-
- while (true) {
- if (full)
- prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
- else
- prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
-
- /*
- * The events can happen in critical sections where
- * checking a work queue can cause deadlocks.
- * After adding a task to the queue, this flag is set
- * only to notify events to try to wake up the queue
- * using irq_work.
- *
- * We don't clear it even if the buffer is no longer
- * empty. The flag only causes the next event to run
- * irq_work to do the work queue wake up. The worse
- * that can happen if we race with !trace_empty() is that
- * an event will cause an irq_work to try to wake up
- * an empty queue.
- *
- * There's no reason to protect this flag either, as
- * the work queue and irq_work logic will do the necessary
- * synchronization for the wake ups. The only thing
- * that is necessary is that the wake up happens after
- * a task has been queued. It's OK for spurious wake ups.
- */
- if (full)
- work->full_waiters_pending = true;
- else
- work->waiters_pending = true;
-
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
- break;
-
- if (cpu != RING_BUFFER_ALL_CPUS &&
- !ring_buffer_empty_cpu(buffer, cpu)) {
- unsigned long flags;
- bool pagebusy;
- bool done;
-
- if (!full)
- break;
-
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
- pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
- done = !pagebusy && full_hit(buffer, cpu, full);
+ if (full)
+ prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
+ else
+ prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- if (done)
- break;
- }
+ /*
+ * The events can happen in critical sections where
+ * checking a work queue can cause deadlocks.
+ * After adding a task to the queue, this flag is set
+ * only to notify events to try to wake up the queue
+ * using irq_work.
+ *
+ * We don't clear it even if the buffer is no longer
+ * empty. The flag only causes the next event to run
+ * irq_work to do the work queue wake up. The worse
+ * that can happen if we race with !trace_empty() is that
+ * an event will cause an irq_work to try to wake up
+ * an empty queue.
+ *
+ * There's no reason to protect this flag either, as
+ * the work queue and irq_work logic will do the necessary
+ * synchronization for the wake ups. The only thing
+ * that is necessary is that the wake up happens after
+ * a task has been queued. It's OK for spurious wake ups.
+ */
+ if (full)
+ work->full_waiters_pending = true;
+ else
+ work->waiters_pending = true;
- schedule();
+ if (rb_watermark_hit(buffer, cpu, full))
+ goto out;
- /* Make sure to see the new wait index */
- smp_rmb();
- if (wait_index != work->wait_index)
- break;
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
}
+ schedule();
+ out:
if (full)
finish_wait(&work->full_waiters, &wait);
else
finish_wait(&work->waiters, &wait);
+ if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
+ ret = -EINTR;
+
return ret;
}
--
2.43.0
The DebugSwap feature of SEV-ES provides a way for confidential guests to use
data breakpoints. However, because the status of the DebugSwap feature is
recorded in the VMSA, enabling it by default invalidates the attestation
signatures. In 6.10 we will introduce a new API to create SEV VMs that
will allow enabling DebugSwap based on what the user tells KVM to do.
Contextually, we will change the legacy KVM_SEV_ES_INIT API to never
enable DebugSwap.
For compatibility with kernels that pre-date the introduction of DebugSwap,
as well as with those where KVM_SEV_ES_INIT will never enable it, do not enable
the feature by default. If anybody wants to use it, for now they can enable
the sev_es_debug_swap_enabled module parameter, but this will result in a
warning.
Fixes: d1f85fbe836e ("KVM: SEV: Enable data breakpoints in SEV-ES")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/svm/sev.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f760106c31f8..69b37956c1c8 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -57,7 +57,7 @@ static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
/* enable/disable SEV-ES DebugSwap support */
-static bool sev_es_debug_swap_enabled = true;
+static bool sev_es_debug_swap_enabled = false;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
#else
#define sev_enabled false
@@ -612,8 +612,11 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->xss = svm->vcpu.arch.ia32_xss;
save->dr6 = svm->vcpu.arch.dr6;
- if (sev_es_debug_swap_enabled)
+ if (sev_es_debug_swap_enabled) {
save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+ pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. "
+ "This will not work starting with Linux 6.10\n");
+ }
pr_debug("Virtual Machine Save Area (VMSA):\n");
print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
--
2.39.1
This is a small set of patches that address build breakage with
allyesconfig / allmodconfig.
This solves some, but not all, build breakage.
The parport fix depends on the previous patch, the rest are independent
fixes.
With v2 there is a extra patch that drops ZONE_DMA support.
It does not fix any build failure, but a nice cleanup.
Cc: Miquel Raynal <miquel.raynal(a)bootlin.com>
To: Maciej W. Rozycki <macro(a)orcam.me.uk>
To: <sparclinux(a)vger.kernel.org>
Cc: <linux-parport(a)lists.infradead.org>
Cc: David S. Miller <davem(a)davemloft.net>
To: Andreas Larsson <andreas(a)gaisler.com>
To: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: <linux-kernel(a)vger.kernel.org>
Changes in v2:
- Added r-b/tested by (thanks to Randy and Maciej)
- Dropped patch for uhci-grlib.c as it is already upstream (Randy)
- Added a few Fixes (Maciej)
- Fixed commit message when dropping GENERIC_ISA_DMA (Maciej)
- Added new patch that drop ZONE_DMA (Maciej)
- Added new patch to fix section mismatch error
In an allmodconfig build I see a lot of:
modpost: "__udelay" [module] has no CRC!
Similar for a handful of other symbols.
Any hint how to get rid of them would be nice.
I have tried to add the prototype to asm-prototypes.h with no luck.
On top of this the link fails, but I assume this the kernel that grows
too big which is no surprise.
- Link to v1: https://lore.kernel.org/r/20240223-sam-fix-sparc32-all-builds-v1-0-5c60fd5c…
---
Sam Ravnborg (7):
sparc32: Use generic cmpdi2/ucmpdi2 variants
sparc32: Fix build with trapbase
mtd: maps: sun_uflash: Declare uflash_devinit static
sparc32: Do not select ZONE_DMA
sparc32: Do not select GENERIC_ISA_DMA
sparc32: Fix parport build with sparc32
sparc32: Fix section mismatch in leon_pci_grpci
arch/sparc/Kconfig | 7 +-
arch/sparc/include/asm/parport.h | 259 +-----------------------------------
arch/sparc/include/asm/parport_64.h | 256 +++++++++++++++++++++++++++++++++++
arch/sparc/kernel/irq_32.c | 6 +-
arch/sparc/kernel/kernel.h | 8 +-
arch/sparc/kernel/kgdb_32.c | 4 +-
arch/sparc/kernel/leon_pci_grpci1.c | 2 +-
arch/sparc/kernel/leon_pci_grpci2.c | 2 +-
arch/sparc/kernel/leon_smp.c | 6 +-
arch/sparc/kernel/setup_32.c | 4 +-
arch/sparc/lib/Makefile | 4 +-
arch/sparc/lib/cmpdi2.c | 28 ----
arch/sparc/lib/ucmpdi2.c | 20 ---
arch/sparc/mm/srmmu.c | 1 -
drivers/mtd/maps/sun_uflash.c | 2 +-
15 files changed, 284 insertions(+), 325 deletions(-)
---
base-commit: 626db6ee8ee1edac206610db407114aa83b53fd3
change-id: 20240223-sam-fix-sparc32-all-builds-0a0403d6e1b3
Best regards,
--
Sam Ravnborg <sam(a)ravnborg.org>
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
When the trace_pipe_raw file is closed, there should be no new readers on
the file descriptor. This is mostly handled with the waking and wait_index
fields of the iterator. But there's still a slight race.
CPU 0 CPU 1
----- -----
wait_woken_prepare()
if (waking)
woken = true;
index = wait_index;
wait_woken_set()
waking = true
wait_index++;
ring_buffer_wake_waiters();
wait_on_pipe()
ring_buffer_wait();
The ring_buffer_wait() will miss the wakeup from CPU 1. The problem is
that the ring_buffer_wait() needs the logic of:
prepare_to_wait();
if (!condition)
schedule();
Where the missing condition check is the iter->waking.
Either that condition check needs to be passed to ring_buffer_wait() or
the function needs to be broken up into three parts. This chooses to do
the break up.
Break ring_buffer_wait() into:
ring_buffer_prepare_to_wait();
ring_buffer_wait();
ring_buffer_finish_wait();
Now wait_on_pipe() can have:
ring_buffer_prepare_to_wait();
if (!iter->waking)
ring_buffer_wait();
ring_buffer_finish_wait();
And this will catch the above race, as the waiter will either see waking,
or already have been woken up.
Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNS…
Cc: stable(a)vger.kernel.org
Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
include/linux/ring_buffer.h | 4 ++
kernel/trace/ring_buffer.c | 88 ++++++++++++++++++++++++++-----------
kernel/trace/trace.c | 14 +++++-
3 files changed, 78 insertions(+), 28 deletions(-)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index fa802db216f9..e5b5903cdc21 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -98,7 +98,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
__ring_buffer_alloc((size), (flags), &__key); \
})
+int ring_buffer_prepare_to_wait(struct trace_buffer *buffer, int cpu, int *full,
+ struct wait_queue_entry *wait);
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
+void ring_buffer_finish_wait(struct trace_buffer *buffer, int cpu, int full,
+ struct wait_queue_entry *wait);
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table, int full);
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 856d0e5b0da5..fa7090f6b4fc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -868,29 +868,29 @@ rb_get_work_queue(struct trace_buffer *buffer, int cpu, int *full)
}
/**
- * ring_buffer_wait - wait for input to the ring buffer
+ * ring_buffer_prepare_to_wait - Prepare to wait for data on the ring buffer
* @buffer: buffer to wait on
* @cpu: the cpu buffer to wait on
- * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ * @full: wait until the percentage of pages are available,
+ * if @cpu != RING_BUFFER_ALL_CPUS. It may be updated via this function.
+ * @wait: The wait queue entry.
*
- * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
- * as data is added to any of the @buffer's cpu buffers. Otherwise
- * it will wait for data to be added to a specific cpu buffer.
+ * This must be called before ring_buffer_wait(). It calls the prepare_to_wait()
+ * on @wait for the necessary wait queue defined by @buffer, @cpu, and @full.
*/
-int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+int ring_buffer_prepare_to_wait(struct trace_buffer *buffer, int cpu, int *full,
+ struct wait_queue_entry *wait)
{
struct rb_irq_work *rbwork;
- DEFINE_WAIT(wait);
- int ret = 0;
- rbwork = rb_get_work_queue(buffer, cpu, &full);
+ rbwork = rb_get_work_queue(buffer, cpu, full);
if (IS_ERR(rbwork))
return PTR_ERR(rbwork);
- if (full)
- prepare_to_wait(&rbwork->full_waiters, &wait, TASK_INTERRUPTIBLE);
+ if (*full)
+ prepare_to_wait(&rbwork->full_waiters, wait, TASK_INTERRUPTIBLE);
else
- prepare_to_wait(&rbwork->waiters, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&rbwork->waiters, wait, TASK_INTERRUPTIBLE);
/*
* The events can happen in critical sections where
@@ -912,30 +912,66 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* that is necessary is that the wake up happens after
* a task has been queued. It's OK for spurious wake ups.
*/
- if (full)
+ if (*full)
rbwork->full_waiters_pending = true;
else
rbwork->waiters_pending = true;
- if (rb_watermark_hit(buffer, cpu, full))
- goto out;
+ return 0;
+}
- if (signal_pending(current)) {
- ret = -EINTR;
- goto out;
- }
+/**
+ * ring_buffer_finish_wait - clean up of ring_buffer_prepare_to_wait()
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ * @wait: The wait queue entry.
+ *
+ * This must be called after ring_buffer_prepare_to_wait(). It cleans up
+ * the @wait for the queue defined by @buffer, @cpu, and @full.
+ */
+void ring_buffer_finish_wait(struct trace_buffer *buffer, int cpu, int full,
+ struct wait_queue_entry *wait)
+{
+ struct rb_irq_work *rbwork;
+
+ rbwork = rb_get_work_queue(buffer, cpu, &full);
+ if (WARN_ON_ONCE(IS_ERR(rbwork)))
+ return;
- schedule();
- out:
if (full)
- finish_wait(&rbwork->full_waiters, &wait);
+ finish_wait(&rbwork->full_waiters, wait);
else
- finish_wait(&rbwork->waiters, &wait);
+ finish_wait(&rbwork->waiters, wait);
+}
- if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
- ret = -EINTR;
+/**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ *
+ * ring_buffer_prepare_to_wait() must be called before this function
+ * and ring_buffer_finish_wait() must be called after.
+ */
+int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+{
+ if (rb_watermark_hit(buffer, cpu, full))
+ return 0;
- return ret;
+ if (signal_pending(current))
+ return -EINTR;
+
+ schedule();
+
+ if (!rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
+ return -EINTR;
+
+ return 0;
}
/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a184dbdf8e91..2d6bc6ee8a58 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1984,7 +1984,8 @@ static bool wait_woken_prepare(struct trace_iterator *iter, int *wait_index)
spin_lock(&wait_lock);
if (iter->waking)
woken = true;
- *wait_index = iter->wait_index;
+ if (wait_index)
+ *wait_index = iter->wait_index;
spin_unlock(&wait_lock);
return woken;
@@ -2019,13 +2020,22 @@ static void wait_woken_clear(struct trace_iterator *iter)
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
+ struct trace_buffer *buffer;
+ DEFINE_WAIT(wait);
int ret;
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
return 0;
- ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
+ buffer = iter->array_buffer->buffer;
+
+ ret = ring_buffer_prepare_to_wait(buffer, iter->cpu_file, &full, &wait);
+ if (ret < 0)
+ return ret;
+ if (!wait_woken_prepare(iter, NULL))
+ ret = ring_buffer_wait(buffer, iter->cpu_file, full);
+ ring_buffer_finish_wait(buffer, iter->cpu_file, full, &wait);
#ifdef CONFIG_TRACER_MAX_TRACE
/*
--
2.43.0