From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The ring_buffer_wait() call needs to be broken up into three functions
for proper synchronization from the context of the callers (see the
sketch below):
ring_buffer_prepare_to_wait()
ring_buffer_wait()
ring_buffer_finish_wait()
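As a rough sketch of the intent, a caller would end up using the split
functions like this (hypothetical usage; the signatures here are
assumptions, not the final API):

	/* Hypothetical caller of the planned three-function split */
	ring_buffer_prepare_to_wait(buffer, cpu, &full);  /* queue the task */

	if (!done(iter))		/* caller-specific condition check */
		ring_buffer_wait(buffer, cpu, full);	/* block once */

	ring_buffer_finish_wait(buffer, cpu, full);	/* dequeue the task */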
To simplify the process, pull out the logic for getting the right work
queue to wait on, as it will be needed for the above functions.
Which work queue to wait on depends on the cpu value.
If cpu == RING_BUFFER_ALL_CPUS, then the main "buffer->irq_work" is used.
Otherwise, the irq_work of the cpu_buffer representing the given CPU is used.
Create a rb_get_work_queue() helper function to retrieve the proper queue.
Also rename "work" to "rbwork" as the variable point to struct rb_irq_work,
and to be more consistent with the variable naming elsewhere in the file.
Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNS…
Cc: stable(a)vger.kernel.org
Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ring_buffer.c | 58 +++++++++++++++++++++++---------------
1 file changed, 35 insertions(+), 23 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index aa332ace108b..856d0e5b0da5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -842,6 +842,31 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
return ret;
}
+static struct rb_irq_work *
+rb_get_work_queue(struct trace_buffer *buffer, int cpu, int *full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct rb_irq_work *rbwork;
+
+ /*
+ * Depending on what the caller is waiting for, either any
+ * data in any cpu buffer, or a specific buffer, put the
+ * caller on the appropriate wait queue.
+ */
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+ rbwork = &buffer->irq_work;
+ /* Full only makes sense on per cpu reads */
+ *full = 0;
+ } else {
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return ERR_PTR(-ENODEV);
+ cpu_buffer = buffer->buffers[cpu];
+ rbwork = &cpu_buffer->irq_work;
+ }
+
+ return rbwork;
+}
+
/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
@@ -854,31 +879,18 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
*/
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
{
- struct ring_buffer_per_cpu *cpu_buffer;
+ struct rb_irq_work *rbwork;
DEFINE_WAIT(wait);
- struct rb_irq_work *work;
int ret = 0;
- /*
- * Depending on what the caller is waiting for, either any
- * data in any cpu buffer, or a specific buffer, put the
- * caller on the appropriate wait queue.
- */
- if (cpu == RING_BUFFER_ALL_CPUS) {
- work = &buffer->irq_work;
- /* Full only makes sense on per cpu reads */
- full = 0;
- } else {
- if (!cpumask_test_cpu(cpu, buffer->cpumask))
- return -ENODEV;
- cpu_buffer = buffer->buffers[cpu];
- work = &cpu_buffer->irq_work;
- }
+ rbwork = rb_get_work_queue(buffer, cpu, &full);
+ if (IS_ERR(rbwork))
+ return PTR_ERR(rbwork);
if (full)
- prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&rbwork->full_waiters, &wait, TASK_INTERRUPTIBLE);
else
- prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&rbwork->waiters, &wait, TASK_INTERRUPTIBLE);
/*
* The events can happen in critical sections where
@@ -901,9 +913,9 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* a task has been queued. It's OK for spurious wake ups.
*/
if (full)
- work->full_waiters_pending = true;
+ rbwork->full_waiters_pending = true;
else
- work->waiters_pending = true;
+ rbwork->waiters_pending = true;
if (rb_watermark_hit(buffer, cpu, full))
goto out;
@@ -916,9 +928,9 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
schedule();
out:
if (full)
- finish_wait(&work->full_waiters, &wait);
+ finish_wait(&rbwork->full_waiters, &wait);
else
- finish_wait(&work->waiters, &wait);
+ finish_wait(&rbwork->waiters, &wait);
if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
ret = -EINTR;
--
2.43.0
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The .release() function does not get called until all readers of a file
descriptor are finished.
If a thread is blocked on reading a file descriptor in ring_buffer_wait(),
and another thread closes the file descriptor, it will not wake up the
other thread as ring_buffer_wake_waiters() is called by .release(), and
that will not get called until the .read() is finished.
The issue originally showed up in trace-cmd, but there the readers are
actually other processes with their own file descriptors. So calling
close() would wake up the other tasks, because they are blocked on a
different descriptor than the one that was closed. But there are other
wake ups that handle that case.
When a thread is blocked on a read, it can still hang even when another
thread closes its descriptor.
This is what the .flush() callback is for. Have the .flush() wake up the
readers.
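As a minimal userspace illustration of the hang (a hypothetical
example, assuming tracefs is mounted at /sys/kernel/tracing and
building with -pthread):

	#include <fcntl.h>
	#include <pthread.h>
	#include <stdio.h>
	#include <unistd.h>

	static int fd;

	static void *reader(void *arg)
	{
		char buf[4096];

		/* Blocks in ring_buffer_wait() until data or a wake up */
		read(fd, buf, sizeof(buf));
		return NULL;
	}

	int main(void)
	{
		pthread_t tid;

		fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
			  O_RDONLY);
		if (fd < 0)
			return 1;

		pthread_create(&tid, NULL, reader, NULL);
		sleep(1);	/* let the reader block */

		/*
		 * Without this patch, close() only drops the fd; .release()
		 * (and its wake up) is deferred until read() returns, so the
		 * reader stays blocked. With .flush() wired up, the wake up
		 * happens here, in the closing thread's context.
		 */
		close(fd);

		pthread_join(tid, NULL);
		printf("reader returned\n");
		return 0;
	}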
Cc: stable(a)vger.kernel.org
Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d16b95ca58a7..c9c898307348 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8393,6 +8393,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
return size;
}
+static int tracing_buffers_flush(struct file *file, fl_owner_t id)
+{
+ struct ftrace_buffer_info *info = file->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ iter->wait_index++;
+ /* Make sure the waiters see the new wait_index */
+ smp_wmb();
+
+ ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
+ return 0;
+}
+
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
struct ftrace_buffer_info *info = file->private_data;
@@ -8404,12 +8418,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
__trace_array_put(iter->tr);
- iter->wait_index++;
- /* Make sure the waiters see the new wait_index */
- smp_wmb();
-
- ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
-
if (info->spare)
ring_buffer_free_read_page(iter->array_buffer->buffer,
info->spare_cpu, info->spare);
@@ -8625,6 +8633,7 @@ static const struct file_operations tracing_buffers_fops = {
.read = tracing_buffers_read,
.poll = tracing_buffers_poll,
.release = tracing_buffers_release,
+ .flush = tracing_buffers_flush,
.splice_read = tracing_buffers_splice_read,
.unlocked_ioctl = tracing_buffers_ioctl,
.llseek = no_llseek,
--
2.43.0
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
A task can wait on a ring buffer until it fills up to a specific
watermark. The writer will check the minimum watermark that waiters are
waiting for and if the ring buffer is past that, it will wake up all the
waiters.
The waiters are in a wait loop, and will first check if a signal is
pending and then check if the ring buffer is at the desired level where it
should break out of the loop.
If a file that uses a ring buffer closes, and there are threads waiting
on the ring buffer, those threads need to be woken up. To do this, a
"wait_index" was used.
Before entering the wait loop, the waiter will read the wait_index. On
wakeup, it will check if the wait_index is different than when it entered
the loop, and will exit the loop if it is. The waker will only need to
update the wait_index before waking up the waiters.
This had a couple of bugs: one trivial, and one broken by design.
The trivial bug was that the waiter checked the wait_index after the
schedule() call, when it had to be checked between the prepare_to_wait()
and the schedule(), as the sketch below shows.
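For reference, the ordering that avoids the lost wake up looks roughly
like this (a sketch of the pattern only, not the final code):

	prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);

	/*
	 * Check the condition here, after prepare_to_wait() has set the
	 * task state and queued the task. A waker that updates wait_index
	 * after this check also issues a wake up, which turns the
	 * schedule() below into a no-op instead of a missed wake up.
	 */
	if (wait_index != READ_ONCE(work->wait_index))
		goto out;

	schedule();
out:
	finish_wait(&work->waiters, &wait);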
The main bug is that the first check to set the default wait_index will
always be outside the prepare_to_wait() and the schedule(). That's because
the ring_buffer_wait() doesn't have enough context to know if it should
break out of the loop.
The loop itself is not needed, because all the callers of
ring_buffer_wait() also have their own loops, and the callers have a
better sense of the context in which to decide whether to break out of
the loop or not.
Just have the ring_buffer_wait() block once, and if it gets woken up, exit
the function and let the callers decide what to do next.
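That leaves each caller with a loop along these lines (hypothetical
caller; buffer_has_what_i_need() stands in for the caller's own
condition):

	while (!buffer_has_what_i_need(iter)) {
		ret = ring_buffer_wait(buffer, cpu, full);
		if (ret)	/* e.g. -EINTR */
			break;
		/* A spurious wake up just goes around the loop again */
	}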
Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNS…
Cc: stable(a)vger.kernel.org
Fixes: e30f53aad2202 ("tracing: Do not busy wait in buffer splice")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ring_buffer.c | 139 ++++++++++++++++++-------------------
1 file changed, 68 insertions(+), 71 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0699027b4f4c..3400f11286e3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -384,7 +384,6 @@ struct rb_irq_work {
struct irq_work work;
wait_queue_head_t waiters;
wait_queue_head_t full_waiters;
- long wait_index;
bool waiters_pending;
bool full_waiters_pending;
bool wakeup_full;
@@ -798,14 +797,40 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
rbwork = &cpu_buffer->irq_work;
}
- rbwork->wait_index++;
- /* make sure the waiters see the new index */
- smp_wmb();
-
/* This can be called in any context */
irq_work_queue(&rbwork->work);
}
+static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ bool ret = false;
+
+ /* Reads of all CPUs always waits for any data */
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ return !ring_buffer_empty(buffer);
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ if (!ring_buffer_empty_cpu(buffer, cpu)) {
+ unsigned long flags;
+ bool pagebusy;
+
+ if (!full)
+ return true;
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ ret = !pagebusy && full_hit(buffer, cpu, full);
+
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)
+ cpu_buffer->shortest_full = full;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ }
+ return ret;
+}
+
/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
@@ -821,7 +846,6 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
struct rb_irq_work *work;
- long wait_index;
int ret = 0;
/*
@@ -840,81 +864,54 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
work = &cpu_buffer->irq_work;
}
- wait_index = READ_ONCE(work->wait_index);
-
- while (true) {
- if (full)
- prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
- else
- prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
-
- /*
- * The events can happen in critical sections where
- * checking a work queue can cause deadlocks.
- * After adding a task to the queue, this flag is set
- * only to notify events to try to wake up the queue
- * using irq_work.
- *
- * We don't clear it even if the buffer is no longer
- * empty. The flag only causes the next event to run
- * irq_work to do the work queue wake up. The worse
- * that can happen if we race with !trace_empty() is that
- * an event will cause an irq_work to try to wake up
- * an empty queue.
- *
- * There's no reason to protect this flag either, as
- * the work queue and irq_work logic will do the necessary
- * synchronization for the wake ups. The only thing
- * that is necessary is that the wake up happens after
- * a task has been queued. It's OK for spurious wake ups.
- */
- if (full)
- work->full_waiters_pending = true;
- else
- work->waiters_pending = true;
-
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
- break;
-
- if (cpu != RING_BUFFER_ALL_CPUS &&
- !ring_buffer_empty_cpu(buffer, cpu)) {
- unsigned long flags;
- bool pagebusy;
- bool done;
-
- if (!full)
- break;
-
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
- pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
- done = !pagebusy && full_hit(buffer, cpu, full);
+ if (full)
+ prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
+ else
+ prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- if (done)
- break;
- }
+ /*
+ * The events can happen in critical sections where
+ * checking a work queue can cause deadlocks.
+ * After adding a task to the queue, this flag is set
+ * only to notify events to try to wake up the queue
+ * using irq_work.
+ *
+ * We don't clear it even if the buffer is no longer
+ * empty. The flag only causes the next event to run
+ * irq_work to do the work queue wake up. The worse
+ * that can happen if we race with !trace_empty() is that
+ * an event will cause an irq_work to try to wake up
+ * an empty queue.
+ *
+ * There's no reason to protect this flag either, as
+ * the work queue and irq_work logic will do the necessary
+ * synchronization for the wake ups. The only thing
+ * that is necessary is that the wake up happens after
+ * a task has been queued. It's OK for spurious wake ups.
+ */
+ if (full)
+ work->full_waiters_pending = true;
+ else
+ work->waiters_pending = true;
- schedule();
+ if (rb_watermark_hit(buffer, cpu, full))
+ goto out;
- /* Make sure to see the new wait index */
- smp_rmb();
- if (wait_index != work->wait_index)
- break;
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
}
+ schedule();
+ out:
if (full)
finish_wait(&work->full_waiters, &wait);
else
finish_wait(&work->waiters, &wait);
+ if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
+ ret = -EINTR;
+
return ret;
}
--
2.43.0
The .get_modes() callback is supposed to return the number of modes,
never a negative error code. If a negative value is returned, it'll just
be interpreted as a negative count, and added to previous calculations.
Document the rules, but handle the negative values gracefully with an
error message.
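For instance, a driver .get_modes() that follows the documented rule
would look along these lines (a hypothetical driver; foo_ddc_adapter()
stands in for however the driver reaches its DDC bus):

	static int foo_connector_get_modes(struct drm_connector *connector)
	{
		struct edid *edid;
		int count;

		edid = drm_get_edid(connector, foo_ddc_adapter(connector));
		if (!edid)
			return 0;	/* no modes, never a negative errno */

		drm_connector_update_edid_property(connector, edid);
		count = drm_add_edid_modes(connector, edid);
		kfree(edid);

		return count;
	}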
Cc: stable(a)vger.kernel.org
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
---
drivers/gpu/drm/drm_probe_helper.c | 7 +++++++
include/drm/drm_modeset_helper_vtables.h | 3 ++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 4d60cc810b57..bf2dd1f46b6c 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -422,6 +422,13 @@ static int drm_helper_probe_get_modes(struct drm_connector *connector)
count = connector_funcs->get_modes(connector);
+ /* The .get_modes() callback should not return negative values. */
+ if (count < 0) {
+ drm_err(connector->dev, ".get_modes() returned %pe\n",
+ ERR_PTR(count));
+ count = 0;
+ }
+
/*
* Fallback for when DDC probe failed in drm_get_edid() and thus skipped
* override/firmware EDID.
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index 881b03e4dc28..9ed42469540e 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -898,7 +898,8 @@ struct drm_connector_helper_funcs {
*
* RETURNS:
*
- * The number of modes added by calling drm_mode_probed_add().
+ * The number of modes added by calling drm_mode_probed_add(). Return 0
+ * on failures (no modes) instead of negative error codes.
*/
int (*get_modes)(struct drm_connector *connector);
--
2.39.2