The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5bc5cc2819c2c0adb644919e3e790b504ea47e0a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon(a)collabora.com>
Date: Thu, 5 Nov 2020 16:17:04 +0100
Subject: [PATCH] drm/panfrost: Move the GPU reset bits outside the timeout
handler
We've fixed many races in panfrost_job_timedout() but some remain.
Instead of trying to fix it again, let's simplify the logic and move
the reset bits to a separate work scheduled when one of the queue
reports a timeout.
v5:
- Simplify panfrost_scheduler_stop() (Steven Price)
- Always restart the queue in panfrost_scheduler_start() even if
the status is corrupted (Steven Price)
v4:
- Rework the logic to prevent a race between drm_sched_start()
(reset work) and drm_sched_job_timedout() (timeout work)
- Drop Steven's R-b
- Add dma_fence annotation to the panfrost_reset() function (Daniel Vetter)
v3:
- Replace the atomic_cmpxchg() by an atomic_xchg() (Robin Murphy)
- Add Steven's R-b
v2:
- Use atomic_cmpxchg() to conditionally schedule the reset work
(Steven Price)
Fixes: 1a11a88cfd9a ("drm/panfrost: Fix job timeout handling")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon(a)collabora.com>
Reviewed-by: Steven Price <steven.price(a)arm.com>
Signed-off-by: Steven Price <steven.price(a)arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201105151704.2010667-1-bori…
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index 1daf9322954a..fbcf5edbe367 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -200,7 +200,6 @@ int panfrost_device_init(struct panfrost_device *pfdev)
struct resource *res;
mutex_init(&pfdev->sched_lock);
- mutex_init(&pfdev->reset_lock);
INIT_LIST_HEAD(&pfdev->scheduled_jobs);
INIT_LIST_HEAD(&pfdev->as_lru_list);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 140e004a3790..597cf1459b0a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -106,7 +106,11 @@ struct panfrost_device {
struct panfrost_perfcnt *perfcnt;
struct mutex sched_lock;
- struct mutex reset_lock;
+
+ struct {
+ struct work_struct work;
+ atomic_t pending;
+ } reset;
struct mutex shrinker_lock;
struct list_head shrinker_list;
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index e75b7d2192f7..04e6f6f9b742 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -20,12 +20,21 @@
#include "panfrost_gpu.h"
#include "panfrost_mmu.h"
+#define JOB_TIMEOUT_MS 500
+
#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
#define job_read(dev, reg) readl(dev->iomem + (reg))
+enum panfrost_queue_status {
+ PANFROST_QUEUE_STATUS_ACTIVE,
+ PANFROST_QUEUE_STATUS_STOPPED,
+ PANFROST_QUEUE_STATUS_STARTING,
+ PANFROST_QUEUE_STATUS_FAULT_PENDING,
+};
+
struct panfrost_queue_state {
struct drm_gpu_scheduler sched;
- bool stopped;
+ atomic_t status;
struct mutex lock;
u64 fence_context;
u64 emit_seqno;
@@ -373,28 +382,61 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue,
struct drm_sched_job *bad)
{
+ enum panfrost_queue_status old_status;
bool stopped = false;
mutex_lock(&queue->lock);
- if (!queue->stopped) {
- drm_sched_stop(&queue->sched, bad);
- if (bad)
- drm_sched_increase_karma(bad);
- queue->stopped = true;
- stopped = true;
- }
+ old_status = atomic_xchg(&queue->status,
+ PANFROST_QUEUE_STATUS_STOPPED);
+ if (old_status == PANFROST_QUEUE_STATUS_STOPPED)
+ goto out;
+
+ WARN_ON(old_status != PANFROST_QUEUE_STATUS_ACTIVE);
+ drm_sched_stop(&queue->sched, bad);
+ if (bad)
+ drm_sched_increase_karma(bad);
+
+ stopped = true;
+
+ /*
+ * Set the timeout to max so the timer doesn't get started
+ * when we return from the timeout handler (restored in
+ * panfrost_scheduler_start()).
+ */
+ queue->sched.timeout = MAX_SCHEDULE_TIMEOUT;
+
+out:
mutex_unlock(&queue->lock);
return stopped;
}
+static void panfrost_scheduler_start(struct panfrost_queue_state *queue)
+{
+ enum panfrost_queue_status old_status;
+
+ mutex_lock(&queue->lock);
+ old_status = atomic_xchg(&queue->status,
+ PANFROST_QUEUE_STATUS_STARTING);
+ WARN_ON(old_status != PANFROST_QUEUE_STATUS_STOPPED);
+
+ /* Restore the original timeout before starting the scheduler. */
+ queue->sched.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS);
+ drm_sched_resubmit_jobs(&queue->sched);
+ drm_sched_start(&queue->sched, true);
+ old_status = atomic_xchg(&queue->status,
+ PANFROST_QUEUE_STATUS_ACTIVE);
+ if (old_status == PANFROST_QUEUE_STATUS_FAULT_PENDING)
+ drm_sched_fault(&queue->sched);
+
+ mutex_unlock(&queue->lock);
+}
+
static void panfrost_job_timedout(struct drm_sched_job *sched_job)
{
struct panfrost_job *job = to_panfrost_job(sched_job);
struct panfrost_device *pfdev = job->pfdev;
int js = panfrost_job_get_slot(job);
- unsigned long flags;
- int i;
/*
* If the GPU managed to complete this jobs fence, the timeout is
@@ -415,56 +457,9 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job))
return;
- if (!mutex_trylock(&pfdev->reset_lock))
- return;
-
- for (i = 0; i < NUM_JOB_SLOTS; i++) {
- struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
-
- /*
- * If the queue is still active, make sure we wait for any
- * pending timeouts.
- */
- if (!pfdev->js->queue[i].stopped)
- cancel_delayed_work_sync(&sched->work_tdr);
-
- /*
- * If the scheduler was not already stopped, there's a tiny
- * chance a timeout has expired just before we stopped it, and
- * drm_sched_stop() does not flush pending works. Let's flush
- * them now so the timeout handler doesn't get called in the
- * middle of a reset.
- */
- if (panfrost_scheduler_stop(&pfdev->js->queue[i], NULL))
- cancel_delayed_work_sync(&sched->work_tdr);
-
- /*
- * Now that we cancelled the pending timeouts, we can safely
- * reset the stopped state.
- */
- pfdev->js->queue[i].stopped = false;
- }
-
- spin_lock_irqsave(&pfdev->js->job_lock, flags);
- for (i = 0; i < NUM_JOB_SLOTS; i++) {
- if (pfdev->jobs[i]) {
- pm_runtime_put_noidle(pfdev->dev);
- panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
- pfdev->jobs[i] = NULL;
- }
- }
- spin_unlock_irqrestore(&pfdev->js->job_lock, flags);
-
- panfrost_device_reset(pfdev);
-
- for (i = 0; i < NUM_JOB_SLOTS; i++)
- drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
-
- mutex_unlock(&pfdev->reset_lock);
-
- /* restart scheduler after GPU is usable again */
- for (i = 0; i < NUM_JOB_SLOTS; i++)
- drm_sched_start(&pfdev->js->queue[i].sched, true);
+ /* Schedule a reset if there's no reset in progress. */
+ if (!atomic_xchg(&pfdev->reset.pending, 1))
+ schedule_work(&pfdev->reset.work);
}
static const struct drm_sched_backend_ops panfrost_sched_ops = {
@@ -496,6 +491,8 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
job_write(pfdev, JOB_INT_CLEAR, mask);
if (status & JOB_INT_MASK_ERR(j)) {
+ enum panfrost_queue_status old_status;
+
job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
@@ -504,7 +501,18 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
job_read(pfdev, JS_HEAD_LO(j)),
job_read(pfdev, JS_TAIL_LO(j)));
- drm_sched_fault(&pfdev->js->queue[j].sched);
+ /*
+ * When the queue is being restarted we don't report
+ * faults directly to avoid races between the timeout
+ * and reset handlers. panfrost_scheduler_start() will
+ * call drm_sched_fault() after the queue has been
+ * started if status == FAULT_PENDING.
+ */
+ old_status = atomic_cmpxchg(&pfdev->js->queue[j].status,
+ PANFROST_QUEUE_STATUS_STARTING,
+ PANFROST_QUEUE_STATUS_FAULT_PENDING);
+ if (old_status == PANFROST_QUEUE_STATUS_ACTIVE)
+ drm_sched_fault(&pfdev->js->queue[j].sched);
}
if (status & JOB_INT_MASK_DONE(j)) {
@@ -531,11 +539,66 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
+static void panfrost_reset(struct work_struct *work)
+{
+ struct panfrost_device *pfdev = container_of(work,
+ struct panfrost_device,
+ reset.work);
+ unsigned long flags;
+ unsigned int i;
+ bool cookie;
+
+ cookie = dma_fence_begin_signalling();
+ for (i = 0; i < NUM_JOB_SLOTS; i++) {
+ /*
+ * We want pending timeouts to be handled before we attempt
+ * to stop the scheduler. If we don't do that and the timeout
+ * handler is in flight, it might have removed the bad job
+ * from the list, and we'll lose this job if the reset handler
+ * enters the critical section in panfrost_scheduler_stop()
+ * before the timeout handler.
+ *
+ * Timeout is set to MAX_SCHEDULE_TIMEOUT - 1 because we need
+ * something big enough to make sure the timer will not expire
+ * before we manage to stop the scheduler, but we can't use
+ * MAX_SCHEDULE_TIMEOUT because drm_sched_get_cleanup_job()
+ * considers that as 'timer is not running' and will dequeue
+ * the job without making sure the timeout handler is not
+ * running.
+ */
+ pfdev->js->queue[i].sched.timeout = MAX_SCHEDULE_TIMEOUT - 1;
+ cancel_delayed_work_sync(&pfdev->js->queue[i].sched.work_tdr);
+ panfrost_scheduler_stop(&pfdev->js->queue[i], NULL);
+ }
+
+ /* All timers have been stopped, we can safely reset the pending state. */
+ atomic_set(&pfdev->reset.pending, 0);
+
+ spin_lock_irqsave(&pfdev->js->job_lock, flags);
+ for (i = 0; i < NUM_JOB_SLOTS; i++) {
+ if (pfdev->jobs[i]) {
+ pm_runtime_put_noidle(pfdev->dev);
+ panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
+ pfdev->jobs[i] = NULL;
+ }
+ }
+ spin_unlock_irqrestore(&pfdev->js->job_lock, flags);
+
+ panfrost_device_reset(pfdev);
+
+ for (i = 0; i < NUM_JOB_SLOTS; i++)
+ panfrost_scheduler_start(&pfdev->js->queue[i]);
+
+ dma_fence_end_signalling(cookie);
+}
+
int panfrost_job_init(struct panfrost_device *pfdev)
{
struct panfrost_job_slot *js;
int ret, j, irq;
+ INIT_WORK(&pfdev->reset.work, panfrost_reset);
+
pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
if (!js)
return -ENOMEM;
@@ -560,7 +623,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
ret = drm_sched_init(&js->queue[j].sched,
&panfrost_sched_ops,
- 1, 0, msecs_to_jiffies(500),
+ 1, 0, msecs_to_jiffies(JOB_TIMEOUT_MS),
"pan_js");
if (ret) {
dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 1a11a88cfd9a97e13be8bc880c4795f9844fbbec Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon(a)collabora.com>
Date: Fri, 2 Oct 2020 14:25:06 +0200
Subject: [PATCH] drm/panfrost: Fix job timeout handling
If more than two jobs end up timeout-ing concurrently, only one of them
(the one attached to the scheduler acquiring the lock) is fully handled.
The other one remains in a dangling state where it's no longer part of
the scheduling queue, but still blocks something in scheduler, leading
to repetitive timeouts when new jobs are queued.
Let's make sure all bad jobs are properly handled by the thread
acquiring the lock.
v3:
- Add Steven's R-b
- Don't take the sched_lock when stopping the schedulers
v2:
- Fix the subject prefix
- Stop the scheduler before returning from panfrost_job_timedout()
- Call cancel_delayed_work_sync() after drm_sched_stop() to make sure
no timeout handlers are in flight when we reset the GPU (Steven Price)
- Make sure we release the reset lock before restarting the
schedulers (Steven Price)
Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon(a)collabora.com>
Reviewed-by: Steven Price <steven.price(a)arm.com>
Signed-off-by: Steven Price <steven.price(a)arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201002122506.1374183-1-bori…
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 30e7b7196dab..d0469e944143 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -25,7 +25,8 @@
struct panfrost_queue_state {
struct drm_gpu_scheduler sched;
-
+ bool stopped;
+ struct mutex lock;
u64 fence_context;
u64 emit_seqno;
};
@@ -369,6 +370,24 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
job_write(pfdev, JOB_INT_MASK, irq_mask);
}
+static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue,
+ struct drm_sched_job *bad)
+{
+ bool stopped = false;
+
+ mutex_lock(&queue->lock);
+ if (!queue->stopped) {
+ drm_sched_stop(&queue->sched, bad);
+ if (bad)
+ drm_sched_increase_karma(bad);
+ queue->stopped = true;
+ stopped = true;
+ }
+ mutex_unlock(&queue->lock);
+
+ return stopped;
+}
+
static void panfrost_job_timedout(struct drm_sched_job *sched_job)
{
struct panfrost_job *job = to_panfrost_job(sched_job);
@@ -392,19 +411,39 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
job_read(pfdev, JS_TAIL_LO(js)),
sched_job);
+ /* Scheduler is already stopped, nothing to do. */
+ if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job))
+ return;
+
if (!mutex_trylock(&pfdev->reset_lock))
return;
for (i = 0; i < NUM_JOB_SLOTS; i++) {
struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
- drm_sched_stop(sched, sched_job);
- if (js != i)
- /* Ensure any timeouts on other slots have finished */
+ /*
+ * If the queue is still active, make sure we wait for any
+ * pending timeouts.
+ */
+ if (!pfdev->js->queue[i].stopped)
cancel_delayed_work_sync(&sched->work_tdr);
- }
- drm_sched_increase_karma(sched_job);
+ /*
+ * If the scheduler was not already stopped, there's a tiny
+ * chance a timeout has expired just before we stopped it, and
+ * drm_sched_stop() does not flush pending works. Let's flush
+ * them now so the timeout handler doesn't get called in the
+ * middle of a reset.
+ */
+ if (panfrost_scheduler_stop(&pfdev->js->queue[i], NULL))
+ cancel_delayed_work_sync(&sched->work_tdr);
+
+ /*
+ * Now that we cancelled the pending timeouts, we can safely
+ * reset the stopped state.
+ */
+ pfdev->js->queue[i].stopped = false;
+ }
spin_lock_irqsave(&pfdev->js->job_lock, flags);
for (i = 0; i < NUM_JOB_SLOTS; i++) {
@@ -421,11 +460,11 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
for (i = 0; i < NUM_JOB_SLOTS; i++)
drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
+ mutex_unlock(&pfdev->reset_lock);
+
/* restart scheduler after GPU is usable again */
for (i = 0; i < NUM_JOB_SLOTS; i++)
drm_sched_start(&pfdev->js->queue[i].sched, true);
-
- mutex_unlock(&pfdev->reset_lock);
}
static const struct drm_sched_backend_ops panfrost_sched_ops = {
@@ -558,6 +597,7 @@ int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
int ret, i;
for (i = 0; i < NUM_JOB_SLOTS; i++) {
+ mutex_init(&js->queue[i].lock);
sched = &js->queue[i].sched;
ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
DRM_SCHED_PRIORITY_NORMAL, &sched,
@@ -570,10 +610,14 @@ int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
{
+ struct panfrost_device *pfdev = panfrost_priv->pfdev;
+ struct panfrost_job_slot *js = pfdev->js;
int i;
- for (i = 0; i < NUM_JOB_SLOTS; i++)
+ for (i = 0; i < NUM_JOB_SLOTS; i++) {
drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
+ mutex_destroy(&js->queue[i].lock);
+ }
}
int panfrost_job_is_idle(struct panfrost_device *pfdev)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 89bdfaf93d9157499c3a0d61f489df66f2dead7f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi(a)redhat.com>
Date: Mon, 14 Dec 2020 15:26:14 +0100
Subject: [PATCH] ovl: make ioctl() safe
ovl_ioctl_set_flags() does a capability check using flags, but then the
real ioctl double-fetches flags and uses potentially different value.
The "Check the capability before cred override" comment misleading: user
can skip this check by presenting benign flags first and then overwriting
them to non-benign flags.
Just remove the cred override for now, hoping this doesn't cause a
regression.
The proper solution is to create a new setxflags i_op (patches are in the
works).
Xfstests don't show a regression.
Reported-by: Dmitry Vyukov <dvyukov(a)google.com>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
Reviewed-by: Amir Goldstein <amir73il(a)gmail.com>
Fixes: dab5ca8fd9dd ("ovl: add lsattr/chattr support")
Cc: <stable(a)vger.kernel.org> # v4.19
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index efccb7c1f9bc..a1f72ac053e5 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -541,46 +541,31 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fd real;
- const struct cred *old_cred;
long ret;
ret = ovl_real_fdget(file, &real);
if (ret)
return ret;
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = security_file_ioctl(real.file, cmd, arg);
- if (!ret)
+ if (!ret) {
+ /*
+ * Don't override creds, since we currently can't safely check
+ * permissions before doing so.
+ */
ret = vfs_ioctl(real.file, cmd, arg);
- revert_creds(old_cred);
+ }
fdput(real);
return ret;
}
-static unsigned int ovl_iflags_to_fsflags(unsigned int iflags)
-{
- unsigned int flags = 0;
-
- if (iflags & S_SYNC)
- flags |= FS_SYNC_FL;
- if (iflags & S_APPEND)
- flags |= FS_APPEND_FL;
- if (iflags & S_IMMUTABLE)
- flags |= FS_IMMUTABLE_FL;
- if (iflags & S_NOATIME)
- flags |= FS_NOATIME_FL;
-
- return flags;
-}
-
static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int flags)
+ unsigned long arg)
{
long ret;
struct inode *inode = file_inode(file);
- unsigned int oldflags;
if (!inode_owner_or_capable(inode))
return -EACCES;
@@ -591,10 +576,13 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
inode_lock(inode);
- /* Check the capability before cred override */
- oldflags = ovl_iflags_to_fsflags(READ_ONCE(inode->i_flags));
- ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
- if (ret)
+ /*
+ * Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE
+ * capability.
+ */
+ ret = -EPERM;
+ if (!ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) &&
+ !capable(CAP_LINUX_IMMUTABLE))
goto unlock;
ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
@@ -613,46 +601,6 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
}
-static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- unsigned int flags;
-
- if (get_user(flags, (int __user *) arg))
- return -EFAULT;
-
- return ovl_ioctl_set_flags(file, cmd, arg, flags);
-}
-
-static unsigned int ovl_fsxflags_to_fsflags(unsigned int xflags)
-{
- unsigned int flags = 0;
-
- if (xflags & FS_XFLAG_SYNC)
- flags |= FS_SYNC_FL;
- if (xflags & FS_XFLAG_APPEND)
- flags |= FS_APPEND_FL;
- if (xflags & FS_XFLAG_IMMUTABLE)
- flags |= FS_IMMUTABLE_FL;
- if (xflags & FS_XFLAG_NOATIME)
- flags |= FS_NOATIME_FL;
-
- return flags;
-}
-
-static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- struct fsxattr fa;
-
- memset(&fa, 0, sizeof(fa));
- if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
- return -EFAULT;
-
- return ovl_ioctl_set_flags(file, cmd, arg,
- ovl_fsxflags_to_fsflags(fa.fsx_xflags));
-}
-
long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long ret;
@@ -663,12 +611,9 @@ long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
ret = ovl_real_ioctl(file, cmd, arg);
break;
- case FS_IOC_SETFLAGS:
- ret = ovl_ioctl_set_fsflags(file, cmd, arg);
- break;
-
case FS_IOC_FSSETXATTR:
- ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
+ case FS_IOC_SETFLAGS:
+ ret = ovl_ioctl_set_flags(file, cmd, arg);
break;
default:
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 20f1431160c6b590cdc269a846fc5a448abf5b98 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard(a)nod.at>
Date: Mon, 16 Nov 2020 22:05:30 +0100
Subject: [PATCH] ubifs: wbuf: Don't leak kernel memory to flash
Write buffers use a kmalloc()'ed buffer, they can leak
up to seven bytes of kernel memory to flash if writes are not
aligned.
So use ubifs_pad() to fill these gaps with padding bytes.
This was never a problem while scanning because the scanner logic
manually aligns node lengths and skips over these gaps.
Cc: <stable(a)vger.kernel.org>
Fixes: 1e51764a3c2ac05a2 ("UBIFS: add new flash file system")
Signed-off-by: Richard Weinberger <richard(a)nod.at>
Reviewed-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
Signed-off-by: Richard Weinberger <richard(a)nod.at>
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 2dc933f73165..a9cabb3fa64c 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -319,7 +319,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
uint32_t crc;
- ubifs_assert(c, pad >= 0 && !(pad & 7));
+ ubifs_assert(c, pad >= 0);
if (pad >= UBIFS_PAD_NODE_SZ) {
struct ubifs_ch *ch = buf;
@@ -764,6 +764,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
* write-buffer.
*/
memcpy(wbuf->buf + wbuf->used, buf, len);
+ if (aligned_len > len) {
+ ubifs_assert(c, aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
+ }
if (aligned_len == wbuf->avail) {
dbg_io("flush jhead %s wbuf to LEB %d:%d",
@@ -856,13 +860,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
}
spin_lock(&wbuf->lock);
- if (aligned_len)
+ if (aligned_len) {
/*
* And now we have what's left and what does not take whole
* max. write unit, so write it to the write-buffer and we are
* done.
*/
memcpy(wbuf->buf, buf + written, len);
+ if (aligned_len > len) {
+ ubifs_assert(c, aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + len, aligned_len - len);
+ }
+ }
if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 20f1431160c6b590cdc269a846fc5a448abf5b98 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard(a)nod.at>
Date: Mon, 16 Nov 2020 22:05:30 +0100
Subject: [PATCH] ubifs: wbuf: Don't leak kernel memory to flash
Write buffers use a kmalloc()'ed buffer, they can leak
up to seven bytes of kernel memory to flash if writes are not
aligned.
So use ubifs_pad() to fill these gaps with padding bytes.
This was never a problem while scanning because the scanner logic
manually aligns node lengths and skips over these gaps.
Cc: <stable(a)vger.kernel.org>
Fixes: 1e51764a3c2ac05a2 ("UBIFS: add new flash file system")
Signed-off-by: Richard Weinberger <richard(a)nod.at>
Reviewed-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
Signed-off-by: Richard Weinberger <richard(a)nod.at>
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 2dc933f73165..a9cabb3fa64c 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -319,7 +319,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
uint32_t crc;
- ubifs_assert(c, pad >= 0 && !(pad & 7));
+ ubifs_assert(c, pad >= 0);
if (pad >= UBIFS_PAD_NODE_SZ) {
struct ubifs_ch *ch = buf;
@@ -764,6 +764,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
* write-buffer.
*/
memcpy(wbuf->buf + wbuf->used, buf, len);
+ if (aligned_len > len) {
+ ubifs_assert(c, aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
+ }
if (aligned_len == wbuf->avail) {
dbg_io("flush jhead %s wbuf to LEB %d:%d",
@@ -856,13 +860,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
}
spin_lock(&wbuf->lock);
- if (aligned_len)
+ if (aligned_len) {
/*
* And now we have what's left and what does not take whole
* max. write unit, so write it to the write-buffer and we are
* done.
*/
memcpy(wbuf->buf, buf + written, len);
+ if (aligned_len > len) {
+ ubifs_assert(c, aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + len, aligned_len - len);
+ }
+ }
if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 207cdd565dfc95a0a5185263a567817b7ebf5467 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu(a)huawei.com>
Date: Thu, 26 Nov 2020 11:34:56 +0100
Subject: [PATCH] ima: Don't modify file descriptor mode on the fly
Commit a408e4a86b36b ("ima: open a new file instance if no read
permissions") already introduced a second open to measure a file when the
original file descriptor does not allow it. However, it didn't remove the
existing method of changing the mode of the original file descriptor, which
is still necessary if the current process does not have enough privileges
to open a new one.
Changing the mode isn't really an option, as the filesystem might need to
do preliminary steps to make the read possible. Thus, this patch removes
the code and keeps the second open as the only option to measure a file
when it is unreadable with the original file descriptor.
Cc: <stable(a)vger.kernel.org> # 4.20.x: 0014cc04e8ec0 ima: Set file->f_mode
Fixes: 2fe5d6def1672 ("ima: integrity appraisal extension")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Mimi Zohar <zohar(a)linux.ibm.com>
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 21989fa0c107..f6a7e9643b54 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -537,7 +537,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
loff_t i_size;
int rc;
struct file *f = file;
- bool new_file_instance = false, modified_mode = false;
+ bool new_file_instance = false;
/*
* For consistency, fail file's opened with the O_DIRECT flag on
@@ -555,18 +555,10 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL);
flags |= O_RDONLY;
f = dentry_open(&file->f_path, flags, file->f_cred);
- if (IS_ERR(f)) {
- /*
- * Cannot open the file again, lets modify f_mode
- * of original and continue
- */
- pr_info_ratelimited("Unable to reopen file for reading.\n");
- f = file;
- f->f_mode |= FMODE_READ;
- modified_mode = true;
- } else {
- new_file_instance = true;
- }
+ if (IS_ERR(f))
+ return PTR_ERR(f);
+
+ new_file_instance = true;
}
i_size = i_size_read(file_inode(f));
@@ -581,8 +573,6 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
out:
if (new_file_instance)
fput(f);
- else if (modified_mode)
- f->f_mode &= ~FMODE_READ;
return rc;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 207cdd565dfc95a0a5185263a567817b7ebf5467 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu(a)huawei.com>
Date: Thu, 26 Nov 2020 11:34:56 +0100
Subject: [PATCH] ima: Don't modify file descriptor mode on the fly
Commit a408e4a86b36b ("ima: open a new file instance if no read
permissions") already introduced a second open to measure a file when the
original file descriptor does not allow it. However, it didn't remove the
existing method of changing the mode of the original file descriptor, which
is still necessary if the current process does not have enough privileges
to open a new one.
Changing the mode isn't really an option, as the filesystem might need to
do preliminary steps to make the read possible. Thus, this patch removes
the code and keeps the second open as the only option to measure a file
when it is unreadable with the original file descriptor.
Cc: <stable(a)vger.kernel.org> # 4.20.x: 0014cc04e8ec0 ima: Set file->f_mode
Fixes: 2fe5d6def1672 ("ima: integrity appraisal extension")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Mimi Zohar <zohar(a)linux.ibm.com>
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 21989fa0c107..f6a7e9643b54 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -537,7 +537,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
loff_t i_size;
int rc;
struct file *f = file;
- bool new_file_instance = false, modified_mode = false;
+ bool new_file_instance = false;
/*
* For consistency, fail file's opened with the O_DIRECT flag on
@@ -555,18 +555,10 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL);
flags |= O_RDONLY;
f = dentry_open(&file->f_path, flags, file->f_cred);
- if (IS_ERR(f)) {
- /*
- * Cannot open the file again, lets modify f_mode
- * of original and continue
- */
- pr_info_ratelimited("Unable to reopen file for reading.\n");
- f = file;
- f->f_mode |= FMODE_READ;
- modified_mode = true;
- } else {
- new_file_instance = true;
- }
+ if (IS_ERR(f))
+ return PTR_ERR(f);
+
+ new_file_instance = true;
}
i_size = i_size_read(file_inode(f));
@@ -581,8 +573,6 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
out:
if (new_file_instance)
fput(f);
- else if (modified_mode)
- f->f_mode &= ~FMODE_READ;
return rc;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 89bdfaf93d9157499c3a0d61f489df66f2dead7f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi(a)redhat.com>
Date: Mon, 14 Dec 2020 15:26:14 +0100
Subject: [PATCH] ovl: make ioctl() safe
ovl_ioctl_set_flags() does a capability check using flags, but then the
real ioctl double-fetches flags and uses potentially different value.
The "Check the capability before cred override" comment misleading: user
can skip this check by presenting benign flags first and then overwriting
them to non-benign flags.
Just remove the cred override for now, hoping this doesn't cause a
regression.
The proper solution is to create a new setxflags i_op (patches are in the
works).
Xfstests don't show a regression.
Reported-by: Dmitry Vyukov <dvyukov(a)google.com>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
Reviewed-by: Amir Goldstein <amir73il(a)gmail.com>
Fixes: dab5ca8fd9dd ("ovl: add lsattr/chattr support")
Cc: <stable(a)vger.kernel.org> # v4.19
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index efccb7c1f9bc..a1f72ac053e5 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -541,46 +541,31 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fd real;
- const struct cred *old_cred;
long ret;
ret = ovl_real_fdget(file, &real);
if (ret)
return ret;
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = security_file_ioctl(real.file, cmd, arg);
- if (!ret)
+ if (!ret) {
+ /*
+ * Don't override creds, since we currently can't safely check
+ * permissions before doing so.
+ */
ret = vfs_ioctl(real.file, cmd, arg);
- revert_creds(old_cred);
+ }
fdput(real);
return ret;
}
-static unsigned int ovl_iflags_to_fsflags(unsigned int iflags)
-{
- unsigned int flags = 0;
-
- if (iflags & S_SYNC)
- flags |= FS_SYNC_FL;
- if (iflags & S_APPEND)
- flags |= FS_APPEND_FL;
- if (iflags & S_IMMUTABLE)
- flags |= FS_IMMUTABLE_FL;
- if (iflags & S_NOATIME)
- flags |= FS_NOATIME_FL;
-
- return flags;
-}
-
static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int flags)
+ unsigned long arg)
{
long ret;
struct inode *inode = file_inode(file);
- unsigned int oldflags;
if (!inode_owner_or_capable(inode))
return -EACCES;
@@ -591,10 +576,13 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
inode_lock(inode);
- /* Check the capability before cred override */
- oldflags = ovl_iflags_to_fsflags(READ_ONCE(inode->i_flags));
- ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
- if (ret)
+ /*
+ * Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE
+ * capability.
+ */
+ ret = -EPERM;
+ if (!ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) &&
+ !capable(CAP_LINUX_IMMUTABLE))
goto unlock;
ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
@@ -613,46 +601,6 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
}
-static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- unsigned int flags;
-
- if (get_user(flags, (int __user *) arg))
- return -EFAULT;
-
- return ovl_ioctl_set_flags(file, cmd, arg, flags);
-}
-
-static unsigned int ovl_fsxflags_to_fsflags(unsigned int xflags)
-{
- unsigned int flags = 0;
-
- if (xflags & FS_XFLAG_SYNC)
- flags |= FS_SYNC_FL;
- if (xflags & FS_XFLAG_APPEND)
- flags |= FS_APPEND_FL;
- if (xflags & FS_XFLAG_IMMUTABLE)
- flags |= FS_IMMUTABLE_FL;
- if (xflags & FS_XFLAG_NOATIME)
- flags |= FS_NOATIME_FL;
-
- return flags;
-}
-
-static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- struct fsxattr fa;
-
- memset(&fa, 0, sizeof(fa));
- if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
- return -EFAULT;
-
- return ovl_ioctl_set_flags(file, cmd, arg,
- ovl_fsxflags_to_fsflags(fa.fsx_xflags));
-}
-
long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long ret;
@@ -663,12 +611,9 @@ long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
ret = ovl_real_ioctl(file, cmd, arg);
break;
- case FS_IOC_SETFLAGS:
- ret = ovl_ioctl_set_fsflags(file, cmd, arg);
- break;
-
case FS_IOC_FSSETXATTR:
- ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
+ case FS_IOC_SETFLAGS:
+ ret = ovl_ioctl_set_flags(file, cmd, arg);
break;
default: