The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From aa9509209c5ac2f0b35d01a922bf9ae072d0c2fc Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Wed, 8 Jan 2020 10:46:05 -0500
Subject: [PATCH] dm writecache: fix incorrect flush sequence when doing SSD
mode commit
When committing state, the function writecache_flush does the following:
1. write metadata (writecache_commit_flushed)
2. flush disk cache (writecache_commit_flushed)
3. wait for data writes to complete (writecache_wait_for_ios)
4. increase superblock seq_count
5. write the superblock
6. flush disk cache
It may happen that at step 3, when we wait for some write to finish, the
disk may report the write as finished, but the write only hit the disk
cache and it is not yet stored in persistent storage. At step 5 we write
the superblock - it may happen that the superblock is written before the
write that we waited for in step 3. If the machine crashes, it may result
in incorrect data being returned after reboot.
In order to fix the bug, we must swap steps 2 and 3 in the above sequence,
so that we first wait for writes to complete and then flush the disk
cache.
Fixes: 48debafe4f2f ("dm: add writecache target")
Cc: stable(a)vger.kernel.org # 4.18+
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 7d727a72aa13..9b0a3bf6a4a1 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -442,7 +442,13 @@ static void writecache_notify_io(unsigned long error, void *context)
complete(&endio->c);
}
-static void ssd_commit_flushed(struct dm_writecache *wc)
+static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
+{
+ wait_event(wc->bio_in_progress_wait[direction],
+ !atomic_read(&wc->bio_in_progress[direction]));
+}
+
+static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
struct dm_io_region region;
struct dm_io_request req;
@@ -488,17 +494,20 @@ static void ssd_commit_flushed(struct dm_writecache *wc)
writecache_notify_io(0, &endio);
wait_for_completion_io(&endio.c);
+ if (wait_for_ios)
+ writecache_wait_for_ios(wc, WRITE);
+
writecache_disk_flush(wc, wc->ssd_dev);
memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
}
-static void writecache_commit_flushed(struct dm_writecache *wc)
+static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
if (WC_MODE_PMEM(wc))
wmb();
else
- ssd_commit_flushed(wc);
+ ssd_commit_flushed(wc, wait_for_ios);
}
static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
@@ -522,12 +531,6 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
writecache_error(wc, r, "error flushing metadata: %d", r);
}
-static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
-{
- wait_event(wc->bio_in_progress_wait[direction],
- !atomic_read(&wc->bio_in_progress[direction]));
-}
-
#define WFE_RETURN_FOLLOWING 1
#define WFE_LOWEST_SEQ 2
@@ -724,15 +727,12 @@ static void writecache_flush(struct dm_writecache *wc)
e = e2;
cond_resched();
}
- writecache_commit_flushed(wc);
-
- if (!WC_MODE_PMEM(wc))
- writecache_wait_for_ios(wc, WRITE);
+ writecache_commit_flushed(wc, true);
wc->seq_count++;
pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
wc->overwrote_committed = false;
@@ -756,7 +756,7 @@ static void writecache_flush(struct dm_writecache *wc)
}
if (need_flush_after_free)
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}
static void writecache_flush_work(struct work_struct *work)
@@ -809,7 +809,7 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_
}
if (discarded_something)
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}
static bool writecache_wait_for_writeback(struct dm_writecache *wc)
@@ -958,7 +958,7 @@ static void writecache_resume(struct dm_target *ti)
if (need_flush) {
writecache_flush_all_metadata(wc);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}
wc_unlock(wc);
@@ -1342,7 +1342,7 @@ static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *
wc->writeback_size--;
n_walked++;
if (unlikely(n_walked >= ENDIO_LATENCY)) {
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
wc_unlock(wc);
wc_lock(wc);
n_walked = 0;
@@ -1423,7 +1423,7 @@ static int writecache_endio_thread(void *data)
writecache_wait_for_ios(wc, READ);
}
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
wc_unlock(wc);
}
@@ -1766,10 +1766,10 @@ static int init_memory(struct dm_writecache *wc)
write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
writecache_flush_all_metadata(wc);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
return 0;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 47ace7e012b9f7ad71d43ac9063d335ea3d6820b Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer(a)redhat.com>
Date: Mon, 27 Jan 2020 14:07:23 -0500
Subject: [PATCH] dm: fix potential for q->make_request_fn NULL pointer
Move blk_queue_make_request() to dm.c:alloc_dev() so that
q->make_request_fn is never NULL during the lifetime of a DM device
(even one that is created without a DM table).
Otherwise generic_make_request() will crash simply by doing:
dmsetup create -n test
mount /dev/dm-N /mnt
While at it, move ->congested_data initialization out of
dm.c:alloc_dev() and into the bio-based specific init method.
Reported-by: Stefan Bader <stefan.bader(a)canonical.com>
BugLink: https://bugs.launchpad.net/bugs/1860231
Fixes: ff36ab34583a ("dm: remove request-based logic from make_request_fn wrapper")
Depends-on: c12c9a3c3860c ("dm: various cleanups to md->queue initialization code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e8f9661a10a1..b89f07ee2eff 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1859,6 +1859,7 @@ static void dm_init_normal_md_queue(struct mapped_device *md)
/*
* Initialize aspects of queue that aren't relevant for blk-mq
*/
+ md->queue->backing_dev_info->congested_data = md;
md->queue->backing_dev_info->congested_fn = dm_any_congested;
}
@@ -1949,7 +1950,12 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->queue)
goto bad;
md->queue->queuedata = md;
- md->queue->backing_dev_info->congested_data = md;
+ /*
+ * default to bio-based required ->make_request_fn until DM
+ * table is loaded and md->type established. If request-based
+ * table is loaded: blk-mq will override accordingly.
+ */
+ blk_queue_make_request(md->queue, dm_make_request);
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
@@ -2264,7 +2270,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
dm_init_normal_md_queue(md);
- blk_queue_make_request(md->queue, dm_make_request);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 47ace7e012b9f7ad71d43ac9063d335ea3d6820b Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer(a)redhat.com>
Date: Mon, 27 Jan 2020 14:07:23 -0500
Subject: [PATCH] dm: fix potential for q->make_request_fn NULL pointer
Move blk_queue_make_request() to dm.c:alloc_dev() so that
q->make_request_fn is never NULL during the lifetime of a DM device
(even one that is created without a DM table).
Otherwise generic_make_request() will crash simply by doing:
dmsetup create -n test
mount /dev/dm-N /mnt
While at it, move ->congested_data initialization out of
dm.c:alloc_dev() and into the bio-based specific init method.
Reported-by: Stefan Bader <stefan.bader(a)canonical.com>
BugLink: https://bugs.launchpad.net/bugs/1860231
Fixes: ff36ab34583a ("dm: remove request-based logic from make_request_fn wrapper")
Depends-on: c12c9a3c3860c ("dm: various cleanups to md->queue initialization code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e8f9661a10a1..b89f07ee2eff 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1859,6 +1859,7 @@ static void dm_init_normal_md_queue(struct mapped_device *md)
/*
* Initialize aspects of queue that aren't relevant for blk-mq
*/
+ md->queue->backing_dev_info->congested_data = md;
md->queue->backing_dev_info->congested_fn = dm_any_congested;
}
@@ -1949,7 +1950,12 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->queue)
goto bad;
md->queue->queuedata = md;
- md->queue->backing_dev_info->congested_data = md;
+ /*
+ * default to bio-based required ->make_request_fn until DM
+ * table is loaded and md->type established. If request-based
+ * table is loaded: blk-mq will override accordingly.
+ */
+ blk_queue_make_request(md->queue, dm_make_request);
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
@@ -2264,7 +2270,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
dm_init_normal_md_queue(md);
- blk_queue_make_request(md->queue, dm_make_request);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 47ace7e012b9f7ad71d43ac9063d335ea3d6820b Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer(a)redhat.com>
Date: Mon, 27 Jan 2020 14:07:23 -0500
Subject: [PATCH] dm: fix potential for q->make_request_fn NULL pointer
Move blk_queue_make_request() to dm.c:alloc_dev() so that
q->make_request_fn is never NULL during the lifetime of a DM device
(even one that is created without a DM table).
Otherwise generic_make_request() will crash simply by doing:
dmsetup create -n test
mount /dev/dm-N /mnt
While at it, move ->congested_data initialization out of
dm.c:alloc_dev() and into the bio-based specific init method.
Reported-by: Stefan Bader <stefan.bader(a)canonical.com>
BugLink: https://bugs.launchpad.net/bugs/1860231
Fixes: ff36ab34583a ("dm: remove request-based logic from make_request_fn wrapper")
Depends-on: c12c9a3c3860c ("dm: various cleanups to md->queue initialization code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e8f9661a10a1..b89f07ee2eff 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1859,6 +1859,7 @@ static void dm_init_normal_md_queue(struct mapped_device *md)
/*
* Initialize aspects of queue that aren't relevant for blk-mq
*/
+ md->queue->backing_dev_info->congested_data = md;
md->queue->backing_dev_info->congested_fn = dm_any_congested;
}
@@ -1949,7 +1950,12 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->queue)
goto bad;
md->queue->queuedata = md;
- md->queue->backing_dev_info->congested_data = md;
+ /*
+ * default to bio-based required ->make_request_fn until DM
+ * table is loaded and md->type established. If request-based
+ * table is loaded: blk-mq will override accordingly.
+ */
+ blk_queue_make_request(md->queue, dm_make_request);
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
@@ -2264,7 +2270,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
dm_init_normal_md_queue(md);
- blk_queue_make_request(md->queue, dm_make_request);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 54a16ff6f2e50775145b210bcd94d62c3c2af117 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Date: Wed, 5 Feb 2020 09:20:32 -0500
Subject: [PATCH] ftrace: Protect ftrace_graph_hash with ftrace_sync
As function_graph tracer can run when RCU is not "watching", it can not be
protected by synchronize_rcu() it requires running a task on each CPU before
it can be freed. Calling schedule_on_each_cpu(ftrace_sync) needs to be used.
Link: https://lore.kernel.org/r/20200205131110.GT2935@paulmck-ThinkPad-P72
Cc: stable(a)vger.kernel.org
Fixes: b9b0c831bed26 ("ftrace: Convert graph filter to use hash tables")
Reported-by: "Paul E. McKenney" <paulmck(a)kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 481ede3eac13..3f7ee102868a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5867,8 +5867,15 @@ ftrace_graph_release(struct inode *inode, struct file *file)
mutex_unlock(&graph_lock);
- /* Wait till all users are no longer using the old hash */
- synchronize_rcu();
+ /*
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
+ schedule_on_each_cpu(ftrace_sync);
free_ftrace_hash(old_hash);
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8c52f5de9384..3c75d29bd861 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -979,6 +979,7 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
* Have to open code "rcu_dereference_sched()" because the
* function graph tracer can be called when RCU is not
* "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
*/
hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible());
@@ -1031,6 +1032,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
* Have to open code "rcu_dereference_sched()" because the
* function graph tracer can be called when RCU is not
* "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
*/
notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
!preemptible());
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 54a16ff6f2e50775145b210bcd94d62c3c2af117 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Date: Wed, 5 Feb 2020 09:20:32 -0500
Subject: [PATCH] ftrace: Protect ftrace_graph_hash with ftrace_sync
As function_graph tracer can run when RCU is not "watching", it can not be
protected by synchronize_rcu() it requires running a task on each CPU before
it can be freed. Calling schedule_on_each_cpu(ftrace_sync) needs to be used.
Link: https://lore.kernel.org/r/20200205131110.GT2935@paulmck-ThinkPad-P72
Cc: stable(a)vger.kernel.org
Fixes: b9b0c831bed26 ("ftrace: Convert graph filter to use hash tables")
Reported-by: "Paul E. McKenney" <paulmck(a)kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 481ede3eac13..3f7ee102868a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5867,8 +5867,15 @@ ftrace_graph_release(struct inode *inode, struct file *file)
mutex_unlock(&graph_lock);
- /* Wait till all users are no longer using the old hash */
- synchronize_rcu();
+ /*
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
+ schedule_on_each_cpu(ftrace_sync);
free_ftrace_hash(old_hash);
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8c52f5de9384..3c75d29bd861 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -979,6 +979,7 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
* Have to open code "rcu_dereference_sched()" because the
* function graph tracer can be called when RCU is not
* "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
*/
hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible());
@@ -1031,6 +1032,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
* Have to open code "rcu_dereference_sched()" because the
* function graph tracer can be called when RCU is not
* "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
*/
notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
!preemptible());
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 54a16ff6f2e50775145b210bcd94d62c3c2af117 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Date: Wed, 5 Feb 2020 09:20:32 -0500
Subject: [PATCH] ftrace: Protect ftrace_graph_hash with ftrace_sync
As function_graph tracer can run when RCU is not "watching", it can not be
protected by synchronize_rcu() it requires running a task on each CPU before
it can be freed. Calling schedule_on_each_cpu(ftrace_sync) needs to be used.
Link: https://lore.kernel.org/r/20200205131110.GT2935@paulmck-ThinkPad-P72
Cc: stable(a)vger.kernel.org
Fixes: b9b0c831bed26 ("ftrace: Convert graph filter to use hash tables")
Reported-by: "Paul E. McKenney" <paulmck(a)kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 481ede3eac13..3f7ee102868a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5867,8 +5867,15 @@ ftrace_graph_release(struct inode *inode, struct file *file)
mutex_unlock(&graph_lock);
- /* Wait till all users are no longer using the old hash */
- synchronize_rcu();
+ /*
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
+ schedule_on_each_cpu(ftrace_sync);
free_ftrace_hash(old_hash);
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8c52f5de9384..3c75d29bd861 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -979,6 +979,7 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
* Have to open code "rcu_dereference_sched()" because the
* function graph tracer can be called when RCU is not
* "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
*/
hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible());
@@ -1031,6 +1032,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
* Have to open code "rcu_dereference_sched()" because the
* function graph tracer can be called when RCU is not
* "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
*/
notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
!preemptible());
This reverts commit b24be4acd17a8963a29b2a92e1d80b9ddf759c95.
Commit b24be4acd17a ("ovl: modify ovl_permission() to do checks on two
inodes") (stable kernel id) breaks r/w access in overlayfs when setting
ACL to files, in 4.4 stable kernel. There is an available reproducer in
[1].
To reproduce the issue :
$./make-overlay.sh
$./test.sh
st_mode is 100644
open failed: -1
cat: /tmp/overlay/animal: Permission denied <---- Breaks access
-rw-r--r-- 1 jo jo 0 Oct 11 09:57 /tmp/overlay/animal
There are two options to fix this; (a) backport commit ce31513a9114
("ovl: copyattr after setting POSIX ACL") to 4.4 or (b) revert offending
commit b24be4acd17a ("ovl: modify ovl_permission() to do checks on two
inodes"). Following option (a) entails high risk of regression since
commit ce31513a9114 ("ovl: copyattr after setting POSIX ACL") has many
dependencies on other commits that need to be backported too (~18
commits).
This patch proceeds with reverting commit b24be4acd17a ("ovl: modify
ovl_permission() to do checks on two inodes"). The reverted commit is
associated with CVE-2018-16597, however the test-script provided in [3]
shows that 4.4 kernel is NOT affected by this cve and therefore it's
safe to revert it.
The offending commit was introduced upstream in v4.8-rc1. At this point
had nothing to do with any CVE. It was related with CVE-2018-16597 as
it was the fix for bug [2]. Later on it was backported to stable 4.4.
The test-script [3] tests whether 4.4 kernel is affected by
CVE-2018-16597. It tests the reproducer found in [2] plus a few more
cases. The correct output of the script is failure with "Permission
denied" when a normal user tries to overwrite root owned files. For
more details please refer to [4].
[1] https://gist.github.com/thomas-holmes/711bcdb28e2b8e6d1c39c1d99d292af7
[2] https://bugzilla.suse.com/show_bug.cgi?id=1106512#c0
[3] https://launchpadlibrarian.net/459694705/test_overlay_permission.sh
[4] https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1851243
Signed-off-by: Ioanna Alifieraki <ioanna-maria.alifieraki(a)canonical.com>
---
fs/overlayfs/inode.c | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 060482e349ef..013d27dc6f58 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -9,7 +9,6 @@
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/cred.h>
#include <linux/xattr.h>
#include "overlayfs.h"
@@ -92,7 +91,6 @@ int ovl_permission(struct inode *inode, int mask)
struct ovl_entry *oe;
struct dentry *alias = NULL;
struct inode *realinode;
- const struct cred *old_cred;
struct dentry *realdentry;
bool is_upper;
int err;
@@ -145,18 +143,7 @@ int ovl_permission(struct inode *inode, int mask)
goto out_dput;
}
- /*
- * Check overlay inode with the creds of task and underlying inode
- * with creds of mounter
- */
- err = generic_permission(inode, mask);
- if (err)
- goto out_dput;
-
- old_cred = ovl_override_creds(inode->i_sb);
err = __inode_permission(realinode, mask);
- revert_creds(old_cred);
-
out_dput:
dput(alias);
return err;
--
2.17.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 410e5e55c9c1c9c0d452ac5b9adb37b933a7747e Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
Date: Fri, 24 Jan 2020 15:36:21 -0600
Subject: [PATCH] ASoC: SOF: core: release resources on errors in
probe_continue
The initial intent of releasing resources in the .remove does not work
well with HDaudio codecs. If the probe_continue() fails in a work
queue, e.g. due to missing firmware or authentication issues, we don't
release any resources, and as a result the kernel oopses during
suspend operations.
The suggested fix is to release all resources during errors in
probe_continue(), and use fw_state to track resource allocation
state, so that .remove does not attempt to release the same
hardware resources twice. PM operations are also modified so that
no action is done if DSP resources have been freed due to
an error at probe.
Reported-by: Takashi Iwai <tiwai(a)suse.de>
Co-developed-by: Kai Vehmanen <kai.vehmanen(a)linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen(a)linux.intel.com>
Bugzilla: http://bugzilla.suse.com/show_bug.cgi?id=1161246
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
Reviewed-by: Takashi Iwai <tiwai(a)suse.de>
Link: https://lore.kernel.org/r/20200124213625.30186-4-pierre-louis.bossart@linux…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index f517ab448a1d..34cefbaf2d2a 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -244,7 +244,6 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
return 0;
-#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)
fw_trace_err:
snd_sof_free_trace(sdev);
fw_run_err:
@@ -255,22 +254,10 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
snd_sof_free_debug(sdev);
dbg_err:
snd_sof_remove(sdev);
-#else
- /*
- * when the probe_continue is handled in a work queue, the
- * probe does not fail so we don't release resources here.
- * They will be released with an explicit call to
- * snd_sof_device_remove() when the PCI/ACPI device is removed
- */
-
-fw_trace_err:
-fw_run_err:
-fw_load_err:
-ipc_err:
-dbg_err:
-
-#endif
+ /* all resources freed, update state to match */
+ sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
+ sdev->first_boot = true;
return ret;
}
@@ -353,10 +340,12 @@ int snd_sof_device_remove(struct device *dev)
if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE))
cancel_work_sync(&sdev->probe_work);
- snd_sof_fw_unload(sdev);
- snd_sof_ipc_free(sdev);
- snd_sof_free_debug(sdev);
- snd_sof_free_trace(sdev);
+ if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) {
+ snd_sof_fw_unload(sdev);
+ snd_sof_ipc_free(sdev);
+ snd_sof_free_debug(sdev);
+ snd_sof_free_trace(sdev);
+ }
/*
* Unregister machine driver. This will unbind the snd_card which
@@ -364,13 +353,15 @@ int snd_sof_device_remove(struct device *dev)
* before freeing the snd_card.
*/
snd_sof_machine_unregister(sdev, pdata);
+
/*
* Unregistering the machine driver results in unloading the topology.
* Some widgets, ex: scheduler, attempt to power down the core they are
* scheduled on, when they are unloaded. Therefore, the DSP must be
* removed only after the topology has been unloaded.
*/
- snd_sof_remove(sdev);
+ if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED)
+ snd_sof_remove(sdev);
/* release firmware */
release_firmware(pdata->fw);
diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
index 84290bbeebdd..a0cde053b61a 100644
--- a/sound/soc/sof/pm.c
+++ b/sound/soc/sof/pm.c
@@ -56,6 +56,10 @@ static int sof_resume(struct device *dev, bool runtime_resume)
if (!sof_ops(sdev)->resume || !sof_ops(sdev)->runtime_resume)
return 0;
+ /* DSP was never successfully started, nothing to resume */
+ if (sdev->first_boot)
+ return 0;
+
/*
* if the runtime_resume flag is set, call the runtime_resume routine
* or else call the system resume routine