The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 20cb1c2fb7568a6054c55defe044311397e01ddb
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023071635-handsaw-enclosure-0363@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
20cb1c2fb756 ("blk-cgroup: Flush stats before releasing blkcg_gq")
2c275afeb61d ("block: make blkcg_punt_bio_submit optional")
12be09fe18f2 ("block: async_bio_lock does not need to be bh-safe")
3480373ebdf7 ("btrfs, block: move REQ_CGROUP_PUNT to btrfs")
0a0596fbbe5b ("btrfs, mm: remove the punt_to_cgroup field in struct writeback_control")
05d06a5c9d9c ("btrfs: move kthread_associate_blkcg out of btrfs_submit_compressed_write")
ae42a154ca89 ("btrfs: pass a btrfs_bio to btrfs_submit_bio")
34f888ce3a35 ("btrfs: cleanup main loop in btrfs_encoded_read_regular_fill_pages")
72b505dc5757 ("btrfs: add a wbc pointer to struct btrfs_bio_ctrl")
794c26e214ab ("btrfs: remove the sync_io flag in struct btrfs_bio_ctrl")
c000bc04bad4 ("btrfs: store the bio opf in struct btrfs_bio_ctrl")
eb8d0c6d042f ("btrfs: remove the force_bio_submit to submit_extent_page")
67998cf438e2 ("btrfs: don't set force_bio_submit in read_extent_buffer_subpage")
10e924bc320a ("btrfs: factor out a btrfs_add_compressed_bio_pages helper")
e7aff33e3161 ("btrfs: use the bbio file offset in btrfs_submit_compressed_read")
798c9fc74d03 ("btrfs: remove redundant free_extent_map in btrfs_submit_compressed_read")
544fe4a903ce ("btrfs: embed a btrfs_bio into struct compressed_bio")
3822a7c40997 ("Merge tag 'mm-stable-2023-02-20-13-37' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 20cb1c2fb7568a6054c55defe044311397e01ddb Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei(a)redhat.com>
Date: Sat, 10 Jun 2023 07:42:49 +0800
Subject: [PATCH] blk-cgroup: Flush stats before releasing blkcg_gq
As noted by Michal, the blkg_iostat_set's in the lockless list hold
reference to blkg's to protect against their removal. Those blkg's
hold reference to blkcg. When a cgroup is being destroyed,
cgroup_rstat_flush() is only called at css_release_work_fn() which
is called when the blkcg reference count reaches 0. This circular
dependency will prevent blkcg and some blkgs from being freed after
they are made offline.
It is less a problem if the cgroup to be destroyed also has other
controllers like memory that will call cgroup_rstat_flush() which will
clean up the reference count. If block is the only controller that uses
rstat, these offline blkcg and blkgs may never be freed leaking more
and more memory over time.
To prevent this potential memory leak:
- flush blkcg per-cpu stats list in __blkg_release(), when no new stat
can be added
- add global blkg_stat_lock for covering concurrent parent blkg stat
update
- don't grab bio->bi_blkg reference when adding the stats into blkcg's
per-cpu stat list since all stats are guaranteed to be consumed before
releasing blkg instance, and grabbing blkg reference for stats was the
most fragile part of original patch
Based on Waiman's patch:
https://lore.kernel.org/linux-block/20221215033132.230023-3-longman@redhat.…
Fixes: 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()")
Cc: stable(a)vger.kernel.org
Reported-by: Jay Shin <jaeshin(a)redhat.com>
Acked-by: Tejun Heo <tj(a)kernel.org>
Cc: Waiman Long <longman(a)redhat.com>
Cc: mkoutny(a)suse.com
Cc: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Ming Lei <ming.lei(a)redhat.com>
Link: https://lore.kernel.org/r/20230609234249.1412858-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 0ce64dd73cfe..f0b5c9c41cde 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -34,6 +34,8 @@
#include "blk-ioprio.h"
#include "blk-throttle.h"
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu);
+
/*
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
* blkcg_pol_register_mutex nests outside of it and synchronizes entire
@@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
bool blkcg_debug_stats = false;
+static DEFINE_RAW_SPINLOCK(blkg_stat_lock);
+
#define BLKG_DESTROY_BATCH_SIZE 64
/*
@@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg)
static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+ struct blkcg *blkcg = blkg->blkcg;
+ int cpu;
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
WARN_ON(!bio_list_empty(&blkg->async_bios));
#endif
+ /*
+ * Flush all the non-empty percpu lockless lists before releasing
+ * us, given these stat belongs to us.
+ *
+ * blkg_stat_lock is for serializing blkg stat update
+ */
+ for_each_possible_cpu(cpu)
+ __blkcg_rstat_flush(blkcg, cpu);
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
@@ -951,23 +965,26 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
{
- struct blkcg *blkcg = css_to_blkcg(css);
struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
struct llist_node *lnode;
struct blkg_iostat_set *bisc, *next_bisc;
- /* Root-level stats are sourced from system-wide IO stats */
- if (!cgroup_parent(css->cgroup))
- return;
-
rcu_read_lock();
lnode = llist_del_all(lhead);
if (!lnode)
goto out;
+ /*
+ * For covering concurrent parent blkg update from blkg_release().
+ *
+ * When flushing from cgroup, cgroup_rstat_lock is always held, so
+ * this lock won't cause contention most of time.
+ */
+ raw_spin_lock(&blkg_stat_lock);
+
/*
* Iterate only the iostat_cpu's queued in the lockless list.
*/
@@ -991,13 +1008,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (parent && parent->parent)
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
- percpu_ref_put(&blkg->refcnt);
}
-
+ raw_spin_unlock(&blkg_stat_lock);
out:
rcu_read_unlock();
}
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+ /* Root-level stats are sourced from system-wide IO stats */
+ if (cgroup_parent(css->cgroup))
+ __blkcg_rstat_flush(css_to_blkcg(css), cpu);
+}
+
/*
* We source root cgroup stats from the system-wide stats to avoid
* tracking the same information twice and incurring overhead when no
@@ -2075,7 +2098,6 @@ void blk_cgroup_bio_start(struct bio *bio)
llist_add(&bis->lnode, lhead);
WRITE_ONCE(bis->lqueued, true);
- percpu_ref_get(&bis->blkg->refcnt);
}
u64_stats_update_end_irqrestore(&bis->sync, flags);
Hi stable team,
Building BPF selftests on 5.10.186 currently causes the following compile error:
$ make -C tools/testing/selftests/bpf
...
BINARY test_verifier
In file included from
/usr/src/linux-5.10.186/tools/testing/selftests/bpf/verifier/tests.h:59,
from test_verifier.c:355:
/usr/src/linux-5.10.186/tools/testing/selftests/bpf/verifier/ref_tracking.c:935:10:
error: 'struct bpf_test' has no member named 'fixup_map_ringbuf'; did
you mean 'fixup_map_in_map'?
935 | .fixup_map_ringbuf = { 11 },
| ^~~~~~~~~~~~~~~~~
| fixup_map_in_map
The problem was introduced by commit f4b8c0710ab6 ("selftests/bpf: Add
verifier test for release_reference()") in your tree.
Seems like at least commit 4237e9f4a962 ("selftests/bpf: Add verifier
test for PTR_TO_MEM spill") is required for the build to succeed.
I previously reported this but things probably fell through the
cracks: https://lore.kernel.org/stable/CAN+4W8iMcwwVjmSekZ9txzZNxOZ0x98nBXo4cEoTU9G…
Thanks!
Lorenz
Hi Greg, Sasha,
The following list shows the backported patches, I am using original
commit IDs for reference:
1) 628bd3e49cba ("netfilter: nf_tables: drop map element references from preparation phase")
2) 3e70489721b6 ("netfilter: nf_tables: unbind non-anonymous set if rule construction fails")
Please, apply.
Thanks.
Pablo Neira Ayuso (2):
netfilter: nf_tables: drop map element references from preparation phase
netfilter: nf_tables: unbind non-anonymous set if rule construction fails
include/net/netfilter/nf_tables.h | 5 +-
net/netfilter/nf_tables_api.c | 147 ++++++++++++++++++++++++++----
net/netfilter/nft_set_bitmap.c | 5 +-
net/netfilter/nft_set_hash.c | 23 ++++-
net/netfilter/nft_set_pipapo.c | 14 ++-
net/netfilter/nft_set_rbtree.c | 5 +-
6 files changed, 168 insertions(+), 31 deletions(-)
--
2.30.2
commit 69562eb0bd3e6bb8e522a7b254334e0fb30dff0c upstream.
Hopefully, nobody is trying to abuse mount/sb marks for watching all
anonymous pipes/inodes.
I cannot think of a good reason to allow this - it looks like an
oversight that dated back to the original fanotify API.
Link: https://lore.kernel.org/linux-fsdevel/20230628101132.kvchg544mczxv2pm@quack…
Fixes: 0ff21db9fcc3 ("fanotify: hooks the fanotify_mark syscall to the vfsmount code")
Signed-off-by: Amir Goldstein <amir73il(a)gmail.com>
Reviewed-by: Christian Brauner <brauner(a)kernel.org>
Signed-off-by: Jan Kara <jack(a)suse.cz>
Message-Id: <20230629042044.25723-1-amir73il(a)gmail.com>
[backport to 5.x.y]
Signed-off-by: Amir Goldstein <amir73il(a)gmail.com>
---
Greg,
This 5.15 backport should cleanly apply to all 5.x.y LTS kernels.
It will NOT apply to 4.x.y kernels.
The original upstream commit should apply cleanly to 6.x.y stable
kernels.
Thanks,
Amir.
fs/notify/fanotify/fanotify_user.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 84ec851211d9..0e2a0eb7cb9e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1337,8 +1337,11 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
return 0;
}
-static int fanotify_events_supported(struct path *path, __u64 mask)
+static int fanotify_events_supported(struct path *path, __u64 mask,
+ unsigned int flags)
{
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+
/*
* Some filesystems such as 'proc' acquire unusual locks when opening
* files. For them fanotify permission events have high chances of
@@ -1350,6 +1353,21 @@ static int fanotify_events_supported(struct path *path, __u64 mask)
if (mask & FANOTIFY_PERM_EVENTS &&
path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
return -EINVAL;
+
+ /*
+ * mount and sb marks are not allowed on kernel internal pseudo fs,
+ * like pipe_mnt, because that would subscribe to events on all the
+ * anonynous pipes in the system.
+ *
+ * SB_NOUSER covers all of the internal pseudo fs whose objects are not
+ * exposed to user's mount namespace, but there are other SB_KERNMOUNT
+ * fs, like nsfs, debugfs, for which the value of allowing sb and mount
+ * mark is questionable. For now we leave them alone.
+ */
+ if (mark_type != FAN_MARK_INODE &&
+ path->mnt->mnt_sb->s_flags & SB_NOUSER)
+ return -EINVAL;
+
return 0;
}
@@ -1476,7 +1494,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto fput_and_out;
if (flags & FAN_MARK_ADD) {
- ret = fanotify_events_supported(&path, mask);
+ ret = fanotify_events_supported(&path, mask, flags);
if (ret)
goto path_put_and_out;
}
--
2.16.5
Hi,
one 'BUG_ON(ret < 0);' is still left in queue-6.1/btrfs-do-not-bug_on-on-tree-mod-log-failure-at-balan.patch
so we need to rebase this patch.
Best Regards
Wang Yugui (wangyugui(a)e16-tech.com)
2023/07/15
Hi,
In linux-6.1.y we don't have ARCH_BCM4908 symbol anymore (see commit
dd5c672d7ca9 ("arm64: bcmbca: Merge ARCH_BCM4908 to ARCH_BCMBCA") but
drivers/mtd/parsers/Kconfig still references it.
Please kindly cherry-pick a fix for that: commit 085679b15b5a ("mtd:
parsers: refer to ARCH_BCMBCA instead of ARCH_BCM4908") - it's part of
v6.2.
--
Rafał