Hi Greg, Sasha,
The following list shows patches that you can cherry-pick to -stable 6.5. I am using original commit IDs for reference:
1) 7ab9d0827af8 ("netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention")
2) 4e5f5b47d8de ("netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall GC")
3) 1d16d80d4230 ("netfilter: nft_set_pipapo: stop GC iteration if GC transaction allocation fails")
4) 7606622f20da ("netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration")
5) 44a76f08f7ca ("netfilter: nf_tables: fix memleak when more than 255 elements expired")
Please, apply.
Thanks.
Florian Westphal (1):
  netfilter: nf_tables: fix memleak when more than 255 elements expired

Pablo Neira Ayuso (4):
  netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention
  netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall GC
  netfilter: nft_set_pipapo: stop GC iteration if GC transaction allocation fails
  netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration

 include/net/netfilter/nf_tables.h |  7 ++++---
 net/netfilter/nf_tables_api.c     | 32 ++++++++++++++++++++++++++-----
 net/netfilter/nft_set_hash.c      | 11 ++++-------
 net/netfilter/nft_set_pipapo.c    |  4 ++--
 net/netfilter/nft_set_rbtree.c    |  8 +++-----
 5 files changed, 40 insertions(+), 22 deletions(-)
commit 96b33300fba880ec0eafcf3d82486f3463b4b6da upstream.
The rbtree GC does not modify the data structure; it only collects expired elements and enqueues a GC transaction. Use a read spinlock instead to avoid contention with the datapath while the GC worker is running.
Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_rbtree.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index f250b5399344..70491ba98dec 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -622,8 +622,7 @@ static void nft_rbtree_gc(struct work_struct *work)
         if (!gc)
                 goto done;
 
-        write_lock_bh(&priv->lock);
-        write_seqcount_begin(&priv->count);
+        read_lock_bh(&priv->lock);
         for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
 
                 /* Ruleset has been updated, try later. */
@@ -673,8 +672,7 @@ static void nft_rbtree_gc(struct work_struct *work)
         gc = nft_trans_gc_catchall(gc, gc_seq);
 
 try_later:
-        write_seqcount_end(&priv->count);
-        write_unlock_bh(&priv->lock);
+        read_unlock_bh(&priv->lock);
 
         if (gc)
                 nft_trans_gc_queue_async_done(gc);
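For anyone who wants to poke at the locking idea outside the kernel, here is a minimal userspace sketch of the pattern (pthread rwlock, toy data and helper names of my own, not the nftables code): the GC pass only reads the structure and records what has expired, so it can share the lock with datapath lookups, and only the paths that actually modify the tree need exclusive access.

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

/* GC pass: read-only walk that records expired entries for later removal. */
static int gc_pass(const int *elems, int n, int *expired)
{
        int batch = 0;

        pthread_rwlock_rdlock(&lock);   /* read side: nothing is mutated here */
        for (int i = 0; i < n; i++) {
                if (elems[i] < 0)       /* "< 0" marks an expired entry in this toy */
                        expired[batch++] = i;
        }
        pthread_rwlock_unlock(&lock);

        return batch;                   /* actual removal happens later, as a transaction */
}

int main(void)
{
        int elems[] = { 1, -1, 3, -1 };
        int expired[4];

        printf("queued %d expired elements\n", gc_pass(elems, 4, expired));
        return 0;
}

The write lock stays reserved for insert/remove, which is what removes the contention with the datapath.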
commit 4a9e12ea7e70223555ec010bec9f711089ce96f6 upstream.
pipapo needs to enqueue GC transactions for catchall elements through nft_trans_gc_queue_sync(). Add nft_trans_gc_catchall_sync() and nft_trans_gc_catchall_async() to handle GC transaction queueing accordingly.
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  5 +++--
 net/netfilter/nf_tables_api.c     | 22 +++++++++++++++++++---
 net/netfilter/nft_set_hash.c      |  2 +-
 net/netfilter/nft_set_pipapo.c    |  2 +-
 net/netfilter/nft_set_rbtree.c    |  2 +-
 5 files changed, 25 insertions(+), 8 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index dd40c75011d2..a4455f4995ab 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1700,8 +1700,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
 
 void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
 
-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
-                                           unsigned int gc_seq);
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+                                                 unsigned int gc_seq);
+struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc);
 
 void nft_setelem_data_deactivate(const struct net *net,
                                  const struct nft_set *set,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a72934f00804..bba8042f721a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9602,8 +9602,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
         call_rcu(&trans->rcu, nft_trans_gc_trans_free);
 }
 
-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
-                                           unsigned int gc_seq)
+static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+                                                  unsigned int gc_seq,
+                                                  bool sync)
 {
         struct nft_set_elem_catchall *catchall;
         const struct nft_set *set = gc->set;
@@ -9619,7 +9620,11 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
 
                 nft_set_elem_dead(ext);
 dead_elem:
-                gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+                if (sync)
+                        gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+                else
+                        gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+
                 if (!gc)
                         return NULL;
 
@@ -9629,6 +9634,17 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
         return gc;
 }
 
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+                                                 unsigned int gc_seq)
+{
+        return nft_trans_gc_catchall(gc, gc_seq, false);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
+{
+        return nft_trans_gc_catchall(gc, 0, true);
+}
+
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
         struct nftables_pernet *nft_net = nft_pernet(net);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 524763659f25..eca20dc60138 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -372,7 +372,7 @@ static void nft_rhash_gc(struct work_struct *work)
                 nft_trans_gc_elem_add(gc, he);
         }
 
-        gc = nft_trans_gc_catchall(gc, gc_seq);
+        gc = nft_trans_gc_catchall_async(gc, gc_seq);
 
 try_later:
         /* catchall list iteration requires rcu read side lock. */
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 6af9c9ed4b5c..10b89ac74476 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1610,7 +1610,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
                 }
         }
 
-        gc = nft_trans_gc_catchall(gc, 0);
+        gc = nft_trans_gc_catchall_sync(gc);
         if (gc) {
                 nft_trans_gc_queue_sync_done(gc);
                 priv->last_gc = jiffies;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 70491ba98dec..487572dcd614 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -669,7 +669,7 @@ static void nft_rbtree_gc(struct work_struct *work)
                 nft_trans_gc_elem_add(gc, rbe);
         }
 
-        gc = nft_trans_gc_catchall(gc, gc_seq);
+        gc = nft_trans_gc_catchall_async(gc, gc_seq);
 
 try_later:
         read_unlock_bh(&priv->lock);
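The async/sync split above is a small wrapper pattern: one internal helper takes a sync flag, and two thin public wrappers pick the queueing path so callers cannot pass an inconsistent (sync, gc_seq) pair. A toy standalone version (struct and function names made up for illustration):

#include <stdbool.h>
#include <stdio.h>

struct gc_batch {
        unsigned int seq;
        bool sync;
};

/* internal helper: callers go through the two wrappers below */
static struct gc_batch *queue_catchall(struct gc_batch *gc,
                                       unsigned int gc_seq, bool sync)
{
        gc->seq = sync ? 0 : gc_seq;    /* the sync path has no GC sequence */
        gc->sync = sync;
        return gc;
}

/* async variant: used from a GC work queue, needs the GC sequence */
static struct gc_batch *queue_catchall_async(struct gc_batch *gc,
                                             unsigned int gc_seq)
{
        return queue_catchall(gc, gc_seq, false);
}

/* sync variant: no GC sequence involved */
static struct gc_batch *queue_catchall_sync(struct gc_batch *gc)
{
        return queue_catchall(gc, 0, true);
}

int main(void)
{
        struct gc_batch gc = { 0 };

        queue_catchall_async(&gc, 42);
        printf("async: seq=%u sync=%d\n", gc.seq, gc.sync);
        queue_catchall_sync(&gc);
        printf("sync:  seq=%u sync=%d\n", gc.seq, gc.sync);
        return 0;
}

The same shape is visible in the hunks above: pipapo now queues through the sync variant, while the rbtree and rhash GC workers keep queueing asynchronously with the GC sequence.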
commit 6d365eabce3c018a80f6e0379b17df2abb17405e upstream.
nft_trans_gc_queue_sync() enqueues the GC transaction and allocates a new one. If this allocation fails, stop this GC sync run and retry later.
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_pipapo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 10b89ac74476..c0dcc40de358 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1596,7 +1596,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
 
                         gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
                         if (!gc)
-                                break;
+                                return;
 
                         nft_pipapo_gc_deactivate(net, set, e);
                         pipapo_drop(m, rulemap);
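The one-liner is easy to misread, so here is a toy standalone model of why 'return' rather than 'break' is needed once the batch allocation fails (names made up, this is not the pipapo code): a break only leaves the loop and the rest of the run would carry on without a valid batch, whereas returning stops the whole GC sync run so it can be retried later, as the changelog says.

#include <stdio.h>
#include <stdlib.h>

struct batch { int n; };

/* queue the current batch and try to allocate the next one */
static struct batch *queue_and_alloc_next(struct batch *b)
{
        free(b);
        return NULL;            /* simulate allocation failure */
}

static void gc_run(void)
{
        struct batch *b = calloc(1, sizeof(*b));

        if (!b)
                return;

        for (int rule = 0; rule < 4; rule++) {
                b = queue_and_alloc_next(b);
                if (!b)
                        return; /* with 'break', execution would fall through below */
        }

        /* post-loop steps: in this toy they would dereference b == NULL after a break */
        printf("finishing run with %d elements in the final batch\n", b->n);
        free(b);
}

int main(void)
{
        gc_run();
        printf("GC run stopped, retry later\n");
        return 0;
}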
commit b079155faae94e9b3ab9337e82100a914ebb4e8d upstream.
Skip the GC run if the iterator rewinds to the beginning with EAGAIN; otherwise GC might collect the same element more than once.
Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_hash.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index eca20dc60138..2013de934cef 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -338,12 +338,9 @@ static void nft_rhash_gc(struct work_struct *work)
 
         while ((he = rhashtable_walk_next(&hti))) {
                 if (IS_ERR(he)) {
-                        if (PTR_ERR(he) != -EAGAIN) {
-                                nft_trans_gc_destroy(gc);
-                                gc = NULL;
-                                goto try_later;
-                        }
-                        continue;
+                        nft_trans_gc_destroy(gc);
+                        gc = NULL;
+                        goto try_later;
                 }
 
                 /* Ruleset has been updated, try later. */
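To make the EAGAIN case concrete, here is a small standalone model (fake iterator, made-up names): once the walk signals a restart, anything collected so far would be collected again if the loop simply continued, so the hunk above drops the partial batch and retries the whole run later via try_later.

#include <stdio.h>

#define RESTART -1              /* stands in for ERR_PTR(-EAGAIN) */

static int walk_state;

/* fake iterator: yields 10, 20, then rewinds, then 10, 20, 30, then ends (0) */
static int walk_next(void)
{
        static const int seq[] = { 10, 20, RESTART, 10, 20, 30, 0 };

        return seq[walk_state++];
}

int main(void)
{
        int batch[8], n = 0, e;

        while ((e = walk_next()) != 0) {
                if (e == RESTART) {
                        printf("iterator rewound, dropping %d collected elements, retry later\n", n);
                        return 0;       /* old behaviour: continue, re-collecting 10 and 20 */
                }
                batch[n++] = e;
        }
        printf("collected %d elements, first is %d\n", n, batch[0]);
        return 0;
}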
From: Florian Westphal <fw@strlen.de>
commit cf5000a7787cbc10341091d37245a42c119d26c5 upstream.
When more than 255 elements expired we're supposed to switch to a new gc container structure.
This never happens: u8 type will wrap before reaching the boundary and nft_trans_gc_space() always returns true.
This means we recycle the initial gc container structure and lose track of the elements that came before.
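The arithmetic is worth spelling out. Assuming NFT_TRANS_GC_BATCHCOUNT is 256 (the "255 elements" in the subject suggests as much) and a capacity check along the lines of count < NFT_TRANS_GC_BATCHCOUNT, a u8 counter wraps back to 0 before it can ever reach the boundary, so the check always passes and no new container is allocated; widening the counter to u16 makes the check meaningful again. A tiny standalone demo:

#include <stdint.h>
#include <stdio.h>

#define BATCHCOUNT 256  /* stand-in for NFT_TRANS_GC_BATCHCOUNT, assumed to be 256 */

int main(void)
{
        uint8_t count8 = 0;
        uint16_t count16 = 0;

        for (int i = 0; i < 300; i++) {
                count8++;       /* wraps to 0 after 255 */
                count16++;
        }

        printf("u8  counter after 300 increments: %u (below %d: %s)\n",
               (unsigned)count8, BATCHCOUNT, count8 < BATCHCOUNT ? "yes" : "no");
        printf("u16 counter after 300 increments: %u (below %d: %s)\n",
               (unsigned)count16, BATCHCOUNT, count16 < BATCHCOUNT ? "yes" : "no");
        return 0;
}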
While at it, don't deref 'gc' after we've passed it to call_rcu.
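The call_rcu() part is the usual "save what you still need before handing the object away" rule. A rough userspace analogue using free() (the real code defers the free to an RCU grace period, but either way the field must not be read once the object has been handed off):

#include <stdio.h>
#include <stdlib.h>

struct some_set { int id; };

struct gc_batch {
        struct some_set *set;
};

static struct gc_batch *alloc_next(struct some_set *set)
{
        struct gc_batch *gc = calloc(1, sizeof(*gc));

        if (gc)
                gc->set = set;
        return gc;
}

int main(void)
{
        struct some_set s = { .id = 1 };
        struct gc_batch *gc = alloc_next(&s);
        struct some_set *set;

        if (!gc)
                return 1;

        set = gc->set;          /* save the field first */
        free(gc);               /* analogue of call_rcu(): gc must not be touched now */
        gc = alloc_next(set);   /* was effectively alloc_next(gc->set): use after free */

        free(gc);
        return 0;
}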
Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Reported-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c     | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index a4455f4995ab..7c816359d5a9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1682,7 +1682,7 @@ struct nft_trans_gc {
         struct net *net;
         struct nft_set *set;
         u32 seq;
-        u8 count;
+        u16 count;
         void *priv[NFT_TRANS_GC_BATCHCOUNT];
         struct rcu_head rcu;
 };
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index bba8042f721a..1c2fb32bfa5f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9559,12 +9559,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans)
 struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
                                               unsigned int gc_seq, gfp_t gfp)
 {
+        struct nft_set *set;
+
         if (nft_trans_gc_space(gc))
                 return gc;
 
+        set = gc->set;
         nft_trans_gc_queue_work(gc);
 
-        return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+        return nft_trans_gc_alloc(set, gc_seq, gfp);
 }
 
 void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
@@ -9579,15 +9582,18 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
 
 struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
 {
+        struct nft_set *set;
+
         if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
                 return NULL;
 
         if (nft_trans_gc_space(gc))
                 return gc;
 
+        set = gc->set;
         call_rcu(&gc->rcu, nft_trans_gc_trans_free);
 
-        return nft_trans_gc_alloc(gc->set, 0, gfp);
+        return nft_trans_gc_alloc(set, 0, gfp);
 }
 
 void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
On Fri, Sep 22, 2023 at 06:02:51PM +0200, Pablo Neira Ayuso wrote:
> Hi Greg, Sasha,
>
> The following list shows patches that you can cherry-pick to -stable 6.5. I am using original commit IDs for reference:
>
> 7ab9d0827af8 ("netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention")
> 4e5f5b47d8de ("netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall GC")
> 1d16d80d4230 ("netfilter: nft_set_pipapo: stop GC iteration if GC transaction allocation fails")
> 7606622f20da ("netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration")
> 44a76f08f7ca ("netfilter: nf_tables: fix memleak when more than 255 elements expired")
>
> Please, apply.
Queued up this and the rest of the patches you sent for 5.10+, thanks!