From: Vlad Buslov vladbu@mellanox.com
[ Upstream commit 6f99528e9797794b91b43321fbbc93fe772b0803 ]
Rtnl lock is encapsulated in netlink and cannot be accessed by other modules directly. This means that reference counted objects that rely on rtnl lock cannot use it with refcounter helper function that atomically releases decrements reference and obtains mutex.
This patch implements simple wrapper function around refcount_dec_and_lock that obtains rtnl lock if reference counter value reached 0.
Signed-off-by: Vlad Buslov vladbu@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a56... Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones lee.jones@linaro.org --- include/linux/rtnetlink.h | 2 ++ net/core/rtnetlink.c | 6 ++++++ 2 files changed, 8 insertions(+)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5225832bd6ff1..9cdd76348d9ab 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,6 +6,7 @@ #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/wait.h> +#include <linux/refcount.h> #include <uapi/linux/rtnetlink.h>
extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -34,6 +35,7 @@ extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); +extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 83de32e34bb55..907dd0c7e8a66 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -130,6 +130,12 @@ int rtnl_is_locked(void) } EXPORT_SYMBOL(rtnl_is_locked);
+bool refcount_dec_and_rtnl_lock(refcount_t *r) +{ + return refcount_dec_and_mutex_lock(r, &rtnl_mutex); +} +EXPORT_SYMBOL(refcount_dec_and_rtnl_lock); + #ifdef CONFIG_PROVE_LOCKING bool lockdep_rtnl_is_held(void) {
From: Vlad Buslov vladbu@mellanox.com
[ Upstream commit 86bd446b5cebd783187ea3772ff258210de77d99 ]
Current implementation of qdisc_destroy() decrements Qdisc reference counter and only actually destroy Qdisc if reference counter value reached zero. Rename qdisc_destroy() to qdisc_put() in order for it to better describe the way in which this function currently implemented and used.
Extract code that deallocates Qdisc into new private qdisc_destroy() function. It is intended to be shared between regular qdisc_put() and its unlocked version that is introduced in next patch in this series.
Signed-off-by: Vlad Buslov vladbu@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a56... Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones lee.jones@linaro.org --- include/net/sch_generic.h | 2 +- net/sched/sch_api.c | 6 +++--- net/sched/sch_atm.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_cbs.c | 2 +- net/sched/sch_drr.c | 4 ++-- net/sched/sch_dsmark.c | 2 +- net/sched/sch_fifo.c | 2 +- net/sched/sch_generic.c | 23 ++++++++++++++--------- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 4 ++-- net/sched/sch_mq.c | 4 ++-- net/sched/sch_mqprio.c | 4 ++-- net/sched/sch_multiq.c | 6 +++--- net/sched/sch_netem.c | 2 +- net/sched/sch_prio.c | 6 +++--- net/sched/sch_qfq.c | 4 ++-- net/sched/sch_red.c | 4 ++-- net/sched/sch_sfb.c | 4 ++-- net/sched/sch_tbf.c | 4 ++-- 20 files changed, 47 insertions(+), 42 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 286bc674a6e79..70dfbd0437530 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -559,7 +559,7 @@ void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); -void qdisc_destroy(struct Qdisc *qdisc); +void qdisc_put(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 0bb4f7a94a3c8..60f40a396a039 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -928,7 +928,7 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb, qdisc_notify(net, skb, n, clid, old, new);
if (old) - qdisc_destroy(old); + qdisc_put(old); }
/* Graft qdisc "new" to class "classid" of qdisc "parent" or @@ -981,7 +981,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_refcount_inc(new);
if (!ingress) - qdisc_destroy(old); + qdisc_put(old); }
skip: @@ -1589,7 +1589,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); if (err) { if (q) - qdisc_destroy(q); + qdisc_put(q); return err; }
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 91bd5c8393036..9a1bfa13a6cd6 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -150,7 +150,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) pr_debug("atm_tc_put: destroying\n"); list_del_init(&flow->list); pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_destroy(flow->q); + qdisc_put(flow->q); tcf_block_put(flow->block); if (flow->sock) { pr_debug("atm_tc_put: f_count %ld\n", diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index bc62e1b246539..0a76ad05e5ae5 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1439,7 +1439,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) WARN_ON(cl->filters);
tcf_block_put(cl->block); - qdisc_destroy(cl->q); + qdisc_put(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->rate_est); if (cl != &q->link) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index d5e22452d5976..f95dc899e989a 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -452,7 +452,7 @@ static void cbs_destroy(struct Qdisc *sch) cbs_disable_offload(dev, q);
if (q->qdisc) - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); }
static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e0b0cf8a99393..cdebaed0f8cfd 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -134,7 +134,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); return err; } @@ -153,7 +153,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) { gen_kill_estimator(&cl->rate_est); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); }
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index fe030af9272c4..47a61689dadbc 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -415,7 +415,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
tcf_block_put(p->block); - qdisc_destroy(p->q); + qdisc_put(p->q); if (p->mv != p->embedded) kfree(p->mv); } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index bcd3ca97caea1..3697cd7997678 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -180,7 +180,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, if (q) { err = fifo_set_limit(q, limit); if (err < 0) { - qdisc_destroy(q); + qdisc_put(q); q = NULL; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2128b77d5cb33..6fee2731b4c22 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -918,7 +918,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, if (!ops->init || ops->init(sch, NULL, extack) == 0) return sch;
- qdisc_destroy(sch); + qdisc_put(sch); return NULL; } EXPORT_SYMBOL(qdisc_create_dflt); @@ -958,7 +958,7 @@ void qdisc_free(struct Qdisc *qdisc) kfree((char *) qdisc - qdisc->padded); }
-void qdisc_destroy(struct Qdisc *qdisc) +static void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops; struct sk_buff *skb, *tmp; @@ -967,10 +967,6 @@ void qdisc_destroy(struct Qdisc *qdisc) return; ops = qdisc->ops;
- if (qdisc->flags & TCQ_F_BUILTIN || - !refcount_dec_and_test(&qdisc->refcnt)) - return; - #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc);
@@ -997,7 +993,16 @@ void qdisc_destroy(struct Qdisc *qdisc)
qdisc_free(qdisc); } -EXPORT_SYMBOL(qdisc_destroy); + +void qdisc_put(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_test(&qdisc->refcnt)) + return; + + qdisc_destroy(qdisc); +} +EXPORT_SYMBOL(qdisc_put);
/* Attach toplevel qdisc to device queue. */ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, @@ -1318,7 +1323,7 @@ static void shutdown_scheduler_queue(struct net_device *dev, rcu_assign_pointer(dev_queue->qdisc, qdisc_default); dev_queue->qdisc_sleeping = qdisc_default;
- qdisc_destroy(qdisc); + qdisc_put(qdisc); } }
@@ -1327,7 +1332,7 @@ void dev_shutdown(struct net_device *dev) netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - qdisc_destroy(dev->qdisc); + qdisc_put(dev->qdisc); dev->qdisc = &noop_qdisc;
WARN_ON(timer_pending(&dev->watchdog_timer)); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3278a76f68615..b18ec1f6de60c 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1092,7 +1092,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) struct hfsc_sched *q = qdisc_priv(sch);
tcf_block_put(cl->block); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); gen_kill_estimator(&cl->rate_est); if (cl != &q->root) kfree(cl); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 43c4bfe625a91..862a33b9e2e0f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1224,7 +1224,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { WARN_ON(!cl->un.leaf.q); - qdisc_destroy(cl->un.leaf.q); + qdisc_put(cl->un.leaf.q); } gen_kill_estimator(&cl->rate_est); tcf_block_put(cl->block); @@ -1425,7 +1425,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_reset(parent->un.leaf.q); qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog); - qdisc_destroy(parent->un.leaf.q); + qdisc_put(parent->un.leaf.q); if (parent->prio_activity) htb_deactivate(q, parent);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 699b6bb444cea..0ab13a495af95 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -65,7 +65,7 @@ static void mq_destroy(struct Qdisc *sch) if (!priv->qdiscs) return; for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); }
@@ -119,7 +119,7 @@ static void mq_attach(struct Qdisc *sch) qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) - qdisc_destroy(old); + qdisc_put(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 3fd0e5dd7ae3e..64d7f876d7de2 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -40,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch) for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); }
@@ -300,7 +300,7 @@ static void mqprio_attach(struct Qdisc *sch) qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) - qdisc_destroy(old); + qdisc_put(old); if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 1df78e361ef94..1c2f9a3ab1ca7 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -175,7 +175,7 @@ multiq_destroy(struct Qdisc *sch)
tcf_block_put(q->block); for (band = 0; band < q->bands; band++) - qdisc_destroy(q->queues[band]); + qdisc_put(q->queues[band]);
kfree(q->queues); } @@ -204,7 +204,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, q->queues[i] = &noop_qdisc; qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); + qdisc_put(child); } }
@@ -228,7 +228,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); - qdisc_destroy(old); + qdisc_put(old); } sch_tree_unlock(sch); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 02d8d3fd84a5c..ad400f4f9a2d6 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1054,7 +1054,7 @@ static void netem_destroy(struct Qdisc *sch)
qdisc_watchdog_cancel(&q->watchdog); if (q->qdisc) - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); dist_free(q->delay_dist); dist_free(q->slot_dist); } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 1cbbd8c31405b..0e6f34bd9a683 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -175,7 +175,7 @@ prio_destroy(struct Qdisc *sch) tcf_block_put(q->block); prio_offload(sch, NULL); for (prio = 0; prio < q->bands; prio++) - qdisc_destroy(q->queues[prio]); + qdisc_put(q->queues[prio]); }
static int prio_tune(struct Qdisc *sch, struct nlattr *opt, @@ -205,7 +205,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, extack); if (!queues[i]) { while (i > oldbands) - qdisc_destroy(queues[--i]); + qdisc_put(queues[--i]); return -ENOMEM; } } @@ -220,7 +220,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); + qdisc_put(child); }
for (i = oldbands; i < q->bands; i++) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index a93402fe1a9f8..fa6ad95fb6fb4 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -524,7 +524,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return 0;
destroy_class: - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); return err; } @@ -535,7 +535,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
qfq_rm_from_agg(q, cl); gen_kill_estimator(&cl->rate_est); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); }
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index e42f890fa1478..0424aa747c341 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -181,7 +181,7 @@ static void red_destroy(struct Qdisc *sch)
del_timer_sync(&q->adapt_timer); red_offload(sch, false); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); }
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { @@ -236,7 +236,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (child) { qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; }
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 1aa95e761671e..81d205acb1b6a 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -470,7 +470,7 @@ static void sfb_destroy(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); }
static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { @@ -524,7 +524,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child;
q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 6f74a426f159e..dd29de1418b76 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -392,7 +392,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, if (child) { qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; } q->limit = qopt->limit; @@ -438,7 +438,7 @@ static void tbf_destroy(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); }
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
From: Vlad Buslov vladbu@mellanox.com
[ Upstream commit 3a7d0d07a386716b459b00783b11a8211cefcc0f ]
Currently, Qdisc API functions assume that users have rtnl lock taken. To implement rtnl unlocked classifiers update interface, Qdisc API must be extended with functions that do not require rtnl lock.
Extend Qdisc structure with rcu. Implement special version of put function qdisc_put_unlocked() that is called without rtnl lock taken. This function only takes rtnl lock if Qdisc reference counter reached zero and is intended to be used as optimization.
Signed-off-by: Vlad Buslov vladbu@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a56... Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones lee.jones@linaro.org --- include/linux/rtnetlink.h | 5 +++++ include/net/pkt_sched.h | 1 + include/net/sch_generic.h | 2 ++ net/sched/sch_api.c | 18 ++++++++++++++++++ net/sched/sch_generic.c | 25 ++++++++++++++++++++++++- 5 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9cdd76348d9ab..bb9cb84114c15 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -85,6 +85,11 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); }
+static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) +{ + return rcu_dereference(dev->ingress_queue); +} + struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
#ifdef CONFIG_NET_INGRESS diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index edca90ef3bdc4..1a6ac924266db 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -103,6 +103,7 @@ int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 70dfbd0437530..04a5133d875e8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -108,6 +108,7 @@ struct Qdisc {
spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + struct rcu_head rcu; };
static inline void qdisc_refcount_inc(struct Qdisc *qdisc) @@ -560,6 +561,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); +void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 60f40a396a039..af035431bec60 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -315,6 +315,24 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) return q; }
+struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) +{ + struct netdev_queue *nq; + struct Qdisc *q; + + if (!handle) + return NULL; + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; + + nq = dev_ingress_queue_rcu(dev); + if (nq) + q = qdisc_match_from_root(nq->qdisc_sleeping, handle); +out: + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6fee2731b4c22..80498c22d001b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -958,6 +958,13 @@ void qdisc_free(struct Qdisc *qdisc) kfree((char *) qdisc - qdisc->padded); }
+void qdisc_free_cb(struct rcu_head *head) +{ + struct Qdisc *q = container_of(head, struct Qdisc, rcu); + + qdisc_free(q); +} + static void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops; @@ -991,7 +998,7 @@ static void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(skb); }
- qdisc_free(qdisc); + call_rcu(&qdisc->rcu, qdisc_free_cb); }
void qdisc_put(struct Qdisc *qdisc) @@ -1004,6 +1011,22 @@ void qdisc_put(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_put);
+/* Version of qdisc_put() that is called with rtnl mutex unlocked. + * Intended to be used as optimization, this function only takes rtnl lock if + * qdisc reference counter reached zero. + */ + +void qdisc_put_unlocked(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) + return; + + qdisc_destroy(qdisc); + rtnl_unlock(); +} +EXPORT_SYMBOL(qdisc_put_unlocked); + /* Attach toplevel qdisc to device queue. */ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc)
From: Vlad Buslov vladbu@mellanox.com
[ Upstream commit 9d7e82cec35c027756ec97e274f878251f271181 ]
Implement function to take reference to Qdisc that relies on rcu read lock instead of rtnl mutex. Function only takes reference to Qdisc if reference counter isn't zero. Intended to be used by unlocked cls API.
Signed-off-by: Vlad Buslov vladbu@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a56... Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones lee.jones@linaro.org --- include/net/sch_generic.h | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 04a5133d875e8..c0147888b1555 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -118,6 +118,19 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) refcount_inc(&qdisc->refcnt); }
+/* Intended to be used by unlocked users, when concurrent qdisc release is + * possible. + */ + +static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return qdisc; + if (refcount_inc_not_zero(&qdisc->refcnt)) + return qdisc; + return NULL; +} + static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK)
From: Vlad Buslov vladbu@mellanox.com
[ Upstream commit e368fdb61d8e7c67ac70791b23345b26d7bbc661 ]
As a preparation from removing rtnl lock dependency from rules update path, use Qdisc rcu and reference counting capabilities instead of relying on rtnl lock while working with Qdiscs. Create new tcf_block_release() function, and use it to free resources taken by tcf_block_find(). Currently, this function only releases Qdisc and it is extended in next patches in this series.
Signed-off-by: Vlad Buslov vladbu@mellanox.com Acked-by: Jiri Pirko jiri@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a56... Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones lee.jones@linaro.org --- net/sched/cls_api.c | 79 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 15 deletions(-)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4413aa8d4e829..b5da2c7a8092b 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -539,6 +539,7 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, struct netlink_ext_ack *extack) { struct tcf_block *block; + int err = 0;
if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_lookup(net, block_index); @@ -550,55 +551,93 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, const struct Qdisc_class_ops *cops; struct net_device *dev;
+ rcu_read_lock(); + /* Find link */ - dev = __dev_get_by_index(net, ifindex); - if (!dev) + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); return ERR_PTR(-ENODEV); + }
/* Find qdisc */ if (!*parent) { *q = dev->qdisc; *parent = (*q)->handle; } else { - *q = qdisc_lookup(dev, TC_H_MAJ(*parent)); + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } }
+ *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } + /* Is it classful? */ cops = (*q)->ops->cl_ops; if (!cops) { NL_SET_ERR_MSG(extack, "Qdisc not classful"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; }
if (!cops->tcf_block) { NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_rcu; }
+ /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + /* Do we search for filter, attached to class? */ if (TC_H_MIN(*parent)) { *cl = cops->find(*q, *parent); if (*cl == 0) { NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - return ERR_PTR(-ENOENT); + err = -ENOENT; + goto errout_qdisc; } }
/* And the last stroke */ block = cops->tcf_block(*q, *cl, extack); - if (!block) - return ERR_PTR(-EINVAL); + if (!block) { + err = -EINVAL; + goto errout_qdisc; + } if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_qdisc; } }
return block; + +errout_rcu: + rcu_read_unlock(); +errout_qdisc: + if (*q) + qdisc_put(*q); + return ERR_PTR(err); +} + +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +{ + if (q) + qdisc_put(q); }
struct tcf_block_owner_item { @@ -1336,6 +1375,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. */ goto replay; @@ -1457,6 +1497,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; }
@@ -1542,6 +1583,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; }
@@ -1858,7 +1900,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { @@ -1870,23 +1913,27 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, tcf_chain_hold(chain); } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); - return -EEXIST; + err = -EEXIST; + goto errout_block; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); - return -ENOENT; + err = -ENOENT; + goto errout_block; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); - return -ENOMEM; + err = -ENOMEM; + goto errout_block; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } tcf_chain_hold(chain); } @@ -1930,6 +1977,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
errout: tcf_chain_put(chain); +errout_block: + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. */ goto replay;
linux-stable-mirror@lists.linaro.org