3.16.60-rc1 review patch. If anyone has any objections, please let me know.
------------------
From: Willem de Bruijn willemb@google.com
commit a6361f0ca4b25460f2cdf3235ebe8115f622901e upstream.
Updates to the bitfields in struct packet_sock are not atomic. Serialize these read-modify-write cycles.
Move po->running into a separate variable. Its writes are protected by po->bind_lock (except for one startup case at packet_create). Also replace a textual precondition warning with lockdep annotation.
All others are set only in packet_setsockopt. Serialize these updates by holding the socket lock. Analogous to other field updates, also hold the lock when testing whether a ring is active (pg_vec).
Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg") Reported-by: DaeRyong Jeong threeearcat@gmail.com Reported-by: Byoungyoung Lee byoungyoung@purdue.edu Signed-off-by: Willem de Bruijn willemb@google.com Signed-off-by: David S. Miller davem@davemloft.net [bwh: Backported to 3.16: adjust context] Signed-off-by: Ben Hutchings ben@decadent.org.uk --- net/packet/af_packet.c | 60 +++++++++++++++++++++++++++++++----------- net/packet/internal.h | 10 +++---- 2 files changed, 49 insertions(+), 21 deletions(-)
--- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -343,11 +343,11 @@ static void packet_pick_tx_queue(struct skb_set_queue_mapping(skb, queue_index); }
-/* register_prot_hook must be invoked with the po->bind_lock held, +/* __register_prot_hook must be invoked through register_prot_hook * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */ -static void register_prot_hook(struct sock *sk) +static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk);
@@ -362,8 +362,13 @@ static void register_prot_hook(struct so } }
-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock - * held. If the sync parameter is true, we will temporarily drop +static void register_prot_hook(struct sock *sk) +{ + lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); + __register_prot_hook(sk); +} + +/* If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the @@ -373,6 +378,8 @@ static void __unregister_prot_hook(struc { struct packet_sock *po = pkt_sk(sk);
+ lockdep_assert_held_once(&po->bind_lock); + po->running = 0;
if (po->fanout) @@ -2887,7 +2894,7 @@ static int packet_create(struct net *net
if (proto) { po->prot_hook.type = proto; - register_prot_hook(sk); + __register_prot_hook(sk); }
mutex_lock(&net->packet.sklist_lock); @@ -3386,12 +3393,18 @@ packet_setsockopt(struct socket *sock, i
if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_loss = !!val; - return 0; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_loss = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_AUXDATA: { @@ -3402,7 +3415,9 @@ packet_setsockopt(struct socket *sock, i if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT;
+ lock_sock(sk); po->auxdata = !!val; + release_sock(sk); return 0; } case PACKET_ORIGDEV: @@ -3414,7 +3429,9 @@ packet_setsockopt(struct socket *sock, i if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT;
+ lock_sock(sk); po->origdev = !!val; + release_sock(sk); return 0; } case PACKET_VNET_HDR: @@ -3423,15 +3440,20 @@ packet_setsockopt(struct socket *sock, i
if (sock->type != SOCK_RAW) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT;
- po->has_vnet_hdr = !!val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->has_vnet_hdr = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_TIMESTAMP: { @@ -3462,11 +3484,17 @@ packet_setsockopt(struct socket *sock, i
if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tx_has_off = !!val; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_tx_has_off = !!val; + ret = 0; + } + release_sock(sk); return 0; } case PACKET_QDISC_BYPASS: --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -100,10 +100,12 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, + unsigned int running; /* bind_lock must be held */ + unsigned int auxdata:1, /* writer must hold sock lock */ origdev:1, - has_vnet_hdr:1; + has_vnet_hdr:1, + tp_loss:1, + tp_tx_has_off:1; int ifindex; /* bound device */ __be16 num; struct packet_mclist *mclist; @@ -111,8 +113,6 @@ struct packet_sock { enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb);