- Linux-stable-mirror - lists.linaro.org

[Linux-stable-mirror] Patch "tcp: invalidate rate samples during SACK reneging" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled tcp: invalidate rate samples during SACK reneging to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: tcp-invalidate-rate-samples-during-sack-reneging.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Yousuk Seung <ysseung(a)google.com> Date: Thu, 7 Dec 2017 13:41:34 -0800 Subject: tcp: invalidate rate samples during SACK reneging From: Yousuk Seung <ysseung(a)google.com> [ Upstream commit d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 ] Mark tcp_sock during a SACK reneging event and invalidate rate samples while marked. Such rate samples may overestimate bw by including packets that were SACKed before reneging. < ack 6001 win 10000 sack 7001:38001 < ack 7001 win 0 sack 8001:38001 // Reneg detected > seq 7001:8001 // RTO, SACK cleared. < ack 38001 win 10000 In above example the rate sample taken after the last ack will count 7001-38001 as delivered while the actual delivery rate likely could be much lower i.e. 7001-8001. This patch adds a new field tcp_sock.sack_reneg and marks it when we declare SACK reneging and entering TCP_CA_Loss, and unmarks it after the last rate sample was taken before moving back to TCP_CA_Open. This patch also invalidates rate samples taken while tcp_sock.is_sack_reneg is set. Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection") Signed-off-by: Yousuk Seung <ysseung(a)google.com> Signed-off-by: Neal Cardwell <ncardwell(a)google.com> Signed-off-by: Yuchung Cheng <ycheng(a)google.com> Acked-by: Soheil Hassas Yeganeh <soheil(a)google.com> Acked-by: Eric Dumazet <edumazet(a)google.com> Acked-by: Priyaranjan Jha <priyarjha(a)google.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 10 ++++++++-- net/ipv4/tcp_rate.c | 10 +++++++--- 5 files changed, 19 insertions(+), 7 deletions(-) --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -219,7 +219,8 @@ struct tcp_sock { } rack; u16 advmss; /* Advertised MSS */ u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:7; + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + unused:6; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1001,7 +1001,7 @@ void tcp_rate_skb_sent(struct sock *sk, void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs); + bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2297,6 +2297,7 @@ int tcp_disconnect(struct sock *sk, int tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; tcp_clear_retrans(tp); inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1966,6 +1966,8 @@ void tcp_enter_loss(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; tp->fackets_out = 0; + /* Mark SACK reneging until we recover from this loss event. */ + tp->is_sack_reneg = 1; } tcp_clear_all_retrans_hints(tp); @@ -2463,6 +2465,7 @@ static bool tcp_try_undo_recovery(struct return true; } tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; return false; } @@ -2494,8 +2497,10 @@ static bool tcp_try_undo_loss(struct soc NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - if (frto_undo || tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + } return true; } return false; @@ -3589,6 +3594,7 @@ static int tcp_ack(struct sock *sk, cons struct tcp_sacktag_state sack_state; struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; + bool is_sack_reneg = tp->is_sack_reneg; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; @@ -3711,7 +3717,7 @@ static int tcp_ack(struct sock *sk, cons tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, &now, &rs); + tcp_rate_gen(sk, delivered, lost, is_sack_reneg, &now, &rs); tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); return 1; --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs) + bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; @@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 d rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ - /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp.v64) { + /* Return an invalid sample if no timing information is available or + * in recovery from loss with SACK reneging. Rate samples taken during + * a SACK reneging event may overestimate bw by including packets that + * were SACKed before the reneg. + */ + if (!rs->prior_mstamp.v64 || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return; Patches currently in stable-queue which might be from ysseung(a)google.com are queue-4.9/tcp-invalidate-rate-samples-during-sack-reneging.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "sock: free skb in skb_complete_tx_timestamp on error" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled sock: free skb in skb_complete_tx_timestamp on error to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Willem de Bruijn <willemb(a)google.com> Date: Wed, 13 Dec 2017 14:41:06 -0500 Subject: sock: free skb in skb_complete_tx_timestamp on error From: Willem de Bruijn <willemb(a)google.com> [ Upstream commit 35b99dffc3f710cafceee6c8c6ac6a98eb2cb4bf ] skb_complete_tx_timestamp must ingest the skb it is passed. Call kfree_skb if the skb cannot be enqueued. Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl") Fixes: 9ac25fc06375 ("net: fix socket refcounting in skb_complete_tx_timestamp()") Reported-by: Richard Cochran <richardcochran(a)gmail.com> Signed-off-by: Willem de Bruijn <willemb(a)google.com> Reviewed-by: Eric Dumazet <edumazet(a)google.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- net/core/skbuff.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3823,7 +3823,7 @@ void skb_complete_tx_timestamp(struct sk struct sock *sk = skb->sk; if (!skb_may_tx_timestamp(sk, false)) - return; + goto err; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. @@ -3832,7 +3832,11 @@ void skb_complete_tx_timestamp(struct sk *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); sock_put(sk); + return; } + +err: + kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); Patches currently in stable-queue which might be from willemb(a)google.com are queue-4.9/sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "sctp: Replace use of sockets_allocated with specified macro." has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled sctp: Replace use of sockets_allocated with specified macro. to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: sctp-replace-use-of-sockets_allocated-with-specified-macro.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Tonghao Zhang <xiangxia.m.yue(a)gmail.com> Date: Fri, 22 Dec 2017 10:15:20 -0800 Subject: sctp: Replace use of sockets_allocated with specified macro. From: Tonghao Zhang <xiangxia.m.yue(a)gmail.com> [ Upstream commit 8cb38a602478e9f806571f6920b0a3298aabf042 ] The patch(180d8cd942ce) replaces all uses of struct sock fields' memory_pressure, memory_allocated, sockets_allocated, and sysctl_mem to accessor macros. But the sockets_allocated field of sctp sock is not replaced at all. Then replace it now for unifying the code. Fixes: 180d8cd942ce ("foundations of per-cgroup memory pressure controlling.") Cc: Glauber Costa <glommer(a)parallels.com> Signed-off-by: Tonghao Zhang <zhangtonghao(a)didichuxing.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4246,7 +4246,7 @@ static int sctp_init_sock(struct sock *s SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); - percpu_counter_inc(&sctp_sockets_allocated); + sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); /* Nothing can fail after this block, otherwise @@ -4290,7 +4290,7 @@ static void sctp_destroy_sock(struct soc } sctp_endpoint_free(sp->ep); local_bh_disable(); - percpu_counter_dec(&sctp_sockets_allocated); + sk_sockets_allocated_dec(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } Patches currently in stable-queue which might be from xiangxia.m.yue(a)gmail.com are queue-4.9/sctp-replace-use-of-sockets_allocated-with-specified-macro.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "s390/qeth: update takeover IPs after configuration change" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled s390/qeth: update takeover IPs after configuration change to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: s390-qeth-update-takeover-ips-after-configuration-change.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> Date: Wed, 13 Dec 2017 18:56:32 +0100 Subject: s390/qeth: update takeover IPs after configuration change From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> [ Upstream commit 02f510f326501470348a5df341e8232c3497bbbb ] Any modification to the takeover IP-ranges requires that we re-evaluate which IP addresses are takeover-eligible. Otherwise we might do takeover for some addresses when we no longer should, or vice-versa. Signed-off-by: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/s390/net/qeth_core.h | 4 +- drivers/s390/net/qeth_core_main.c | 4 +- drivers/s390/net/qeth_l3.h | 2 - drivers/s390/net/qeth_l3_main.c | 31 ++++++++++++++++-- drivers/s390/net/qeth_l3_sys.c | 63 ++++++++++++++++++++------------------ 5 files changed, 67 insertions(+), 37 deletions(-) --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -577,8 +577,8 @@ enum qeth_cq { struct qeth_ipato { bool enabled; - int invert4; - int invert6; + bool invert4; + bool invert6; struct list_head entries; }; --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1476,8 +1476,8 @@ static int qeth_setup_card(struct qeth_c /* IP address takeover */ INIT_LIST_HEAD(&card->ipato.entries); card->ipato.enabled = false; - card->ipato.invert4 = 0; - card->ipato.invert6 = 0; + card->ipato.invert4 = false; + card->ipato.invert6 = false; /* init QDIO stuff */ qeth_init_qdio_info(card); INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work); --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -80,7 +80,7 @@ void qeth_l3_del_vipa(struct qeth_card * int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *); void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions, const u8 *); -int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *); +void qeth_l3_update_ipato(struct qeth_card *card); struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions); int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *); int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *); --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -168,8 +168,8 @@ static void qeth_l3_convert_addr_to_bits } } -int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, - struct qeth_ipaddr *addr) +static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, + struct qeth_ipaddr *addr) { struct qeth_ipato_entry *ipatoe; u8 addr_bits[128] = {0, }; @@ -608,6 +608,27 @@ int qeth_l3_setrouting_v6(struct qeth_ca /* * IP address takeover related functions */ + +/** + * qeth_l3_update_ipato() - Update 'takeover' property, for all NORMAL IPs. + * + * Caller must hold ip_lock. + */ +void qeth_l3_update_ipato(struct qeth_card *card) +{ + struct qeth_ipaddr *addr; + unsigned int i; + + hash_for_each(card->ip_htable, i, addr, hnode) { + if (addr->type != QETH_IP_TYPE_NORMAL) + continue; + if (qeth_l3_is_addr_covered_by_ipato(card, addr)) + addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + else + addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; + } +} + static void qeth_l3_clear_ipato_list(struct qeth_card *card) { struct qeth_ipato_entry *ipatoe, *tmp; @@ -619,6 +640,7 @@ static void qeth_l3_clear_ipato_list(str kfree(ipatoe); } + qeth_l3_update_ipato(card); spin_unlock_bh(&card->ip_lock); } @@ -643,8 +665,10 @@ int qeth_l3_add_ipato_entry(struct qeth_ } } - if (!rc) + if (!rc) { list_add_tail(&new->entry, &card->ipato.entries); + qeth_l3_update_ipato(card); + } spin_unlock_bh(&card->ip_lock); @@ -667,6 +691,7 @@ void qeth_l3_del_ipato_entry(struct qeth (proto == QETH_PROT_IPV4)? 4:16) && (ipatoe->mask_bits == mask_bits)) { list_del(&ipatoe->entry); + qeth_l3_update_ipato(card); kfree(ipatoe); } } --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -372,9 +372,8 @@ static ssize_t qeth_l3_dev_ipato_enable_ struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); - struct qeth_ipaddr *addr; - int i, rc = 0; bool enable; + int rc = 0; if (!card) return -EINVAL; @@ -393,20 +392,12 @@ static ssize_t qeth_l3_dev_ipato_enable_ goto out; } - if (card->ipato.enabled == enable) - goto out; - card->ipato.enabled = enable; - - spin_lock_bh(&card->ip_lock); - hash_for_each(card->ip_htable, i, addr, hnode) { - if (addr->type != QETH_IP_TYPE_NORMAL) - continue; - if (!enable) - addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; - else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) - addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + if (card->ipato.enabled != enable) { + card->ipato.enabled = enable; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); } - spin_unlock_bh(&card->ip_lock); out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; @@ -432,20 +423,27 @@ static ssize_t qeth_l3_dev_ipato_invert4 const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + bool invert; int rc = 0; if (!card) return -EINVAL; mutex_lock(&card->conf_mutex); - if (sysfs_streq(buf, "toggle")) - card->ipato.invert4 = (card->ipato.invert4)? 0 : 1; - else if (sysfs_streq(buf, "1")) - card->ipato.invert4 = 1; - else if (sysfs_streq(buf, "0")) - card->ipato.invert4 = 0; - else + if (sysfs_streq(buf, "toggle")) { + invert = !card->ipato.invert4; + } else if (kstrtobool(buf, &invert)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.invert4 != invert) { + card->ipato.invert4 = invert; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } +out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; } @@ -611,20 +609,27 @@ static ssize_t qeth_l3_dev_ipato_invert6 struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + bool invert; int rc = 0; if (!card) return -EINVAL; mutex_lock(&card->conf_mutex); - if (sysfs_streq(buf, "toggle")) - card->ipato.invert6 = (card->ipato.invert6)? 0 : 1; - else if (sysfs_streq(buf, "1")) - card->ipato.invert6 = 1; - else if (sysfs_streq(buf, "0")) - card->ipato.invert6 = 0; - else + if (sysfs_streq(buf, "toggle")) { + invert = !card->ipato.invert6; + } else if (kstrtobool(buf, &invert)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.invert6 != invert) { + card->ipato.invert6 = invert; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } +out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; } Patches currently in stable-queue which might be from jwi(a)linux.vnet.ibm.com are queue-4.9/s390-qeth-lock-ip-table-while-applying-takeover-changes.patch queue-4.9/s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch queue-4.9/s390-qeth-update-takeover-ips-after-configuration-change.patch queue-4.9/s390-qeth-don-t-apply-takeover-changes-to-rxip.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "s390/qeth: don't apply takeover changes to RXIP" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled s390/qeth: don't apply takeover changes to RXIP to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: s390-qeth-don-t-apply-takeover-changes-to-rxip.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> Date: Wed, 13 Dec 2017 18:56:30 +0100 Subject: s390/qeth: don't apply takeover changes to RXIP From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> [ Upstream commit b22d73d6689fd902a66c08ebe71ab2f3b351e22f ] When takeover is switched off, current code clears the 'TAKEOVER' flag on all IPs. But the flag is also used for RXIP addresses, and those should not be affected by the takeover mode. Fix the behaviour by consistenly applying takover logic to NORMAL addresses only. Signed-off-by: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/s390/net/qeth_l3_main.c | 5 +++-- drivers/s390/net/qeth_l3_sys.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -178,6 +178,8 @@ int qeth_l3_is_addr_covered_by_ipato(str if (!card->ipato.enabled) return 0; + if (addr->type != QETH_IP_TYPE_NORMAL) + return 0; qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits, (addr->proto == QETH_PROT_IPV4)? 4:16); @@ -293,8 +295,7 @@ int qeth_l3_add_ip(struct qeth_card *car memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr)); addr->ref_counter = 1; - if (addr->type == QETH_IP_TYPE_NORMAL && - qeth_l3_is_addr_covered_by_ipato(card, addr)) { + if (qeth_l3_is_addr_covered_by_ipato(card, addr)) { QETH_CARD_TEXT(card, 2, "tkovaddr"); addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -398,10 +398,11 @@ static ssize_t qeth_l3_dev_ipato_enable_ card->ipato.enabled = enable; hash_for_each(card->ip_htable, i, addr, hnode) { + if (addr->type != QETH_IP_TYPE_NORMAL) + continue; if (!enable) addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; - else if (addr->type == QETH_IP_TYPE_NORMAL && - qeth_l3_is_addr_covered_by_ipato(card, addr)) + else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } out: Patches currently in stable-queue which might be from jwi(a)linux.vnet.ibm.com are queue-4.9/s390-qeth-lock-ip-table-while-applying-takeover-changes.patch queue-4.9/s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch queue-4.9/s390-qeth-update-takeover-ips-after-configuration-change.patch queue-4.9/s390-qeth-don-t-apply-takeover-changes-to-rxip.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "s390/qeth: lock IP table while applying takeover changes" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled s390/qeth: lock IP table while applying takeover changes to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: s390-qeth-lock-ip-table-while-applying-takeover-changes.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> Date: Wed, 13 Dec 2017 18:56:31 +0100 Subject: s390/qeth: lock IP table while applying takeover changes From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> [ Upstream commit 8a03a3692b100d84785ee7a834e9215e304c9e00 ] Modifying the flags of an IP addr object needs to be protected against eg. concurrent removal of the same object from the IP table. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/s390/net/qeth_l3_sys.c | 2 ++ 1 file changed, 2 insertions(+) --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -397,6 +397,7 @@ static ssize_t qeth_l3_dev_ipato_enable_ goto out; card->ipato.enabled = enable; + spin_lock_bh(&card->ip_lock); hash_for_each(card->ip_htable, i, addr, hnode) { if (addr->type != QETH_IP_TYPE_NORMAL) continue; @@ -405,6 +406,7 @@ static ssize_t qeth_l3_dev_ipato_enable_ else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } + spin_unlock_bh(&card->ip_lock); out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; Patches currently in stable-queue which might be from jwi(a)linux.vnet.ibm.com are queue-4.9/s390-qeth-lock-ip-table-while-applying-takeover-changes.patch queue-4.9/s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch queue-4.9/s390-qeth-update-takeover-ips-after-configuration-change.patch queue-4.9/s390-qeth-don-t-apply-takeover-changes-to-rxip.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "s390/qeth: apply takeover changes when mode is toggled" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled s390/qeth: apply takeover changes when mode is toggled to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> Date: Wed, 13 Dec 2017 18:56:29 +0100 Subject: s390/qeth: apply takeover changes when mode is toggled From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> [ Upstream commit 7fbd9493f0eeae8cef58300505a9ef5c8fce6313 ] Just as for an explicit enable/disable, toggling the takeover mode also requires that the IP addresses get updated. Otherwise all IPs that were added to the table before the mode-toggle, get registered with the old settings. Signed-off-by: Julian Wiedmann <jwi(a)linux.vnet.ibm.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/s390/net/qeth_core.h | 2 +- drivers/s390/net/qeth_core_main.c | 2 +- drivers/s390/net/qeth_l3_sys.c | 35 +++++++++++++++++------------------ 3 files changed, 19 insertions(+), 20 deletions(-) --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -576,7 +576,7 @@ enum qeth_cq { }; struct qeth_ipato { - int enabled; + bool enabled; int invert4; int invert6; struct list_head entries; --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1475,7 +1475,7 @@ static int qeth_setup_card(struct qeth_c qeth_set_intial_options(card); /* IP address takeover */ INIT_LIST_HEAD(&card->ipato.entries); - card->ipato.enabled = 0; + card->ipato.enabled = false; card->ipato.invert4 = 0; card->ipato.invert6 = 0; /* init QDIO stuff */ --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -374,6 +374,7 @@ static ssize_t qeth_l3_dev_ipato_enable_ struct qeth_card *card = dev_get_drvdata(dev); struct qeth_ipaddr *addr; int i, rc = 0; + bool enable; if (!card) return -EINVAL; @@ -386,25 +387,23 @@ static ssize_t qeth_l3_dev_ipato_enable_ } if (sysfs_streq(buf, "toggle")) { - card->ipato.enabled = (card->ipato.enabled)? 0 : 1; - } else if (sysfs_streq(buf, "1")) { - card->ipato.enabled = 1; - hash_for_each(card->ip_htable, i, addr, hnode) { - if ((addr->type == QETH_IP_TYPE_NORMAL) && - qeth_l3_is_addr_covered_by_ipato(card, addr)) - addr->set_flags |= - QETH_IPA_SETIP_TAKEOVER_FLAG; - } - } else if (sysfs_streq(buf, "0")) { - card->ipato.enabled = 0; - hash_for_each(card->ip_htable, i, addr, hnode) { - if (addr->set_flags & - QETH_IPA_SETIP_TAKEOVER_FLAG) - addr->set_flags &= - ~QETH_IPA_SETIP_TAKEOVER_FLAG; - } - } else + enable = !card->ipato.enabled; + } else if (kstrtobool(buf, &enable)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.enabled == enable) + goto out; + card->ipato.enabled = enable; + + hash_for_each(card->ip_htable, i, addr, hnode) { + if (!enable) + addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; + else if (addr->type == QETH_IP_TYPE_NORMAL && + qeth_l3_is_addr_covered_by_ipato(card, addr)) + addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + } out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; Patches currently in stable-queue which might be from jwi(a)linux.vnet.ibm.com are queue-4.9/s390-qeth-lock-ip-table-while-applying-takeover-changes.patch queue-4.9/s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch queue-4.9/s390-qeth-update-takeover-ips-after-configuration-change.patch queue-4.9/s390-qeth-don-t-apply-takeover-changes-to-rxip.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "RDS: Check cmsg_len before dereferencing CMSG_DATA" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled RDS: Check cmsg_len before dereferencing CMSG_DATA to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: rds-check-cmsg_len-before-dereferencing-cmsg_data.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Avinash Repaka <avinash.repaka(a)oracle.com> Date: Thu, 21 Dec 2017 20:17:04 -0800 Subject: RDS: Check cmsg_len before dereferencing CMSG_DATA From: Avinash Repaka <avinash.repaka(a)oracle.com> [ Upstream commit 14e138a86f6347c6199f610576d2e11c03bec5f0 ] RDS currently doesn't check if the length of the control message is large enough to hold the required data, before dereferencing the control message data. This results in following crash: BUG: KASAN: stack-out-of-bounds in rds_rdma_bytes net/rds/send.c:1013 [inline] BUG: KASAN: stack-out-of-bounds in rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066 Read of size 8 at addr ffff8801c928fb70 by task syzkaller455006/3157 CPU: 0 PID: 3157 Comm: syzkaller455006 Not tainted 4.15.0-rc3+ #161 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430 rds_rdma_bytes net/rds/send.c:1013 [inline] rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066 sock_sendmsg_nosec net/socket.c:628 [inline] sock_sendmsg+0xca/0x110 net/socket.c:638 ___sys_sendmsg+0x320/0x8b0 net/socket.c:2018 __sys_sendmmsg+0x1ee/0x620 net/socket.c:2108 SYSC_sendmmsg net/socket.c:2139 [inline] SyS_sendmmsg+0x35/0x60 net/socket.c:2134 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x43fe49 RSP: 002b:00007fffbe244ad8 EFLAGS: 00000217 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fe49 RDX: 0000000000000001 RSI: 000000002020c000 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004017b0 R13: 0000000000401840 R14: 0000000000000000 R15: 0000000000000000 To fix this, we verify that the cmsg_len is large enough to hold the data to be read, before proceeding further. Reported-by: syzbot <syzkaller-bugs(a)googlegroups.com> Signed-off-by: Avinash Repaka <avinash.repaka(a)oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar(a)oracle.com> Reviewed-by: Yuval Shaia <yuval.shaia(a)oracle.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- net/rds/send.c | 3 +++ 1 file changed, 3 insertions(+) --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1006,6 +1006,9 @@ static int rds_rdma_bytes(struct msghdr continue; if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { + if (cmsg->cmsg_len < + CMSG_LEN(sizeof(struct rds_rdma_args))) + return -EINVAL; args = CMSG_DATA(cmsg); *rdma_bytes += args->remote_vec.bytes; } Patches currently in stable-queue which might be from avinash.repaka(a)oracle.com are queue-4.9/rds-check-cmsg_len-before-dereferencing-cmsg_data.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "ptr_ring: add barriers" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled ptr_ring: add barriers to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: ptr_ring-add-barriers.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: "Michael S. Tsirkin" <mst(a)redhat.com> Date: Tue, 5 Dec 2017 21:29:37 +0200 Subject: ptr_ring: add barriers From: "Michael S. Tsirkin" <mst(a)redhat.com> [ Upstream commit a8ceb5dbfde1092b466936bca0ff3be127ecf38e ] Users of ptr_ring expect that it's safe to give the data structure a pointer and have it be available to consumers, but that actually requires an smb_wmb or a stronger barrier. In absence of such barriers and on architectures that reorder writes, consumer might read an un=initialized value from an skb pointer stored in the skb array. This was observed causing crashes. To fix, add memory barriers. The barrier we use is a wmb, the assumption being that producers do not need to read the value so we do not need to order these reads. Reported-by: George Cherian <george.cherian(a)cavium.com> Suggested-by: Jason Wang <jasowang(a)redhat.com> Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com> Acked-by: Jason Wang <jasowang(a)redhat.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- include/linux/ptr_ring.h | 9 +++++++++ 1 file changed, 9 insertions(+) --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -99,12 +99,18 @@ static inline bool ptr_ring_full_bh(stru /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. + * Callers are responsible for making sure pointer that is being queued + * points to a valid data. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; + /* Make sure the pointer we are storing points to a valid data. */ + /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ + smp_wmb(); + r->queue[r->producer++] = ptr; if (unlikely(r->producer >= r->size)) r->producer = 0; @@ -244,6 +250,9 @@ static inline void *__ptr_ring_consume(s if (ptr) __ptr_ring_discard_one(r); + /* Make sure anyone accessing data through the pointer is up to date. */ + /* Pairs with smp_wmb in __ptr_ring_produce. */ + smp_read_barrier_depends(); return ptr; } Patches currently in stable-queue which might be from mst(a)redhat.com are queue-4.9/ptr_ring-add-barriers.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "netlink: Add netns check on taps" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled netlink: Add netns check on taps to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: netlink-add-netns-check-on-taps.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Kevin Cernekee <cernekee(a)chromium.org> Date: Wed, 6 Dec 2017 12:12:27 -0800 Subject: netlink: Add netns check on taps From: Kevin Cernekee <cernekee(a)chromium.org> [ Upstream commit 93c647643b48f0131f02e45da3bd367d80443291 ] Currently, a nlmon link inside a child namespace can observe systemwide netlink activity. Filter the traffic so that nlmon can only sniff netlink messages from its own netns. Test case: vpnns -- bash -c "ip link add nlmon0 type nlmon; \ ip link set nlmon0 up; \ tcpdump -i nlmon0 -q -w /tmp/nlmon.pcap -U" & sudo ip xfrm state add src 10.1.1.1 dst 10.1.1.2 proto esp \ spi 0x1 mode transport \ auth sha1 0x6162633132330000000000000000000000000000 \ enc aes 0x00000000000000000000000000000000 grep --binary abc123 /tmp/nlmon.pcap Signed-off-by: Kevin Cernekee <cernekee(a)chromium.org> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- net/netlink/af_netlink.c | 3 +++ 1 file changed, 3 insertions(+) --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -261,6 +261,9 @@ static int __netlink_deliver_tap_skb(str struct sock *sk = skb->sk; int ret = -ENOMEM; + if (!net_eq(dev_net(dev), sock_net(sk))) + return 0; + dev_hold(dev); if (is_vmalloc_addr(skb->head)) Patches currently in stable-queue which might be from cernekee(a)chromium.org are queue-4.9/net-igmp-use-correct-source-address-on-igmpv3-reports.patch queue-4.9/netlink-add-netns-check-on-taps.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: reevalulate autoflowlabel setting after sysctl setting" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: reevalulate autoflowlabel setting after sysctl setting to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Shaohua Li <shli(a)fb.com> Date: Wed, 20 Dec 2017 12:10:21 -0800 Subject: net: reevalulate autoflowlabel setting after sysctl setting From: Shaohua Li <shli(a)fb.com> [ Upstream commit 513674b5a2c9c7a67501506419da5c3c77ac6f08 ] sysctl.ip6.auto_flowlabels is default 1. In our hosts, we set it to 2. If sockopt doesn't set autoflowlabel, outcome packets from the hosts are supposed to not include flowlabel. This is true for normal packet, but not for reset packet. The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if we change sysctl.ip6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't changed, so the sock will keep the old behavior in terms of auto flowlabel. Reset packet is suffering from this problem, because reset packet is sent from a special control socket, which is created at boot time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control socket will always have its ipv6_pinfo.autoflowlabel set, even after user set sysctl.ipv6.auto_flowlabels to 1, so reset packset will always have flowlabel. Normal sock created before sysctl setting suffers from the same issue. We can't even turn off autoflowlabel unless we kill all socks in the hosts. To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the autoflowlabel setting from user, otherwise we always call ip6_default_np_autolabel() which has the new settings of sysctl. Note, this changes behavior a little bit. Before commit 42240901f7c4 (ipv6: Implement different admin modes for automatic flow labels), the autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes, existing connection will change autoflowlabel behavior. After that commit, autoflowlabel behavior is sticky in the whole life of the sock. With this patch, the behavior isn't sticky again. Cc: Martin KaFai Lau <kafai(a)fb.com> Cc: Eric Dumazet <eric.dumazet(a)gmail.com> Cc: Tom Herbert <tom(a)quantonium.net> Signed-off-by: Shaohua Li <shli(a)fb.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- include/linux/ipv6.h | 3 ++- net/ipv6/af_inet6.c | 1 - net/ipv6/ip6_output.c | 12 ++++++++++-- net/ipv6/ipv6_sockglue.c | 1 + 4 files changed, 13 insertions(+), 4 deletions(-) --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -246,7 +246,8 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1; + autoflowlabel:1, + autoflowlabel_set:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -209,7 +209,6 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk)); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -156,6 +156,14 @@ int ip6_output(struct net *net, struct s !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +{ + if (!np->autoflowlabel_set) + return ip6_default_np_autolabel(net); + else + return np->autoflowlabel; +} + /* * xmit an sk_buff (used by TCP, SCTP and DCCP) * Note : socket lock is not held for SYNACK packets, but might be modified @@ -219,7 +227,7 @@ int ip6_xmit(const struct sock *sk, stru hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel, fl6)); + ip6_autoflowlabel(net, np), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1691,7 +1699,7 @@ struct sk_buff *__ip6_make_skb(struct so ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel, fl6)); + ip6_autoflowlabel(net, np), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -874,6 +874,7 @@ pref_skip_coa: break; case IPV6_AUTOFLOWLABEL: np->autoflowlabel = valbool; + np->autoflowlabel_set = 1; retv = 0; break; } Patches currently in stable-queue which might be from shli(a)fb.com are queue-4.9/net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: qmi_wwan: add Sierra EM7565 1199:9091" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: qmi_wwan: add Sierra EM7565 1199:9091 to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-qmi_wwan-add-sierra-em7565-1199-9091.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Sebastian Sjoholm <ssjoholm(a)mac.com> Date: Mon, 11 Dec 2017 21:51:14 +0100 Subject: net: qmi_wwan: add Sierra EM7565 1199:9091 From: Sebastian Sjoholm <ssjoholm(a)mac.com> [ Upstream commit aceef61ee56898cfa7b6960fb60b9326c3860441 ] Sierra Wireless EM7565 is an Qualcomm MDM9x50 based M.2 modem. The USB id is added to qmi_wwan.c to allow QMI communication with the EM7565. Signed-off-by: Sebastian Sjoholm <ssjoholm(a)mac.com> Acked-by: Bjørn Mork <bjorn(a)mork.no> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -907,6 +907,7 @@ static const struct usb_device_id produc {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ Patches currently in stable-queue which might be from ssjoholm(a)mac.com are queue-4.9/net-qmi_wwan-add-sierra-em7565-1199-9091.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Grygorii Strashko <grygorii.strashko(a)ti.com> Date: Wed, 20 Dec 2017 18:45:10 -0600 Subject: net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround From: Grygorii Strashko <grygorii.strashko(a)ti.com> [ Upstream commit c1a8d0a3accf64a014d605e6806ce05d1c17adf1 ] Under some circumstances driver will perform PHY reset in ksz9031_read_status() to fix autoneg failure case (idle error count = 0xFF). When this happens ksz9031 will not detect link status change any more when connecting to Netgear 1G switch (link can be recovered sometimes by restarting netdevice "ifconfig down up"). Reproduced with TI am572x board equipped with ksz9031 PHY while connecting to Netgear 1G switch. Fix the issue by reconfiguring autonegotiation after PHY reset in ksz9031_read_status(). Fixes: d2fd719bcb0e ("net/phy: micrel: Add workaround for bad autoneg") Signed-off-by: Grygorii Strashko <grygorii.strashko(a)ti.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/phy/micrel.c | 1 + 1 file changed, 1 insertion(+) --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -624,6 +624,7 @@ static int ksz9031_read_status(struct ph phydev->link = 0; if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) phydev->drv->config_intr(phydev); + return genphy_config_aneg(phydev); } return 0; Patches currently in stable-queue which might be from grygorii.strashko(a)ti.com are queue-4.9/net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Tobias Jordan <Tobias.Jordan(a)elektrobit.com> Date: Wed, 6 Dec 2017 15:23:23 +0100 Subject: net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case From: Tobias Jordan <Tobias.Jordan(a)elektrobit.com> [ Upstream commit 589bf32f09852041fbd3b7ce1a9e703f95c230ba ] add appropriate calls to clk_disable_unprepare() by jumping to out_mdio in case orion_mdio_probe() returns -EPROBE_DEFER. Found by Linux Driver Verification project (linuxtesting.org). Fixes: 3d604da1e954 ("net: mvmdio: get and enable optional clock") Signed-off-by: Tobias Jordan <Tobias.Jordan(a)elektrobit.com> Reviewed-by: Andrew Lunn <andrew(a)lunn.ch> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/marvell/mvmdio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -232,7 +232,8 @@ static int orion_mdio_probe(struct platf dev->regs + MVMDIO_ERR_INT_MASK); } else if (dev->err_interrupt == -EPROBE_DEFER) { - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out_mdio; } mutex_init(&dev->lock); Patches currently in stable-queue which might be from Tobias.Jordan(a)elektrobit.com are queue-4.9/net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net/mlx5e: Prevent possible races in VXLAN control flow" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net/mlx5e: Prevent possible races in VXLAN control flow to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Gal Pressman <galp(a)mellanox.com> Date: Mon, 4 Dec 2017 09:57:43 +0200 Subject: net/mlx5e: Prevent possible races in VXLAN control flow From: Gal Pressman <galp(a)mellanox.com> [ Upstream commit 0c1cc8b2215f5122ca614b5adca60346018758c3 ] When calling add/remove VXLAN port, a lock must be held in order to prevent race scenarios when more than one add/remove happens at the same time. Fix by holding our state_lock (mutex) as done by all other parts of the driver. Note that the spinlock protecting the radix-tree is still needed in order to synchronize radix-tree access from softirq context. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman <galp(a)mellanox.com> Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 4 ++++ 1 file changed, 4 insertions(+) --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -88,6 +88,7 @@ static void mlx5e_vxlan_add_port(struct struct mlx5e_vxlan *vxlan; int err; + mutex_lock(&priv->state_lock); vxlan = mlx5e_vxlan_lookup_port(priv, port); if (vxlan) { atomic_inc(&vxlan->refcount); @@ -117,6 +118,7 @@ err_free: err_delete_port: mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); free_work: + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } @@ -130,6 +132,7 @@ static void mlx5e_vxlan_del_port(struct struct mlx5e_vxlan *vxlan; bool remove = false; + mutex_lock(&priv->state_lock); spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_lookup(&vxlan_db->tree, port); if (!vxlan) @@ -147,6 +150,7 @@ out_unlock: mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); kfree(vxlan); } + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } Patches currently in stable-queue which might be from galp(a)mellanox.com are queue-4.9/net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch queue-4.9/net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch queue-4.9/net-mlx5e-add-refcount-to-vxlan-structure.patch queue-4.9/net-mlx5e-fix-features-check-of-ipv6-traffic.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net/mlx5e: Fix possible deadlock of VXLAN lock" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net/mlx5e: Fix possible deadlock of VXLAN lock to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Gal Pressman <galp(a)mellanox.com> Date: Thu, 23 Nov 2017 13:52:28 +0200 Subject: net/mlx5e: Fix possible deadlock of VXLAN lock From: Gal Pressman <galp(a)mellanox.com> [ Upstream commit 6323514116404cc651df1b7fffa1311ddf8ce647 ] mlx5e_vxlan_lookup_port is called both from mlx5e_add_vxlan_port (user context) and mlx5e_features_check (softirq), but the lock acquired does not disable bottom half and might result in deadlock. Fix it by simply replacing spin_lock() with spin_lock_bh(). While at it, replace all unnecessary spin_lock_irq() to spin_lock_bh(). lockdep's WARNING: inconsistent lock state [ 654.028136] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 654.028229] swapper/5/0 [HC0[0]:SC1[9]:HE1:SE0] takes: [ 654.028321] (&(&vxlan_db->lock)->rlock){+.?.}, at: [<ffffffffa06e7f0e>] mlx5e_vxlan_lookup_port+0x1e/0x50 [mlx5_core] [ 654.028528] {SOFTIRQ-ON-W} state was registered at: [ 654.028607] _raw_spin_lock+0x3c/0x70 [ 654.028689] mlx5e_vxlan_lookup_port+0x1e/0x50 [mlx5_core] [ 654.028794] mlx5e_vxlan_add_port+0x2e/0x120 [mlx5_core] [ 654.028878] process_one_work+0x1e9/0x640 [ 654.028942] worker_thread+0x4a/0x3f0 [ 654.029002] kthread+0x141/0x180 [ 654.029056] ret_from_fork+0x24/0x30 [ 654.029114] irq event stamp: 579088 [ 654.029174] hardirqs last enabled at (579088): [<ffffffff818f475a>] ip6_finish_output2+0x49a/0x8c0 [ 654.029309] hardirqs last disabled at (579087): [<ffffffff818f470e>] ip6_finish_output2+0x44e/0x8c0 [ 654.029446] softirqs last enabled at (579030): [<ffffffff810b3b3d>] irq_enter+0x6d/0x80 [ 654.029567] softirqs last disabled at (579031): [<ffffffff810b3c05>] irq_exit+0xb5/0xc0 [ 654.029684] other info that might help us debug this: [ 654.029781] Possible unsafe locking scenario: [ 654.029868] CPU0 [ 654.029908] ---- [ 654.029947] lock(&(&vxlan_db->lock)->rlock); [ 654.030045] <Interrupt> [ 654.030090] lock(&(&vxlan_db->lock)->rlock); [ 654.030162] *** DEADLOCK *** Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman <galp(a)mellanox.com> Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_p struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; struct mlx5e_vxlan *vxlan; - spin_lock(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_lookup(&vxlan_db->tree, port); - spin_unlock(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); return vxlan; } @@ -100,9 +100,9 @@ static void mlx5e_vxlan_add_port(struct vxlan->udp_port = port; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); if (err) goto err_free; @@ -121,9 +121,9 @@ static void __mlx5e_vxlan_core_del_port( struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; struct mlx5e_vxlan *vxlan; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_delete(&vxlan_db->tree, port); - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); if (!vxlan) return; @@ -171,12 +171,12 @@ void mlx5e_vxlan_cleanup(struct mlx5e_pr struct mlx5e_vxlan *vxlan; unsigned int port = 0; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { port = vxlan->udp_port; - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); __mlx5e_vxlan_core_del_port(priv, (u16)port); - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); } - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); } Patches currently in stable-queue which might be from galp(a)mellanox.com are queue-4.9/net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch queue-4.9/net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch queue-4.9/net-mlx5e-add-refcount-to-vxlan-structure.patch queue-4.9/net-mlx5e-fix-features-check-of-ipv6-traffic.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net/mlx5e: Fix features check of IPv6 traffic" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net/mlx5e: Fix features check of IPv6 traffic to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-mlx5e-fix-features-check-of-ipv6-traffic.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Gal Pressman <galp(a)mellanox.com> Date: Tue, 21 Nov 2017 17:49:36 +0200 Subject: net/mlx5e: Fix features check of IPv6 traffic From: Gal Pressman <galp(a)mellanox.com> [ Upstream commit 2989ad1ec03021ee6d2193c35414f1d970a243de ] The assumption that the next header field contains the transport protocol is wrong for IPv6 packets with extension headers. Instead, we should look the inner-most next header field in the buffer. This will fix TSO offload for tunnels over IPv6 with extension headers. Performance testing: 19.25x improvement, cool! Measuring bandwidth of 16 threads TCP traffic over IPv6 GRE tap. CPU: Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz NIC: Mellanox Technologies MT28800 Family [ConnectX-5 Ex] TSO: Enabled Before: 4,926.24 Mbps Now : 94,827.91 Mbps Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman <galp(a)mellanox.com> Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3038,6 +3038,7 @@ static netdev_features_t mlx5e_vxlan_fea struct sk_buff *skb, netdev_features_t features) { + unsigned int offset = 0; struct udphdr *udph; u16 proto; u16 port = 0; @@ -3047,7 +3048,7 @@ static netdev_features_t mlx5e_vxlan_fea proto = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6): - proto = ipv6_hdr(skb)->nexthdr; + proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); break; default: goto out; Patches currently in stable-queue which might be from galp(a)mellanox.com are queue-4.9/net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch queue-4.9/net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch queue-4.9/net-mlx5e-add-refcount-to-vxlan-structure.patch queue-4.9/net-mlx5e-fix-features-check-of-ipv6-traffic.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net/mlx5e: Add refcount to VXLAN structure" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net/mlx5e: Add refcount to VXLAN structure to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-mlx5e-add-refcount-to-vxlan-structure.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Gal Pressman <galp(a)mellanox.com> Date: Sun, 3 Dec 2017 13:58:50 +0200 Subject: net/mlx5e: Add refcount to VXLAN structure From: Gal Pressman <galp(a)mellanox.com> [ Upstream commit 23f4cc2cd9ed92570647220aca60d0197d8c1fa9 ] A refcount mechanism must be implemented in order to prevent unwanted scenarios such as: - Open an IPv4 VXLAN interface - Open an IPv6 VXLAN interface (different socket) - Remove one of the interfaces With current implementation, the UDP port will be removed from our VXLAN database and turn off the offloads for the other interface, which is still active. The reference count mechanism will only allow UDP port removals once all consumers are gone. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman <galp(a)mellanox.com> Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 50 ++++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/vxlan.h | 1 2 files changed, 28 insertions(+), 23 deletions(-) --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -88,8 +88,11 @@ static void mlx5e_vxlan_add_port(struct struct mlx5e_vxlan *vxlan; int err; - if (mlx5e_vxlan_lookup_port(priv, port)) + vxlan = mlx5e_vxlan_lookup_port(priv, port); + if (vxlan) { + atomic_inc(&vxlan->refcount); goto free_work; + } if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; @@ -99,6 +102,7 @@ static void mlx5e_vxlan_add_port(struct goto err_delete_port; vxlan->udp_port = port; + atomic_set(&vxlan->refcount, 1); spin_lock_bh(&vxlan_db->lock); err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); @@ -116,32 +120,33 @@ free_work: kfree(vxlan_work); } -static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) +static void mlx5e_vxlan_del_port(struct work_struct *work) { + struct mlx5e_vxlan_work *vxlan_work = + container_of(work, struct mlx5e_vxlan_work, work); + struct mlx5e_priv *priv = vxlan_work->priv; struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + u16 port = vxlan_work->port; struct mlx5e_vxlan *vxlan; + bool remove = false; spin_lock_bh(&vxlan_db->lock); - vxlan = radix_tree_delete(&vxlan_db->tree, port); - spin_unlock_bh(&vxlan_db->lock); - + vxlan = radix_tree_lookup(&vxlan_db->tree, port); if (!vxlan) - return; - - mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port); - - kfree(vxlan); -} + goto out_unlock; -static void mlx5e_vxlan_del_port(struct work_struct *work) -{ - struct mlx5e_vxlan_work *vxlan_work = - container_of(work, struct mlx5e_vxlan_work, work); - struct mlx5e_priv *priv = vxlan_work->priv; - u16 port = vxlan_work->port; + if (atomic_dec_and_test(&vxlan->refcount)) { + radix_tree_delete(&vxlan_db->tree, port); + remove = true; + } - __mlx5e_vxlan_core_del_port(priv, port); +out_unlock: + spin_unlock_bh(&vxlan_db->lock); + if (remove) { + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); + } kfree(vxlan_work); } @@ -171,12 +176,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_pr struct mlx5e_vxlan *vxlan; unsigned int port = 0; - spin_lock_bh(&vxlan_db->lock); + /* Lockless since we are the only radix-tree consumers, wq is disabled */ while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { port = vxlan->udp_port; - spin_unlock_bh(&vxlan_db->lock); - __mlx5e_vxlan_core_del_port(priv, (u16)port); - spin_lock_bh(&vxlan_db->lock); + radix_tree_delete(&vxlan_db->tree, port); + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); } - spin_unlock_bh(&vxlan_db->lock); } --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -36,6 +36,7 @@ #include "en.h" struct mlx5e_vxlan { + atomic_t refcount; u16 udp_port; }; Patches currently in stable-queue which might be from galp(a)mellanox.com are queue-4.9/net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch queue-4.9/net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch queue-4.9/net-mlx5e-add-refcount-to-vxlan-structure.patch queue-4.9/net-mlx5e-fix-features-check-of-ipv6-traffic.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net/mlx5: Fix rate limit packet pacing naming and struct" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net/mlx5: Fix rate limit packet pacing naming and struct to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-mlx5-fix-rate-limit-packet-pacing-naming-and-struct.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Eran Ben Elisha <eranbe(a)mellanox.com> Date: Mon, 13 Nov 2017 10:11:27 +0200 Subject: net/mlx5: Fix rate limit packet pacing naming and struct From: Eran Ben Elisha <eranbe(a)mellanox.com> [ Upstream commit 37e92a9d4fe38dc3e7308913575983a6a088c8d4 ] In mlx5_ifc, struct size was not complete, and thus driver was sending garbage after the last defined field. Fixed it by adding reserved field to complete the struct size. In addition, rename all set_rate_limit to set_pp_rate_limit to be compliant with the Firmware <-> Driver definition. Fixes: 7486216b3a0b ("{net,IB}/mlx5: mlx5_ifc updates") Fixes: 1466cc5b23d1 ("net/mlx5: Rate limit tables support") Signed-off-by: Eran Ben Elisha <eranbe(a)mellanox.com> Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 22 +++++++++++----------- include/linux/mlx5/mlx5_ifc.h | 8 +++++--- 3 files changed, 18 insertions(+), 16 deletions(-) --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -367,7 +367,7 @@ static int mlx5_internal_err_ret_value(s case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: - case MLX5_CMD_OP_SET_RATE_LIMIT: + case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: @@ -502,7 +502,7 @@ const char *mlx5_command_str(int command MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); - MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -60,16 +60,16 @@ static struct mlx5_rl_entry *find_rl_ent return ret_entry; } -static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, +static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, u32 rate, u16 index) { - u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; - MLX5_SET(set_rate_limit_in, in, opcode, - MLX5_CMD_OP_SET_RATE_LIMIT); - MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); - MLX5_SET(set_rate_limit_in, in, rate_limit, rate); + MLX5_SET(set_pp_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_PP_RATE_LIMIT); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -108,7 +108,7 @@ int mlx5_rl_add_rate(struct mlx5_core_de entry->refcount++; } else { /* new rate limit */ - err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); + err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index); if (err) { mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", rate, err); @@ -144,7 +144,7 @@ void mlx5_rl_remove_rate(struct mlx5_cor entry->refcount--; if (!entry->refcount) { /* need to remove rate */ - mlx5_set_rate_limit_cmd(dev, 0, entry->index); + mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index); entry->rate = 0; } @@ -197,8 +197,8 @@ void mlx5_cleanup_rl_table(struct mlx5_c /* Clear all configured rates */ for (i = 0; i < table->max_size; i++) if (table->rl_entry[i].rate) - mlx5_set_rate_limit_cmd(dev, 0, - table->rl_entry[i].index); + mlx5_set_pp_rate_limit_cmd(dev, 0, + table->rl_entry[i].index); kfree(dev->priv.rl_table.rl_entry); } --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -143,7 +143,7 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, - MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, @@ -6689,7 +6689,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_b u8 vxlan_udp_port[0x10]; }; -struct mlx5_ifc_set_rate_limit_out_bits { +struct mlx5_ifc_set_pp_rate_limit_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6698,7 +6698,7 @@ struct mlx5_ifc_set_rate_limit_out_bits u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_set_rate_limit_in_bits { +struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -6711,6 +6711,8 @@ struct mlx5_ifc_set_rate_limit_in_bits { u8 reserved_at_60[0x20]; u8 rate_limit[0x20]; + + u8 reserved_at_a0[0x160]; }; struct mlx5_ifc_access_register_out_bits { Patches currently in stable-queue which might be from eranbe(a)mellanox.com are queue-4.9/net-mlx5-fix-rate-limit-packet-pacing-naming-and-struct.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net/mlx5: Fix error flow in CREATE_QP command" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net/mlx5: Fix error flow in CREATE_QP command to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-mlx5-fix-error-flow-in-create_qp-command.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Moni Shoua <monis(a)mellanox.com> Date: Mon, 4 Dec 2017 08:59:25 +0200 Subject: net/mlx5: Fix error flow in CREATE_QP command From: Moni Shoua <monis(a)mellanox.com> [ Upstream commit dbff26e44dc3ec4de6578733b054a0114652a764 ] In error flow, when DESTROY_QP command should be executed, the wrong mailbox was set with data, not the one that is written to hardware, Fix that. Fixes: 09a7d9eca1a6 '{net,IB}/mlx5: QP/XRCD commands via mlx5 ifc' Signed-off-by: Moni Shoua <monis(a)mellanox.com> Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -303,8 +303,8 @@ int mlx5_core_create_qp(struct mlx5_core err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); - MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); - MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } Patches currently in stable-queue which might be from monis(a)mellanox.com are queue-4.9/net-mlx5-fix-error-flow-in-create_qp-command.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: ipv4: fix for a race condition in raw_sendmsg" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: ipv4: fix for a race condition in raw_sendmsg to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Mohamed Ghannam <simo.ghannam(a)gmail.com> Date: Sun, 10 Dec 2017 03:50:58 +0000 Subject: net: ipv4: fix for a race condition in raw_sendmsg From: Mohamed Ghannam <simo.ghannam(a)gmail.com> [ Upstream commit 8f659a03a0ba9289b9aeb9b4470e6fb263d6f483 ] inet->hdrincl is racy, and could lead to uninitialized stack pointer usage, so its value should be read only once. Fixes: c008ba5bdc9f ("ipv4: Avoid reading user iov twice after raw_probe_proto_opt") Signed-off-by: Mohamed Ghannam <simo.ghannam(a)gmail.com> Reviewed-by: Eric Dumazet <edumazet(a)google.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- net/ipv4/raw.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -502,11 +502,16 @@ static int raw_sendmsg(struct sock *sk, int err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; + int hdrincl; err = -EMSGSIZE; if (len > 0xFFFF) goto out; + /* hdrincl should be READ_ONCE(inet->hdrincl) + * but READ_ONCE() doesn't work with bit fields + */ + hdrincl = inet->hdrincl; /* * Check the flags. */ @@ -582,7 +587,7 @@ static int raw_sendmsg(struct sock *sk, /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ - if (inet->hdrincl) + if (hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) @@ -604,12 +609,12 @@ static int raw_sendmsg(struct sock *sk, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | - (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), + (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); - if (!inet->hdrincl) { + if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; @@ -634,7 +639,7 @@ static int raw_sendmsg(struct sock *sk, goto do_confirm; back_from_confirm: - if (inet->hdrincl) + if (hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags, &ipc.sockc); Patches currently in stable-queue which might be from simo.ghannam(a)gmail.com are queue-4.9/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: igmp: Use correct source address on IGMPv3 reports" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: igmp: Use correct source address on IGMPv3 reports to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-igmp-use-correct-source-address-on-igmpv3-reports.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Kevin Cernekee <cernekee(a)chromium.org> Date: Mon, 11 Dec 2017 11:13:45 -0800 Subject: net: igmp: Use correct source address on IGMPv3 reports From: Kevin Cernekee <cernekee(a)chromium.org> [ Upstream commit a46182b00290839fa3fa159d54fd3237bd8669f0 ] Closing a multicast socket after the final IPv4 address is deleted from an interface can generate a membership report that uses the source IP from a different interface. The following test script, run from an isolated netns, reproduces the issue: #!/bin/bash ip link add dummy0 type dummy ip link add dummy1 type dummy ip link set dummy0 up ip link set dummy1 up ip addr add 10.1.1.1/24 dev dummy0 ip addr add 192.168.99.99/24 dev dummy1 tcpdump -U -i dummy0 & socat EXEC:"sleep 2" \ UDP4-DATAGRAM:239.101.1.68:8889,ip-add-membership=239.0.1.68:10.1.1.1 & sleep 1 ip addr del 10.1.1.1/24 dev dummy0 sleep 5 kill %tcpdump RFC 3376 specifies that the report must be sent with a valid IP source address from the destination subnet, or from address 0.0.0.0. Add an extra check to make sure this is the case. Signed-off-by: Kevin Cernekee <cernekee(a)chromium.org> Reviewed-by: Andrew Lunn <andrew(a)lunn.ch> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- net/ipv4/igmp.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -89,6 +89,7 @@ #include <linux/rtnetlink.h> #include <linux/times.h> #include <linux/pkt_sched.h> +#include <linux/byteorder/generic.h> #include <net/net_namespace.h> #include <net/arp.h> @@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int return scount; } +/* source address selection per RFC 3376 section 4.2.13 */ +static __be32 igmpv3_get_srcaddr(struct net_device *dev, + const struct flowi4 *fl4) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return htonl(INADDR_ANY); + + for_ifa(in_dev) { + if (inet_ifa_match(fl4->saddr, ifa)) + return fl4->saddr; + } endfor_ifa(in_dev); + + return htonl(INADDR_ANY); +} + static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; @@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(st pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; - pip->saddr = fl4.saddr; + pip->saddr = igmpv3_get_srcaddr(dev, &fl4); pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); Patches currently in stable-queue which might be from cernekee(a)chromium.org are queue-4.9/net-igmp-use-correct-source-address-on-igmpv3-reports.patch queue-4.9/netlink-add-netns-check-on-taps.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: Fix double free and memory corruption in get_net_ns_by_id()" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: Fix double free and memory corruption in get_net_ns_by_id() to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-fix-double-free-and-memory-corruption-in-get_net_ns_by_id.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: "Eric W. Biederman" <ebiederm(a)xmission.com> Date: Tue, 19 Dec 2017 11:27:56 -0600 Subject: net: Fix double free and memory corruption in get_net_ns_by_id() From: "Eric W. Biederman" <ebiederm(a)xmission.com> [ Upstream commit 21b5944350052d2583e82dd59b19a9ba94a007f0 ] (I can trivially verify that that idr_remove in cleanup_net happens after the network namespace count has dropped to zero --EWB) Function get_net_ns_by_id() does not check for net::count after it has found a peer in netns_ids idr. It may dereference a peer, after its count has already been finaly decremented. This leads to double free and memory corruption: put_net(peer) rtnl_lock() atomic_dec_and_test(&peer->count) [count=0] ... __put_net(peer) get_net_ns_by_id(net, id) spin_lock(&cleanup_list_lock) list_add(&net->cleanup_list, &cleanup_list) spin_unlock(&cleanup_list_lock) queue_work() peer = idr_find(&net->netns_ids, id) | get_net(peer) [count=1] | ... | (use after final put) v ... cleanup_net() ... spin_lock(&cleanup_list_lock) ... list_replace_init(&cleanup_list, ..) ... spin_unlock(&cleanup_list_lock) ... ... ... ... put_net(peer) ... atomic_dec_and_test(&peer->count) [count=0] ... spin_lock(&cleanup_list_lock) ... list_add(&net->cleanup_list, &cleanup_list) ... spin_unlock(&cleanup_list_lock) ... queue_work() ... rtnl_unlock() rtnl_lock() ... for_each_net(tmp) { ... id = __peernet2id(tmp, peer) ... spin_lock_irq(&tmp->nsid_lock) ... idr_remove(&tmp->netns_ids, id) ... ... ... net_drop_ns() ... net_free(peer) ... } ... | v cleanup_net() ... (Second free of peer) Also, put_net() on the right cpu may reorder with left's cpu list_replace_init(&cleanup_list, ..), and then cleanup_list will be corrupted. Since cleanup_net() is executed in worker thread, while put_net(peer) can happen everywhere, there should be enough time for concurrent get_net_ns_by_id() to pick the peer up, and the race does not seem to be unlikely. The patch fixes the problem in standard way. (Also, there is possible problem in peernet2id_alloc(), which requires check for net::count under nsid_lock and maybe_get_net(peer), but in current stable kernel it's used under rtnl_lock() and it has to be safe. Openswitch begun to use peernet2id_alloc(), and possibly it should be fixed too. While this is not in stable kernel yet, so I'll send a separate message to netdev@ later). Cc: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Signed-off-by: Kirill Tkhai <ktkhai(a)virtuozzo.com> Fixes: 0c7aecd4bde4 "netns: add rtnl cmd to add and get peer netns ids" Reviewed-by: Andrey Ryabinin <aryabinin(a)virtuozzo.com> Reviewed-by: "Eric W. Biederman" <ebiederm(a)xmission.com> Signed-off-by: Eric W. Biederman <ebiederm(a)xmission.com> Reviewed-by: Eric Dumazet <edumazet(a)google.com> Acked-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -263,7 +263,7 @@ struct net *get_net_ns_by_id(struct net spin_lock_irqsave(&net->nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) - get_net(peer); + peer = maybe_get_net(peer); spin_unlock_irqrestore(&net->nsid_lock, flags); rcu_read_unlock(); Patches currently in stable-queue which might be from ebiederm(a)xmission.com are queue-4.9/net-fix-double-free-and-memory-corruption-in-get_net_ns_by_id.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: fec: unmap the xmit buffer that are not transferred by DMA" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: fec: unmap the xmit buffer that are not transferred by DMA to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-fec-unmap-the-xmit-buffer-that-are-not-transferred-by-dma.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Fugang Duan <fugang.duan(a)nxp.com> Date: Fri, 22 Dec 2017 17:12:09 +0800 Subject: net: fec: unmap the xmit buffer that are not transferred by DMA From: Fugang Duan <fugang.duan(a)nxp.com> [ Upstream commit 178e5f57a8d8f8fc5799a624b96fc31ef9a29ffa ] The enet IP only support 32 bit, it will use swiotlb buffer to do dma mapping when xmit buffer DMA memory address is bigger than 4G in i.MX platform. After stress suspend/resume test, it will print out: log: [12826.352864] fec 5b040000.ethernet: swiotlb buffer is full (sz: 191 bytes) [12826.359676] DMA: Out of SW-IOMMU space for 191 bytes at device 5b040000.ethernet [12826.367110] fec 5b040000.ethernet eth0: Tx DMA memory map failed The issue is that the ready xmit buffers that are dma mapped but DMA still don't copy them into fifo, once MAC restart, these DMA buffers are not unmapped. So it should check the dma mapping buffer and unmap them. Signed-off-by: Fugang Duan <fugang.duan(a)nxp.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/freescale/fec_main.c | 6 ++++++ 1 file changed, 6 insertions(+) --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -813,6 +813,12 @@ static void fec_enet_bd_init(struct net_ for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = cpu_to_fec16(0); + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); if (txq->tx_skbuff[i]) { dev_kfree_skb_any(txq->tx_skbuff[i]); txq->tx_skbuff[i] = NULL; Patches currently in stable-queue which might be from fugang.duan(a)nxp.com are queue-4.9/net-fec-unmap-the-xmit-buffer-that-are-not-transferred-by-dma.patch

7 years, 10 months

1
0
0 0

[Linux-stable-mirror] Patch "net: fec: Allow reception of frames bigger than 1522 bytes" has been added to the 4.9-stable tree

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled net: fec: Allow reception of frames bigger than 1522 bytes to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: net-fec-allow-reception-of-frames-bigger-than-1522-bytes.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Sun Dec 31 11:13:15 CET 2017 From: Andrew Lunn <andrew(a)lunn.ch> Date: Sun, 30 Jul 2017 19:36:05 +0200 Subject: net: fec: Allow reception of frames bigger than 1522 bytes From: Andrew Lunn <andrew(a)lunn.ch> [ Upstream commit fbbeefdd21049fcf9437c809da3828b210577f36 ] The FEC Receive Control Register has a 14 bit field indicating the longest frame that may be received. It is being set to 1522. Frames longer than this are discarded, but counted as being in error. When using DSA, frames from the switch has an additional header, either 4 or 8 bytes if a Marvell switch is used. Thus a full MTU frame of 1522 bytes received by the switch on a port becomes 1530 bytes when passed to the host via the FEC interface. Change the maximum receive size to 2048 - 64, where 64 is the maximum rx_alignment applied on the receive buffer for AVB capable FEC cores. Use this value also for the maximum receive buffer size. The driver is already allocating a receive SKB of 2048 bytes, so this change should not have any significant effects. Tested on imx51, imx6, vf610. Signed-off-by: Andrew Lunn <andrew(a)lunn.ch> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/net/ethernet/freescale/fec_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -172,10 +172,12 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet #endif /* CONFIG_M5272 */ /* The FEC stores dest/src/type/vlan, data, and checksum for receive packets. + * + * 2048 byte skbufs are allocated. However, alignment requirements + * varies between FEC variants. Worst case is 64, so round down by 64. */ -#define PKT_MAXBUF_SIZE 1522 +#define PKT_MAXBUF_SIZE (round_down(2048 - 64, 64)) #define PKT_MINBUF_SIZE 64 -#define PKT_MAXBLR_SIZE 1536 /* FEC receive acceleration */ #define FEC_RACC_IPDIS (1 << 1) @@ -853,7 +855,7 @@ static void fec_enet_enable_ring(struct for (i = 0; i < fep->num_rx_queues; i++) { rxq = fep->rx_queue[i]; writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i)); - writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); + writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); /* enable DMA1/2 */ if (i) Patches currently in stable-queue which might be from andrew(a)lunn.ch are queue-4.9/net-igmp-use-correct-source-address-on-igmpv3-reports.patch queue-4.9/net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch queue-4.9/net-fec-allow-reception-of-frames-bigger-than-1522-bytes.patch

7 years, 10 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror