This is a note to let you know that I've just added the patch titled
net: ipv4: fix for a race condition in raw_sendmsg
to the 3.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch
and it can be found in the queue-3.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:31:10 CET 2017
From: Mohamed Ghannam <simo.ghannam(a)gmail.com>
Date: Sun, 10 Dec 2017 03:50:58 +0000
Subject: net: ipv4: fix for a race condition in raw_sendmsg
From: Mohamed Ghannam <simo.ghannam(a)gmail.com>
[ Upstream commit 8f659a03a0ba9289b9aeb9b4470e6fb263d6f483 ]
inet->hdrincl is racy, and could lead to uninitialized stack pointer
usage, so its value should be read only once.
Fixes: c008ba5bdc9f ("ipv4: Avoid reading user iov twice after raw_probe_proto_opt")
Signed-off-by: Mohamed Ghannam <simo.ghannam(a)gmail.com>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/raw.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -483,11 +483,16 @@ static int raw_sendmsg(struct kiocb *ioc
u8 tos;
int err;
struct ip_options_data opt_copy;
+ int hdrincl;
err = -EMSGSIZE;
if (len > 0xFFFF)
goto out;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields
+ */
+ hdrincl = inet->hdrincl;
/*
* Check the flags.
*/
@@ -560,7 +565,7 @@ static int raw_sendmsg(struct kiocb *ioc
/* Linux does not mangle headers on raw sockets,
* so that IP options + IP_HDRINCL is non-sense.
*/
- if (inet->hdrincl)
+ if (hdrincl)
goto done;
if (ipc.opt->opt.srr) {
if (!daddr)
@@ -582,12 +587,12 @@ static int raw_sendmsg(struct kiocb *ioc
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0);
- if (!inet->hdrincl) {
+ if (!hdrincl) {
err = raw_probe_proto_opt(&fl4, msg);
if (err)
goto done;
@@ -609,7 +614,7 @@ static int raw_sendmsg(struct kiocb *ioc
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len,
&rt, msg->msg_flags);
Patches currently in stable-queue which might be from simo.ghannam(a)gmail.com are
queue-3.18/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch
This is a note to let you know that I've just added the patch titled
ipv6: mcast: better catch silly mtu values
to the 3.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ipv6-mcast-better-catch-silly-mtu-values.patch
and it can be found in the queue-3.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:31:10 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Mon, 11 Dec 2017 07:03:38 -0800
Subject: ipv6: mcast: better catch silly mtu values
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit b9b312a7a451e9c098921856e7cfbc201120e1a7 ]
syzkaller reported crashes in IPv6 stack [1]
Xin Long found that lo MTU was set to silly values.
IPv6 stack reacts to changes to small MTU, by disabling itself under
RTNL.
But there is a window where threads not using RTNL can see a wrong
device mtu. This can lead to surprises, in mld code where it is assumed
the mtu is suitable.
Fix this by reading device mtu once and checking IPv6 minimal MTU.
[1]
skbuff: skb_over_panic: text:0000000010b86b8d len:196 put:20
head:000000003b477e60 data:000000000e85441e tail:0xd4 end:0xc0 dev:lo
------------[ cut here ]------------
kernel BUG at net/core/skbuff.c:104!
invalid opcode: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
(ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc2-mm1+ #39
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
RIP: 0010:skb_panic+0x15c/0x1f0 net/core/skbuff.c:100
RSP: 0018:ffff8801db307508 EFLAGS: 00010286
RAX: 0000000000000082 RBX: ffff8801c517e840 RCX: 0000000000000000
RDX: 0000000000000082 RSI: 1ffff1003b660e61 RDI: ffffed003b660e95
RBP: ffff8801db307570 R08: 1ffff1003b660e23 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff85bd4020
R13: ffffffff84754ed2 R14: 0000000000000014 R15: ffff8801c4e26540
FS: 0000000000000000(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000463610 CR3: 00000001c6698000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<IRQ>
skb_over_panic net/core/skbuff.c:109 [inline]
skb_put+0x181/0x1c0 net/core/skbuff.c:1694
add_grhead.isra.24+0x42/0x3b0 net/ipv6/mcast.c:1695
add_grec+0xa55/0x1060 net/ipv6/mcast.c:1817
mld_send_cr net/ipv6/mcast.c:1903 [inline]
mld_ifc_timer_expire+0x4d2/0x770 net/ipv6/mcast.c:2448
call_timer_fn+0x23b/0x840 kernel/time/timer.c:1320
expire_timers kernel/time/timer.c:1357 [inline]
__run_timers+0x7e1/0xb60 kernel/time/timer.c:1660
run_timer_softirq+0x4c/0xb0 kernel/time/timer.c:1686
__do_softirq+0x29d/0xbb2 kernel/softirq.c:285
invoke_softirq kernel/softirq.c:365 [inline]
irq_exit+0x1d3/0x210 kernel/softirq.c:405
exiting_irq arch/x86/include/asm/apic.h:540 [inline]
smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052
apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:920
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Tested-by: Xin Long <lucien.xin(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv6/mcast.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1670,16 +1670,16 @@ static int grec_size(struct ifmcaddr6 *p
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
- int type, struct mld2_grec **ppgr)
+ int type, struct mld2_grec **ppgr, unsigned int mtu)
{
- struct net_device *dev = pmc->idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr;
- if (!skb)
- skb = mld_newpack(pmc->idev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = mld_newpack(pmc->idev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -1702,10 +1702,15 @@ static struct sk_buff *add_grec(struct s
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->mca_flags & MAF_NOREPORT)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV6_MIN_MTU)
+ return skb;
+
isquery = type == MLD2_MODE_IS_INCLUDE ||
type == MLD2_MODE_IS_EXCLUDE;
truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1726,7 +1731,7 @@ static struct sk_buff *add_grec(struct s
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
}
}
first = 1;
@@ -1753,12 +1758,12 @@ static struct sk_buff *add_grec(struct s
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -1792,7 +1797,7 @@ empty_source:
mld_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-3.18/tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch
queue-3.18/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch
queue-3.18/ipv4-igmp-guard-against-silly-mtu-values.patch
queue-3.18/ipv6-mcast-better-catch-silly-mtu-values.patch
This is a note to let you know that I've just added the patch titled
ipv4: igmp: guard against silly MTU values
to the 3.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ipv4-igmp-guard-against-silly-mtu-values.patch
and it can be found in the queue-3.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:31:10 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Mon, 11 Dec 2017 07:17:39 -0800
Subject: ipv4: igmp: guard against silly MTU values
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit b5476022bbada3764609368f03329ca287528dc8 ]
IPv4 stack reacts to changes to small MTU, by disabling itself under
RTNL.
But there is a window where threads not using RTNL can see a wrong
device mtu. This can lead to surprises, in igmp code where it is
assumed the mtu is suitable.
Fix this by reading device mtu once and checking IPv4 minimal MTU.
This patch adds missing IPV4_MIN_MTU define, to not abuse
ETH_MIN_MTU anymore.
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/net/ip.h | 2 ++
net/ipv4/devinet.c | 2 +-
net/ipv4/igmp.c | 24 +++++++++++++++---------
net/ipv4/ip_tunnel.c | 4 ++--
4 files changed, 20 insertions(+), 12 deletions(-)
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -33,6 +33,8 @@
#include <net/flow.h>
#include <net/flow_keys.h>
+#define IPV4_MIN_MTU 68 /* RFC 791 */
+
struct sock;
struct inet_skb_parm {
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1328,7 +1328,7 @@ skip:
static bool inetdev_valid_mtu(unsigned int mtu)
{
- return mtu >= 68;
+ return mtu >= IPV4_MIN_MTU;
}
static void inetdev_send_gratuitous_arp(struct net_device *dev,
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -402,16 +402,17 @@ static int grec_size(struct ip_mc_list *
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
- int type, struct igmpv3_grec **ppgr)
+ int type, struct igmpv3_grec **ppgr, unsigned int mtu)
{
struct net_device *dev = pmc->interface->dev;
struct igmpv3_report *pih;
struct igmpv3_grec *pgr;
- if (!skb)
- skb = igmpv3_newpack(dev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = igmpv3_newpack(dev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -433,10 +434,15 @@ static struct sk_buff *add_grec(struct s
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV4_MIN_MTU)
+ return skb;
+
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -457,7 +463,7 @@ static struct sk_buff *add_grec(struct s
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
}
}
first = 1;
@@ -484,12 +490,12 @@ static struct sk_buff *add_grec(struct s
pgr->grec_nsrcs = htons(scount);
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -523,7 +529,7 @@ empty_source:
igmpv3_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -395,8 +395,8 @@ static int ip_tunnel_bind_dev(struct net
dev->needed_headroom = t_hlen + hlen;
mtu -= (dev->hard_header_len + t_hlen);
- if (mtu < 68)
- mtu = 68;
+ if (mtu < IPV4_MIN_MTU)
+ mtu = IPV4_MIN_MTU;
return mtu;
}
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-3.18/tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch
queue-3.18/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch
queue-3.18/ipv4-igmp-guard-against-silly-mtu-values.patch
queue-3.18/ipv6-mcast-better-catch-silly-mtu-values.patch
This is a note to let you know that I've just added the patch titled
tg3: Fix rx hang on MTU change with 5717/5719
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tg3-fix-rx-hang-on-mtu-change-with-5717-5719.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Brian King <brking(a)linux.vnet.ibm.com>
Date: Fri, 15 Dec 2017 15:21:50 -0600
Subject: tg3: Fix rx hang on MTU change with 5717/5719
From: Brian King <brking(a)linux.vnet.ibm.com>
[ Upstream commit 748a240c589824e9121befb1cba5341c319885bc ]
This fixes a hang issue seen when changing the MTU size from 1500 MTU
to 9000 MTU on both 5717 and 5719 chips. In discussion with Broadcom,
they've indicated that these chipsets have the same phy as the 57766
chipset, so the same workarounds apply. This has been tested by IBM
on both Power 8 and Power 9 systems as well as by Broadcom on x86
hardware and has been confirmed to resolve the hang issue.
Signed-off-by: Brian King <brking(a)linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/ethernet/broadcom/tg3.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14226,7 +14226,9 @@ static int tg3_change_mtu(struct net_dev
/* Reset PHY, otherwise the read DMA engine will be in a mode that
* breaks all requests to 256 bytes.
*/
- if (tg3_asic_rev(tp) == ASIC_REV_57766)
+ if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+ tg3_asic_rev(tp) == ASIC_REV_5717 ||
+ tg3_asic_rev(tp) == ASIC_REV_5719)
reset_phy = true;
err = tg3_restart_hw(tp, reset_phy);
Patches currently in stable-queue which might be from brking(a)linux.vnet.ibm.com are
queue-4.9/tg3-fix-rx-hang-on-mtu-change-with-5717-5719.patch
This is a note to let you know that I've just added the patch titled
tcp_bbr: record "full bw reached" decision in new full_bw_reached bit
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tcp_bbr-record-full-bw-reached-decision-in-new-full_bw_reached-bit.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Neal Cardwell <ncardwell(a)google.com>
Date: Thu, 7 Dec 2017 12:43:30 -0500
Subject: tcp_bbr: record "full bw reached" decision in new full_bw_reached bit
From: Neal Cardwell <ncardwell(a)google.com>
[ Upstream commit c589e69b508d29ed8e644dfecda453f71c02ec27 ]
This commit records the "full bw reached" decision in a new
full_bw_reached bit. This is a pure refactor that does not change the
current behavior, but enables subsequent fixes and improvements.
In particular, this enables simple and clean fixes because the full_bw
and full_bw_cnt can be unconditionally zeroed without worrying about
forgetting that we estimated we filled the pipe in Startup. And it
enables future improvements because multiple code paths can be used
for estimating that we filled the pipe in Startup; any new code paths
only need to set this bit when they think the pipe is full.
Note that this fix intentionally reduces the width of the full_bw_cnt
counter, since we have never used the most significant bit.
Signed-off-by: Neal Cardwell <ncardwell(a)google.com>
Reviewed-by: Yuchung Cheng <ycheng(a)google.com>
Acked-by: Soheil Hassas Yeganeh <soheil(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/tcp_bbr.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -81,7 +81,8 @@ struct bbr {
u32 lt_last_lost; /* LT intvl start: tp->lost */
u32 pacing_gain:10, /* current gain for setting pacing rate */
cwnd_gain:10, /* current gain for setting cwnd */
- full_bw_cnt:3, /* number of rounds without large bw gains */
+ full_bw_reached:1, /* reached full bw in Startup? */
+ full_bw_cnt:2, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
has_seen_rtt:1, /* have we seen an RTT sample yet? */
unused_b:5;
@@ -151,7 +152,7 @@ static bool bbr_full_bw_reached(const st
{
const struct bbr *bbr = inet_csk_ca(sk);
- return bbr->full_bw_cnt >= bbr_full_bw_cnt;
+ return bbr->full_bw_reached;
}
/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
@@ -688,6 +689,7 @@ static void bbr_check_full_bw_reached(st
return;
}
++bbr->full_bw_cnt;
+ bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
/* If pipe is probably full, drain the queue and then enter steady-state. */
@@ -821,6 +823,7 @@ static void bbr_init(struct sock *sk)
bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
+ bbr->full_bw_reached = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
bbr->cycle_mstamp.v64 = 0;
Patches currently in stable-queue which might be from ncardwell(a)google.com are
queue-4.9/tcp-invalidate-rate-samples-during-sack-reneging.patch
queue-4.9/tcp_bbr-record-full-bw-reached-decision-in-new-full_bw_reached-bit.patch
This is a note to let you know that I've just added the patch titled
tcp md5sig: Use skb's saddr when replying to an incoming segment
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Christoph Paasch <cpaasch(a)apple.com>
Date: Mon, 11 Dec 2017 00:05:46 -0800
Subject: tcp md5sig: Use skb's saddr when replying to an incoming segment
From: Christoph Paasch <cpaasch(a)apple.com>
[ Upstream commit 30791ac41927ebd3e75486f9504b6d2280463bf0 ]
The MD5-key that belongs to a connection is identified by the peer's
IP-address. When we are in tcp_v4(6)_reqsk_send_ack(), we are replying
to an incoming segment from tcp_check_req() that failed the seq-number
checks.
Thus, to find the correct key, we need to use the skb's saddr and not
the daddr.
This bug seems to have been there for quite a while, but probably went
unnoticed because the consequences are not catastrophic. We will call
tcp_v4_reqsk_send_ack only to send a challenge-ACK back to the peer,
thus the connection doesn't really fail.
Fixes: 9501f9722922 ("tcp md5sig: Let the caller pass appropriate key for tcp_v{4,6}_do_calc_md5_hash().")
Signed-off-by: Christoph Paasch <cpaasch(a)apple.com>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/tcp_ipv4.c | 2 +-
net/ipv6/tcp_ipv6.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -828,7 +828,7 @@ static void tcp_v4_reqsk_send_ack(const
tcp_time_stamp,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+ tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -962,7 +962,7 @@ static void tcp_v6_reqsk_send_ack(const
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
}
Patches currently in stable-queue which might be from cpaasch(a)apple.com are
queue-4.9/tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch
This is a note to let you know that I've just added the patch titled
tcp: invalidate rate samples during SACK reneging
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tcp-invalidate-rate-samples-during-sack-reneging.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Yousuk Seung <ysseung(a)google.com>
Date: Thu, 7 Dec 2017 13:41:34 -0800
Subject: tcp: invalidate rate samples during SACK reneging
From: Yousuk Seung <ysseung(a)google.com>
[ Upstream commit d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 ]
Mark tcp_sock during a SACK reneging event and invalidate rate samples
while marked. Such rate samples may overestimate bw by including packets
that were SACKed before reneging.
< ack 6001 win 10000 sack 7001:38001
< ack 7001 win 0 sack 8001:38001 // Reneg detected
> seq 7001:8001 // RTO, SACK cleared.
< ack 38001 win 10000
In above example the rate sample taken after the last ack will count
7001-38001 as delivered while the actual delivery rate likely could
be much lower i.e. 7001-8001.
This patch adds a new field tcp_sock.is_sack_reneg and marks it when we
declare SACK reneging and enter TCP_CA_Loss, and unmarks it after
the last rate sample was taken before moving back to TCP_CA_Open. This
patch also invalidates rate samples taken while tcp_sock.is_sack_reneg
is set.
Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection")
Signed-off-by: Yousuk Seung <ysseung(a)google.com>
Signed-off-by: Neal Cardwell <ncardwell(a)google.com>
Signed-off-by: Yuchung Cheng <ycheng(a)google.com>
Acked-by: Soheil Hassas Yeganeh <soheil(a)google.com>
Acked-by: Eric Dumazet <edumazet(a)google.com>
Acked-by: Priyaranjan Jha <priyarjha(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/linux/tcp.h | 3 ++-
include/net/tcp.h | 2 +-
net/ipv4/tcp.c | 1 +
net/ipv4/tcp_input.c | 10 ++++++++--
net/ipv4/tcp_rate.c | 10 +++++++---
5 files changed, 19 insertions(+), 7 deletions(-)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -219,7 +219,8 @@ struct tcp_sock {
} rack;
u16 advmss; /* Advertised MSS */
u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
- unused:7;
+ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
+ unused:6;
u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */
thin_dupack : 1,/* Fast retransmit on first dupack */
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1001,7 +1001,7 @@ void tcp_rate_skb_sent(struct sock *sk,
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs);
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct skb_mstamp *now, struct rate_sample *rs);
+ bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk);
/* These functions determine how the current flow behaves in respect of SACK
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2297,6 +2297,7 @@ int tcp_disconnect(struct sock *sk, int
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1966,6 +1966,8 @@ void tcp_enter_loss(struct sock *sk)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
tp->fackets_out = 0;
+ /* Mark SACK reneging until we recover from this loss event. */
+ tp->is_sack_reneg = 1;
}
tcp_clear_all_retrans_hints(tp);
@@ -2463,6 +2465,7 @@ static bool tcp_try_undo_recovery(struct
return true;
}
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
return false;
}
@@ -2494,8 +2497,10 @@ static bool tcp_try_undo_loss(struct soc
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
- if (frto_undo || tcp_is_sack(tp))
+ if (frto_undo || tcp_is_sack(tp)) {
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
+ }
return true;
}
return false;
@@ -3589,6 +3594,7 @@ static int tcp_ack(struct sock *sk, cons
struct tcp_sacktag_state sack_state;
struct rate_sample rs = { .prior_delivered = 0 };
u32 prior_snd_una = tp->snd_una;
+ bool is_sack_reneg = tp->is_sack_reneg;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
@@ -3711,7 +3717,7 @@ static int tcp_ack(struct sock *sk, cons
tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
- tcp_rate_gen(sk, delivered, lost, &now, &rs);
+ tcp_rate_gen(sk, delivered, lost, is_sack_reneg, &now, &rs);
tcp_cong_control(sk, ack, delivered, flag, &rs);
tcp_xmit_recovery(sk, rexmit);
return 1;
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock
/* Update the connection delivery information and generate a rate sample. */
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct skb_mstamp *now, struct rate_sample *rs)
+ bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 snd_us, ack_us;
@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 d
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */
- /* Return an invalid sample if no timing information is available. */
- if (!rs->prior_mstamp.v64) {
+ /* Return an invalid sample if no timing information is available or
+ * in recovery from loss with SACK reneging. Rate samples taken during
+ * a SACK reneging event may overestimate bw by including packets that
+ * were SACKed before the reneg.
+ */
+ if (!rs->prior_mstamp.v64 || is_sack_reneg) {
rs->delivered = -1;
rs->interval_us = -1;
return;
Patches currently in stable-queue which might be from ysseung(a)google.com are
queue-4.9/tcp-invalidate-rate-samples-during-sack-reneging.patch
This is a note to let you know that I've just added the patch titled
sock: free skb in skb_complete_tx_timestamp on error
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Willem de Bruijn <willemb(a)google.com>
Date: Wed, 13 Dec 2017 14:41:06 -0500
Subject: sock: free skb in skb_complete_tx_timestamp on error
From: Willem de Bruijn <willemb(a)google.com>
[ Upstream commit 35b99dffc3f710cafceee6c8c6ac6a98eb2cb4bf ]
skb_complete_tx_timestamp must ingest the skb it is passed. Call
kfree_skb if the skb cannot be enqueued.
Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl")
Fixes: 9ac25fc06375 ("net: fix socket refcounting in skb_complete_tx_timestamp()")
Reported-by: Richard Cochran <richardcochran(a)gmail.com>
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/core/skbuff.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3823,7 +3823,7 @@ void skb_complete_tx_timestamp(struct sk
struct sock *sk = skb->sk;
if (!skb_may_tx_timestamp(sk, false))
- return;
+ goto err;
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
@@ -3832,7 +3832,11 @@ void skb_complete_tx_timestamp(struct sk
*skb_hwtstamps(skb) = *hwtstamps;
__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
sock_put(sk);
+ return;
}
+
+err:
+ kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
Patches currently in stable-queue which might be from willemb(a)google.com are
queue-4.9/sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch
This is a note to let you know that I've just added the patch titled
sctp: Replace use of sockets_allocated with specified macro.
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sctp-replace-use-of-sockets_allocated-with-specified-macro.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Tonghao Zhang <xiangxia.m.yue(a)gmail.com>
Date: Fri, 22 Dec 2017 10:15:20 -0800
Subject: sctp: Replace use of sockets_allocated with specified macro.
From: Tonghao Zhang <xiangxia.m.yue(a)gmail.com>
[ Upstream commit 8cb38a602478e9f806571f6920b0a3298aabf042 ]
Commit 180d8cd942ce replaced all uses of the struct sock fields
memory_pressure, memory_allocated, sockets_allocated, and sysctl_mem
with accessor macros, but the sockets_allocated field of the sctp sock
was not converted. Replace it now to unify the code.
Fixes: 180d8cd942ce ("foundations of per-cgroup memory pressure controlling.")
Cc: Glauber Costa <glommer(a)parallels.com>
Signed-off-by: Tonghao Zhang <zhangtonghao(a)didichuxing.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/sctp/socket.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4246,7 +4246,7 @@ static int sctp_init_sock(struct sock *s
SCTP_DBG_OBJCNT_INC(sock);
local_bh_disable();
- percpu_counter_inc(&sctp_sockets_allocated);
+ sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
/* Nothing can fail after this block, otherwise
@@ -4290,7 +4290,7 @@ static void sctp_destroy_sock(struct soc
}
sctp_endpoint_free(sp->ep);
local_bh_disable();
- percpu_counter_dec(&sctp_sockets_allocated);
+ sk_sockets_allocated_dec(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
}
Patches currently in stable-queue which might be from xiangxia.m.yue(a)gmail.com are
queue-4.9/sctp-replace-use-of-sockets_allocated-with-specified-macro.patch
This is a note to let you know that I've just added the patch titled
s390/qeth: don't apply takeover changes to RXIP
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
s390-qeth-don-t-apply-takeover-changes-to-rxip.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
Date: Wed, 13 Dec 2017 18:56:30 +0100
Subject: s390/qeth: don't apply takeover changes to RXIP
From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
[ Upstream commit b22d73d6689fd902a66c08ebe71ab2f3b351e22f ]
When takeover is switched off, current code clears the 'TAKEOVER' flag on
all IPs. But the flag is also used for RXIP addresses, and those should
not be affected by the takeover mode.
Fix the behaviour by consistently applying takeover logic to NORMAL
addresses only.
Signed-off-by: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/s390/net/qeth_l3_main.c | 5 +++--
drivers/s390/net/qeth_l3_sys.c | 5 +++--
2 files changed, 6 insertions(+), 4 deletions(-)
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -178,6 +178,8 @@ int qeth_l3_is_addr_covered_by_ipato(str
if (!card->ipato.enabled)
return 0;
+ if (addr->type != QETH_IP_TYPE_NORMAL)
+ return 0;
qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits,
(addr->proto == QETH_PROT_IPV4)? 4:16);
@@ -293,8 +295,7 @@ int qeth_l3_add_ip(struct qeth_card *car
memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr));
addr->ref_counter = 1;
- if (addr->type == QETH_IP_TYPE_NORMAL &&
- qeth_l3_is_addr_covered_by_ipato(card, addr)) {
+ if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
QETH_CARD_TEXT(card, 2, "tkovaddr");
addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
}
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -398,10 +398,11 @@ static ssize_t qeth_l3_dev_ipato_enable_
card->ipato.enabled = enable;
hash_for_each(card->ip_htable, i, addr, hnode) {
+ if (addr->type != QETH_IP_TYPE_NORMAL)
+ continue;
if (!enable)
addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
- else if (addr->type == QETH_IP_TYPE_NORMAL &&
- qeth_l3_is_addr_covered_by_ipato(card, addr))
+ else if (qeth_l3_is_addr_covered_by_ipato(card, addr))
addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
}
out:
Patches currently in stable-queue which might be from jwi(a)linux.vnet.ibm.com are
queue-4.9/s390-qeth-lock-ip-table-while-applying-takeover-changes.patch
queue-4.9/s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch
queue-4.9/s390-qeth-update-takeover-ips-after-configuration-change.patch
queue-4.9/s390-qeth-don-t-apply-takeover-changes-to-rxip.patch
This is a note to let you know that I've just added the patch titled
s390/qeth: lock IP table while applying takeover changes
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
s390-qeth-lock-ip-table-while-applying-takeover-changes.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
Date: Wed, 13 Dec 2017 18:56:31 +0100
Subject: s390/qeth: lock IP table while applying takeover changes
From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
[ Upstream commit 8a03a3692b100d84785ee7a834e9215e304c9e00 ]
Modifying the flags of an IP addr object needs to be protected against
eg. concurrent removal of the same object from the IP table.
Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
Signed-off-by: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/s390/net/qeth_l3_sys.c | 2 ++
1 file changed, 2 insertions(+)
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -397,6 +397,7 @@ static ssize_t qeth_l3_dev_ipato_enable_
goto out;
card->ipato.enabled = enable;
+ spin_lock_bh(&card->ip_lock);
hash_for_each(card->ip_htable, i, addr, hnode) {
if (addr->type != QETH_IP_TYPE_NORMAL)
continue;
@@ -405,6 +406,7 @@ static ssize_t qeth_l3_dev_ipato_enable_
else if (qeth_l3_is_addr_covered_by_ipato(card, addr))
addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
}
+ spin_unlock_bh(&card->ip_lock);
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
Patches currently in stable-queue which might be from jwi(a)linux.vnet.ibm.com are
queue-4.9/s390-qeth-lock-ip-table-while-applying-takeover-changes.patch
queue-4.9/s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch
queue-4.9/s390-qeth-update-takeover-ips-after-configuration-change.patch
queue-4.9/s390-qeth-don-t-apply-takeover-changes-to-rxip.patch
This is a note to let you know that I've just added the patch titled
s390/qeth: apply takeover changes when mode is toggled
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
Date: Wed, 13 Dec 2017 18:56:29 +0100
Subject: s390/qeth: apply takeover changes when mode is toggled
From: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
[ Upstream commit 7fbd9493f0eeae8cef58300505a9ef5c8fce6313 ]
Just as for an explicit enable/disable, toggling the takeover mode also
requires that the IP addresses get updated. Otherwise all IPs that were
added to the table before the mode-toggle, get registered with the old
settings.
Signed-off-by: Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/s390/net/qeth_core.h | 2 +-
drivers/s390/net/qeth_core_main.c | 2 +-
drivers/s390/net/qeth_l3_sys.c | 35 +++++++++++++++++------------------
3 files changed, 19 insertions(+), 20 deletions(-)
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -576,7 +576,7 @@ enum qeth_cq {
};
struct qeth_ipato {
- int enabled;
+ bool enabled;
int invert4;
int invert6;
struct list_head entries;
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1475,7 +1475,7 @@ static int qeth_setup_card(struct qeth_c
qeth_set_intial_options(card);
/* IP address takeover */
INIT_LIST_HEAD(&card->ipato.entries);
- card->ipato.enabled = 0;
+ card->ipato.enabled = false;
card->ipato.invert4 = 0;
card->ipato.invert6 = 0;
/* init QDIO stuff */
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -374,6 +374,7 @@ static ssize_t qeth_l3_dev_ipato_enable_
struct qeth_card *card = dev_get_drvdata(dev);
struct qeth_ipaddr *addr;
int i, rc = 0;
+ bool enable;
if (!card)
return -EINVAL;
@@ -386,25 +387,23 @@ static ssize_t qeth_l3_dev_ipato_enable_
}
if (sysfs_streq(buf, "toggle")) {
- card->ipato.enabled = (card->ipato.enabled)? 0 : 1;
- } else if (sysfs_streq(buf, "1")) {
- card->ipato.enabled = 1;
- hash_for_each(card->ip_htable, i, addr, hnode) {
- if ((addr->type == QETH_IP_TYPE_NORMAL) &&
- qeth_l3_is_addr_covered_by_ipato(card, addr))
- addr->set_flags |=
- QETH_IPA_SETIP_TAKEOVER_FLAG;
- }
- } else if (sysfs_streq(buf, "0")) {
- card->ipato.enabled = 0;
- hash_for_each(card->ip_htable, i, addr, hnode) {
- if (addr->set_flags &
- QETH_IPA_SETIP_TAKEOVER_FLAG)
- addr->set_flags &=
- ~QETH_IPA_SETIP_TAKEOVER_FLAG;
- }
- } else
+ enable = !card->ipato.enabled;
+ } else if (kstrtobool(buf, &enable)) {
rc = -EINVAL;
+ goto out;
+ }
+
+ if (card->ipato.enabled == enable)
+ goto out;
+ card->ipato.enabled = enable;
+
+ hash_for_each(card->ip_htable, i, addr, hnode) {
+ if (!enable)
+ addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+ else if (addr->type == QETH_IP_TYPE_NORMAL &&
+ qeth_l3_is_addr_covered_by_ipato(card, addr))
+ addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+ }
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
Patches currently in stable-queue which might be from jwi(a)linux.vnet.ibm.com are
queue-4.9/s390-qeth-lock-ip-table-while-applying-takeover-changes.patch
queue-4.9/s390-qeth-apply-takeover-changes-when-mode-is-toggled.patch
queue-4.9/s390-qeth-update-takeover-ips-after-configuration-change.patch
queue-4.9/s390-qeth-don-t-apply-takeover-changes-to-rxip.patch
This is a note to let you know that I've just added the patch titled
RDS: Check cmsg_len before dereferencing CMSG_DATA
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
rds-check-cmsg_len-before-dereferencing-cmsg_data.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Avinash Repaka <avinash.repaka(a)oracle.com>
Date: Thu, 21 Dec 2017 20:17:04 -0800
Subject: RDS: Check cmsg_len before dereferencing CMSG_DATA
From: Avinash Repaka <avinash.repaka(a)oracle.com>
[ Upstream commit 14e138a86f6347c6199f610576d2e11c03bec5f0 ]
RDS currently doesn't check if the length of the control message is
large enough to hold the required data, before dereferencing the control
message data. This results in the following crash:
BUG: KASAN: stack-out-of-bounds in rds_rdma_bytes net/rds/send.c:1013
[inline]
BUG: KASAN: stack-out-of-bounds in rds_sendmsg+0x1f02/0x1f90
net/rds/send.c:1066
Read of size 8 at addr ffff8801c928fb70 by task syzkaller455006/3157
CPU: 0 PID: 3157 Comm: syzkaller455006 Not tainted 4.15.0-rc3+ #161
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:17 [inline]
dump_stack+0x194/0x257 lib/dump_stack.c:53
print_address_description+0x73/0x250 mm/kasan/report.c:252
kasan_report_error mm/kasan/report.c:351 [inline]
kasan_report+0x25b/0x340 mm/kasan/report.c:409
__asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430
rds_rdma_bytes net/rds/send.c:1013 [inline]
rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066
sock_sendmsg_nosec net/socket.c:628 [inline]
sock_sendmsg+0xca/0x110 net/socket.c:638
___sys_sendmsg+0x320/0x8b0 net/socket.c:2018
__sys_sendmmsg+0x1ee/0x620 net/socket.c:2108
SYSC_sendmmsg net/socket.c:2139 [inline]
SyS_sendmmsg+0x35/0x60 net/socket.c:2134
entry_SYSCALL_64_fastpath+0x1f/0x96
RIP: 0033:0x43fe49
RSP: 002b:00007fffbe244ad8 EFLAGS: 00000217 ORIG_RAX: 0000000000000133
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fe49
RDX: 0000000000000001 RSI: 000000002020c000 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004017b0
R13: 0000000000401840 R14: 0000000000000000 R15: 0000000000000000
To fix this, we verify that the cmsg_len is large enough to hold the
data to be read, before proceeding further.
Reported-by: syzbot <syzkaller-bugs(a)googlegroups.com>
Signed-off-by: Avinash Repaka <avinash.repaka(a)oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar(a)oracle.com>
Reviewed-by: Yuval Shaia <yuval.shaia(a)oracle.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/rds/send.c | 3 +++
1 file changed, 3 insertions(+)
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1006,6 +1006,9 @@ static int rds_rdma_bytes(struct msghdr
continue;
if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+ if (cmsg->cmsg_len <
+ CMSG_LEN(sizeof(struct rds_rdma_args)))
+ return -EINVAL;
args = CMSG_DATA(cmsg);
*rdma_bytes += args->remote_vec.bytes;
}
Patches currently in stable-queue which might be from avinash.repaka(a)oracle.com are
queue-4.9/rds-check-cmsg_len-before-dereferencing-cmsg_data.patch
This is a note to let you know that I've just added the patch titled
ptr_ring: add barriers
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ptr_ring-add-barriers.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: "Michael S. Tsirkin" <mst(a)redhat.com>
Date: Tue, 5 Dec 2017 21:29:37 +0200
Subject: ptr_ring: add barriers
From: "Michael S. Tsirkin" <mst(a)redhat.com>
[ Upstream commit a8ceb5dbfde1092b466936bca0ff3be127ecf38e ]
Users of ptr_ring expect that it's safe to give the
data structure a pointer and have it be available
to consumers, but that actually requires an smp_wmb
or a stronger barrier.
In absence of such barriers and on architectures that reorder writes,
consumer might read an uninitialized value from an skb pointer stored
in the skb array. This was observed causing crashes.
To fix, add memory barriers. The barrier we use is a wmb, the
assumption being that producers do not need to read the value so we do
not need to order these reads.
Reported-by: George Cherian <george.cherian(a)cavium.com>
Suggested-by: Jason Wang <jasowang(a)redhat.com>
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Jason Wang <jasowang(a)redhat.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/linux/ptr_ring.h | 9 +++++++++
1 file changed, 9 insertions(+)
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -99,12 +99,18 @@ static inline bool ptr_ring_full_bh(stru
/* Note: callers invoking this in a loop must use a compiler barrier,
* for example cpu_relax(). Callers must hold producer_lock.
+ * Callers are responsible for making sure pointer that is being queued
+ * points to a valid data.
*/
static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
if (unlikely(!r->size) || r->queue[r->producer])
return -ENOSPC;
+ /* Make sure the pointer we are storing points to a valid data. */
+ /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
+ smp_wmb();
+
r->queue[r->producer++] = ptr;
if (unlikely(r->producer >= r->size))
r->producer = 0;
@@ -244,6 +250,9 @@ static inline void *__ptr_ring_consume(s
if (ptr)
__ptr_ring_discard_one(r);
+ /* Make sure anyone accessing data through the pointer is up to date. */
+ /* Pairs with smp_wmb in __ptr_ring_produce. */
+ smp_read_barrier_depends();
return ptr;
}
Patches currently in stable-queue which might be from mst(a)redhat.com are
queue-4.9/ptr_ring-add-barriers.patch
This is a note to let you know that I've just added the patch titled
netlink: Add netns check on taps
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
netlink-add-netns-check-on-taps.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Kevin Cernekee <cernekee(a)chromium.org>
Date: Wed, 6 Dec 2017 12:12:27 -0800
Subject: netlink: Add netns check on taps
From: Kevin Cernekee <cernekee(a)chromium.org>
[ Upstream commit 93c647643b48f0131f02e45da3bd367d80443291 ]
Currently, a nlmon link inside a child namespace can observe systemwide
netlink activity. Filter the traffic so that nlmon can only sniff
netlink messages from its own netns.
Test case:
vpnns -- bash -c "ip link add nlmon0 type nlmon; \
ip link set nlmon0 up; \
tcpdump -i nlmon0 -q -w /tmp/nlmon.pcap -U" &
sudo ip xfrm state add src 10.1.1.1 dst 10.1.1.2 proto esp \
spi 0x1 mode transport \
auth sha1 0x6162633132330000000000000000000000000000 \
enc aes 0x00000000000000000000000000000000
grep --binary abc123 /tmp/nlmon.pcap
Signed-off-by: Kevin Cernekee <cernekee(a)chromium.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/netlink/af_netlink.c | 3 +++
1 file changed, 3 insertions(+)
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -261,6 +261,9 @@ static int __netlink_deliver_tap_skb(str
struct sock *sk = skb->sk;
int ret = -ENOMEM;
+ if (!net_eq(dev_net(dev), sock_net(sk)))
+ return 0;
+
dev_hold(dev);
if (is_vmalloc_addr(skb->head))
Patches currently in stable-queue which might be from cernekee(a)chromium.org are
queue-4.9/net-igmp-use-correct-source-address-on-igmpv3-reports.patch
queue-4.9/netlink-add-netns-check-on-taps.patch
This is a note to let you know that I've just added the patch titled
net: reevalulate autoflowlabel setting after sysctl setting
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Shaohua Li <shli(a)fb.com>
Date: Wed, 20 Dec 2017 12:10:21 -0800
Subject: net: reevalulate autoflowlabel setting after sysctl setting
From: Shaohua Li <shli(a)fb.com>
[ Upstream commit 513674b5a2c9c7a67501506419da5c3c77ac6f08 ]
sysctl.ip6.auto_flowlabels is default 1. In our hosts, we set it to 2.
If sockopt doesn't set autoflowlabel, outcome packets from the hosts are
supposed to not include flowlabel. This is true for normal packet, but
not for reset packet.
The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if
we change sysctl.ip6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't
changed, so the sock will keep the old behavior in terms of auto
flowlabel. Reset packet is suffering from this problem, because reset
packet is sent from a special control socket, which is created at boot
time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control
socket will always have its ipv6_pinfo.autoflowlabel set, even after
user set sysctl.ipv6.auto_flowlabels to 2, so reset packet will always
have flowlabel. Normal sock created before sysctl setting suffers from
the same issue. We can't even turn off autoflowlabel unless we kill all
socks in the hosts.
To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the
autoflowlabel setting from user, otherwise we always call
ip6_default_np_autolabel() which has the new settings of sysctl.
Note, this changes behavior a little bit. Before commit 42240901f7c4
(ipv6: Implement different admin modes for automatic flow labels), the
autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes,
existing connection will change autoflowlabel behavior. After that
commit, autoflowlabel behavior is sticky in the whole life of the sock.
With this patch, the behavior isn't sticky again.
Cc: Martin KaFai Lau <kafai(a)fb.com>
Cc: Eric Dumazet <eric.dumazet(a)gmail.com>
Cc: Tom Herbert <tom(a)quantonium.net>
Signed-off-by: Shaohua Li <shli(a)fb.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/linux/ipv6.h | 3 ++-
net/ipv6/af_inet6.c | 1 -
net/ipv6/ip6_output.c | 12 ++++++++++--
net/ipv6/ipv6_sockglue.c | 1 +
4 files changed, 13 insertions(+), 4 deletions(-)
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -246,7 +246,8 @@ struct ipv6_pinfo {
* 100: prefer care-of address
*/
dontfrag:1,
- autoflowlabel:1;
+ autoflowlabel:1,
+ autoflowlabel_set:1;
__u8 min_hopcount;
__u8 tclass;
__be32 rcv_flowinfo;
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -209,7 +209,6 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -156,6 +156,14 @@ int ip6_output(struct net *net, struct s
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+ if (!np->autoflowlabel_set)
+ return ip6_default_np_autolabel(net);
+ else
+ return np->autoflowlabel;
+}
+
/*
* xmit an sk_buff (used by TCP, SCTP and DCCP)
* Note : socket lock is not held for SYNACK packets, but might be modified
@@ -219,7 +227,7 @@ int ip6_xmit(const struct sock *sk, stru
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -1691,7 +1699,7 @@ struct sk_buff *__ip6_make_skb(struct so
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -874,6 +874,7 @@ pref_skip_coa:
break;
case IPV6_AUTOFLOWLABEL:
np->autoflowlabel = valbool;
+ np->autoflowlabel_set = 1;
retv = 0;
break;
}
Patches currently in stable-queue which might be from shli(a)fb.com are
queue-4.9/net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch
This is a note to let you know that I've just added the patch titled
net: qmi_wwan: add Sierra EM7565 1199:9091
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-qmi_wwan-add-sierra-em7565-1199-9091.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Sebastian Sjoholm <ssjoholm(a)mac.com>
Date: Mon, 11 Dec 2017 21:51:14 +0100
Subject: net: qmi_wwan: add Sierra EM7565 1199:9091
From: Sebastian Sjoholm <ssjoholm(a)mac.com>
[ Upstream commit aceef61ee56898cfa7b6960fb60b9326c3860441 ]
Sierra Wireless EM7565 is a Qualcomm MDM9x50 based M.2 modem.
The USB id is added to qmi_wwan.c to allow QMI communication
with the EM7565.
Signed-off-by: Sebastian Sjoholm <ssjoholm(a)mac.com>
Acked-by: Bjørn Mork <bjorn(a)mork.no>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/usb/qmi_wwan.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -907,6 +907,7 @@ static const struct usb_device_id produc
{QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */
{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
Patches currently in stable-queue which might be from ssjoholm(a)mac.com are
queue-4.9/net-qmi_wwan-add-sierra-em7565-1199-9091.patch
This is a note to let you know that I've just added the patch titled
net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Grygorii Strashko <grygorii.strashko(a)ti.com>
Date: Wed, 20 Dec 2017 18:45:10 -0600
Subject: net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround
From: Grygorii Strashko <grygorii.strashko(a)ti.com>
[ Upstream commit c1a8d0a3accf64a014d605e6806ce05d1c17adf1 ]
Under some circumstances driver will perform PHY reset in
ksz9031_read_status() to fix autoneg failure case (idle error count =
0xFF). When this happens ksz9031 will not detect link status change any
more when connecting to Netgear 1G switch (link can be recovered sometimes by
restarting netdevice "ifconfig down up"). Reproduced with TI am572x board
equipped with ksz9031 PHY while connecting to Netgear 1G switch.
Fix the issue by reconfiguring autonegotiation after PHY reset in
ksz9031_read_status().
Fixes: d2fd719bcb0e ("net/phy: micrel: Add workaround for bad autoneg")
Signed-off-by: Grygorii Strashko <grygorii.strashko(a)ti.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/phy/micrel.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -624,6 +624,7 @@ static int ksz9031_read_status(struct ph
phydev->link = 0;
if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
phydev->drv->config_intr(phydev);
+ return genphy_config_aneg(phydev);
}
return 0;
Patches currently in stable-queue which might be from grygorii.strashko(a)ti.com are
queue-4.9/net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch
This is a note to let you know that I've just added the patch titled
net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Tobias Jordan <Tobias.Jordan(a)elektrobit.com>
Date: Wed, 6 Dec 2017 15:23:23 +0100
Subject: net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case
From: Tobias Jordan <Tobias.Jordan(a)elektrobit.com>
[ Upstream commit 589bf32f09852041fbd3b7ce1a9e703f95c230ba ]
add appropriate calls to clk_disable_unprepare() by jumping to out_mdio
in case orion_mdio_probe() returns -EPROBE_DEFER.
Found by Linux Driver Verification project (linuxtesting.org).
Fixes: 3d604da1e954 ("net: mvmdio: get and enable optional clock")
Signed-off-by: Tobias Jordan <Tobias.Jordan(a)elektrobit.com>
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/ethernet/marvell/mvmdio.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -232,7 +232,8 @@ static int orion_mdio_probe(struct platf
dev->regs + MVMDIO_ERR_INT_MASK);
} else if (dev->err_interrupt == -EPROBE_DEFER) {
- return -EPROBE_DEFER;
+ ret = -EPROBE_DEFER;
+ goto out_mdio;
}
mutex_init(&dev->lock);
Patches currently in stable-queue which might be from Tobias.Jordan(a)elektrobit.com are
queue-4.9/net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch
This is a note to let you know that I've just added the patch titled
net/mlx5e: Prevent possible races in VXLAN control flow
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Gal Pressman <galp(a)mellanox.com>
Date: Mon, 4 Dec 2017 09:57:43 +0200
Subject: net/mlx5e: Prevent possible races in VXLAN control flow
From: Gal Pressman <galp(a)mellanox.com>
[ Upstream commit 0c1cc8b2215f5122ca614b5adca60346018758c3 ]
When calling add/remove VXLAN port, a lock must be held in order to
prevent race scenarios when more than one add/remove happens at the
same time.
Fix by holding our state_lock (mutex) as done by all other parts of the
driver.
Note that the spinlock protecting the radix-tree is still needed in
order to synchronize radix-tree access from softirq context.
Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling")
Signed-off-by: Gal Pressman <galp(a)mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -88,6 +88,7 @@ static void mlx5e_vxlan_add_port(struct
struct mlx5e_vxlan *vxlan;
int err;
+ mutex_lock(&priv->state_lock);
vxlan = mlx5e_vxlan_lookup_port(priv, port);
if (vxlan) {
atomic_inc(&vxlan->refcount);
@@ -117,6 +118,7 @@ err_free:
err_delete_port:
mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
free_work:
+ mutex_unlock(&priv->state_lock);
kfree(vxlan_work);
}
@@ -130,6 +132,7 @@ static void mlx5e_vxlan_del_port(struct
struct mlx5e_vxlan *vxlan;
bool remove = false;
+ mutex_lock(&priv->state_lock);
spin_lock_bh(&vxlan_db->lock);
vxlan = radix_tree_lookup(&vxlan_db->tree, port);
if (!vxlan)
@@ -147,6 +150,7 @@ out_unlock:
mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
kfree(vxlan);
}
+ mutex_unlock(&priv->state_lock);
kfree(vxlan_work);
}
Patches currently in stable-queue which might be from galp(a)mellanox.com are
queue-4.9/net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch
queue-4.9/net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch
queue-4.9/net-mlx5e-add-refcount-to-vxlan-structure.patch
queue-4.9/net-mlx5e-fix-features-check-of-ipv6-traffic.patch
This is a note to let you know that I've just added the patch titled
net/mlx5e: Fix possible deadlock of VXLAN lock
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Gal Pressman <galp(a)mellanox.com>
Date: Thu, 23 Nov 2017 13:52:28 +0200
Subject: net/mlx5e: Fix possible deadlock of VXLAN lock
From: Gal Pressman <galp(a)mellanox.com>
[ Upstream commit 6323514116404cc651df1b7fffa1311ddf8ce647 ]
mlx5e_vxlan_lookup_port is called both from mlx5e_add_vxlan_port (user
context) and mlx5e_features_check (softirq), but the lock acquired does
not disable bottom half and might result in deadlock. Fix it by simply
replacing spin_lock() with spin_lock_bh().
While at it, replace all unnecessary spin_lock_irq() calls with spin_lock_bh().
lockdep's WARNING: inconsistent lock state
[ 654.028136] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[ 654.028229] swapper/5/0 [HC0[0]:SC1[9]:HE1:SE0] takes:
[ 654.028321] (&(&vxlan_db->lock)->rlock){+.?.}, at: [<ffffffffa06e7f0e>] mlx5e_vxlan_lookup_port+0x1e/0x50 [mlx5_core]
[ 654.028528] {SOFTIRQ-ON-W} state was registered at:
[ 654.028607] _raw_spin_lock+0x3c/0x70
[ 654.028689] mlx5e_vxlan_lookup_port+0x1e/0x50 [mlx5_core]
[ 654.028794] mlx5e_vxlan_add_port+0x2e/0x120 [mlx5_core]
[ 654.028878] process_one_work+0x1e9/0x640
[ 654.028942] worker_thread+0x4a/0x3f0
[ 654.029002] kthread+0x141/0x180
[ 654.029056] ret_from_fork+0x24/0x30
[ 654.029114] irq event stamp: 579088
[ 654.029174] hardirqs last enabled at (579088): [<ffffffff818f475a>] ip6_finish_output2+0x49a/0x8c0
[ 654.029309] hardirqs last disabled at (579087): [<ffffffff818f470e>] ip6_finish_output2+0x44e/0x8c0
[ 654.029446] softirqs last enabled at (579030): [<ffffffff810b3b3d>] irq_enter+0x6d/0x80
[ 654.029567] softirqs last disabled at (579031): [<ffffffff810b3c05>] irq_exit+0xb5/0xc0
[ 654.029684] other info that might help us debug this:
[ 654.029781] Possible unsafe locking scenario:
[ 654.029868] CPU0
[ 654.029908] ----
[ 654.029947] lock(&(&vxlan_db->lock)->rlock);
[ 654.030045] <Interrupt>
[ 654.030090] lock(&(&vxlan_db->lock)->rlock);
[ 654.030162]
*** DEADLOCK ***
Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling")
Signed-off-by: Gal Pressman <galp(a)mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_p
struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
struct mlx5e_vxlan *vxlan;
- spin_lock(&vxlan_db->lock);
+ spin_lock_bh(&vxlan_db->lock);
vxlan = radix_tree_lookup(&vxlan_db->tree, port);
- spin_unlock(&vxlan_db->lock);
+ spin_unlock_bh(&vxlan_db->lock);
return vxlan;
}
@@ -100,9 +100,9 @@ static void mlx5e_vxlan_add_port(struct
vxlan->udp_port = port;
- spin_lock_irq(&vxlan_db->lock);
+ spin_lock_bh(&vxlan_db->lock);
err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
- spin_unlock_irq(&vxlan_db->lock);
+ spin_unlock_bh(&vxlan_db->lock);
if (err)
goto err_free;
@@ -121,9 +121,9 @@ static void __mlx5e_vxlan_core_del_port(
struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
struct mlx5e_vxlan *vxlan;
- spin_lock_irq(&vxlan_db->lock);
+ spin_lock_bh(&vxlan_db->lock);
vxlan = radix_tree_delete(&vxlan_db->tree, port);
- spin_unlock_irq(&vxlan_db->lock);
+ spin_unlock_bh(&vxlan_db->lock);
if (!vxlan)
return;
@@ -171,12 +171,12 @@ void mlx5e_vxlan_cleanup(struct mlx5e_pr
struct mlx5e_vxlan *vxlan;
unsigned int port = 0;
- spin_lock_irq(&vxlan_db->lock);
+ spin_lock_bh(&vxlan_db->lock);
while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
port = vxlan->udp_port;
- spin_unlock_irq(&vxlan_db->lock);
+ spin_unlock_bh(&vxlan_db->lock);
__mlx5e_vxlan_core_del_port(priv, (u16)port);
- spin_lock_irq(&vxlan_db->lock);
+ spin_lock_bh(&vxlan_db->lock);
}
- spin_unlock_irq(&vxlan_db->lock);
+ spin_unlock_bh(&vxlan_db->lock);
}
Patches currently in stable-queue which might be from galp(a)mellanox.com are
queue-4.9/net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch
queue-4.9/net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch
queue-4.9/net-mlx5e-add-refcount-to-vxlan-structure.patch
queue-4.9/net-mlx5e-fix-features-check-of-ipv6-traffic.patch
This is a note to let you know that I've just added the patch titled
net/mlx5e: Fix features check of IPv6 traffic
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-mlx5e-fix-features-check-of-ipv6-traffic.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Gal Pressman <galp(a)mellanox.com>
Date: Tue, 21 Nov 2017 17:49:36 +0200
Subject: net/mlx5e: Fix features check of IPv6 traffic
From: Gal Pressman <galp(a)mellanox.com>
[ Upstream commit 2989ad1ec03021ee6d2193c35414f1d970a243de ]
The assumption that the next header field contains the transport
protocol is wrong for IPv6 packets with extension headers.
Instead, we should look for the inner-most next header field in the buffer.
This will fix TSO offload for tunnels over IPv6 with extension headers.
Performance testing: 19.25x improvement, cool!
Measuring bandwidth of 16 threads TCP traffic over IPv6 GRE tap.
CPU: Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz
NIC: Mellanox Technologies MT28800 Family [ConnectX-5 Ex]
TSO: Enabled
Before: 4,926.24 Mbps
Now : 94,827.91 Mbps
Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling")
Signed-off-by: Gal Pressman <galp(a)mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3038,6 +3038,7 @@ static netdev_features_t mlx5e_vxlan_fea
struct sk_buff *skb,
netdev_features_t features)
{
+ unsigned int offset = 0;
struct udphdr *udph;
u16 proto;
u16 port = 0;
@@ -3047,7 +3048,7 @@ static netdev_features_t mlx5e_vxlan_fea
proto = ip_hdr(skb)->protocol;
break;
case htons(ETH_P_IPV6):
- proto = ipv6_hdr(skb)->nexthdr;
+ proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
break;
default:
goto out;
Patches currently in stable-queue which might be from galp(a)mellanox.com are
queue-4.9/net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch
queue-4.9/net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch
queue-4.9/net-mlx5e-add-refcount-to-vxlan-structure.patch
queue-4.9/net-mlx5e-fix-features-check-of-ipv6-traffic.patch
This is a note to let you know that I've just added the patch titled
net/mlx5e: Add refcount to VXLAN structure
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-mlx5e-add-refcount-to-vxlan-structure.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Sun Dec 31 11:13:15 CET 2017
From: Gal Pressman <galp(a)mellanox.com>
Date: Sun, 3 Dec 2017 13:58:50 +0200
Subject: net/mlx5e: Add refcount to VXLAN structure
From: Gal Pressman <galp(a)mellanox.com>
[ Upstream commit 23f4cc2cd9ed92570647220aca60d0197d8c1fa9 ]
A refcount mechanism must be implemented in order to prevent unwanted
scenarios such as:
- Open an IPv4 VXLAN interface
- Open an IPv6 VXLAN interface (different socket)
- Remove one of the interfaces
With the current implementation, the UDP port will be removed from our VXLAN
database and the offloads will be turned off for the other interface, which is
still active.
The reference count mechanism will only allow UDP port removals once all
consumers are gone.
Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling")
Signed-off-by: Gal Pressman <galp(a)mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm(a)mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 50 ++++++++++++------------
drivers/net/ethernet/mellanox/mlx5/core/vxlan.h | 1
2 files changed, 28 insertions(+), 23 deletions(-)
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -88,8 +88,11 @@ static void mlx5e_vxlan_add_port(struct
struct mlx5e_vxlan *vxlan;
int err;
- if (mlx5e_vxlan_lookup_port(priv, port))
+ vxlan = mlx5e_vxlan_lookup_port(priv, port);
+ if (vxlan) {
+ atomic_inc(&vxlan->refcount);
goto free_work;
+ }
if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
goto free_work;
@@ -99,6 +102,7 @@ static void mlx5e_vxlan_add_port(struct
goto err_delete_port;
vxlan->udp_port = port;
+ atomic_set(&vxlan->refcount, 1);
spin_lock_bh(&vxlan_db->lock);
err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
@@ -116,32 +120,33 @@ free_work:
kfree(vxlan_work);
}
-static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
+static void mlx5e_vxlan_del_port(struct work_struct *work)
{
+ struct mlx5e_vxlan_work *vxlan_work =
+ container_of(work, struct mlx5e_vxlan_work, work);
+ struct mlx5e_priv *priv = vxlan_work->priv;
struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
+ u16 port = vxlan_work->port;
struct mlx5e_vxlan *vxlan;
+ bool remove = false;
spin_lock_bh(&vxlan_db->lock);
- vxlan = radix_tree_delete(&vxlan_db->tree, port);
- spin_unlock_bh(&vxlan_db->lock);
-
+ vxlan = radix_tree_lookup(&vxlan_db->tree, port);
if (!vxlan)
- return;
-
- mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
-
- kfree(vxlan);
-}
+ goto out_unlock;
-static void mlx5e_vxlan_del_port(struct work_struct *work)
-{
- struct mlx5e_vxlan_work *vxlan_work =
- container_of(work, struct mlx5e_vxlan_work, work);
- struct mlx5e_priv *priv = vxlan_work->priv;
- u16 port = vxlan_work->port;
+ if (atomic_dec_and_test(&vxlan->refcount)) {
+ radix_tree_delete(&vxlan_db->tree, port);
+ remove = true;
+ }
- __mlx5e_vxlan_core_del_port(priv, port);
+out_unlock:
+ spin_unlock_bh(&vxlan_db->lock);
+ if (remove) {
+ mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+ kfree(vxlan);
+ }
kfree(vxlan_work);
}
@@ -171,12 +176,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_pr
struct mlx5e_vxlan *vxlan;
unsigned int port = 0;
- spin_lock_bh(&vxlan_db->lock);
+ /* Lockless since we are the only radix-tree consumers, wq is disabled */
while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
port = vxlan->udp_port;
- spin_unlock_bh(&vxlan_db->lock);
- __mlx5e_vxlan_core_del_port(priv, (u16)port);
- spin_lock_bh(&vxlan_db->lock);
+ radix_tree_delete(&vxlan_db->tree, port);
+ mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+ kfree(vxlan);
}
- spin_unlock_bh(&vxlan_db->lock);
}
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
@@ -36,6 +36,7 @@
#include "en.h"
struct mlx5e_vxlan {
+ atomic_t refcount;
u16 udp_port;
};
Patches currently in stable-queue which might be from galp(a)mellanox.com are
queue-4.9/net-mlx5e-fix-possible-deadlock-of-vxlan-lock.patch
queue-4.9/net-mlx5e-prevent-possible-races-in-vxlan-control-flow.patch
queue-4.9/net-mlx5e-add-refcount-to-vxlan-structure.patch
queue-4.9/net-mlx5e-fix-features-check-of-ipv6-traffic.patch