This patchset has some fixes for the xfrm interfaces that are needed but did not make it into the stable tree so far.
1) Fix a memory leak when creating xfrm interfaces.
2) Fix a xfrm interface corruptinon on changelink.
3) Fix a list corruption when changing network namespaces.
4) Fix unregistation of the underying phydev, otherwise the phydev cannot be removed.
From: Nicolas Dichtel nicolas.dichtel@6wind.com
commit 56c5ee1a5823e9cf5288b84ae6364cb4112f8225 upstream
The following commands produce a backtrace and return an error but the xfrm interface is created (in the wrong netns): $ ip netns add foo $ ip netns add bar $ ip -n foo netns set bar 0 $ ip -n foo link add xfrmi0 link-netnsid 0 type xfrm dev lo if_id 23 RTNETLINK answers: Invalid argument $ ip -n bar link ls xfrmi0 2: xfrmi0@lo: <NOARP,M-DOWN> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00
Here is the backtrace: [ 79.879174] WARNING: CPU: 0 PID: 1178 at net/core/dev.c:8172 rollback_registered_many+0x86/0x3c1 [ 79.880260] Modules linked in: xfrm_interface nfsv3 nfs_acl auth_rpcgss nfsv4 nfs lockd grace sunrpc fscache button parport_pc parport serio_raw evdev pcspkr loop ext4 crc16 mbcache jbd2 crc32c_generic ide_cd_mod ide_gd_mod cdrom ata_$ eneric ata_piix libata scsi_mod 8139too piix psmouse i2c_piix4 ide_core 8139cp mii i2c_core floppy [ 79.883698] CPU: 0 PID: 1178 Comm: ip Not tainted 5.2.0-rc6+ #106 [ 79.884462] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 79.885447] RIP: 0010:rollback_registered_many+0x86/0x3c1 [ 79.886120] Code: 01 e8 d7 7d c6 ff 0f 0b 48 8b 45 00 4c 8b 20 48 8d 58 90 49 83 ec 70 48 8d 7b 70 48 39 ef 74 44 8a 83 d0 04 00 00 84 c0 75 1f <0f> 0b e8 61 cd ff ff 48 b8 00 01 00 00 00 00 ad de 48 89 43 70 66 [ 79.888667] RSP: 0018:ffffc900015ab740 EFLAGS: 00010246 [ 79.889339] RAX: ffff8882353e5700 RBX: ffff8882353e56a0 RCX: ffff8882353e5710 [ 79.890174] RDX: ffffc900015ab7e0 RSI: ffffc900015ab7e0 RDI: ffff8882353e5710 [ 79.891029] RBP: ffffc900015ab7e0 R08: ffffc900015ab7e0 R09: ffffc900015ab7e0 [ 79.891866] R10: ffffc900015ab7a0 R11: ffffffff82233fec R12: ffffc900015ab770 [ 79.892728] R13: ffffffff81eb7ec0 R14: ffff88822ed6cf00 R15: 00000000ffffffea [ 79.893557] FS: 00007ff350f31740(0000) GS:ffff888237a00000(0000) knlGS:0000000000000000 [ 79.894581] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 79.895317] CR2: 00000000006c8580 CR3: 000000022c272000 CR4: 00000000000006f0 [ 79.896137] Call Trace: [ 79.896464] unregister_netdevice_many+0x12/0x6c [ 79.896998] __rtnl_newlink+0x6e2/0x73b [ 79.897446] ? __kmalloc_node_track_caller+0x15e/0x185 [ 79.898039] ? pskb_expand_head+0x5f/0x1fe [ 79.898556] ? stack_access_ok+0xd/0x2c [ 79.899009] ? deref_stack_reg+0x12/0x20 [ 79.899462] ? stack_access_ok+0xd/0x2c [ 79.899927] ? stack_access_ok+0xd/0x2c [ 79.900404] ? __module_text_address+0x9/0x4f [ 79.900910] ? is_bpf_text_address+0x5/0xc [ 79.901390] ? kernel_text_address+0x67/0x7b [ 79.901884] ? __kernel_text_address+0x1a/0x25 [ 79.902397] ? unwind_get_return_address+0x12/0x23 [ 79.903122] ? __cmpxchg_double_slab.isra.37+0x46/0x77 [ 79.903772] rtnl_newlink+0x43/0x56 [ 79.904217] rtnetlink_rcv_msg+0x200/0x24c
In fact, each time a xfrm interface was created, a netdev was allocated by __rtnl_newlink()/rtnl_create_link() and then another one by xfrmi_newlink()/xfrmi_create(). Only the second one was registered, it's why the previous commands produce a backtrace: dev_change_net_namespace() was called on a netdev with reg_state set to NETREG_UNINITIALIZED (the first one).
CC: Lorenzo Colitti lorenzo@google.com CC: Benedict Wong benedictwong@google.com CC: Steffen Klassert steffen.klassert@secunet.com CC: Shannon Nelson shannon.nelson@oracle.com CC: Antony Antony antony@phenome.org CC: Eyal Birger eyal.birger@gmail.com Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: Julien Floret julien.floret@6wind.com Signed-off-by: Nicolas Dichtel nicolas.dichtel@6wind.com Signed-off-by: Steffen Klassert steffen.klassert@secunet.com --- net/xfrm/xfrm_interface.c | 98 +++++++++++---------------------------- 1 file changed, 28 insertions(+), 70 deletions(-)
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 555ee2aca6c0..e1265fda3035 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -133,7 +133,7 @@ static void xfrmi_dev_free(struct net_device *dev) free_percpu(dev->tstats); }
-static int xfrmi_create2(struct net_device *dev) +static int xfrmi_create(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); struct net *net = dev_net(dev); @@ -156,54 +156,7 @@ static int xfrmi_create2(struct net_device *dev) return err; }
-static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p) -{ - struct net_device *dev; - struct xfrm_if *xi; - char name[IFNAMSIZ]; - int err; - - if (p->name[0]) { - strlcpy(name, p->name, IFNAMSIZ); - } else { - err = -EINVAL; - goto failed; - } - - dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup); - if (!dev) { - err = -EAGAIN; - goto failed; - } - - dev_net_set(dev, net); - - xi = netdev_priv(dev); - xi->p = *p; - xi->net = net; - xi->dev = dev; - xi->phydev = dev_get_by_index(net, p->link); - if (!xi->phydev) { - err = -ENODEV; - goto failed_free; - } - - err = xfrmi_create2(dev); - if (err < 0) - goto failed_dev_put; - - return xi; - -failed_dev_put: - dev_put(xi->phydev); -failed_free: - free_netdev(dev); -failed: - return ERR_PTR(err); -} - -static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p, - int create) +static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p) { struct xfrm_if __rcu **xip; struct xfrm_if *xi; @@ -211,17 +164,11 @@ static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p,
for (xip = &xfrmn->xfrmi[0]; (xi = rtnl_dereference(*xip)) != NULL; - xip = &xi->next) { - if (xi->p.if_id == p->if_id) { - if (create) - return ERR_PTR(-EEXIST); - + xip = &xi->next) + if (xi->p.if_id == p->if_id) return xi; - } - } - if (!create) - return ERR_PTR(-ENODEV); - return xfrmi_create(net, p); + + return NULL; }
static void xfrmi_dev_uninit(struct net_device *dev) @@ -689,21 +636,33 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); - struct xfrm_if_parms *p; + struct xfrm_if_parms p; struct xfrm_if *xi; + int err;
- xi = netdev_priv(dev); - p = &xi->p; - - xfrmi_netlink_parms(data, p); + xfrmi_netlink_parms(data, &p);
if (!tb[IFLA_IFNAME]) return -EINVAL;
- nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ); + nla_strlcpy(p.name, tb[IFLA_IFNAME], IFNAMSIZ);
- xi = xfrmi_locate(net, p, 1); - return PTR_ERR_OR_ZERO(xi); + xi = xfrmi_locate(net, &p); + if (xi) + return -EEXIST; + + xi = netdev_priv(dev); + xi->p = p; + xi->net = net; + xi->dev = dev; + xi->phydev = dev_get_by_index(net, p.link); + if (!xi->phydev) + return -ENODEV; + + err = xfrmi_create(dev); + if (err < 0) + dev_put(xi->phydev); + return err; }
static void xfrmi_dellink(struct net_device *dev, struct list_head *head) @@ -720,9 +679,8 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
xfrmi_netlink_parms(data, &xi->p);
- xi = xfrmi_locate(net, &xi->p, 0); - - if (IS_ERR_OR_NULL(xi)) { + xi = xfrmi_locate(net, &xi->p); + if (!xi) { xi = netdev_priv(dev); } else { if (xi->dev != dev)
From: Nicolas Dichtel nicolas.dichtel@6wind.com
commit e9e7e85d75f3731079ffd77c1a66f037aef04fe7 upstream
The new parameters must not be stored in the netdev_priv() before validation, it may corrupt the interface. Note also that if data is NULL, only a memset() is done.
$ ip link add xfrm1 type xfrm dev lo if_id 1 $ ip link add xfrm2 type xfrm dev lo if_id 2 $ ip link set xfrm1 type xfrm dev lo if_id 2 RTNETLINK answers: File exists $ ip -d link list dev xfrm1 5: xfrm1@lo: <NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00 promiscuity 0 minmtu 68 maxmtu 1500 xfrm if_id 0x2 addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
=> "if_id 0x2"
Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Nicolas Dichtel nicolas.dichtel@6wind.com Tested-by: Julien Floret julien.floret@6wind.com Signed-off-by: Steffen Klassert steffen.klassert@secunet.com --- net/xfrm/xfrm_interface.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index e1265fda3035..c811158b9f1c 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -674,12 +674,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct xfrm_if *xi = netdev_priv(dev); struct net *net = dev_net(dev); + struct xfrm_if_parms p; + struct xfrm_if *xi;
- xfrmi_netlink_parms(data, &xi->p); - - xi = xfrmi_locate(net, &xi->p); + xfrmi_netlink_parms(data, &p); + xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); } else { @@ -687,7 +687,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EEXIST; }
- return xfrmi_update(xi, &xi->p); + return xfrmi_update(xi, &p); }
static size_t xfrmi_get_size(const struct net_device *dev)
From: Nicolas Dichtel nicolas.dichtel@6wind.com
commit c5d1030f23002430c2a336b2b629b9d6f72b3564 upstream
dev_net(dev) is the netns of the device and xi->net is the link netns, where the device has been linked. changelink() must operate in the link netns to avoid a corruption of the xfrm lists.
Note that xi->net and dev_net(xi->physdev) are always the same.
Before the patch, the xfrmi lists may be corrupted and can later trigger a kernel panic.
Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: Julien Floret julien.floret@6wind.com Signed-off-by: Nicolas Dichtel nicolas.dichtel@6wind.com Tested-by: Julien Floret julien.floret@6wind.com Signed-off-by: Steffen Klassert steffen.klassert@secunet.com --- net/xfrm/xfrm_interface.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index c811158b9f1c..2ed779db8769 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -505,7 +505,7 @@ static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p) { - struct net *net = dev_net(xi->dev); + struct net *net = xi->net; struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); int err;
@@ -674,9 +674,9 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct net *net = dev_net(dev); + struct xfrm_if *xi = netdev_priv(dev); + struct net *net = xi->net; struct xfrm_if_parms p; - struct xfrm_if *xi;
xfrmi_netlink_parms(data, &p); xi = xfrmi_locate(net, &p); @@ -718,7 +718,7 @@ struct net *xfrmi_get_link_net(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev);
- return dev_net(xi->phydev); + return xi->net; }
static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
From: Nicolas Dichtel nicolas.dichtel@6wind.com
commit 22d6552f827ef76ade3edf6bbb3f05048a0a7d8b upstream
With the current implementation, phydev cannot be removed:
$ ip link add dummy type dummy $ ip link add xfrm1 type xfrm dev dummy if_id 1 $ ip l d dummy kernel:[77938.465445] unregister_netdevice: waiting for dummy to become free. Usage count = 1
Manage it like in ip tunnels, ie just keep the ifindex. Not that the side effect, is that the phydev is now optional.
Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Nicolas Dichtel nicolas.dichtel@6wind.com Tested-by: Julien Floret julien.floret@6wind.com Signed-off-by: Steffen Klassert steffen.klassert@secunet.com --- include/net/xfrm.h | 1 - net/xfrm/xfrm_interface.c | 32 +++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4ddd2b13ac8d..fb9b19a3b749 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1062,7 +1062,6 @@ struct xfrm_if_parms { struct xfrm_if { struct xfrm_if __rcu *next; /* next interface in list */ struct net_device *dev; /* virtual device associated with interface */ - struct net_device *phydev; /* physical device */ struct net *net; /* netns for packet i/o */ struct xfrm_if_parms p; /* interface parms */
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 2ed779db8769..d6a3cdf7885c 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -177,7 +177,6 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
xfrmi_unlink(xfrmn, xi); - dev_put(xi->phydev); dev_put(dev); }
@@ -364,7 +363,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; }
- fl.flowi_oif = xi->phydev->ifindex; + fl.flowi_oif = xi->p.link;
ret = xfrmi_xmit2(skb, dev, &fl); if (ret < 0) @@ -553,7 +552,7 @@ static int xfrmi_get_iflink(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev);
- return xi->phydev->ifindex; + return xi->p.link; }
@@ -579,12 +578,14 @@ static void xfrmi_dev_setup(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = xfrmi_dev_free; netif_keep_dst(dev); + + eth_broadcast_addr(dev->broadcast); }
static int xfrmi_dev_init(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device *phydev = xi->phydev; + struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link); int err;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); @@ -599,13 +600,19 @@ static int xfrmi_dev_init(struct net_device *dev)
dev->features |= NETIF_F_LLTX;
- dev->needed_headroom = phydev->needed_headroom; - dev->needed_tailroom = phydev->needed_tailroom; + if (phydev) { + dev->needed_headroom = phydev->needed_headroom; + dev->needed_tailroom = phydev->needed_tailroom;
- if (is_zero_ether_addr(dev->dev_addr)) - eth_hw_addr_inherit(dev, phydev); - if (is_zero_ether_addr(dev->broadcast)) - memcpy(dev->broadcast, phydev->broadcast, dev->addr_len); + if (is_zero_ether_addr(dev->dev_addr)) + eth_hw_addr_inherit(dev, phydev); + if (is_zero_ether_addr(dev->broadcast)) + memcpy(dev->broadcast, phydev->broadcast, + dev->addr_len); + } else { + eth_hw_addr_random(dev); + eth_broadcast_addr(dev->broadcast); + }
return 0; } @@ -655,13 +662,8 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, xi->p = p; xi->net = net; xi->dev = dev; - xi->phydev = dev_get_by_index(net, p.link); - if (!xi->phydev) - return -ENODEV;
err = xfrmi_create(dev); - if (err < 0) - dev_put(xi->phydev); return err; }
On Mon, Dec 09, 2019 at 09:30:41AM +0100, Steffen Klassert wrote:
This patchset has some fixes for the xfrm interfaces that are needed but did not make it into the stable tree so far.
Fix a memory leak when creating xfrm interfaces.
Fix a xfrm interface corruptinon on changelink.
Fix a list corruption when changing network namespaces.
Fix unregistation of the underying phydev, otherwise the phydev cannot be removed.
Thanks for these, all now queued up.
greg k-h
linux-stable-mirror@lists.linaro.org