From: Eric Dumazet <edumazet@google.com>
[ Upstream commit 2a06b8982f8f2f40d03a3daf634676386bd84dbc ]
Intel test robot reported a ~7% regression on TCP_CRR tests that they bisected to the cited commit.
Indeed, every time a new TCP socket is created or deleted, the atomic counter net->count is touched (via get_net(net) and put_net(net) calls).
So CPUs might have to reload a contended cache line in net_hash_mix(net) calls.
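For context, the helpers involved look roughly like this (lightly simplified from include/net/net_namespace.h and include/net/netns/hash.h; exact bodies may differ by tree):

static inline struct net *get_net(struct net *net)
{
	refcount_inc(&net->count);		/* write: dirties the cache line */
	return net;
}

static inline void put_net(struct net *net)
{
	if (refcount_dec_and_test(&net->count))	/* write: dirties it again */
		__put_net(net);
}

static inline u32 net_hash_mix(const struct net *net)
{
	return net->hash_mix;			/* hot read in hash computations */
}

With @hash_mix and @count sharing a cache line, every socket create/delete invalidates the line that every net_hash_mix() reader then has to refetch.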
We need to reorder 'struct net' fields to move @hash_mix into a read-mostly cache line.
We move into the first cache line the fields that can be dirtied often.
We will probably have to address, in a follow-up patch, the __randomize_layout that was added in linux-4.13, since it might break our placement choices.
Fixes: 355b98553789 ("netns: provide pure entropy for net_hash_mix()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/net_namespace.h | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -52,6 +52,9 @@ struct bpf_prog;
 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 
 struct net {
+	/* First cache line can be often dirtied.
+	 * Do not place here read-mostly fields.
+	 */
 	refcount_t		passive;	/* To decide when the network
 						 * namespace should be freed.
 						 */
@@ -60,7 +63,13 @@ struct net {
 						 */
 	spinlock_t		rules_mod_lock;
 
-	u32			hash_mix;
+	unsigned int		dev_unreg_count;
+
+	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
+	int			ifindex;
+
+	spinlock_t		nsid_lock;
+	atomic_t		fnhe_genid;
 
 	struct list_head	list;		/* list of network namespaces */
 	struct list_head	exit_list;	/* To linked to call pernet exit
@@ -76,11 +85,11 @@ struct net {
 #endif
 	struct user_namespace	*user_ns;	/* Owning user namespace */
 	struct ucounts		*ucounts;
-	spinlock_t		nsid_lock;
 	struct idr		netns_ids;
 
 	struct ns_common	ns;
 
+	struct list_head	dev_base_head;
 	struct proc_dir_entry	*proc_net;
 	struct proc_dir_entry	*proc_net_stat;
 
@@ -93,12 +102,14 @@ struct net {
 
 	struct uevent_sock	*uevent_sock;	/* uevent socket */
 
-	struct list_head	dev_base_head;
 	struct hlist_head	*dev_name_head;
 	struct hlist_head	*dev_index_head;
-	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
-	int			ifindex;
-	unsigned int		dev_unreg_count;
+	/* Note that @hash_mix can be read millions times per second,
+	 * it is critical that it is on a read_mostly cache line.
+	 */
+	u32			hash_mix;
+
+	struct net_device	*loopback_dev;	/* The loopback */
 
 	/* core fib_rules */
 	struct list_head	rules_ops;
@@ -106,7 +117,6 @@ struct net {
 	struct list_head	fib_notifier_ops;	/* Populated by
 							 * register_pernet_subsys()
 							 */
-	struct net_device	*loopback_dev;	/* The loopback */
 	struct netns_core	core;
 	struct netns_mib	mib;
 	struct netns_packet	packet;
@@ -171,7 +181,6 @@ struct net {
 	struct netns_xdp	xdp;
 #endif
 	struct sock		*diag_nlsk;
-	atomic_t		fnhe_genid;
 } __randomize_layout;
 
 #include <linux/seq_file_net.h>
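As a sanity check (a verification step, not part of the patch), the resulting field placement can be inspected with pahole, which annotates struct layouts with cache-line boundaries:

  $ pahole -C net vmlinux

In its output, @hash_mix should now sit past the first cache line, away from @count and the other frequently dirtied fields.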