Some programs need to know the size of the network buffers to operate correctly. Export the following sysctls read-only in network namespaces:
- net.core.rmem_default - net.core.rmem_max - net.core.wmem_default - net.core.wmem_max
Matteo Croce (2): net: make net.core.{r,w}mem_{default,max} namespaced selftests: net: tests net.core.{r,w}mem_{default,max} sysctls in a netns
net/core/sysctl_net_core.c | 75 ++++++++++++--------- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/netns-sysctl.sh | 15 +++++ 3 files changed, 58 insertions(+), 33 deletions(-) create mode 100755 tools/testing/selftests/net/netns-sysctl.sh
The following sysctls are global and can't be read from a netns:
net.core.rmem_default net.core.rmem_max net.core.wmem_default net.core.wmem_max
Make these sysctl parameters available read-only from within a network namespace, allowing a container to read them.
Signed-off-by: Matteo Croce teknoraver@meta.com --- net/core/sysctl_net_core.c | 75 +++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 33 deletions(-)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c9fb9ad87485..2079000691e2 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -382,38 +382,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, #endif
static struct ctl_table net_core_table[] = { - { - .procname = "wmem_max", - .data = &sysctl_wmem_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, - }, - { - .procname = "rmem_max", - .data = &sysctl_rmem_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, - }, - { - .procname = "wmem_default", - .data = &sysctl_wmem_default, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, - }, - { - .procname = "rmem_default", - .data = &sysctl_rmem_default, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, - }, { .procname = "mem_pcpu_rsv", .data = &net_hotdata.sysctl_mem_pcpu_rsv, @@ -697,6 +665,41 @@ static struct ctl_table netns_core_table[] = { .extra2 = SYSCTL_ONE, .proc_handler = proc_dou8vec_minmax, }, + /* sysctl_core_net_init() will set the values after this + * to readonly in network namespaces + */ + { + .procname = "wmem_max", + .data = &sysctl_wmem_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem_max", + .data = &sysctl_rmem_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, + { + .procname = "wmem_default", + .data = &sysctl_wmem_default, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem_default", + .data = &sysctl_rmem_default, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, };
static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) @@ -724,8 +727,14 @@ static __net_init int sysctl_core_net_init(struct net *net) if (tbl == NULL) goto err_dup;
- for (i = 0; i < table_size; ++i) + for (i = 0; i < table_size; ++i) { + if (tbl[i].data == &sysctl_wmem_max) + break; + tbl[i].data += (char *)net - (char *)&init_net; + } + for (; i < table_size; ++i) + tbl[i].mode &= ~0222; }
net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
On Tue, May 28, 2024 at 02:11:38PM GMT, Matteo Croce wrote:
The following sysctl are global and can't be read from a netns:
net.core.rmem_default net.core.rmem_max net.core.wmem_default net.core.wmem_max
Make the following sysctl parameters available readonly from within a network namespace, allowing a container to read them.
Signed-off-by: Matteo Croce teknoraver@meta.com
Reviewed-by: Shakeel Butt shakeel.butt@linux.dev
Add a selftest which checks that each sysctl is present in a netns, that its value is read from the init netns, and that it is read-only.
Signed-off-by: Matteo Croce teknoraver@meta.com --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/netns-sysctl.sh | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100755 tools/testing/selftests/net/netns-sysctl.sh
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..6da63d1831c1 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,6 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh +TEST_PROGS += netns-sysctl.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh new file mode 100755 index 000000000000..b948ba67b13a --- /dev/null +++ b/tools/testing/selftests/net/netns-sysctl.sh @@ -0,0 +1,15 @@ +#!/bin/bash -e + +for sc in {r,w}mem_{default,max}; do + # change the value in the host netns + sysctl -qw "net.core.$sc=300000" + + # check that the value is read from the init netns + [ "$(unshare -n sysctl -n "net.core.$sc")" -eq 300000 ] + + # check that this isn't writeable in a netns + ! unshare -n [ -w "/proc/sys/net/core/$sc" ] + ! unshare -n sysctl -w "net.core.$sc=100000" +done + +echo 'Test passed OK'
On Tue, May 28, 2024 at 02:11:39PM +0200, Matteo Croce wrote:
Add a selftest which checks that the sysctl is present in a netns, that the value is read from the init one, and that it's readonly.
Signed-off-by: Matteo Croce teknoraver@meta.com
tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/netns-sysctl.sh | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100755 tools/testing/selftests/net/netns-sysctl.sh
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..6da63d1831c1 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,6 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh +TEST_PROGS += netns-sysctl.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh new file mode 100755 index 000000000000..b948ba67b13a --- /dev/null +++ b/tools/testing/selftests/net/netns-sysctl.sh @@ -0,0 +1,15 @@ +#!/bin/bash -e
Don't you need to add the SPDX license header?
On Thu, 2024-05-30 at 05:10 -0700, Breno Leitao wrote:
On Tue, May 28, 2024 at 02:11:39PM +0200, Matteo Croce wrote:
Add a selftest which checks that the sysctl is present in a netns, that the value is read from the init one, and that it's readonly.
Signed-off-by: Matteo Croce teknoraver@meta.com
tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/netns-sysctl.sh | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100755 tools/testing/selftests/net/netns-sysctl.sh
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..6da63d1831c1 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,6 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh +TEST_PROGS += netns-sysctl.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh new file mode 100755 index 000000000000..b948ba67b13a --- /dev/null +++ b/tools/testing/selftests/net/netns-sysctl.sh @@ -0,0 +1,15 @@ +#!/bin/bash -e
Don't you need to add the SPDX license header?
Yes, please!
Additionally, please explicitly handle the sysctl-related I/O errors so that the script can output a human-readable message in case of failure.
Thanks!
Paolo
linux-kselftest-mirror@lists.linaro.org