When we stopped using KSFT_KHDR_INSTALL, a side effect is we also
changed the value of `top_srcdir`. This can be seen by looking at the
code removed by commit 49de12ba06ef
("selftests: drop KSFT_KHDR_INSTALL make target").
(Note though that this commit didn't break this, technically the one
before it did since that's the one that stopped KSFT_KHDR_INSTALL from
being used, even though the code was still there.)
Previously lib.mk reconfigured `top_srcdir` when KSFT_KHDR_INSTALL was
being used. Now, that's no longer the case.
As a result, the path to gup_test.h in vm/Makefile was wrong, and
since it's a dependency of all of the vm binaries none of them could
be built. Instead, we'd get an "error" like:
make[1]: *** No rule to make target
'/[...]/tools/testing/selftests/vm/compaction_test', needed by
'all'. Stop.
So, modify lib.mk so it once again sets top_srcdir to the root of the
kernel tree.
Fixes: f2745dc0ba3d ("selftests: stop using KSFT_KHDR_INSTALL")
Signed-off-by: Axel Rasmussen <axelrasmussen(a)google.com>
---
tools/testing/selftests/lib.mk | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 947fc72413e9..d44c72b3abe3 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -40,6 +40,7 @@ ifeq (0,$(MAKELEVEL))
endif
endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
+top_srcdir = $(selfdir)/../../..
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
--
2.37.1.595.g718a3a8f04-goog
There are currently 3 ip tunnels that are capable of carrying
L2 traffic: gretap, vxlan and geneve.
They all are capable to inherit the TOS/TTL for the outer
IP-header from the inner frame.
Add a test that verifies that these fields are correctly inherited.
These tests failed before the following commits:
b09ab9c92e50 ("ip6_tunnel: allow to inherit from VLAN encapsulated IP")
3f8a8447fd0b ("ip6_gre: use actual protocol to select xmit")
41337f52b967 ("ip6_gre: set DSCP for non-IP")
7ae29fd1be43 ("ip_tunnel: allow to inherit from VLAN encapsulated IP")
7074732c8fae ("ip_tunnels: allow VXLAN/GENEVE to inherit TOS/TTL from VLAN")
ca2bb69514a8 ("geneve: do not use RT_TOS for IPv6 flowlabel")
b4ab94d6adaa ("geneve: fix TOS inheriting for ipv4")
Signed-off-by: Matthias May <matthias.may(a)westermo.com>
---
tools/testing/selftests/net/Makefile | 1 +
.../selftests/net/l2_tos_ttl_inherit.sh | 390 ++++++++++++++++++
2 files changed, 391 insertions(+)
create mode 100755 tools/testing/selftests/net/l2_tos_ttl_inherit.sh
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index c0ee2955fe54..11a288b67e2f 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -42,6 +42,7 @@ TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS += ndisc_unsolicited_na_test.sh
TEST_PROGS += arp_ndisc_untracked_subnets.sh
TEST_PROGS += stress_reuseport_listen.sh
+TEST_PROGS := l2_tos_ttl_inherit.sh
TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
new file mode 100755
index 000000000000..dca1e6f777a8
--- /dev/null
+++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
@@ -0,0 +1,390 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Author: Matthias May <matthias.may(a)westermo.com>
+#
+# This script evaluates ip tunnels that are capable of carrying L2 traffic
+# if they inherit or set the inheritable fields.
+# Namely these tunnels are: 'gretap', 'vxlan' and 'geneve'.
+# Checked inheritable fields are: TOS and TTL.
+# The outer tunnel protocol of 'IPv4' or 'IPv6' is verified-
+# As payload frames of type 'IPv4', 'IPv6' and 'other'(ARP) are verified.
+# In addition this script also checks if forcing a specific field in the
+# outer header is working.
+
+if [ "$(id -u)" != "0" ]; then
+ echo "Please run as root."
+ exit 0
+fi
+if ! which tcpdump > /dev/null 2>&1; then
+ echo "No tcpdump found. Required for this test."
+ exit 0
+fi
+
+expected_tos="0x00"
+expected_ttl="0"
+failed=false
+
+get_random_tos() {
+ # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4
+ echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\
+$(tr -dc '048c' < /dev/urandom | head -c 1)"
+}
+get_random_ttl() {
+ # Get a random dec value between 0 and 255
+ printf "%d" "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 2)"
+}
+get_field() {
+ # Expects to get the 'head -n 1' of a captured frame by tcpdump.
+ # Parses this first line and returns the specified field.
+ local field="$1"
+ local input="$2"
+ local found=false
+ input="$(echo "$input" | tr -d '(),')"
+ for input_field in $input; do
+ if $found; then
+ echo "$input_field"
+ return
+ fi
+ # The next field that we iterate over is the looked for value
+ if [ "$input_field" = "$field" ]; then
+ found=true
+ fi
+ done
+ echo "0"
+}
+setup() {
+ local type="$1"
+ local outer="$2"
+ local inner="$3"
+ local tos_ttl="$4"
+ local vlan="$5"
+ local test_tos="0x00"
+ local test_ttl="0"
+ local ns="ip netns exec testing"
+
+ # We don't want a test-tos of 0x00,
+ # because this is the value that we get when no tos is set.
+ expected_tos="$(get_random_tos)"
+ while [ "$expected_tos" = "0x00" ]; do
+ expected_tos="$(get_random_tos)"
+ done
+ if [ "$tos_ttl" = "random" ]; then
+ test_tos="$expected_tos"
+ tos="fixed $test_tos"
+ elif [ "$tos_ttl" = "inherit" ]; then
+ test_tos="$tos_ttl"
+ tos="inherit $expected_tos"
+ fi
+
+ # We don't want a test-ttl of 64 or 0,
+ # because 64 is when no ttl is set and 0 is not a valid ttl.
+ expected_ttl="$(get_random_ttl)"
+ while [ "$expected_ttl" = "64" ] || [ "$expected_ttl" = "0" ]; do
+ expected_ttl="$(get_random_ttl)"
+ done
+
+ if [ "$tos_ttl" = "random" ]; then
+ test_ttl="$expected_ttl"
+ ttl="fixed $test_ttl"
+ elif [ "$tos_ttl" = "inherit" ]; then
+ test_ttl="$tos_ttl"
+ ttl="inherit $expected_ttl"
+ fi
+ printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \
+ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan"
+
+ # Create 'testing' netns, veth pair and connect main ns with testing ns
+ ip netns add testing
+ ip link add type veth
+ ip link set veth1 netns testing
+ ip link set veth0 up
+ $ns ip link set veth1 up
+ ip addr flush dev veth0
+ $ns ip addr flush dev veth1
+
+ local local_addr1=""
+ local local_addr2=""
+ if [ "$type" = "gre" ] || [ "$type" = "vxlan" ]; then
+ if [ "$outer" = "4" ]; then
+ local_addr1="local 198.18.0.1"
+ local_addr2="local 198.18.0.2"
+ elif [ "$outer" = "6" ]; then
+ local_addr1="local fdd1:ced0:5d88:3fce::1"
+ local_addr2="local fdd1:ced0:5d88:3fce::2"
+ fi
+ fi
+ local vxlan=""
+ if [ "$type" = "vxlan" ]; then
+ vxlan="vni 100 dstport 4789"
+ fi
+ local geneve=""
+ if [ "$type" = "geneve" ]; then
+ geneve="vni 100"
+ fi
+ # Create tunnel and assign outer IPv4/IPv6 addresses
+ if [ "$outer" = "4" ]; then
+ if [ "$type" = "gre" ]; then
+ type="gretap"
+ fi
+ ip addr add 198.18.0.1/24 dev veth0
+ $ns ip addr add 198.18.0.2/24 dev veth1
+ ip link add name tep0 type $type $local_addr1 remote \
+ 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve
+ $ns ip link add name tep1 type $type $local_addr2 remote \
+ 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve
+ elif [ "$outer" = "6" ]; then
+ if [ "$type" = "gre" ]; then
+ type="ip6gretap"
+ fi
+ ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0
+ $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1
+ ip link add name tep0 type $type $local_addr1 \
+ remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \
+ $vxlan $geneve
+ $ns ip link add name tep1 type $type $local_addr2 \
+ remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \
+ $vxlan $geneve
+ fi
+
+ # Bring L2-tunnel link up and create VLAN on top
+ ip link set tep0 up
+ $ns ip link set tep1 up
+ ip addr flush dev tep0
+ $ns ip addr flush dev tep1
+ local parent
+ if $vlan; then
+ parent="vlan99-"
+ ip link add link tep0 name ${parent}0 type vlan id 99
+ $ns ip link add link tep1 name ${parent}1 type vlan id 99
+ ip link set ${parent}0 up
+ $ns ip link set ${parent}1 up
+ ip addr flush dev ${parent}0
+ $ns ip addr flush dev ${parent}1
+ else
+ parent="tep"
+ fi
+
+ # Assign inner IPv4/IPv6 addresses
+ if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then
+ ip addr add 198.19.0.1/24 brd + dev ${parent}0
+ $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1
+ elif [ "$inner" = "6" ]; then
+ ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0
+ $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1
+ fi
+}
+
+verify() {
+ local outer="$1"
+ local inner="$2"
+ local tos_ttl="$3"
+ local vlan="$4"
+
+ local ping_pid out captured_tos captured_ttl result
+
+ local ping_dst
+ if [ "$inner" = "4" ]; then
+ ping_dst="198.19.0.2"
+ elif [ "$inner" = "6" ]; then
+ ping_dst="fdd4:96cf:4eae:443b::2"
+ elif [ "$inner" = "other" ]; then
+ ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \
+ 2>/dev/null 1>&2 & ping_pid="$!"
+ else
+ ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!"
+ fi
+ local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset
+ if [ "$type" = "gre" ]; then
+ tunnel_type_proto="0x2f"
+ elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ tunnel_type_proto="0x11"
+ fi
+ if [ "$outer" = "4" ]; then
+ tunnel_type_offset="9"
+ if [ "$inner" = "4" ]; then
+ req_proto_offset="47"
+ req_offset="58"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x01 and \
+ ip[$req_offset] = 0x08 2>/dev/null | head -n 1)"
+ elif [ "$inner" = "6" ]; then
+ req_proto_offset="44"
+ req_offset="78"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x3a and \
+ ip[$req_offset] = 0x80 2>/dev/null | head -n 1)"
+ elif [ "$inner" = "other" ]; then
+ req_proto_offset="36"
+ req_offset="45"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ expected_tos="0x00"
+ expected_ttl="64"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x08 and \
+ ip[$((req_proto_offset + 1))] = 0x06 and \
+ ip[$req_offset] = 0x01 2>/dev/null | head -n 1)"
+ fi
+ elif [ "$outer" = "6" ]; then
+ if [ "$type" = "gre" ]; then
+ tunnel_type_offset="40"
+ elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ tunnel_type_offset="6"
+ fi
+ if [ "$inner" = "4" ]; then
+ local req_proto_offset="75"
+ local req_offset="86"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x01 and \
+ ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)"
+ elif [ "$inner" = "6" ]; then
+ local req_proto_offset="72"
+ local req_offset="106"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x3a and \
+ ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)"
+ elif [ "$inner" = "other" ]; then
+ local req_proto_offset="64"
+ local req_offset="73"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ expected_tos="0x00"
+ expected_ttl="64"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x08 and \
+ ip6[$((req_proto_offset + 1))] = 0x06 and \
+ ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)"
+ fi
+ fi
+ kill -9 $ping_pid
+ wait $ping_pid 2>/dev/null
+ result="FAIL"
+ if [ "$outer" = "4" ]; then
+ captured_ttl="$(get_field "ttl" "$out")"
+ captured_tos="$(printf "0x%02x" "$(get_field "tos" "$out")")"
+ if [ "$captured_tos" = "$expected_tos" ] &&
+ [ "$captured_ttl" = "$expected_ttl" ]; then
+ result="OK"
+ fi
+ elif [ "$outer" = "6" ]; then
+ captured_ttl="$(get_field "hlim" "$out")"
+ captured_tos="$(printf "0x%02x" "$(get_field "class" "$out")")"
+ if [ "$captured_tos" = "$expected_tos" ] &&
+ [ "$captured_ttl" = "$expected_ttl" ]; then
+ result="OK"
+ fi
+ fi
+
+ printf "%7s │\n" "$result"
+ if [ "$result" = "FAIL" ]; then
+ failed=true
+ if [ "$captured_tos" != "$expected_tos" ]; then
+ printf "│%43s%27s │\n" \
+ "Expected TOS value: $expected_tos" \
+ "Captured TOS value: $captured_tos"
+ fi
+ if [ "$captured_ttl" != "$expected_ttl" ]; then
+ printf "│%43s%27s │\n" \
+ "Expected TTL value: $expected_ttl" \
+ "Captured TTL value: $captured_ttl"
+ fi
+ printf "│%71s│\n" " "
+ fi
+}
+
+cleanup() {
+ ip link del veth0 2>/dev/null
+ ip netns del testing 2>/dev/null
+ ip link del tep0 2>/dev/null
+}
+
+printf "┌────────┬───────┬───────┬──────────────┬"
+printf "──────────────┬───────┬────────┐\n"
+for type in gre vxlan geneve; do
+ if ! $(modprobe "$type" 2>/dev/null); then
+ continue
+ fi
+ for outer in 4 6; do
+ printf "├────────┼───────┼───────┼──────────────┼"
+ printf "──────────────┼───────┼────────┤\n"
+ printf "│ Type │ outer | inner │ tos │"
+ printf " ttl │ vlan │ result │\n"
+ for inner in 4 6 other; do
+ printf "├────────┼───────┼───────┼──────────────┼"
+ printf "──────────────┼───────┼────────┤\n"
+ for tos_ttl in inherit random; do
+ for vlan in false true; do
+ setup "$type" "$outer" "$inner" \
+ "$tos_ttl" "$vlan"
+ verify "$outer" "$inner" "$tos_ttl" \
+ "$vlan"
+ cleanup
+ done
+ done
+ done
+ done
+done
+printf "└────────┴───────┴───────┴──────────────┴"
+printf "──────────────┴───────┴────────┘\n"
+
+if $failed; then
+ exit 1
+fi
--
2.35.1
This series is based on torvalds/master.
The series is split up like so:
- Patch 1 is a simple fixup which we should take in any case (even by itself).
- Patches 2-5 add the feature, configurable selftest support, and docs.
Why not ...?
============
- Why not /proc/[pid]/userfaultfd? Two main points (additional discussion [1]):
- /proc/[pid]/* files are all owned by the user/group of the process, and
they don't really support chmod/chown. So, without extending procfs it
doesn't solve the problem this series is trying to solve.
- The main argument *for* this was to support creating UFFDs for remote
processes. But, that use case clearly calls for CAP_SYS_PTRACE, so to
support this we could just use the UFFD syscall as-is.
- Why not use a syscall? Access to syscalls is generally controlled by
capabilities. We don't have a capability which is used for userfaultfd access
without also granting more / other permissions as well, and adding a new
capability was rejected [2].
- It's possible a LSM could be used to control access instead, but I have
some concerns. I don't think this approach would be as easy to use,
particularly if we were to try to solve this with something heavyweight
like SELinux. Maybe we could pursue adding a new LSM specifically for
this user case, but it may be too narrow of a case to justify that.
Changelog
=========
v6->v7:
- Handle misc_register() failure properly by propagating the error instead if
just WARN_ON-ing. [Greg]
- Remove no-op open function from file_operations, since its caller detects
the lack of an open implementation and proceeds normally anyway. [Greg]
v5->v6:
- Modified selftest to exit with KSFT_SKIP *only* when features are
unsupported, exiting with 1 in other error cases. [Mike]
- Improved wording in two spots in the documentation. [Mike]
- Picked up some Acked-by's.
v4->v5:
- Call userfaultfd_syscall_allowed() directly in the syscall, so we don't
have to plumb a flag into new_userfaultfd(). [Nadav]
- Refactored run_vmtests.sh to loop over UFFD test mods. [Nadav]
- Reworded cover letter.
- Picked up some Acked-by's.
v3->v4:
- Picked up an Acked-by on 5/5.
- Updated cover letter to cover "why not ...".
- Refactored userfaultfd_allowed() into userfaultfd_syscall_allowed(). [Peter]
- Removed obsolete comment from a previous version. [Peter]
- Refactored userfaultfd_open() in selftest. [Peter]
- Reworded admin-guide documentation. [Mike, Peter]
- Squashed 2 commits adding /dev/userfaultfd to selftest and making selftest
configurable. [Peter]
- Added "syscall" test modifier (the default behavior) to selftest. [Peter]
v2->v3:
- Rebased onto linux-next/akpm-base, in order to be based on top of the
run_vmtests.sh refactor which was merged previously.
- Picked up some Reviewed-by's.
- Fixed ioctl definition (_IO instead of _IOWR), and stopped using
compat_ptr_ioctl since it is unneeded for ioctls which don't take a pointer.
- Removed the "handle_kernel_faults" bool, simplifying the code. The result is
logically equivalent, but simpler.
- Fixed userfaultfd selftest so it returns KSFT_SKIP appropriately.
- Reworded documentation per Shuah's feedback on v2.
- Improved example usage for userfaultfd selftest.
v1->v2:
- Add documentation update.
- Test *both* userfaultfd(2) and /dev/userfaultfd via the selftest.
[1]: https://patchwork.kernel.org/project/linux-mm/cover/20220719195628.3415852-…
[2]: https://lore.kernel.org/lkml/686276b9-4530-2045-6bd8-170e5943abe4@schaufler…
Axel Rasmussen (5):
selftests: vm: add hugetlb_shared userfaultfd test to run_vmtests.sh
userfaultfd: add /dev/userfaultfd for fine grained access control
userfaultfd: selftests: modify selftest to use /dev/userfaultfd
userfaultfd: update documentation to describe /dev/userfaultfd
selftests: vm: add /dev/userfaultfd test cases to run_vmtests.sh
Documentation/admin-guide/mm/userfaultfd.rst | 41 ++++++++++-
Documentation/admin-guide/sysctl/vm.rst | 3 +
fs/userfaultfd.c | 71 +++++++++++++-----
include/uapi/linux/userfaultfd.h | 4 ++
tools/testing/selftests/vm/run_vmtests.sh | 15 ++--
tools/testing/selftests/vm/userfaultfd.c | 76 +++++++++++++++++---
6 files changed, 176 insertions(+), 34 deletions(-)
--
2.37.1.595.g718a3a8f04-goog
This series is based on torvalds/master.
The series is split up like so:
- Patch 1 is a simple fixup which we should take in any case (even by itself).
- Patches 2-5 add the feature, configurable selftest support, and docs.
Why not ...?
============
- Why not /proc/[pid]/userfaultfd? Two main points (additional discussion [1]):
- /proc/[pid]/* files are all owned by the user/group of the process, and
they don't really support chmod/chown. So, without extending procfs it
doesn't solve the problem this series is trying to solve.
- The main argument *for* this was to support creating UFFDs for remote
processes. But, that use case clearly calls for CAP_SYS_PTRACE, so to
support this we could just use the UFFD syscall as-is.
- Why not use a syscall? Access to syscalls is generally controlled by
capabilities. We don't have a capability which is used for userfaultfd access
without also granting more / other permissions as well, and adding a new
capability was rejected [2].
- It's possible a LSM could be used to control access instead, but I have
some concerns. I don't think this approach would be as easy to use,
particularly if we were to try to solve this with something heavyweight
like SELinux. Maybe we could pursue adding a new LSM specifically for
this user case, but it may be too narrow of a case to justify that.
Changelog
=========
v5->v6:
- Modified selftest to exit with KSFT_SKIP *only* when features are
unsupported, exiting with 1 in other error cases. [Mike]
- Improved wording in two spots in the documentation. [Mike]
- Picked up some Acked-by's.
v4->v5:
- Call userfaultfd_syscall_allowed() directly in the syscall, so we don't
have to plumb a flag into new_userfaultfd(). [Nadav]
- Refactored run_vmtests.sh to loop over UFFD test mods. [Nadav]
- Reworded cover letter.
- Picked up some Acked-by's.
v3->v4:
- Picked up an Acked-by on 5/5.
- Updated cover letter to cover "why not ...".
- Refactored userfaultfd_allowed() into userfaultfd_syscall_allowed(). [Peter]
- Removed obsolete comment from a previous version. [Peter]
- Refactored userfaultfd_open() in selftest. [Peter]
- Reworded admin-guide documentation. [Mike, Peter]
- Squashed 2 commits adding /dev/userfaultfd to selftest and making selftest
configurable. [Peter]
- Added "syscall" test modifier (the default behavior) to selftest. [Peter]
v2->v3:
- Rebased onto linux-next/akpm-base, in order to be based on top of the
run_vmtests.sh refactor which was merged previously.
- Picked up some Reviewed-by's.
- Fixed ioctl definition (_IO instead of _IOWR), and stopped using
compat_ptr_ioctl since it is unneeded for ioctls which don't take a pointer.
- Removed the "handle_kernel_faults" bool, simplifying the code. The result is
logically equivalent, but simpler.
- Fixed userfaultfd selftest so it returns KSFT_SKIP appropriately.
- Reworded documentation per Shuah's feedback on v2.
- Improved example usage for userfaultfd selftest.
v1->v2:
- Add documentation update.
- Test *both* userfaultfd(2) and /dev/userfaultfd via the selftest.
[1]: https://patchwork.kernel.org/project/linux-mm/cover/20220719195628.3415852-…
[2]: https://lore.kernel.org/lkml/686276b9-4530-2045-6bd8-170e5943abe4@schaufler…
Axel Rasmussen (5):
selftests: vm: add hugetlb_shared userfaultfd test to run_vmtests.sh
userfaultfd: add /dev/userfaultfd for fine grained access control
userfaultfd: selftests: modify selftest to use /dev/userfaultfd
userfaultfd: update documentation to describe /dev/userfaultfd
selftests: vm: add /dev/userfaultfd test cases to run_vmtests.sh
Documentation/admin-guide/mm/userfaultfd.rst | 41 ++++++++++-
Documentation/admin-guide/sysctl/vm.rst | 3 +
fs/userfaultfd.c | 73 ++++++++++++++-----
include/uapi/linux/userfaultfd.h | 4 ++
tools/testing/selftests/vm/run_vmtests.sh | 15 ++--
tools/testing/selftests/vm/userfaultfd.c | 76 +++++++++++++++++---
6 files changed, 178 insertions(+), 34 deletions(-)
--
2.37.1.595.g718a3a8f04-goog
Fix the comment to accurately describe the test and recently added
SYSTEM_SUSPEND test case.
What was once psci_cpu_on_test was renamed and extended to squeeze in a
test case for PSCI SYSTEM_SUSPEND. Nonetheless, the author of those
changes (whoever they may be...) failed to update the file comment to
reflect what had changed.
Reported-by: Reiji Watanabe <reijiw(a)google.com>
Signed-off-by: Oliver Upton <oliver.upton(a)linux.dev>
---
tools/testing/selftests/kvm/aarch64/psci_test.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index f7621f6e938e..8a77bd06427c 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the
- * CPU_ON PSCI call matches what the caller requested.
+ * psci_test - Tests relating to KVM's PSCI implementation.
*
* Copyright (c) 2021 Google LLC.
*
- * This is a regression test for a race between KVM servicing the PSCI call and
- * userspace reading the vCPUs registers.
+ * This test includes:
+ * - A regression test for a race between KVM servicing the PSCI CPU_ON call
+ * and userspace reading the targeted vCPU's registers.
+ * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
+ * KVM_EXIT_SYSTEM_SUSPEND UAPI.
*/
#define _GNU_SOURCE
base-commit: 568035b01cfb107af8d2e4bd2fb9aea22cf5b868
--
2.37.1.595.g718a3a8f04-goog
This creates a test collection in drivers/net/bonding for bonding
specific kernel selftests.
The first test is a reproducer that provisions a bond and given the
specific order in how the ip-link(8) commands are issued the bond never
transmits an LACPDU frame on any of its slaves.
Signed-off-by: Jonathan Toppins <jtoppins(a)redhat.com>
---
Notes:
v2:
* fully integrated the test into the kselftests infrastructure
* moved the reproducer to under
tools/testing/selftests/drivers/net/bonding
* reduced the test to its minimial amount and used ip-link(8) for
all bond interface configuration
v3:
* rebase to latest net/master
* remove `#set -x` requested by Hangbin
v4:
* no changes
v5:
* no changes
MAINTAINERS | 1 +
tools/testing/selftests/Makefile | 1 +
.../selftests/drivers/net/bonding/Makefile | 6 ++
.../net/bonding/bond-break-lacpdu-tx.sh | 81 +++++++++++++++++++
.../selftests/drivers/net/bonding/config | 1 +
.../selftests/drivers/net/bonding/settings | 1 +
6 files changed, 91 insertions(+)
create mode 100644 tools/testing/selftests/drivers/net/bonding/Makefile
create mode 100755 tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
create mode 100644 tools/testing/selftests/drivers/net/bonding/config
create mode 100644 tools/testing/selftests/drivers/net/bonding/settings
diff --git a/MAINTAINERS b/MAINTAINERS
index f2d64020399b..e5fb14dc302d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3672,6 +3672,7 @@ F: Documentation/networking/bonding.rst
F: drivers/net/bonding/
F: include/net/bond*
F: include/uapi/linux/if_bonding.h
+F: tools/testing/selftests/net/bonding/
BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER
M: Dan Robertson <dan(a)dlrobertson.com>
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 10b34bb03bc1..c2064a35688b 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -12,6 +12,7 @@ TARGETS += cpu-hotplug
TARGETS += damon
TARGETS += drivers/dma-buf
TARGETS += drivers/s390x/uvdevice
+TARGETS += drivers/net/bonding
TARGETS += efivarfs
TARGETS += exec
TARGETS += filesystems
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
new file mode 100644
index 000000000000..ab6c54b12098
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+TEST_PROGS := bond-break-lacpdu-tx.sh
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
new file mode 100755
index 000000000000..47ab90596acb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# Verify LACPDUs get transmitted after setting the MAC address of
+# the bond.
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=2020773
+#
+# +---------+
+# | fab-br0 |
+# +---------+
+# |
+# +---------+
+# | fbond |
+# +---------+
+# | |
+# +------+ +------+
+# |veth1 | |veth2 |
+# +------+ +------+
+#
+# We use veths instead of physical interfaces
+
+set -e
+tmp=$(mktemp -q dump.XXXXXX)
+cleanup() {
+ ip link del fab-br0 >/dev/null 2>&1 || :
+ ip link del fbond >/dev/null 2>&1 || :
+ ip link del veth1-bond >/dev/null 2>&1 || :
+ ip link del veth2-bond >/dev/null 2>&1 || :
+ modprobe -r bonding >/dev/null 2>&1 || :
+ rm -f -- ${tmp}
+}
+
+trap cleanup 0 1 2
+cleanup
+sleep 1
+
+# create the bridge
+ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \
+ forward_delay 15
+
+# create the bond
+ip link add fbond type bond mode 4 miimon 200 xmit_hash_policy 1 \
+ ad_actor_sys_prio 65535 lacp_rate fast
+
+# set bond address
+ip link set fbond address 52:54:00:3B:7C:A6
+ip link set fbond up
+
+# set again bond sysfs parameters
+ip link set fbond type bond ad_actor_sys_prio 65535
+
+# create veths
+ip link add name veth1-bond type veth peer name veth1-end
+ip link add name veth2-bond type veth peer name veth2-end
+
+# add ports
+ip link set fbond master fab-br0
+ip link set veth1-bond down master fbond
+ip link set veth2-bond down master fbond
+
+# bring up
+ip link set veth1-end up
+ip link set veth2-end up
+ip link set fab-br0 up
+ip link set fbond up
+ip addr add dev fab-br0 10.0.0.3
+
+tcpdump -n -i veth1-end -e ether proto 0x8809 >${tmp} 2>&1 &
+sleep 15
+pkill tcpdump >/dev/null 2>&1
+rc=0
+num=$(grep "packets captured" ${tmp} | awk '{print $1}')
+if test "$num" -gt 0; then
+ echo "PASS, captured ${num}"
+else
+ echo "FAIL"
+ rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
new file mode 100644
index 000000000000..dc1c22de3c92
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -0,0 +1 @@
+CONFIG_BONDING=y
diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings
new file mode 100644
index 000000000000..867e118223cd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/settings
@@ -0,0 +1 @@
+timeout=60
--
2.31.1
This patch set extends the locked port feature for devices
that are behind a locked port, but do not have the ability to
authorize themselves as a supplicant using IEEE 802.1X.
Such devices can be printers, meters or anything related to
fixed installations. Instead of 802.1X authorization, devices
can get access based on their MAC addresses being whitelisted.
For an authorization daemon to detect that a device is trying
to get access through a locked port, the bridge will add the
MAC address of the device to the FDB with a locked flag to it.
Thus the authorization daemon can catch the FDB add event and
check if the MAC address is in the whitelist and if so replace
the FDB entry without the locked flag enabled, and thus open
the port for the device.
This feature is known as MAC-Auth or MAC Authentication Bypass
(MAB) in Cisco terminology, where the full MAB concept involves
additional Cisco infrastructure for authorization. There is no
real authentication process, as the MAC address of the device
is the only input the authorization daemon, in the general
case, has to base the decision if to unlock the port or not.
With this patch set, an implementation of the offloaded case is
supplied for the mv88e6xxx driver. When a packet ingresses on
a locked port, an ATU miss violation event will occur. When
handling such ATU miss violation interrupts, the MAC address of
the device is added to the FDB with a zero destination port
vector (DPV) and the MAC address is communicated through the
switchdev layer to the bridge, so that a FDB entry with the
locked flag enabled can be added.
Log:
v3: Added timers and lists in the driver (mv88e6xxx)
to keep track of and remove locked entries.
v4: Leave out enforcing a limit to the number of
locked entries in the bridge.
Removed the timers in the driver and use the
worker only. Add locked FDB flag to all drivers
using port_fdb_add() from the dsa api and let
all drivers ignore entries with this flag set.
Change how to get the ageing timeout of locked
entries. See global1_atu.c and switchdev.c.
Use struct mv88e6xxx_port for locked entries
variables instead of struct dsa_port.
Hans Schultz (6):
net: bridge: add locked entry fdb flag to extend locked port feature
net: switchdev: add support for offloading of fdb locked flag
drivers: net: dsa: add locked fdb entry flag to drivers
net: dsa: mv88e6xxx: allow reading FID when handling ATU violations
net: dsa: mv88e6xxx: mac-auth/MAB implementation
selftests: forwarding: add test of MAC-Auth Bypass to locked port
tests
drivers/net/dsa/b53/b53_common.c | 5 +
drivers/net/dsa/b53/b53_priv.h | 1 +
drivers/net/dsa/hirschmann/hellcreek.c | 5 +
drivers/net/dsa/lan9303-core.c | 5 +
drivers/net/dsa/lantiq_gswip.c | 5 +
drivers/net/dsa/microchip/ksz9477.c | 5 +
drivers/net/dsa/mt7530.c | 5 +
drivers/net/dsa/mv88e6xxx/Makefile | 1 +
drivers/net/dsa/mv88e6xxx/chip.c | 54 +++-
drivers/net/dsa/mv88e6xxx/chip.h | 15 +
drivers/net/dsa/mv88e6xxx/global1.h | 1 +
drivers/net/dsa/mv88e6xxx/global1_atu.c | 32 +-
drivers/net/dsa/mv88e6xxx/port.c | 30 +-
drivers/net/dsa/mv88e6xxx/port.h | 2 +
drivers/net/dsa/mv88e6xxx/switchdev.c | 280 ++++++++++++++++++
drivers/net/dsa/mv88e6xxx/switchdev.h | 37 +++
drivers/net/dsa/ocelot/felix.c | 5 +
drivers/net/dsa/qca8k.c | 5 +
drivers/net/dsa/sja1105/sja1105_main.c | 5 +
include/net/dsa.h | 7 +
include/net/switchdev.h | 1 +
include/uapi/linux/neighbour.h | 1 +
net/bridge/br.c | 3 +-
net/bridge/br_fdb.c | 19 +-
net/bridge/br_input.c | 10 +-
net/bridge/br_private.h | 5 +-
net/bridge/br_switchdev.c | 1 +
net/dsa/dsa_priv.h | 4 +-
net/dsa/port.c | 7 +-
net/dsa/slave.c | 4 +-
net/dsa/switch.c | 10 +-
.../net/forwarding/bridge_locked_port.sh | 30 +-
32 files changed, 566 insertions(+), 34 deletions(-)
create mode 100644 drivers/net/dsa/mv88e6xxx/switchdev.c
create mode 100644 drivers/net/dsa/mv88e6xxx/switchdev.h
--
2.30.2
Dzień dobry,
kontaktuję się z Państwem, ponieważ chciałbym zaproponować wygodne rozwiązanie, które umożliwi Państwa firmie stabilny rozwój.
Konkurencyjne otoczenie wymaga ciągłego ulepszania i poszerzenia oferty, co z kolei wiąże się z koniecznością inwestowania. Brak odpowiedniego kapitału poważnie ogranicza tempo rozwoju firmy.
Od wielu lat z powodzeniem pomagam firmom w uzyskaniu najlepszej formy finansowania z banku oraz UE. Mam stałych Klientów, którzy nadal chętnie korzystają z moich usług, a także polecają je innym.
Czy chcieliby Państwo skorzystać z pomocy wykwalifikowanego i doświadczonego doradcy finansowego?
Pozdrawiam
Jakub Olejniczak