Verify that the total device stats don't decrease after the device has been brought down. Also make sure the device doesn't crash when we access per-queue stats while it's down (in case it tries to access some pointers that are NULL).
KTAP version 1 1..5 ok 1 stats.check_pause ok 2 stats.check_fec ok 3 stats.pkt_byte_sum ok 4 stats.qstat_by_ifindex ok 5 stats.check_down # Totals: pass:5 fail:0 xfail:0 xpass:0 skip:0 error:0
v3: - use errno.EOPNOTSUPP (Petr) - move qstat[0] under try (Petr)
v2: - KTAP output formatting (Jakub) - defer instead of try/finally (Jakub) - disappearing stats is an error (Jakub) - ksft_ge instead of open coding (Jakub)
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me> -- Cc: Shuah Khan <shuah@kernel.org> Cc: Joe Damato <jdamato@fastly.com> Cc: Petr Machata <petrm@nvidia.com> Cc: linux-kselftest@vger.kernel.org --- tools/testing/selftests/drivers/net/stats.py | 25 +++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 820b8e0a22c6..2fdde8cf0307 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -1,10 +1,12 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0
+import errno from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv +from lib.py import ip, defer
ethnl = EthtoolFamily() netfam = NetdevFamily() @@ -133,9 +135,30 @@ rtnl = RtnlFamily() ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+def check_down(cfg) -> None: + try: + qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("qstats not supported by the device") + raise + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + for k, v in qstat.items(): + ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") + + # exercise per-queue API to make sure that "device down" state + # is handled correctly and doesn't crash + netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) + + def main() -> None: with NetDrvEnv(__file__) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex], + ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, + check_down], args=(cfg, )) ksft_exit()
Add a new @ksft_disruptive decorator to mark the tests that might be disruptive to the system. Depending on how well the previous test works in the CI, we might want to disable disruptive tests by default and only let the developers run them manually.
The KSFT framework runs disruptive tests by default. The DISRUPTIVE=False environment variable (or config file entry) can be used to disable these tests. ksft_setup should be called by the test cases that want to use the new decorator (ksft_setup is only called via NetDrvEnv/NetDrvEpEnv for now).
In the future we can add similar decorators to, for example, avoid running slow tests all the time. And/or have some option to run only 'fast' tests for some sort of smoke test scenario.
$ DISRUPTIVE=False ./stats.py KTAP version 1 1..5 ok 1 stats.check_pause ok 2 stats.check_fec ok 3 stats.pkt_byte_sum ok 4 stats.qstat_by_ifindex ok 5 stats.check_down # SKIP marked as disruptive # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:1 error:0
v3: - parse yes and properly treat non-zero nums as true (Petr)
v2: - convert from cli argument to env variable (Jakub)
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me> -- Cc: Shuah Khan <shuah@kernel.org> Cc: Joe Damato <jdamato@fastly.com> Cc: Petr Machata <petrm@nvidia.com> Cc: linux-kselftest@vger.kernel.org --- .../selftests/drivers/net/lib/py/env.py | 5 ++- tools/testing/selftests/drivers/net/stats.py | 2 + tools/testing/selftests/net/lib/py/ksft.py | 40 +++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index a5e800b8f103..1ea9bb695e94 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,6 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import ksft_setup from lib.py import cmd, ethtool, ip from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -14,7 +15,7 @@ from .remote import Remote
src_dir = Path(src_path).parent.resolve() if not (src_dir / "net.config").exists(): - return env + return ksft_setup(env)
with open((src_dir / "net.config").as_posix(), 'r') as fp: for line in fp.readlines(): @@ -30,7 +31,7 @@ from .remote import Remote if len(pair) != 2: raise Exception("Can't parse configuration line:", full_file) env[pair[0]] = pair[1] - return env + return ksft_setup(env)
class NetDrvEnv: diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 2fdde8cf0307..d17dfed2788f 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -4,6 +4,7 @@ import errno from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx +from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv from lib.py import ip, defer @@ -135,6 +136,7 @@ rtnl = RtnlFamily() ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+@ksft_disruptive def check_down(cfg) -> None: try: qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index f26c20df9db4..353860fe6223 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0
import builtins +import functools import inspect import sys import time @@ -10,6 +11,7 @@ from .utils import global_defer_queue
KSFT_RESULT = None KSFT_RESULT_ALL = True +KSFT_DISRUPTIVE = True
class KsftFailEx(Exception): @@ -127,6 +129,44 @@ KSFT_RESULT_ALL = True KSFT_RESULT = False
+def ksft_disruptive(func): + """ + Decorator that marks the test as disruptive (e.g. the test + that can down the interface). Disruptive tests can be skipped + by passing DISRUPTIVE=False environment variable. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not KSFT_DISRUPTIVE: + raise KsftSkipEx(f"marked as disruptive") + return func(*args, **kwargs) + return wrapper + + +def ksft_setup(env): + """ + Setup test framework global state from the environment. + """ + + def get_bool(env, name): + value = env.get(name, "").lower() + if value in ["yes", "true"]: + return True + if value in ["no", "false"]: + return False + try: + return bool(int(value)) + except: + raise Exception(f"failed to parse {name}") + + if "DISRUPTIVE" in env: + global KSFT_DISRUPTIVE + KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE") + + return env + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or []
Petr suggested using errno.EOPNOTSUPP instead of the hard-coded 95 in the new test case. Adjust the existing ones to match this style.
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me> -- Cc: Shuah Khan <shuah@kernel.org> Cc: Joe Damato <jdamato@fastly.com> Cc: Petr Machata <petrm@nvidia.com> Cc: linux-kselftest@vger.kernel.org --- tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py | 3 ++- tools/testing/selftests/drivers/net/stats.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py index 026d98976c35..05b6fbb3fcdd 100755 --- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0
+import errno import time import os from lib.py import ksft_run, ksft_exit, ksft_pr @@ -61,7 +62,7 @@ from lib.py import cmd, tool, GenerateTraffic try: stats = get_stats() except NlError as e: - if e.nl_msg.error == -95: + if e.nl_msg.error == -errno.EOPNOTSUPP: stats = {} else: raise diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index d17dfed2788f..63e3c045a3b2 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -20,7 +20,7 @@ rtnl = RtnlFamily() try: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: - if e.error == 95: + if e.error == errno.EOPNOTSUPP: raise KsftXfailEx("pause not supported by the device") raise
@@ -35,7 +35,7 @@ rtnl = RtnlFamily() try: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: - if e.error == 95: + if e.error == errno.EOPNOTSUPP: raise KsftXfailEx("FEC not supported by the device") raise
@@ -120,7 +120,7 @@ rtnl = RtnlFamily() # loopback has no stats with ksft_raises(NlError) as cm: netfam.qstats_get({"ifindex": 1}, dump=True) - ksft_eq(cm.exception.nl_msg.error, -95) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
# Try to get stats for lowest unused ifindex but not 0
Hello:
This series was applied to netdev/net-next.git (main) by Jakub Kicinski <kuba@kernel.org>:
On Thu, 1 Aug 2024 17:03:07 -0700 you wrote:
Verify that total device stats don't decrease after it has been turned down. Also make sure the device doesn't crash when we access per-queue stats when it's down (in case it tries to access some pointers that are NULL).
KTAP version 1 1..5 ok 1 stats.check_pause ok 2 stats.check_fec ok 3 stats.pkt_byte_sum ok 4 stats.qstat_by_ifindex ok 5 stats.check_down # Totals: pass:5 fail:0 xfail:0 xpass:0 skip:0 error:0
[...]
Here is the summary with links: - [net-next,v3,1/3] selftests: net-drv: exercise queue stats when the device is down https://git.kernel.org/netdev/net-next/c/ab1000976cc7 - [net-next,v3,2/3] selftests: net: ksft: support marking tests as disruptive https://git.kernel.org/netdev/net-next/c/f87930683481 - [net-next,v3,3/3] selftests: net: ksft: replace 95 with errno.EOPNOTSUPP https://git.kernel.org/netdev/net-next/c/a48395f22b8c
You are awesome, thank you!
linux-kselftest-mirror@lists.linaro.org