Verify that total device stats don't decrease after it has been turned down. Also make sure the device doesn't crash when we access per-queue stats when it's down (in case it tries to access some pointers that are NULL).
KTAP version 1 1..5 ok 1 stats.check_pause ok 2 stats.check_fec ok 3 stats.pkt_byte_sum ok 4 stats.qstat_by_ifindex ok 5 stats.check_down # Totals: pass:5 fail:0 xfail:0 xpass:0 skip:0 error:0
Signed-off-by: Stanislav Fomichev sdf@fomichev.me -- Cc: Shuah Khan shuah@kernel.org Cc: Joe Damato jdamato@fastly.com Cc: Petr Machata petrm@nvidia.com Cc: linux-kselftest@vger.kernel.org --- tools/testing/selftests/drivers/net/stats.py | 31 +++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 820b8e0a22c6..6f8bef379565 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -5,6 +5,7 @@ from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv +from lib.py import ip
ethnl = EthtoolFamily() netfam = NetdevFamily() @@ -133,9 +134,37 @@ rtnl = RtnlFamily() ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+def check_down(cfg) -> None: + try: + qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("qstats not supported by the device") + raise + + ip(f"link set dev {cfg.dev['ifname']} down") + + try: + qstat = qstat[0] + qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + for k, v in qstat.items(): + if k not in qstat2: + # skip the stats that are not globally preserved + continue + if qstat2[k] < qstat[k]: + raise Exception(f"{k} ({qstat2[k]}) should be preserved but has lower value ({qstat[k]}) when the device is down") + + # exercise per-queue API to make sure that "device down" state + # is handled correctly and doesn't crash + netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) + finally: + ip(f"link set dev {cfg.dev['ifname']} up") + + def main() -> None: with NetDrvEnv(__file__) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex], + ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, + check_down], args=(cfg, )) ksft_exit()
(not sure we want this, but just throwing it out there..)
Add new @ksft_disruptive decorator to mark the tests that might be disruptive to the system. Depending on how well the previous test works in the CI we might want to disable disruptive tests by default and only let the developers run them manually.
In the future we can add similar decorators to, for example, avoid running slow tests all the time. And/or have some option to run only 'fast' tests for some sort of smoke test scenario.
$ ./stats.py --skip-disruptive KTAP version 1 1..5 ok 1 stats.check_pause ok 2 stats.check_fec ok 3 stats.pkt_byte_sum ok 4 stats.qstat_by_ifindex ok 5 stats.check_down # SKIP marked as disruptive # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:1 error:0
Signed-off-by: Stanislav Fomichev sdf@fomichev.me -- Cc: Shuah Khan shuah@kernel.org Cc: Joe Damato jdamato@fastly.com Cc: Petr Machata petrm@nvidia.com Cc: linux-kselftest@vger.kernel.org --- tools/testing/selftests/drivers/net/stats.py | 2 ++ tools/testing/selftests/net/lib/py/ksft.py | 22 ++++++++++++++++++++ 2 files changed, 24 insertions(+)
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 6f8bef379565..04508894ef9c 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -3,6 +3,7 @@
from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx +from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv from lib.py import ip @@ -134,6 +135,7 @@ rtnl = RtnlFamily() ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+@ksft_disruptive def check_down(cfg) -> None: try: qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True) diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index f26c20df9db4..f6545a45942a 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0
+import argparse import builtins +import functools import inspect import sys import time @@ -10,6 +12,7 @@ from .utils import global_defer_queue
KSFT_RESULT = None KSFT_RESULT_ALL = True +KSFT_ARGS = None
class KsftFailEx(Exception): @@ -127,7 +130,26 @@ KSFT_RESULT_ALL = True KSFT_RESULT = False
+def ksft_disruptive(func): + """ + Decorator that marks the test as disruptive. Disruptive tests + can be skipped by adding --skip-disruptive argument. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if KSFT_ARGS.skip_disruptive: + raise KsftSkipEx(f"marked as disruptive") + return func(*args, **kwargs) + return wrapper + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): + parser = argparse.ArgumentParser() + parser.add_argument('--skip-disruptive', default=False, action='store_true', help='skip tests that might be disruptive (e.g. restart the interface)') + global KSFT_ARGS + KSFT_ARGS = parser.parse_args() + cases = cases or []
if globs and case_pfx:
On Mon, 29 Jul 2024 15:10:42 -0700 Stanislav Fomichev wrote:
- parser = argparse.ArgumentParser()
- parser.add_argument('--skip-disruptive', default=False, action='store_true', help='skip tests that might be disruptive (e.g. restart the interface)')
- global KSFT_ARGS
- KSFT_ARGS = parser.parse_args()
We pass all other args via env exports, I think we should stick to that, it's easier to integrate with external runners.
FWIW Mohsin is also adding VERBOSE=1 this way: https://lore.kernel.org/all/20240715030723.1768360-1-mohsin.bashr@gmail.com/
On 07/29, Jakub Kicinski wrote:
On Mon, 29 Jul 2024 15:10:42 -0700 Stanislav Fomichev wrote:
- parser = argparse.ArgumentParser()
- parser.add_argument('--skip-disruptive', default=False, action='store_true', help='skip tests that might be disruptive (e.g. restart the interface)')
- global KSFT_ARGS
- KSFT_ARGS = parser.parse_args()
We pass all other args via env exports, I think we should stick to that, it's easier to integrate with external runners.
FWIW Mohsin is also adding VERBOSE=1 this way: https://lore.kernel.org/all/20240715030723.1768360-1-mohsin.bashr@gmail.com/
SG! The patch you reference is doing it in NetDrvEnv* but I'll probably try to keep most of the code in 'core' ksft. So far I'm thinking about adding some ksft_setup(env) to initialize that disruptive=yes/no state. LMK if you prefer me to keep everything in NetDrvEnv instead (or wait until I send out a v2 later today).
And thanks for the feedback on 1/2, will incorporate in the v2.
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index a5e800b8f103..2b2a216bf108 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,6 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import ksft_setup from lib.py import cmd, ethtool, ip from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -30,6 +31,7 @@ from .remote import Remote if len(pair) != 2: raise Exception("Can't parse configuration line:", full_file) env[pair[0]] = pair[1] + ksft_setup(env) return env
On Tue, 30 Jul 2024 09:20:05 -0700 Stanislav Fomichev wrote:
SG! The patch you reference is doing it in NetDrvEnv* but I'll probably try to keep most of the code in 'core' ksft. So far I'm thinking about adding some ksft_setup(env) to initialize that disruptive=yes/no state. LMK if you prefer me to keep everything in NetDrvEnv instead (or wait until I send out a v2 later today).
Yup, core is cleaner, agreed. I haven't spent the time to investigate how to hook it in properly but both DISRUPTIVE and VERBOSE are really core settings. Just to state the obvious, would be great to maintain the ability to load the settings from a file (_load_env_file()) It's convenient not to have to re-export after reboot or when SSH session dies.
On Mon, 29 Jul 2024 15:10:41 -0700 Stanislav Fomichev wrote:
Verify that total device stats don't decrease after it has been turned down. Also make sure the device doesn't crash when we access per-queue stats when it's down (in case it tries to access some pointers that are NULL).
KTAP version 1 1..5 ok 1 stats.check_pause ok 2 stats.check_fec ok 3 stats.pkt_byte_sum ok 4 stats.qstat_by_ifindex ok 5 stats.check_down # Totals: pass:5 fail:0 xfail:0 xpass:0 skip:0 error:0
nit: may be personal preference but if you indent by 2 spaces that will neutralize the # as well
+def check_down(cfg) -> None:
- try:
qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)
- except NlError as e:
if e.error == 95:
raise KsftSkipEx("qstats not supported by the device")
raise
- ip(f"link set dev {cfg.dev['ifname']} down")
- try:
The rest of this test predates defer() but I think it may be cleaner:
ip(f"link set dev {cfg.dev['ifname']} down") defer(ip, f"link set dev {cfg.dev['ifname']} up")
then you don't need to try/finally.
qstat = qstat[0]
qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
for k, v in qstat.items():
if k not in qstat2:
# skip the stats that are not globally preserved
continue
I think we should fail if stats disappear. I can't think of a legit reason for that to happen.
if qstat2[k] < qstat[k]:
raise Exception(f"{k} ({qstat2[k]}) should be preserved but has lower value ({qstat[k]}) when the device is down")
ksft_ge(qstat2[k], qstat[k], comment=k) ?
# exercise per-queue API to make sure that "device down" state
# is handled correctly and doesn't crash
netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
- finally:
ip(f"link set dev {cfg.dev['ifname']} up")
def main() -> None: with NetDrvEnv(__file__) as cfg:
ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex],
ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
ksft_exit()check_down], args=(cfg, ))
linux-kselftest-mirror@lists.linaro.org