On Wed, 09 Jul 2025 02:08:17 -0700 Breno Leitao wrote:
Add a basic selftest for the netpoll polling mechanism, specifically targeting the netpoll poll() side.
The test creates a scenario where network transmission is running at maximum speed, and netpoll needs to poll the NIC. This is achieved by:
- Configuring a single RX/TX queue to create contention
- Generating background traffic to saturate the interface
- Sending netconsole messages to trigger netpoll polling
- Using dynamic netconsole targets via configfs
- Deleting and creating new netconsole targets after some messages
- Starting bpftrace in parallel to make sure netpoll_poll_dev() is called
- If bpftrace exists and netpoll_poll_dev() was called, stop.
The test validates a critical netpoll code path by monitoring traffic flow and ensuring netpoll_poll_dev() is called when the normal TX path is blocked.
+# Max number of netcons messages to send. Each iteration will setup +# netconsole and send MAX_WRITES messages +ITERATIONS: int = 20 +# Number of writes to /dev/kmsg per iteration +MAX_WRITES: int = 40
FWIW the test takes 25 sec on our debug-heavy VMs right now. I think we can crank up the number of writes quite a bit?
+def ethtool_read_rx_tx_queue(interface_name: str) -> tuple[int, int]:
- """
- Read the number of RX and TX queues using ethtool. This will be used
- to restore it after the test
- """
- rx_queue = 0
- tx_queue = 0
- try:
ethtool_result = ethtool(f"-g {interface_name}").stdout
Please pass json=True and you'll get a dict; on the CLI you can try:
ethtool --json -g eth0
for line in ethtool_result.splitlines():
if line.startswith("RX:"):
rx_queue = int(line.split()[1])
if line.startswith("TX:"):
tx_queue = int(line.split()[1])
- except IndexError as exception:
raise KsftSkipEx(
f"Failed to read RX/TX queues numbers: {exception}. Not going to mess with them."
) from exception
- if not rx_queue or not tx_queue:
raise KsftSkipEx(
"Failed to read RX/TX queues numbers. Not going to mess with them."
)
- return rx_queue, tx_queue
+def ethtool_set_rx_tx_queue(interface_name: str, rx_val: int, tx_val: int) -> None:
- """Set the number of RX and TX queues to 1 using ethtool"""
- try:
# This don't need to be reverted, since interfaces will be deleted after test
Well, but that's easily fixed:
defer(ethtool, f"-G {interface_name} rx {prev_rx} tx {prev_tx}")
ethtool(f"-G {interface_name} rx {rx_val} tx {tx_val}")
This is setting the _ring size_, not the queue count. I suppose we want both: this, and the queue count set to 1 (with ethtool -l / -L). A ring size of 1 is unlikely to work on real devices. I'd try setting it to 128 and 256, and if neither sticks, just carry on with whatever was there.
- except Exception as exception:
raise KsftSkipEx(
f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
) from exception
+def netcons_generate_random_target_name() -> str:
- """Generate a random target name starting with 'netcons'"""
- random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
- return f"netcons_{random_suffix}"
+def netcons_create_target(
- config_data: dict[str, str],
- target_name: str,
+) -> None:
- """Create a netconsole dynamic target against the interfaces"""
- logging.debug("Using netconsole name: %s", target_name)
- try:
os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
logging.debug(
"Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
)
- except OSError as exception:
if exception.errno != errno.EEXIST:
raise KsftFailEx(
f"Failed to create netconsole target directory: {exception}"
) from exception
- try:
for key, value in config_data.items():
path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}"
logging.debug("Writing %s to %s", key, path)
with open(path, "w", encoding="utf-8") as file:
# Always convert to string to write to file
file.write(str(value))
# Read all configuration values for debugging purposes
for debug_key in config_data.keys():
with open(
f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
"r",
encoding="utf-8",
) as file:
content = file.read()
logging.debug(
"%s/%s/%s : %s",
NETCONSOLE_CONFIGFS_PATH,
target_name,
debug_key,
content.strip(),
)
- except Exception as exception:
raise KsftFailEx(
f"Failed to configure netconsole target: {exception}"
) from exception
+def netcons_configure_target(
- cfg: NetDrvEpEnv, interface_name: str, target_name: str
+) -> None:
- """Configure netconsole on the interface with the given target name"""
- config_data = {
"extended": "1",
"dev_name": interface_name,
"local_port": NETCONS_LOCAL_PORT,
"remote_port": NETCONS_REMOTE_PORT,
"local_ip": cfg.addr_v["4"] if cfg.addr_ipver == "4" else cfg.addr_v["6"],
"remote_ip": (
cfg.remote_addr_v["4"] if cfg.addr_ipver == "4" else cfg.remote_addr_v["6"]
),
This is already done for you: cfg.addr is either v4 or v6, depending on what was provided in the env.
"remote_mac": "00:00:00:00:00:00", # Not important for this test
"enabled": "1",
- }
- netcons_create_target(config_data, target_name)
- logging.debug(
"Created netconsole target: %s on interface %s", target_name, interface_name
- )
+def netcons_delete_target(name: str) -> None:
- """Delete a netconsole dynamic target"""
- target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
- try:
if os.path.exists(target_path):
os.rmdir(target_path)
- except OSError as exception:
raise KsftFailEx(
f"Failed to delete netconsole target: {exception}"
) from exception
+# toggle the interface up and down, to cause some congestion
Let's not do this: you're missing the disruptive annotation, and for many drivers NAPI is stopped before the queues. See https://github.com/linux-netdev/nipa/wiki/Guidance-for-test-authors#ksft_dis...
+def toggle_interface(ifname: str) -> None:
- """Toggle the interface up and down"""
- logging.debug("Toggling interface %s", ifname)
- try:
ip(f"link set dev {ifname} down")
# Send a message while the interface is down, just to
# cause more test scenarios. Netconsole should be
# going down here as well, giving the link was lost
with open("/dev/kmsg", "w", encoding="utf-8") as kmsg:
kmsg.write("netcons test while interface down\n")
ip(f"link set dev {ifname} up")
- except Exception as exception:
raise KsftFailEx(f"Failed to toggle interface: {exception}") from exception
+def test_netpoll(cfg: NetDrvEpEnv) -> None:
- """
- Test netpoll by sending traffic to the interface and then sending
- netconsole messages to trigger a poll
- """
- target_name = netcons_generate_random_target_name()
- ifname = cfg.dev["ifname"]
Use cfg.ifname here.
- traffic = None
- original_queues = ethtool_read_rx_tx_queue(ifname)
- try:
# Set RX/TX queues to 1 to force congestion
ethtool_set_rx_tx_queue(ifname, 1, 1)
traffic = GenerateTraffic(cfg)
do_netpoll_flush_monitored(cfg, ifname, target_name)
- finally:
if traffic:
traffic.stop()
# Revert RX/TX queues
ethtool_set_rx_tx_queue(ifname, original_queues[0], original_queues[1])
netcons_delete_target(target_name)
+def main() -> None:
- """Main function to run the test"""
- netcons_load_module()
- test_check_dependencies()
- with NetDrvEpEnv(__file__, nsim_test=True) as cfg:
I think nsim_test=True will make the test run _only_ on netdevsim. But there's nothing netdevsim-specific here, right? You can remove the argument so that this runs against real drivers, too.