On 2023/11/16 13:12, Cao, Yahui wrote:
>
> On 10/9/2023 4:51 PM, Yi Liu wrote:
>> From: Kevin Tian<kevin.tian(a)intel.com>
>>
>> This adds vfio_register_pasid_iommu_dev() for device drivers to register
>> virtual devices that are isolated per PASID in the physical IOMMU. The
>> major usage is for SIOV devices, which allow the device driver to tag the
>> DMAs issued by the virtual devices within them with different PASIDs.
>>
>> For a given vfio device, VFIO core creates both a group user interface
>> and a device user interface (device cdev) if configured. However, for
>> virtual devices backed by a PASID of the device, VFIO core shall only
>> create the device user interface, as there is no plan to support such
>> devices in the legacy vfio_iommu drivers, which would be required if a
>> group user interface were created for them. This introduces a
>> VFIO_PASID_IOMMU group type for the device driver to register PASID
>> virtual devices, and provides a wrapper API for it. In particular, no
>> iommu group (neither a fake group nor a real group) exists per PASID,
>> hence there is no group interface for this type.
>>
>> Signed-off-by: Kevin Tian<kevin.tian(a)intel.com>
>> Signed-off-by: Yi Liu<yi.l.liu(a)intel.com>
>> ---
>> drivers/vfio/group.c | 18 ++++++++++++++++++
>> drivers/vfio/vfio.h | 8 ++++++++
>> drivers/vfio/vfio_main.c | 10 ++++++++++
>> include/linux/vfio.h | 1 +
>> 4 files changed, 37 insertions(+)
>>
>> ...
>> ...
>> +/*
>> + * Register a virtual device with IOMMU pasid protection. The user of
>> + * this device can trigger DMA as long as all of its outgoing DMAs are
>> + * always tagged with a pasid.
>> + */
>> +int vfio_register_pasid_iommu_dev(struct vfio_device *device)
>> +{
>> + return __vfio_register_dev(device, VFIO_PASID_IOMMU);
>> +}
>> +
>
>
> Missing symbol export here.
>
fixed.
--
Regards,
Yi Liu
When executing the dirty_log_test on some aarch64 machines, it sometimes
triggers the ASSERT:
==== Test Assertion Failure ====
dirty_log_test.c:384: dirty_ring_vcpu_ring_full
pid=14854 tid=14854 errno=22 - Invalid argument
1 0x00000000004033eb: dirty_ring_collect_dirty_pages at dirty_log_test.c:384
2 0x0000000000402d27: log_mode_collect_dirty_pages at dirty_log_test.c:505
3 (inlined by) run_test at dirty_log_test.c:802
4 0x0000000000403dc7: for_each_guest_mode at guest_modes.c:100
5 0x0000000000401dff: main at dirty_log_test.c:941 (discriminator 3)
6 0x0000ffff9be173c7: ?? ??:0
7 0x0000ffff9be1749f: ?? ??:0
8 0x000000000040206f: _start at ??:?
Didn't continue vcpu even without ring full
The dirty_log_test fails when executing the dirty-ring test because
sem_vcpu_cont and sem_vcpu_stop have non-zero values when
dirty_ring_collect_dirty_pages() is executed. When these two sem_t
variables are non-zero, the dirty_ring_wait_vcpu() at the beginning of
dirty_ring_collect_dirty_pages() does not wait for the vcpu to stop, but
continues to execute the following code. In this case, if
dirty_ring_vcpu_ring_full is true before the vcpu stops, and
dirty_ring_collect_dirty_pages() has passed the check for
dirty_ring_vcpu_ring_full but has not yet executed the check for
continued_vcpu, then the vcpu stops and sets dirty_ring_vcpu_ring_full to
false, and dirty_ring_collect_dirty_pages() triggers the ASSERT.
Why can sem_vcpu_cont and sem_vcpu_stop be non-zero? It is because
dirty_ring_before_vcpu_join() executes sem_post(&sem_vcpu_cont) at the
end of each dirty-ring test. This can lead to two cases:
1. sem_vcpu_cont becomes non-zero. When we set host_quit to true, the
   vcpu_worker sees host_quit is true and quits immediately.
   log_mode_before_vcpu_join() then sets sem_vcpu_cont to 1, but since
   the vcpu_worker has already quit, nothing consumes it.
2. sem_vcpu_stop becomes non-zero. When we set host_quit to true while
   the vcpu_worker has entered guest mode, the next time it exits from
   guest mode it sets sem_vcpu_stop to 1 and only then sees host_quit,
   so no one consumes sem_vcpu_stop.
As more and more dirty-ring tests are executed, sem_vcpu_cont and
sem_vcpu_stop can grow larger and larger, which makes many code paths
skip waiting on the sem_t and finally causes the problem.
Fixing this problem is easy: simply initialize the sem_t variables before
every test. That way, whatever state the previous test left behind cannot
interfere with the next test.
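To see the stale-count effect in isolation, here is a minimal userspace
sketch (not part of the selftest itself): a leftover sem_post() makes the
next sem_wait() return immediately instead of blocking, and re-initializing
the semaphore clears the leftover count.

#include <semaphore.h>
#include <stdio.h>

int main(void)
{
	sem_t sem;

	sem_init(&sem, 0, 0);
	sem_post(&sem);         /* stale post left over from a previous "test" */
	sem_wait(&sem);         /* returns immediately; no real synchronization */
	printf("sem_wait() did not block\n");

	sem_init(&sem, 0, 0);   /* re-initializing resets the count to zero */
	sem_destroy(&sem);
	return 0;
}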
Signed-off-by: Shaoqin Huang <shahuang(a)redhat.com>
---
tools/testing/selftests/kvm/dirty_log_test.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 936f3a8d1b83..23b179534c0b 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -726,6 +726,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
return;
}
+ sem_init(&sem_vcpu_stop, 0, 0);
+ sem_init(&sem_vcpu_cont, 0, 0);
+
/*
* We reserve page table for 2 times of extra dirty mem which
* will definitely cover the original (1G+) test range. Here
@@ -871,9 +874,6 @@ int main(int argc, char *argv[])
int opt, i;
sigset_t sigset;
- sem_init(&sem_vcpu_stop, 0, 0);
- sem_init(&sem_vcpu_cont, 0, 0);
-
guest_modes_append_default();
while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) {
--
2.40.1
This series introduces a new test harness for nolibc.
It is similar to kselftest-harness and google test.
More information in patch 1.
This is an RFC to gather feedback, especially on whether it can be
integrated with the original kselftest-harness somehow.
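For comparison, a test written against the existing kselftest_harness.h
(the API this new harness is similar to) looks roughly like the sketch
below; this deliberately shows the existing harness, not the new nolibc
one.

/* Path is relative to the test's directory under tools/testing/selftests/. */
#include "../kselftest_harness.h"

TEST(example_addition)
{
	ASSERT_EQ(2 + 2, 4);
	EXPECT_NE(2 + 2, 5);
}

TEST_HARNESS_MAIN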
Note:
When run under qemu-loongarch64 8.1.2, the test "mmap_munmap_good" fails.
This is a bug in qemu-user that is already fixed there on master.
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
---
Thomas Weißschuh (3):
selftests/nolibc: add custom test harness
selftests/nolibc: migrate startup tests to new harness
selftests/nolibc: migrate vfprintf tests to new harness
tools/testing/selftests/nolibc/nolibc-harness.h | 269 ++++++++++++++++++++++++
tools/testing/selftests/nolibc/nolibc-test.c | 180 ++++++++--------
2 files changed, 353 insertions(+), 96 deletions(-)
---
base-commit: d38d5366cb1c51f687b4720277adee97074b22e9
change-id: 20231105-nolibc-harness-28c59029d7a5
Best regards,
--
Thomas Weißschuh <linux(a)weissschuh.net>
Hi Christian,
Can you take this through the filesystem tree?
These patches make some changes to the kunit tests previously added for
iov_iter testing, in particular adding testing of UBUF/IOVEC iterators and
some benchmarking:
(1) Clean up a couple of checkpatch style complaints.
(2) Consolidate some repeated bits of code into helper functions and use
the same struct to represent straight offset/address ranges and
partial page lists.
(3) Add a function to set up a userspace VM, attach the VM to the kunit
testing thread, create an anonymous file, stuff some pages into the
file and map the file into the VM to act as a buffer that can be used
with UBUF/IOVEC iterators.
I map an anonymous file with pages attached rather than using MAP_ANON
so that I can check the pages obtained from iov_iter_extract_pages()
without worrying about them changing due to swap, migration, etc.
[?] Is this the best way to do things? Mirroring execve, it requires
a number of extra core symbols to be exported. Should this be done in
the core code?
(4) Add tests for copying into and out of UBUF and IOVEC iterators (a
minimal sketch of the shape of such a test follows the example benchmark
output below).
(5) Add tests for extracting pages from UBUF and IOVEC iterators.
(6) Add tests to benchmark copying 256MiB to UBUF, IOVEC, KVEC, BVEC and
XARRAY iterators.
(7) Add a test to benchmark copying 256MiB from an xarray that gets decanted
into 256-page BVEC iterators to model batching from the pagecache.
(8) Add a test to benchmark copying 256MiB through dynamically allocated
256-page bvecs to simulate bio construction.
Example benchmarks output:
iov_kunit_benchmark_ubuf: avg 4474 uS, stddev 1340 uS
iov_kunit_benchmark_iovec: avg 6619 uS, stddev 23 uS
iov_kunit_benchmark_kvec: avg 2672 uS, stddev 14 uS
iov_kunit_benchmark_bvec: avg 3189 uS, stddev 19 uS
iov_kunit_benchmark_bvec_split: avg 3403 uS, stddev 8 uS
iov_kunit_benchmark_xarray: avg 3709 uS, stddev 7 uS
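As an orientation aid, here is a minimal sketch of the general shape of a
copy test like those in (4). This is my assumption rather than code from
the series, and it uses a KVEC iterator purely because it needs no
userspace mapping (the real UBUF/IOVEC tests use the VM set up in (3)).

/* Assumes <kunit/test.h> and <linux/uio.h>; names are illustrative. */
static void iov_kunit_copy_to_kvec_sketch(struct kunit *test)
{
	struct iov_iter iter;
	struct kvec kvec;
	u8 *src, *dst;
	size_t copied;

	src = kunit_kzalloc(test, PAGE_SIZE, GFP_KERNEL);
	dst = kunit_kzalloc(test, PAGE_SIZE, GFP_KERNEL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, src);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dst);

	memset(src, 0xaa, PAGE_SIZE);

	/* Wrap the destination buffer in a one-segment KVEC iterator. */
	kvec.iov_base = dst;
	kvec.iov_len = PAGE_SIZE;
	iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, PAGE_SIZE);

	copied = copy_to_iter(src, PAGE_SIZE, &iter);
	KUNIT_EXPECT_EQ(test, copied, PAGE_SIZE);
	KUNIT_EXPECT_EQ(test, memcmp(src, dst, PAGE_SIZE), 0);
}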
I've pushed the patches here also:
https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/log/?…
David
Changes
=======
ver #3)
- #include <linux/personality.h> to get READ_IMPLIES_EXEC.
- Add a test to benchmark decanting an xarray into bio_vecs.
ver #2)
- Use MAP_ANON to make the user buffer if we don't want a list of pages.
- KUNIT_ASSERT_NOT_ERR_OR_NULL() doesn't like __user pointers as the
condition, so cast.
- Make the UBUF benchmark loop, doing an iterator per page so that the
overhead from the iterator code is not negligible.
- Make the KVEC benchmark use an iovec per page so that the iteration is
not negligible.
- Switch the benchmarking to use copy_from_iter() so that only a single
page is needed in the userspace buffer (as it can be shared R/O), not
256MiB's worth.
Link: https://lore.kernel.org/r/20230914221526.3153402-1-dhowells@redhat.com/ # v1
Link: https://lore.kernel.org/r/20230920130400.203330-1-dhowells@redhat.com/ # v2
Link: https://lore.kernel.org/r/20230922113038.1135236-1-dhowells@redhat.com/ # v3
David Howells (10):
iov_iter: Fix some checkpatch complaints in kunit tests
iov_iter: Consolidate some of the repeated code into helpers
iov_iter: Consolidate the test vector struct in the kunit tests
iov_iter: Consolidate bvec pattern checking
iov_iter: Create a function to prepare userspace VM for UBUF/IOVEC
tests
iov_iter: Add copy kunit tests for ITER_UBUF and ITER_IOVEC
iov_iter: Add extract kunit tests for ITER_UBUF and ITER_IOVEC
iov_iter: Add benchmarking kunit tests
iov_iter: Add kunit to benchmark decanting of xarray to bvec
iov_iter: Add benchmarking kunit tests for UBUF/IOVEC
arch/s390/kernel/vdso.c | 1 +
fs/anon_inodes.c | 1 +
kernel/fork.c | 2 +
lib/kunit_iov_iter.c | 1317 +++++++++++++++++++++++++++++++++------
mm/mmap.c | 1 +
mm/util.c | 3 +
6 files changed, 1139 insertions(+), 186 deletions(-)
v3:
- Break out a separate patch to make workqueue_set_unbound_cpumask()
static and move it down to the CONFIG_SYSFS section.
- Remove the "__DEBUG__." prefix and the CFTYPE_DEBUG flag from the
new root only cpuset.cpus.isolated control files and update the
test accordingly.
v2:
- Add 2 read-only workqueue sysfs files to expose the user requested
cpumask as well as the isolated CPUs to be excluded from
wq_unbound_cpumask.
- Ensure that callers of the new workqueue_unbound_exclude_cpumask()
hold cpus_read_lock.
- Update the cpuset code to make sure the cpus_read_lock is held
whenever workqueue_unbound_exclude_cpumask() may be called.
An isolated cpuset partition can currently be created to contain an
exclusive set of CPUs not used in other cgroups, with load balancing
disabled to reduce interference from the scheduler.
The main purpose of this isolated partition type is to dynamically
emulate what can be done via the "isolcpus" boot command line option,
specifically the default domain flag. One effect of the "isolcpus" option
is to remove the isolated CPUs from the cpumasks of unbound workqueues
since running work functions in an isolated CPU can be a major source
of interference. Changing the unbound workqueue cpumasks can be done at
run time by writing an appropriate cpumask without the isolated CPUs to
/sys/devices/virtual/workqueue/cpumask. So one can set up an isolated
cpuset partition and then write to the cpumask sysfs file to achieve
a similar level of CPU isolation. However, this manual process can be
error-prone.
This patch series implements automatic exclusion of isolated CPUs from
unbound workqueue cpumasks when an isolated cpuset partition is created
and then adds those CPUs back when the isolated partition is destroyed.
There are also other places in the kernel that look at the HK_FLAG_DOMAIN
cpumask or other HK_FLAG_* cpumasks and exclude the isolated CPUs from
certain actions to further reduce interference. CPUs in an isolated
cpuset partition will not be able to avoid those interferences yet. That
may change in the future as the need arises.
Waiman Long (5):
workqueue: Make workqueue_set_unbound_cpumask() static
workqueue: Add workqueue_unbound_exclude_cpumask() to exclude CPUs
from wq_unbound_cpumask
selftests/cgroup: Minor code cleanup and reorganization of
test_cpuset_prs.sh
cgroup/cpuset: Keep track of CPUs in isolated partitions
cgroup/cpuset: Take isolated CPUs out of workqueue unbound cpumask
Documentation/admin-guide/cgroup-v2.rst | 10 +-
include/linux/workqueue.h | 2 +-
kernel/cgroup/cpuset.c | 286 +++++++++++++-----
kernel/workqueue.c | 139 +++++++--
.../selftests/cgroup/test_cpuset_prs.sh | 216 ++++++++-----
5 files changed, 462 insertions(+), 191 deletions(-)
--
2.39.3
KUnit's deferred action API accepts a void(*)(void *) function pointer
which is called when the test is exited. However, we very frequently
want to use existing functions which accept a single pointer, but which
may not be of type void*. While this is probably dodgy enough to be on
the wrong side of the C standard, it's been often used for similar
callbacks, and gcc's -Wcast-function-type seems to ignore cases where
the only difference is the type of the argument, assuming it's
compatible (i.e., they're both pointers to data).
However, clang 16 has introduced -Wcast-function-type-strict, which no
longer permits any deviation in function pointer type. This seems to be
because it'd break CFI, which validates the type of function calls.
This rather ruins our attempts to cast functions to defer them, and
leaves us with a few options. The one we've chosen is to implement a
macro which will generate a wrapper function which accepts a void*, and
casts the argument to the appropriate type.
For example, if you were trying to wrap:
void foo_close(struct foo *handle);
you could use:
KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_foo_close,
foo_close,
struct foo *);
This would create a new kunit_action_foo_close() function, of type
kunit_action_t, which could be passed into kunit_add_action() and
similar functions.
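As an illustrative sketch (not part of this patch; foo_open() is a
hypothetical helper), the generated wrapper can be registered so that the
handle is released automatically when the test exits:

/* Sketch only: foo_open()/foo_close() are hypothetical. */
KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_foo_close, foo_close, struct foo *);

static void foo_example_test(struct kunit *test)
{
	struct foo *handle = foo_open();

	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, handle);

	/* foo_close(handle) is called automatically when the test exits. */
	KUNIT_ASSERT_EQ(test, 0,
			kunit_add_action_or_reset(test, kunit_action_foo_close,
						  handle));

	/* ... exercise handle ... */
}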
In addition to defining this macro, update KUnit and its tests to use
it.
Link: https://github.com/ClangBuiltLinux/linux/issues/1750
Signed-off-by: David Gow <davidgow(a)google.com>
---
This is a follow-up to the RFC here:
https://lore.kernel.org/linux-kselftest/20230915050125.3609689-1-davidgow@g…
There's no difference in the macro implementation, just an update to the
KUnit tests to use it. This version is intended to complement:
https://lore.kernel.org/all/20231106172557.2963-1-rf@opensource.cirrus.com/
There are also two follow-up patches in the series to use this macro in
various DRM tests.
Hopefully this will solve any CFI issues that show up with KUnit.
Thanks,
-- David
---
include/kunit/resource.h | 9 +++++++++
lib/kunit/kunit-test.c | 5 +----
lib/kunit/test.c | 6 ++++--
3 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/include/kunit/resource.h b/include/kunit/resource.h
index c7383e90f5c9..4110e13970dc 100644
--- a/include/kunit/resource.h
+++ b/include/kunit/resource.h
@@ -390,6 +390,15 @@ void kunit_remove_resource(struct kunit *test, struct kunit_resource *res);
/* A 'deferred action' function to be used with kunit_add_action. */
typedef void (kunit_action_t)(void *);
+/* We can't cast function pointers to kunit_action_t if CFI is enabled. */
+#define KUNIT_DEFINE_ACTION_WRAPPER(wrapper, orig, arg_type) \
+ static void wrapper(void *in) \
+ { \
+ arg_type arg = (arg_type)in; \
+ orig(arg); \
+ }
+
+
/**
* kunit_add_action() - Call a function when the test ends.
* @test: Test case to associate the action with.
diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c
index de2113a58fa0..ee6927c60979 100644
--- a/lib/kunit/kunit-test.c
+++ b/lib/kunit/kunit-test.c
@@ -538,10 +538,7 @@ static struct kunit_suite kunit_resource_test_suite = {
#if IS_BUILTIN(CONFIG_KUNIT_TEST)
/* This avoids a cast warning if kfree() is passed direct to kunit_add_action(). */
-static void kfree_wrapper(void *p)
-{
- kfree(p);
-}
+KUNIT_DEFINE_ACTION_WRAPPER(kfree_wrapper, kfree, const void *);
static void kunit_log_test(struct kunit *test)
{
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index f2eb71f1a66c..0308865194bb 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -772,6 +772,8 @@ static struct notifier_block kunit_mod_nb = {
};
#endif
+KUNIT_DEFINE_ACTION_WRAPPER(kfree_action_wrapper, kfree, const void *)
+
void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp)
{
void *data;
@@ -781,7 +783,7 @@ void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp)
if (!data)
return NULL;
- if (kunit_add_action_or_reset(test, (kunit_action_t *)kfree, data) != 0)
+ if (kunit_add_action_or_reset(test, kfree_action_wrapper, data) != 0)
return NULL;
return data;
@@ -793,7 +795,7 @@ void kunit_kfree(struct kunit *test, const void *ptr)
if (!ptr)
return;
- kunit_release_action(test, (kunit_action_t *)kfree, (void *)ptr);
+ kunit_release_action(test, kfree_action_wrapper, (void *)ptr);
}
EXPORT_SYMBOL_GPL(kunit_kfree);
--
2.42.0.869.gea05f2083d-goog