From: zhang yunkai (CGEL ZTE) zhang.yunkai@zte.com.cn
The UDP GSO bench only tests the performance of userspace payload splitting and UDP GSO. But we are also concerned about the performance comparing with IP fragmentation and UDP GSO. In other words comparing IP fragmentation and segmentation.
So we add a test case for IP fragmentation of UDP packets, so that users can easily see the performance improvement of UDP GSO compared with IP fragmentation. We add a new option "-f", which sends big data using IP fragmentation instead of UDP GSO or userspace payload splitting.
In the QEMU environment we could see obvious promotion of UDP GSO. The first test is to get the performance of userspace payload splitting. bash# udpgso_bench_tx -l 4 -4 -D "$DST" udp tx: 21 MB/s 15162 calls/s 361 msg/s udp tx: 21 MB/s 15498 calls/s 369 msg/s udp tx: 18 MB/s 13440 calls/s 320 msg/s udp tx: 19 MB/s 13776 calls/s 328 msg/s
The second test is to get the performance of IP fragmentation. bash# udpgso_bench_tx -l 4 -4 -D "$DST" -f udp tx: 41 MB/s 711 calls/s 711 msg/s udp tx: 41 MB/s 700 calls/s 700 msg/s udp tx: 43 MB/s 738 calls/s 738 msg/s udp tx: 40 MB/s 693 calls/s 693 msg/s
The third test is to get the performance of UDP GSO. bash# udpgso_bench_tx -l 4 -4 -D "$DST" -S 0 udp tx: 45 MB/s 775 calls/s 775 msg/s udp tx: 47 MB/s 800 calls/s 800 msg/s udp tx: 47 MB/s 814 calls/s 814 msg/s udp tx: 47 MB/s 812 calls/s 812 msg/s
v2: Suggested by Willem de Bruijn willemdebruijn.kernel@gmail.com - Use IP_PMTUDISC_OMIT to disable PMTU discovery and to avoid send returning with error after ICMP destination unreachable messages if MTU is exceeded in the path.
Signed-off-by: zhang yunkai (CGEL ZTE) zhang.yunkai@zte.com.cn Reviewed-by: xu xin (CGEL ZTE) xu.xin16@zte.com.cn Reviewed-by: Yang Yang (CGEL ZTE) yang.yang29@zte.com.cn Cc: Xuexin Jiang (CGEL ZTE) jiang.xuexin@zte.com.cn --- tools/testing/selftests/net/udpgso_bench_tx.c | 49 ++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 477392715a9a..b3333b39bb87 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -64,6 +64,7 @@ static int cfg_runtime_ms = -1; static bool cfg_poll; static int cfg_poll_loop_timeout_ms = 2000; static bool cfg_segment; +static bool cfg_fragment; static bool cfg_sendmmsg; static bool cfg_tcp; static uint32_t cfg_tx_ts = SOF_TIMESTAMPING_TX_SOFTWARE; @@ -375,6 +376,21 @@ static int send_udp_sendmmsg(int fd, char *data) return ret; }
+static int send_udp_fragment(int fd, char *data) +{ + int ret; + + ret = sendto(fd, data, cfg_payload_len, cfg_zerocopy ? MSG_ZEROCOPY : 0, + cfg_connected ? NULL : (void *)&cfg_dst_addr, + cfg_connected ? 0 : cfg_alen); + if (ret == -1) + error(1, errno, "write"); + if (ret != cfg_payload_len) + error(1, errno, "write: %uB != %uB\n", ret, cfg_payload_len); + + return 1; +} + static void send_udp_segment_cmsg(struct cmsghdr *cm) { uint16_t *valp; @@ -429,7 +445,7 @@ static int send_udp_segment(int fd, char *data)
static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] " + error(1, 0, "Usage: %s [-46acfmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] " "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath); } @@ -440,7 +456,7 @@ static void parse_opts(int argc, char **argv) int max_len, hdrlen; int c;
- while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) { + while ((c = getopt(argc, argv, "46acC:D:fHl:L:mM:p:s:PS:tTuvz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -469,6 +485,9 @@ static void parse_opts(int argc, char **argv) case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; break; + case 'f': + cfg_fragment = true; + break; case 'L': cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000; break; @@ -527,10 +546,10 @@ static void parse_opts(int argc, char **argv) error(1, 0, "must pass one of -4 or -6"); if (cfg_tcp && !cfg_connected) error(1, 0, "connectionless tcp makes no sense"); - if (cfg_segment && cfg_sendmmsg) - error(1, 0, "cannot combine segment offload and sendmmsg"); - if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg)) - error(1, 0, "Options -T and -H require either -S or -m option"); + if ((cfg_segment + cfg_sendmmsg + cfg_fragment) > 1) + error(1, 0, "cannot combine segment offload, fragment and sendmmsg"); + if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg || cfg_fragment)) + error(1, 0, "Options -T and -H require either -S or -m or -f option");
if (cfg_family == PF_INET) hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr); @@ -551,14 +570,24 @@ static void set_pmtu_discover(int fd, bool is_ipv4) { int level, name, val;
+ /* IP fragmentation test uses IP_PMTUDISC_OMIT to disable PMTU discovery and + * to avoid send returning with error after ICMP destination unreachable + * messages if MTU is exceeded in the path. + */ if (is_ipv4) { level = SOL_IP; name = IP_MTU_DISCOVER; - val = IP_PMTUDISC_DO; + if (cfg_segment) + val = IP_PMTUDISC_DO; + else if (cfg_fragment) + val = IP_PMTUDISC_OMIT; } else { level = SOL_IPV6; name = IPV6_MTU_DISCOVER; - val = IPV6_PMTUDISC_DO; + if (cfg_segment) + val = IPV6_PMTUDISC_DO; + else if (cfg_fragment) + val = IPV6_PMTUDISC_OMIT; }
if (setsockopt(fd, level, name, &val, sizeof(val))) @@ -674,7 +703,7 @@ int main(int argc, char **argv) connect(fd, (void *)&cfg_dst_addr, cfg_alen)) error(1, errno, "connect");
- if (cfg_segment) + if (cfg_segment || cfg_fragment) set_pmtu_discover(fd, cfg_family == PF_INET);
if (cfg_tx_tstamp) @@ -695,6 +724,8 @@ int main(int argc, char **argv) num_sends += send_udp_segment(fd, buf[i]); else if (cfg_sendmmsg) num_sends += send_udp_sendmmsg(fd, buf[i]); + else if (cfg_fragment) + num_sends += send_udp_fragment(fd, buf[i]); else num_sends += send_udp(fd, buf[i]); num_msgs++;
yang.yang29@ wrote:
From: zhang yunkai (CGEL ZTE) zhang.yunkai@zte.com.cn
The UDP GSO bench only tests the performance of userspace payload splitting and UDP GSO. But we are also concerned about the performance comparing with IP fragmentation and UDP GSO. In other words comparing IP fragmentation and segmentation.
So we add a test case for IP fragmentation of UDP packets, so that users can easily see the performance improvement of UDP GSO compared with IP fragmentation. We add a new option "-f", which sends big data using IP fragmentation instead of UDP GSO or userspace payload splitting.
In the QEMU environment we could see obvious promotion of UDP GSO. The first test is to get the performance of userspace payload splitting. bash# udpgso_bench_tx -l 4 -4 -D "$DST" udp tx: 21 MB/s 15162 calls/s 361 msg/s udp tx: 21 MB/s 15498 calls/s 369 msg/s udp tx: 18 MB/s 13440 calls/s 320 msg/s udp tx: 19 MB/s 13776 calls/s 328 msg/s
The second test is to get the performance of IP fragmentation. bash# udpgso_bench_tx -l 4 -4 -D "$DST" -f udp tx: 41 MB/s 711 calls/s 711 msg/s udp tx: 41 MB/s 700 calls/s 700 msg/s udp tx: 43 MB/s 738 calls/s 738 msg/s udp tx: 40 MB/s 693 calls/s 693 msg/s
The third test is to get the performance of UDP GSO. bash# udpgso_bench_tx -l 4 -4 -D "$DST" -S 0 udp tx: 45 MB/s 775 calls/s 775 msg/s udp tx: 47 MB/s 800 calls/s 800 msg/s udp tx: 47 MB/s 814 calls/s 814 msg/s udp tx: 47 MB/s 812 calls/s 812 msg/s
v2: Suggested by Willem de Bruijn willemdebruijn.kernel@gmail.com
- Use IP_PMTUDISC_OMIT to disable PMTU discovery and to avoid send returning with error after ICMP destination unreachable messages if MTU is exceeded in the path.
Did you actually observe a difference in behavior with this change?
The man page summarizes it better than I could:
IP_PMTUDISC_DO forces the don't-fragment flag to be set on all outgoing packets.[..] The kernel will reject (with EMSGSIZE) datagrams that are bigger than the known path MTU.
I would think your fragmentation test fails with that option set.
net-next is still closed btw.
Signed-off-by: zhang yunkai (CGEL ZTE) zhang.yunkai@zte.com.cn Reviewed-by: xu xin (CGEL ZTE) xu.xin16@zte.com.cn Reviewed-by: Yang Yang (CGEL ZTE) yang.yang29@zte.com.cn Cc: Xuexin Jiang (CGEL ZTE) jiang.xuexin@zte.com.cn
tools/testing/selftests/net/udpgso_bench_tx.c | 49 ++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 477392715a9a..b3333b39bb87 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -64,6 +64,7 @@ static int cfg_runtime_ms = -1; static bool cfg_poll; static int cfg_poll_loop_timeout_ms = 2000; static bool cfg_segment; +static bool cfg_fragment; static bool cfg_sendmmsg; static bool cfg_tcp; static uint32_t cfg_tx_ts = SOF_TIMESTAMPING_TX_SOFTWARE; @@ -375,6 +376,21 @@ static int send_udp_sendmmsg(int fd, char *data) return ret; }
+static int send_udp_fragment(int fd, char *data)
+{
+ int ret;
+
+ ret = sendto(fd, data, cfg_payload_len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+ cfg_connected ? NULL : (void *)&cfg_dst_addr,
+ cfg_connected ? 0 : cfg_alen);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != cfg_payload_len)
+ error(1, errno, "write: %uB != %uB\n", ret, cfg_payload_len);
+
+ return 1;
+}
static void send_udp_segment_cmsg(struct cmsghdr *cm) { uint16_t *valp; @@ -429,7 +445,7 @@ static int send_udp_segment(int fd, char *data)
static void usage(const char *filepath) {
- error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] "
+ error(1, 0, "Usage: %s [-46acfmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] "
 "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath);
} @@ -440,7 +456,7 @@ static void parse_opts(int argc, char **argv) int max_len, hdrlen; int c;
- while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) {
+ while ((c = getopt(argc, argv, "46acC:D:fHl:L:mM:p:s:PS:tTuvz")) != -1) {
 switch (c) { case '4': if (cfg_family != PF_UNSPEC)
@@ -469,6 +485,9 @@ static void parse_opts(int argc, char **argv) case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; break;
+ case 'f':
+ cfg_fragment = true;
+ break;
 case 'L': cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000; break;
@@ -527,10 +546,10 @@ static void parse_opts(int argc, char **argv) error(1, 0, "must pass one of -4 or -6"); if (cfg_tcp && !cfg_connected) error(1, 0, "connectionless tcp makes no sense");
- if (cfg_segment && cfg_sendmmsg)
- error(1, 0, "cannot combine segment offload and sendmmsg");
- if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg))
- error(1, 0, "Options -T and -H require either -S or -m option");
+ if ((cfg_segment + cfg_sendmmsg + cfg_fragment) > 1)
+ error(1, 0, "cannot combine segment offload, fragment and sendmmsg");
+ if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg || cfg_fragment))
+ error(1, 0, "Options -T and -H require either -S or -m or -f option");
if (cfg_family == PF_INET) hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
@@ -551,14 +570,24 @@ static void set_pmtu_discover(int fd, bool is_ipv4) { int level, name, val;
+ /* IP fragmentation test uses IP_PMTUDISC_OMIT to disable PMTU discovery and
+  * to avoid send returning with error after ICMP destination unreachable
+  * messages if MTU is exceeded in the path.
+  */
 if (is_ipv4) { level = SOL_IP; name = IP_MTU_DISCOVER;
- val = IP_PMTUDISC_DO;
+ if (cfg_segment)
+ val = IP_PMTUDISC_DO;
+ else if (cfg_fragment)
+ val = IP_PMTUDISC_OMIT;
 } else { level = SOL_IPV6; name = IPV6_MTU_DISCOVER;
- val = IPV6_PMTUDISC_DO;
+ if (cfg_segment)
+ val = IPV6_PMTUDISC_DO;
+ else if (cfg_fragment)
+ val = IPV6_PMTUDISC_OMIT;
 }
if (setsockopt(fd, level, name, &val, sizeof(val)))
@@ -674,7 +703,7 @@ int main(int argc, char **argv) connect(fd, (void *)&cfg_dst_addr, cfg_alen)) error(1, errno, "connect");
- if (cfg_segment)
+ if (cfg_segment || cfg_fragment)
 set_pmtu_discover(fd, cfg_family == PF_INET);
if (cfg_tx_tstamp)
@@ -695,6 +724,8 @@ int main(int argc, char **argv) num_sends += send_udp_segment(fd, buf[i]); else if (cfg_sendmmsg) num_sends += send_udp_sendmmsg(fd, buf[i]);
+ else if (cfg_fragment)
+ num_sends += send_udp_fragment(fd, buf[i]);
 else num_sends += send_udp(fd, buf[i]); num_msgs++;
-- 2.15.2
Did you actually observe a difference in behavior with this change?
The UDP test only cares about sending and does not really need to consider PMTU, so we configure it to IP_PMTUDISC_DONT. IP_PMTUDISC_DONT: turn off PMTU discovery. IP_PMTUDISC_OMIT: the same as DONT, but in some scenarios DF will be ignored. I did not construct such a scenario; presumably it arises when forwarding. Anyway, in this test it behaves the same as DONT.
We have a question, what is the point of this test if it is not compared to UDP GSO and IP fragmentation. No user or tool will segment in user mode, UDP GSO should compare performance with IP fragmentation.
On Fri, Mar 3, 2023 at 12:03 PM yang.yang29@zte.com.cn wrote:
Did you actually observe a difference in behavior with this change?
The UDP test only cares about sending and does not really need to consider PMTU, so we configure it to IP_PMTUDISC_DONT. IP_PMTUDISC_DONT: turn off PMTU discovery. IP_PMTUDISC_OMIT: the same as DONT, but in some scenarios DF will be ignored. I did not construct such a scenario; presumably it arises when forwarding. Anyway, in this test it behaves the same as DONT.
We have a question, what is the point of this test if it is not compared to UDP GSO and IP fragmentation. No user or tool will segment in user mode, UDP GSO should compare performance with IP fragmentation.
I think it is misleading to think the cost of IP fragmentation matters at the sender side.
Major issue is the receiving side, with many implications of memory and cpu costs, not counting amplifications of potential packet losses.
So your patch would make sense if you also change tools/testing/selftests/net/udpgso_bench_rx.c accordingly.
If you send UDP packets to a receiver, then you should not receive ICMP errors, unless a reassembly error occurred.
About ICMP packets being disruptive, you can always ignore errors at sendmsg() time and retry the syscall.
Eric Dumazet wrote:
On Fri, Mar 3, 2023 at 12:03 PM yang.yang29@zte.com.cn wrote:
Did you actually observe a difference in behavior with this change?
The UDP test only cares about sending and does not really need to consider PMTU, so we configure it to IP_PMTUDISC_DONT. IP_PMTUDISC_DONT: turn off PMTU discovery. IP_PMTUDISC_OMIT: the same as DONT, but in some scenarios DF will be ignored. I did not construct such a scenario; presumably it arises when forwarding. Anyway, in this test it behaves the same as DONT.
My point was not to compare IP_PMTUDISC_OMIT to .._DONT but to .._DO, which is what the existing UDP GSO test is setting.
USO should generate segments that meet MTU rules. The test forces the DF bit (IP_PMTUDISC_DO).
UFO instead requires local fragmentation, must enter the path for this in ip_output.c. It should fail if IP_PMTUDISC_DO is set:
/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ skb->ignore_df = ip_sk_ignore_df(sk);
/* DF bit is set when we want to see DF on outgoing frames. * If ignore_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc == IP_PMTUDISC_DO || inet->pmtudisc == IP_PMTUDISC_PROBE || (skb->len <= dst_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF);
We have a question, what is the point of this test if it is not compared to UDP GSO and IP fragmentation. No user or tool will segment in user mode,
Are you saying no process will use UDP_SEGMENT?
The local protocol stack removed UFO in series d9d30adf5677. USO can be offloaded to hardware by quite a few devices (NETIF_F_GSO_UDP_L4).
UDP GSO should compare performance with IP fragmentation.
I think it is misleading to think the cost of IP fragmentation matters at the sender side.
Major issue is the receiving side, with many implications of memory and cpu costs, not counting amplifications of potential packet losses.
So your patch would make sense if you also change tools/testing/selftests/net/udpgso_bench_rx.c accordingly.
If you send UDP packets to a receiver, then you should not receive ICMP errors, unless a reassembly error occurred.
About ICMP packets being disruptive, you can always ignore errors at sendmsg() time and retry the syscall.
IP_PMTUDISC_DONT: turn off pmtu detection. IP_PMTUDISC_OMIT: the same as DONT, but in some scenarios, DF will
be ignored. I did not construct such a scene, presumably when forwarding. Any way, in this test, is the same as DONT.
My point was not to compare IP_PMTUDISC_OMIT to .._DONT but to .._DO, which is what the existing UDP GSO test is setting.
Yeah, we got your point, but the result was as the patch showed: it hadn't changed much (patch v2 vs. patch v1), because the fragmentation option of 'patch v1' used the default PMTU discovery strategy (IP_PMTUDISC_DONT, because the code didn't set the PMTU discovery mode explicitly via setsockopt() when using './udpgso_bench_tx -f'), which is not much different from 'patch v2' using IP_PMTUDISC_OMIT.
USO should generate segments that meet MTU rules. The test forces the DF bit (IP_PMTUDISC_DO).
UFO instead requires local fragmentation, must enter the path for this in ip_output.c. It should fail if IP_PMTUDISC_DO is set:
/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ skb->ignore_df = ip_sk_ignore_df(sk); /* DF bit is set when we want to see DF on outgoing frames. * If ignore_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc == IP_PMTUDISC_DO || inet->pmtudisc == IP_PMTUDISC_PROBE || (skb->len <= dst_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF);
We have a question, what is the point of this test if it is not compared to UDP GSO and IP fragmentation. No user or tool will segment in user mode,
Are you saying no process will use UDP_SEGMENT?
No, we are saying "user-space payload splitting", in other words, use ./udpgso_bench_tx without '-f' or '-S'.
Sincerely.
The local protocol stack removed UFO in series d9d30adf5677. USO can be offloaded to hardware by quite a few devices (NETIF_F_GSO_UDP_L4).
UDP GSO should compare performance with IP fragmentation.
I think it is misleading to think the cost of IP fragmentation matters
xu xin wrote:
IP_PMTUDISC_DONT: turn off pmtu detection. IP_PMTUDISC_OMIT: the same as DONT, but in some scenarios, DF will
be ignored. I did not construct such a scene, presumably when forwarding. Any way, in this test, is the same as DONT.
My point was not to compare IP_PMTUDISC_OMIT to .._DONT but to .._DO, which is what the existing UDP GSO test is setting.
Yeah, we got your point, but the result was as the patch showed: it hadn't changed much (patch v2 vs. patch v1), because the fragmentation option of 'patch v1' used the default PMTU discovery strategy (IP_PMTUDISC_DONT, because the code didn't set the PMTU discovery mode explicitly via setsockopt() when using './udpgso_bench_tx -f'), which is not much different from 'patch v2' using IP_PMTUDISC_OMIT.
Or IP_PMTUDISC_WANT unless sysctl_ip_no_pmtu_disc is set. But fair point. Explicitly disabling pmtu is not needed.
USO should generate segments that meet MTU rules. The test forces the DF bit (IP_PMTUDISC_DO).
UFO instead requires local fragmentation, must enter the path for this in ip_output.c. It should fail if IP_PMTUDISC_DO is set:
/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ skb->ignore_df = ip_sk_ignore_df(sk); /* DF bit is set when we want to see DF on outgoing frames. * If ignore_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc == IP_PMTUDISC_DO || inet->pmtudisc == IP_PMTUDISC_PROBE || (skb->len <= dst_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF);
We have a question, what is the point of this test if it is not compared to UDP GSO and IP fragmentation. No user or tool will segment in user mode,
Are you saying no process will use UDP_SEGMENT?
No, we are saying "user-space payload splitting", in other words, use ./udpgso_bench_tx without '-f' or '-S'.
I see. I guess you heard the arguments why the test does not compare udp segmentation with udp fragmentation:
- fragmentation is particularly expensive on the receiver side - fragmentation cannot be offloaded, while segmentation can
Sincerely.
The local protocol stack removed UFO in series d9d30adf5677. USO can be offloaded to hardware by quite a few devices (NETIF_F_GSO_UDP_L4).
UDP GSO should compare performance with IP fragmentation.
I think it is misleading to think the cost of IP fragmentation matters
linux-kselftest-mirror@lists.linaro.org