On 01/06, Song Yoong Siang wrote:
Add Launch Time hw offload request to xdp_hw_metadata. Users can configure the delta between the launch time and the HW RX-time by using the "-l" argument. The default delta is 100,000,000 nanoseconds (0.1 second).
Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f7b15d6c6ed..795c1d14e02d 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -13,6 +13,7 @@
- UDP 9091 packets trigger TX reply
- TX HW timestamp is requested and reported back upon completion
- TX checksum is requested
*/
- TX launch time HW offload is requested for transmission
#include <test_progs.h> @@ -64,6 +65,8 @@ int rxq; bool skip_tx; __u64 last_hw_rx_timestamp; __u64 last_xdp_rx_timestamp; +__u64 last_launch_time; +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */ void test__fail(void) { /* for network_helpers.c */ } @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id) if (meta->completion.tx_timestamp) { __u64 ref_tstamp = gettime(clock_id);
print_tstamp_delta("HW Launch-time", "HW TX-complete-time",
print_tstamp_delta("HW TX-complete-time", "User TX-complete-time", meta->completion.tx_timestamp, ref_tstamp); print_tstamp_delta("XDP RX-time", "User TX-complete-time",last_launch_time, meta->completion.tx_timestamp);
@@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) xsk, ntohs(udph->check), ntohs(want_csum), meta->request.csum_start, meta->request.csum_offset);
- /* Set the value of launch time */
- meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
- meta->request.launch_time = last_hw_rx_timestamp +
launch_time_delta_to_hw_rx_timestamp;
- last_launch_time = meta->request.launch_time;
- print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp,
meta->request.launch_time);
- memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ tx_desc->options |= XDP_TX_METADATA; tx_desc->len = len;
@@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) xsk_ring_prod__submit(&xsk->tx, 1); } +#define SLEEP_PER_ITERATION_IN_US 10 +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000) +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500) static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) { const struct xdp_desc *rx_desc; struct pollfd fds[rxq + 1];
- int max_iterations; __u64 comp_addr; __u64 addr; __u32 idx = 0;
@@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t fds[i].revents = 0; }
- /* Calculate max iterations to wait for transmit completion */
- max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp);
- fds[rxq].fd = server_fd; fds[rxq].events = POLLIN; fds[rxq].revents = 0;
@@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t if (ret) printf("kick_tx ret=%d\n", ret);
[..]
for (int j = 0; j < 500; j++) {
for (int j = 0; j < max_iterations; j++) { if (complete_tx(xsk, clock_id)) break;
usleep(10);
usleep(SLEEP_PER_ITERATION_IN_US);
nit: instead of doing MAX_ITERATION/max_iterations, can we simplify this to the following?
static u64 now(void)
{
	clock_gettime(...);
	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}
/* wait 5 seconds + cover launch time */
deadline = now() + 5 * NSEC_PER_SEC + launch_time_delta_to_hw_rx_timestamp;
while (true) {
	if (complete_tx())
		break;
	if (now() >= deadline)
		break;
	usleep(10);
}
This is a bit more readable than converting the time to wait into a number of iterations.