sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple iovs becomes ITER_IOVEC. iter_iov_len does not return the correct value for UBUF, so teach it to treat UBUF differently.
Cc: Al Viro viro@zeniv.linux.org.uk Cc: Pavel Begunkov asml.silence@gmail.com Cc: Mina Almasry almasrymina@google.com Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Stanislav Fomichev stfomichev@gmail.com --- include/linux/uio.h | 8 +++++++- net/core/datagram.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h index 49ece9e1888f..393d0622cc28 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter) }
#define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) -#define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset) + +static inline size_t iter_iov_len(const struct iov_iter *i) +{ + if (i->iter_type == ITER_UBUF) + return i->count; + return iter_iov(i)->iov_len - i->iov_offset; +}
static inline enum iter_type iov_iter_type(const struct iov_iter *i) { diff --git a/net/core/datagram.c b/net/core/datagram.c index 9ef5442536f5..c44f1d2b70a4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -706,7 +706,8 @@ zerocopy_fill_skb_from_devmem(struct sk_buff *skb, struct iov_iter *from, * iov_addrs are interpreted as an offset in bytes into the dma-buf to * send from. We do not support other iter types. */ - if (iov_iter_type(from) != ITER_IOVEC) + if (iov_iter_type(from) != ITER_IOVEC && + iov_iter_type(from) != ITER_UBUF) return -EFAULT;
while (length && iov_iter_count(from)) {
Add new -z argument to specify max IOV size. By default, use single large IOV.
Signed-off-by: Stanislav Fomichev stfomichev@gmail.com --- .../selftests/drivers/net/hw/ncdevmem.c | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index ca723722a810..fc7ba7d71502 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -82,6 +82,9 @@ #define MSG_SOCK_DEVMEM 0x2000000 #endif
+#define MAX_IOV 1024 + +static size_t max_chunk; static char *server_ip; static char *client_ip; static char *port; @@ -834,10 +837,10 @@ static int do_client(struct memory_buffer *mem) struct sockaddr_in6 server_sin; struct sockaddr_in6 client_sin; struct ynl_sock *ys = NULL; + struct iovec iov[MAX_IOV]; struct msghdr msg = {}; ssize_t line_size = 0; struct cmsghdr *cmsg; - struct iovec iov[2]; char *line = NULL; unsigned long mid; size_t len = 0; @@ -893,27 +896,29 @@ static int do_client(struct memory_buffer *mem) if (line_size < 0) break;
- mid = (line_size / 2) + 1; - - iov[0].iov_base = (void *)1; - iov[0].iov_len = mid; - iov[1].iov_base = (void *)(mid + 2); - iov[1].iov_len = line_size - mid; + if (max_chunk) { + msg.msg_iovlen = + (line_size + max_chunk - 1) / max_chunk; + if (msg.msg_iovlen > MAX_IOV) + error(1, 0, + "can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV);
- provider->memcpy_to_device(mem, (size_t)iov[0].iov_base, line, - iov[0].iov_len); - provider->memcpy_to_device(mem, (size_t)iov[1].iov_base, - line + iov[0].iov_len, - iov[1].iov_len); + for (int i = 0; i < msg.msg_iovlen; i++) { + iov[i].iov_base = (void *)(i * max_chunk); + iov[i].iov_len = max_chunk; + }
- fprintf(stderr, - "read line_size=%ld iov[0].iov_base=%lu, iov[0].iov_len=%lu, iov[1].iov_base=%lu, iov[1].iov_len=%lu\n", - line_size, (unsigned long)iov[0].iov_base, - iov[0].iov_len, (unsigned long)iov[1].iov_base, - iov[1].iov_len); + iov[msg.msg_iovlen - 1].iov_len = + line_size - (msg.msg_iovlen - 1) * max_chunk; + } else { + iov[0].iov_base = 0; + iov[0].iov_len = line_size; + msg.msg_iovlen = 1; + }
msg.msg_iov = iov; - msg.msg_iovlen = 2; + provider->memcpy_to_device(mem, 0, line, line_size);
msg.msg_control = ctrl_data; msg.msg_controllen = sizeof(ctrl_data); @@ -934,7 +939,8 @@ static int do_client(struct memory_buffer *mem) fprintf(stderr, "sendmsg_ret=%d\n", ret);
if (ret != line_size) - error(1, errno, "Did not send all bytes"); + error(1, errno, "Did not send all bytes %d vs %zd", ret, + line_size);
wait_compl(socket_fd); } @@ -956,7 +962,7 @@ int main(int argc, char *argv[]) int is_server = 0, opt; int ret;
- while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { + while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { case 'l': is_server = 1; @@ -982,6 +988,9 @@ int main(int argc, char *argv[]) case 'f': ifname = optarg; break; + case 'z': + max_chunk = atoi(optarg); + break; case '?': fprintf(stderr, "unknown option: %c\n", optopt); break;
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev stfomichev@gmail.com wrote:
Add new -z argument to specify max IOV size. By default, use single large IOV.
Signed-off-by: Stanislav Fomichev stfomichev@gmail.com
.../selftests/drivers/net/hw/ncdevmem.c | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index ca723722a810..fc7ba7d71502 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -82,6 +82,9 @@ #define MSG_SOCK_DEVMEM 0x2000000 #endif
+#define MAX_IOV 1024
+static size_t max_chunk; static char *server_ip; static char *client_ip; static char *port; @@ -834,10 +837,10 @@ static int do_client(struct memory_buffer *mem) struct sockaddr_in6 server_sin; struct sockaddr_in6 client_sin; struct ynl_sock *ys = NULL;
struct iovec iov[MAX_IOV]; struct msghdr msg = {}; ssize_t line_size = 0; struct cmsghdr *cmsg;
struct iovec iov[2]; char *line = NULL; unsigned long mid; size_t len = 0;
@@ -893,27 +896,29 @@ static int do_client(struct memory_buffer *mem) if (line_size < 0) break;
mid = (line_size / 2) + 1;
iov[0].iov_base = (void *)1;
iov[0].iov_len = mid;
iov[1].iov_base = (void *)(mid + 2);
iov[1].iov_len = line_size - mid;
if (max_chunk) {
msg.msg_iovlen =
(line_size + max_chunk - 1) / max_chunk;
if (msg.msg_iovlen > MAX_IOV)
error(1, 0,
"can't partition %zd bytes into maximum of %d chunks",
line_size, MAX_IOV);
provider->memcpy_to_device(mem, (size_t)iov[0].iov_base, line,
iov[0].iov_len);
provider->memcpy_to_device(mem, (size_t)iov[1].iov_base,
line + iov[0].iov_len,
iov[1].iov_len);
for (int i = 0; i < msg.msg_iovlen; i++) {
iov[i].iov_base = (void *)(i * max_chunk);
iov[i].iov_len = max_chunk;
Isn't the last iov going to be truncated in the case where line_size is not exactly divisible with max_chunk?
}
fprintf(stderr,
"read line_size=%ld iov[0].iov_base=%lu, iov[0].iov_len=%lu, iov[1].iov_base=%lu, iov[1].iov_len=%lu\n",
line_size, (unsigned long)iov[0].iov_base,
iov[0].iov_len, (unsigned long)iov[1].iov_base,
iov[1].iov_len);
iov[msg.msg_iovlen - 1].iov_len =
line_size - (msg.msg_iovlen - 1) * max_chunk;
} else {
iov[0].iov_base = 0;
iov[0].iov_len = line_size;
msg.msg_iovlen = 1;
}
Do you need to special case this? Shouldn't this be the same as max_chunk==1?
msg.msg_iov = iov;
msg.msg_iovlen = 2;
provider->memcpy_to_device(mem, 0, line, line_size); msg.msg_control = ctrl_data; msg.msg_controllen = sizeof(ctrl_data);
@@ -934,7 +939,8 @@ static int do_client(struct memory_buffer *mem) fprintf(stderr, "sendmsg_ret=%d\n", ret);
if (ret != line_size)
error(1, errno, "Did not send all bytes");
error(1, errno, "Did not send all bytes %d vs %zd", ret,
line_size); wait_compl(socket_fd); }
@@ -956,7 +962,7 @@ int main(int argc, char *argv[]) int is_server = 0, opt; int ret;
while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { case 'l': is_server = 1;
@@ -982,6 +988,9 @@ int main(int argc, char *argv[]) case 'f': ifname = optarg; break;
case 'z':
max_chunk = atoi(optarg);
break; case '?': fprintf(stderr, "unknown option: %c\n", optopt); break;
-- 2.49.0
On 05/21, Mina Almasry wrote:
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev stfomichev@gmail.com wrote:
Add new -z argument to specify max IOV size. By default, use single large IOV.
Signed-off-by: Stanislav Fomichev stfomichev@gmail.com
.../selftests/drivers/net/hw/ncdevmem.c | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index ca723722a810..fc7ba7d71502 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -82,6 +82,9 @@ #define MSG_SOCK_DEVMEM 0x2000000 #endif
+#define MAX_IOV 1024
+static size_t max_chunk; static char *server_ip; static char *client_ip; static char *port; @@ -834,10 +837,10 @@ static int do_client(struct memory_buffer *mem) struct sockaddr_in6 server_sin; struct sockaddr_in6 client_sin; struct ynl_sock *ys = NULL;
struct iovec iov[MAX_IOV]; struct msghdr msg = {}; ssize_t line_size = 0; struct cmsghdr *cmsg;
struct iovec iov[2]; char *line = NULL; unsigned long mid; size_t len = 0;
@@ -893,27 +896,29 @@ static int do_client(struct memory_buffer *mem) if (line_size < 0) break;
mid = (line_size / 2) + 1;
iov[0].iov_base = (void *)1;
iov[0].iov_len = mid;
iov[1].iov_base = (void *)(mid + 2);
iov[1].iov_len = line_size - mid;
if (max_chunk) {
msg.msg_iovlen =
(line_size + max_chunk - 1) / max_chunk;
if (msg.msg_iovlen > MAX_IOV)
error(1, 0,
"can't partition %zd bytes into maximum of %d chunks",
line_size, MAX_IOV);
provider->memcpy_to_device(mem, (size_t)iov[0].iov_base, line,
iov[0].iov_len);
provider->memcpy_to_device(mem, (size_t)iov[1].iov_base,
line + iov[0].iov_len,
iov[1].iov_len);
for (int i = 0; i < msg.msg_iovlen; i++) {
iov[i].iov_base = (void *)(i * max_chunk);
iov[i].iov_len = max_chunk;
Isn't the last iov going to be truncated in the case where line_size is not exactly divisible with max_chunk?
I have this for the last iov entry:
iov[msg.msg_iovlen - 1].iov_len = line_size - (msg.msg_iovlen - 1) * max_chunk;
I think that should correctly adjust it to the remaining 1..max_chunk len?
}
fprintf(stderr,
"read line_size=%ld iov[0].iov_base=%lu, iov[0].iov_len=%lu, iov[1].iov_base=%lu, iov[1].iov_len=%lu\n",
line_size, (unsigned long)iov[0].iov_base,
iov[0].iov_len, (unsigned long)iov[1].iov_base,
iov[1].iov_len);
iov[msg.msg_iovlen - 1].iov_len =
line_size - (msg.msg_iovlen - 1) * max_chunk;
} else {
iov[0].iov_base = 0;
iov[0].iov_len = line_size;
msg.msg_iovlen = 1;
}
Do you need to special case this? Shouldn't this be the same as max_chunk==1?
I might need a better name. max_chunk is the max size of the iov entry (max iov_len), not the max number of IOVs. And I use max_chunk==0 as "max size of the iov is not provided -> use line_size", so not sure I can't make it work without a special case?
On Wed, May 21, 2025 at 10:39 AM Stanislav Fomichev stfomichev@gmail.com wrote:
On 05/21, Mina Almasry wrote:
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev stfomichev@gmail.com wrote:
Add new -z argument to specify max IOV size. By default, use single large IOV.
Signed-off-by: Stanislav Fomichev stfomichev@gmail.com
.../selftests/drivers/net/hw/ncdevmem.c | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index ca723722a810..fc7ba7d71502 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -82,6 +82,9 @@ #define MSG_SOCK_DEVMEM 0x2000000 #endif
+#define MAX_IOV 1024
+static size_t max_chunk; static char *server_ip; static char *client_ip; static char *port; @@ -834,10 +837,10 @@ static int do_client(struct memory_buffer *mem) struct sockaddr_in6 server_sin; struct sockaddr_in6 client_sin; struct ynl_sock *ys = NULL;
struct iovec iov[MAX_IOV]; struct msghdr msg = {}; ssize_t line_size = 0; struct cmsghdr *cmsg;
struct iovec iov[2]; char *line = NULL; unsigned long mid; size_t len = 0;
@@ -893,27 +896,29 @@ static int do_client(struct memory_buffer *mem) if (line_size < 0) break;
mid = (line_size / 2) + 1;
iov[0].iov_base = (void *)1;
iov[0].iov_len = mid;
iov[1].iov_base = (void *)(mid + 2);
iov[1].iov_len = line_size - mid;
if (max_chunk) {
msg.msg_iovlen =
(line_size + max_chunk - 1) / max_chunk;
if (msg.msg_iovlen > MAX_IOV)
error(1, 0,
"can't partition %zd bytes into maximum of %d chunks",
line_size, MAX_IOV);
provider->memcpy_to_device(mem, (size_t)iov[0].iov_base, line,
iov[0].iov_len);
provider->memcpy_to_device(mem, (size_t)iov[1].iov_base,
line + iov[0].iov_len,
iov[1].iov_len);
for (int i = 0; i < msg.msg_iovlen; i++) {
iov[i].iov_base = (void *)(i * max_chunk);
iov[i].iov_len = max_chunk;
Isn't the last iov going to be truncated in the case where line_size is not exactly divisible with max_chunk?
I have this for the last iov entry:
iov[msg.msg_iovlen - 1].iov_len = line_size - (msg.msg_iovlen - 1) * max_chunk;
I think that should correctly adjust it to the remaining 1..max_chunk len?
Thanks, I missed that line.
Reviewed-by: Mina Almasry almasrymina@google.com
Use prime 3 for length to make offset slowly drift away.
Signed-off-by: Stanislav Fomichev stfomichev@gmail.com --- .../testing/selftests/drivers/net/hw/devmem.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 7fc686cf47a2..d7f6a76eb2b7 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -49,12 +49,27 @@ from lib.py import ksft_disruptive ksft_eq(socat.stdout.strip(), "hello\nworld")
+@ksft_disruptive +def check_tx_chunks(cfg) -> None: + cfg.require_ipver("6") + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP6-LISTEN:{port}" + + with bkg(listen_cmd, exit_wait=True) as socat: + wait_port_listen(port) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + def main() -> None: with NetDrvEpEnv(__file__) as cfg: cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
- ksft_run([check_rx, check_tx], + ksft_run([check_rx, check_tx, check_tx_chunks], args=(cfg, )) ksft_exit()
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev stfomichev@gmail.com wrote:
Use prime 3 for length to make offset slowly drift away.
Signed-off-by: Stanislav Fomichev stfomichev@gmail.com
.../testing/selftests/drivers/net/hw/devmem.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 7fc686cf47a2..d7f6a76eb2b7 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -49,12 +49,27 @@ from lib.py import ksft_disruptive ksft_eq(socat.stdout.strip(), "hello\nworld")
+@ksft_disruptive +def check_tx_chunks(cfg) -> None:
- cfg.require_ipver("6")
- require_devmem(cfg)
- port = rand_port()
- listen_cmd = f"socat -U - TCP6-LISTEN:{port}"
- with bkg(listen_cmd, exit_wait=True) as socat:
wait_port_listen(port)
cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True)
- ksft_eq(socat.stdout.strip(), "hello\nworld")
def main() -> None: with NetDrvEpEnv(__file__) as cfg: cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
ksft_run([check_rx, check_tx],
ksft_run([check_rx, check_tx, check_tx_chunks], args=(cfg, )) ksft_exit()
-- 2.49.0
I was hoping we'd add the test coverage without the need to add test cases to the ksft. I was thinking maybe ncdevmem can do rand() each sendmsg loop and send a different set of chunks, so that we don't need a flag.
But it may be too hacky to have the test be non-deterministic, so up to you
Acked-by: Mina Almasry almasrymina@google.com
On 05/21, Mina Almasry wrote:
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev stfomichev@gmail.com wrote:
Use prime 3 for length to make offset slowly drift away.
Signed-off-by: Stanislav Fomichev stfomichev@gmail.com
.../testing/selftests/drivers/net/hw/devmem.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 7fc686cf47a2..d7f6a76eb2b7 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -49,12 +49,27 @@ from lib.py import ksft_disruptive ksft_eq(socat.stdout.strip(), "hello\nworld")
+@ksft_disruptive +def check_tx_chunks(cfg) -> None:
- cfg.require_ipver("6")
- require_devmem(cfg)
- port = rand_port()
- listen_cmd = f"socat -U - TCP6-LISTEN:{port}"
- with bkg(listen_cmd, exit_wait=True) as socat:
wait_port_listen(port)
cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True)
- ksft_eq(socat.stdout.strip(), "hello\nworld")
def main() -> None: with NetDrvEpEnv(__file__) as cfg: cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
ksft_run([check_rx, check_tx],
ksft_run([check_rx, check_tx, check_tx_chunks], args=(cfg, )) ksft_exit()
-- 2.49.0
I was hoping we'd add the test coverage without the need to add test cases to the ksft. I was thinking maybe ncdevmem can do rand() each sendmsg loop and send a different set of chunks, so that we don't need a flag.
But it may be too hacky to have the test be non-deterministic, so up to you
Acked-by: Mina Almasry almasrymina@google.com
Thanks! Let's spell it out explicitly for now. Doing it at random seems like an option when we start sending more data (after your series that converts the tests to -v).
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev stfomichev@gmail.com wrote:
sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple iovs becomes ITER_IOVEC. iter_iov_len does not return correct value for UBUF, so teach to treat UBUF differently.
Cc: Al Viro viro@zeniv.linux.org.uk Cc: Pavel Begunkov asml.silence@gmail.com Cc: Mina Almasry almasrymina@google.com Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Stanislav Fomichev stfomichev@gmail.com
include/linux/uio.h | 8 +++++++- net/core/datagram.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h index 49ece9e1888f..393d0622cc28 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter) }
#define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) -#define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset)
+static inline size_t iter_iov_len(const struct iov_iter *i) +{
if (i->iter_type == ITER_UBUF)
return i->count;
return iter_iov(i)->iov_len - i->iov_offset;
+}
This change looks good to me from devmem perspective, but aren't you potentially breaking all these existing callers to iter_iov_len?
ackc -i iter_iov_len fs/read_write.c 846: iter_iov_len(iter), ppos); 849: iter_iov_len(iter), ppos); 858: if (nr != iter_iov_len(iter))
mm/madvise.c 1808: size_t len_in = iter_iov_len(iter); 1838: iov_iter_advance(iter, iter_iov_len(iter));
io_uring/rw.c 710: len = iter_iov_len(iter);
Or are you confident this change is compatible with these callers for some reason?
Maybe better to handle this locally in zerocopy_fill_skb_from_devmem, and then follow up with a more ambitious change that streamlines how all the iters behave.
On 05/21, Mina Almasry wrote:
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev stfomichev@gmail.com wrote:
sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple iovs becomes ITER_IOVEC. iter_iov_len does not return correct value for UBUF, so teach to treat UBUF differently.
Cc: Al Viro viro@zeniv.linux.org.uk Cc: Pavel Begunkov asml.silence@gmail.com Cc: Mina Almasry almasrymina@google.com Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Stanislav Fomichev stfomichev@gmail.com
include/linux/uio.h | 8 +++++++- net/core/datagram.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h index 49ece9e1888f..393d0622cc28 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter) }
#define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) -#define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset)
+static inline size_t iter_iov_len(const struct iov_iter *i) +{
if (i->iter_type == ITER_UBUF)
return i->count;
return iter_iov(i)->iov_len - i->iov_offset;
+}
This change looks good to me from devmem perspective, but aren't you potentially breaking all these existing callers to iter_iov_len?
ackc -i iter_iov_len fs/read_write.c 846: iter_iov_len(iter), ppos); 849: iter_iov_len(iter), ppos); 858: if (nr != iter_iov_len(iter))
mm/madvise.c 1808: size_t len_in = iter_iov_len(iter); 1838: iov_iter_advance(iter, iter_iov_len(iter));
io_uring/rw.c 710: len = iter_iov_len(iter);
Or are you confident this change is compatible with these callers for some reason?
Pavel did go over all callers, see: https://lore.kernel.org/netdev/7f06216e-1e66-433e-a247-2445dac22498@gmail.co...
Maybe better to handle this locally in zerocopy_fill_skb_from_devmem, and then follow up with a more ambitious change that streamlines how all the iters behave.
Yes, I can definitely do that, but it seems a bit strange that the callers need to distinguish between IOVEC and UBUF (which is a 1-entry IOVEC), so having working iter_iov_len seems a bit cleaner.
On Wed, May 21, 2025 at 10:33 AM Stanislav Fomichev stfomichev@gmail.com wrote:
On 05/21, Mina Almasry wrote:
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev stfomichev@gmail.com wrote:
sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple iovs becomes ITER_IOVEC. iter_iov_len does not return correct value for UBUF, so teach to treat UBUF differently.
Cc: Al Viro viro@zeniv.linux.org.uk Cc: Pavel Begunkov asml.silence@gmail.com Cc: Mina Almasry almasrymina@google.com Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Stanislav Fomichev stfomichev@gmail.com
include/linux/uio.h | 8 +++++++- net/core/datagram.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h index 49ece9e1888f..393d0622cc28 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter) }
#define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) -#define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset)
+static inline size_t iter_iov_len(const struct iov_iter *i) +{
if (i->iter_type == ITER_UBUF)
return i->count;
return iter_iov(i)->iov_len - i->iov_offset;
+}
This change looks good to me from devmem perspective, but aren't you potentially breaking all these existing callers to iter_iov_len?
ackc -i iter_iov_len fs/read_write.c 846: iter_iov_len(iter), ppos); 849: iter_iov_len(iter), ppos); 858: if (nr != iter_iov_len(iter))
mm/madvise.c 1808: size_t len_in = iter_iov_len(iter); 1838: iov_iter_advance(iter, iter_iov_len(iter));
io_uring/rw.c 710: len = iter_iov_len(iter);
Or are you confident this change is compatible with these callers for some reason?
Pavel did go over all callers, see: https://lore.kernel.org/netdev/7f06216e-1e66-433e-a247-2445dac22498@gmail.co...
Thanks, I'm not confident enough in my understanding of the other call sites of iter_iov_len to provide a reviewed-by, but this looks fine to me for devmem TCP, so I think acked-by is appropriate.
Acked-by: Mina Almasry almasrymina@google.com
Thanks!
linux-kselftest-mirror@lists.linaro.org