From: Bobby Eshleman bobbyeshleman@meta.com
Add SO_DEVMEM_AUTORELEASE socket option to allow applications to control token release behavior on a per-socket basis.
The socket option accepts a boolean value (0 or 1):
- 1 (true): outstanding tokens are automatically released when the socket closes
- 0 (false): outstanding tokens are released when the dmabuf is unbound
The option can only be changed while the socket has no outstanding tokens. This is enforced by checking that both of the following hold:
1. The frags xarray is empty (no tokens in autorelease mode)
2. The outstanding_urefs counter is zero (no tokens in manual mode)
This restriction prevents inconsistent token tracking state between acquisition and release calls. If either condition fails, setsockopt returns -EBUSY.
Autorelease is off by default: tcp_init_sock() now initializes sk_devmem_info.autorelease to false instead of true.
Signed-off-by: Bobby Eshleman bobbyeshleman@meta.com --- include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 51 +++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 2 +- tools/include/uapi/asm-generic/socket.h | 2 ++ 4 files changed, 56 insertions(+), 1 deletion(-)
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 53b5a8c002b1..59302318bb34 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -150,6 +150,8 @@ #define SO_INQ 84 #define SCM_INQ SO_INQ
+#define SO_DEVMEM_AUTORELEASE 85 + #if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/core/sock.c b/net/core/sock.c index 465645c1d74f..27af476f3cd3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1160,6 +1160,46 @@ sock_devmem_dontneed_autorelease(struct sock *sk, struct dmabuf_token *tokens, return ret; }
+static noinline_for_stack int +sock_devmem_set_autorelease(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + int val; + + if (!sk_is_tcp(sk)) + return -EBADF; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + + /* Validate that val is 0 or 1 */ + if (val != 0 && val != 1) + return -EINVAL; + + sockopt_lock_sock(sk); + + /* Can only change autorelease if: + * 1. No tokens in the frags xarray (autorelease mode) + * 2. No outstanding urefs (manual release mode) + */ + if (!xa_empty(&sk->sk_devmem_info.frags)) { + sockopt_release_sock(sk); + return -EBUSY; + } + + if (atomic_read(&sk->sk_devmem_info.outstanding_urefs) > 0) { + sockopt_release_sock(sk); + return -EBUSY; + } + + sk->sk_devmem_info.autorelease = !!val; + + sockopt_release_sock(sk); + return 0; +} + static noinline_for_stack int sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen) { @@ -1351,6 +1391,9 @@ int sk_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_PAGE_POOL case SO_DEVMEM_DONTNEED: return sock_devmem_dontneed(sk, optval, optlen); + + case SO_DEVMEM_AUTORELEASE: + return sock_devmem_set_autorelease(sk, optval, optlen); #endif case SO_SNDTIMEO_OLD: case SO_SNDTIMEO_NEW: @@ -2208,6 +2251,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = READ_ONCE(sk->sk_txrehash); break;
+#ifdef CONFIG_PAGE_POOL + case SO_DEVMEM_AUTORELEASE: + if (!sk_is_tcp(sk)) + return -EBADF; + v.val = sk->sk_devmem_info.autorelease; + break; +#endif + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 052875c1b547..8226ba892b36 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -496,7 +496,7 @@ void tcp_init_sock(struct sock *sk) xa_init_flags(&sk->sk_devmem_info.frags, XA_FLAGS_ALLOC1); sk->sk_devmem_info.binding = NULL; atomic_set(&sk->sk_devmem_info.outstanding_urefs, 0); - sk->sk_devmem_info.autorelease = true; + sk->sk_devmem_info.autorelease = false; } EXPORT_IPV6_MOD(tcp_init_sock);
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index f333a0ac4ee4..9710a3d7cc4d 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -147,6 +147,8 @@
#define SO_PASSRIGHTS 83
+#define SO_DEVMEM_AUTORELEASE 85 + #if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))