With a raw socket bound to IPPROTO_RAW (ie with hdrincl enabled), the protocol field of the flow structure, build by raw_sendmsg() / rawv6_sendmsg()), is set to IPPROTO_RAW. This breaks the ipsec policy lookup when some policies are defined with a protocol in the selector.
For ipv6, the sin6_port field from 'struct sockaddr_in6' could be used to specify the protocol. Just accept all values for IPPROTO_RAW socket.
For ipv4, the sin_port field of 'struct sockaddr_in' could not be used without breaking backward compatibility (the value of this field was never checked). Let's add a new kind of control message, so that the userland could specify which protocol is used.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") CC: stable@vger.kernel.org Signed-off-by: Nicolas Dichtel nicolas.dichtel@6wind.com ---
The first version has been marked 'Awaiting Upstream'. Steffen confirmed that the 'net' tree should be the target, thus I resend this patch. I also CC stable@vger.kernel.org.
include/net/ip.h | 2 ++ include/uapi/linux/in.h | 1 + net/ipv4/ip_sockglue.c | 15 ++++++++++++++- net/ipv4/raw.c | 5 ++++- net/ipv6/raw.c | 3 ++- 5 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h index c3fffaa92d6e..acec504c469a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -76,6 +76,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options_rcu *opt; + __u8 protocol; __u8 ttl; __s16 tos; char priority; @@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->sockc.tsflags = inet->sk.sk_tsflags; ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; + ipcm->protocol = inet->inet_num; }
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 4b7f2df66b99..e682ab628dfa 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -163,6 +163,7 @@ struct in_addr { #define IP_MULTICAST_ALL 49 #define IP_UNICAST_IF 50 #define IP_LOCAL_PORT_RANGE 51 +#define IP_PROTOCOL 52
#define MCAST_EXCLUDE 0 #define MCAST_INCLUDE 1 diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b511ff0adc0a..ec0fbe874426 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -317,7 +317,17 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, ipc->tos = val; ipc->priority = rt_tos2priority(ipc->tos); break; - + case IP_PROTOCOL: + if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) + val = *(int *)CMSG_DATA(cmsg); + else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8))) + val = *(u8 *)CMSG_DATA(cmsg); + else + return -EINVAL; + if (val < 1 || val > 255) + return -EINVAL; + ipc->protocol = val; + break; default: return -EINVAL; } @@ -1761,6 +1771,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_LOCAL_PORT_RANGE: val = inet->local_port_range.hi << 16 | inet->local_port_range.lo; break; + case IP_PROTOCOL: + val = inet_sk(sk)->inet_num; + break; default: sockopt_release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff712bf2a98d..eadf1c9ef7e4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -532,6 +532,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) }
ipcm_init_sk(&ipc, inet); + /* Keep backward compat */ + if (hdrincl) + ipc.protocol = IPPROTO_RAW;
if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); @@ -599,7 +602,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, RT_SCOPE_UNIVERSE, - hdrincl ? IPPROTO_RAW : sk->sk_protocol, + hdrincl ? ipc.protocol : sk->sk_protocol, inet_sk_flowi_flags(sk) | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 7d0adb612bdd..44ee7a2e72ac 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -793,7 +793,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!proto) proto = inet->inet_num; - else if (proto != inet->inet_num) + else if (proto != inet->inet_num && + inet->inet_num != IPPROTO_RAW) return -EINVAL;
if (proto > 255)
On Tue, 2023-05-16 at 22:15 +0200, Nicolas Dichtel wrote:
With a raw socket bound to IPPROTO_RAW (ie with hdrincl enabled), the protocol field of the flow structure, build by raw_sendmsg() / rawv6_sendmsg()), is set to IPPROTO_RAW. This breaks the ipsec policy lookup when some policies are defined with a protocol in the selector.
For ipv6, the sin6_port field from 'struct sockaddr_in6' could be used to specify the protocol. Just accept all values for IPPROTO_RAW socket.
For ipv4, the sin_port field of 'struct sockaddr_in' could not be used without breaking backward compatibility (the value of this field was never checked). Let's add a new kind of control message, so that the userland could specify which protocol is used.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") CC: stable@vger.kernel.org Signed-off-by: Nicolas Dichtel nicolas.dichtel@6wind.com
The first version has been marked 'Awaiting Upstream'. Steffen confirmed that the 'net' tree should be the target, thus I resend this patch. I also CC stable@vger.kernel.org.
include/net/ip.h | 2 ++ include/uapi/linux/in.h | 1 + net/ipv4/ip_sockglue.c | 15 ++++++++++++++- net/ipv4/raw.c | 5 ++++- net/ipv6/raw.c | 3 ++- 5 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h index c3fffaa92d6e..acec504c469a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -76,6 +76,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options_rcu *opt;
- __u8 protocol; __u8 ttl; __s16 tos; char priority;
@@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->sockc.tsflags = inet->sk.sk_tsflags; ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr;
- ipcm->protocol = inet->inet_num;
} #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 4b7f2df66b99..e682ab628dfa 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -163,6 +163,7 @@ struct in_addr { #define IP_MULTICAST_ALL 49 #define IP_UNICAST_IF 50 #define IP_LOCAL_PORT_RANGE 51 +#define IP_PROTOCOL 52 #define MCAST_EXCLUDE 0 #define MCAST_INCLUDE 1 diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b511ff0adc0a..ec0fbe874426 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -317,7 +317,17 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, ipc->tos = val; ipc->priority = rt_tos2priority(ipc->tos); break;
case IP_PROTOCOL:
if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
val = *(int *)CMSG_DATA(cmsg);
else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
val = *(u8 *)CMSG_DATA(cmsg);
AFAICS the 'dual' u8 support for IP_TOS has been introduce to cope with asymmetry WRT recvmsg(). Here we don't have (yet) the recvmsg counter- part, and if/when that will be added we can use the correct data type.
I think we are better off supporting only int, as e.g. IP_TTL does.
Side note, the above code could be factored out in an helper to be used both for IP_PROTOCOL and IP_TTL (possibly in a net-next patch).
Thanks!
Paolo
linux-stable-mirror@lists.linaro.org