This is a preliminary patch to introduce IP6IP6 flowtable acceleration.
Signed-off-by: Lorenzo Bianconi lorenzo@kernel.org --- net/netfilter/nf_flow_table_ip.c | 80 ++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 36 deletions(-)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 78883343e5d686014752ec4fe1a28319cbf08845..d28c256d33dc5a8d07490b765747b5c6c48aa67d 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -142,7 +142,18 @@ static bool ip_has_options(unsigned int thoff) return thoff != sizeof(struct iphdr); }
-static void nf_flow_tuple_encap(struct sk_buff *skb, +struct nf_flowtable_ctx { + const struct net_device *in; + u32 offset; + u32 hdrsize; + struct { + u32 offset; + u8 proto; + } tun; +}; + +static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx, + struct sk_buff *skb, struct flow_offload_tuple *tuple) { __be16 inner_proto = skb->protocol; @@ -174,22 +185,15 @@ static void nf_flow_tuple_encap(struct sk_buff *skb, break; }
- if (inner_proto == htons(ETH_P_IP)) { + if (inner_proto == htons(ETH_P_IP) && + ctx->tun.proto == IPPROTO_IPIP) { iph = (struct iphdr *)(skb_network_header(skb) + offset); - if (iph->protocol == IPPROTO_IPIP) { - tuple->tun.dst_v4.s_addr = iph->daddr; - tuple->tun.src_v4.s_addr = iph->saddr; - tuple->tun.l3_proto = IPPROTO_IPIP; - } + tuple->tun.dst_v4.s_addr = iph->daddr; + tuple->tun.src_v4.s_addr = iph->saddr; + tuple->tun.l3_proto = IPPROTO_IPIP; } }
-struct nf_flowtable_ctx { - const struct net_device *in; - u32 offset; - u32 hdrsize; -}; - static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, struct flow_offload_tuple *tuple) { @@ -257,7 +261,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, tuple->l3proto = AF_INET; tuple->l4proto = ipproto; tuple->iifidx = ctx->in->ifindex; - nf_flow_tuple_encap(skb, tuple); + nf_flow_tuple_encap(ctx, skb, tuple);
return 0; } @@ -293,15 +297,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; }
-static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize) +static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx, + struct sk_buff *skb) { struct iphdr *iph; u16 size;
- if (!pskb_may_pull(skb, sizeof(*iph) + *psize)) + if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset)) return false;
- iph = (struct iphdr *)(skb_network_header(skb) + *psize); + iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); size = iph->ihl << 2;
if (ip_is_fragment(iph) || unlikely(ip_has_options(size))) @@ -310,25 +315,27 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize) if (iph->ttl <= 1) return false;
- if (iph->protocol == IPPROTO_IPIP) - *psize += size; + if (iph->protocol == IPPROTO_IPIP) { + ctx->tun.proto = IPPROTO_IPIP; + ctx->tun.offset = size; + ctx->offset += size; + }
return true; }
-static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb) +static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx, + struct sk_buff *skb) { - struct iphdr *iph = (struct iphdr *)skb_network_header(skb); - - if (iph->protocol != IPPROTO_IPIP) + if (ctx->tun.proto != IPPROTO_IPIP) return;
- skb_pull(skb, iph->ihl << 2); + skb_pull(skb, ctx->tun.offset); skb_reset_network_header(skb); }
-static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, - u32 *offset) +static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx, + struct sk_buff *skb, __be16 proto) { __be16 inner_proto = skb->protocol; struct vlan_ethhdr *veth; @@ -341,7 +348,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
veth = (struct vlan_ethhdr *)skb_mac_header(skb); if (veth->h_vlan_encapsulated_proto == proto) { - *offset += VLAN_HLEN; + ctx->offset += VLAN_HLEN; inner_proto = proto; ret = true; } @@ -349,19 +356,20 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, case htons(ETH_P_PPP_SES): if (nf_flow_pppoe_proto(skb, &inner_proto) && inner_proto == proto) { - *offset += PPPOE_SES_HLEN; + ctx->offset += PPPOE_SES_HLEN; ret = true; } break; }
if (inner_proto == htons(ETH_P_IP)) - ret = nf_flow_ip4_tunnel_proto(skb, offset); + ret = nf_flow_ip4_tunnel_proto(ctx, skb);
return ret; }
-static void nf_flow_encap_pop(struct sk_buff *skb, +static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, + struct sk_buff *skb, struct flow_offload_tuple_rhash *tuplehash) { struct vlan_hdr *vlan_hdr; @@ -388,7 +396,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb, }
if (skb->protocol == htons(ETH_P_IP)) - nf_flow_ip4_tunnel_pop(skb); + nf_flow_ip4_tunnel_pop(ctx, skb); }
struct nf_flow_xmit { @@ -414,7 +422,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx, { struct flow_offload_tuple tuple = {};
- if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset)) + if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP))) return NULL;
if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0) @@ -458,7 +466,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
flow_offload_refresh(flow_table, flow, false);
- nf_flow_encap_pop(skb, tuplehash); + nf_flow_encap_pop(ctx, skb, tuplehash); thoff -= ctx->offset;
iph = ip_hdr(skb); @@ -836,7 +844,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, tuple->l3proto = AF_INET6; tuple->l4proto = nexthdr; tuple->iifidx = ctx->in->ifindex; - nf_flow_tuple_encap(skb, tuple); + nf_flow_tuple_encap(ctx, skb, tuple);
return 0; } @@ -873,7 +881,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
flow_offload_refresh(flow_table, flow, false);
- nf_flow_encap_pop(skb, tuplehash); + nf_flow_encap_pop(ctx, skb, tuplehash);
ip6h = ipv6_hdr(skb); nf_flow_nat_ipv6(flow, skb, dir, ip6h); @@ -895,7 +903,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx, struct flow_offload_tuple tuple = {};
if (skb->protocol != htons(ETH_P_IPV6) && - !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset)) + !nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6))) return NULL;
if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)