Chengen Du wrote:
Hi Willem,
Thank you for your comments on patch v4. I have made some modifications and would like to discuss some of your suggestions in detail.
Thanks for the revision. It addresses many of my comments.
On Tue, Jun 4, 2024 at 1:50 PM Chengen Du chengen.du@canonical.com wrote:
The issue initially stems from libpcap. The ethertype will be overwritten as the VLAN TPID if the network interface lacks hardware VLAN offloading. In the outbound packet path, if hardware VLAN offloading is unavailable, the VLAN tag is inserted into the payload but then cleared from the sk_buff struct. Consequently, this can lead to a false negative when checking for the presence of a VLAN tag, causing the packet sniffing outcome to lack VLAN tag information (i.e., TCI-TPID). As a result, the packet capturing tool may be unable to parse packets as expected.
The TCI-TPID is missing because the prb_fill_vlan_info() function does not modify the tp_vlan_tci/tp_vlan_tpid values, as the information is in the payload and not in the sk_buff struct. The skb_vlan_tag_present() function only checks vlan_all in the sk_buff struct. In cooked mode, the L2 header is stripped, preventing the packet capturing tool from determining the correct TCI-TPID value. Additionally, the protocol in SLL is incorrect, which means the packet capturing tool cannot parse the L3 header correctly.
Link: https://github.com/the-tcpdump-group/libpcap/issues/1105 Link: https://lore.kernel.org/netdev/20240520070348.26725-1-chengen.du@canonical.c... Fixes: 393e52e33c6c ("packet: deliver VLAN TCI to userspace") Cc: stable@vger.kernel.org Signed-off-by: Chengen Du chengen.du@canonical.com
net/packet/af_packet.c | 64 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ea3ebc160e25..53d51ac87ac6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -538,6 +538,52 @@ static void *packet_current_frame(struct packet_sock *po, return packet_lookup_frame(po, rb, rb->head, status); }
+static u16 vlan_get_tci(struct sk_buff *skb) +{
unsigned int vlan_depth = skb->mac_len;
struct vlan_hdr vhdr, *vh;
u8 *skb_head = skb->data;
int skb_len = skb->len;
if (vlan_depth) {
if (WARN_ON(vlan_depth < VLAN_HLEN))
return 0;
vlan_depth -= VLAN_HLEN;
} else {
vlan_depth = ETH_HLEN;
}
skb_push(skb, skb->data - skb_mac_header(skb));
vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr);
if (skb_head != skb->data) {
skb->data = skb_head;
skb->len = skb_len;
}
if (unlikely(!vh))
return 0;
return ntohs(vh->h_vlan_TCI);
+}
As you mentioned, we need the outermost VLAN tag to fit the protocol we referenced in tp_vlan_tpid. In if_vlan.h, __vlan_get_protocol() only provides the protocol and may affect other usage scenarios if we modify it to also provide TCI.
There is a similar function called __vlan_get_tag(), which also aims to extract TCI from the L2 header, but it doesn't check the header size as __vlan_get_protocol() does. To prevent affecting other usage scenarios, I introduced a new function to achieve our purpose.
__vlan_get_protocol exists to parse through possibly multiple VLAG tags to the inner non-VLAN protocol.
Since you only want the outer VLAN tag, you can use skb_header_pointer directly. No need for vlan_depth.
Similar to what __vlan_get_tag does, but with safe access as it is not guaranteed that the data beyond the link layer lies in skb linear.
Additionally, the starting point of skb->data differs in SOCK_RAW and SOCK_DGRAM cases. I accounted for this by using skb_push to ensure that skb->data starts from the L2 header.
Would you recommend modifying __vlan_get_tag() to add the size check logic instead? If so, we would also need to consider how to integrate the size check logic into both __vlan_get_protocol() and __vlan_get_tag().
No, let's not touch those.
+static __be16 vlan_get_protocol_dgram(struct sk_buff *skb) +{
__be16 proto = skb->protocol;
if (unlikely(eth_type_vlan(proto))) {
u8 *skb_head = skb->data;
int skb_len = skb->len;
skb_push(skb, skb->data - skb_mac_header(skb));
proto = __vlan_get_protocol(skb, proto, NULL);
if (skb_head != skb->data) {
skb->data = skb_head;
skb->len = skb_len;
}
}
return proto;
+}
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) { del_timer_sync(&pkc->retire_blk_timer); @@ -1011,6 +1057,10 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else if (unlikely(eth_type_vlan(pkc->skb->protocol))) {
ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb);
ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol);
ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { ppd->hv1.tp_vlan_tci = 0; ppd->hv1.tp_vlan_tpid = 0;
@@ -2428,6 +2478,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else if (unlikely(eth_type_vlan(skb->protocol))) {
h.h2->tp_vlan_tci = vlan_get_tci(skb);
h.h2->tp_vlan_tpid = ntohs(skb->protocol);
status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; h.h2->tp_vlan_tpid = 0;
@@ -2457,7 +2511,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ?
vlan_get_protocol_dgram(skb) : skb->protocol; sll->sll_pkttype = skb->pkt_type; if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex;
@@ -3482,7 +3537,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* Original length was stored in sockaddr_ll fields */ origlen = PACKET_SKB_CB(skb)->sa.origlen; sll->sll_family = AF_PACKET;
sll->sll_protocol = skb->protocol;
sll->sll_protocol = (sock->type == SOCK_DGRAM) ?
vlan_get_protocol_dgram(skb) : skb->protocol; } sock_recv_cmsgs(msg, sk, skb);
@@ -3539,6 +3595,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else if (unlikely(eth_type_vlan(skb->protocol))) {
aux.tp_vlan_tci = vlan_get_tci(skb);
aux.tp_vlan_tpid = ntohs(skb->protocol);
aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { aux.tp_vlan_tci = 0; aux.tp_vlan_tpid = 0;
-- 2.43.0