The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3edc6b0d6c061a70d8ca3c3c72eb1f58ce29bfb1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:51 +0200
Subject: [PATCH] mac80211: extend protection against mixed key and fragment
cache attacks
For some chips/drivers, e.g., QCA6174 with ath10k, the decryption is
done by the hardware, and the Protected bit in the Frame Control field
is cleared in the lower level driver before the frame is passed to
mac80211. In such cases, the condition for ieee80211_has_protected() is
not met in ieee80211_rx_h_defragment() of mac80211 and the new security
validation steps are not executed.
Extend mac80211 to cover the case where the Protected bit has been
cleared, but the frame is indicated as having been decrypted by the
hardware. This extends protection against mixed key and fragment cache
attacks for additional drivers/chips. This fixes CVE-2020-24586 and
CVE-2020-24587 for such cases.
Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1
Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.037aa5ca0390.I7bb888e2965a0db02a67…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 22a925899a9e..1bb43edd47b6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2229,6 +2229,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
unsigned int frag, seq;
struct ieee80211_fragment_entry *entry;
struct sk_buff *skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
hdr = (struct ieee80211_hdr *)rx->skb->data;
fc = hdr->frame_control;
@@ -2287,7 +2288,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
- } else if (rx->key && ieee80211_has_protected(fc)) {
+ } else if (rx->key &&
+ (ieee80211_has_protected(fc) ||
+ (status->flag & RX_FLAG_DECRYPTED))) {
entry->is_protected = true;
entry->key_color = rx->key->color;
}
@@ -2332,13 +2335,19 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
} else if (entry->is_protected &&
- (!rx->key || !ieee80211_has_protected(fc) ||
+ (!rx->key ||
+ (!ieee80211_has_protected(fc) &&
+ !(status->flag & RX_FLAG_DECRYPTED)) ||
rx->key->color != entry->key_color)) {
/* Drop this as a mixed key or fragment cache attack, even
* if for TKIP Michael MIC should protect us, and WEP is a
* lost cause anyway.
*/
return RX_DROP_UNUSABLE;
+ } else if (entry->is_protected && rx->key &&
+ entry->key_color != rx->key->color &&
+ (status->flag & RX_FLAG_DECRYPTED)) {
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
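
For anyone preparing the backport: the logic the hunks above add boils
down to the following minimal sketch (standalone C with invented names,
not the actual mac80211 symbols). A defrag entry counts as protected
once either the Protected bit was set or the driver reported hardware
decryption via RX_FLAG_DECRYPTED, and a later fragment is dropped if it
fails that same test or was decrypted with a different key ("color"):

#include <stdbool.h>

struct frag_entry {
	bool is_protected;
	unsigned int key_color;
};

/* "protected" now means: Protected bit set OR hardware-decrypted */
static bool frame_is_protected(bool protected_bit, bool hw_decrypted)
{
	return protected_bit || hw_decrypted;
}

static bool drop_fragment(const struct frag_entry *entry, bool have_key,
			  unsigned int key_color, bool protected_bit,
			  bool hw_decrypted)
{
	if (!entry->is_protected)
		return false;
	/* mixed key / fragment cache attack: key gone, frame no longer
	 * looks protected, or a different key than the first fragment */
	return !have_key ||
	       !frame_is_protected(protected_bit, hw_decrypted) ||
	       key_color != entry->key_color;
}
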
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3edc6b0d6c061a70d8ca3c3c72eb1f58ce29bfb1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:51 +0200
Subject: [PATCH] mac80211: extend protection against mixed key and fragment
cache attacks
For some chips/drivers, e.g., QCA6174 with ath10k, the decryption is
done by the hardware, and the Protected bit in the Frame Control field
is cleared in the lower level driver before the frame is passed to
mac80211. In such cases, the condition for ieee80211_has_protected() is
not met in ieee80211_rx_h_defragment() of mac80211 and the new security
validation steps are not executed.
Extend mac80211 to cover the case where the Protected bit has been
cleared, but the frame is indicated as having been decrypted by the
hardware. This extends protection against mixed key and fragment cache
attacks for additional drivers/chips. This fixes CVE-2020-24586 and
CVE-2020-24587 for such cases.
Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1
Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.037aa5ca0390.I7bb888e2965a0db02a67…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 22a925899a9e..1bb43edd47b6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2229,6 +2229,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
unsigned int frag, seq;
struct ieee80211_fragment_entry *entry;
struct sk_buff *skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
hdr = (struct ieee80211_hdr *)rx->skb->data;
fc = hdr->frame_control;
@@ -2287,7 +2288,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
- } else if (rx->key && ieee80211_has_protected(fc)) {
+ } else if (rx->key &&
+ (ieee80211_has_protected(fc) ||
+ (status->flag & RX_FLAG_DECRYPTED))) {
entry->is_protected = true;
entry->key_color = rx->key->color;
}
@@ -2332,13 +2335,19 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
} else if (entry->is_protected &&
- (!rx->key || !ieee80211_has_protected(fc) ||
+ (!rx->key ||
+ (!ieee80211_has_protected(fc) &&
+ !(status->flag & RX_FLAG_DECRYPTED)) ||
rx->key->color != entry->key_color)) {
/* Drop this as a mixed key or fragment cache attack, even
* if for TKIP Michael MIC should protect us, and WEP is a
* lost cause anyway.
*/
return RX_DROP_UNUSABLE;
+ } else if (entry->is_protected && rx->key &&
+ entry->key_color != rx->key->color &&
+ (status->flag & RX_FLAG_DECRYPTED)) {
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3edc6b0d6c061a70d8ca3c3c72eb1f58ce29bfb1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:51 +0200
Subject: [PATCH] mac80211: extend protection against mixed key and fragment
cache attacks
For some chips/drivers, e.g., QCA6174 with ath10k, the decryption is
done by the hardware, and the Protected bit in the Frame Control field
is cleared in the lower level driver before the frame is passed to
mac80211. In such cases, the condition for ieee80211_has_protected() is
not met in ieee80211_rx_h_defragment() of mac80211 and the new security
validation steps are not executed.
Extend mac80211 to cover the case where the Protected bit has been
cleared, but the frame is indicated as having been decrypted by the
hardware. This extends protection against mixed key and fragment cache
attacks for additional drivers/chips. This fixes CVE-2020-24586 and
CVE-2020-24587 for such cases.
Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1
Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.037aa5ca0390.I7bb888e2965a0db02a67…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 22a925899a9e..1bb43edd47b6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2229,6 +2229,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
unsigned int frag, seq;
struct ieee80211_fragment_entry *entry;
struct sk_buff *skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
hdr = (struct ieee80211_hdr *)rx->skb->data;
fc = hdr->frame_control;
@@ -2287,7 +2288,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
- } else if (rx->key && ieee80211_has_protected(fc)) {
+ } else if (rx->key &&
+ (ieee80211_has_protected(fc) ||
+ (status->flag & RX_FLAG_DECRYPTED))) {
entry->is_protected = true;
entry->key_color = rx->key->color;
}
@@ -2332,13 +2335,19 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
} else if (entry->is_protected &&
- (!rx->key || !ieee80211_has_protected(fc) ||
+ (!rx->key ||
+ (!ieee80211_has_protected(fc) &&
+ !(status->flag & RX_FLAG_DECRYPTED)) ||
rx->key->color != entry->key_color)) {
/* Drop this as a mixed key or fragment cache attack, even
* if for TKIP Michael MIC should protect us, and WEP is a
* lost cause anyway.
*/
return RX_DROP_UNUSABLE;
+ } else if (entry->is_protected && rx->key &&
+ entry->key_color != rx->key->color &&
+ (status->flag & RX_FLAG_DECRYPTED)) {
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a8c4d76a8dd4fb9666fc8919a703d85fb8f44ed8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:50 +0200
Subject: [PATCH] mac80211: do not accept/forward invalid EAPOL frames
EAPOL frames are used for authentication and key management between the
AP and each individual STA associated in the BSS. Those frames are not
supposed to be sent by one associated STA to another associated STA
(either unicast or broadcast/multicast).
Similarly, in 802.11 they're supposed to be sent to the authenticator
(AP) address.
Since it is possible for unexpected EAPOL frames to result in misbehavior
in supplicant implementations, it is better for the AP to not allow such
cases to be forwarded to other clients either directly, or indirectly if
the AP interface is part of a bridge.
Accept EAPOL (control port) frames only if they're transmitted to the
own address, or, due to interoperability concerns, to the PAE group
address.
Disable forwarding of EAPOL (or well, the configured control port
protocol) frames back to wireless medium in all cases. Previously, these
frames were accepted from fully authenticated and authorized stations
and also from unauthenticated stations for one of the cases.
Additionally, to avoid forwarding by the bridge, rewrite the PAE group
address case to the local MAC address.
Cc: stable@vger.kernel.org
Co-developed-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.cb327ed0cabe.Ib7dcffa2a31f0913d660…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4454ec47283f..22a925899a9e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2531,13 +2531,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
/*
- * Allow EAPOL frames to us/the PAE group address regardless
- * of whether the frame was encrypted or not.
+ * Allow EAPOL frames to us/the PAE group address regardless of
+ * whether the frame was encrypted or not, and always disallow
+ * all other destination addresses for them.
*/
- if (ehdr->h_proto == rx->sdata->control_port_protocol &&
- (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
- ether_addr_equal(ehdr->h_dest, pae_group_addr)))
- return true;
+ if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
+ return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+ ether_addr_equal(ehdr->h_dest, pae_group_addr);
if (ieee80211_802_1x_port_control(rx) ||
ieee80211_drop_unencrypted(rx, fc))
@@ -2562,8 +2562,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
cfg80211_rx_control_port(dev, skb, noencrypt);
dev_kfree_skb(skb);
} else {
+ struct ethhdr *ehdr = (void *)skb_mac_header(skb);
+
memset(skb->cb, 0, sizeof(skb->cb));
+ /*
+ * 802.1X over 802.11 requires that the authenticator address
+ * be used for EAPOL frames. However, 802.1X allows the use of
+ * the PAE group address instead. If the interface is part of
+ * a bridge and we pass the frame with the PAE group address,
+ * then the bridge will forward it to the network (even if the
+ * client was not associated yet), which isn't supposed to
+ * happen.
+ * To avoid that, rewrite the destination address to our own
+ * address, so that the authenticator (e.g. hostapd) will see
+ * the frame, but bridge won't forward it anywhere else. Note
+ * that due to earlier filtering, the only other address can
+ * be the PAE group address.
+ */
+ if (unlikely(skb->protocol == sdata->control_port_protocol &&
+ !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+ ether_addr_copy(ehdr->h_dest, sdata->vif.addr);
+
/* deliver to local stack */
if (rx->list)
list_add_tail(&skb->list, rx->list);
@@ -2603,6 +2623,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
+ ehdr->h_proto != rx->sdata->control_port_protocol &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
if (is_multicast_ether_addr(ehdr->h_dest) &&
ieee80211_vif_get_num_mcast_if(sdata) != 0) {
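
For reference when adapting this to older trees, the two address rules
being enforced can be sketched standalone (plain C with made-up helper
names; the real code uses ether_addr_equal()/ether_addr_copy() on
ehdr->h_dest): accept EAPOL only to our own address or to the
01:80:C2:00:00:03 PAE group address, and rewrite the latter to our own
address before local delivery so a bridge has nothing to flood:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define ETH_ALEN 6

static const uint8_t pae_group_addr[ETH_ALEN] = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x03
};

static bool addr_equal(const uint8_t *a, const uint8_t *b)
{
	return memcmp(a, b, ETH_ALEN) == 0;
}

/* rule 1: accept EAPOL frames only to us or the PAE group address */
static bool eapol_dest_allowed(const uint8_t *dest, const uint8_t *own)
{
	return addr_equal(dest, own) || addr_equal(dest, pae_group_addr);
}

/* rule 2: before local delivery, rewrite anything that is not our own
 * address (by rule 1 it can only be the PAE group address), so a
 * bridge on top of the AP interface won't forward it anywhere */
static void eapol_rewrite_dest(uint8_t *dest, const uint8_t *own)
{
	if (!addr_equal(dest, own))
		memcpy(dest, own, ETH_ALEN);
}
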
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a8c4d76a8dd4fb9666fc8919a703d85fb8f44ed8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:50 +0200
Subject: [PATCH] mac80211: do not accept/forward invalid EAPOL frames
EAPOL frames are used for authentication and key management between the
AP and each individual STA associated in the BSS. Those frames are not
supposed to be sent by one associated STA to another associated STA
(either unicast or broadcast/multicast).
Similarly, in 802.11 they're supposed to be sent to the authenticator
(AP) address.
Since it is possible for unexpected EAPOL frames to result in misbehavior
in supplicant implementations, it is better for the AP to not allow such
cases to be forwarded to other clients either directly, or indirectly if
the AP interface is part of a bridge.
Accept EAPOL (control port) frames only if they're transmitted to the
own address, or, due to interoperability concerns, to the PAE group
address.
Disable forwarding of EAPOL (or well, the configured control port
protocol) frames back to wireless medium in all cases. Previously, these
frames were accepted from fully authenticated and authorized stations
and also from unauthenticated stations for one of the cases.
Additionally, to avoid forwarding by the bridge, rewrite the PAE group
address case to the local MAC address.
Cc: stable@vger.kernel.org
Co-developed-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.cb327ed0cabe.Ib7dcffa2a31f0913d660…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4454ec47283f..22a925899a9e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2531,13 +2531,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
/*
- * Allow EAPOL frames to us/the PAE group address regardless
- * of whether the frame was encrypted or not.
+ * Allow EAPOL frames to us/the PAE group address regardless of
+ * whether the frame was encrypted or not, and always disallow
+ * all other destination addresses for them.
*/
- if (ehdr->h_proto == rx->sdata->control_port_protocol &&
- (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
- ether_addr_equal(ehdr->h_dest, pae_group_addr)))
- return true;
+ if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
+ return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+ ether_addr_equal(ehdr->h_dest, pae_group_addr);
if (ieee80211_802_1x_port_control(rx) ||
ieee80211_drop_unencrypted(rx, fc))
@@ -2562,8 +2562,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
cfg80211_rx_control_port(dev, skb, noencrypt);
dev_kfree_skb(skb);
} else {
+ struct ethhdr *ehdr = (void *)skb_mac_header(skb);
+
memset(skb->cb, 0, sizeof(skb->cb));
+ /*
+ * 802.1X over 802.11 requires that the authenticator address
+ * be used for EAPOL frames. However, 802.1X allows the use of
+ * the PAE group address instead. If the interface is part of
+ * a bridge and we pass the frame with the PAE group address,
+ * then the bridge will forward it to the network (even if the
+ * client was not associated yet), which isn't supposed to
+ * happen.
+ * To avoid that, rewrite the destination address to our own
+ * address, so that the authenticator (e.g. hostapd) will see
+ * the frame, but bridge won't forward it anywhere else. Note
+ * that due to earlier filtering, the only other address can
+ * be the PAE group address.
+ */
+ if (unlikely(skb->protocol == sdata->control_port_protocol &&
+ !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+ ether_addr_copy(ehdr->h_dest, sdata->vif.addr);
+
/* deliver to local stack */
if (rx->list)
list_add_tail(&skb->list, rx->list);
@@ -2603,6 +2623,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
+ ehdr->h_proto != rx->sdata->control_port_protocol &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
if (is_multicast_ether_addr(ehdr->h_dest) &&
ieee80211_vif_get_num_mcast_if(sdata) != 0) {
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a8c4d76a8dd4fb9666fc8919a703d85fb8f44ed8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:50 +0200
Subject: [PATCH] mac80211: do not accept/forward invalid EAPOL frames
EAPOL frames are used for authentication and key management between the
AP and each individual STA associated in the BSS. Those frames are not
supposed to be sent by one associated STA to another associated STA
(either unicast or broadcast/multicast).
Similarly, in 802.11 they're supposed to be sent to the authenticator
(AP) address.
Since it is possible for unexpected EAPOL frames to result in misbehavior
in supplicant implementations, it is better for the AP to not allow such
cases to be forwarded to other clients either directly, or indirectly if
the AP interface is part of a bridge.
Accept EAPOL (control port) frames only if they're transmitted to the
own address, or, due to interoperability concerns, to the PAE group
address.
Disable forwarding of EAPOL (or well, the configured control port
protocol) frames back to wireless medium in all cases. Previously, these
frames were accepted from fully authenticated and authorized stations
and also from unauthenticated stations for one of the cases.
Additionally, to avoid forwarding by the bridge, rewrite the PAE group
address case to the local MAC address.
Cc: stable@vger.kernel.org
Co-developed-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.cb327ed0cabe.Ib7dcffa2a31f0913d660…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4454ec47283f..22a925899a9e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2531,13 +2531,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
/*
- * Allow EAPOL frames to us/the PAE group address regardless
- * of whether the frame was encrypted or not.
+ * Allow EAPOL frames to us/the PAE group address regardless of
+ * whether the frame was encrypted or not, and always disallow
+ * all other destination addresses for them.
*/
- if (ehdr->h_proto == rx->sdata->control_port_protocol &&
- (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
- ether_addr_equal(ehdr->h_dest, pae_group_addr)))
- return true;
+ if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
+ return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+ ether_addr_equal(ehdr->h_dest, pae_group_addr);
if (ieee80211_802_1x_port_control(rx) ||
ieee80211_drop_unencrypted(rx, fc))
@@ -2562,8 +2562,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
cfg80211_rx_control_port(dev, skb, noencrypt);
dev_kfree_skb(skb);
} else {
+ struct ethhdr *ehdr = (void *)skb_mac_header(skb);
+
memset(skb->cb, 0, sizeof(skb->cb));
+ /*
+ * 802.1X over 802.11 requires that the authenticator address
+ * be used for EAPOL frames. However, 802.1X allows the use of
+ * the PAE group address instead. If the interface is part of
+ * a bridge and we pass the frame with the PAE group address,
+ * then the bridge will forward it to the network (even if the
+ * client was not associated yet), which isn't supposed to
+ * happen.
+ * To avoid that, rewrite the destination address to our own
+ * address, so that the authenticator (e.g. hostapd) will see
+ * the frame, but bridge won't forward it anywhere else. Note
+ * that due to earlier filtering, the only other address can
+ * be the PAE group address.
+ */
+ if (unlikely(skb->protocol == sdata->control_port_protocol &&
+ !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+ ether_addr_copy(ehdr->h_dest, sdata->vif.addr);
+
/* deliver to local stack */
if (rx->list)
list_add_tail(&skb->list, rx->list);
@@ -2603,6 +2623,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
+ ehdr->h_proto != rx->sdata->control_port_protocol &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
if (is_multicast_ether_addr(ehdr->h_dest) &&
ieee80211_vif_get_num_mcast_if(sdata) != 0) {
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e44a0b597f04e67eee8cdcbe7ee706c6f5de38b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:49 +0200
Subject: [PATCH] mac80211: prevent attacks on TKIP/WEP as well
Similar to the issues fixed in previous patches, TKIP and WEP
should be protected even if for TKIP we have the Michael MIC
protecting it, and WEP is broken anyway.
However, this also somewhat protects potential other algorithms
that drivers might implement.
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.430e8c202313.Ia37e4e5b6b3eaab1a5ae…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b619c47e1d12..4454ec47283f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2274,6 +2274,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
* next fragment has a sequential PN value.
*/
entry->check_sequential_pn = true;
+ entry->is_protected = true;
entry->key_color = rx->key->color;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
@@ -2286,6 +2287,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
+ } else if (rx->key && ieee80211_has_protected(fc)) {
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
}
return RX_QUEUED;
}
@@ -2327,6 +2331,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
+ } else if (entry->is_protected &&
+ (!rx->key || !ieee80211_has_protected(fc) ||
+ rx->key->color != entry->key_color)) {
+ /* Drop this as a mixed key or fragment cache attack, even
+ * if for TKIP Michael MIC should protect us, and WEP is a
+ * lost cause anyway.
+ */
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c56d29a619e..0333072ebd98 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -455,7 +455,8 @@ struct ieee80211_fragment_entry {
u16 extra_len;
u16 last_frag;
u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
+ u8 check_sequential_pn:1, /* needed for CCMP/GCMP */
+ is_protected:1;
u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
unsigned int key_color;
};
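
The "key color" recorded here predates this patch; as a rough sketch of
the idea (invented names, not the kernel code): every key installation
draws a fresh value from a monotonic counter, so two fragments that
were decrypted under different keys disagree on the color even when
everything else about them matches:

struct key {
	unsigned int color;	/* assigned once at install time */
	/* ... key material elided ... */
};

static unsigned int key_color_counter;

static void install_key(struct key *k)
{
	/* a value never handed out before, so comparing colors
	 * detects a rekey between two fragments of one frame */
	k->color = ++key_color_counter;
}
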
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e44a0b597f04e67eee8cdcbe7ee706c6f5de38b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:49 +0200
Subject: [PATCH] mac80211: prevent attacks on TKIP/WEP as well
Similar to the issues fixed in previous patches, TKIP and WEP
should be protected even if for TKIP we have the Michael MIC
protecting it, and WEP is broken anyway.
However, this also somewhat protects potential other algorithms
that drivers might implement.
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.430e8c202313.Ia37e4e5b6b3eaab1a5ae…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b619c47e1d12..4454ec47283f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2274,6 +2274,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
* next fragment has a sequential PN value.
*/
entry->check_sequential_pn = true;
+ entry->is_protected = true;
entry->key_color = rx->key->color;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
@@ -2286,6 +2287,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
+ } else if (rx->key && ieee80211_has_protected(fc)) {
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
}
return RX_QUEUED;
}
@@ -2327,6 +2331,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
+ } else if (entry->is_protected &&
+ (!rx->key || !ieee80211_has_protected(fc) ||
+ rx->key->color != entry->key_color)) {
+ /* Drop this as a mixed key or fragment cache attack, even
+ * if for TKIP Michael MIC should protect us, and WEP is a
+ * lost cause anyway.
+ */
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c56d29a619e..0333072ebd98 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -455,7 +455,8 @@ struct ieee80211_fragment_entry {
u16 extra_len;
u16 last_frag;
u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
+ u8 check_sequential_pn:1, /* needed for CCMP/GCMP */
+ is_protected:1;
u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
unsigned int key_color;
};
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e44a0b597f04e67eee8cdcbe7ee706c6f5de38b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:49 +0200
Subject: [PATCH] mac80211: prevent attacks on TKIP/WEP as well
Similar to the issues fixed in previous patches, TKIP and WEP
should be protected even if for TKIP we have the Michael MIC
protecting it, and WEP is broken anyway.
However, this also somewhat protects potential other algorithms
that drivers might implement.
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.430e8c202313.Ia37e4e5b6b3eaab1a5ae…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b619c47e1d12..4454ec47283f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2274,6 +2274,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
* next fragment has a sequential PN value.
*/
entry->check_sequential_pn = true;
+ entry->is_protected = true;
entry->key_color = rx->key->color;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
@@ -2286,6 +2287,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
+ } else if (rx->key && ieee80211_has_protected(fc)) {
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
}
return RX_QUEUED;
}
@@ -2327,6 +2331,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
+ } else if (entry->is_protected &&
+ (!rx->key || !ieee80211_has_protected(fc) ||
+ rx->key->color != entry->key_color)) {
+ /* Drop this as a mixed key or fragment cache attack, even
+ * if for TKIP Michael MIC should protect us, and WEP is a
+ * lost cause anyway.
+ */
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c56d29a619e..0333072ebd98 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -455,7 +455,8 @@ struct ieee80211_fragment_entry {
u16 extra_len;
u16 last_frag;
u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
+ u8 check_sequential_pn:1, /* needed for CCMP/GCMP */
+ is_protected:1;
u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
unsigned int key_color;
};
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bf30ca922a0c0176007e074b0acc77ed345e9990 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:48 +0200
Subject: [PATCH] mac80211: check defrag PN against current frame
As pointed out by Mathy Vanhoef, we implement the RX PN check
on fragmented frames incorrectly - we check against the last
received PN prior to the new frame, rather than to the one in
this frame itself.
Prior patches addressed the security issue here, but in order
to be able to reason better about the code, fix it to really
compare against the current frame's PN, not the last stored
one.
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.bfbc340ff071.Id0b690e581da7d03d76d…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 4c714375bad0..214404a558fb 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -223,8 +223,15 @@ struct ieee80211_rx_data {
*/
int security_idx;
- u32 tkip_iv32;
- u16 tkip_iv16;
+ union {
+ struct {
+ u32 iv32;
+ u16 iv16;
+ } tkip;
+ struct {
+ u8 pn[IEEE80211_CCMP_PN_LEN];
+ } ccm_gcm;
+ };
};
struct ieee80211_csa_settings {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7212a1bebd0c..b619c47e1d12 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2308,7 +2308,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (entry->check_sequential_pn) {
int i;
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
- int queue;
if (!requires_sequential_pn(rx, fc))
return RX_DROP_UNUSABLE;
@@ -2323,8 +2322,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (pn[i])
break;
}
- queue = rx->security_idx;
- rpn = rx->key->u.ccmp.rx_pn[queue];
+
+ rpn = rx->ccm_gcm.pn;
if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 91bf32af55e9..bca47fad5a16 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -3,6 +3,7 @@
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2008, Jouni Malinen <j@w1.fi>
* Copyright (C) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2020-2021 Intel Corporation
*/
#include <linux/netdevice.h>
@@ -167,8 +168,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
update_iv:
/* update IV in key information to be able to detect replays */
- rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32;
- rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16;
+ rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32;
+ rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16;
return RX_CONTINUE;
@@ -294,8 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
key, skb->data + hdrlen,
skb->len - hdrlen, rx->sta->sta.addr,
hdr->addr1, hwaccel, rx->security_idx,
- &rx->tkip_iv32,
- &rx->tkip_iv16);
+ &rx->tkip.iv32,
+ &rx->tkip.iv16);
if (res != TKIP_DECRYPT_OK)
return RX_DROP_UNUSABLE;
@@ -553,6 +554,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
}
memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
+ if (unlikely(ieee80211_is_frag(hdr)))
+ memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
}
/* Remove CCMP header and MIC */
@@ -781,6 +784,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
}
memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
+ if (unlikely(ieee80211_is_frag(hdr)))
+ memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
}
/* Remove GCMP header and MIC */
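
The sequential-PN rule that now runs against the current frame's PN
(saved in rx->ccm_gcm.pn at decrypt time) can be sketched standalone
(plain C, 6-byte big-endian PN as in CCMP/GCMP): the expected PN is the
previous fragment's PN plus one, computed with a byte-wise carry from
the low-order end:

#include <stdint.h>
#include <string.h>

#define PN_LEN 6	/* IEEE80211_CCMP_PN_LEN */

/* expected = last + 1; PN is stored most-significant byte first */
static void pn_increment(uint8_t expected[PN_LEN],
			 const uint8_t last[PN_LEN])
{
	int i;

	memcpy(expected, last, PN_LEN);
	for (i = PN_LEN - 1; i >= 0; i--)
		if (++expected[i])	/* stop once there is no carry */
			break;
}

/* a fragment is acceptable only if its own PN is exactly last_pn + 1 */
static int frag_pn_sequential(const uint8_t last_pn[PN_LEN],
			      const uint8_t cur_pn[PN_LEN])
{
	uint8_t expected[PN_LEN];

	pn_increment(expected, last_pn);
	return memcmp(expected, cur_pn, PN_LEN) == 0;
}
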
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a11ce08c45b50d69c891d71760b7c5b92074709 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:47 +0200
Subject: [PATCH] mac80211: add fragment cache to sta_info
Prior patches protected against fragmentation cache attacks
by coloring keys, but this shows that it can lead to issues
when multiple stations use the same sequence number. Add a
fragment cache to struct sta_info (in addition to the one in
the interface) to separate fragments for different stations
properly.
This then automatically clears most of the fragment cache when a
station disconnects (or reassociates) from an AP, or when client
interfaces disconnect from the network, etc.
On the way, also fix the comment there since this brings us in line
with the recommendation in 802.11-2016 ("An AP should support ...").
Additionally, remove a useless condition (since there's no problem
purging an already empty list).
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.fc35046b0d52.I1ef101e3784d13e8f660…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 874ffe7819e5..4c714375bad0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
#define IEEE80211_ENCRYPT_HEADROOM 8
#define IEEE80211_ENCRYPT_TAILROOM 18
-/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
- * reception of at least three fragmented frames. This limit can be increased
- * by changing this define, at the cost of slower frame reassembly and
- * increased memory use (about 2 kB of RAM per entry). */
-#define IEEE80211_FRAGMENT_MAX 4
-
/* power level hasn't been configured (or set to automatic) */
#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
@@ -88,19 +82,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
#define IEEE80211_MAX_NAN_INSTANCE_ID 255
-struct ieee80211_fragment_entry {
- struct sk_buff_head skb_list;
- unsigned long first_frag_time;
- u16 seq;
- u16 extra_len;
- u16 last_frag;
- u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
- u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
- unsigned int key_color;
-};
-
-
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -903,9 +884,7 @@ struct ieee80211_sub_if_data {
char name[IFNAMSIZ];
- /* Fragment table for host-based reassembly */
- struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
- unsigned int fragment_next;
+ struct ieee80211_fragment_cache frags;
/* TID bitmap for NoAck policy */
u16 noack_map;
@@ -2321,4 +2300,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
#define debug_noinline
#endif
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7032a2b59249..2e2f73a4aa73 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -677,16 +677,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
*/
static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
{
- int i;
-
/* free extra data */
ieee80211_free_keys(sdata, false);
ieee80211_debugfs_remove_netdev(sdata);
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- __skb_queue_purge(&sdata->fragments[i].skb_list);
- sdata->fragment_next = 0;
+ ieee80211_destroy_frag_cache(&sdata->frags);
if (ieee80211_vif_is_mesh(&sdata->vif))
ieee80211_mesh_teardown_sdata(sdata);
@@ -1930,8 +1926,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sdata->wdev.wiphy = local->hw.wiphy;
sdata->local = local;
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- skb_queue_head_init(&sdata->fragments[i].skb_list);
+ ieee80211_init_frag_cache(&sdata->frags);
INIT_LIST_HEAD(&sdata->key_list);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8a72d48ad6e0..7212a1bebd0c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2123,19 +2123,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
return result;
}
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ skb_queue_head_init(&cache->entries[i].skb_list);
+}
+
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ __skb_queue_purge(&cache->entries[i].skb_list);
+}
+
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq, int rx_queue,
struct sk_buff **skb)
{
struct ieee80211_fragment_entry *entry;
- entry = &sdata->fragments[sdata->fragment_next++];
- if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
- sdata->fragment_next = 0;
+ entry = &cache->entries[cache->next++];
+ if (cache->next >= IEEE80211_FRAGMENT_MAX)
+ cache->next = 0;
- if (!skb_queue_empty(&entry->skb_list))
- __skb_queue_purge(&entry->skb_list);
+ __skb_queue_purge(&entry->skb_list);
__skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */
*skb = NULL;
@@ -2150,14 +2165,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
}
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq,
int rx_queue, struct ieee80211_hdr *hdr)
{
struct ieee80211_fragment_entry *entry;
int i, idx;
- idx = sdata->fragment_next;
+ idx = cache->next;
for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
struct ieee80211_hdr *f_hdr;
struct sk_buff *f_skb;
@@ -2166,7 +2181,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
if (idx < 0)
idx = IEEE80211_FRAGMENT_MAX - 1;
- entry = &sdata->fragments[idx];
+ entry = &cache->entries[idx];
if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
entry->rx_queue != rx_queue ||
entry->last_frag + 1 != frag)
@@ -2207,6 +2222,7 @@ static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
{
+ struct ieee80211_fragment_cache *cache = &rx->sdata->frags;
struct ieee80211_hdr *hdr;
u16 sc;
__le16 fc;
@@ -2228,6 +2244,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
goto out_no_led;
}
+ if (rx->sta)
+ cache = &rx->sta->frags;
+
if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
goto out;
@@ -2246,7 +2265,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (frag == 0) {
/* This is the first fragment of a new frame. */
- entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_add(cache, frag, seq,
rx->seqno_idx, &(rx->skb));
if (requires_sequential_pn(rx, fc)) {
int queue = rx->security_idx;
@@ -2274,7 +2293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
/* This is a fragment for a frame that should already be pending in
* fragment cache. Add this fragment to the end of the pending entry.
*/
- entry = ieee80211_reassemble_find(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_find(cache, frag, seq,
rx->seqno_idx, hdr);
if (!entry) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ec6973ee88ef..f2fb69da9b6e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/module.h>
@@ -392,6 +392,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
u64_stats_init(&sta->rx_stats.syncp);
+ ieee80211_init_frag_cache(&sta->frags);
+
sta->sta_state = IEEE80211_STA_NONE;
/* Mark TID as unreserved */
@@ -1102,6 +1104,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
ieee80211_sta_debugfs_remove(sta);
+ ieee80211_destroy_frag_cache(&sta->frags);
+
cleanup_single_sta(sta);
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b9d0c7cc58..5c56d29a619e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
*/
#ifndef STA_INFO_H
@@ -438,6 +438,33 @@ struct ieee80211_sta_rx_stats {
u64 msdu[IEEE80211_NUM_TIDS + 1];
};
+/*
+ * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent
+ * reception of at least one MSDU per access category per associated STA"
+ * on APs, or "at least one MSDU per access category" on other interface types.
+ *
+ * This limit can be increased by changing this define, at the cost of slower
+ * frame reassembly and increased memory use while fragments are pending.
+ */
+#define IEEE80211_FRAGMENT_MAX 4
+
+struct ieee80211_fragment_entry {
+ struct sk_buff_head skb_list;
+ unsigned long first_frag_time;
+ u16 seq;
+ u16 extra_len;
+ u16 last_frag;
+ u8 rx_queue;
+ bool check_sequential_pn; /* needed for CCMP/GCMP */
+ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+ unsigned int key_color;
+};
+
+struct ieee80211_fragment_cache {
+ struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX];
+ unsigned int next;
+};
+
/*
* The bandwidth threshold below which the per-station CoDel parameters will be
* scaled to be more lenient (to prevent starvation of slow stations). This
@@ -531,6 +558,7 @@ struct ieee80211_sta_rx_stats {
* @status_stats.last_ack_signal: last ACK signal
* @status_stats.ack_signal_filled: last ACK signal validity
* @status_stats.avg_ack_signal: average ACK signal
+ * @frags: fragment cache
*/
struct sta_info {
/* General information, mostly static */
@@ -639,6 +667,8 @@ struct sta_info {
struct cfg80211_chan_def tdls_chandef;
+ struct ieee80211_fragment_cache frags;
+
/* keep last! */
struct ieee80211_sta sta;
};
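
The cache selection and slot recycling this patch introduces are simple
enough to sketch standalone (minimal invented types; the kernel
versions live in sta_info.h and rx.c): fragments from a known station
go to that station's own cache, so one peer's fragments can never
complete another peer's frame, and slots are recycled round-robin as in
ieee80211_reassemble_add():

#define FRAGMENT_MAX 4	/* stand-in for IEEE80211_FRAGMENT_MAX */

struct frag_entry_sketch {
	int in_use;	/* stand-in for the pending skb list etc. */
};

struct frag_cache {
	struct frag_entry_sketch entries[FRAGMENT_MAX];
	unsigned int next;	/* next slot to recycle */
};

/* per-station cache wins when the sender is known; the per-interface
 * cache only serves frames without a station entry */
static struct frag_cache *pick_cache(struct frag_cache *iface_cache,
				     struct frag_cache *sta_cache)
{
	return sta_cache ? sta_cache : iface_cache;
}

static struct frag_entry_sketch *cache_take_slot(struct frag_cache *c)
{
	struct frag_entry_sketch *e = &c->entries[c->next++];

	if (c->next >= FRAGMENT_MAX)
		c->next = 0;
	e->in_use = 1;	/* the real code purges any pending skbs here */
	return e;
}
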
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a11ce08c45b50d69c891d71760b7c5b92074709 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:47 +0200
Subject: [PATCH] mac80211: add fragment cache to sta_info
Prior patches protected against fragmentation cache attacks
by coloring keys, but this shows that it can lead to issues
when multiple stations use the same sequence number. Add a
fragment cache to struct sta_info (in addition to the one in
the interface) to separate fragments for different stations
properly.
This then automatically clears most of the fragment cache when a
station disconnects (or reassociates) from an AP, or when client
interfaces disconnect from the network, etc.
On the way, also fix the comment there since this brings us in line
with the recommendation in 802.11-2016 ("An AP should support ...").
Additionally, remove a useless condition (since there's no problem
purging an already empty list).
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.fc35046b0d52.I1ef101e3784d13e8f660…
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 874ffe7819e5..4c714375bad0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
#define IEEE80211_ENCRYPT_HEADROOM 8
#define IEEE80211_ENCRYPT_TAILROOM 18
-/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
- * reception of at least three fragmented frames. This limit can be increased
- * by changing this define, at the cost of slower frame reassembly and
- * increased memory use (about 2 kB of RAM per entry). */
-#define IEEE80211_FRAGMENT_MAX 4
-
/* power level hasn't been configured (or set to automatic) */
#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
@@ -88,19 +82,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
#define IEEE80211_MAX_NAN_INSTANCE_ID 255
-struct ieee80211_fragment_entry {
- struct sk_buff_head skb_list;
- unsigned long first_frag_time;
- u16 seq;
- u16 extra_len;
- u16 last_frag;
- u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
- u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
- unsigned int key_color;
-};
-
-
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -903,9 +884,7 @@ struct ieee80211_sub_if_data {
char name[IFNAMSIZ];
- /* Fragment table for host-based reassembly */
- struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
- unsigned int fragment_next;
+ struct ieee80211_fragment_cache frags;
/* TID bitmap for NoAck policy */
u16 noack_map;
@@ -2321,4 +2300,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
#define debug_noinline
#endif
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7032a2b59249..2e2f73a4aa73 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -677,16 +677,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
*/
static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
{
- int i;
-
/* free extra data */
ieee80211_free_keys(sdata, false);
ieee80211_debugfs_remove_netdev(sdata);
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- __skb_queue_purge(&sdata->fragments[i].skb_list);
- sdata->fragment_next = 0;
+ ieee80211_destroy_frag_cache(&sdata->frags);
if (ieee80211_vif_is_mesh(&sdata->vif))
ieee80211_mesh_teardown_sdata(sdata);
@@ -1930,8 +1926,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sdata->wdev.wiphy = local->hw.wiphy;
sdata->local = local;
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- skb_queue_head_init(&sdata->fragments[i].skb_list);
+ ieee80211_init_frag_cache(&sdata->frags);
INIT_LIST_HEAD(&sdata->key_list);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8a72d48ad6e0..7212a1bebd0c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2123,19 +2123,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
return result;
}
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ skb_queue_head_init(&cache->entries[i].skb_list);
+}
+
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ __skb_queue_purge(&cache->entries[i].skb_list);
+}
+
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq, int rx_queue,
struct sk_buff **skb)
{
struct ieee80211_fragment_entry *entry;
- entry = &sdata->fragments[sdata->fragment_next++];
- if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
- sdata->fragment_next = 0;
+ entry = &cache->entries[cache->next++];
+ if (cache->next >= IEEE80211_FRAGMENT_MAX)
+ cache->next = 0;
- if (!skb_queue_empty(&entry->skb_list))
- __skb_queue_purge(&entry->skb_list);
+ __skb_queue_purge(&entry->skb_list);
__skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */
*skb = NULL;
@@ -2150,14 +2165,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
}
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq,
int rx_queue, struct ieee80211_hdr *hdr)
{
struct ieee80211_fragment_entry *entry;
int i, idx;
- idx = sdata->fragment_next;
+ idx = cache->next;
for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
struct ieee80211_hdr *f_hdr;
struct sk_buff *f_skb;
@@ -2166,7 +2181,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
if (idx < 0)
idx = IEEE80211_FRAGMENT_MAX - 1;
- entry = &sdata->fragments[idx];
+ entry = &cache->entries[idx];
if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
entry->rx_queue != rx_queue ||
entry->last_frag + 1 != frag)
@@ -2207,6 +2222,7 @@ static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
{
+ struct ieee80211_fragment_cache *cache = &rx->sdata->frags;
struct ieee80211_hdr *hdr;
u16 sc;
__le16 fc;
@@ -2228,6 +2244,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
goto out_no_led;
}
+ if (rx->sta)
+ cache = &rx->sta->frags;
+
if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
goto out;
@@ -2246,7 +2265,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (frag == 0) {
/* This is the first fragment of a new frame. */
- entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_add(cache, frag, seq,
rx->seqno_idx, &(rx->skb));
if (requires_sequential_pn(rx, fc)) {
int queue = rx->security_idx;
@@ -2274,7 +2293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
/* This is a fragment for a frame that should already be pending in
* fragment cache. Add this fragment to the end of the pending entry.
*/
- entry = ieee80211_reassemble_find(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_find(cache, frag, seq,
rx->seqno_idx, hdr);
if (!entry) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ec6973ee88ef..f2fb69da9b6e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/module.h>
@@ -392,6 +392,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
u64_stats_init(&sta->rx_stats.syncp);
+ ieee80211_init_frag_cache(&sta->frags);
+
sta->sta_state = IEEE80211_STA_NONE;
/* Mark TID as unreserved */
@@ -1102,6 +1104,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
ieee80211_sta_debugfs_remove(sta);
+ ieee80211_destroy_frag_cache(&sta->frags);
+
cleanup_single_sta(sta);
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b9d0c7cc58..5c56d29a619e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
*/
#ifndef STA_INFO_H
@@ -438,6 +438,33 @@ struct ieee80211_sta_rx_stats {
u64 msdu[IEEE80211_NUM_TIDS + 1];
};
+/*
+ * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent
+ * reception of at least one MSDU per access category per associated STA"
+ * on APs, or "at least one MSDU per access category" on other interface types.
+ *
+ * This limit can be increased by changing this define, at the cost of slower
+ * frame reassembly and increased memory use while fragments are pending.
+ */
+#define IEEE80211_FRAGMENT_MAX 4
+
+struct ieee80211_fragment_entry {
+ struct sk_buff_head skb_list;
+ unsigned long first_frag_time;
+ u16 seq;
+ u16 extra_len;
+ u16 last_frag;
+ u8 rx_queue;
+ bool check_sequential_pn; /* needed for CCMP/GCMP */
+ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+ unsigned int key_color;
+};
+
+struct ieee80211_fragment_cache {
+ struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX];
+ unsigned int next;
+};
+
/*
* The bandwidth threshold below which the per-station CoDel parameters will be
* scaled to be more lenient (to prevent starvation of slow stations). This
@@ -531,6 +558,7 @@ struct ieee80211_sta_rx_stats {
* @status_stats.last_ack_signal: last ACK signal
* @status_stats.ack_signal_filled: last ACK signal validity
* @status_stats.avg_ack_signal: average ACK signal
+ * @frags: fragment cache
*/
struct sta_info {
/* General information, mostly static */
@@ -639,6 +667,8 @@ struct sta_info {
struct cfg80211_chan_def tdls_chandef;
+ struct ieee80211_fragment_cache frags;
+
/* keep last! */
struct ieee80211_sta sta;
};
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3a11ce08c45b50d69c891d71760b7c5b92074709 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg(a)intel.com>
Date: Tue, 11 May 2021 20:02:47 +0200
Subject: [PATCH] mac80211: add fragment cache to sta_info
Prior patches protected against fragmentation cache attacks
by coloring keys, but this shows that it can lead to issues
when multiple stations use the same sequence number. Add a
fragment cache to struct sta_info (in addition to the one in
the interface) to separate fragments for different stations
properly.
This then automatically clears most of the fragment cache when a
station disconnects from (or reassociates with) an AP, or when client
interfaces disconnect from the network, etc.
On the way, also fix the comment there since this brings us in line
with the recommendation in 802.11-2016 ("An AP should support ...").
Additionally, remove a useless condition (since there's no problem
purging an already empty list).
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.fc35046b0d52.I1ef101e3784d13e8f660…
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 874ffe7819e5..4c714375bad0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
#define IEEE80211_ENCRYPT_HEADROOM 8
#define IEEE80211_ENCRYPT_TAILROOM 18
-/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
- * reception of at least three fragmented frames. This limit can be increased
- * by changing this define, at the cost of slower frame reassembly and
- * increased memory use (about 2 kB of RAM per entry). */
-#define IEEE80211_FRAGMENT_MAX 4
-
/* power level hasn't been configured (or set to automatic) */
#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
@@ -88,19 +82,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
#define IEEE80211_MAX_NAN_INSTANCE_ID 255
-struct ieee80211_fragment_entry {
- struct sk_buff_head skb_list;
- unsigned long first_frag_time;
- u16 seq;
- u16 extra_len;
- u16 last_frag;
- u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
- u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
- unsigned int key_color;
-};
-
-
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -903,9 +884,7 @@ struct ieee80211_sub_if_data {
char name[IFNAMSIZ];
- /* Fragment table for host-based reassembly */
- struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
- unsigned int fragment_next;
+ struct ieee80211_fragment_cache frags;
/* TID bitmap for NoAck policy */
u16 noack_map;
@@ -2321,4 +2300,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
#define debug_noinline
#endif
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7032a2b59249..2e2f73a4aa73 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes(a)sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -677,16 +677,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
*/
static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
{
- int i;
-
/* free extra data */
ieee80211_free_keys(sdata, false);
ieee80211_debugfs_remove_netdev(sdata);
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- __skb_queue_purge(&sdata->fragments[i].skb_list);
- sdata->fragment_next = 0;
+ ieee80211_destroy_frag_cache(&sdata->frags);
if (ieee80211_vif_is_mesh(&sdata->vif))
ieee80211_mesh_teardown_sdata(sdata);
@@ -1930,8 +1926,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sdata->wdev.wiphy = local->hw.wiphy;
sdata->local = local;
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- skb_queue_head_init(&sdata->fragments[i].skb_list);
+ ieee80211_init_frag_cache(&sdata->frags);
INIT_LIST_HEAD(&sdata->key_list);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8a72d48ad6e0..7212a1bebd0c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2123,19 +2123,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
return result;
}
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ skb_queue_head_init(&cache->entries[i].skb_list);
+}
+
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ __skb_queue_purge(&cache->entries[i].skb_list);
+}
+
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq, int rx_queue,
struct sk_buff **skb)
{
struct ieee80211_fragment_entry *entry;
- entry = &sdata->fragments[sdata->fragment_next++];
- if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
- sdata->fragment_next = 0;
+ entry = &cache->entries[cache->next++];
+ if (cache->next >= IEEE80211_FRAGMENT_MAX)
+ cache->next = 0;
- if (!skb_queue_empty(&entry->skb_list))
- __skb_queue_purge(&entry->skb_list);
+ __skb_queue_purge(&entry->skb_list);
__skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */
*skb = NULL;
@@ -2150,14 +2165,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
}
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq,
int rx_queue, struct ieee80211_hdr *hdr)
{
struct ieee80211_fragment_entry *entry;
int i, idx;
- idx = sdata->fragment_next;
+ idx = cache->next;
for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
struct ieee80211_hdr *f_hdr;
struct sk_buff *f_skb;
@@ -2166,7 +2181,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
if (idx < 0)
idx = IEEE80211_FRAGMENT_MAX - 1;
- entry = &sdata->fragments[idx];
+ entry = &cache->entries[idx];
if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
entry->rx_queue != rx_queue ||
entry->last_frag + 1 != frag)
@@ -2207,6 +2222,7 @@ static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
{
+ struct ieee80211_fragment_cache *cache = &rx->sdata->frags;
struct ieee80211_hdr *hdr;
u16 sc;
__le16 fc;
@@ -2228,6 +2244,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
goto out_no_led;
}
+ if (rx->sta)
+ cache = &rx->sta->frags;
+
if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
goto out;
@@ -2246,7 +2265,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (frag == 0) {
/* This is the first fragment of a new frame. */
- entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_add(cache, frag, seq,
rx->seqno_idx, &(rx->skb));
if (requires_sequential_pn(rx, fc)) {
int queue = rx->security_idx;
@@ -2274,7 +2293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
/* This is a fragment for a frame that should already be pending in
* fragment cache. Add this fragment to the end of the pending entry.
*/
- entry = ieee80211_reassemble_find(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_find(cache, frag, seq,
rx->seqno_idx, hdr);
if (!entry) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ec6973ee88ef..f2fb69da9b6e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc(a)suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/module.h>
@@ -392,6 +392,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
u64_stats_init(&sta->rx_stats.syncp);
+ ieee80211_init_frag_cache(&sta->frags);
+
sta->sta_state = IEEE80211_STA_NONE;
/* Mark TID as unreserved */
@@ -1102,6 +1104,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
ieee80211_sta_debugfs_remove(sta);
+ ieee80211_destroy_frag_cache(&sta->frags);
+
cleanup_single_sta(sta);
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b9d0c7cc58..5c56d29a619e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
*/
#ifndef STA_INFO_H
@@ -438,6 +438,33 @@ struct ieee80211_sta_rx_stats {
u64 msdu[IEEE80211_NUM_TIDS + 1];
};
+/*
+ * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent
+ * reception of at least one MSDU per access category per associated STA"
+ * on APs, or "at least one MSDU per access category" on other interface types.
+ *
+ * This limit can be increased by changing this define, at the cost of slower
+ * frame reassembly and increased memory use while fragments are pending.
+ */
+#define IEEE80211_FRAGMENT_MAX 4
+
+struct ieee80211_fragment_entry {
+ struct sk_buff_head skb_list;
+ unsigned long first_frag_time;
+ u16 seq;
+ u16 extra_len;
+ u16 last_frag;
+ u8 rx_queue;
+ bool check_sequential_pn; /* needed for CCMP/GCMP */
+ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+ unsigned int key_color;
+};
+
+struct ieee80211_fragment_cache {
+ struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX];
+ unsigned int next;
+};
+
/*
* The bandwidth threshold below which the per-station CoDel parameters will be
* scaled to be more lenient (to prevent starvation of slow stations). This
@@ -531,6 +558,7 @@ struct ieee80211_sta_rx_stats {
* @status_stats.last_ack_signal: last ACK signal
* @status_stats.ack_signal_filled: last ACK signal validity
* @status_stats.avg_ack_signal: average ACK signal
+ * @frags: fragment cache
*/
struct sta_info {
/* General information, mostly static */
@@ -639,6 +667,8 @@ struct sta_info {
struct cfg80211_chan_def tdls_chandef;
+ struct ieee80211_fragment_cache frags;
+
/* keep last! */
struct ieee80211_sta sta;
};
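To make the round-robin reuse in ieee80211_reassemble_add() concrete: the
cache is a small fixed array, and a new first-fragment unconditionally
recycles the slot at 'next', purging whatever was pending there. Below is a
minimal userspace sketch of just that policy; frag_cache, frag_entry and
frag_cache_add are illustrative stand-ins, not mac80211 API, and the sk_buff
queue is reduced to plain bookkeeping.
#include <stdio.h>
#include <string.h>
#define FRAGMENT_MAX 4
struct frag_entry {
	unsigned int seq;       /* sequence number of the pending frame */
	unsigned int last_frag; /* last fragment number received so far */
	int in_use;
};
struct frag_cache {
	struct frag_entry entries[FRAGMENT_MAX];
	unsigned int next;      /* round-robin reuse index */
};
/* Begin reassembly of a new frame; unconditionally recycles (and thereby
 * purges) the slot at 'next', mirroring ieee80211_reassemble_add(). */
static struct frag_entry *frag_cache_add(struct frag_cache *c, unsigned int seq)
{
	struct frag_entry *e = &c->entries[c->next++];
	if (c->next >= FRAGMENT_MAX)
		c->next = 0;
	memset(e, 0, sizeof(*e));
	e->seq = seq;
	e->in_use = 1;
	return e;
}
int main(void)
{
	struct frag_cache cache = { 0 };
	unsigned int seq;
	int i;
	/* Five concurrent frames in a 4-entry cache: seq 100 gets evicted. */
	for (seq = 100; seq < 105; seq++)
		frag_cache_add(&cache, seq);
	for (i = 0; i < FRAGMENT_MAX; i++)
		printf("slot %d holds seq %u\n", i, cache.entries[i].seq);
	return 0;
}
The point of the patch is where this array lives: moving it from the
interface into struct sta_info means two stations that happen to reuse the
same sequence number can no longer have their fragments stitched together.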
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 270032a2a9c4535799736142e1e7c413ca7b836e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg(a)intel.com>
Date: Tue, 11 May 2021 20:02:46 +0200
Subject: [PATCH] mac80211: drop A-MSDUs on old ciphers
With old ciphers (WEP and TKIP) we shouldn't be using A-MSDUs,
since A-MSDUs are only supported if we know the peer supports
them, and the only practical way of knowing that is HT support,
which doesn't allow old ciphers.
However, we would normally accept them anyway. Since we check
the MMIC before deaggregating A-MSDUs, and the A-MSDU bit in
the QoS header is not protected in TKIP (or WEP), this enables
attacks similar to CVE-2020-24588. To prevent that, drop A-MSDUs
completely with old ciphers.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.076543300172.I548e6e71f1ee9cad4b9a…
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f14d32a5001d..8a72d48ad6e0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes(a)sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -2739,6 +2739,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
if (is_multicast_ether_addr(hdr->addr1))
return RX_DROP_UNUSABLE;
+ if (rx->key) {
+ /*
+ * We should not receive A-MSDUs on pre-HT connections,
+ * and HT connections cannot use old ciphers. Thus drop
+ * them, as in those cases we couldn't even have SPP
+ * A-MSDUs or such.
+ */
+ switch (rx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ case WLAN_CIPHER_SUITE_TKIP:
+ return RX_DROP_UNUSABLE;
+ default:
+ break;
+ }
+ }
+
return __ieee80211_rx_h_amsdu(rx, 0);
}
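The gate itself is a plain cipher-suite check ahead of A-MSDU deaggregation.
A standalone sketch of the equivalent predicate follows; the suite selector
values are the standard 802.11 ones (OUI 00-0F-AC plus suite type), which to
my understanding match the kernel's WLAN_CIPHER_SUITE_* constants, but treat
the defines here as illustrative.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
/* IEEE 802.11 cipher suite selectors (OUI 00-0F-AC : suite type) */
#define CIPHER_WEP40  0x000FAC01u
#define CIPHER_TKIP   0x000FAC02u
#define CIPHER_CCMP   0x000FAC04u
#define CIPHER_WEP104 0x000FAC05u
/* Pre-HT ciphers cannot protect the A-MSDU-present bit, so reject. */
static bool amsdu_allowed_for_cipher(uint32_t cipher)
{
	switch (cipher) {
	case CIPHER_WEP40:
	case CIPHER_WEP104:
	case CIPHER_TKIP:
		return false;
	default:
		return true;
	}
}
int main(void)
{
	printf("TKIP allowed: %d, CCMP allowed: %d\n",
	       amsdu_allowed_for_cipher(CIPHER_TKIP),
	       amsdu_allowed_for_cipher(CIPHER_CCMP));
	return 0;
}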
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 270032a2a9c4535799736142e1e7c413ca7b836e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg(a)intel.com>
Date: Tue, 11 May 2021 20:02:46 +0200
Subject: [PATCH] mac80211: drop A-MSDUs on old ciphers
With old ciphers (WEP and TKIP) we shouldn't be using A-MSDUs,
since A-MSDUs are only supported if we know the peer supports
them, and the only practical way of knowing that is HT support,
which doesn't allow old ciphers.
However, we would normally accept them anyway. Since we check
the MMIC before deaggregating A-MSDUs, and the A-MSDU bit in
the QoS header is not protected in TKIP (or WEP), this enables
attacks similar to CVE-2020-24588. To prevent that, drop A-MSDUs
completely with old ciphers.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.076543300172.I548e6e71f1ee9cad4b9a…
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f14d32a5001d..8a72d48ad6e0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes(a)sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -2739,6 +2739,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
if (is_multicast_ether_addr(hdr->addr1))
return RX_DROP_UNUSABLE;
+ if (rx->key) {
+ /*
+ * We should not receive A-MSDUs on pre-HT connections,
+ * and HT connections cannot use old ciphers. Thus drop
+ * them, as in those cases we couldn't even have SPP
+ * A-MSDUs or such.
+ */
+ switch (rx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ case WLAN_CIPHER_SUITE_TKIP:
+ return RX_DROP_UNUSABLE;
+ default:
+ break;
+ }
+ }
+
return __ieee80211_rx_h_amsdu(rx, 0);
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 270032a2a9c4535799736142e1e7c413ca7b836e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg(a)intel.com>
Date: Tue, 11 May 2021 20:02:46 +0200
Subject: [PATCH] mac80211: drop A-MSDUs on old ciphers
With old ciphers (WEP and TKIP) we shouldn't be using A-MSDUs,
since A-MSDUs are only supported if we know the peer supports
them, and the only practical way of knowing that is HT support,
which doesn't allow old ciphers.
However, we would normally accept them anyway. Since we check
the MMIC before deaggregating A-MSDUs, and the A-MSDU bit in
the QoS header is not protected in TKIP (or WEP), this enables
attacks similar to CVE-2020-24588. To prevent that, drop A-MSDUs
completely with old ciphers.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.076543300172.I548e6e71f1ee9cad4b9a…
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f14d32a5001d..8a72d48ad6e0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes(a)sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -2739,6 +2739,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
if (is_multicast_ether_addr(hdr->addr1))
return RX_DROP_UNUSABLE;
+ if (rx->key) {
+ /*
+ * We should not receive A-MSDUs on pre-HT connections,
+ * and HT connections cannot use old ciphers. Thus drop
+ * them, as in those cases we couldn't even have SPP
+ * A-MSDUs or such.
+ */
+ switch (rx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ case WLAN_CIPHER_SUITE_TKIP:
+ return RX_DROP_UNUSABLE;
+ default:
+ break;
+ }
+ }
+
return __ieee80211_rx_h_amsdu(rx, 0);
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 2b8a1fee3488c602aca8bea004a087e60806a5cf Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef(a)kuleuven.be>
Date: Tue, 11 May 2021 20:02:45 +0200
Subject: [PATCH] cfg80211: mitigate A-MSDU aggregation attacks
Mitigate A-MSDU injection attacks (CVE-2020-24588) by detecting if the
destination address of a subframe equals an RFC1042 (i.e., LLC/SNAP)
header, and if so dropping the complete A-MSDU frame. This mitigates
known attacks, although new (unknown) aggregation-based attacks may
remain possible.
This defense works because in A-MSDU aggregation injection attacks, a
normal encrypted Wi-Fi frame is turned into an A-MSDU frame. This means
the first 6 bytes of the first A-MSDU subframe correspond to an RFC1042
header. In other words, the destination MAC address of the first A-MSDU
subframe contains the start of an RFC1042 header during an aggregation
attack. We can detect this and thereby prevent this specific attack.
For details, see Section 7.2 of "Fragment and Forge: Breaking Wi-Fi
Through Frame Aggregation and Fragmentation".
Note that for kernel 4.9 and above this patch depends on "mac80211:
properly handle A-MSDUs that start with a rfc1042 header". Otherwise
this patch has no impact and attacks will remain possible.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef(a)kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.25d93176ddaf.I9e265b597f2cd23eb445…
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 39966a873e40..7ec021a610ae 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -771,6 +771,9 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
remaining = skb->len - offset;
if (subframe_len > remaining)
goto purge;
+ /* mitigate A-MSDU aggregation injection attacks */
+ if (ether_addr_equal(eth.h_dest, rfc1042_header))
+ goto purge;
offset += sizeof(struct ethhdr);
last = remaining <= subframe_len + padding;
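Spelled out, the check works because a forged "A-MSDU" is really an ordinary
MSDU whose payload begins with the RFC 1042 LLC/SNAP header, so reparsing it
as an A-MSDU puts the bytes aa:aa:03:00:00:00 where the first subframe's
destination address belongs. A userspace sketch of the comparison;
is_amsdu_injection is an illustrative name, and ether_addr_equal() is
modeled with a plain memcmp().
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
/* RFC 1042 LLC/SNAP header: DSAP aa, SSAP aa, CTRL 03, OUI 00-00-00 */
static const unsigned char rfc1042_header[6] = {
	0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00
};
/* A real destination MAC should never equal the SNAP header bytes. */
static bool is_amsdu_injection(const unsigned char *subframe_dest)
{
	return memcmp(subframe_dest, rfc1042_header, 6) == 0;
}
int main(void)
{
	unsigned char forged_dest[6] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
	unsigned char real_dest[6]   = { 0x02, 0x00, 0x11, 0x22, 0x33, 0x44 };
	printf("forged: %d, real: %d\n",
	       is_amsdu_injection(forged_dest),
	       is_amsdu_injection(real_dest));
	return 0;
}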
The patch below does not apply to the 5.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 17a91051fe63b40ec651b80097c9fff5b093fdc5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Sun, 23 May 2021 15:48:39 +0100
Subject: [PATCH] io_uring/io-wq: close io-wq full-stop gap
There is an old problem with io-wq cancellation where requests that should
be killed are in io-wq but are not discoverable, e.g. sitting in the
@next_hashed or @linked vars of io_worker_handle_work(). This adds some
unreliability to individual request cancellation, but may also potentially
get __io_uring_cancel() stuck. For instance:
1) An __io_uring_cancel() cancellation round has not found any
requests, but some exist as described above.
2) __io_uring_cancel() goes to sleep.
3) Then workers wake up and try to execute those hidden requests,
which happen to be unbound.
As we already cancel all io-wq requests there, set IO_WQ_BIT_EXIT
in advance, thus preventing step 3) from executing unbound requests. The
workers will initially break out of their loops because they get a signal,
as they are threads of the dying/exec()'ing user task.
Cc: stable(a)vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/abfcf8c54cb9e8f7bfbad7e9a0cc5433cc70bdc2.16217812…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5361a9b4b47b..de9b7ba3ba01 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
return cwd->wqe->wq == data;
}
+void io_wq_exit_start(struct io_wq *wq)
+{
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+}
+
static void io_wq_exit_workers(struct io_wq *wq)
{
struct callback_head *cb;
int node;
- set_bit(IO_WQ_BIT_EXIT, &wq->state);
-
if (!wq->task)
return;
@@ -1020,8 +1023,6 @@ static void io_wq_destroy(struct io_wq *wq)
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
- io_wq_exit_workers(wq);
-
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
struct io_cb_cancel_data match = {
@@ -1036,16 +1037,13 @@ static void io_wq_destroy(struct io_wq *wq)
kfree(wq);
}
-void io_wq_put(struct io_wq *wq)
-{
- if (refcount_dec_and_test(&wq->refs))
- io_wq_destroy(wq);
-}
-
void io_wq_put_and_exit(struct io_wq *wq)
{
+ WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
+
io_wq_exit_workers(wq);
- io_wq_put(wq);
+ if (refcount_dec_and_test(&wq->refs))
+ io_wq_destroy(wq);
}
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 0e6d310999e8..af2df0680ee2 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -122,7 +122,7 @@ struct io_wq_data {
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-void io_wq_put(struct io_wq *wq);
+void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5f82954004f6..6af8ca0cb01c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9078,6 +9078,9 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
if (!current->io_uring)
return;
+ if (tctx->io_wq)
+ io_wq_exit_start(tctx->io_wq);
+
WARN_ON_ONCE(!sqd || sqd->thread != current);
atomic_inc(&tctx->in_idle);
@@ -9112,6 +9115,9 @@ void __io_uring_cancel(struct files_struct *files)
DEFINE_WAIT(wait);
s64 inflight;
+ if (tctx->io_wq)
+ io_wq_exit_start(tctx->io_wq);
+
/* make sure overflow events are dropped */
atomic_inc(&tctx->in_idle);
do {
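The fix is purely an ordering change: publish the exit flag before the
cancellation pass, so any worker that wakes up afterwards refuses to pick up
"hidden" work. A minimal pthreads sketch of that ordering; wq_exit and
pending_work are illustrative stand-ins for IO_WQ_BIT_EXIT and the
undiscoverable requests, not io-wq internals.
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>
static atomic_bool wq_exit;
static atomic_int pending_work = 3; /* stands in for @next_hashed/@linked */
static void *worker(void *arg)
{
	(void)arg;
	while (!atomic_load(&wq_exit)) {
		if (atomic_load(&pending_work) > 0) {
			atomic_fetch_sub(&pending_work, 1);
			/* ... execute one work item ... */
		}
		usleep(1000);
	}
	return NULL;
}
int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, worker, NULL);
	/* Shutdown: set the exit bit first (io_wq_exit_start()), then cancel
	 * whatever is still queued, then wait for the workers. Swapping the
	 * first two steps reopens the window this patch closes. */
	atomic_store(&wq_exit, true);
	atomic_store(&pending_work, 0); /* "cancel all requests" */
	pthread_join(t, NULL);
	printf("remaining work: %d\n", atomic_load(&pending_work));
	return 0;
}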
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 17a91051fe63b40ec651b80097c9fff5b093fdc5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Sun, 23 May 2021 15:48:39 +0100
Subject: [PATCH] io_uring/io-wq: close io-wq full-stop gap
There is an old problem with io-wq cancellation where requests that should
be killed are in io-wq but are not discoverable, e.g. sitting in the
@next_hashed or @linked vars of io_worker_handle_work(). This adds some
unreliability to individual request cancellation, but may also potentially
get __io_uring_cancel() stuck. For instance:
1) An __io_uring_cancel() cancellation round has not found any
requests, but some exist as described above.
2) __io_uring_cancel() goes to sleep.
3) Then workers wake up and try to execute those hidden requests,
which happen to be unbound.
As we already cancel all io-wq requests there, set IO_WQ_BIT_EXIT
in advance, thus preventing step 3) from executing unbound requests. The
workers will initially break out of their loops because they get a signal,
as they are threads of the dying/exec()'ing user task.
Cc: stable(a)vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/abfcf8c54cb9e8f7bfbad7e9a0cc5433cc70bdc2.16217812…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5361a9b4b47b..de9b7ba3ba01 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
return cwd->wqe->wq == data;
}
+void io_wq_exit_start(struct io_wq *wq)
+{
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+}
+
static void io_wq_exit_workers(struct io_wq *wq)
{
struct callback_head *cb;
int node;
- set_bit(IO_WQ_BIT_EXIT, &wq->state);
-
if (!wq->task)
return;
@@ -1020,8 +1023,6 @@ static void io_wq_destroy(struct io_wq *wq)
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
- io_wq_exit_workers(wq);
-
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
struct io_cb_cancel_data match = {
@@ -1036,16 +1037,13 @@ static void io_wq_destroy(struct io_wq *wq)
kfree(wq);
}
-void io_wq_put(struct io_wq *wq)
-{
- if (refcount_dec_and_test(&wq->refs))
- io_wq_destroy(wq);
-}
-
void io_wq_put_and_exit(struct io_wq *wq)
{
+ WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
+
io_wq_exit_workers(wq);
- io_wq_put(wq);
+ if (refcount_dec_and_test(&wq->refs))
+ io_wq_destroy(wq);
}
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 0e6d310999e8..af2df0680ee2 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -122,7 +122,7 @@ struct io_wq_data {
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-void io_wq_put(struct io_wq *wq);
+void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5f82954004f6..6af8ca0cb01c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9078,6 +9078,9 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
if (!current->io_uring)
return;
+ if (tctx->io_wq)
+ io_wq_exit_start(tctx->io_wq);
+
WARN_ON_ONCE(!sqd || sqd->thread != current);
atomic_inc(&tctx->in_idle);
@@ -9112,6 +9115,9 @@ void __io_uring_cancel(struct files_struct *files)
DEFINE_WAIT(wait);
s64 inflight;
+ if (tctx->io_wq)
+ io_wq_exit_start(tctx->io_wq);
+
/* make sure overflow events are dropped */
atomic_inc(&tctx->in_idle);
do {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 46b5c4fb87ce8211e0f9b0383dbde72c3652d2ba Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Mon, 7 Dec 2020 09:17:14 +0100
Subject: [PATCH] spi: mt7621: Don't leak SPI master in probe error path
If the calls to device_reset() or devm_spi_register_controller() fail on
probe of the MediaTek MT7621 SPI driver, the spi_controller struct is
erroneously not freed. Fix by switching over to the new
devm_spi_alloc_master() helper.
Additionally, there's an ordering issue in mt7621_spi_remove() wherein
the spi_controller is unregistered after disabling the SYS clock.
The correct order is to call spi_unregister_controller() *before* this
teardown step because bus accesses may still be ongoing until that
function returns.
All of these bugs have existed since the driver was first introduced,
so it seems fair to fix them together in a single commit.
Fixes: 1ab7f2a43558 ("staging: mt7621-spi: add mt7621 support")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Reviewed-by: Stefan Roese <sr(a)denx.de>
Cc: <stable(a)vger.kernel.org> # v4.17+: 5e844cc37a5c: spi: Introduce device-managed SPI controller allocation
Cc: <stable(a)vger.kernel.org> # v4.17+
Link: https://lore.kernel.org/r/72b680796149f5fcda0b3f530ffb7ee73b04f224.16072868…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/spi/spi-mt7621.c b/drivers/spi/spi-mt7621.c
index e227700808cb..b4b9b7309b5e 100644
--- a/drivers/spi/spi-mt7621.c
+++ b/drivers/spi/spi-mt7621.c
@@ -350,7 +350,7 @@ static int mt7621_spi_probe(struct platform_device *pdev)
if (status)
return status;
- master = spi_alloc_master(&pdev->dev, sizeof(*rs));
+ master = devm_spi_alloc_master(&pdev->dev, sizeof(*rs));
if (!master) {
dev_info(&pdev->dev, "master allocation failed\n");
clk_disable_unprepare(clk);
@@ -382,7 +382,7 @@ static int mt7621_spi_probe(struct platform_device *pdev)
return ret;
}
- ret = devm_spi_register_controller(&pdev->dev, master);
+ ret = spi_register_controller(master);
if (ret)
clk_disable_unprepare(clk);
@@ -397,6 +397,7 @@ static int mt7621_spi_remove(struct platform_device *pdev)
master = dev_get_drvdata(&pdev->dev);
rs = spi_controller_get_devdata(master);
+ spi_unregister_controller(master);
clk_disable_unprepare(rs->clk);
return 0;
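Both halves of the fix follow one rule: freeing the allocation belongs to
devres, but the teardown order in remove() stays the driver's
responsibility. Here is a schematic probe/remove pair in that shape; it is a
hedged sketch with an invented example_spi driver, not a drop-in for the
real mt7621 code.
#include <linux/clk.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/spi/spi.h>
struct example_spi {
	struct clk *clk;
};
static int example_spi_probe(struct platform_device *pdev)
{
	struct spi_controller *master;
	struct example_spi *es;
	struct clk *clk;
	int ret;
	clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(clk))
		return PTR_ERR(clk);
	ret = clk_prepare_enable(clk);
	if (ret)
		return ret;
	/* devm allocation: freed automatically on any probe error below */
	master = devm_spi_alloc_master(&pdev->dev, sizeof(*es));
	if (!master) {
		clk_disable_unprepare(clk);
		return -ENOMEM;
	}
	es = spi_controller_get_devdata(master);
	es->clk = clk;
	platform_set_drvdata(pdev, master);
	/* non-devm registration, so remove() controls the teardown order */
	ret = spi_register_controller(master);
	if (ret)
		clk_disable_unprepare(clk);
	return ret;
}
static int example_spi_remove(struct platform_device *pdev)
{
	struct spi_controller *master = platform_get_drvdata(pdev);
	struct example_spi *es = spi_controller_get_devdata(master);
	/* unregister first: bus accesses may be in flight until this returns */
	spi_unregister_controller(master);
	clk_disable_unprepare(es->clk);
	return 0;
}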
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 24f7033405abe195224ec793dbc3d7a27dec0b98 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Mon, 7 Dec 2020 09:17:13 +0100
Subject: [PATCH] spi: mt7621: Disable clock in probe error path
Commit 702b15cb9712 ("spi: mt7621: fix missing clk_disable_unprepare()
on error in mt7621_spi_probe") sought to disable the SYS clock on probe
errors, but only did so for 2 of 3 potentially failing calls: The clock
needs to be disabled on failure of devm_spi_register_controller() as
well.
Moreover, the commit purports to fix a bug in commit cbd66c626e16 ("spi:
mt7621: Move SPI driver out of staging") but in reality the bug has
existed since the driver was first introduced.
Fixes: 1ab7f2a43558 ("staging: mt7621-spi: add mt7621 support")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: <stable(a)vger.kernel.org> # v4.17+: 702b15cb9712: spi: mt7621: fix missing clk_disable_unprepare() on error in mt7621_spi_probe
Cc: <stable(a)vger.kernel.org> # v4.17+
Cc: Qinglang Miao <miaoqinglang(a)huawei.com>
Link: https://lore.kernel.org/r/36ad42760087952fb7c10aae7d2628547c26a7ec.16072868…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/spi/spi-mt7621.c b/drivers/spi/spi-mt7621.c
index 2cdae7994e2a..e227700808cb 100644
--- a/drivers/spi/spi-mt7621.c
+++ b/drivers/spi/spi-mt7621.c
@@ -382,7 +382,11 @@ static int mt7621_spi_probe(struct platform_device *pdev)
return ret;
}
- return devm_spi_register_controller(&pdev->dev, master);
+ ret = devm_spi_register_controller(&pdev->dev, master);
+ if (ret)
+ clk_disable_unprepare(clk);
+
+ return ret;
}
static int mt7621_spi_remove(struct platform_device *pdev)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e77df3eca12be4b17f13cf9f215cff248c57d98f Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Mon, 7 Dec 2020 09:17:04 +0100
Subject: [PATCH] spi: spi-sh: Fix use-after-free on unbind
spi_sh_remove() accesses the driver's private data after calling
spi_unregister_master() even though that function releases the last
reference on the spi_master and thereby frees the private data.
Fix by switching over to the new devm_spi_alloc_master() helper which
keeps the private data accessible until the driver has unbound.
Fixes: 680c1305e259 ("spi/spi_sh: use spi_unregister_master instead of spi_master_put in remove path")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: <stable(a)vger.kernel.org> # v3.0+: 5e844cc37a5c: spi: Introduce device-managed SPI controller allocation
Cc: <stable(a)vger.kernel.org> # v3.0+
Cc: Axel Lin <axel.lin(a)ingics.com>
Link: https://lore.kernel.org/r/6d97628b536baf01d5e3e39db61108f84d44c8b2.16072868…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/spi/spi-sh.c b/drivers/spi/spi-sh.c
index 20bdae5fdf3b..15123a8f41e1 100644
--- a/drivers/spi/spi-sh.c
+++ b/drivers/spi/spi-sh.c
@@ -440,7 +440,7 @@ static int spi_sh_probe(struct platform_device *pdev)
if (irq < 0)
return irq;
- master = spi_alloc_master(&pdev->dev, sizeof(struct spi_sh_data));
+ master = devm_spi_alloc_master(&pdev->dev, sizeof(struct spi_sh_data));
if (master == NULL) {
dev_err(&pdev->dev, "spi_alloc_master error.\n");
return -ENOMEM;
@@ -458,16 +458,14 @@ static int spi_sh_probe(struct platform_device *pdev)
break;
default:
dev_err(&pdev->dev, "No support width\n");
- ret = -ENODEV;
- goto error1;
+ return -ENODEV;
}
ss->irq = irq;
ss->master = master;
ss->addr = devm_ioremap(&pdev->dev, res->start, resource_size(res));
if (ss->addr == NULL) {
dev_err(&pdev->dev, "ioremap error.\n");
- ret = -ENOMEM;
- goto error1;
+ return -ENOMEM;
}
INIT_LIST_HEAD(&ss->queue);
spin_lock_init(&ss->lock);
@@ -477,7 +475,7 @@ static int spi_sh_probe(struct platform_device *pdev)
ret = request_irq(irq, spi_sh_irq, 0, "spi_sh", ss);
if (ret < 0) {
dev_err(&pdev->dev, "request_irq error\n");
- goto error1;
+ return ret;
}
master->num_chipselect = 2;
@@ -496,9 +494,6 @@ static int spi_sh_probe(struct platform_device *pdev)
error3:
free_irq(irq, ss);
- error1:
- spi_master_put(master);
-
return ret;
}
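The bug pattern deserves spelling out: the driver's private data is embedded
in the spi_master allocation, so once spi_unregister_master() drops the last
reference, every later dereference of that private data is a use-after-free.
A reduced sketch of the remove() path; spi_sh_data_sketch is a simplified
stand-in for the real private struct.
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/spi/spi.h>
struct spi_sh_data_sketch {
	struct spi_master *master;
	int irq;
};
static int sketch_remove(struct platform_device *pdev)
{
	struct spi_sh_data_sketch *ss = platform_get_drvdata(pdev);
	/* With spi_alloc_master() this call could free 'ss', which lives
	 * inside the master's allocation; with devm_spi_alloc_master() the
	 * memory is only released after the driver has fully unbound, so
	 * the free_irq() below stays valid. */
	spi_unregister_master(ss->master);
	free_irq(ss->irq, ss);
	return 0;
}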
This is the start of the stable review cycle for the 5.10.41 release.
There are 9 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat, 29 May 2021 15:11:29 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.41-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.10.41-rc1
Dongliang Mu <mudongliangabcd(a)gmail.com>
NFC: nci: fix memory leak in nci_allocate_device
Dave Rigby <d.rigby(a)me.com>
perf unwind: Set userdata for all __report_module() paths
Jan Kratochvil <jan.kratochvil(a)redhat.com>
perf unwind: Fix separate debug info files when using elfutils' libdw's unwinder
Wanpeng Li <wanpengli(a)tencent.com>
KVM: x86: Defer vtime accounting 'til after IRQ handling
Wanpeng Li <wanpengli(a)tencent.com>
context_tracking: Move guest exit vtime accounting to separate helpers
Wanpeng Li <wanpengli(a)tencent.com>
context_tracking: Move guest exit context tracking to separate helpers
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: No need to simulate speculative domain for immediates
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Fix mask direction swap upon off reg sign change
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Wrap aux data inside bpf_sanitize_info container
-------------
Diffstat:
Makefile | 4 ++--
arch/x86/kvm/svm/svm.c | 6 +++---
arch/x86/kvm/vmx/vmx.c | 6 +++---
arch/x86/kvm/x86.c | 9 ++++++++
include/linux/context_tracking.h | 25 ++++++++++++++++++----
include/net/nfc/nci_core.h | 1 +
kernel/bpf/verifier.c | 46 ++++++++++++++++++++++++----------------
net/nfc/nci/core.c | 1 +
net/nfc/nci/hci.c | 5 +++++
tools/perf/util/unwind-libdw.c | 35 ++++++++++++++++++++++++++----
10 files changed, 104 insertions(+), 34 deletions(-)
In AP mode, WPA-PSK connections were not established.
The reason was that the AP was sending the first message
of the 4-way handshake encrypted, even though no key had
(correctly) yet been set.
Fix this by adding an extra check that we actually have a key.
The Redpine downstream out-of-tree driver does it this way too.
Signed-off-by: Martin Fuzzey <martin.fuzzey(a)flowbird.group>
CC: stable(a)vger.kernel.org
---
drivers/net/wireless/rsi/rsi_91x_hal.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index ce98921..ef84262 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -203,6 +203,7 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE);
if ((!(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) &&
+ (info->control.hw_key) &&
(common->secinfo.security_enable)) {
if (rsi_is_cipher_wep(common))
ieee80211_size += 4;
--
1.9.1
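Reduced to a predicate, the change is: encrypt only when encryption is not
suppressed, security is enabled, and mac80211 actually handed down a
hardware key. A small sketch; the struct shape is illustrative, only the
field roles follow the diff.
#include <stdbool.h>
struct tx_info_sketch {
	bool dont_encrypt;  /* IEEE80211_TX_INTFL_DONT_ENCRYPT set */
	const void *hw_key; /* info->control.hw_key */
};
/* Before the fix the hw_key test was missing, so message 1 of the 4-way
 * handshake was sent "encrypted" although no key was installed yet. */
static bool should_encrypt(const struct tx_info_sketch *info,
			   bool security_enable)
{
	return !info->dont_encrypt && info->hw_key && security_enable;
}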
Commit cf6d100dd238 ("drm/rockchip: dsi: add dual mipi support") added
this devcnt field and call to component_del(). However, these both
appear to be erroneous changes left over from an earlier version of the
patch. In the version merged, nothing ever modifies devcnt, meaning
component_del() runs unconditionally and in addition to the
component_del() calls in dw_mipi_dsi_rockchip_host_detach(). The second
call fails to delete anything and produces a warning in dmesg.
If we look at the previous version of the patch[1], however, we see that
it had logic to calculate devcnt and call component_add() in certain
situations. This was removed in v6, and the fact that the deletion code
was not removed as well appears to have been an oversight.
[1] https://patchwork.kernel.org/project/dri-devel/patch/20180821140515.22246-8…
Fixes: cf6d100dd238 ("drm/rockchip: dsi: add dual mipi support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Hebb <tommyhebb(a)gmail.com>
---
drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index 24a71091759c..8cc81d5b82f0 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -243,7 +243,6 @@ struct dw_mipi_dsi_rockchip {
struct dw_mipi_dsi *dmd;
const struct rockchip_dw_dsi_chip_data *cdata;
struct dw_mipi_dsi_plat_data pdata;
- int devcnt;
};
struct dphy_pll_parameter_map {
@@ -1121,9 +1120,6 @@ static int dw_mipi_dsi_rockchip_remove(struct platform_device *pdev)
{
struct dw_mipi_dsi_rockchip *dsi = platform_get_drvdata(pdev);
- if (dsi->devcnt == 0)
- component_del(dsi->dev, &dw_mipi_dsi_rockchip_ops);
-
dw_mipi_dsi_remove(dsi->dmd);
return 0;
--
2.30.0
This is the start of the stable review cycle for the 5.4.123 release.
There are 7 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat, 29 May 2021 15:11:29 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.123-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.123-rc1
Dongliang Mu <mudongliangabcd(a)gmail.com>
NFC: nci: fix memory leak in nci_allocate_device
Dave Rigby <d.rigby(a)me.com>
perf unwind: Set userdata for all __report_module() paths
Jan Kratochvil <jan.kratochvil(a)redhat.com>
perf unwind: Fix separate debug info files when using elfutils' libdw's unwinder
Jack Pham <jackp(a)codeaurora.org>
usb: dwc3: gadget: Enable suspend events
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: No need to simulate speculative domain for immediates
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Fix mask direction swap upon off reg sign change
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Wrap aux data inside bpf_sanitize_info container
-------------
Diffstat:
Makefile | 4 ++--
drivers/usb/dwc3/gadget.c | 4 ++++
include/net/nfc/nci_core.h | 1 +
kernel/bpf/verifier.c | 46 +++++++++++++++++++++++++-----------------
net/nfc/nci/core.c | 1 +
net/nfc/nci/hci.c | 5 +++++
tools/perf/util/unwind-libdw.c | 35 ++++++++++++++++++++++++++++----
7 files changed, 72 insertions(+), 24 deletions(-)
There exists a possible scenario in which dwc3_gadget_init() can fail:
during a host -> peripheral mode switch in dwc3_set_mode(), when
a pending gadget driver fails to bind. Then, if the DRD undergoes
another mode switch from peripheral->host the resulting
dwc3_gadget_exit() will attempt to reference an invalid and dangling
dwc->gadget pointer as well as call dma_free_coherent() on unmapped
DMA pointers.
The exact scenario can be reproduced as follows:
- Start DWC3 in peripheral mode
- Configure ConfigFS gadget with FunctionFS instance (or use g_ffs)
- Run FunctionFS userspace application (open EPs, write descriptors, etc)
- Bind gadget driver to DWC3's UDC
- Switch DWC3 to host mode
=> dwc3_gadget_exit() is called. usb_del_gadget() will put the
ConfigFS driver instance on the gadget_driver_pending_list
- Stop FunctionFS application (closes the ep files)
- Switch DWC3 to peripheral mode
=> dwc3_gadget_init() fails as usb_add_gadget() calls
check_pending_gadget_drivers() and attempts to rebind the UDC
to the ConfigFS gadget but fails with -19 (-ENODEV) because the
FFS instance is not in FFS_ACTIVE state (userspace has not
re-opened and written the descriptors yet, i.e. desc_ready is not set).
- Switch DWC3 back to host mode
=> dwc3_gadget_exit() is called again, but this time dwc->gadget
is invalid.
Although it can be argued that userspace should take responsibility
for ensuring that the FunctionFS application be ready prior to
allowing the composite driver to bind to the UDC, failure to do so
should not result in a panic from the kernel driver.
Fix this by setting dwc->gadget to NULL in the failure path of
dwc3_gadget_init() and add a check to dwc3_gadget_exit() to bail out
unless the gadget pointer is valid.
Fixes: e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically")
Cc: <stable(a)vger.kernel.org>
Reviewed-by: Peter Chen <peter.chen(a)kernel.org>
Signed-off-by: Jack Pham <jackp(a)codeaurora.org>
---
v2: Fixed commit message to refer to FFS_ACTIVE state; added
Peter's Reviewed-by tag.
drivers/usb/dwc3/gadget.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 612825a39f82..65d9b7227752 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4046,6 +4046,7 @@ int dwc3_gadget_init(struct dwc3 *dwc)
dwc3_gadget_free_endpoints(dwc);
err4:
usb_put_gadget(dwc->gadget);
+ dwc->gadget = NULL;
err3:
dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce,
dwc->bounce_addr);
@@ -4065,6 +4066,9 @@ int dwc3_gadget_init(struct dwc3 *dwc)
void dwc3_gadget_exit(struct dwc3 *dwc)
{
+ if (!dwc->gadget)
+ return;
+
usb_del_gadget(dwc->gadget);
dwc3_gadget_free_endpoints(dwc);
usb_put_gadget(dwc->gadget);
--
2.24.0
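The shape of the fix is the usual "tear down only what init left valid"
pattern: the failure path resets the pointer, and the exit path treats NULL
as nothing-to-undo. A minimal userspace sketch, with the gadget reduced to
an opaque allocation.
#include <stdio.h>
#include <stdlib.h>
struct dwc_sketch {
	void *gadget; /* stands in for dwc->gadget */
};
static int gadget_init(struct dwc_sketch *dwc, int bind_fails)
{
	dwc->gadget = malloc(16);
	if (!dwc->gadget)
		return -1;
	if (bind_fails) {
		/* error path: release and, crucially, reset the pointer */
		free(dwc->gadget);
		dwc->gadget = NULL;
		return -1; /* e.g. pending gadget driver failed to bind */
	}
	return 0;
}
static void gadget_exit(struct dwc_sketch *dwc)
{
	if (!dwc->gadget) /* init failed earlier: nothing to undo */
		return;
	free(dwc->gadget);
	dwc->gadget = NULL;
}
int main(void)
{
	struct dwc_sketch dwc = { 0 };
	gadget_init(&dwc, 1); /* peripheral switch where the bind fails */
	gadget_exit(&dwc);    /* safe: no dangling-pointer teardown */
	printf("gadget: %p\n", dwc.gadget);
	return 0;
}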
This is the start of the stable review cycle for the 5.12.8 release.
There are 7 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat, 29 May 2021 15:11:29 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.12.8-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.12.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.12.8-rc1
Dongliang Mu <mudongliangabcd(a)gmail.com>
NFC: nci: fix memory leak in nci_allocate_device
Wanpeng Li <wanpengli(a)tencent.com>
KVM: x86: Defer vtime accounting 'til after IRQ handling
Wanpeng Li <wanpengli(a)tencent.com>
context_tracking: Move guest exit vtime accounting to separate helpers
Wanpeng Li <wanpengli(a)tencent.com>
context_tracking: Move guest exit context tracking to separate helpers
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: No need to simulate speculative domain for immediates
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Fix mask direction swap upon off reg sign change
Daniel Borkmann <daniel(a)iogearbox.net>
bpf: Wrap aux data inside bpf_sanitize_info container
-------------
Diffstat:
Makefile | 4 ++--
arch/x86/kvm/svm/svm.c | 6 +++---
arch/x86/kvm/vmx/vmx.c | 6 +++---
arch/x86/kvm/x86.c | 9 ++++++++
include/linux/context_tracking.h | 25 ++++++++++++++++++----
include/net/nfc/nci_core.h | 1 +
kernel/bpf/verifier.c | 46 ++++++++++++++++++++++++----------------
net/nfc/nci/core.c | 1 +
net/nfc/nci/hci.c | 5 +++++
9 files changed, 73 insertions(+), 30 deletions(-)
I'm announcing the release of the 5.12.8 kernel.
All users of the 5.12 kernel series must upgrade.
The updated 5.12.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.12.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 -
arch/x86/kvm/svm/svm.c | 6 ++---
arch/x86/kvm/vmx/vmx.c | 6 ++---
arch/x86/kvm/x86.c | 9 +++++++
include/linux/context_tracking.h | 25 +++++++++++++++++----
include/net/nfc/nci_core.h | 1
kernel/bpf/verifier.c | 46 +++++++++++++++++++++++----------------
net/nfc/nci/core.c | 1
net/nfc/nci/hci.c | 5 ++++
9 files changed, 72 insertions(+), 29 deletions(-)
Daniel Borkmann (3):
bpf: Wrap aux data inside bpf_sanitize_info container
bpf: Fix mask direction swap upon off reg sign change
bpf: No need to simulate speculative domain for immediates
Dongliang Mu (1):
NFC: nci: fix memory leak in nci_allocate_device
Greg Kroah-Hartman (1):
Linux 5.12.8
Wanpeng Li (3):
context_tracking: Move guest exit context tracking to separate helpers
context_tracking: Move guest exit vtime accounting to separate helpers
KVM: x86: Defer vtime accounting 'til after IRQ handling
I'm announcing the release of the 5.10.41 kernel.
All users of the 5.10 kernel series must upgrade.
The updated 5.10.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.10.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 -
arch/x86/kvm/svm/svm.c | 6 ++---
arch/x86/kvm/vmx/vmx.c | 6 ++---
arch/x86/kvm/x86.c | 9 +++++++
include/linux/context_tracking.h | 25 +++++++++++++++++----
include/net/nfc/nci_core.h | 1
kernel/bpf/verifier.c | 46 +++++++++++++++++++++++----------------
net/nfc/nci/core.c | 1
net/nfc/nci/hci.c | 5 ++++
tools/perf/util/unwind-libdw.c | 35 ++++++++++++++++++++++++++---
10 files changed, 103 insertions(+), 33 deletions(-)
Daniel Borkmann (3):
bpf: Wrap aux data inside bpf_sanitize_info container
bpf: Fix mask direction swap upon off reg sign change
bpf: No need to simulate speculative domain for immediates
Dave Rigby (1):
perf unwind: Set userdata for all __report_module() paths
Dongliang Mu (1):
NFC: nci: fix memory leak in nci_allocate_device
Greg Kroah-Hartman (1):
Linux 5.10.41
Jan Kratochvil (1):
perf unwind: Fix separate debug info files when using elfutils' libdw's unwinder
Wanpeng Li (3):
context_tracking: Move guest exit context tracking to separate helpers
context_tracking: Move guest exit vtime accounting to separate helpers
KVM: x86: Defer vtime accounting 'til after IRQ handling
I'm announcing the release of the 5.4.123 kernel.
All users of the 5.4 kernel series must upgrade.
The updated 5.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.4.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 -
drivers/usb/dwc3/gadget.c | 4 +++
include/net/nfc/nci_core.h | 1
kernel/bpf/verifier.c | 46 ++++++++++++++++++++++++-----------------
net/nfc/nci/core.c | 1
net/nfc/nci/hci.c | 5 ++++
tools/perf/util/unwind-libdw.c | 35 +++++++++++++++++++++++++++----
7 files changed, 71 insertions(+), 23 deletions(-)
Daniel Borkmann (3):
bpf: Wrap aux data inside bpf_sanitize_info container
bpf: Fix mask direction swap upon off reg sign change
bpf: No need to simulate speculative domain for immediates
Dave Rigby (1):
perf unwind: Set userdata for all __report_module() paths
Dongliang Mu (1):
NFC: nci: fix memory leak in nci_allocate_device
Greg Kroah-Hartman (1):
Linux 5.4.123
Jack Pham (1):
usb: dwc3: gadget: Enable suspend events
Jan Kratochvil (1):
perf unwind: Fix separate debug info files when using elfutils' libdw's unwinder
This patchset is based on Frank van der Linden's backport of CVE-2021-29155
fixes to 5.4 and 4.14:
https://lore.kernel.org/stable/20210429220839.15667-1-fllinden@amazon.com/
https://lore.kernel.org/stable/20210501043014.33300-1-fllinden@amazon.com/
With this series, all verifier selftests but one (that has already been
failing, see [1] for more details) succeed.
What the series does is:
* Fix verifier selftests by backporting various bpf/selftest upstream commits +
add two 4.19 specific fixes
* Backport fixes for CVE-2021-29155 from 5.4 stable, including selftest
changes. Only minor context adjustments were made for the 4.19 backport.
The following commits that fix selftests are 4.19 specific:
Ovidiu Panait (2):
1. bpf: fix up selftests after backports were fixed
This is the 4.19 equivalent of
https://lore.kernel.org/stable/20210501043014.33300-3-fllinden@amazon.com/
Basically a backport of upstream commit 80c9b2fae87b ("bpf: add various
test cases to selftests") adapted to 4.19 in order to fix the
selftests that began to fail after CVE-2019-7308 fixes.
2. selftests/bpf: add selftest part of "bpf: improve verifier branch
analysis"
This is a cherry-pick of the selftest parts that have been left out when
backporting 4f7b3e82589e0 ("bpf: improve verifier branch analysis") to 4.19.
[1] Note:
There is one verifier selftest that still fails:
...
#640/p bpf_get_stack return R0 within range FAIL
Failed to load prog 'Invalid argument'!
0: (bf) r6 = r1
1: (7a) *(u64 *)(r10 -8) = 0
2: (bf) r2 = r10
3: (07) r2 += -8
4: (18) r1 = 0xffff89a8f5503000
6: (85) call bpf_map_lookup_elem#1
7: (15) if r0 == 0x0 goto pc+28
R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R6=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
8: (bf) r7 = r0
9: (b7) r9 = 48
10: (bf) r1 = r6
11: (bf) r2 = r7
12: (b7) r3 = 48
13: (b7) r4 = 256
14: (85) call bpf_get_stack#67
R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1_w=ctx(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=8,vs=48,imm=0) R3_w=inv48 R4_w=inv256 R6=ctx(id=0,off=0,imm=0) R7_w=map_value(id=0,off=0,ks=8,vs=48,imm=0) R9_w=inv48 R10=fp0,call_-1
15: (b7) r1 = 0
16: (bf) r8 = r0
17: (67) r8 <<= 32
18: (c7) r8 s>>= 32
19: (cd) if r1 s< r8 goto pc+16
R0=inv(id=0,umax_value=48,var_off=(0x0; 0x3f)) R1=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=8,vs=48,imm=0) R8=inv0 R9=inv48 R10=fp0,call_-1
20: (1f) r9 -= r8
21: (bf) r2 = r7
22: (0f) r2 += r8
23: (bf) r1 = r9
24: (67) r1 <<= 32
25: (c7) r1 s>>= 32
26: (bf) r3 = r2
27: (0f) r3 += r1
28: (bf) r1 = r7
29: (b7) r5 = 48
30: (0f) r1 += r5
31: (3d) if r3 >= r1 goto pc+4
R0=inv(id=0,umax_value=48,var_off=(0x0; 0x3f)) R1=map_value(id=0,off=48,ks=8,vs=48,imm=0) R2=map_value(id=0,off=0,ks=8,vs=48,imm=0) R3=map_value(id=0,off=48,ks=8,vs=48,imm=0) R5=inv48 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=8,vs=48,imm=0) R8=inv0 R9=inv48 R10=fp0,call_-1
32: (bf) r1 = r6
33: (bf) r3 = r9
34: (b7) r4 = 0
35: (85) call bpf_get_stack#67
R0=inv(id=0,umax_value=48,var_off=(0x0; 0x3f)) R1_w=ctx(id=0,off=0,imm=0) R2=map_value(id=0,off=0,ks=8,vs=48,imm=0) R3_w=inv48 R4_w=inv0 R5=inv48 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=8,vs=48,imm=0) R8=inv0 R9=inv48 R10=fp0,call_-1
36: (95) exit
from 35 to 36: R0=inv(id=0,umin_value=18446744071562067968,var_off=(0xffffffff80000000; 0x7fffffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=8,vs=48,imm=0) R8=inv0 R9=inv48 R10=fp0,call_-1
36: (95) exit
from 31 to 36: safe
from 19 to 36: safe
from 14 to 15: R0=inv(id=0,umin_value=18446744071562067968,var_off=(0xffffffff80000000; 0x7fffffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=8,vs=48,imm=0) R9=inv48 R10=fp0,call_-1
15: (b7) r1 = 0
16: (bf) r8 = r0
17: (67) r8 <<= 32
18: (c7) r8 s>>= 32
19: (cd) if r1 s< r8 goto pc+16
R0=inv(id=0,umin_value=18446744071562067968,var_off=(0xffffffff80000000; 0x7fffffff)) R1=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=8,vs=48,imm=0) R8=inv(id=0,umin_value=18446744071562067968,var_off=(0xffffffff80000000; 0x7fffffff)) R9=inv48 R10=fp0,call_-1
20: (1f) r9 -= r8
21: (bf) r2 = r7
22: (0f) r2 += r8
value -2147483648 makes map_value pointer be out of bounds
This failure was introduced after the following 4.19 fix:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
In 5.4 it was fixed by the following commits, but backporting them to 4.19 is
not enough to fix the failing test:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
After bisect, the following upstream commit needs to be present as well in
order for the selftest to pass, but I am not sure if it is suitable for stable
backport:
https://github.com/torvalds/linux/commit/2589726d12a1b12eaaa93c7f1ea64287e3…
Andrey Ignatov (1):
selftests/bpf: Test narrow loads with off > 0 in test_verifier
Daniel Borkmann (8):
bpf: Move off_reg into sanitize_ptr_alu
bpf: Ensure off_reg has no mixed signed bounds for all types
bpf: Rework ptr_limit into alu_limit and add common error path
bpf: Improve verifier error messages for users
bpf: Refactor and streamline bounds check into helper
bpf: Move sanitize_val_alu out of op switch
bpf: Tighten speculative pointer arithmetic mask
bpf: Update selftests to reflect new error states
Ovidiu Panait (2):
bpf: fix up selftests after backports were fixed
selftests/bpf: add selftest part of "bpf: improve verifier branch
analysis"
Piotr Krysiuk (1):
bpf, selftests: Fix up some test_verifier cases for unprivileged
kernel/bpf/verifier.c | 229 ++++++++++++++------
tools/testing/selftests/bpf/test_verifier.c | 104 +++++++--
2 files changed, 241 insertions(+), 92 deletions(-)
--
2.17.1
Dear Greg, Dear Sasha,
please consider applying the patch below to v4.4-stable.
It is a backport of upstream commit 794aaf01444d, which has already
been applied to all stable kernels going back to v4.14.
Thanks!
Lukas
-- >8 --
>From 429a36a750568599640ae8b9e603d639181fee9a Mon Sep 17 00:00:00 2001
From: "William A. Kennington III" <wak(a)google.com>
Date: Wed, 7 Apr 2021 02:55:27 -0700
Subject: [PATCH] spi: Fix use-after-free with devm_spi_alloc_*
commit 794aaf01444d4e765e2b067cba01cc69c1c68ed9 upstream.
We can't rely on the contents of the devres list during
spi_unregister_controller(), as the list is already torn down at the
time we perform devres_find() for devm_spi_release_controller. This
causes devices registered with devm_spi_alloc_{master,slave}() to be
mistakenly identified as legacy, non-devm managed devices and have their
reference counters decremented below 0.
------------[ cut here ]------------
WARNING: CPU: 1 PID: 660 at lib/refcount.c:28 refcount_warn_saturate+0x108/0x174
[<b0396f04>] (refcount_warn_saturate) from [<b03c56a4>] (kobject_put+0x90/0x98)
[<b03c5614>] (kobject_put) from [<b0447b4c>] (put_device+0x20/0x24)
r4:b6700140
[<b0447b2c>] (put_device) from [<b07515e8>] (devm_spi_release_controller+0x3c/0x40)
[<b07515ac>] (devm_spi_release_controller) from [<b045343c>] (release_nodes+0x84/0xc4)
r5:b6700180 r4:b6700100
[<b04533b8>] (release_nodes) from [<b0454160>] (devres_release_all+0x5c/0x60)
r8:b1638c54 r7:b117ad94 r6:b1638c10 r5:b117ad94 r4:b163dc10
[<b0454104>] (devres_release_all) from [<b044e41c>] (__device_release_driver+0x144/0x1ec)
r5:b117ad94 r4:b163dc10
[<b044e2d8>] (__device_release_driver) from [<b044f70c>] (device_driver_detach+0x84/0xa0)
r9:00000000 r8:00000000 r7:b117ad94 r6:b163dc54 r5:b1638c10 r4:b163dc10
[<b044f688>] (device_driver_detach) from [<b044d274>] (unbind_store+0xe4/0xf8)
Instead, determine the devm allocation state as a flag on the
controller which is guaranteed to be stable during cleanup.
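For context, a minimal sketch of the affected driver pattern
(hypothetical foo driver; the API calls are the ones this patch
touches):

struct foo_priv { int dummy; };	/* hypothetical private state */

static int foo_probe(struct platform_device *pdev)
{
	struct spi_master *master;

	/*
	 * Devres-managed: on unbind, devres_release_all() drops the
	 * last reference via devm_spi_release_controller(), so
	 * spi_unregister_master() must not call put_device() again.
	 */
	master = devm_spi_alloc_master(&pdev->dev, sizeof(struct foo_priv));
	if (!master)
		return -ENOMEM;

	return devm_spi_register_master(&pdev->dev, master);
}

During unbind, spi_unregister_master() runs while the devres list is
already being torn down, so the devres_find() removed below can no
longer see the entry; the controller is then treated as legacy and
put_device() is called once too often. A flag stored on the controller
itself stays valid for the whole teardown.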
Fixes: 5e844cc37a5c ("spi: Introduce device-managed SPI controller allocation")
Signed-off-by: William A. Kennington III <wak(a)google.com>
Link: https://lore.kernel.org/r/20210407095527.2771582-1-wak@google.com
Signed-off-by: Mark Brown <broonie(a)kernel.org>
[lukas: backport to v4.4.270]
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
---
drivers/spi/spi.c | 9 ++-------
include/linux/spi/spi.h | 3 +++
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index e85feee750e3..f743b95d5171 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1762,6 +1762,7 @@ struct spi_master *devm_spi_alloc_master(struct device *dev, unsigned int size)
master = spi_alloc_master(dev, size);
if (master) {
+ master->devm_allocated = true;
*ptr = master;
devres_add(dev, ptr);
} else {
@@ -1951,11 +1952,6 @@ int devm_spi_register_master(struct device *dev, struct spi_master *master)
}
EXPORT_SYMBOL_GPL(devm_spi_register_master);
-static int devm_spi_match_master(struct device *dev, void *res, void *master)
-{
- return *(struct spi_master **)res == master;
-}
-
static int __unregister(struct device *dev, void *null)
{
spi_unregister_device(to_spi_device(dev));
@@ -1994,8 +1990,7 @@ void spi_unregister_master(struct spi_master *master)
/* Release the last reference on the master if its driver
* has not yet been converted to devm_spi_alloc_master().
*/
- if (!devres_find(master->dev.parent, devm_spi_release_master,
- devm_spi_match_master, master))
+ if (!master->devm_allocated)
put_device(&master->dev);
if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index f5d387140c46..da487e905337 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -425,6 +425,9 @@ struct spi_master {
#define SPI_MASTER_MUST_RX BIT(3) /* requires rx */
#define SPI_MASTER_MUST_TX BIT(4) /* requires tx */
+ /* flag indicating whether this controller was allocated via devm_spi_alloc_* */
+ bool devm_allocated;
+
/* lock and mutex for SPI bus locking */
spinlock_t bus_lock_spinlock;
struct mutex bus_lock_mutex;
--
2.31.1
Dear Greg, Dear Sasha,
please consider applying the patch below to v4.9-stable.
It is a backport of upstream commit 794aaf01444d, which has already
been applied to all stable kernels going back to v4.14.
Thanks!
Lukas
-- >8 --
From: "William A. Kennington III" <wak(a)google.com>
Date: Wed, 7 Apr 2021 02:55:27 -0700
Subject: [PATCH] spi: Fix use-after-free with devm_spi_alloc_*
commit 794aaf01444d4e765e2b067cba01cc69c1c68ed9 upstream.
We can't rely on the contents of the devres list during
spi_unregister_controller(), as the list is already torn down at the
time we perform devres_find() for devm_spi_release_controller. This
causes devices registered with devm_spi_alloc_{master,slave}() to be
mistakenly identified as legacy, non-devm managed devices and have their
reference counters decremented below 0.
------------[ cut here ]------------
WARNING: CPU: 1 PID: 660 at lib/refcount.c:28 refcount_warn_saturate+0x108/0x174
[<b0396f04>] (refcount_warn_saturate) from [<b03c56a4>] (kobject_put+0x90/0x98)
[<b03c5614>] (kobject_put) from [<b0447b4c>] (put_device+0x20/0x24)
r4:b6700140
[<b0447b2c>] (put_device) from [<b07515e8>] (devm_spi_release_controller+0x3c/0x40)
[<b07515ac>] (devm_spi_release_controller) from [<b045343c>] (release_nodes+0x84/0xc4)
r5:b6700180 r4:b6700100
[<b04533b8>] (release_nodes) from [<b0454160>] (devres_release_all+0x5c/0x60)
r8:b1638c54 r7:b117ad94 r6:b1638c10 r5:b117ad94 r4:b163dc10
[<b0454104>] (devres_release_all) from [<b044e41c>] (__device_release_driver+0x144/0x1ec)
r5:b117ad94 r4:b163dc10
[<b044e2d8>] (__device_release_driver) from [<b044f70c>] (device_driver_detach+0x84/0xa0)
r9:00000000 r8:00000000 r7:b117ad94 r6:b163dc54 r5:b1638c10 r4:b163dc10
[<b044f688>] (device_driver_detach) from [<b044d274>] (unbind_store+0xe4/0xf8)
Instead, determine the devm allocation state as a flag on the
controller which is guaranteed to be stable during cleanup.
Fixes: 5e844cc37a5c ("spi: Introduce device-managed SPI controller allocation")
Signed-off-by: William A. Kennington III <wak(a)google.com>
Link: https://lore.kernel.org/r/20210407095527.2771582-1-wak@google.com
Signed-off-by: Mark Brown <broonie(a)kernel.org>
[lukas: backport to v4.9.270]
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
---
drivers/spi/spi.c | 9 ++-------
include/linux/spi/spi.h | 3 +++
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index f0ba5eb26128..84e2296c45a2 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1869,6 +1869,7 @@ struct spi_master *devm_spi_alloc_master(struct device *dev, unsigned int size)
master = spi_alloc_master(dev, size);
if (master) {
+ master->devm_allocated = true;
*ptr = master;
devres_add(dev, ptr);
} else {
@@ -2059,11 +2060,6 @@ int devm_spi_register_master(struct device *dev, struct spi_master *master)
}
EXPORT_SYMBOL_GPL(devm_spi_register_master);
-static int devm_spi_match_master(struct device *dev, void *res, void *master)
-{
- return *(struct spi_master **)res == master;
-}
-
static int __unregister(struct device *dev, void *null)
{
spi_unregister_device(to_spi_device(dev));
@@ -2102,8 +2098,7 @@ void spi_unregister_master(struct spi_master *master)
/* Release the last reference on the master if its driver
* has not yet been converted to devm_spi_alloc_master().
*/
- if (!devres_find(master->dev.parent, devm_spi_release_master,
- devm_spi_match_master, master))
+ if (!master->devm_allocated)
put_device(&master->dev);
if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 8470695e5dd7..9c8445f1af0c 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -443,6 +443,9 @@ struct spi_master {
#define SPI_MASTER_MUST_RX BIT(3) /* requires rx */
#define SPI_MASTER_MUST_TX BIT(4) /* requires tx */
+ /* flag indicating whether this controller was allocated via devm_spi_alloc_* */
+ bool devm_allocated;
+
/*
* on some hardware transfer / message size may be constrained
* the limit may depend on device transfer settings
--
2.31.1
There exists a possible scenario in which dwc3_gadget_init() can fail:
during a host -> peripheral mode switch in dwc3_set_mode(), when
a pending gadget driver fails to bind. Then, if the DRD undergoes
another mode switch from peripheral->host the resulting
dwc3_gadget_exit() will attempt to reference an invalid and dangling
dwc->gadget pointer as well as call dma_free_coherent() on unmapped
DMA pointers.
The exact scenario can be reproduced as follows:
- Start DWC3 in peripheral mode
- Configure ConfigFS gadget with FunctionFS instance (or use g_ffs)
- Run FunctionFS userspace application (open EPs, write descriptors, etc)
- Bind gadget driver to DWC3's UDC
- Switch DWC3 to host mode
=> dwc3_gadget_exit() is called. usb_del_gadget() will put the
ConfigFS driver instance on the gadget_driver_pending_list
- Stop FunctionFS application (closes the ep files)
- Switch DWC3 to peripheral mode
=> dwc3_gadget_init() fails as usb_add_gadget() calls
check_pending_gadget_drivers() and attempts to rebind the UDC
to the ConfigFS gadget but fails with -19 (-ENODEV) because the
FFS instance is not in FFS_READY state.
- Switch DWC3 back to host mode
=> dwc3_gadget_exit() is called again, but this time dwc->gadget
is invalid.
Although it can be argued that userspace should take responsibility
for ensuring that the FunctionFS application is ready prior to
allowing the composite driver to bind to the UDC, failure to do so
should not result in a panic from the kernel driver.
Fix this by setting dwc->gadget to NULL in the failure path of
dwc3_gadget_init() and add a check to dwc3_gadget_exit() to bail out
unless the gadget pointer is valid.
Fixes: e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jack Pham <jackp(a)codeaurora.org>
---
Hi Felipe,
Although I marked the 'Fixes' tag above to e81a7018d93a, the problem
theoretically exists prior to Peter's change. But I'm not sure how
best to fix it on versions prior to that change, since dwc->gadget used
to be an embedded struct and we can't do a simple NULL check as below.
Suggestions on alternative approaches welcome if we want to proceed
with backporting to older (pre-5.9) stable releases.
Thanks,
Jack
drivers/usb/dwc3/gadget.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 612825a39f82..65d9b7227752 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4046,6 +4046,7 @@ int dwc3_gadget_init(struct dwc3 *dwc)
dwc3_gadget_free_endpoints(dwc);
err4:
usb_put_gadget(dwc->gadget);
+ dwc->gadget = NULL;
err3:
dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce,
dwc->bounce_addr);
@@ -4065,6 +4066,9 @@ int dwc3_gadget_init(struct dwc3 *dwc)
void dwc3_gadget_exit(struct dwc3 *dwc)
{
+ if (!dwc->gadget)
+ return;
+
usb_del_gadget(dwc->gadget);
dwc3_gadget_free_endpoints(dwc);
usb_put_gadget(dwc->gadget);
--
2.24.0
This patch series contains stability fixes and error handling for remoteproc.
The changes included in this series do the following:
Patch 1: Fixes the creation of the rproc character device.
Patch 2: Validates rproc as the first step of rproc_add().
Patch 3: Adds error handling in rproc_add().
Siddharth Gupta (3):
remoteproc: core: Move cdev add before device add
remoteproc: core: Move validate before device add
remoteproc: core: Cleanup device in case of failure
drivers/remoteproc/remoteproc_core.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
--
Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
The patch titled
Subject: kthread: fix kthread_mod_delayed_work vs kthread_cancel_delayed_work_sync race
has been removed from the -mm tree. Its filename was
kthread-fix-kthread_mod_delayed_work-vs-kthread_cancel_delayed_work_sync-race.patch
This patch was dropped because an updated version will be merged
------------------------------------------------------
From: Martin Liu <liumartin(a)google.com>
Subject: kthread: fix kthread_mod_delayed_work vs kthread_cancel_delayed_work_sync race
We encountered a system hang issue while doing tests. The callstack
is as follows:
schedule+0x80/0x100
schedule_timeout+0x48/0x138
wait_for_common+0xa4/0x134
wait_for_completion+0x1c/0x2c
kthread_flush_work+0x114/0x1cc
kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144
kthread_cancel_delayed_work_sync+0x18/0x2c
xxxx_pm_notify+0xb0/0xd8
blocking_notifier_call_chain_robust+0x80/0x194
pm_notifier_call_chain_robust+0x28/0x4c
suspend_prepare+0x40/0x260
enter_state+0x80/0x3f4
pm_suspend+0x60/0xdc
state_store+0x108/0x144
kobj_attr_store+0x38/0x88
sysfs_kf_write+0x64/0xc0
kernfs_fop_write_iter+0x108/0x1d0
vfs_write+0x2f4/0x368
ksys_write+0x7c/0xec
When we started investigating, we found a race between
kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync(). The
race's result can be reproduced simply with a kthread_mod_delayed_work()
followed by a kthread_flush_work() call.
The thing is that we release the spinlock inside __kthread_cancel_work(),
which opens a race window for kthread_cancel_delayed_work_sync() to
change the canceling count that is used to prevent the dwork from being
requeued before kthread_flush_work() is called. However, we don't check
the canceling count after returning from __kthread_cancel_work() and
then insert the dwork into the worker. As a result, the subsequent
kthread_flush_work() inserts the flush work at the tail of the dwork,
which sits on the worker's delayed_work_list. Therefore, the flush work
will never get moved to the worker's work_list to be executed. Finally,
kthread_cancel_delayed_work_sync() will never complete and waits
forever. The code sequence diagram is as follows:
Thread A                                Thread B

kthread_mod_delayed_work
  spin_lock
  __kthread_cancel_work
    canceling = 1
    spin_unlock
                                        kthread_cancel_delayed_work_sync
                                          spin_lock
                                          kthread_cancel_work
                                            canceling = 2
                                            spin_unlock
                                          del_timer_sync
  spin_lock
  canceling = 1        // canceling count got updated in Thread B
                       // before queue_delayed_work
  queue_delayed_work   // dwork is put on the worker's
                       // delayed_work_list without checking the
                       // canceling count
  spin_unlock
                                        kthread_flush_work
                                          spin_lock
                                          insert flush work
                                            // at the tail of the dwork,
                                            // which is on the worker's
                                            // delayed_work_list
                                          spin_unlock
                                          wait_for_completion
                                            // Thread B is stuck here, as
                                            // the flush work will never
                                            // get executed
The canceling count can change in __kthread_cancel_work() as the
spinlock gets released and regained in between; let's check the count
again before we queue the delayed work to avoid the race.
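A minimal sketch of the triggering call pattern (hypothetical module
code; 'worker' and 'dwork' are assumed to have been set up elsewhere
with kthread_create_worker() and kthread_init_delayed_work()):

	/* Thread A: requeue the delayed work */
	kthread_mod_delayed_work(worker, &dwork, msecs_to_jiffies(10));

	/* Thread B, concurrently: cancel and flush */
	kthread_cancel_delayed_work_sync(&dwork);

If A requeues inside the window where B has already raised
work->canceling, B's internal kthread_flush_work() is inserted behind a
dwork parked on delayed_work_list, the completion never fires, and B
sleeps in wait_for_completion() forever.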
Link: https://lkml.kernel.org/r/20210513065458.941403-1-liumartin@google.com
Fixes: 37be45d49dec2 ("kthread: allow to cancel kthread work")
Signed-off-by: Martin Liu <liumartin(a)google.com>
Tested-by: David Chao <davidchao(a)google.com>
Reviewed-by: Petr Mladek <pmladek(a)suse.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: "Paul E. McKenney" <paulmck(a)linux.vnet.ibm.com>
Cc: Josh Triplett <josh(a)joshtriplett.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Jiri Kosina <jkosina(a)suse.cz>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Michal Hocko <mhocko(a)suse.cz>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Nick Desaulniers <ndesaulniers(a)google.com>
Cc: <jenhaochen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kthread.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
--- a/kernel/kthread.c~kthread-fix-kthread_mod_delayed_work-vs-kthread_cancel_delayed_work_sync-race
+++ a/kernel/kthread.c
@@ -1181,6 +1181,19 @@ bool kthread_mod_delayed_work(struct kth
goto out;
ret = __kthread_cancel_work(work, true, &flags);
+
+ /*
+ * Canceling could run in parallel from kthread_cancel_delayed_work_sync
+ * and change work's canceling count as the spinlock is released and
+ * regained in __kthread_cancel_work, so we need to check the count
+ * again. Otherwise, we might incorrectly queue the dwork and cause the
+ * cancel_delayed_work_sync thread to wait endlessly for the flush dwork.
+ */
+ if (work->canceling) {
+ ret = false;
+ goto out;
+ }
+
fast_queue:
__kthread_queue_delayed_work(worker, dwork, delay);
out:
_
Patches currently in -mm which might be from liumartin(a)google.com are
The recent change in the elants_i2c driver to support more chips
introduced a regression leading to an Oops at probe time. The driver
reads id->driver_data, but the id may be NULL depending on how the
device got bound.
Add a NULL check and fall back to the default EKTH3500.
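For background, probe() receives id == NULL whenever the client was not
matched through the legacy i2c_device_id table, e.g. when it was bound
via a devicetree compatible. A sketch of the OF table involved (the
compatible strings mirror the driver, but treat this as illustrative):

static const struct of_device_id elants_of_match[] = {
	{ .compatible = "elan,ekth3500" },
	{ .compatible = "elan,ektf3624" },
	{ /* sentinel */ }
};

In that case nothing fills id, so the fallback below keeps the
pre-eKTF3624 behaviour.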
Fixes: 9517b95bdc46 ("Input: elants_i2c - add support for eKTF3624")
BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1186454
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
---
drivers/input/touchscreen/elants_i2c.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index 17540bdb1eaf..172a6951cead 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -1396,7 +1396,10 @@ static int elants_i2c_probe(struct i2c_client *client,
init_completion(&ts->cmd_done);
ts->client = client;
- ts->chip_id = (enum elants_chip_id)id->driver_data;
+ if (id)
+ ts->chip_id = (enum elants_chip_id)id->driver_data;
+ else
+ ts->chip_id = EKTH3500;
i2c_set_clientdata(client, ts);
ts->vcc33 = devm_regulator_get(&client->dev, "vcc33");
--
2.26.2
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 160457140187c5fb127b844e5a85f87f00a01b14 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli(a)tencent.com>
Date: Tue, 4 May 2021 17:27:30 -0700
Subject: [PATCH] KVM: x86: Defer vtime accounting 'til after IRQ handling
Defer the call to account guest time until after servicing any IRQ(s)
that happened in the guest or immediately after VM-Exit. Tick-based
accounting of vCPU time relies on PF_VCPU being set when the tick IRQ
handler runs, and IRQs are blocked throughout the main sequence of
vcpu_enter_guest(), including the call into vendor code to actually
enter and exit the guest.
This fixes a bug where reported guest time remains '0', even when
running an infinite loop in the guest:
https://bugzilla.kernel.org/show_bug.cgi?id=209831
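In simplified form, the exit path after this patch looks like the
following (a sketch of the vcpu_enter_guest() flow, not literal kernel
code):

	/* vendor code ran the guest; context tracking already exited */
	local_irq_enable();	/* pending IRQs, including the tick, are
				 * serviced here while PF_VCPU is still
				 * set, so they are charged to the guest */
	local_irq_disable();
	kvm_after_interrupt(vcpu);
	vtime_account_guest_exit();	/* deferred until after the IRQs */

The accuracy loss for context-tracking-based accounting is bounded by
the IRQ-handling window, which the patch's own comment deems acceptable.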
Fixes: 87fa7f3e98a131 ("x86/kvm: Move context tracking where it belongs")
Suggested-by: Thomas Gleixner <tglx(a)linutronix.de>
Co-developed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Wanpeng Li <wanpengli(a)tencent.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20210505002735.1684165-4-seanjc@google.com
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9790c73f2a32..c400def6220b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3753,15 +3753,15 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
* have them in state 'on' as recorded before entering guest mode.
* Same as enter_from_user_mode().
*
- * guest_exit_irqoff() restores host context and reinstates RCU if
- * enabled and required.
+ * context_tracking_guest_exit() restores host context and reinstates
+ * RCU if enabled and required.
*
* This needs to be done before the below as native_read_msr()
* contains a tracepoint and x86_spec_ctrl_restore_host() calls
* into world and some more.
*/
lockdep_hardirqs_off(CALLER_ADDR0);
- guest_exit_irqoff();
+ context_tracking_guest_exit();
instrumentation_begin();
trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b21d751407b5..e108fb47855b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6703,15 +6703,15 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
* have them in state 'on' as recorded before entering guest mode.
* Same as enter_from_user_mode().
*
- * guest_exit_irqoff() restores host context and reinstates RCU if
- * enabled and required.
+ * context_tracking_guest_exit() restores host context and reinstates
+ * RCU if enabled and required.
*
* This needs to be done before the below as native_read_msr()
* contains a tracepoint and x86_spec_ctrl_restore_host() calls
* into world and some more.
*/
lockdep_hardirqs_off(CALLER_ADDR0);
- guest_exit_irqoff();
+ context_tracking_guest_exit();
instrumentation_begin();
trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cebdaa1e3cf5..6eda2834fc05 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9315,6 +9315,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
local_irq_disable();
kvm_after_interrupt(vcpu);
+ /*
+ * Wait until after servicing IRQs to account guest time so that any
+ * ticks that occurred while running the guest are properly accounted
+ * to the guest. Waiting until IRQs are enabled degrades the accuracy
+ * of accounting via context tracking, but the loss of accuracy is
+ * acceptable for all known use cases.
+ */
+ vtime_account_guest_exit();
+
if (lapic_in_kernel(vcpu)) {
s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
if (delta != S64_MIN) {
This is a note to let you know that I've just added the patch titled
serial: 8250_pci: handle FL_NOIRQ board flag
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 9808f9be31c68af43f6e531f2c851ebb066513fe Mon Sep 17 00:00:00 2001
From: Christian Gmeiner <christian.gmeiner(a)gmail.com>
Date: Thu, 27 May 2021 11:54:40 +0200
Subject: serial: 8250_pci: handle FL_NOIRQ board flag
In commit 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support")
the way the irq gets allocated was changed. With that change the
handling FL_NOIRQ got lost. Restore the old behaviour.
Fixes: 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Christian Gmeiner <christian.gmeiner(a)gmail.com>
Link: https://lore.kernel.org/r/20210527095529.26281-1-christian.gmeiner@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/serial/8250/8250_pci.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 04fe42469990..780cc99732b6 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -3958,21 +3958,26 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board)
uart.port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ;
uart.port.uartclk = board->base_baud * 16;
- if (pci_match_id(pci_use_msi, dev)) {
- dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n");
- pci_set_master(dev);
- rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (board->flags & FL_NOIRQ) {
+ uart.port.irq = 0;
} else {
- dev_dbg(&dev->dev, "Using legacy interrupts\n");
- rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
- }
- if (rc < 0) {
- kfree(priv);
- priv = ERR_PTR(rc);
- goto err_deinit;
+ if (pci_match_id(pci_use_msi, dev)) {
+ dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n");
+ pci_set_master(dev);
+ rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES);
+ } else {
+ dev_dbg(&dev->dev, "Using legacy interrupts\n");
+ rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
+ }
+ if (rc < 0) {
+ kfree(priv);
+ priv = ERR_PTR(rc);
+ goto err_deinit;
+ }
+
+ uart.port.irq = pci_irq_vector(dev, 0);
}
- uart.port.irq = pci_irq_vector(dev, 0);
uart.port.dev = &dev->dev;
for (i = 0; i < nr_ports; i++) {
--
2.31.1
This is a note to let you know that I've just added the patch titled
mei: request autosuspend after sending rx flow control
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From bbf0a94744edfeee298e4a9ab6fd694d639a5cdf Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin(a)intel.com>
Date: Wed, 26 May 2021 22:33:34 +0300
Subject: mei: request autosuspend after sending rx flow control
An rx flow control message waiting in the control queue may block
autosuspend. Re-request autosuspend after the flow control has been
sent to unblock the transition to the low power state.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Alexander Usyskin <alexander.usyskin(a)intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler(a)intel.com>
Link: https://lore.kernel.org/r/20210526193334.445759-1-tomas.winkler@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/misc/mei/interrupt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index a98f6b895af7..aab3ebfa9fc4 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -277,6 +277,9 @@ static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb,
return ret;
}
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_request_autosuspend(dev->dev);
+
list_move_tail(&cb->list, &cl->rd_pending);
return 0;
--
2.31.1
This is a note to let you know that I've just added the patch titled
ipack/carriers/tpci200: Fix a double free in tpci200_pci_probe
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the char-misc-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
>From 9272e5d0028d45a3b45b58c9255e6e0df53f7ad9 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019(a)mail.ustc.edu.cn>
Date: Mon, 24 May 2021 02:32:05 -0700
Subject: ipack/carriers/tpci200: Fix a double free in tpci200_pci_probe
In the out_err_bus_register error branch of tpci200_pci_probe(),
tpci200->info->cfg_regs is freed for the first time by
tpci200_uninstall() -> tpci200_unregister() ->
pci_iounmap(.., tpci200->info->cfg_regs).
But later, iounmap() is called to free tpci200->info->cfg_regs again.
This patch sets tpci200->info->cfg_regs to NULL after tpci200_uninstall()
to avoid the double free.
Fixes: cea2f7cdff2af ("Staging: ipack/bridges/tpci200: Use the TPCI200 in big endian mode")
Cc: stable <stable(a)vger.kernel.org>
Acked-by: Samuel Iglesias Gonsalvez <siglesias(a)igalia.com>
Signed-off-by: Lv Yunlong <lyl2019(a)mail.ustc.edu.cn>
Link: https://lore.kernel.org/r/20210524093205.8333-1-lyl2019@mail.ustc.edu.cn
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/ipack/carriers/tpci200.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/ipack/carriers/tpci200.c b/drivers/ipack/carriers/tpci200.c
index ec71063fff76..e1822e87ec3d 100644
--- a/drivers/ipack/carriers/tpci200.c
+++ b/drivers/ipack/carriers/tpci200.c
@@ -596,8 +596,11 @@ static int tpci200_pci_probe(struct pci_dev *pdev,
out_err_bus_register:
tpci200_uninstall(tpci200);
+ /* tpci200->info->cfg_regs is unmapped in tpci200_uninstall */
+ tpci200->info->cfg_regs = NULL;
out_err_install:
- iounmap(tpci200->info->cfg_regs);
+ if (tpci200->info->cfg_regs)
+ iounmap(tpci200->info->cfg_regs);
out_err_ioremap:
pci_release_region(pdev, TPCI200_CFG_MEM_BAR);
out_err_pci_request:
--
2.31.1
We don't set the SB_BORN flag on submounts. This is wrong, as these
superblocks are then considered partially constructed or dying by the
rest of the code, which can break some assumptions.
One such case is when you have a virtiofs filesystem with submounts
and you try to mount it again: virtio_fs_get_tree() tries to obtain
a superblock with sget_fc(). The logic in sget_fc() is to loop until
it has either found an existing matching superblock with SB_BORN set
or created a brand new one. It is assumed that a superblock without
SB_BORN is transient and should go away. Forgetting to set SB_BORN on
submounts hence causes sget_fc() to retry forever.
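In rough pseudo-code, the sget_fc() loop described above behaves like
this (a paraphrase of fs/super.c, not the literal code):

retry:
	/* look for an existing superblock matching this fs_context */
	hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
		if (!test(old, fc))
			continue;
		if (!grab_super(old))	/* fails while SB_BORN is unset:
					 * the sb looks transient/dying */
			goto retry;
		return old;
	}
	/* no match: allocate and register a brand new superblock */

With SB_BORN never set on the submount's sb, grab_super() keeps failing
and the loop never terminates.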
Setting SB_BORN requires special care, i.e. a write barrier for
super_cache_count() which can check SB_BORN without taking any lock.
We should call vfs_get_tree() to deal with that, but this requires
having a proper ->get_tree() implementation for submounts, which
is a bigger piece of work. Go for a simple bug fix in the meantime.
Fixes: bf109c64040f ("fuse: implement crossmounts")
Cc: mreitz(a)redhat.com
Cc: stable(a)vger.kernel.org # v5.10+
Signed-off-by: Greg Kurz <groug(a)kaod.org>
---
fs/fuse/dir.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 01559061cbfb..3b0482738741 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -346,6 +346,16 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
goto out_put_sb;
}
+ /*
+ * FIXME: setting SB_BORN requires a write barrier for
+ * super_cache_count(). We should actually come
+ * up with a proper ->get_tree() implementation
+ * for submounts and call vfs_get_tree() to take
+ * care of the write barrier.
+ */
+ smp_wmb();
+ sb->s_flags |= SB_BORN;
+
sb->s_flags |= SB_ACTIVE;
fsc->root = dget(sb->s_root);
/* We are done configuring the superblock, so unlock it */
--
2.31.1
In the cache-miss code path of a cached device, if a proper location
in the internal B+ tree is matched for a cache-miss range, the function
cached_dev_cache_miss() will be called from cache_lookup_fn() in the
following code block,
[code block 1]
526 unsigned int sectors = KEY_INODE(k) == s->iop.inode
527 ? min_t(uint64_t, INT_MAX,
528 KEY_START(k) - bio->bi_iter.bi_sector)
529 : INT_MAX;
530 int ret = s->d->cache_miss(b, s, bio, sectors);
Here s->d->cache_miss() is the callback function pointer initialized to
cached_dev_cache_miss(), and the last parameter 'sectors' is an
important hint for calculating the size of the read request to the
backing device for the missing cache data.
The current calculation in the above code block may generate an
oversized value of 'sectors', which consequently may trigger two
different potential kernel panics by BUG() or BUG_ON(), as listed
below,
1) BUG_ON() inside bch_btree_insert_key(),
[code block 2]
886 BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
2) BUG() inside biovec_slab(),
[code block 3]
51 default:
52 BUG();
53 return NULL;
Both of the above panics originate from cached_dev_cache_miss() being
passed an oversized parameter 'sectors'.
Inside cached_dev_cache_miss(), the parameter 'sectors' is used to
calculate the size of the data read from the backing device for the
cache miss. This size is stored in s->insert_bio_sectors by the
following lines of code,
[code block 4]
909 s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
Then the actual key inserting to the internal B+ tree is generated and
stored in s->iop.replace_key by the following lines of code,
[code block 5]
911 s->iop.replace_key = KEY(s->iop.inode,
912 bio->bi_iter.bi_sector + s->insert_bio_sectors,
913 s->insert_bio_sectors);
The oversized parameter 'sectors' may trigger panic 1) by BUG_ON() from
the above code block.
And the bio sending to backing device for the missing data is allocated
with hint from s->insert_bio_sectors by the following lines of code,
[code block 6]
926 cache_bio = bio_alloc_bioset(GFP_NOWAIT,
927 DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
928 &dc->disk.bio_split);
The oversized parameter 'sectors' may trigger panic 2) by BUG() from the
above code block.
Now let me explain how the panics happen with the oversized 'sectors'.
In code block 5, replace_key is generated by macro KEY(). From the
definition of macro KEY(),
[code block 7]
71 #define KEY(inode, offset, size) \
72 ((struct bkey) { \
73 .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \
74 .low = (offset) \
75 })
Here 'size' is a 16-bit-wide field embedded in the 64-bit member 'high'
of struct bkey. But in code block 1, "KEY_START(k) -
bio->bi_iter.bi_sector" may well be larger than (1 << 16) - 1, which
makes the bkey size calculation in code block 5 overflow. In one bug
report the value of parameter 'sectors' is 131072 (= 1 << 17); the
oversized 'sectors' results in an oversized s->insert_bio_sectors in
code block 4, which then makes the size field of s->iop.replace_key 0
in code block 5. The 0-sized s->iop.replace_key is then inserted into
the internal B+ tree as a cache-miss check key (a special key to detect
and avoid a race between a normal write request and a cache-miss read
request) as,
[code block 8]
915 ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);
Then the 0-sized s->iop.replace_key passed as the 3rd parameter triggers
the bkey size check BUG_ON() in code block 2 and causes kernel panic 1).
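The truncation is plain integer arithmetic; a standalone sketch with
the numbers from the report (assuming the 16-bit size field sits at bit
20 of .high, as the KEY() macro above packs it):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sectors = 131072;	/* 1 << 17, from the bug report */

	/* KEY() shifts the size to bit 20 of .high without masking ... */
	uint64_t high = (1ULL << 63) | (sectors << 20);

	/* ... but the 16-bit KEY_SIZE() field reads back as 0 */
	uint64_t key_size = (high >> 20) & 0xffff;

	printf("%llu\n", (unsigned long long)key_size);	/* prints 0 */
	return 0;
}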
The other kernel panic comes from code block 6 and is caused by an
oversized bvecs number derived from s->insert_bio_sectors in code
block 4,
min(sectors, bio_sectors(bio) + reada)
There are two possibilities for an oversized result,
- bio_sectors(bio) is valid, but bio_sectors(bio) + reada is oversized.
- sectors < bio_sectors(bio) + reada, but sectors is oversized.
From a bug report, the result of "DIV_ROUND_UP(s->insert_bio_sectors,
PAGE_SECTORS)" from code block 6 can be 344, 282, 946, 342 and many
other values larger than BIO_MAX_VECS (a.k.a. 256). When
bio_alloc_bioset() is called with such a larger-than-256 value as the
2nd parameter, this value will eventually be passed to biovec_slab() as
parameter 'nr_vecs' along the following code path,
bio_alloc_bioset() ==> bvec_alloc() ==> biovec_slab()
Because parameter 'nr_vecs' is larger than 256, the BUG() in code
block 3 is triggered inside biovec_slab().
From the above analysis, we know that the 4th parameter 'sectors'
passed into cached_dev_cache_miss() may cause overflows in code blocks
5 and 6, and finally kernel panics in code blocks 2 and 3. And if the
result of bio_sectors(bio) + reada exceeds the valid bvecs number, it
may also trigger the kernel panic in code block 3 via code block 6.
In this patch, the above two panics are avoided by the following
changes,
- If DIV_ROUND_UP(bio_sectors(bio) + reada, PAGE_SECTORS) exceeds the
maximum bvecs count, reduce reada to make sure the DIV_ROUND_UP()
result won't generate an oversized s->insert_bio_sectors and thus an
invalid bvecs number for cache_bio.
- If sectors exceeds the maximum bkey size, clamp sectors down to the
maximum valid bkey size.
With the above changes, the size value in the KEY() macro in code
block 5 will always be in the valid range. Likewise, in code block 6
the nr_iovecs parameter of bio_alloc_bioset(), calculated by
DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS), will always be a
valid bvecs number. Now neither panic can happen anymore.
The problematic code can be partially found since Linux v5.13-rc1;
therefore, all maintained stable kernels should apply this fix.
Reported-by: Diego Ercolani <diego.ercolani(a)gmail.com>
Reported-by: Jan Szubiak <jan.szubiak(a)linuxpolska.pl>
Reported-by: Marco Rebhan <me(a)dblsaiko.net>
Reported-by: Matthias Ferdinand <bcache(a)mfedv.net>
Reported-by: Thorsten Knabe <linux(a)thorsten-knabe.de>
Reported-by: Victor Westerhuis <victor(a)westerhu.is>
Reported-by: Vojtech Pavlik <vojtech(a)suse.cz>
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: stable(a)vger.kernel.org
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Kent Overstreet <kent.overstreet(a)gmail.com>
Cc: Takashi Iwai <tiwai(a)suse.com>
---
Changelog:
v4, not directly access BIO_MAX_VECS and reduce reada value to avoid
oversized bvecs number, by hint from Christoph Hellwig.
v3, fix typo in v2.
v2, fix the bypass bio size calculation in v1.
v1, the initial version
drivers/md/bcache/request.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 29c231758293..054948f037ed 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -883,6 +883,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
unsigned int reada = 0;
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct bio *miss, *cache_bio;
+ unsigned int nr_bvecs, max_segs;
s->cache_missed = 1;
@@ -899,6 +900,24 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
get_capacity(bio->bi_bdev->bd_disk) -
bio_end_sector(bio));
+ /*
+ * If "bio_sectors(bio) + reada" may causes an oversized bio bvecs
+ * number, reada size must be deducted to make sure the following
+ * calculated s->insert_bio_sectors won't cause oversized bvecs number
+ * to cache_bio.
+ */
+ nr_bvecs = DIV_ROUND_UP(bio_sectors(bio) + reada, PAGE_SECTORS);
+ max_segs = bio_max_segs(nr_bvecs);
+ if (nr_bvecs > max_segs)
+ reada = max_segs * PAGE_SECTORS - bio_sectors(bio);
+
+ /*
+ * Make sure sectors won't exceed (1 << KEY_SIZE_BITS) - 1, which is
+ * the maximum bkey size in unit of sector. Then s->insert_bio_sectors
+ * will always be a valid bio in valid bkey size range.
+ */
+ if (sectors > ((1 << KEY_SIZE_BITS) - 1))
+ sectors = (1 << KEY_SIZE_BITS) - 1;
s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
s->iop.replace_key = KEY(s->iop.inode,
--
2.26.2
Hi Greg,
Can you please cherry-pick this to LTS:
commit bf53fc6b5f415cddc7118091cb8fd6a211b2320d
Author: Jan Kratochvil <jan.kratochvil(a)redhat.com>
Date: Fri Dec 4 09:17:02 2020 -0300
perf unwind: Fix separate debug info files when using elfutils' libdw's unwinder
-Tommi
The patch titled
Subject: kthread: fix kthread_mod_delayed_work vs kthread_cancel_delayed_work_sync race
has been added to the -mm tree. Its filename is
kthread-fix-kthread_mod_delayed_work-vs-kthread_cancel_delayed_work_sync-race.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/kthread-fix-kthread_mod_delayed_w…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/kthread-fix-kthread_mod_delayed_w…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Martin Liu <liumartin(a)google.com>
Subject: kthread: fix kthread_mod_delayed_work vs kthread_cancel_delayed_work_sync race
We encountered a system hang issue while doing tests. The callstack
is as follows:
schedule+0x80/0x100
schedule_timeout+0x48/0x138
wait_for_common+0xa4/0x134
wait_for_completion+0x1c/0x2c
kthread_flush_work+0x114/0x1cc
kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144
kthread_cancel_delayed_work_sync+0x18/0x2c
xxxx_pm_notify+0xb0/0xd8
blocking_notifier_call_chain_robust+0x80/0x194
pm_notifier_call_chain_robust+0x28/0x4c
suspend_prepare+0x40/0x260
enter_state+0x80/0x3f4
pm_suspend+0x60/0xdc
state_store+0x108/0x144
kobj_attr_store+0x38/0x88
sysfs_kf_write+0x64/0xc0
kernfs_fop_write_iter+0x108/0x1d0
vfs_write+0x2f4/0x368
ksys_write+0x7c/0xec
When we started investigating, we found a race between
kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync(). The
race's result can be reproduced simply with a kthread_mod_delayed_work()
followed by a kthread_flush_work() call.
The thing is that we release the spinlock inside __kthread_cancel_work(),
which opens a race window for kthread_cancel_delayed_work_sync() to
change the canceling count that is used to prevent the dwork from being
requeued before kthread_flush_work() is called. However, we don't check
the canceling count after returning from __kthread_cancel_work() and
then insert the dwork into the worker. As a result, the subsequent
kthread_flush_work() inserts the flush work at the tail of the dwork,
which sits on the worker's delayed_work_list. Therefore, the flush work
will never get moved to the worker's work_list to be executed. Finally,
kthread_cancel_delayed_work_sync() will never complete and waits
forever. The code sequence diagram is as follows:
Thread A                                Thread B

kthread_mod_delayed_work
  spin_lock
  __kthread_cancel_work
    canceling = 1
    spin_unlock
                                        kthread_cancel_delayed_work_sync
                                          spin_lock
                                          kthread_cancel_work
                                            canceling = 2
                                            spin_unlock
                                          del_timer_sync
  spin_lock
  canceling = 1        // canceling count got updated in Thread B
                       // before queue_delayed_work
  queue_delayed_work   // dwork is put on the worker's
                       // delayed_work_list without checking the
                       // canceling count
  spin_unlock
                                        kthread_flush_work
                                          spin_lock
                                          insert flush work
                                            // at the tail of the dwork,
                                            // which is on the worker's
                                            // delayed_work_list
                                          spin_unlock
                                          wait_for_completion
                                            // Thread B is stuck here, as
                                            // the flush work will never
                                            // get executed
The canceling count can change in __kthread_cancel_work() as the
spinlock gets released and regained in between; let's check the count
again before we queue the delayed work to avoid the race.
Link: https://lkml.kernel.org/r/20210513065458.941403-1-liumartin@google.com
Fixes: 37be45d49dec2 ("kthread: allow to cancel kthread work")
Signed-off-by: Martin Liu <liumartin(a)google.com>
Tested-by: David Chao <davidchao(a)google.com>
Reviewed-by: Petr Mladek <pmladek(a)suse.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: "Paul E. McKenney" <paulmck(a)linux.vnet.ibm.com>
Cc: Josh Triplett <josh(a)joshtriplett.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Jiri Kosina <jkosina(a)suse.cz>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Michal Hocko <mhocko(a)suse.cz>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Nick Desaulniers <ndesaulniers(a)google.com>
Cc: <jenhaochen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kthread.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
--- a/kernel/kthread.c~kthread-fix-kthread_mod_delayed_work-vs-kthread_cancel_delayed_work_sync-race
+++ a/kernel/kthread.c
@@ -1181,6 +1181,19 @@ bool kthread_mod_delayed_work(struct kth
goto out;
ret = __kthread_cancel_work(work, true, &flags);
+
+ /*
+ * Canceling could run in parallel from kthread_cancel_delayed_work_sync
+ * and change work's canceling count as the spinlock is released and
+ * regained in __kthread_cancel_work, so we need to check the count
+ * again. Otherwise, we might incorrectly queue the dwork and cause the
+ * cancel_delayed_work_sync thread to wait endlessly for the flush dwork.
+ */
+ if (work->canceling) {
+ ret = false;
+ goto out;
+ }
+
fast_queue:
__kthread_queue_delayed_work(worker, dwork, delay);
out:
_
Patches currently in -mm which might be from liumartin(a)google.com are
kthread-fix-kthread_mod_delayed_work-vs-kthread_cancel_delayed_work_sync-race.patch
If fuse_fill_super_submount() returns an error, the error path
triggers a crash:
[ 26.206673] BUG: kernel NULL pointer dereference, address: 0000000000000000
[...]
[ 26.226362] RIP: 0010:__list_del_entry_valid+0x25/0x90
[...]
[ 26.247938] Call Trace:
[ 26.248300] fuse_mount_remove+0x2c/0x70 [fuse]
[ 26.248892] virtio_kill_sb+0x22/0x160 [virtiofs]
[ 26.249487] deactivate_locked_super+0x36/0xa0
[ 26.250077] fuse_dentry_automount+0x178/0x1a0 [fuse]
The crash happens because fuse_mount_remove() assumes that the FUSE
mount was already added to the list under the FUSE connection, but this
is only done after fuse_fill_super_submount() has returned success.
This means that until fuse_fill_super_submount() has returned success,
the FUSE mount isn't actually owned by the superblock. We should thus
reclaim ownership by clearing sb->s_fs_info, which will skip the call
to fuse_mount_remove(), and perform rollback, like virtio_fs_get_tree()
already does for the root sb.
Fixes: bf109c64040f ("fuse: implement crossmounts")
Cc: mreitz(a)redhat.com
Cc: stable(a)vger.kernel.org # v5.10+
Signed-off-by: Greg Kurz <groug(a)kaod.org>
---
fs/fuse/dir.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 1b6c001a7dd1..01559061cbfb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -339,8 +339,12 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
/* Initialize superblock, making @mp_fi its root */
err = fuse_fill_super_submount(sb, mp_fi);
- if (err)
+ if (err) {
+ fuse_conn_put(fc);
+ kfree(fm);
+ sb->s_fs_info = NULL;
goto out_put_sb;
+ }
sb->s_flags |= SB_ACTIVE;
fsc->root = dget(sb->s_root);
--
2.31.1
On Wed, 26 May 2021 22:18:31 +0800, Zenghui Yu wrote:
> Commit 26778aaa134a ("KVM: arm64: Commit pending PC adjustemnts before
> returning to userspace") fixed the PC updating issue by forcing an explicit
> synchronisation of the exception state on vcpu exit to userspace.
>
> However, we forgot to take into account the case where immediate_exit is
> set by userspace and KVM_RUN will exit immediately. Fix it by resolving all
> pending PC updates before returning to userspace.
>
> [...]
Applied to fixes, thanks!
[1/1] KVM: arm64: Resolve all pending PC updates before immediate exit
commit: e3e880bb1518eb10a4b4bb4344ed614d6856f190
with:
Fixes: 26778aaa134a ("KVM: arm64: Commit pending PC adjustemnts before returning to userspace")
Cc: stable(a)vger.kernel.org # 5.11
Cheers,
M.
--
Without deviation from the norm, progress is not possible.
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d1d90dd27254c44d087ad3f8b5b3e4fff0571f45 Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp(a)codeaurora.org>
Date: Wed, 28 Apr 2021 02:01:10 -0700
Subject: [PATCH] usb: dwc3: gadget: Enable suspend events
commit 72704f876f50 ("dwc3: gadget: Implement the suspend entry event
handler") introduced (nearly 5 years ago!) an interrupt handler for
U3/L1-L2 suspend events. The problem is that these events aren't
currently enabled in the DEVTEN register so the handler is never
even invoked. Fix this simply by enabling the corresponding bit
in dwc3_gadget_enable_irq() using the same revision check as found
in the handler.
Fixes: 72704f876f50 ("dwc3: gadget: Implement the suspend entry event handler")
Acked-by: Felipe Balbi <balbi(a)kernel.org>
Signed-off-by: Jack Pham <jackp(a)codeaurora.org>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20210428090111.3370-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dd80e5ca8c78..cab3a9184068 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2323,6 +2323,10 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc)
if (DWC3_VER_IS_PRIOR(DWC3, 250A))
reg |= DWC3_DEVTEN_ULSTCNGEN;
+ /* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */
+ if (!DWC3_VER_IS_PRIOR(DWC3, 230A))
+ reg |= DWC3_DEVTEN_EOPFEN;
+
dwc3_writel(dwc->regs, DWC3_DEVTEN, reg);
}
Commit 93b3a674485f6a4b8ffff85d1682d5e8b7c51560 upstream
Commit 93b3a674485f ("mm,vmstat: reduce zone->lock holding time by
/proc/pagetypeinfo") upstream caps the number of iterations over each
free_list at 100,000, and also drops the zone->lock in between each
migrate type. Capping the iteration count alters the file contents in
some cases, which means this approach may not be suitable for stable
backports.
However, dropping zone->lock in between migrate types (and, as a result,
page orders) will not change the /proc/pagetypeinfo file contents. It
can significantly reduce the length of time spent with IRQs disabled,
which can prevent missed interrupts or soft lockups which we have
observed on systems with particularly large memory.
Thus, this commit is a modified version of the upstream one which only
drops the lock in between migrate types.
Fixes: 467c996c1e19 ("Print out statistics in relation to fragmentation avoidance to /proc/pagetypeinfo")
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Reviewed-by: Aruna Ramakrishna <aruna.ramakrishna(a)oracle.com>
Reviewed-by: Khalid Aziz <khalid.aziz(a)oracle.com>
---
mm/vmstat.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 13b74c4314a7e..663069cf7724a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1316,6 +1316,9 @@ static void pagetypeinfo_showfree_print(struct seq_file *m,
list_for_each(curr, &area->free_list[mtype])
freecount++;
seq_printf(m, "%6lu ", freecount);
+ spin_unlock_irq(&zone->lock);
+ cond_resched();
+ spin_lock_irq(&zone->lock);
}
seq_putc(m, '\n');
}
--
2.27.0
From: Mark Tomlinson <mark.tomlinson(a)alliedtelesis.co.nz>
commit 175e476b8cdf2a4de7432583b49c871345e4f8a1 upstream.
When a new table value was assigned, it was followed by a write memory
barrier. This ensured that all writes before this point would complete
before any writes after this point. However, to determine whether the
rules are unused, the sequence counter is read. To ensure that all
writes have been done before these reads, a full memory barrier is
needed, not just a write memory barrier. The same argument applies when
incrementing the counter, before the rules are read.
Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic
reported in cc00bcaa5899 (which is still present), while still
maintaining the same speed of replacing tables.
The smp_mb() barriers potentially slow the packet path; however,
testing has shown no measurable change in performance on a 4-core
MIPS64 platform.
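A condensed sketch of the writer-side ordering this enforces
(paraphrasing xt_replace_table(); wait_for_readers() is a hypothetical
stand-in for the per-CPU xt_recseq polling loop):

	table->private = newinfo;	/* A: publish the new rules      */
	smp_mb();			/* full barrier: order A against
					 * the *reads* in B; smp_wmb()
					 * only orders store vs. store  */
	wait_for_readers(oldinfo);	/* B: read each CPU's xt_recseq
					 * to see the old rules unused  */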
Fixes: 7f5c6d4f665b ("netfilter: get rid of atomic ops in fast path")
Signed-off-by: Mark Tomlinson <mark.tomlinson(a)alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
[Ported to stable, affected barrier is added by d3d40f237480abf3268956daf18cdc56edd32834 in mainline]
Signed-off-by: Pavel Machek (CIP) <pavel(a)denx.de>
Signed-off-by: Nobuhiro Iwamatsu (CIP) <nobuhiro1.iwamatsu(a)toshiba.co.jp>
---
include/linux/netfilter/x_tables.h | 2 +-
net/netfilter/x_tables.c | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 6923e4049de3af..304b60b4952627 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -327,7 +327,7 @@ static inline unsigned int xt_write_recseq_begin(void)
* since addend is most likely 1
*/
__this_cpu_add(xt_recseq.sequence, addend);
- smp_wmb();
+ smp_mb();
return addend;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 7e261fab7ef8de..480ccd52a73fdb 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1140,6 +1140,9 @@ xt_replace_table(struct xt_table *table,
smp_wmb();
table->private = newinfo;
+ /* make sure all cpus see new ->private value */
+ smp_mb();
+
/*
* Even though table entries have now been swapped, other CPU's
* may still be using the old entries. This is okay, because
--
2.31.1
The patch titled
Subject: mm/page_alloc: fix counting of free pages after take off from buddy
has been added to the -mm tree. Its filename is
mm-page_alloc-fix-counting-of-free-pages-after-take-off-from-buddy.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-page_alloc-fix-counting-of-fre…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-page_alloc-fix-counting-of-fre…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Ding Hui <dinghui(a)sangfor.com.cn>
Subject: mm/page_alloc: fix counting of free pages after take off from buddy
Recently we found that there is a lot of MemFree left in /proc/meminfo
after doing a lot of page soft offlining, which is not quite correct.
Before Oscar's rework of soft offline for free pages [1], if we soft
offlined free pages, these pages were left in the buddy allocator with
the HWPoison flag, and NR_FREE_PAGES was not updated immediately. So
the difference between NR_FREE_PAGES and the real number of available
free pages could also be big at the beginning.
However, with the workload running, when we subsequently catch an
HWPoison page in any alloc function, we remove it from the buddy
allocator, update NR_FREE_PAGES, and try again, so NR_FREE_PAGES gets
closer and closer to the real number of available free pages
(regardless of unpoison_memory()).
Now, for offlined free pages, after a successful call to
take_page_off_buddy(), the page no longer belongs to the buddy
allocator and will not be used any more, but we missed accounting for
it in NR_FREE_PAGES in this situation, and there is no chance for it to
be updated later.
Update the count in take_page_off_buddy() like rmqueue() does, but
avoid double counting if someone has already called
set_migratetype_isolate() on the page.
[1]: commit 06be6ff3d2ec ("mm,hwpoison: rework soft offline for free pages")
Link: https://lkml.kernel.org/r/20210526075247.11130-1-dinghui@sangfor.com.cn
Fixes: 06be6ff3d2ec ("mm,hwpoison: rework soft offline for free pages")
Signed-off-by: Ding Hui <dinghui(a)sangfor.com.cn>
Suggested-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 2 ++
1 file changed, 2 insertions(+)
--- a/mm/page_alloc.c~mm-page_alloc-fix-counting-of-free-pages-after-take-off-from-buddy
+++ a/mm/page_alloc.c
@@ -9158,6 +9158,8 @@ bool take_page_off_buddy(struct page *pa
del_page_from_free_list(page_head, zone, page_order);
break_down_buddy_pages(zone, page_head, page, 0,
page_order, migratetype);
+ if (!is_migrate_isolate(migratetype))
+ __mod_zone_freepage_state(zone, -1, migratetype);
ret = true;
break;
}
_
Patches currently in -mm which might be from dinghui(a)sangfor.com.cn are
mm-page_alloc-fix-counting-of-free-pages-after-take-off-from-buddy.patch
From: Alexander Usyskin <alexander.usyskin(a)intel.com>
An rx flow control message waiting in the control queue may block
autosuspend. Re-request autosuspend after the flow control has been
sent to unblock the transition to the low power state.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Alexander Usyskin <alexander.usyskin(a)intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler(a)intel.com>
---
drivers/misc/mei/interrupt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index a98f6b895af7..aab3ebfa9fc4 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -277,6 +277,9 @@ static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb,
return ret;
}
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_request_autosuspend(dev->dev);
+
list_move_tail(&cb->list, &cl->rd_pending);
return 0;
--
2.31.1
In the QUERY2 IOCTL, don't query counts of correctable and
uncorrectable errors, since when RAS is enabled and supported on
Vega20 server boards, this takes an insurmountably long time, O(n^3),
which slows the system down to the point of it being unusable when a
GUI is up.
Fixes: ae363a212b14 ("drm/amdgpu: Add a new flag to AMDGPU_CTX_OP_QUERY_STATE2")
Cc: Alexander Deucher <Alexander.Deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Luben Tuikov <luben.tuikov(a)amd.com>
Reviewed-by: Alexander Deucher <Alexander.Deucher(a)amd.com>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 16 ----------------
1 file changed, 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fc83445fbc40..bb0cfe871aba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -337,7 +337,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
{
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr;
- unsigned long ras_counter;
if (!fpriv)
return -EINVAL;
@@ -362,21 +361,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
if (atomic_read(&ctx->guilty))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
- /*query ue count*/
- ras_counter = amdgpu_ras_query_error_count(adev, false);
- /*ras counter is monotonic increasing*/
- if (ras_counter != ctx->ras_counter_ue) {
- out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
- ctx->ras_counter_ue = ras_counter;
- }
-
- /*query ce count*/
- ras_counter = amdgpu_ras_query_error_count(adev, true);
- if (ras_counter != ctx->ras_counter_ce) {
- out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
- ctx->ras_counter_ce = ras_counter;
- }
-
mutex_unlock(&mgr->lock);
return 0;
}
--
2.31.1.527.g2d677e5b15
From: linma <linma(a)zju.edu.cn>
In the cleanup routine for a failed initialization of an HCI device,
flush_work(&hdev->rx_work) needs to finish before
flush_work(&hdev->cmd_work). Otherwise, hci_rx_work() can
queue new cmd_work and cause a bug, such as a double free,
in later processing.
This was assigned CVE-2021-3564.
This patch reorders the flush_work() calls to fix the bug.
Cc: Marcel Holtmann <marcel(a)holtmann.org>
Cc: Johan Hedberg <johan.hedberg(a)gmail.com>
Cc: Luiz Augusto von Dentz <luiz.dentz(a)gmail.com>
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: linux-bluetooth(a)vger.kernel.org
Cc: netdev(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Signed-off-by: Lin Ma <linma(a)zju.edu.cn>
Signed-off-by: Hao Xiong <mart1n(a)zju.edu.cn>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/bluetooth/hci_core.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index fd12f1652bdf..88aa32f44e68 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1610,8 +1610,13 @@ static int hci_dev_do_open(struct hci_dev *hdev)
} else {
/* Init failed, cleanup */
flush_work(&hdev->tx_work);
- flush_work(&hdev->cmd_work);
+ /*
+ * Since hci_rx_work() can queue new cmd_work, it should be
+ * flushed first to avoid an unexpected call of
+ * hci_cmd_work()
+ */
flush_work(&hdev->rx_work);
+ flush_work(&hdev->cmd_work);
skb_queue_purge(&hdev->cmd_q);
skb_queue_purge(&hdev->rx_q);
--
2.31.1
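A self-contained toy model of the ordering hazard (illustrative names,
not the hci code):

#include <linux/workqueue.h>

static struct work_struct demo_rx_work, demo_cmd_work;

static void demo_cmd_fn(struct work_struct *w)
{
        /* touches state that the cleanup path is about to free */
}

static void demo_rx_fn(struct work_struct *w)
{
        /* rx processing may queue new command work at any time */
        schedule_work(&demo_cmd_work);
}

static void demo_init(void)
{
        INIT_WORK(&demo_rx_work, demo_rx_fn);
        INIT_WORK(&demo_cmd_work, demo_cmd_fn);
}

static void demo_cleanup(void)
{
        /*
         * Flush rx_work first: a still-running demo_rx_fn() could
         * otherwise re-queue demo_cmd_work right after it was "flushed",
         * letting it run against freed state.
         */
        flush_work(&demo_rx_work);
        flush_work(&demo_cmd_work);
}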
In pmd/pud_advanced_tests(), the vaddr is aligned up to the next pmd/pud
entry, and so it does not match the given pmdp/pudp and (aligned down) pfn
any more.
For s390, this results in memory corruption, because the IDTE instruction
used e.g. in xxx_get_and_clear() will take the vaddr for some calculations,
in combination with the given pmdp. It will then end up with a wrong table
origin, ending on ...ff8, and some of those wrongly set low-order bits will
also select a wrong pagetable level for the index addition. IDTE could
therefore invalidate (or 0x20) something outside of the page tables,
depending on the wrongly picked index, which in turn depends on the random
vaddr.
As a result, we sometimes see "BUG task_struct (Not tainted): Padding
overwritten" on s390, where one 0x5a padding value got overwritten with
0x7a.
Fix this by aligning down, similar to how the pmd/pud_aligned pfns are
calculated.
Fixes: a5c3b9ffb0f40 ("mm/debug_vm_pgtable: add tests validating advanced arch page table helpers")
Cc: <stable(a)vger.kernel.org> # v5.9+
Signed-off-by: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
---
mm/debug_vm_pgtable.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 6ff92c8b0a00..f7b23565a04f 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -193,7 +193,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
pr_debug("Validating PMD advanced\n");
/* Align the address wrt HPAGE_PMD_SIZE */
- vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
+ vaddr &= HPAGE_PMD_MASK;
pgtable_trans_huge_deposit(mm, pmdp, pgtable);
@@ -318,7 +318,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
pr_debug("Validating PUD advanced\n");
/* Align the address wrt HPAGE_PUD_SIZE */
- vaddr = (vaddr & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE;
+ vaddr &= HPAGE_PUD_MASK;
pud = pfn_pud(pfn, prot);
set_pud_at(mm, vaddr, pudp, pud);
--
2.25.1
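A worked example of the two alignments, assuming a 2 MiB PMD size for
illustration:

/*
 * HPAGE_PMD_SIZE = 0x200000, HPAGE_PMD_MASK = ~(HPAGE_PMD_SIZE - 1),
 * vaddr = 0x1234567:
 *
 *   old: (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE == 0x1400000
 *        -> start of the *next* PMD entry, no longer matching the given
 *           pmdp or the aligned-down pfn
 *   new:  vaddr & HPAGE_PMD_MASK == 0x1200000
 *        -> start of the entry that pmdp and the pfn actually describe
 */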
Commit 9d7b18668956 ("HID: magicmouse: add support for Apple Magic
Trackpad 2") added a sanity check for an Apple trackpad but returned
success instead of -ENODEV when the check failed. This means that the
remove callback will dereference the never-initialised driver data
pointer when the driver is later unbound (e.g. on USB disconnect).
Reported-by: syzbot+ee6f6e2e68886ca256a8(a)syzkaller.appspotmail.com
Fixes: 9d7b18668956 ("HID: magicmouse: add support for Apple Magic Trackpad 2")
Cc: stable(a)vger.kernel.org # 4.20
Cc: Claudio Mettler <claudio(a)ponyfleisch.ch>
Cc: Marek Wyborski <marek.wyborski(a)emwesoft.com>
Cc: Sean O'Brien <seobrien(a)chromium.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/hid/hid-magicmouse.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 2bb473d8c424..56dda50aa3d8 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -693,7 +693,7 @@ static int magicmouse_probe(struct hid_device *hdev,
if (id->vendor == USB_VENDOR_ID_APPLE &&
id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 &&
hdev->type != HID_TYPE_USBMOUSE)
- return 0;
+ return -ENODEV;
msc = devm_kzalloc(&hdev->dev, sizeof(*msc), GFP_KERNEL);
if (msc == NULL) {
--
2.26.3
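The bug class, reduced to a sketch (struct demo_dev and the demo_*
helpers are hypothetical, standing in for the driver-core plumbing):

#include <linux/slab.h>

struct demo_priv { int dummy; };

static int demo_probe(struct demo_dev *dev)
{
        struct demo_priv *priv;

        if (!demo_dev_supported(dev))   /* hypothetical sanity check */
                return -ENODEV;         /* decline the device; "return 0"
                                         * here would mark the driver bound
                                         * with no driver data set */

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
        demo_set_drvdata(dev, priv);    /* hypothetical drvdata setter */
        return 0;
}

static void demo_remove(struct demo_dev *dev)
{
        struct demo_priv *priv = demo_get_drvdata(dev);

        /* had probe returned 0 on the unsupported path, priv would be NULL */
        kfree(priv);
}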
This is the start of the stable review cycle for the 4.4.270 release.
There are 31 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 26 May 2021 15:23:11 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.270-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.270-rc1
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tty: vt: always invoke vc->vc_sw->con_resize callback
Maciej W. Rozycki <macro(a)orcam.me.uk>
vt: Fix character height handling with VT_RESIZEX
Maciej W. Rozycki <macro(a)orcam.me.uk>
vgacon: Record video mode changes with VT_RESIZEX
Igor Matheus Andrade Torrente <igormtorrente(a)gmail.com>
video: hgafb: fix potential NULL pointer dereference
Tom Seewald <tseewald(a)gmail.com>
qlcnic: Add null check after calling netdev_alloc_skb
Phillip Potter <phil(a)philpotter.co.uk>
leds: lp5523: check return value of lp5xx_read and jump to cleanup code
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
net: rtlwifi: properly check for alloc_workqueue() failure
Anirudh Rayabharam <mail(a)anirudhrb.com>
net: stmicro: handle clk_prepare() failure during init
Du Cheng <ducheng2(a)gmail.com>
ethernet: sun: niu: fix missing checks of niu_pci_eeprom_read()
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "niu: fix missing checks of niu_pci_eeprom_read"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "qlcnic: Avoid potential NULL pointer dereference"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "rtlwifi: fix a potential NULL pointer dereference"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
cdrom: gdrom: initialize global variable at init time
Atul Gopinathan <atulgopinathan(a)gmail.com>
cdrom: gdrom: deallocate struct gdrom_unit fields in remove_gdrom
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "gdrom: fix a memory leak bug"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "ecryptfs: replace BUG_ON with error handling code"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "video: imsttfb: fix potential NULL pointer dereferences"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "hwmon: (lm80) fix a missing check of bus read in lm80 probe"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "leds: lp5523: fix a missing check of return value of lp55xx_read"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "net: stmicro: fix a missing check of clk_prepare"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "video: hgafb: fix potential NULL pointer dereference"
Mikulas Patocka <mpatocka(a)redhat.com>
dm snapshot: fix crash with transient storage and zero chunk size
Mikulas Patocka <mpatocka(a)redhat.com>
dm snapshot: fix a crash when an origin has no snapshots
Jan Beulich <jbeulich(a)suse.com>
xen-pciback: reconfigure also from backend watch handler
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "ALSA: sb8: add a check for request_region"
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
ALSA: bebob/oxfw: fix Kconfig entry for Mackie d.2 Pro
Takashi Iwai <tiwai(a)suse.de>
ALSA: usb-audio: Validate MS endpoint descriptors
Ronnie Sahlberg <lsahlber(a)redhat.com>
cifs: fix memory leak in smb2_copychunk_range
Oleg Nesterov <oleg(a)redhat.com>
ptrace: make ptrace() fail if the tracee changed its pid unexpectedly
Zhen Lei <thunder.leizhen(a)huawei.com>
scsi: qla2xxx: Fix error return code in qla82xx_write_flash_dword()
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
openrisc: Fix a memory leak
-------------
Diffstat:
Makefile | 4 +-
arch/openrisc/kernel/setup.c | 2 +
drivers/cdrom/gdrom.c | 13 +++--
drivers/hwmon/lm80.c | 11 +----
drivers/leds/leds-lp5523.c | 2 +-
drivers/md/dm-snap.c | 6 +--
.../net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 3 +-
drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 8 ++--
drivers/net/ethernet/sun/niu.c | 32 ++++++++-----
drivers/net/wireless/realtek/rtlwifi/base.c | 19 ++++----
drivers/scsi/qla2xxx/qla_nx.c | 3 +-
drivers/tty/vt/vt.c | 2 +-
drivers/tty/vt/vt_ioctl.c | 6 +--
drivers/video/console/fbcon.c | 2 +-
drivers/video/console/vgacon.c | 56 ++++++++++++----------
drivers/video/fbdev/hgafb.c | 21 ++++----
drivers/video/fbdev/imsttfb.c | 5 --
drivers/xen/xen-pciback/xenbus.c | 22 +++++++--
fs/cifs/smb2ops.c | 2 +
fs/ecryptfs/crypto.c | 6 +--
include/linux/console_struct.h | 1 +
kernel/ptrace.c | 18 ++++++-
sound/firewire/Kconfig | 4 +-
sound/firewire/bebob/bebob.c | 2 +-
sound/firewire/oxfw/oxfw.c | 1 -
sound/isa/sb/sb8.c | 4 --
sound/usb/midi.c | 4 ++
27 files changed, 152 insertions(+), 107 deletions(-)
The defrag loop processes leaves in batches, starting a transaction for
each one. The whole defragmentation of a given root is protected by a
bit, but if the transaction fails the bit is not cleared, which would
prevent defragmentation from being started again, so make sure it's
cleared.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/transaction.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e0a82aa7da89..22951621363f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1406,8 +1406,10 @@ int btrfs_defrag_root(struct btrfs_root *root)
while (1) {
trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
ret = btrfs_defrag_leaves(trans, root);
--
2.29.2
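The pattern the fix restores, in a sketch (the demo_* names and the
DEMO_DEFRAG_RUNNING bit are hypothetical):

#include <linux/bitops.h>

struct demo_root { unsigned long state; };
#define DEMO_DEFRAG_RUNNING 0

static int demo_defrag(struct demo_root *root)
{
        int ret = 0;

        set_bit(DEMO_DEFRAG_RUNNING, &root->state);
        while (1) {
                ret = demo_defrag_step(root);   /* hypothetical batch step */
                if (ret)
                        break;  /* not "return ret": the bit must be cleared */
                if (demo_defrag_done(root))
                        break;
        }
        clear_bit(DEMO_DEFRAG_RUNNING, &root->state);
        return ret;
}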
The patch titled
Subject: mm,hwpoison: fix race with hugetlb page allocation
has been removed from the -mm tree. Its filename was
mmhwpoison-fix-race-with-hugetlb-page-allocation.patch
This patch was dropped because an updated version will be merged
------------------------------------------------------
From: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Subject: mm,hwpoison: fix race with hugetlb page allocation
When a hugetlb page fault (under an overcommit situation) races with
memory_failure(), VM_BUG_ON_PAGE() is triggered by the following
sequence:
  CPU0:                                 CPU1:

                                        gather_surplus_pages()
                                          page = alloc_surplus_huge_page()
  memory_failure_hugetlb()
    get_hwpoison_page(page)
      __get_hwpoison_page(page)
        get_page_unless_zero(page)
                                          zero = put_page_testzero(page)
                                          VM_BUG_ON_PAGE(!zero, page)
                                        enqueue_huge_page(h, page)
  put_page(page)
__get_hwpoison_page() only checks the page refcount before taking an
additional one for memory error handling, which is wrong because there's a
time window where compound pages have non-zero refcount during
initialization. So make __get_hwpoison_page() check page status a bit
more for hugetlb pages.
Link: https://lkml.kernel.org/r/20210518231259.2553203-2-nao.horiguchi@gmail.com
Fixes: ead07f6a867b ("mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Reported-by: Muchun Song <songmuchun(a)bytedance.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Tony Luck <tony.luck(a)intel.com>
Cc: <stable(a)vger.kernel.org> [5.12+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hugetlb.h | 6 ++++++
mm/hugetlb.c | 15 +++++++++++++++
mm/memory-failure.c | 8 +++++++-
3 files changed, 28 insertions(+), 1 deletion(-)
--- a/include/linux/hugetlb.h~mmhwpoison-fix-race-with-hugetlb-page-allocation
+++ a/include/linux/hugetlb.h
@@ -149,6 +149,7 @@ bool hugetlb_reserve_pages(struct inode
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool isolate_huge_page(struct page *page, struct list_head *list);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
void putback_active_hugepage(struct page *page);
void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
void free_huge_page(struct page *page);
@@ -339,6 +340,11 @@ static inline bool isolate_huge_page(str
return false;
}
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+ return 0;
+}
+
static inline void putback_active_hugepage(struct page *page)
{
}
--- a/mm/hugetlb.c~mmhwpoison-fix-race-with-hugetlb-page-allocation
+++ a/mm/hugetlb.c
@@ -5847,6 +5847,21 @@ unlock:
return ret;
}
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+ int ret = 0;
+
+ *hugetlb = false;
+ spin_lock_irq(&hugetlb_lock);
+ if (PageHeadHuge(page)) {
+ *hugetlb = true;
+ if (HPageFreed(page) || HPageMigratable(page))
+ ret = get_page_unless_zero(page);
+ }
+ spin_unlock_irq(&hugetlb_lock);
+ return ret;
+}
+
void putback_active_hugepage(struct page *page)
{
spin_lock_irq(&hugetlb_lock);
--- a/mm/memory-failure.c~mmhwpoison-fix-race-with-hugetlb-page-allocation
+++ a/mm/memory-failure.c
@@ -959,8 +959,14 @@ static int page_action(struct page_state
static int __get_hwpoison_page(struct page *page)
{
struct page *head = compound_head(page);
+ int ret = 0;
+ bool hugetlb = false;
- if (!PageHuge(head) && PageTransHuge(head)) {
+ ret = get_hwpoison_huge_page(head, &hugetlb);
+ if (hugetlb)
+ return ret;
+
+ if (PageTransHuge(head)) {
/*
* Non anonymous thp exists only in allocation/free time. We
* can't handle such a case correctly, so let's give it up.
_
Patches currently in -mm which might be from naoya.horiguchi(a)nec.com are
mmhwpoison-make-get_hwpoison_page-call-get_any_page.patch
mmhwpoison-send-sigbus-with-error-virutal-address.patch
The patch titled
Subject: mm/debug_vm_pgtable: fix alignment for pmd/pud_advanced_tests()
has been added to the -mm tree. Its filename is
mm-debug_vm_pgtable-fix-alignment-for-pmd-pud_advanced_tests.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-debug_vm_pgtable-fix-alignment…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-debug_vm_pgtable-fix-alignment…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Subject: mm/debug_vm_pgtable: fix alignment for pmd/pud_advanced_tests()
In pmd/pud_advanced_tests(), the vaddr is aligned up to the next pmd/pud
entry, and so it does not match the given pmdp/pudp and (aligned down) pfn
any more.
For s390, this results in memory corruption, because the IDTE instruction
used e.g. in xxx_get_and_clear() will take the vaddr for some calculations,
in combination with the given pmdp. It will then end up with a wrong table
origin, ending on ...ff8, and some of those wrongly set low-order bits will
also select a wrong pagetable level for the index addition. IDTE could
therefore invalidate (or 0x20) something outside of the page tables,
depending on the wrongly picked index, which in turn depends on the random
vaddr.
As a result, we sometimes see "BUG task_struct (Not tainted): Padding
overwritten" on s390, where one 0x5a padding value got overwritten with
0x7a.
Fix this by aligning down, similar to how the pmd/pud_aligned pfns are
calculated.
Link: https://lkml.kernel.org/r/20210525130043.186290-2-gerald.schaefer@linux.ibm…
Fixes: a5c3b9ffb0f40 ("mm/debug_vm_pgtable: add tests validating advanced arch page table helpers")
Signed-off-by: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Anshuman Khandual <anshuman.khandual(a)arm.com>
Cc: <stable(a)vger.kernel.org> [5.9+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/debug_vm_pgtable.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/debug_vm_pgtable.c~mm-debug_vm_pgtable-fix-alignment-for-pmd-pud_advanced_tests
+++ a/mm/debug_vm_pgtable.c
@@ -192,7 +192,7 @@ static void __init pmd_advanced_tests(st
pr_debug("Validating PMD advanced\n");
/* Align the address wrt HPAGE_PMD_SIZE */
- vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
+ vaddr &= HPAGE_PMD_MASK;
pgtable_trans_huge_deposit(mm, pmdp, pgtable);
@@ -330,7 +330,7 @@ static void __init pud_advanced_tests(st
pr_debug("Validating PUD advanced\n");
/* Align the address wrt HPAGE_PUD_SIZE */
- vaddr = (vaddr & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE;
+ vaddr &= HPAGE_PUD_MASK;
set_pud_at(mm, vaddr, pudp, pud);
pudp_set_wrprotect(mm, vaddr, pudp);
_
Patches currently in -mm which might be from gerald.schaefer(a)linux.ibm.com are
mm-debug_vm_pgtable-fix-alignment-for-pmd-pud_advanced_tests.patch
Fix another "confused deputy" weakness[1]. Writes to /proc/$pid/attr/
files need to check the opener's credentials, since these fds do not
transition state across execve(). Without this, it is possible to
trick another process (which may have different credentials) into
writing to its own /proc/$pid/attr/ files, leading to unexpected and
possibly exploitable behaviors.
[1] https://www.kernel.org/doc/html/latest/security/credentials.html?highlight=…
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kees Cook <keescook(a)chromium.org>
---
fs/proc/base.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3851bfcdba56..58bbf334265b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2703,6 +2703,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
void *page;
int rv;
+ /* A task may only write when it was the opener. */
+ if (file->f_cred != current_real_cred())
+ return -EPERM;
+
rcu_read_lock();
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (!task) {
--
2.25.1
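The check in isolation, as a sketch (demo_attr_write is a hypothetical
handler): because an open fd survives execve(), the writer's credentials
can differ from the opener's, so the handler compares them explicitly.

#include <linux/cred.h>
#include <linux/fs.h>

static ssize_t demo_attr_write(struct file *file, const char __user *buf,
                               size_t count, loff_t *ppos)
{
        /* a task may only write through an fd it opened itself */
        if (file->f_cred != current_real_cred())
                return -EPERM;

        /* ... normal write handling would follow ... */
        return count;
}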