[TCWG CI] Regression caused by linux: wifi: mac80211_hwsim: fix race condition in pending packet:
commit 4ee186fa7e40ae06ebbfbad77e249e3746e14114
Author: Jeongik Cha <jeongik@google.com>
wifi: mac80211_hwsim: fix race condition in pending packet
Results regressed to
# reset_artifacts: -10
# build_abe binutils: -9
# build_abe stage1: -5
# build_abe qemu: -2
# linux_n_obj: 20671
# First few build errors in logs:
# 00:23:33 drivers/net/wireless/mac80211_hwsim.c:1431:37: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast]
# 00:23:33 drivers/net/wireless/mac80211_hwsim.c:4213:30: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast]
# 00:23:33 make[3]: *** [drivers/net/wireless/mac80211_hwsim.o] Error 1
# 00:24:02 make[2]: *** [drivers/net/wireless] Error 2
# 00:26:07 make[1]: *** [drivers/net] Error 2
# 00:26:08 make: *** [drivers] Error 2

from
# reset_artifacts: -10
# build_abe binutils: -9
# build_abe stage1: -5
# build_abe qemu: -2
# linux_n_obj: 20764
# linux build successful: all
THIS IS THE END OF INTERESTING STUFF. BELOW ARE LINKS TO BUILDS, REPRODUCTION INSTRUCTIONS, AND THE RAW COMMIT.
This commit has regressed these CI configurations: - tcwg_kernel/gnu-release-arm-next-allyesconfig
First_bad build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-release-arm-next-allyes... Last_good build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-release-arm-next-allyes... Baseline build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-release-arm-next-allyes... Even more details: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-release-arm-next-allyes...
Reproduce builds: <cut> mkdir investigate-linux-4ee186fa7e40ae06ebbfbad77e249e3746e14114 cd investigate-linux-4ee186fa7e40ae06ebbfbad77e249e3746e14114
# Fetch scripts git clone https://git.linaro.org/toolchain/jenkins-scripts
# Fetch manifests and test.sh script mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-release-arm-next-allyes... --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-release-arm-next-allyes... --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-release-arm-next-allyes... --fail chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/
cd linux
# Reproduce first_bad build git checkout --detach 4ee186fa7e40ae06ebbfbad77e249e3746e14114 ../artifacts/test.sh
# Reproduce last_good build git checkout --detach 37babce9127f3145366a8f36334f24afa9a5d196 ../artifacts/test.sh
cd .. </cut>
Full commit (up to 1000 lines):
<cut>
commit 4ee186fa7e40ae06ebbfbad77e249e3746e14114
Author: Jeongik Cha <jeongik@google.com>
Date:   Mon Jul 4 17:43:54 2022 +0900
wifi: mac80211_hwsim: fix race condition in pending packet
A pending packet uses a cookie as an unique key, but it can be duplicated because it didn't use atomic operators.
And also, a pending packet can be null in hwsim_tx_info_frame_received_nl due to race condition with mac80211_hwsim_stop.
For this, * Use an atomic type and operator for a cookie * Add a lock around the loop for pending packets
Signed-off-by: Jeongik Cha <jeongik@google.com>
Link: https://lore.kernel.org/r/20220704084354.3556326-1-jeongik@google.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index c5bb97b381cf..ea006248ffcd 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -687,7 +687,7 @@ struct mac80211_hwsim_data { bool ps_poll_pending; struct dentry *debugfs;
- uintptr_t pending_cookie; + atomic64_t pending_cookie; struct sk_buff_head pending; /* packets pending */ /* * Only radios in the same group can communicate together (the @@ -1358,7 +1358,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, int i; struct hwsim_tx_rate tx_attempts[IEEE80211_TX_MAX_RATES]; struct hwsim_tx_rate_flag tx_attempts_flags[IEEE80211_TX_MAX_RATES]; - uintptr_t cookie; + u64 cookie;
if (data->ps != PS_DISABLED) hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); @@ -1427,8 +1427,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure;
/* We create a cookie to identify this skb */ - data->pending_cookie++; - cookie = data->pending_cookie; + cookie = (u64)atomic64_inc_return(&data->pending_cookie); info->rate_driver_data[0] = (void *)cookie; if (nla_put_u64_64bit(skb, HWSIM_ATTR_COOKIE, cookie, HWSIM_ATTR_PAD)) goto nla_put_failure; @@ -4178,6 +4177,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, const u8 *src; unsigned int hwsim_flags; int i; + unsigned long flags; bool found = false;
if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] || @@ -4205,18 +4205,20 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, }
/* look for the skb matching the cookie passed back from user */ + spin_lock_irqsave(&data2->pending.lock, flags); skb_queue_walk_safe(&data2->pending, skb, tmp) { u64 skb_cookie;
txi = IEEE80211_SKB_CB(skb); - skb_cookie = (u64)(uintptr_t)txi->rate_driver_data[0]; + skb_cookie = (u64)txi->rate_driver_data[0];
if (skb_cookie == ret_skb_cookie) { - skb_unlink(skb, &data2->pending); + __skb_unlink(skb, &data2->pending); found = true; break; } } + spin_unlock_irqrestore(&data2->pending.lock, flags);
/* not found */ if (!found) </cut>