The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 4e8ef34e36f2839ef8c8da521ab7035956436818
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052257-efficient-hungrily-839b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
4e8ef34e36f2 ("usb: dwc3: fix gadget mode suspend interrupt handler issue")
92c08a84b53e ("usb: dwc3: Add function suspend and function wakeup support")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
63c4c320ccf7 ("usb: dwc3: gadget: Check for L1/L2/U3 for Start Transfer")
40edb52298df ("usb: dwc3: avoid NULL access of usb_gadget_driver")
c560e76319a9 ("usb: dwc3: gadget: Fix START_TRANSFER link state check")
475e8be53d04 ("usb: dwc3: gadget: Check for disabled LPM quirk")
f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers")
a66a7d48f34a ("Merge 5.11-rc3 into usb-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4e8ef34e36f2839ef8c8da521ab7035956436818 Mon Sep 17 00:00:00 2001
From: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Date: Fri, 12 May 2023 08:45:24 +0800
Subject: [PATCH] usb: dwc3: fix gadget mode suspend interrupt handler issue
When work in gadget mode, currently driver doesn't update software level
link_state correctly as link state change event is not enabled for most
devices, in function dwc3_gadget_suspend_interrupt(), it will only pass
suspend event to UDC core when software level link state changes, so when
interrupt generated in sequences of suspend -> reset -> conndone ->
suspend, link state is not updated during reset and conndone, so second
suspend interrupt event will not pass to UDC core.
Remove link_state compare in dwc3_gadget_suspend_interrupt() and add a
suspended flag to replace the compare function.
Fixes: 799e9dc82968 ("usb: dwc3: gadget: conditionally disable Link State change events")
Cc: stable <stable(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Link: https://lore.kernel.org/r/20230512004524.31950-1-quic_linyyuan@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index d56457c02996..1f043c31a096 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1116,6 +1116,7 @@ struct dwc3_scratchpad_array {
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
* @wakeup_configured: set if the device is configured for remote wakeup.
+ * @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
* increments or 0 to disable.
* @max_cfg_eps: current max number of IN eps used across all USB configs.
@@ -1332,6 +1333,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
unsigned wakeup_configured:1;
+ unsigned suspended:1;
u16 imod_interval;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 2996bcb4d53d..d831f5acf7b5 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2440,6 +2440,7 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id)
return -EINVAL;
}
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
dwc->link_state = DWC3_LINK_STATE_U0;
}
@@ -3942,6 +3943,8 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
{
int reg;
+ dwc->suspended = false;
+
dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RX_DET);
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -3966,6 +3969,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
{
u32 reg;
+ dwc->suspended = false;
+
/*
* Ideally, dwc3_reset_gadget() would trigger the function
* drivers to stop any active transfers through ep disable.
@@ -4184,6 +4189,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc, unsigned int evtinfo)
{
+ dwc->suspended = false;
+
/*
* TODO take core out of low power mode when that's
* implemented.
@@ -4281,6 +4288,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc,
if (dwc->gadget->wakeup_armed) {
dwc3_gadget_enable_linksts_evts(dwc, false);
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
}
break;
case DWC3_LINK_STATE_U1:
@@ -4307,8 +4315,10 @@ static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc,
{
enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK;
- if (dwc->link_state != next && next == DWC3_LINK_STATE_U3)
+ if (!dwc->suspended && next == DWC3_LINK_STATE_U3) {
+ dwc->suspended = true;
dwc3_suspend_gadget(dwc);
+ }
dwc->link_state = next;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 4e8ef34e36f2839ef8c8da521ab7035956436818
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052256-hence-answering-7047@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
4e8ef34e36f2 ("usb: dwc3: fix gadget mode suspend interrupt handler issue")
92c08a84b53e ("usb: dwc3: Add function suspend and function wakeup support")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
63c4c320ccf7 ("usb: dwc3: gadget: Check for L1/L2/U3 for Start Transfer")
40edb52298df ("usb: dwc3: avoid NULL access of usb_gadget_driver")
c560e76319a9 ("usb: dwc3: gadget: Fix START_TRANSFER link state check")
475e8be53d04 ("usb: dwc3: gadget: Check for disabled LPM quirk")
f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers")
a66a7d48f34a ("Merge 5.11-rc3 into usb-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4e8ef34e36f2839ef8c8da521ab7035956436818 Mon Sep 17 00:00:00 2001
From: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Date: Fri, 12 May 2023 08:45:24 +0800
Subject: [PATCH] usb: dwc3: fix gadget mode suspend interrupt handler issue
When work in gadget mode, currently driver doesn't update software level
link_state correctly as link state change event is not enabled for most
devices, in function dwc3_gadget_suspend_interrupt(), it will only pass
suspend event to UDC core when software level link state changes, so when
interrupt generated in sequences of suspend -> reset -> conndone ->
suspend, link state is not updated during reset and conndone, so second
suspend interrupt event will not pass to UDC core.
Remove link_state compare in dwc3_gadget_suspend_interrupt() and add a
suspended flag to replace the compare function.
Fixes: 799e9dc82968 ("usb: dwc3: gadget: conditionally disable Link State change events")
Cc: stable <stable(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Link: https://lore.kernel.org/r/20230512004524.31950-1-quic_linyyuan@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index d56457c02996..1f043c31a096 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1116,6 +1116,7 @@ struct dwc3_scratchpad_array {
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
* @wakeup_configured: set if the device is configured for remote wakeup.
+ * @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
* increments or 0 to disable.
* @max_cfg_eps: current max number of IN eps used across all USB configs.
@@ -1332,6 +1333,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
unsigned wakeup_configured:1;
+ unsigned suspended:1;
u16 imod_interval;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 2996bcb4d53d..d831f5acf7b5 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2440,6 +2440,7 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id)
return -EINVAL;
}
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
dwc->link_state = DWC3_LINK_STATE_U0;
}
@@ -3942,6 +3943,8 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
{
int reg;
+ dwc->suspended = false;
+
dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RX_DET);
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -3966,6 +3969,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
{
u32 reg;
+ dwc->suspended = false;
+
/*
* Ideally, dwc3_reset_gadget() would trigger the function
* drivers to stop any active transfers through ep disable.
@@ -4184,6 +4189,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc, unsigned int evtinfo)
{
+ dwc->suspended = false;
+
/*
* TODO take core out of low power mode when that's
* implemented.
@@ -4281,6 +4288,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc,
if (dwc->gadget->wakeup_armed) {
dwc3_gadget_enable_linksts_evts(dwc, false);
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
}
break;
case DWC3_LINK_STATE_U1:
@@ -4307,8 +4315,10 @@ static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc,
{
enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK;
- if (dwc->link_state != next && next == DWC3_LINK_STATE_U3)
+ if (!dwc->suspended && next == DWC3_LINK_STATE_U3) {
+ dwc->suspended = true;
dwc3_suspend_gadget(dwc);
+ }
dwc->link_state = next;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 4e8ef34e36f2839ef8c8da521ab7035956436818
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052255-basics-sixfold-353c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
4e8ef34e36f2 ("usb: dwc3: fix gadget mode suspend interrupt handler issue")
92c08a84b53e ("usb: dwc3: Add function suspend and function wakeup support")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
63c4c320ccf7 ("usb: dwc3: gadget: Check for L1/L2/U3 for Start Transfer")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4e8ef34e36f2839ef8c8da521ab7035956436818 Mon Sep 17 00:00:00 2001
From: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Date: Fri, 12 May 2023 08:45:24 +0800
Subject: [PATCH] usb: dwc3: fix gadget mode suspend interrupt handler issue
When work in gadget mode, currently driver doesn't update software level
link_state correctly as link state change event is not enabled for most
devices, in function dwc3_gadget_suspend_interrupt(), it will only pass
suspend event to UDC core when software level link state changes, so when
interrupt generated in sequences of suspend -> reset -> conndone ->
suspend, link state is not updated during reset and conndone, so second
suspend interrupt event will not pass to UDC core.
Remove link_state compare in dwc3_gadget_suspend_interrupt() and add a
suspended flag to replace the compare function.
Fixes: 799e9dc82968 ("usb: dwc3: gadget: conditionally disable Link State change events")
Cc: stable <stable(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Link: https://lore.kernel.org/r/20230512004524.31950-1-quic_linyyuan@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index d56457c02996..1f043c31a096 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1116,6 +1116,7 @@ struct dwc3_scratchpad_array {
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
* @wakeup_configured: set if the device is configured for remote wakeup.
+ * @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
* increments or 0 to disable.
* @max_cfg_eps: current max number of IN eps used across all USB configs.
@@ -1332,6 +1333,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
unsigned wakeup_configured:1;
+ unsigned suspended:1;
u16 imod_interval;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 2996bcb4d53d..d831f5acf7b5 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2440,6 +2440,7 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id)
return -EINVAL;
}
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
dwc->link_state = DWC3_LINK_STATE_U0;
}
@@ -3942,6 +3943,8 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
{
int reg;
+ dwc->suspended = false;
+
dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RX_DET);
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -3966,6 +3969,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
{
u32 reg;
+ dwc->suspended = false;
+
/*
* Ideally, dwc3_reset_gadget() would trigger the function
* drivers to stop any active transfers through ep disable.
@@ -4184,6 +4189,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc, unsigned int evtinfo)
{
+ dwc->suspended = false;
+
/*
* TODO take core out of low power mode when that's
* implemented.
@@ -4281,6 +4288,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc,
if (dwc->gadget->wakeup_armed) {
dwc3_gadget_enable_linksts_evts(dwc, false);
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
}
break;
case DWC3_LINK_STATE_U1:
@@ -4307,8 +4315,10 @@ static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc,
{
enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK;
- if (dwc->link_state != next && next == DWC3_LINK_STATE_U3)
+ if (!dwc->suspended && next == DWC3_LINK_STATE_U3) {
+ dwc->suspended = true;
dwc3_suspend_gadget(dwc);
+ }
dwc->link_state = next;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 4e8ef34e36f2839ef8c8da521ab7035956436818
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052254-autistic-elixir-a6e8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
4e8ef34e36f2 ("usb: dwc3: fix gadget mode suspend interrupt handler issue")
92c08a84b53e ("usb: dwc3: Add function suspend and function wakeup support")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4e8ef34e36f2839ef8c8da521ab7035956436818 Mon Sep 17 00:00:00 2001
From: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Date: Fri, 12 May 2023 08:45:24 +0800
Subject: [PATCH] usb: dwc3: fix gadget mode suspend interrupt handler issue
When work in gadget mode, currently driver doesn't update software level
link_state correctly as link state change event is not enabled for most
devices, in function dwc3_gadget_suspend_interrupt(), it will only pass
suspend event to UDC core when software level link state changes, so when
interrupt generated in sequences of suspend -> reset -> conndone ->
suspend, link state is not updated during reset and conndone, so second
suspend interrupt event will not pass to UDC core.
Remove link_state compare in dwc3_gadget_suspend_interrupt() and add a
suspended flag to replace the compare function.
Fixes: 799e9dc82968 ("usb: dwc3: gadget: conditionally disable Link State change events")
Cc: stable <stable(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Link: https://lore.kernel.org/r/20230512004524.31950-1-quic_linyyuan@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index d56457c02996..1f043c31a096 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1116,6 +1116,7 @@ struct dwc3_scratchpad_array {
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
* @wakeup_configured: set if the device is configured for remote wakeup.
+ * @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
* increments or 0 to disable.
* @max_cfg_eps: current max number of IN eps used across all USB configs.
@@ -1332,6 +1333,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
unsigned wakeup_configured:1;
+ unsigned suspended:1;
u16 imod_interval;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 2996bcb4d53d..d831f5acf7b5 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2440,6 +2440,7 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id)
return -EINVAL;
}
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
dwc->link_state = DWC3_LINK_STATE_U0;
}
@@ -3942,6 +3943,8 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
{
int reg;
+ dwc->suspended = false;
+
dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RX_DET);
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -3966,6 +3969,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
{
u32 reg;
+ dwc->suspended = false;
+
/*
* Ideally, dwc3_reset_gadget() would trigger the function
* drivers to stop any active transfers through ep disable.
@@ -4184,6 +4189,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc, unsigned int evtinfo)
{
+ dwc->suspended = false;
+
/*
* TODO take core out of low power mode when that's
* implemented.
@@ -4281,6 +4288,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc,
if (dwc->gadget->wakeup_armed) {
dwc3_gadget_enable_linksts_evts(dwc, false);
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
}
break;
case DWC3_LINK_STATE_U1:
@@ -4307,8 +4315,10 @@ static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc,
{
enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK;
- if (dwc->link_state != next && next == DWC3_LINK_STATE_U3)
+ if (!dwc->suspended && next == DWC3_LINK_STATE_U3) {
+ dwc->suspended = true;
dwc3_suspend_gadget(dwc);
+ }
dwc->link_state = next;
}
The patch below does not apply to the 6.3-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.3.y
git checkout FETCH_HEAD
git cherry-pick -x 4e8ef34e36f2839ef8c8da521ab7035956436818
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052253-pushiness-pureblood-3523@gregkh' --subject-prefix 'PATCH 6.3.y' HEAD^..
Possible dependencies:
4e8ef34e36f2 ("usb: dwc3: fix gadget mode suspend interrupt handler issue")
92c08a84b53e ("usb: dwc3: Add function suspend and function wakeup support")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4e8ef34e36f2839ef8c8da521ab7035956436818 Mon Sep 17 00:00:00 2001
From: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Date: Fri, 12 May 2023 08:45:24 +0800
Subject: [PATCH] usb: dwc3: fix gadget mode suspend interrupt handler issue
When work in gadget mode, currently driver doesn't update software level
link_state correctly as link state change event is not enabled for most
devices, in function dwc3_gadget_suspend_interrupt(), it will only pass
suspend event to UDC core when software level link state changes, so when
interrupt generated in sequences of suspend -> reset -> conndone ->
suspend, link state is not updated during reset and conndone, so second
suspend interrupt event will not pass to UDC core.
Remove link_state compare in dwc3_gadget_suspend_interrupt() and add a
suspended flag to replace the compare function.
Fixes: 799e9dc82968 ("usb: dwc3: gadget: conditionally disable Link State change events")
Cc: stable <stable(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Linyu Yuan <quic_linyyuan(a)quicinc.com>
Link: https://lore.kernel.org/r/20230512004524.31950-1-quic_linyyuan@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index d56457c02996..1f043c31a096 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1116,6 +1116,7 @@ struct dwc3_scratchpad_array {
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
* @wakeup_configured: set if the device is configured for remote wakeup.
+ * @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
* increments or 0 to disable.
* @max_cfg_eps: current max number of IN eps used across all USB configs.
@@ -1332,6 +1333,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
unsigned wakeup_configured:1;
+ unsigned suspended:1;
u16 imod_interval;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 2996bcb4d53d..d831f5acf7b5 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2440,6 +2440,7 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id)
return -EINVAL;
}
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
dwc->link_state = DWC3_LINK_STATE_U0;
}
@@ -3942,6 +3943,8 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
{
int reg;
+ dwc->suspended = false;
+
dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RX_DET);
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -3966,6 +3969,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
{
u32 reg;
+ dwc->suspended = false;
+
/*
* Ideally, dwc3_reset_gadget() would trigger the function
* drivers to stop any active transfers through ep disable.
@@ -4184,6 +4189,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc, unsigned int evtinfo)
{
+ dwc->suspended = false;
+
/*
* TODO take core out of low power mode when that's
* implemented.
@@ -4281,6 +4288,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc,
if (dwc->gadget->wakeup_armed) {
dwc3_gadget_enable_linksts_evts(dwc, false);
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
}
break;
case DWC3_LINK_STATE_U1:
@@ -4307,8 +4315,10 @@ static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc,
{
enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK;
- if (dwc->link_state != next && next == DWC3_LINK_STATE_U3)
+ if (!dwc->suspended && next == DWC3_LINK_STATE_U3) {
+ dwc->suspended = true;
dwc3_suspend_gadget(dwc);
+ }
dwc->link_state = next;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x c8540870af4ce6ddeb27a7bb5498b75fb29b643c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052253-bloomers-footwear-7df3@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
c8540870af4c ("usb: dwc3: gadget: Improve dwc3_gadget_suspend() and dwc3_gadget_resume()")
bdb19d01026a ("USB: dwc3: gadget: drop dead hibernation code")
af870d93c706 ("usb: dwc3: Fix typos in gadget.c")
5265397f9442 ("usb: dwc3: Remove DWC3 locking during gadget suspend/resume")
9711c67de748 ("usb: dwc3: gadget: Synchronize IRQ between soft connect/disconnect")
8f8034f493b5 ("usb: dwc3: gadget: Don't modify GEVNTCOUNT in pullup()")
861c010a2ee1 ("usb: dwc3: gadget: Refactor pullup()")
0066472de157 ("usb: dwc3: Issue core soft reset before enabling run/stop")
8217f07a5023 ("usb: dwc3: gadget: Avoid starting DWC3 gadget during UDC unbind")
8212937305f8 ("usb: dwc3: gadget: Disable gadget IRQ during pullup disable")
f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers")
a66a7d48f34a ("Merge 5.11-rc3 into usb-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c8540870af4ce6ddeb27a7bb5498b75fb29b643c Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq(a)kernel.org>
Date: Wed, 3 May 2023 14:00:48 +0300
Subject: [PATCH] usb: dwc3: gadget: Improve dwc3_gadget_suspend() and
dwc3_gadget_resume()
Prevent -ETIMEDOUT error on .suspend().
e.g. If gadget driver is loaded and we are connected to a USB host,
all transfers must be stopped before stopping the controller else
we will not get a clean stop i.e. dwc3_gadget_run_stop() will take
several seconds to complete and will return -ETIMEDOUT.
Handle error cases properly in dwc3_gadget_suspend().
Simplify dwc3_gadget_resume() by using the introduced helper function.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
Cc: stable(a)vger.kernel.org
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Roger Quadros <rogerq(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20230503110048.30617-1-rogerq@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c0ca4d12f95d..2996bcb4d53d 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2699,6 +2699,21 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
return ret;
}
+static int dwc3_gadget_soft_connect(struct dwc3 *dwc)
+{
+ /*
+ * In the Synopsys DWC_usb31 1.90a programming guide section
+ * 4.1.9, it specifies that for a reconnect after a
+ * device-initiated disconnect requires a core soft reset
+ * (DCTL.CSftRst) before enabling the run/stop bit.
+ */
+ dwc3_core_soft_reset(dwc);
+
+ dwc3_event_buffers_setup(dwc);
+ __dwc3_gadget_start(dwc);
+ return dwc3_gadget_run_stop(dwc, true);
+}
+
static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
{
struct dwc3 *dwc = gadget_to_dwc(g);
@@ -2737,21 +2752,10 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
synchronize_irq(dwc->irq_gadget);
- if (!is_on) {
+ if (!is_on)
ret = dwc3_gadget_soft_disconnect(dwc);
- } else {
- /*
- * In the Synopsys DWC_usb31 1.90a programming guide section
- * 4.1.9, it specifies that for a reconnect after a
- * device-initiated disconnect requires a core soft reset
- * (DCTL.CSftRst) before enabling the run/stop bit.
- */
- dwc3_core_soft_reset(dwc);
-
- dwc3_event_buffers_setup(dwc);
- __dwc3_gadget_start(dwc);
- ret = dwc3_gadget_run_stop(dwc, true);
- }
+ else
+ ret = dwc3_gadget_soft_connect(dwc);
pm_runtime_put(dwc->dev);
@@ -4655,42 +4659,39 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
int dwc3_gadget_suspend(struct dwc3 *dwc)
{
unsigned long flags;
+ int ret;
if (!dwc->gadget_driver)
return 0;
- dwc3_gadget_run_stop(dwc, false);
+ ret = dwc3_gadget_soft_disconnect(dwc);
+ if (ret)
+ goto err;
spin_lock_irqsave(&dwc->lock, flags);
dwc3_disconnect_gadget(dwc);
- __dwc3_gadget_stop(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
return 0;
+
+err:
+ /*
+ * Attempt to reset the controller's state. Likely no
+ * communication can be established until the host
+ * performs a port reset.
+ */
+ if (dwc->softconnect)
+ dwc3_gadget_soft_connect(dwc);
+
+ return ret;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
{
- int ret;
-
if (!dwc->gadget_driver || !dwc->softconnect)
return 0;
- ret = __dwc3_gadget_start(dwc);
- if (ret < 0)
- goto err0;
-
- ret = dwc3_gadget_run_stop(dwc, true);
- if (ret < 0)
- goto err1;
-
- return 0;
-
-err1:
- __dwc3_gadget_stop(dwc);
-
-err0:
- return ret;
+ return dwc3_gadget_soft_connect(dwc);
}
void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x c8540870af4ce6ddeb27a7bb5498b75fb29b643c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052252-overstock-junior-02c2@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
c8540870af4c ("usb: dwc3: gadget: Improve dwc3_gadget_suspend() and dwc3_gadget_resume()")
bdb19d01026a ("USB: dwc3: gadget: drop dead hibernation code")
af870d93c706 ("usb: dwc3: Fix typos in gadget.c")
5265397f9442 ("usb: dwc3: Remove DWC3 locking during gadget suspend/resume")
9711c67de748 ("usb: dwc3: gadget: Synchronize IRQ between soft connect/disconnect")
8f8034f493b5 ("usb: dwc3: gadget: Don't modify GEVNTCOUNT in pullup()")
861c010a2ee1 ("usb: dwc3: gadget: Refactor pullup()")
0066472de157 ("usb: dwc3: Issue core soft reset before enabling run/stop")
8217f07a5023 ("usb: dwc3: gadget: Avoid starting DWC3 gadget during UDC unbind")
8212937305f8 ("usb: dwc3: gadget: Disable gadget IRQ during pullup disable")
f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers")
a66a7d48f34a ("Merge 5.11-rc3 into usb-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c8540870af4ce6ddeb27a7bb5498b75fb29b643c Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq(a)kernel.org>
Date: Wed, 3 May 2023 14:00:48 +0300
Subject: [PATCH] usb: dwc3: gadget: Improve dwc3_gadget_suspend() and
dwc3_gadget_resume()
Prevent -ETIMEDOUT error on .suspend().
e.g. If gadget driver is loaded and we are connected to a USB host,
all transfers must be stopped before stopping the controller else
we will not get a clean stop i.e. dwc3_gadget_run_stop() will take
several seconds to complete and will return -ETIMEDOUT.
Handle error cases properly in dwc3_gadget_suspend().
Simplify dwc3_gadget_resume() by using the introduced helper function.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
Cc: stable(a)vger.kernel.org
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Roger Quadros <rogerq(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20230503110048.30617-1-rogerq@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c0ca4d12f95d..2996bcb4d53d 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2699,6 +2699,21 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
return ret;
}
+static int dwc3_gadget_soft_connect(struct dwc3 *dwc)
+{
+ /*
+ * In the Synopsys DWC_usb31 1.90a programming guide section
+ * 4.1.9, it specifies that for a reconnect after a
+ * device-initiated disconnect requires a core soft reset
+ * (DCTL.CSftRst) before enabling the run/stop bit.
+ */
+ dwc3_core_soft_reset(dwc);
+
+ dwc3_event_buffers_setup(dwc);
+ __dwc3_gadget_start(dwc);
+ return dwc3_gadget_run_stop(dwc, true);
+}
+
static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
{
struct dwc3 *dwc = gadget_to_dwc(g);
@@ -2737,21 +2752,10 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
synchronize_irq(dwc->irq_gadget);
- if (!is_on) {
+ if (!is_on)
ret = dwc3_gadget_soft_disconnect(dwc);
- } else {
- /*
- * In the Synopsys DWC_usb31 1.90a programming guide section
- * 4.1.9, it specifies that for a reconnect after a
- * device-initiated disconnect requires a core soft reset
- * (DCTL.CSftRst) before enabling the run/stop bit.
- */
- dwc3_core_soft_reset(dwc);
-
- dwc3_event_buffers_setup(dwc);
- __dwc3_gadget_start(dwc);
- ret = dwc3_gadget_run_stop(dwc, true);
- }
+ else
+ ret = dwc3_gadget_soft_connect(dwc);
pm_runtime_put(dwc->dev);
@@ -4655,42 +4659,39 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
int dwc3_gadget_suspend(struct dwc3 *dwc)
{
unsigned long flags;
+ int ret;
if (!dwc->gadget_driver)
return 0;
- dwc3_gadget_run_stop(dwc, false);
+ ret = dwc3_gadget_soft_disconnect(dwc);
+ if (ret)
+ goto err;
spin_lock_irqsave(&dwc->lock, flags);
dwc3_disconnect_gadget(dwc);
- __dwc3_gadget_stop(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
return 0;
+
+err:
+ /*
+ * Attempt to reset the controller's state. Likely no
+ * communication can be established until the host
+ * performs a port reset.
+ */
+ if (dwc->softconnect)
+ dwc3_gadget_soft_connect(dwc);
+
+ return ret;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
{
- int ret;
-
if (!dwc->gadget_driver || !dwc->softconnect)
return 0;
- ret = __dwc3_gadget_start(dwc);
- if (ret < 0)
- goto err0;
-
- ret = dwc3_gadget_run_stop(dwc, true);
- if (ret < 0)
- goto err1;
-
- return 0;
-
-err1:
- __dwc3_gadget_stop(dwc);
-
-err0:
- return ret;
+ return dwc3_gadget_soft_connect(dwc);
}
void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x c8540870af4ce6ddeb27a7bb5498b75fb29b643c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052251-fantastic-trickery-cd3b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
c8540870af4c ("usb: dwc3: gadget: Improve dwc3_gadget_suspend() and dwc3_gadget_resume()")
bdb19d01026a ("USB: dwc3: gadget: drop dead hibernation code")
af870d93c706 ("usb: dwc3: Fix typos in gadget.c")
5265397f9442 ("usb: dwc3: Remove DWC3 locking during gadget suspend/resume")
9711c67de748 ("usb: dwc3: gadget: Synchronize IRQ between soft connect/disconnect")
8f8034f493b5 ("usb: dwc3: gadget: Don't modify GEVNTCOUNT in pullup()")
861c010a2ee1 ("usb: dwc3: gadget: Refactor pullup()")
0066472de157 ("usb: dwc3: Issue core soft reset before enabling run/stop")
8217f07a5023 ("usb: dwc3: gadget: Avoid starting DWC3 gadget during UDC unbind")
8212937305f8 ("usb: dwc3: gadget: Disable gadget IRQ during pullup disable")
f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers")
a66a7d48f34a ("Merge 5.11-rc3 into usb-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c8540870af4ce6ddeb27a7bb5498b75fb29b643c Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq(a)kernel.org>
Date: Wed, 3 May 2023 14:00:48 +0300
Subject: [PATCH] usb: dwc3: gadget: Improve dwc3_gadget_suspend() and
dwc3_gadget_resume()
Prevent -ETIMEDOUT error on .suspend().
e.g. If gadget driver is loaded and we are connected to a USB host,
all transfers must be stopped before stopping the controller else
we will not get a clean stop i.e. dwc3_gadget_run_stop() will take
several seconds to complete and will return -ETIMEDOUT.
Handle error cases properly in dwc3_gadget_suspend().
Simplify dwc3_gadget_resume() by using the introduced helper function.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
Cc: stable(a)vger.kernel.org
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Roger Quadros <rogerq(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20230503110048.30617-1-rogerq@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c0ca4d12f95d..2996bcb4d53d 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2699,6 +2699,21 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
return ret;
}
+static int dwc3_gadget_soft_connect(struct dwc3 *dwc)
+{
+ /*
+ * In the Synopsys DWC_usb31 1.90a programming guide section
+ * 4.1.9, it specifies that for a reconnect after a
+ * device-initiated disconnect requires a core soft reset
+ * (DCTL.CSftRst) before enabling the run/stop bit.
+ */
+ dwc3_core_soft_reset(dwc);
+
+ dwc3_event_buffers_setup(dwc);
+ __dwc3_gadget_start(dwc);
+ return dwc3_gadget_run_stop(dwc, true);
+}
+
static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
{
struct dwc3 *dwc = gadget_to_dwc(g);
@@ -2737,21 +2752,10 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
synchronize_irq(dwc->irq_gadget);
- if (!is_on) {
+ if (!is_on)
ret = dwc3_gadget_soft_disconnect(dwc);
- } else {
- /*
- * In the Synopsys DWC_usb31 1.90a programming guide section
- * 4.1.9, it specifies that for a reconnect after a
- * device-initiated disconnect requires a core soft reset
- * (DCTL.CSftRst) before enabling the run/stop bit.
- */
- dwc3_core_soft_reset(dwc);
-
- dwc3_event_buffers_setup(dwc);
- __dwc3_gadget_start(dwc);
- ret = dwc3_gadget_run_stop(dwc, true);
- }
+ else
+ ret = dwc3_gadget_soft_connect(dwc);
pm_runtime_put(dwc->dev);
@@ -4655,42 +4659,39 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
int dwc3_gadget_suspend(struct dwc3 *dwc)
{
unsigned long flags;
+ int ret;
if (!dwc->gadget_driver)
return 0;
- dwc3_gadget_run_stop(dwc, false);
+ ret = dwc3_gadget_soft_disconnect(dwc);
+ if (ret)
+ goto err;
spin_lock_irqsave(&dwc->lock, flags);
dwc3_disconnect_gadget(dwc);
- __dwc3_gadget_stop(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
return 0;
+
+err:
+ /*
+ * Attempt to reset the controller's state. Likely no
+ * communication can be established until the host
+ * performs a port reset.
+ */
+ if (dwc->softconnect)
+ dwc3_gadget_soft_connect(dwc);
+
+ return ret;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
{
- int ret;
-
if (!dwc->gadget_driver || !dwc->softconnect)
return 0;
- ret = __dwc3_gadget_start(dwc);
- if (ret < 0)
- goto err0;
-
- ret = dwc3_gadget_run_stop(dwc, true);
- if (ret < 0)
- goto err1;
-
- return 0;
-
-err1:
- __dwc3_gadget_stop(dwc);
-
-err0:
- return ret;
+ return dwc3_gadget_soft_connect(dwc);
}
void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x c8540870af4ce6ddeb27a7bb5498b75fb29b643c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052250-sassy-sagging-9cf0@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
c8540870af4c ("usb: dwc3: gadget: Improve dwc3_gadget_suspend() and dwc3_gadget_resume()")
bdb19d01026a ("USB: dwc3: gadget: drop dead hibernation code")
af870d93c706 ("usb: dwc3: Fix typos in gadget.c")
5265397f9442 ("usb: dwc3: Remove DWC3 locking during gadget suspend/resume")
9711c67de748 ("usb: dwc3: gadget: Synchronize IRQ between soft connect/disconnect")
8f8034f493b5 ("usb: dwc3: gadget: Don't modify GEVNTCOUNT in pullup()")
861c010a2ee1 ("usb: dwc3: gadget: Refactor pullup()")
0066472de157 ("usb: dwc3: Issue core soft reset before enabling run/stop")
8217f07a5023 ("usb: dwc3: gadget: Avoid starting DWC3 gadget during UDC unbind")
8212937305f8 ("usb: dwc3: gadget: Disable gadget IRQ during pullup disable")
f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers")
a66a7d48f34a ("Merge 5.11-rc3 into usb-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c8540870af4ce6ddeb27a7bb5498b75fb29b643c Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq(a)kernel.org>
Date: Wed, 3 May 2023 14:00:48 +0300
Subject: [PATCH] usb: dwc3: gadget: Improve dwc3_gadget_suspend() and
dwc3_gadget_resume()
Prevent -ETIMEDOUT error on .suspend().
e.g. If gadget driver is loaded and we are connected to a USB host,
all transfers must be stopped before stopping the controller else
we will not get a clean stop i.e. dwc3_gadget_run_stop() will take
several seconds to complete and will return -ETIMEDOUT.
Handle error cases properly in dwc3_gadget_suspend().
Simplify dwc3_gadget_resume() by using the introduced helper function.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
Cc: stable(a)vger.kernel.org
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Roger Quadros <rogerq(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20230503110048.30617-1-rogerq@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c0ca4d12f95d..2996bcb4d53d 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2699,6 +2699,21 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
return ret;
}
+static int dwc3_gadget_soft_connect(struct dwc3 *dwc)
+{
+ /*
+ * In the Synopsys DWC_usb31 1.90a programming guide section
+ * 4.1.9, it specifies that for a reconnect after a
+ * device-initiated disconnect requires a core soft reset
+ * (DCTL.CSftRst) before enabling the run/stop bit.
+ */
+ dwc3_core_soft_reset(dwc);
+
+ dwc3_event_buffers_setup(dwc);
+ __dwc3_gadget_start(dwc);
+ return dwc3_gadget_run_stop(dwc, true);
+}
+
static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
{
struct dwc3 *dwc = gadget_to_dwc(g);
@@ -2737,21 +2752,10 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
synchronize_irq(dwc->irq_gadget);
- if (!is_on) {
+ if (!is_on)
ret = dwc3_gadget_soft_disconnect(dwc);
- } else {
- /*
- * In the Synopsys DWC_usb31 1.90a programming guide section
- * 4.1.9, it specifies that for a reconnect after a
- * device-initiated disconnect requires a core soft reset
- * (DCTL.CSftRst) before enabling the run/stop bit.
- */
- dwc3_core_soft_reset(dwc);
-
- dwc3_event_buffers_setup(dwc);
- __dwc3_gadget_start(dwc);
- ret = dwc3_gadget_run_stop(dwc, true);
- }
+ else
+ ret = dwc3_gadget_soft_connect(dwc);
pm_runtime_put(dwc->dev);
@@ -4655,42 +4659,39 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
int dwc3_gadget_suspend(struct dwc3 *dwc)
{
unsigned long flags;
+ int ret;
if (!dwc->gadget_driver)
return 0;
- dwc3_gadget_run_stop(dwc, false);
+ ret = dwc3_gadget_soft_disconnect(dwc);
+ if (ret)
+ goto err;
spin_lock_irqsave(&dwc->lock, flags);
dwc3_disconnect_gadget(dwc);
- __dwc3_gadget_stop(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
return 0;
+
+err:
+ /*
+ * Attempt to reset the controller's state. Likely no
+ * communication can be established until the host
+ * performs a port reset.
+ */
+ if (dwc->softconnect)
+ dwc3_gadget_soft_connect(dwc);
+
+ return ret;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
{
- int ret;
-
if (!dwc->gadget_driver || !dwc->softconnect)
return 0;
- ret = __dwc3_gadget_start(dwc);
- if (ret < 0)
- goto err0;
-
- ret = dwc3_gadget_run_stop(dwc, true);
- if (ret < 0)
- goto err1;
-
- return 0;
-
-err1:
- __dwc3_gadget_stop(dwc);
-
-err0:
- return ret;
+ return dwc3_gadget_soft_connect(dwc);
}
void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x c8540870af4ce6ddeb27a7bb5498b75fb29b643c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052249-educator-oboe-be38@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
c8540870af4c ("usb: dwc3: gadget: Improve dwc3_gadget_suspend() and dwc3_gadget_resume()")
bdb19d01026a ("USB: dwc3: gadget: drop dead hibernation code")
af870d93c706 ("usb: dwc3: Fix typos in gadget.c")
5265397f9442 ("usb: dwc3: Remove DWC3 locking during gadget suspend/resume")
9711c67de748 ("usb: dwc3: gadget: Synchronize IRQ between soft connect/disconnect")
8f8034f493b5 ("usb: dwc3: gadget: Don't modify GEVNTCOUNT in pullup()")
861c010a2ee1 ("usb: dwc3: gadget: Refactor pullup()")
0066472de157 ("usb: dwc3: Issue core soft reset before enabling run/stop")
8217f07a5023 ("usb: dwc3: gadget: Avoid starting DWC3 gadget during UDC unbind")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c8540870af4ce6ddeb27a7bb5498b75fb29b643c Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq(a)kernel.org>
Date: Wed, 3 May 2023 14:00:48 +0300
Subject: [PATCH] usb: dwc3: gadget: Improve dwc3_gadget_suspend() and
dwc3_gadget_resume()
Prevent -ETIMEDOUT error on .suspend().
e.g. If gadget driver is loaded and we are connected to a USB host,
all transfers must be stopped before stopping the controller else
we will not get a clean stop i.e. dwc3_gadget_run_stop() will take
several seconds to complete and will return -ETIMEDOUT.
Handle error cases properly in dwc3_gadget_suspend().
Simplify dwc3_gadget_resume() by using the introduced helper function.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
Cc: stable(a)vger.kernel.org
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Roger Quadros <rogerq(a)kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20230503110048.30617-1-rogerq@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c0ca4d12f95d..2996bcb4d53d 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2699,6 +2699,21 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
return ret;
}
+static int dwc3_gadget_soft_connect(struct dwc3 *dwc)
+{
+ /*
+ * In the Synopsys DWC_usb31 1.90a programming guide section
+ * 4.1.9, it specifies that for a reconnect after a
+ * device-initiated disconnect requires a core soft reset
+ * (DCTL.CSftRst) before enabling the run/stop bit.
+ */
+ dwc3_core_soft_reset(dwc);
+
+ dwc3_event_buffers_setup(dwc);
+ __dwc3_gadget_start(dwc);
+ return dwc3_gadget_run_stop(dwc, true);
+}
+
static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
{
struct dwc3 *dwc = gadget_to_dwc(g);
@@ -2737,21 +2752,10 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
synchronize_irq(dwc->irq_gadget);
- if (!is_on) {
+ if (!is_on)
ret = dwc3_gadget_soft_disconnect(dwc);
- } else {
- /*
- * In the Synopsys DWC_usb31 1.90a programming guide section
- * 4.1.9, it specifies that for a reconnect after a
- * device-initiated disconnect requires a core soft reset
- * (DCTL.CSftRst) before enabling the run/stop bit.
- */
- dwc3_core_soft_reset(dwc);
-
- dwc3_event_buffers_setup(dwc);
- __dwc3_gadget_start(dwc);
- ret = dwc3_gadget_run_stop(dwc, true);
- }
+ else
+ ret = dwc3_gadget_soft_connect(dwc);
pm_runtime_put(dwc->dev);
@@ -4655,42 +4659,39 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
int dwc3_gadget_suspend(struct dwc3 *dwc)
{
unsigned long flags;
+ int ret;
if (!dwc->gadget_driver)
return 0;
- dwc3_gadget_run_stop(dwc, false);
+ ret = dwc3_gadget_soft_disconnect(dwc);
+ if (ret)
+ goto err;
spin_lock_irqsave(&dwc->lock, flags);
dwc3_disconnect_gadget(dwc);
- __dwc3_gadget_stop(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
return 0;
+
+err:
+ /*
+ * Attempt to reset the controller's state. Likely no
+ * communication can be established until the host
+ * performs a port reset.
+ */
+ if (dwc->softconnect)
+ dwc3_gadget_soft_connect(dwc);
+
+ return ret;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
{
- int ret;
-
if (!dwc->gadget_driver || !dwc->softconnect)
return 0;
- ret = __dwc3_gadget_start(dwc);
- if (ret < 0)
- goto err0;
-
- ret = dwc3_gadget_run_stop(dwc, true);
- if (ret < 0)
- goto err1;
-
- return 0;
-
-err1:
- __dwc3_gadget_stop(dwc);
-
-err0:
- return ret;
+ return dwc3_gadget_soft_connect(dwc);
}
void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
On Mon, May 22 2023, SeongJae Park wrote:
> Hi Pratyush,
>
> On Mon, 22 May 2023 17:30:20 +0200 Pratyush Yadav <ptyadav(a)amazon.de> wrote:
>
>> Commit 50749f2dd685 ("tcp/udp: Fix memleaks of sk and zerocopy skbs with
>> TX timestamp.") added a call to skb_orphan_frags_rx() to fix leaks with
>> zerocopy skbs. But it ended up adding a leak of its own. When
>> skb_orphan_frags_rx() fails, the function just returns, leaking the skb
>> it just cloned. Free it before returning.
>>
>> This bug was discovered and resolved using Coverity Static Analysis
>> Security Testing (SAST) by Synopsys, Inc.
>>
>> Fixes: 50749f2dd685 ("tcp/udp: Fix memleaks of sk and zerocopy skbs with TX timestamp.")
>
> Seems the commit has merged in several stable kernels. Is the bug also
> affecting those? If so, would it be better to Cc stable(a)vger.kernel.org?
>
It affects v5.4.243 at least, since that is where I first saw this. But
I would expect it to affect other stable kernels it has been backported
to as well. I thought using the Fixes tag pointing to the bad upstream
commit would be enough for the stable maintainers' tooling/bots to pick
this patch up.
In either case, +Cc stable. Link to the patch this thread is talking
about [0].
[0] https://lore.kernel.org/netdev/20230522153020.32422-1-ptyadav@amazon.de/T/#u
>
>
> Thanks,
> SJ
>
>> Signed-off-by: Pratyush Yadav <ptyadav(a)amazon.de>
>> ---
>>
>> I do not know this code very well, this was caught by our static
>> analysis tool. I did not try specifically reproducing the leak but I did
>> do a boot test by adding this patch on 6.4-rc3 and the kernel boots
>> fine.
>>
>> net/core/skbuff.c | 4 +++-
>> 1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index 515ec5cdc79c..cea28d30abb5 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -5224,8 +5224,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
>> } else {
>> skb = skb_clone(orig_skb, GFP_ATOMIC);
>>
>> - if (skb_orphan_frags_rx(skb, GFP_ATOMIC))
>> + if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
>> + kfree_skb(skb);
>> return;
>> + }
>> }
>> if (!skb)
>> return;
>> --
>> 2.39.2
>>
--
Regards,
Pratyush Yadav
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
[Adding a few pople to the list of recipients that were involved in
developing the culprit; also CCing the regression list, as it should be
in the loop for regressions:
https://docs.kernel.org/admin-guide/reporting-regressions.html]
[TLDR: I'm adding this report to the list of tracked Linux kernel
regressions; the text you find below is based on a few templates
paragraphs you might have encountered already in similar form.
See link in footer if these mails annoy you.]
On 29.03.23 16:31, Kristof Havasi wrote:
>
> I was rebasing the Kernel branch of our SAMA5D35 based board from
> v5.4.189 to v5.4.238.
> I noticed that after the rebase we could _only send, but not receive_
> through our RS485 interface.
>
> I could bisect the problem to 77b97ef4908aa917e7b68667ec6b344cc5dc5034
> in the v5.4.225 release.
FWIW, that's 7176a6a8982d ("dmaengine: at_hdmac: Don't start
transactions at tx_submit level") in mainline.
Kristof Havasi: would be good to know if this is something that happens
with recent mainline as well, because if not it might be something the
stable team needs to handle.
> If I revert this commit, the tx/rx works just
> like before.
> Maybe this use-case wasn't considered when this patch was created?
> I haven't seen a documentation change regarding this in DT bindings,
> but if the config should be something else, please let me know.
> Otherwise this commit breaks the RS485 function of atmel_serial at
> least in the v5.4.y branch.
>
> Best Regards,
> Kristóf Havasi
>
> The relevant device tree nodes:
>
> from sama5d3.dtsi:
>
> usart1: serial@f0020000 {
> compatible = "atmel,at91sam9260-usart";
> reg = <0xf0020000 0x100>;
> interrupts = <13 IRQ_TYPE_LEVEL_HIGH 5>;
> dmas = <&dma0 2 AT91_DMA_CFG_PER_ID(5)>,
> <&dma0 2 (AT91_DMA_CFG_PER_ID(6) | AT91_DMA_CFG_FIFOCFG_ASAP)>;
> dma-names = "tx", "rx";
> pinctrl-names = "default";
> pinctrl-0 = <&pinctrl_usart1>;
> clocks = <&usart1_clk>;
> clock-names = "usart";
> status = "disabled";
> };
>
> pinctrl_usart1: usart1-0 {
> atmel,pins =
> <AT91_PIOB 28 AT91_PERIPH_A AT91_PINCTRL_PULL_UP
> AT91_PIOB 29 AT91_PERIPH_A AT91_PINCTRL_NONE>;
> };
> pinctrl_usart1_rts_cts: usart1_rts_cts-0 {
> atmel,pins =
> <AT91_PIOB 26 AT91_PERIPH_A AT91_PINCTRL_NONE /* PB26 periph A,
> conflicts with GRX7 */
> AT91_PIOB 27 AT91_PERIPH_A AT91_PINCTRL_NONE>; /* PB27 periph A,
> conflicts with G125CKO */
> };
>
> from our dts:
>
> &usart1 {
> pinctrl-0 = <&pinctrl_usart1 &pinctrl_usart1_rts_cts>;
> atmel,use-dma-rx;
> atmel,use-dma-tx;
> rs485-rx-during-tx;
> linux,rs485-enabled-at-boot-time;
> status = "okay";
> };
>
> HW:
> The SAMA5D3's PB27 is connected to the |RE+DE of the RS485 transceiver
> SP3458EN-L
Thanks for the report. To be sure the issue doesn't fall through the
cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
tracking bot:
#regzbot ^introduced 77b97ef4908aa
#regzbot title dmaengine: at_hdmac: receiving data through the RS485
interface broke
#regzbot ignore-activity
This isn't a regression? This issue or a fix for it are already
discussed somewhere else? It was fixed already? You want to clarify when
the regression started to happen? Or point out I got the title or
something else totally wrong? Then just reply and tell me -- ideally
while also telling regzbot about it, as explained by the page listed in
the footer of this mail.
Developers: When fixing the issue, remember to add 'Link:' tags pointing
to the report (the parent of this mail). See page linked in footer for
details.
Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.
It's trivial to trigger a use-after-free bug in the ocfs2 quotas code using
fstest generic/452. After mounting a filesystem as read-only, quotas are
suspended and ocfs2_mem_dqinfo is freed through ->ocfs2_local_free_info(). When
unmounting the filesystem, an UAF access to the oinfo will eventually cause a
crash.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Luís Henriques <lhenriques(a)suse.de>
---
fs/ocfs2/super.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0b0e6a132101..988d1c076861 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -952,8 +952,10 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (!sb_has_quota_loaded(sb, type))
continue;
- oinfo = sb_dqinfo(sb, type)->dqi_priv;
- cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ if (!sb_has_quota_suspended(sb, type)) {
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ }
inode = igrab(sb->s_dquot.files[type]);
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
It's trivial to trigger a use-after-free bug in the ocfs2 quotas code using
fstest generic/452. After mounting a filesystem as read-only, quotas are
suspended and ocfs2_mem_dqinfo is freed through ->ocfs2_local_free_info(). When
unmounting the filesystem, an UAF access to the oinfo will eventually cause a
crash.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Luís Henriques <lhenriques(a)suse.de>
---
fs/ocfs2/super.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0b0e6a132101..988d1c076861 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -952,8 +952,10 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (!sb_has_quota_loaded(sb, type))
continue;
- oinfo = sb_dqinfo(sb, type)->dqi_priv;
- cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ if (!sb_has_quota_suspended(sb, type)) {
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ }
inode = igrab(sb->s_dquot.files[type]);
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
It's trivial to trigger a use-after-free bug in the ocfs2 quotas code using
fstest generic/452. After mounting a filesystem as read-only, quotas are
suspended and ocfs2_mem_dqinfo is freed through ->ocfs2_local_free_info(). When
unmounting the filesystem, an UAF access to the oinfo will eventually cause a
crash.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Luís Henriques <lhenriques(a)suse.de>
---
fs/ocfs2/super.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0b0e6a132101..988d1c076861 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -952,8 +952,10 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (!sb_has_quota_loaded(sb, type))
continue;
- oinfo = sb_dqinfo(sb, type)->dqi_priv;
- cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ if (!sb_has_quota_suspended(sb, type)) {
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ }
inode = igrab(sb->s_dquot.files[type]);
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
From: Liu Peibao <liupeibao(a)loongson.cn>
In DeviceTree path, when ht_vec_base is not zero, the hwirq of PCH PIC will
be assigned incorrectly. Because when pch_pic_domain_translate() adds the
ht_vec_base to hwirq, the hwirq dose not subtract the ht_vec_base when
calling irq_domain_set_info().
The ht_vec_base is designed for the parent irq chip/domain of the PCH PIC.
It seems not proper to deal this in callbacks of the PCH PIC domain and
let's put this back like the initial commit ef8c01eb64ca ("irqchip: Add
Loongson PCH PIC controller").
Fixes: bcdd75c596c8 ("irqchip/loongson-pch-pic: Add ACPI init support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Liu Peibao <liupeibao(a)loongson.cn>
Signed-off-by: Jianmin Lv <lvjianmin(a)loongson.cn>
---
drivers/irqchip/irq-loongson-pch-pic.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
index 921c5c0190d1..93a71f66efeb 100644
--- a/drivers/irqchip/irq-loongson-pch-pic.c
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -164,7 +164,7 @@ static int pch_pic_domain_translate(struct irq_domain *d,
if (fwspec->param_count < 2)
return -EINVAL;
- *hwirq = fwspec->param[0] + priv->ht_vec_base;
+ *hwirq = fwspec->param[0];
*type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
} else {
if (fwspec->param_count < 1)
@@ -196,7 +196,7 @@ static int pch_pic_alloc(struct irq_domain *domain, unsigned int virq,
parent_fwspec.fwnode = domain->parent->fwnode;
parent_fwspec.param_count = 1;
- parent_fwspec.param[0] = hwirq;
+ parent_fwspec.param[0] = hwirq + priv->ht_vec_base;
err = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
if (err)
--
2.31.1
In a dual-bridge system based ACPI, the IRQ on PCH PIC of
each bridge sent to CPU is always a zero-based number, which
means that the IRQ on PCH PIC of each bridge is mapped into
vector range from 0 to 63 of upstream irqchip(e.g. EIOINTC).
EIOINTC N: [0 ... 63 | 64 ... 255]
-------- ----------
^ ^
| |
PCH PIC N |
PCH MSI N
For example, the IRQ vector number of sata controller on
PCH PIC of each bridge is 16, which is sent to upstream
irqchip of EIOINTC when an interrupt occurs, which will set
bit 16 of EIOINTC. Since hwirq of 16 on EIOINTC has been
mapped to a irq_desc for sata controller during hierarchy
irq allocation, the related mapped IRQ will be found through
irq_resolve_mapping() in the IRQ domain of EIOINTC.
So, the IRQ number set in HT vector register should be fixed
to be a zero-based number.
Cc: stable(a)vger.kernel.org
Signed-off-by: Jianmin Lv <lvjianmin(a)loongson.cn>
Signed-off-by: liuyun <liuyun(a)loongson.cn>
---
drivers/irqchip/irq-loongson-pch-pic.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
index e5fe4d50be05..921c5c0190d1 100644
--- a/drivers/irqchip/irq-loongson-pch-pic.c
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -401,14 +401,12 @@ static int __init acpi_cascade_irqdomain_init(void)
int __init pch_pic_acpi_init(struct irq_domain *parent,
struct acpi_madt_bio_pic *acpi_pchpic)
{
- int ret, vec_base;
+ int ret;
struct fwnode_handle *domain_handle;
if (find_pch_pic(acpi_pchpic->gsi_base) >= 0)
return 0;
- vec_base = acpi_pchpic->gsi_base - GSI_MIN_PCH_IRQ;
-
domain_handle = irq_domain_alloc_fwnode(&acpi_pchpic->address);
if (!domain_handle) {
pr_err("Unable to allocate domain handle\n");
@@ -416,7 +414,7 @@ int __init pch_pic_acpi_init(struct irq_domain *parent,
}
ret = pch_pic_init(acpi_pchpic->address, acpi_pchpic->size,
- vec_base, parent, domain_handle, acpi_pchpic->gsi_base);
+ 0, parent, domain_handle, acpi_pchpic->gsi_base);
if (ret < 0) {
irq_domain_free_fwnode(domain_handle);
--
2.31.1
[Why]
The sequence for collecting down_reply from source perspective should
be:
Request_n->repeat (get partial reply of Request_n->clear message ready
flag to ack DPRX that the message is received) till all partial
replies for Request_n are received->new Request_n+1.
Now there is chance that drm_dp_mst_hpd_irq() will fire new down
request in the tx queue when the down reply is incomplete. Source is
restricted to generate interveleaved message transactions so we should
avoid it.
Also, while assembling partial reply packets, reading out DPCD DOWN_REP
Sideband MSG buffer + clearing DOWN_REP_MSG_RDY flag should be
wrapped up as a complete operation for reading out a reply packet.
Kicking off a new request before clearing DOWN_REP_MSG_RDY flag might
be risky. e.g. If the reply of the new request has overwritten the
DPRX DOWN_REP Sideband MSG buffer before source writing one to clear
DOWN_REP_MSG_RDY flag, source then unintentionally flushes the reply
for the new request. Should handle the up request in the same way.
[How]
Separete drm_dp_mst_hpd_irq() into 2 steps. After acking the MST IRQ
event, driver calls drm_dp_mst_hpd_irq_step2() and might trigger
drm_dp_mst_kick_tx() only when there is no on going message transaction.
Changes since v1:
* Reworked on review comments received
-> Adjust the fix to let driver explicitly kick off new down request
when mst irq event is handled and acked
-> Adjust the commit message
Signed-off-by: Wayne Lin <Wayne.Lin(a)amd.com>
Cc: stable(a)vger.kernel.org
---
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 ++---
drivers/gpu/drm/display/drm_dp_mst_topology.c | 35 ++++++++++++++++---
drivers/gpu/drm/i915/display/intel_dp.c | 5 ++-
drivers/gpu/drm/nouveau/dispnv50/disp.c | 5 ++-
include/drm/display/drm_dp_mst_helper.h | 4 +--
5 files changed, 45 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 1ad67c2a697e..48bdcb2ee9b1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3259,10 +3259,9 @@ static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector)
DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], esi[2]);
/* handle HPD short pulse irq */
if (aconnector->mst_mgr.mst_state)
- drm_dp_mst_hpd_irq(
- &aconnector->mst_mgr,
- esi,
- &new_irq_handled);
+ drm_dp_mst_hpd_irq_step1(&aconnector->mst_mgr,
+ esi,
+ &new_irq_handled);
if (new_irq_handled) {
/* ACK at DPCD to notify down stream */
@@ -3281,6 +3280,7 @@ static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector)
break;
}
+ drm_dp_mst_hpd_irq_step2(&aconnector->mst_mgr);
/* check if there is new irq to be handled */
dret = drm_dp_dpcd_read(
&aconnector->dm_dp_aux.aux,
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 70df29fe92db..2e0a38a6509c 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -4045,7 +4045,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
}
/**
- * drm_dp_mst_hpd_irq() - MST hotplug IRQ notify
+ * drm_dp_mst_hpd_irq_step1() - MST hotplug IRQ notify
* @mgr: manager to notify irq for.
* @esi: 4 bytes from SINK_COUNT_ESI
* @handled: whether the hpd interrupt was consumed or not
@@ -4055,7 +4055,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
* topology manager will process the sideband messages received as a result
* of this.
*/
-int drm_dp_mst_hpd_irq(struct drm_dp_mst_topology_mgr *mgr, u8 *esi, bool *handled)
+int drm_dp_mst_hpd_irq_step1(struct drm_dp_mst_topology_mgr *mgr, u8 *esi, bool *handled)
{
int ret = 0;
int sc;
@@ -4077,11 +4077,38 @@ int drm_dp_mst_hpd_irq(struct drm_dp_mst_topology_mgr *mgr, u8 *esi, bool *handl
*handled = true;
}
- drm_dp_mst_kick_tx(mgr);
return ret;
}
-EXPORT_SYMBOL(drm_dp_mst_hpd_irq);
+EXPORT_SYMBOL(drm_dp_mst_hpd_irq_step1);
+
+/**
+ * drm_dp_mst_hpd_irq_step2() - MST hotplug IRQ 2nd part handling
+ * @mgr: manager to notify irq for.
+ *
+ * This should be called from the driver when mst irq event is handled
+ * and acked. Note that new down request should only be sent when
+ * previous message transaction is done. Source is not supposed to generate
+ * interleaved message transactions.
+ */
+void drm_dp_mst_hpd_irq_step2(struct drm_dp_mst_topology_mgr *mgr)
+{
+ struct drm_dp_sideband_msg_tx *txmsg;
+ bool skip = false;
+ mutex_lock(&mgr->qlock);
+ txmsg = list_first_entry_or_null(&mgr->tx_msg_downq,
+ struct drm_dp_sideband_msg_tx, next);
+ /* If last transaction is not completed yet*/
+ if (!txmsg ||
+ txmsg->state == DRM_DP_SIDEBAND_TX_START_SEND ||
+ txmsg->state == DRM_DP_SIDEBAND_TX_SENT)
+ skip = true;
+ mutex_unlock(&mgr->qlock);
+
+ if (!skip)
+ drm_dp_mst_kick_tx(mgr);
+}
+EXPORT_SYMBOL(drm_dp_mst_hpd_irq_step2);
/**
* drm_dp_mst_detect_port() - get connection status for an MST port
* @connector: DRM connector for this port
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 75070eb07d4b..9a9a5aec9534 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -3803,7 +3803,7 @@ intel_dp_mst_hpd_irq(struct intel_dp *intel_dp, u8 *esi, u8 *ack)
{
bool handled = false;
- drm_dp_mst_hpd_irq(&intel_dp->mst_mgr, esi, &handled);
+ drm_dp_mst_hpd_irq_step1(&intel_dp->mst_mgr, esi, &handled);
if (handled)
ack[1] |= esi[1] & (DP_DOWN_REP_MSG_RDY | DP_UP_REQ_MSG_RDY);
@@ -3880,6 +3880,9 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp)
if (!intel_dp_ack_sink_irq_esi(intel_dp, ack))
drm_dbg_kms(&i915->drm, "Failed to ack ESI\n");
+
+ if (ack[1] & (DP_DOWN_REP_MSG_RDY | DP_UP_REQ_MSG_RDY))
+ drm_dp_mst_hpd_irq_step2(&intel_dp->mst_mgr);
}
return link_ok;
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index ed9d374147b8..00c36fcc8afd 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -1332,12 +1332,15 @@ nv50_mstm_service(struct nouveau_drm *drm,
break;
}
- drm_dp_mst_hpd_irq(&mstm->mgr, esi, &handled);
+ drm_dp_mst_hpd_irq_step1(&mstm->mgr, esi, &handled);
if (!handled)
break;
rc = drm_dp_dpcd_write(aux, DP_SINK_COUNT_ESI + 1, &esi[1],
3);
+
+ drm_dp_mst_hpd_irq_step2(&mstm->mgr);
+
if (rc != 3) {
ret = false;
break;
diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index 32c764fb9cb5..6c08ba765d5a 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -815,8 +815,8 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr);
bool drm_dp_read_mst_cap(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE]);
int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state);
-int drm_dp_mst_hpd_irq(struct drm_dp_mst_topology_mgr *mgr, u8 *esi, bool *handled);
-
+int drm_dp_mst_hpd_irq_step1(struct drm_dp_mst_topology_mgr *mgr, u8 *esi, bool *handled);
+void drm_dp_mst_hpd_irq_step2(struct drm_dp_mst_topology_mgr *mgr);
int
drm_dp_mst_detect_port(struct drm_connector *connector,
--
2.37.3
Add a timeout for busydetect IRQs using a delayed work.
It might happen (and does happen) on Ux500 that the first
busy detect IRQ appears and not the second one. This will
make the host hang indefinitely waiting for the second
IRQ to appear.
Fire a delayed work after 10ms and re-engage the command
IRQ so the transaction finishes: we are certainly done
at this point, or we will catch an error in the status
register.
This makes the eMMC work again on Skomer and Codina phones.
Notice that the hardware time-out cannot be used, because
the state machine in the MMCI will not see that something
is wrong.
Cc: stable(a)vger.kernel.org
Cc: phone-devel(a)vger.kernel.org
Cc: Stefan Hansson <newbyte(a)disroot.org>
Signed-off-by: Linus Walleij <linus.walleij(a)linaro.org>
---
ChangeLog v2->v3:
- Took out the most urgent fix from the pile of changes
and send separately, without the rest of the refactorings
that were used for debugging the issue. After this the
Skomer and Codina with problematic eMMC boots fine.
- Now just a single patch!
- This version should be easier to backport as well.
---
drivers/mmc/host/mmci.c | 22 ++++++++++++++++++++++
drivers/mmc/host/mmci.h | 1 +
2 files changed, 23 insertions(+)
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index f2b2e8b0574e..f3349fb99590 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -37,6 +37,7 @@
#include <linux/pinctrl/consumer.h>
#include <linux/reset.h>
#include <linux/gpio/consumer.h>
+#include <linux/workqueue.h>
#include <asm/div64.h>
#include <asm/io.h>
@@ -695,6 +696,8 @@ static bool ux500_busy_complete(struct mmci_host *host, u32 status, u32 err_msk)
if (host->busy_status &&
(status & host->variant->busy_detect_flag)) {
writel(host->variant->busy_detect_mask, base + MMCICLEAR);
+ schedule_delayed_work(&host->busy_timeout_work,
+ msecs_to_jiffies(10));
return false;
}
@@ -1429,6 +1432,22 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
}
}
+/*
+ * This busy timeout worker is used to "kick" the command IRQ if a
+ * busy detect IRQ fails to appear in reasonable time. Only used on
+ * variants with busy detection IRQ delivery.
+ */
+static void busy_timeout_work(struct work_struct *work)
+{
+ struct mmci_host *host =
+ container_of(work, struct mmci_host, busy_timeout_work.work);
+ u32 status;
+
+ dev_dbg(mmc_dev(host->mmc), "timeout waiting for busy IRQ\n");
+ status = readl(host->base + MMCISTATUS);
+ mmci_cmd_irq(host, host->cmd, status);
+}
+
static int mmci_get_rx_fifocnt(struct mmci_host *host, u32 status, int remain)
{
return remain - (readl(host->base + MMCIFIFOCNT) << 2);
@@ -2242,6 +2261,9 @@ static int mmci_probe(struct amba_device *dev,
goto clk_disable;
}
+ if (host->variant->busy_detect && host->ops->busy_complete)
+ INIT_DELAYED_WORK(&host->busy_timeout_work, busy_timeout_work);
+
writel(MCI_IRQENABLE | variant->start_err, host->base + MMCIMASK0);
amba_set_drvdata(dev, mmc);
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index e1a9b96a3396..de2c1436f4cd 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -437,6 +437,7 @@ struct mmci_host {
void *dma_priv;
s32 next_cookie;
+ struct delayed_work busy_timeout_work;
};
#define dma_inprogress(host) ((host)->dma_in_progress)
--
2.40.1
Hello,
On Fri, May 19, 2023 at 09:49:37PM -0400, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> iommu/arm-smmu: Drop if with an always false condition
>
> to the 6.3-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> iommu-arm-smmu-drop-if-with-an-always-false-conditio.patch
> and it can be found in the queue-6.3 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
I'd not add that patch to stable. It's just about dropping an
if (false) {
something();
}
The compiler probably isn't able to see that the condition is always
false, so the only benefit is that the patch makes the compiled code a
bit smaller.
Best regards
Uwe
--
Pengutronix e.K. | Uwe Kleine-König |
Industrial Linux Solutions | https://www.pengutronix.de/ |
From: Haibo Li <haibo.li(a)mediatek.com>
[ Upstream commit fa3eeb638de0c1a9d2d860e5b48259facdd65176 ]
When unwind instruction is 0xb2,the subsequent instructions
are uleb128 bytes.
For now,it uses only the first uleb128 byte in code.
For vsp increments of 0x204~0x400,use one uleb128 byte like below:
0xc06a00e4 <unwind_test_work>: 0x80b27fac
Compact model index: 0
0xb2 0x7f vsp = vsp + 1024
0xac pop {r4, r5, r6, r7, r8, r14}
For vsp increments larger than 0x400,use two uleb128 bytes like below:
0xc06a00e4 <unwind_test_work>: @0xc0cc9e0c
Compact model index: 1
0xb2 0x81 0x01 vsp = vsp + 1032
0xac pop {r4, r5, r6, r7, r8, r14}
The unwind works well since the decoded uleb128 byte is also 0x81.
For vsp increments larger than 0x600,use two uleb128 bytes like below:
0xc06a00e4 <unwind_test_work>: @0xc0cc9e0c
Compact model index: 1
0xb2 0x81 0x02 vsp = vsp + 1544
0xac pop {r4, r5, r6, r7, r8, r14}
In this case,the decoded uleb128 result is 0x101(vsp=0x204+(0x101<<2)).
While the uleb128 used in code is 0x81(vsp=0x204+(0x81<<2)).
The unwind aborts at this frame since it gets incorrect vsp.
To fix this,add uleb128 decode to cover all the above case.
Signed-off-by: Haibo Li <haibo.li(a)mediatek.com>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Reviewed-by: Alexandre Mergnat <amergnat(a)baylibre.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/arm/kernel/unwind.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 314cfb232a635..f2bb090373c67 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -313,6 +313,29 @@ static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
return URC_OK;
}
+static unsigned long unwind_decode_uleb128(struct unwind_ctrl_block *ctrl)
+{
+ unsigned long bytes = 0;
+ unsigned long insn;
+ unsigned long result = 0;
+
+ /*
+ * unwind_get_byte() will advance `ctrl` one instruction at a time, so
+ * loop until we get an instruction byte where bit 7 is not set.
+ *
+ * Note: This decodes a maximum of 4 bytes to output 28 bits data where
+ * max is 0xfffffff: that will cover a vsp increment of 1073742336, hence
+ * it is sufficient for unwinding the stack.
+ */
+ do {
+ insn = unwind_get_byte(ctrl);
+ result |= (insn & 0x7f) << (bytes * 7);
+ bytes++;
+ } while (!!(insn & 0x80) && (bytes != sizeof(result)));
+
+ return result;
+}
+
/*
* Execute the current unwind instruction.
*/
@@ -366,7 +389,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
if (ret)
goto error;
} else if (insn == 0xb2) {
- unsigned long uleb128 = unwind_get_byte(ctrl);
+ unsigned long uleb128 = unwind_decode_uleb128(ctrl);
ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
} else {
--
2.39.2
From: Haibo Li <haibo.li(a)mediatek.com>
[ Upstream commit fa3eeb638de0c1a9d2d860e5b48259facdd65176 ]
When unwind instruction is 0xb2,the subsequent instructions
are uleb128 bytes.
For now,it uses only the first uleb128 byte in code.
For vsp increments of 0x204~0x400,use one uleb128 byte like below:
0xc06a00e4 <unwind_test_work>: 0x80b27fac
Compact model index: 0
0xb2 0x7f vsp = vsp + 1024
0xac pop {r4, r5, r6, r7, r8, r14}
For vsp increments larger than 0x400,use two uleb128 bytes like below:
0xc06a00e4 <unwind_test_work>: @0xc0cc9e0c
Compact model index: 1
0xb2 0x81 0x01 vsp = vsp + 1032
0xac pop {r4, r5, r6, r7, r8, r14}
The unwind works well since the decoded uleb128 byte is also 0x81.
For vsp increments larger than 0x600,use two uleb128 bytes like below:
0xc06a00e4 <unwind_test_work>: @0xc0cc9e0c
Compact model index: 1
0xb2 0x81 0x02 vsp = vsp + 1544
0xac pop {r4, r5, r6, r7, r8, r14}
In this case,the decoded uleb128 result is 0x101(vsp=0x204+(0x101<<2)).
While the uleb128 used in code is 0x81(vsp=0x204+(0x81<<2)).
The unwind aborts at this frame since it gets incorrect vsp.
To fix this,add uleb128 decode to cover all the above case.
Signed-off-by: Haibo Li <haibo.li(a)mediatek.com>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Reviewed-by: Alexandre Mergnat <amergnat(a)baylibre.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/arm/kernel/unwind.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 314cfb232a635..f2bb090373c67 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -313,6 +313,29 @@ static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
return URC_OK;
}
+static unsigned long unwind_decode_uleb128(struct unwind_ctrl_block *ctrl)
+{
+ unsigned long bytes = 0;
+ unsigned long insn;
+ unsigned long result = 0;
+
+ /*
+ * unwind_get_byte() will advance `ctrl` one instruction at a time, so
+ * loop until we get an instruction byte where bit 7 is not set.
+ *
+ * Note: This decodes a maximum of 4 bytes to output 28 bits data where
+ * max is 0xfffffff: that will cover a vsp increment of 1073742336, hence
+ * it is sufficient for unwinding the stack.
+ */
+ do {
+ insn = unwind_get_byte(ctrl);
+ result |= (insn & 0x7f) << (bytes * 7);
+ bytes++;
+ } while (!!(insn & 0x80) && (bytes != sizeof(result)));
+
+ return result;
+}
+
/*
* Execute the current unwind instruction.
*/
@@ -366,7 +389,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
if (ret)
goto error;
} else if (insn == 0xb2) {
- unsigned long uleb128 = unwind_get_byte(ctrl);
+ unsigned long uleb128 = unwind_decode_uleb128(ctrl);
ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
} else {
--
2.39.2
From: Haibo Li <haibo.li(a)mediatek.com>
[ Upstream commit fa3eeb638de0c1a9d2d860e5b48259facdd65176 ]
When unwind instruction is 0xb2,the subsequent instructions
are uleb128 bytes.
For now,it uses only the first uleb128 byte in code.
For vsp increments of 0x204~0x400,use one uleb128 byte like below:
0xc06a00e4 <unwind_test_work>: 0x80b27fac
Compact model index: 0
0xb2 0x7f vsp = vsp + 1024
0xac pop {r4, r5, r6, r7, r8, r14}
For vsp increments larger than 0x400,use two uleb128 bytes like below:
0xc06a00e4 <unwind_test_work>: @0xc0cc9e0c
Compact model index: 1
0xb2 0x81 0x01 vsp = vsp + 1032
0xac pop {r4, r5, r6, r7, r8, r14}
The unwind works well since the decoded uleb128 byte is also 0x81.
For vsp increments larger than 0x600,use two uleb128 bytes like below:
0xc06a00e4 <unwind_test_work>: @0xc0cc9e0c
Compact model index: 1
0xb2 0x81 0x02 vsp = vsp + 1544
0xac pop {r4, r5, r6, r7, r8, r14}
In this case,the decoded uleb128 result is 0x101(vsp=0x204+(0x101<<2)).
While the uleb128 used in code is 0x81(vsp=0x204+(0x81<<2)).
The unwind aborts at this frame since it gets incorrect vsp.
To fix this,add uleb128 decode to cover all the above case.
Signed-off-by: Haibo Li <haibo.li(a)mediatek.com>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Reviewed-by: Alexandre Mergnat <amergnat(a)baylibre.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/arm/kernel/unwind.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 4574e6aea0a52..f321c2aa94e1c 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -300,6 +300,29 @@ static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
return URC_OK;
}
+static unsigned long unwind_decode_uleb128(struct unwind_ctrl_block *ctrl)
+{
+ unsigned long bytes = 0;
+ unsigned long insn;
+ unsigned long result = 0;
+
+ /*
+ * unwind_get_byte() will advance `ctrl` one instruction at a time, so
+ * loop until we get an instruction byte where bit 7 is not set.
+ *
+ * Note: This decodes a maximum of 4 bytes to output 28 bits data where
+ * max is 0xfffffff: that will cover a vsp increment of 1073742336, hence
+ * it is sufficient for unwinding the stack.
+ */
+ do {
+ insn = unwind_get_byte(ctrl);
+ result |= (insn & 0x7f) << (bytes * 7);
+ bytes++;
+ } while (!!(insn & 0x80) && (bytes != sizeof(result)));
+
+ return result;
+}
+
/*
* Execute the current unwind instruction.
*/
@@ -353,7 +376,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
if (ret)
goto error;
} else if (insn == 0xb2) {
- unsigned long uleb128 = unwind_get_byte(ctrl);
+ unsigned long uleb128 = unwind_decode_uleb128(ctrl);
ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
} else {
--
2.39.2
From: Filipe Manana <fdmanana(a)suse.com>
[ Upstream commit 9ae5afd02a03d4e22a17a9609b19400b77c36273 ]
If the sibling keys check fails before we move keys from one sibling
leaf to another, we are not aborting the transaction - we leave that to
some higher level caller of btrfs_search_slot() (or anything else that
uses it to insert items into a b+tree).
This means that the transaction abort will provide a stack trace that
omits the b+tree modification call chain. So change this to immediately
abort the transaction and therefore get a more useful stack trace that
shows us the call chain in the bt+tree modification code.
It's also important to immediately abort the transaction just in case
some higher level caller is not doing it, as this indicates a very
serious corruption and we should stop the possibility of doing further
damage.
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/btrfs/ctree.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3e55245e54e7c..41a7ace9998e4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -3872,6 +3872,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
btrfs_tree_unlock(right);
free_extent_buffer(right);
return ret;
@@ -4116,6 +4117,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
return __push_leaf_left(path, min_data_size,
--
2.39.2
From: Jammy Huang <jammy_huang(a)aspeedtech.com>
[ Upstream commit 4327a6137ed43a091d900b1ac833345d60f32228 ]
ARM architecture only has 'memory', so all devices are accessed by
MMIO if possible.
Signed-off-by: Jammy Huang <jammy_huang(a)aspeedtech.com>
Reviewed-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230421003354.27767-1-jammy_…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/ast/ast_main.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 79a3618679554..754a08c92d3d1 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -423,11 +423,12 @@ struct ast_private *ast_device_create(const struct drm_driver *drv,
return ERR_PTR(-EIO);
/*
- * If we don't have IO space at all, use MMIO now and
- * assume the chip has MMIO enabled by default (rev 0x20
- * and higher).
+ * After AST2500, MMIO is enabled by default, and it should be adopted
+ * to be compatible with Arm.
*/
- if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
+ if (pdev->revision >= 0x40) {
+ ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
+ } else if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
drm_info(dev, "platform has no IO space, trying MMIO\n");
ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
}
--
2.39.2
From: Jammy Huang <jammy_huang(a)aspeedtech.com>
[ Upstream commit 4327a6137ed43a091d900b1ac833345d60f32228 ]
ARM architecture only has 'memory', so all devices are accessed by
MMIO if possible.
Signed-off-by: Jammy Huang <jammy_huang(a)aspeedtech.com>
Reviewed-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230421003354.27767-1-jammy_…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/ast/ast_main.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 067453266897f..5df527051177a 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -427,11 +427,12 @@ struct ast_private *ast_device_create(const struct drm_driver *drv,
return ERR_PTR(-EIO);
/*
- * If we don't have IO space at all, use MMIO now and
- * assume the chip has MMIO enabled by default (rev 0x20
- * and higher).
+ * After AST2500, MMIO is enabled by default, and it should be adopted
+ * to be compatible with Arm.
*/
- if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
+ if (pdev->revision >= 0x40) {
+ ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
+ } else if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
drm_info(dev, "platform has no IO space, trying MMIO\n");
ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
}
--
2.39.2
From: Jammy Huang <jammy_huang(a)aspeedtech.com>
[ Upstream commit 4327a6137ed43a091d900b1ac833345d60f32228 ]
ARM architecture only has 'memory', so all devices are accessed by
MMIO if possible.
Signed-off-by: Jammy Huang <jammy_huang(a)aspeedtech.com>
Reviewed-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230421003354.27767-1-jammy_…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/ast/ast_main.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index f83ce77127cb4..a6d0ee4da2b88 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -425,11 +425,12 @@ struct ast_private *ast_device_create(const struct drm_driver *drv,
return ERR_PTR(-EIO);
/*
- * If we don't have IO space at all, use MMIO now and
- * assume the chip has MMIO enabled by default (rev 0x20
- * and higher).
+ * After AST2500, MMIO is enabled by default, and it should be adopted
+ * to be compatible with Arm.
*/
- if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
+ if (pdev->revision >= 0x40) {
+ ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
+ } else if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
drm_info(dev, "platform has no IO space, trying MMIO\n");
ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
}
--
2.39.2
This is a note to let you know that I've just added the patch titled
binder: fix UAF of alloc->vma in race with munmap()
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From d1d8875c8c13517f6fd1ff8d4d3e1ac366a17e07 Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas(a)google.com>
Date: Fri, 19 May 2023 19:59:49 +0000
Subject: binder: fix UAF of alloc->vma in race with munmap()
[ cmllamas: clean forward port from commit 015ac18be7de ("binder: fix
UAF of alloc->vma in race with munmap()") in 5.10 stable. It is needed
in mainline after the revert of commit a43cfc87caaf ("android: binder:
stop saving a pointer to the VMA") as pointed out by Liam. The commit
log and tags have been tweaked to reflect this. ]
In commit 720c24192404 ("ANDROID: binder: change down_write to
down_read") binder assumed the mmap read lock is sufficient to protect
alloc->vma inside binder_update_page_range(). This used to be accurate
until commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in
munmap"), which now downgrades the mmap_lock after detaching the vma
from the rbtree in munmap(). Then it proceeds to teardown and free the
vma with only the read lock held.
This means that accesses to alloc->vma in binder_update_page_range() now
will race with vm_area_free() in munmap() and can cause a UAF as shown
in the following KASAN trace:
==================================================================
BUG: KASAN: use-after-free in vm_insert_page+0x7c/0x1f0
Read of size 8 at addr ffff16204ad00600 by task server/558
CPU: 3 PID: 558 Comm: server Not tainted 5.10.150-00001-gdc8dcf942daa #1
Hardware name: linux,dummy-virt (DT)
Call trace:
dump_backtrace+0x0/0x2a0
show_stack+0x18/0x2c
dump_stack+0xf8/0x164
print_address_description.constprop.0+0x9c/0x538
kasan_report+0x120/0x200
__asan_load8+0xa0/0xc4
vm_insert_page+0x7c/0x1f0
binder_update_page_range+0x278/0x50c
binder_alloc_new_buf+0x3f0/0xba0
binder_transaction+0x64c/0x3040
binder_thread_write+0x924/0x2020
binder_ioctl+0x1610/0x2e5c
__arm64_sys_ioctl+0xd4/0x120
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
Allocated by task 559:
kasan_save_stack+0x38/0x6c
__kasan_kmalloc.constprop.0+0xe4/0xf0
kasan_slab_alloc+0x18/0x2c
kmem_cache_alloc+0x1b0/0x2d0
vm_area_alloc+0x28/0x94
mmap_region+0x378/0x920
do_mmap+0x3f0/0x600
vm_mmap_pgoff+0x150/0x17c
ksys_mmap_pgoff+0x284/0x2dc
__arm64_sys_mmap+0x84/0xa4
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
Freed by task 560:
kasan_save_stack+0x38/0x6c
kasan_set_track+0x28/0x40
kasan_set_free_info+0x24/0x4c
__kasan_slab_free+0x100/0x164
kasan_slab_free+0x14/0x20
kmem_cache_free+0xc4/0x34c
vm_area_free+0x1c/0x2c
remove_vma+0x7c/0x94
__do_munmap+0x358/0x710
__vm_munmap+0xbc/0x130
__arm64_sys_munmap+0x4c/0x64
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
[...]
==================================================================
To prevent the race above, revert back to taking the mmap write lock
inside binder_update_page_range(). One might expect an increase of mmap
lock contention. However, binder already serializes these calls via top
level alloc->mutex. Also, there was no performance impact shown when
running the binder benchmark tests.
Fixes: c0fd2101781e ("Revert "android: binder: stop saving a pointer to the VMA"")
Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
Reported-by: Jann Horn <jannh(a)google.com>
Closes: https://lore.kernel.org/all/20230518144052.xkj6vmddccq4v66b@revolver
Cc: <stable(a)vger.kernel.org>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
Acked-by: Todd Kjos <tkjos(a)google.com>
Link: https://lore.kernel.org/r/20230519195950.1775656-1-cmllamas@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder_alloc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index e7c9d466f8e8..662a2a2e2e84 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -212,7 +212,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
mm = alloc->mm;
if (mm) {
- mmap_read_lock(mm);
+ mmap_write_lock(mm);
vma = alloc->vma;
}
@@ -270,7 +270,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
trace_binder_alloc_page_end(alloc, index);
}
if (mm) {
- mmap_read_unlock(mm);
+ mmap_write_unlock(mm);
mmput(mm);
}
return 0;
@@ -303,7 +303,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
}
err_no_vma:
if (mm) {
- mmap_read_unlock(mm);
+ mmap_write_unlock(mm);
mmput(mm);
}
return vma ? -ENOMEM : -ESRCH;
--
2.40.1
[ cmllamas: clean forward port from commit 015ac18be7de ("binder: fix
UAF of alloc->vma in race with munmap()") in 5.10 stable. It is needed
in mainline after the revert of commit a43cfc87caaf ("android: binder:
stop saving a pointer to the VMA") as pointed out by Liam. The commit
log and tags have been tweaked to reflect this. ]
In commit 720c24192404 ("ANDROID: binder: change down_write to
down_read") binder assumed the mmap read lock is sufficient to protect
alloc->vma inside binder_update_page_range(). This used to be accurate
until commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in
munmap"), which now downgrades the mmap_lock after detaching the vma
from the rbtree in munmap(). Then it proceeds to teardown and free the
vma with only the read lock held.
This means that accesses to alloc->vma in binder_update_page_range() now
will race with vm_area_free() in munmap() and can cause a UAF as shown
in the following KASAN trace:
==================================================================
BUG: KASAN: use-after-free in vm_insert_page+0x7c/0x1f0
Read of size 8 at addr ffff16204ad00600 by task server/558
CPU: 3 PID: 558 Comm: server Not tainted 5.10.150-00001-gdc8dcf942daa #1
Hardware name: linux,dummy-virt (DT)
Call trace:
dump_backtrace+0x0/0x2a0
show_stack+0x18/0x2c
dump_stack+0xf8/0x164
print_address_description.constprop.0+0x9c/0x538
kasan_report+0x120/0x200
__asan_load8+0xa0/0xc4
vm_insert_page+0x7c/0x1f0
binder_update_page_range+0x278/0x50c
binder_alloc_new_buf+0x3f0/0xba0
binder_transaction+0x64c/0x3040
binder_thread_write+0x924/0x2020
binder_ioctl+0x1610/0x2e5c
__arm64_sys_ioctl+0xd4/0x120
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
Allocated by task 559:
kasan_save_stack+0x38/0x6c
__kasan_kmalloc.constprop.0+0xe4/0xf0
kasan_slab_alloc+0x18/0x2c
kmem_cache_alloc+0x1b0/0x2d0
vm_area_alloc+0x28/0x94
mmap_region+0x378/0x920
do_mmap+0x3f0/0x600
vm_mmap_pgoff+0x150/0x17c
ksys_mmap_pgoff+0x284/0x2dc
__arm64_sys_mmap+0x84/0xa4
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
Freed by task 560:
kasan_save_stack+0x38/0x6c
kasan_set_track+0x28/0x40
kasan_set_free_info+0x24/0x4c
__kasan_slab_free+0x100/0x164
kasan_slab_free+0x14/0x20
kmem_cache_free+0xc4/0x34c
vm_area_free+0x1c/0x2c
remove_vma+0x7c/0x94
__do_munmap+0x358/0x710
__vm_munmap+0xbc/0x130
__arm64_sys_munmap+0x4c/0x64
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
[...]
==================================================================
To prevent the race above, revert back to taking the mmap write lock
inside binder_update_page_range(). One might expect an increase of mmap
lock contention. However, binder already serializes these calls via top
level alloc->mutex. Also, there was no performance impact shown when
running the binder benchmark tests.
Fixes: c0fd2101781e ("Revert "android: binder: stop saving a pointer to the VMA"")
Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
Reported-by: Jann Horn <jannh(a)google.com>
Closes: https://lore.kernel.org/all/20230518144052.xkj6vmddccq4v66b@revolver
Cc: <stable(a)vger.kernel.org>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
Acked-by: Todd Kjos <tkjos(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder_alloc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index e7c9d466f8e8..662a2a2e2e84 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -212,7 +212,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
mm = alloc->mm;
if (mm) {
- mmap_read_lock(mm);
+ mmap_write_lock(mm);
vma = alloc->vma;
}
@@ -270,7 +270,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
trace_binder_alloc_page_end(alloc, index);
}
if (mm) {
- mmap_read_unlock(mm);
+ mmap_write_unlock(mm);
mmput(mm);
}
return 0;
@@ -303,7 +303,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
}
err_no_vma:
if (mm) {
- mmap_read_unlock(mm);
+ mmap_write_unlock(mm);
mmput(mm);
}
return vma ? -ENOMEM : -ESRCH;
--
2.40.1.698.g37aff9b760-goog
The patch titled
Subject: riscv/purgatory: remove PGO flags
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
riscv-purgatory-remove-pgo-flags.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ricardo Ribalda <ribalda(a)chromium.org>
Subject: riscv/purgatory: remove PGO flags
Date: Fri, 19 May 2023 16:47:39 +0200
If profile-guided optimization is enabled, the purgatory ends up with
multiple .text sections. This is not supported by kexec and crashes the
system.
Link: https://lkml.kernel.org/r/20230321-kexec_clang16-v7-4-b05c520b7296@chromium…
Fixes: 930457057abe ("kernel/kexec_file.c: split up __kexec_load_puragory")
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
Acked-by: Palmer Dabbelt <palmer(a)rivosinc.com>
Cc: <stable(a)vger.kernel.org>
Cc: Albert Ou <aou(a)eecs.berkeley.edu>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Nick Desaulniers <ndesaulniers(a)google.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Philipp Rudo <prudo(a)redhat.com>
Cc: Ross Zwisler <zwisler(a)google.com>
Cc: Simon Horman <horms(a)kernel.org>
Cc: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Tom Rix <trix(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/riscv/purgatory/Makefile | 5 +++++
1 file changed, 5 insertions(+)
--- a/arch/riscv/purgatory/Makefile~riscv-purgatory-remove-pgo-flags
+++ a/arch/riscv/purgatory/Makefile
@@ -35,6 +35,11 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
# When linking purgatory.ro with -r unresolved symbols are not checked,
# also link a purgatory.chk binary without -r to check for unresolved symbols.
PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib
_
Patches currently in -mm which might be from ribalda(a)chromium.org are
kexec-support-purgatories-with-texthot-sections.patch
x86-purgatory-remove-pgo-flags.patch
powerpc-purgatory-remove-pgo-flags.patch
riscv-purgatory-remove-pgo-flags.patch
The patch titled
Subject: powerpc/purgatory: remove PGO flags
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
powerpc-purgatory-remove-pgo-flags.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ricardo Ribalda <ribalda(a)chromium.org>
Subject: powerpc/purgatory: remove PGO flags
Date: Fri, 19 May 2023 16:47:38 +0200
If profile-guided optimization is enabled, the purgatory ends up with
multiple .text sections. This is not supported by kexec and crashes the
system.
Link: https://lkml.kernel.org/r/20230321-kexec_clang16-v7-3-b05c520b7296@chromium…
Fixes: 930457057abe ("kernel/kexec_file.c: split up __kexec_load_puragory")
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: <stable(a)vger.kernel.org>
Cc: Albert Ou <aou(a)eecs.berkeley.edu>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Nick Desaulniers <ndesaulniers(a)google.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Palmer Dabbelt <palmer(a)rivosinc.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Philipp Rudo <prudo(a)redhat.com>
Cc: Ross Zwisler <zwisler(a)google.com>
Cc: Simon Horman <horms(a)kernel.org>
Cc: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Tom Rix <trix(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/powerpc/purgatory/Makefile | 5 +++++
1 file changed, 5 insertions(+)
--- a/arch/powerpc/purgatory/Makefile~powerpc-purgatory-remove-pgo-flags
+++ a/arch/powerpc/purgatory/Makefile
@@ -5,6 +5,11 @@ KCSAN_SANITIZE := n
targets += trampoline_$(BITS).o purgatory.ro
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
$(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE
_
Patches currently in -mm which might be from ribalda(a)chromium.org are
kexec-support-purgatories-with-texthot-sections.patch
x86-purgatory-remove-pgo-flags.patch
powerpc-purgatory-remove-pgo-flags.patch
riscv-purgatory-remove-pgo-flags.patch
The patch titled
Subject: x86/purgatory: remove PGO flags
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
x86-purgatory-remove-pgo-flags.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ricardo Ribalda <ribalda(a)chromium.org>
Subject: x86/purgatory: remove PGO flags
Date: Fri, 19 May 2023 16:47:37 +0200
If profile-guided optimization is enabled, the purgatory ends up with
multiple .text sections. This is not supported by kexec and crashes the
system.
Link: https://lkml.kernel.org/r/20230321-kexec_clang16-v7-2-b05c520b7296@chromium…
Fixes: 930457057abe ("kernel/kexec_file.c: split up __kexec_load_puragory")
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
Cc: <stable(a)vger.kernel.org>
Cc: Albert Ou <aou(a)eecs.berkeley.edu>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Nick Desaulniers <ndesaulniers(a)google.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Palmer Dabbelt <palmer(a)rivosinc.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Philipp Rudo <prudo(a)redhat.com>
Cc: Ross Zwisler <zwisler(a)google.com>
Cc: Simon Horman <horms(a)kernel.org>
Cc: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Tom Rix <trix(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/x86/purgatory/Makefile | 5 +++++
1 file changed, 5 insertions(+)
--- a/arch/x86/purgatory/Makefile~x86-purgatory-remove-pgo-flags
+++ a/arch/x86/purgatory/Makefile
@@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/s
CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
# When linking purgatory.ro with -r unresolved symbols are not checked,
# also link a purgatory.chk binary without -r to check for unresolved symbols.
PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib
_
Patches currently in -mm which might be from ribalda(a)chromium.org are
kexec-support-purgatories-with-texthot-sections.patch
x86-purgatory-remove-pgo-flags.patch
powerpc-purgatory-remove-pgo-flags.patch
riscv-purgatory-remove-pgo-flags.patch
The patch titled
Subject: kexec: support purgatories with .text.hot sections
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kexec-support-purgatories-with-texthot-sections.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ricardo Ribalda <ribalda(a)chromium.org>
Subject: kexec: support purgatories with .text.hot sections
Date: Fri, 19 May 2023 16:47:36 +0200
Patch series "kexec: Fix kexec_file_load for llvm16 with PGO", v7.
When upreving llvm I realised that kexec stopped working on my test
platform.
The reason seems to be that due to PGO there are multiple .text sections
on the purgatory, and kexec does not supports that.
This patch (of 4):
Clang16 links the purgatory text in two sections when PGO is in use:
[ 1] .text PROGBITS 0000000000000000 00000040
00000000000011a1 0000000000000000 AX 0 0 16
[ 2] .rela.text RELA 0000000000000000 00003498
0000000000000648 0000000000000018 I 24 1 8
...
[17] .text.hot. PROGBITS 0000000000000000 00003220
000000000000020b 0000000000000000 AX 0 0 1
[18] .rela.text.hot. RELA 0000000000000000 00004428
0000000000000078 0000000000000018 I 24 17 8
And both of them have their range [sh_addr ... sh_addr+sh_size] on the
area pointed by `e_entry`.
This causes that image->start is calculated twice, once for .text and
another time for .text.hot. The second calculation leaves image->start
in a random location.
Because of this, the system crashes immediately after:
kexec_core: Starting new kernel
Link: https://lkml.kernel.org/r/20230321-kexec_clang16-v7-0-b05c520b7296@chromium…
Link: https://lkml.kernel.org/r/20230321-kexec_clang16-v7-1-b05c520b7296@chromium…
Fixes: 930457057abe ("kernel/kexec_file.c: split up __kexec_load_puragory")
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
Reviewed-by: Ross Zwisler <zwisler(a)google.com>
Reviewed-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Reviewed-by: Philipp Rudo <prudo(a)redhat.com>
Cc: Albert Ou <aou(a)eecs.berkeley.edu>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Nick Desaulniers <ndesaulniers(a)google.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Palmer Dabbelt <palmer(a)rivosinc.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Simon Horman <horms(a)kernel.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Tom Rix <trix(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kexec_file.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
--- a/kernel/kexec_file.c~kexec-support-purgatories-with-texthot-sections
+++ a/kernel/kexec_file.c
@@ -901,10 +901,22 @@ static int kexec_purgatory_setup_sechdrs
}
offset = ALIGN(offset, align);
+
+ /*
+ * Check if the segment contains the entry point, if so,
+ * calculate the value of image->start based on it.
+ * If the compiler has produced more than one .text section
+ * (Eg: .text.hot), they are generally after the main .text
+ * section, and they shall not be used to calculate
+ * image->start. So do not re-calculate image->start if it
+ * is not set to the initial value, and warn the user so they
+ * have a chance to fix their purgatory's linker script.
+ */
if (sechdrs[i].sh_flags & SHF_EXECINSTR &&
pi->ehdr->e_entry >= sechdrs[i].sh_addr &&
pi->ehdr->e_entry < (sechdrs[i].sh_addr
- + sechdrs[i].sh_size)) {
+ + sechdrs[i].sh_size) &&
+ !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) {
kbuf->image->start -= sechdrs[i].sh_addr;
kbuf->image->start += kbuf->mem + offset;
}
_
Patches currently in -mm which might be from ribalda(a)chromium.org are
kexec-support-purgatories-with-texthot-sections.patch
x86-purgatory-remove-pgo-flags.patch
powerpc-purgatory-remove-pgo-flags.patch
riscv-purgatory-remove-pgo-flags.patch
This patch fixes to set local processes and their pid value represented
inside the struct flock when using F_GETLK if there is a conflict with
another process.
Currently every pid in struct flock l_pid is set as negative pid number.
This was changed by commit 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use
fs-specific l_pid for remote locks"). There is still the question how to
represent remote pid lock holders, which is possible for DLM posix
handling, in the flock structure. This patch however will only change
local process holders to be represented as positive pids.
Further patches may change the behaviour for remote pid lock holders,
for now this patch will leave the behaviour of remote lock holders
unchanged which will be represented as negative pid.
There exists a simple ltp testcase [0] as reproducer.
[0] https://gitlab.com/netcoder/ltp/-/blob/dlm_fcntl_owner_testcase/testcases/k…
Cc: stable(a)vger.kernel.org
Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks")
Signed-off-by: Alexander Aring <aahringo(a)redhat.com>
---
fs/dlm/plock.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index ed4357e62f35..ff364901f22b 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -360,7 +360,9 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
locks_init_lock(fl);
fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
fl->fl_flags = FL_POSIX;
- fl->fl_pid = -op->info.pid;
+ fl->fl_pid = op->info.pid;
+ if (op->info.nodeid != dlm_our_nodeid())
+ fl->fl_pid = -fl->fl_pid;
fl->fl_start = op->info.start;
fl->fl_end = op->info.end;
rv = 0;
--
2.31.1
When running on an AMD vIOMMU, we observed multiple invalidations (of
decreasing power of 2 aligned sizes) when unmapping a single page.
Domain flush takes gather bounds (end-start) as size param. However,
gather->end is defined as the last inclusive address (start + size - 1).
This leads to an off by 1 error.
With this patch, verified that 1 invalidation occurs when unmapping a
single page.
Fixes: a270be1b3fdf ("iommu/amd: Use only natural aligned flushes in a VM")
Cc: <stable(a)vger.kernel.org> # 5.15.x
Suggested-by: Gary Zibrat <gzibrat(a)google.com>
Tested-by: Sudheer Dantuluri <dantuluris(a)google.com>
Acked-by: Nadav Amit <namit(a)vmware.com>
Reviewed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Signed-off-by: Jon Pan-Doh <pandoh(a)google.com>
---
drivers/iommu/amd/iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 5a505ba5467e..da45b1ab042d 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2378,7 +2378,7 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
- domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
+ domain_flush_pages(dom, gather->start, gather->end - gather->start + 1, 1);
amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
--
2.40.0.634.g4ca3ef3211-goog
Commit c145e0b47c77 ("mm: streamline COW logic in do_swap_page()") moved
the call to swap_free() before the call to set_pte_at(), which meant that
the MTE tags could end up being freed before set_pte_at() had a chance
to restore them. Fix it by adding a call to the arch_swap_restore() hook
before the call to swap_free().
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Link: https://linux-review.googlesource.com/id/I6470efa669e8bd2f841049b8c61020c51…
Cc: <stable(a)vger.kernel.org> # 6.1
Fixes: c145e0b47c77 ("mm: streamline COW logic in do_swap_page()")
Reported-by: Qun-wei Lin (林群崴) <Qun-wei.Lin(a)mediatek.com>
Closes: https://lore.kernel.org/all/5050805753ac469e8d727c797c2218a9d780d434.camel@…
---
v2:
- Call arch_swap_restore() directly instead of via arch_do_swap_page()
mm/memory.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/mm/memory.c b/mm/memory.c
index f69fbc251198..fc25764016b3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3932,6 +3932,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
}
}
+ /*
+ * Some architectures may have to restore extra metadata to the page
+ * when reading from swap. This metadata may be indexed by swap entry
+ * so this must be called before swap_free().
+ */
+ arch_swap_restore(entry, folio);
+
/*
* Remove the swap entry and conditionally try to free up the swapcache.
* We're already holding a reference on the page but haven't mapped it
--
2.40.1.606.ga4b1b128d6-goog
Commit c145e0b47c77 ("mm: streamline COW logic in do_swap_page()") moved
the call to swap_free() before the call to set_pte_at(), which meant that
the MTE tags could end up being freed before set_pte_at() had a chance
to restore them. One other possibility was to hook arch_do_swap_page(),
but this had a number of problems:
- The call to the hook was also after swap_free().
- The call to the hook was after the call to set_pte_at(), so there was a
racy window where uninitialized metadata may be exposed to userspace.
This likely also affects SPARC ADI, which implements this hook to
restore tags.
- As a result of commit 1eba86c096e3 ("mm: change page type prior to
adding page table entry"), we were also passing the new PTE as the
oldpte argument, preventing the hook from knowing the swap index.
Fix all of these problems by moving the arch_do_swap_page() call before
the call to free_page(), and ensuring that we do not set orig_pte until
after the call.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Suggested-by: Catalin Marinas <catalin.marinas(a)arm.com>
Link: https://linux-review.googlesource.com/id/I6470efa669e8bd2f841049b8c61020c51…
Cc: <stable(a)vger.kernel.org> # 6.1
Fixes: ca827d55ebaa ("mm, swap: Add infrastructure for saving page metadata on swap")
Fixes: 1eba86c096e3 ("mm: change page type prior to adding page table entry")
---
mm/memory.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 01a23ad48a04..83268d287ff1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3914,19 +3914,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
}
}
- /*
- * Remove the swap entry and conditionally try to free up the swapcache.
- * We're already holding a reference on the page but haven't mapped it
- * yet.
- */
- swap_free(entry);
- if (should_try_to_free_swap(folio, vma, vmf->flags))
- folio_free_swap(folio);
-
- inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
- dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot);
-
/*
* Same logic as in do_wp_page(); however, optimize for pages that are
* certainly not shared either because we just allocated them without
@@ -3946,8 +3934,21 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
pte = pte_mksoft_dirty(pte);
if (pte_swp_uffd_wp(vmf->orig_pte))
pte = pte_mkuffd_wp(pte);
+ arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
vmf->orig_pte = pte;
+ /*
+ * Remove the swap entry and conditionally try to free up the swapcache.
+ * We're already holding a reference on the page but haven't mapped it
+ * yet.
+ */
+ swap_free(entry);
+ if (should_try_to_free_swap(folio, vma, vmf->flags))
+ folio_free_swap(folio);
+
+ inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
+ dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
+
/* ksm created a completely new copy */
if (unlikely(folio != swapcache && swapcache)) {
page_add_new_anon_rmap(page, vma, vmf->address);
@@ -3959,7 +3960,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
VM_BUG_ON(!folio_test_anon(folio) ||
(pte_write(pte) && !PageAnonExclusive(page)));
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
- arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
folio_unlock(folio);
if (folio != swapcache && swapcache) {
--
2.40.1.606.ga4b1b128d6-goog
After the listener svc_sock be freed, and before invoking svc_tcp_accept()
for the established child sock, there is a window that the newsock
retaining a freed listener svc_sock in sk_user_data which cloning from
parent. In the race windows if data is received on the newsock, we will
observe use-after-free report in svc_tcp_listen_data_ready().
Reproduce by two tasks:
1. while :; do rpc.nfsd 0 ; rpc.nfsd; done
2. while :; do echo "" | ncat -4 127.0.0.1 2049 ; done
KASAN report:
==================================================================
BUG: KASAN: slab-use-after-free in svc_tcp_listen_data_ready+0x1cf/0x1f0 [sunrpc]
Read of size 8 at addr ffff888139d96228 by task nc/102553
CPU: 7 PID: 102553 Comm: nc Not tainted 6.3.0+ #18
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020
Call Trace:
<IRQ>
dump_stack_lvl+0x33/0x50
print_address_description.constprop.0+0x27/0x310
print_report+0x3e/0x70
kasan_report+0xae/0xe0
svc_tcp_listen_data_ready+0x1cf/0x1f0 [sunrpc]
tcp_data_queue+0x9f4/0x20e0
tcp_rcv_established+0x666/0x1f60
tcp_v4_do_rcv+0x51c/0x850
tcp_v4_rcv+0x23fc/0x2e80
ip_protocol_deliver_rcu+0x62/0x300
ip_local_deliver_finish+0x267/0x350
ip_local_deliver+0x18b/0x2d0
ip_rcv+0x2fb/0x370
__netif_receive_skb_one_core+0x166/0x1b0
process_backlog+0x24c/0x5e0
__napi_poll+0xa2/0x500
net_rx_action+0x854/0xc90
__do_softirq+0x1bb/0x5de
do_softirq+0xcb/0x100
</IRQ>
<TASK>
...
</TASK>
Allocated by task 102371:
kasan_save_stack+0x1e/0x40
kasan_set_track+0x21/0x30
__kasan_kmalloc+0x7b/0x90
svc_setup_socket+0x52/0x4f0 [sunrpc]
svc_addsock+0x20d/0x400 [sunrpc]
__write_ports_addfd+0x209/0x390 [nfsd]
write_ports+0x239/0x2c0 [nfsd]
nfsctl_transaction_write+0xac/0x110 [nfsd]
vfs_write+0x1c3/0xae0
ksys_write+0xed/0x1c0
do_syscall_64+0x38/0x90
entry_SYSCALL_64_after_hwframe+0x72/0xdc
Freed by task 102551:
kasan_save_stack+0x1e/0x40
kasan_set_track+0x21/0x30
kasan_save_free_info+0x2a/0x50
__kasan_slab_free+0x106/0x190
__kmem_cache_free+0x133/0x270
svc_xprt_free+0x1e2/0x350 [sunrpc]
svc_xprt_destroy_all+0x25a/0x440 [sunrpc]
nfsd_put+0x125/0x240 [nfsd]
nfsd_svc+0x2cb/0x3c0 [nfsd]
write_threads+0x1ac/0x2a0 [nfsd]
nfsctl_transaction_write+0xac/0x110 [nfsd]
vfs_write+0x1c3/0xae0
ksys_write+0xed/0x1c0
do_syscall_64+0x38/0x90
entry_SYSCALL_64_after_hwframe+0x72/0xdc
Fix the UAF by simply doing nothing in svc_tcp_listen_data_ready()
if state != TCP_LISTEN, that will avoid dereferencing svsk for all
child socket.
Link: https://lore.kernel.org/lkml/20230507091131.23540-1-dinghui@sangfor.com.cn/
Fixes: fa9251afc33c ("SUNRPC: Call the default socket callbacks instead of open coding")
Signed-off-by: Ding Hui <dinghui(a)sangfor.com.cn>
Cc: <stable(a)vger.kernel.org>
---
net/sunrpc/svcsock.c | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a51c9b989d58..9aca6e1e78e4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -825,12 +825,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
trace_sk_data_ready(sk);
- if (svsk) {
- /* Refer to svc_setup_socket() for details. */
- rmb();
- svsk->sk_odata(sk);
- }
-
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
@@ -839,13 +833,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
* when one of child sockets become ESTABLISHED.
* 2) data_ready method of the child socket may be called
* when it receives data before the socket is accepted.
- * In case of 2, we should ignore it silently.
+ * In case of 2, we should ignore it silently and DO NOT
+ * dereference svsk.
*/
- if (sk->sk_state == TCP_LISTEN) {
- if (svsk) {
- set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
- }
+ if (sk->sk_state != TCP_LISTEN)
+ return;
+
+ if (svsk) {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
+ svsk->sk_odata(sk);
+ set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_enqueue(&svsk->sk_xprt);
}
}
--
2.17.1
Implementation Note at the end of PCIe r6.0.1 sec 7.5.3.7 recommends
handling LTSSM race to ensure link retraining acquires correct
parameters from the LNKCTL register. According to the implementation
note, LTSSM might transition into Recovery or Configuration state
independently of the driver requesting it, and if retraining due to
such an event is still ongoing, the value written into the LNKCTL
register might not be considered by the link retraining.
Ensure link training bit is clear before toggling link retraining bit
to meet the requirements of the Implementation Note.
Fixes: 7d715a6c1ae5 ("PCI: add PCI Express ASPM support")
Suggested-by: Lukas Wunner <lukas(a)wunner.de>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Reviewed-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org
---
drivers/pci/pcie/aspm.c | 37 +++++++++++++++++++++++++++----------
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 66d7514ca111..dde1ef13d0d1 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -193,12 +193,37 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist)
link->clkpm_disable = blacklist ? 1 : 0;
}
+static bool pcie_wait_for_retrain(struct pci_dev *pdev)
+{
+ unsigned long end_jiffies;
+ u16 reg16;
+
+ /* Wait for link training end. Break out after waiting for timeout */
+ end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
+ do {
+ pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, ®16);
+ if (!(reg16 & PCI_EXP_LNKSTA_LT))
+ break;
+ msleep(1);
+ } while (time_before(jiffies, end_jiffies));
+
+ return !(reg16 & PCI_EXP_LNKSTA_LT);
+}
+
static bool pcie_retrain_link(struct pcie_link_state *link)
{
struct pci_dev *parent = link->pdev;
- unsigned long end_jiffies;
u16 reg16;
+ /*
+ * Ensure the updated LNKCTL parameters are used during link
+ * training by checking that there is no ongoing link training to
+ * avoid LTSSM race as recommended in Implementation Note at the end
+ * of PCIe r6.0.1 sec 7.5.3.7.
+ */
+ if (!pcie_wait_for_retrain(parent))
+ return false;
+
pcie_capability_read_word(parent, PCI_EXP_LNKCTL, ®16);
reg16 |= PCI_EXP_LNKCTL_RL;
pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
@@ -212,15 +237,7 @@ static bool pcie_retrain_link(struct pcie_link_state *link)
pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
}
- /* Wait for link training end. Break out after waiting for timeout */
- end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
- do {
- pcie_capability_read_word(parent, PCI_EXP_LNKSTA, ®16);
- if (!(reg16 & PCI_EXP_LNKSTA_LT))
- break;
- msleep(1);
- } while (time_before(jiffies, end_jiffies));
- return !(reg16 & PCI_EXP_LNKSTA_LT);
+ return pcie_wait_for_retrain(parent);
}
/*
--
2.30.2
Dear,
Please grant me the permission to share important discussion with you.
I am looking forward to hearing from you at your earliest convenience.
Best Regards.
Mrs. Nina Coulibaly
Dzień dobry,
chciałbym poinformować Państwa o możliwości pozyskania nowych zleceń ze strony www.
Widzimy zainteresowanie potencjalnych Klientów Państwa firmą, dlatego chętnie pomożemy Państwu dotrzeć z ofertą do większego grona odbiorców poprzez efektywne metody pozycjonowania strony w Google.
Czy mógłbym liczyć na kontakt zwrotny?
Pozdrawiam serdecznie,
Wiktor Nurek
Currently, due to the sequential use of min_t() and clamp_t() macros,
in cdc_ncm_check_tx_max(), if dwNtbOutMaxSize is not set, the logic
sets tx_max to 0. This is then used to allocate the data area of the
SKB requested later in cdc_ncm_fill_tx_frame().
This does not cause an issue presently because when memory is
allocated during initialisation phase of SKB creation, more memory
(512b) is allocated than is required for the SKB headers alone (320b),
leaving some space (512b - 320b = 192b) for CDC data (172b).
However, if more elements (for example 3 x u64 = [24b]) were added to
one of the SKB header structs, say 'struct skb_shared_info',
increasing its original size (320b [320b aligned]) to something larger
(344b [384b aligned]), then suddenly the CDC data (172b) no longer
fits in the spare SKB data area (512b - 384b = 128b).
Consequently the SKB bounds checking semantics fails and panics:
skbuff: skb_over_panic: text:ffffffff830a5b5f len:184 put:172 \
head:ffff888119227c00 data:ffff888119227c00 tail:0xb8 end:0x80 dev:<NULL>
------------[ cut here ]------------
kernel BUG at net/core/skbuff.c:110!
RIP: 0010:skb_panic+0x14f/0x160 net/core/skbuff.c:106
<snip>
Call Trace:
<IRQ>
skb_over_panic+0x2c/0x30 net/core/skbuff.c:115
skb_put+0x205/0x210 net/core/skbuff.c:1877
skb_put_zero include/linux/skbuff.h:2270 [inline]
cdc_ncm_ndp16 drivers/net/usb/cdc_ncm.c:1116 [inline]
cdc_ncm_fill_tx_frame+0x127f/0x3d50 drivers/net/usb/cdc_ncm.c:1293
cdc_ncm_tx_fixup+0x98/0xf0 drivers/net/usb/cdc_ncm.c:1514
By overriding the max value with the default CDC_NCM_NTB_MAX_SIZE_TX
when not offered through the system provided params, we ensure enough
data space is allocated to handle the CDC data, meaning no crash will
occur.
Cc: stable(a)vger.kernel.org
Cc: Oliver Neukum <oliver(a)neukum.org>
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: linux-usb(a)vger.kernel.org
Cc: netdev(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Fixes: 289507d3364f9 ("net: cdc_ncm: use sysfs for rx/tx aggregation tuning")
Signed-off-by: Lee Jones <lee.jones(a)linaro.org>
---
drivers/net/usb/cdc_ncm.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 24753a4da7e60..e303b522efb50 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -181,6 +181,8 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx)
min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32);
max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
+ if (max == 0)
+ max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */
/* some devices set dwNtbOutMaxSize too low for the above default */
min = min(min, max);
--
2.34.0.384.gca35af8252-goog
From: Xiubo Li <xiubli(a)redhat.com>
When the MClientSnap reqeust's op is not CEPH_SNAP_OP_SPLIT the
request may still contain a list of 'split_realms', and we need
to skip it anyway. Or it will be parsed as a corrupt snaptrace.
Cc: stable(a)vger.kernel.org
Cc: Frank Schilder <frans(a)dtu.dk>
Reported-by: Frank Schilder <frans(a)dtu.dk>
URL: https://tracker.ceph.com/issues/61200
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
fs/ceph/snap.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 0e59e95a96d9..d95dfe16b624 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1114,6 +1114,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
continue;
adjust_snap_realm_parent(mdsc, child, realm->ino);
}
+ } else {
+ p += sizeof(u64) * num_split_inos;
+ p += sizeof(u64) * num_split_realms;
}
/*
--
2.40.1
From: Kees Cook <keescook(a)chromium.org>
[ Upstream commit bfcc8ba45eb87bfaaff900bbad2b87b204899d41 ]
The memcpy() in ath_key_config() was attempting to write across
neighboring struct members in struct ath_keyval. Introduce a wrapping
struct_group, kv_values, to be the addressable target of the memcpy
without overflowing an individual member. Silences the false positive
run-time warning:
memcpy: detected field-spanning write (size 32) of single field "hk.kv_val" at drivers/net/wireless/ath/key.c:506 (size 16)
Link: https://bbs.archlinux.org/viewtopic.php?id=282254
Cc: Kalle Valo <kvalo(a)kernel.org>
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: Paolo Abeni <pabeni(a)redhat.com>
Cc: linux-wireless(a)vger.kernel.org
Cc: netdev(a)vger.kernel.org
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Signed-off-by: Kalle Valo <quic_kvalo(a)quicinc.com>
Link: https://lore.kernel.org/r/20230210054310.never.554-kees@kernel.org
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/wireless/ath/ath.h | 12 +++++++-----
drivers/net/wireless/ath/key.c | 2 +-
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h
index f083fb9038c36..f02a308a9ffc5 100644
--- a/drivers/net/wireless/ath/ath.h
+++ b/drivers/net/wireless/ath/ath.h
@@ -96,11 +96,13 @@ struct ath_keyval {
u8 kv_type;
u8 kv_pad;
u16 kv_len;
- u8 kv_val[16]; /* TK */
- u8 kv_mic[8]; /* Michael MIC key */
- u8 kv_txmic[8]; /* Michael MIC TX key (used only if the hardware
- * supports both MIC keys in the same key cache entry;
- * in that case, kv_mic is the RX key) */
+ struct_group(kv_values,
+ u8 kv_val[16]; /* TK */
+ u8 kv_mic[8]; /* Michael MIC key */
+ u8 kv_txmic[8]; /* Michael MIC TX key (used only if the hardware
+ * supports both MIC keys in the same key cache entry;
+ * in that case, kv_mic is the RX key) */
+ );
};
enum ath_cipher {
diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c
index 61b59a804e308..b7b61d4f02bae 100644
--- a/drivers/net/wireless/ath/key.c
+++ b/drivers/net/wireless/ath/key.c
@@ -503,7 +503,7 @@ int ath_key_config(struct ath_common *common,
hk.kv_len = key->keylen;
if (key->keylen)
- memcpy(hk.kv_val, key->key, key->keylen);
+ memcpy(&hk.kv_values, key->key, key->keylen);
if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
switch (vif->type) {
--
2.39.2
With a raw socket bound to IPPROTO_RAW (ie with hdrincl enabled), the
protocol field of the flow structure, build by raw_sendmsg() /
rawv6_sendmsg()), is set to IPPROTO_RAW. This breaks the ipsec policy
lookup when some policies are defined with a protocol in the selector.
For ipv6, the sin6_port field from 'struct sockaddr_in6' could be used to
specify the protocol. Just accept all values for IPPROTO_RAW socket.
For ipv4, the sin_port field of 'struct sockaddr_in' could not be used
without breaking backward compatibility (the value of this field was never
checked). Let's add a new kind of control message, so that the userland
could specify which protocol is used.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
CC: stable(a)vger.kernel.org
Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
---
The first version has been marked 'Awaiting Upstream'. Steffen confirmed
that the 'net' tree should be the target, thus I resend this patch.
I also CC stable(a)vger.kernel.org.
include/net/ip.h | 2 ++
include/uapi/linux/in.h | 1 +
net/ipv4/ip_sockglue.c | 15 ++++++++++++++-
net/ipv4/raw.c | 5 ++++-
net/ipv6/raw.c | 3 ++-
5 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index c3fffaa92d6e..acec504c469a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -76,6 +76,7 @@ struct ipcm_cookie {
__be32 addr;
int oif;
struct ip_options_rcu *opt;
+ __u8 protocol;
__u8 ttl;
__s16 tos;
char priority;
@@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
+ ipcm->protocol = inet->inet_num;
}
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 4b7f2df66b99..e682ab628dfa 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -163,6 +163,7 @@ struct in_addr {
#define IP_MULTICAST_ALL 49
#define IP_UNICAST_IF 50
#define IP_LOCAL_PORT_RANGE 51
+#define IP_PROTOCOL 52
#define MCAST_EXCLUDE 0
#define MCAST_INCLUDE 1
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b511ff0adc0a..ec0fbe874426 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -317,7 +317,17 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
ipc->tos = val;
ipc->priority = rt_tos2priority(ipc->tos);
break;
-
+ case IP_PROTOCOL:
+ if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
+ val = *(int *)CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
+ val = *(u8 *)CMSG_DATA(cmsg);
+ else
+ return -EINVAL;
+ if (val < 1 || val > 255)
+ return -EINVAL;
+ ipc->protocol = val;
+ break;
default:
return -EINVAL;
}
@@ -1761,6 +1771,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_LOCAL_PORT_RANGE:
val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
break;
+ case IP_PROTOCOL:
+ val = inet_sk(sk)->inet_num;
+ break;
default:
sockopt_release_sock(sk);
return -ENOPROTOOPT;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ff712bf2a98d..eadf1c9ef7e4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -532,6 +532,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
+ /* Keep backward compat */
+ if (hdrincl)
+ ipc.protocol = IPPROTO_RAW;
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -599,7 +602,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
RT_SCOPE_UNIVERSE,
- hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 7d0adb612bdd..44ee7a2e72ac 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -793,7 +793,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!proto)
proto = inet->inet_num;
- else if (proto != inet->inet_num)
+ else if (proto != inet->inet_num &&
+ inet->inet_num != IPPROTO_RAW)
return -EINVAL;
if (proto > 255)
--
2.39.2
From: Xiubo Li <xiubli(a)redhat.com>
When the MClientSnap reqeust's op is not CEPH_SNAP_OP_SPLIT the
request may still contain a list of 'split_realms', and we need
to skip it anyway. Or it will be parsed as a corrupt snaptrace.
Cc: stable(a)vger.kernel.org
Cc: Frank Schilder <frans(a)dtu.dk>
Reported-by: Frank Schilder <frans(a)dtu.dk>
URL: https://tracker.ceph.com/issues/61200
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
V2:
- Add a detail comment for the code.
fs/ceph/snap.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 0e59e95a96d9..0f00f977c0f0 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1114,6 +1114,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
continue;
adjust_snap_realm_parent(mdsc, child, realm->ino);
}
+ } else {
+ /*
+ * In non-SPLIT op case both the 'num_split_inos' and
+ * 'num_split_realms' should always be 0 and this will
+ * do nothing. But the MDS has one bug that in one of
+ * the UPDATE op cases it will pass a 'split_realms'
+ * list by mistake, and then will corrupted the snap
+ * trace in ceph_update_snap_trace().
+ *
+ * So we should skip them anyway here.
+ */
+ p += sizeof(u64) * num_split_inos;
+ p += sizeof(u64) * num_split_realms;
}
/*
--
2.40.1
This patch replaces preempt_{disable, enable} with its corresponding
notrace version in rethook_trampoline_handler so no worries about stack
recursion or overflow introduced by preempt_count_{add, sub} under
fprobe + rethook context.
Fixes: 54ecbe6f1ed5 ("rethook: Add a generic return hook")
Signed-off-by: Ze Gao <zegao(a)tencent.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/linux-trace-kernel/20230516071830.8190-2-zegao@tenc…
---
kernel/trace/rethook.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index 32c3dfdb4d6a..60f6cb2b486b 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -288,7 +288,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
* These loops must be protected from rethook_free_rcu() because those
* are accessing 'rhn->rethook'.
*/
- preempt_disable();
+ preempt_disable_notrace();
/*
* Run the handler on the shadow stack. Do not unlink the list here because
@@ -321,7 +321,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
first = first->next;
rethook_recycle(rhn);
}
- preempt_enable();
+ preempt_enable_notrace();
return correct_ret_addr;
}
--
2.40.1
The quilt patch titled
Subject: nilfs2: fix use-after-free bug of nilfs_root in nilfs_evict_inode()
has been removed from the -mm tree. Its filename was
nilfs2-fix-use-after-free-bug-of-nilfs_root-in-nilfs_evict_inode.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix use-after-free bug of nilfs_root in nilfs_evict_inode()
Date: Wed, 10 May 2023 00:29:56 +0900
During unmount process of nilfs2, nothing holds nilfs_root structure after
nilfs2 detaches its writer in nilfs_detach_log_writer(). However, since
nilfs_evict_inode() uses nilfs_root for some cleanup operations, it may
cause use-after-free read if inodes are left in "garbage_list" and
released by nilfs_dispose_list() at the end of nilfs_detach_log_writer().
Fix this issue by modifying nilfs_evict_inode() to only clear inode
without additional metadata changes that use nilfs_root if the file system
is degraded to read-only or the writer is detached.
Link: https://lkml.kernel.org/r/20230509152956.8313-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+78d4495558999f55d1da(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/00000000000099e5ac05fb1c3b85@google.com
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/inode.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
--- a/fs/nilfs2/inode.c~nilfs2-fix-use-after-free-bug-of-nilfs_root-in-nilfs_evict_inode
+++ a/fs/nilfs2/inode.c
@@ -917,6 +917,7 @@ void nilfs_evict_inode(struct inode *ino
struct nilfs_transaction_info ti;
struct super_block *sb = inode->i_sb;
struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct the_nilfs *nilfs;
int ret;
if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
@@ -929,6 +930,23 @@ void nilfs_evict_inode(struct inode *ino
truncate_inode_pages_final(&inode->i_data);
+ nilfs = sb->s_fs_info;
+ if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
+ /*
+ * If this inode is about to be disposed after the file system
+ * has been degraded to read-only due to file system corruption
+ * or after the writer has been detached, do not make any
+ * changes that cause writes, just clear it.
+ * Do this check after read-locking ns_segctor_sem by
+ * nilfs_transaction_begin() in order to avoid a race with
+ * the writer detach operation.
+ */
+ clear_inode(inode);
+ nilfs_clear_inode(inode);
+ nilfs_transaction_abort(sb);
+ return;
+ }
+
/* TODO: some of the following operations may fail. */
nilfs_truncate_bmap(ii, 0);
nilfs_mark_inode_dirty(inode);
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-fix-incomplete-buffer-cleanup-in-nilfs_btnode_abort_change_key.patch
The quilt patch titled
Subject: mm: fix zswap writeback race condition
has been removed from the -mm tree. Its filename was
mm-fix-zswap-writeback-race-condition.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Domenico Cerasuolo <cerasuolodomenico(a)gmail.com>
Subject: mm: fix zswap writeback race condition
Date: Wed, 3 May 2023 17:12:00 +0200
The zswap writeback mechanism can cause a race condition resulting in
memory corruption, where a swapped out page gets swapped in with data that
was written to a different page.
The race unfolds like this:
1. a page with data A and swap offset X is stored in zswap
2. page A is removed off the LRU by zpool driver for writeback in
zswap-shrink work, data for A is mapped by zpool driver
3. user space program faults and invalidates page entry A, offset X is
considered free
4. kswapd stores page B at offset X in zswap (zswap could also be
full, if so, page B would then be IOed to X, then skip step 5.)
5. entry A is replaced by B in tree->rbroot, this doesn't affect the
local reference held by zswap-shrink work
6. zswap-shrink work writes back A at X, and frees zswap entry A
7. swapin of slot X brings A in memory instead of B
The fix:
Once the swap page cache has been allocated (case ZSWAP_SWAPCACHE_NEW),
zswap-shrink work just checks that the local zswap_entry reference is
still the same as the one in the tree. If it's not the same it means that
it's either been invalidated or replaced, in both cases the writeback is
aborted because the local entry contains stale data.
Reproducer:
I originally found this by running `stress` overnight to validate my work
on the zswap writeback mechanism, it manifested after hours on my test
machine. The key to make it happen is having zswap writebacks, so
whatever setup pumps /sys/kernel/debug/zswap/written_back_pages should do
the trick.
In order to reproduce this faster on a vm, I setup a system with ~100M of
available memory and a 500M swap file, then running `stress --vm 1
--vm-bytes 300000000 --vm-stride 4000` makes it happen in matter of tens
of minutes. One can speed things up even more by swinging
/sys/module/zswap/parameters/max_pool_percent up and down between, say, 20
and 1; this makes it reproduce in tens of seconds. It's crucial to set
`--vm-stride` to something other than 4096 otherwise `stress` won't
realize that memory has been corrupted because all pages would have the
same data.
Link: https://lkml.kernel.org/r/20230503151200.19707-1-cerasuolodomenico@gmail.com
Signed-off-by: Domenico Cerasuolo <cerasuolodomenico(a)gmail.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reviewed-by: Chris Li (Google) <chrisl(a)kernel.org>
Cc: Dan Streetman <ddstreet(a)ieee.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Nitin Gupta <ngupta(a)vflare.org>
Cc: Seth Jennings <sjenning(a)redhat.com>
Cc: Vitaly Wool <vitaly.wool(a)konsulko.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/zswap.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
--- a/mm/zswap.c~mm-fix-zswap-writeback-race-condition
+++ a/mm/zswap.c
@@ -1020,6 +1020,22 @@ static int zswap_writeback_entry(struct
goto fail;
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
+ /*
+ * Having a local reference to the zswap entry doesn't exclude
+ * swapping from invalidating and recycling the swap slot. Once
+ * the swapcache is secured against concurrent swapping to and
+ * from the slot, recheck that the entry is still current before
+ * writing.
+ */
+ spin_lock(&tree->lock);
+ if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) {
+ spin_unlock(&tree->lock);
+ delete_from_swap_cache(page_folio(page));
+ ret = -ENOMEM;
+ goto fail;
+ }
+ spin_unlock(&tree->lock);
+
/* decompress */
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
dlen = PAGE_SIZE;
_
Patches currently in -mm which might be from cerasuolodomenico(a)gmail.com are
The quilt patch titled
Subject: zsmalloc: move LRU update from zs_map_object() to zs_malloc()
has been removed from the -mm tree. Its filename was
zsmalloc-move-lru-update-from-zs_map_object-to-zs_malloc.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Nhat Pham <nphamcs(a)gmail.com>
Subject: zsmalloc: move LRU update from zs_map_object() to zs_malloc()
Date: Fri, 5 May 2023 11:50:54 -0700
Under memory pressure, we sometimes observe the following crash:
[ 5694.832838] ------------[ cut here ]------------
[ 5694.842093] list_del corruption, ffff888014b6a448->next is LIST_POISON1 (dead000000000100)
[ 5694.858677] WARNING: CPU: 33 PID: 418824 at lib/list_debug.c:47 __list_del_entry_valid+0x42/0x80
[ 5694.961820] CPU: 33 PID: 418824 Comm: fuse_counters.s Kdump: loaded Tainted: G S 5.19.0-0_fbk3_rc3_hoangnhatpzsdynshrv41_10870_g85a9558a25de #1
[ 5694.990194] Hardware name: Wiwynn Twin Lakes MP/Twin Lakes Passive MP, BIOS YMM16 05/24/2021
[ 5695.007072] RIP: 0010:__list_del_entry_valid+0x42/0x80
[ 5695.017351] Code: 08 48 83 c2 22 48 39 d0 74 24 48 8b 10 48 39 f2 75 2c 48 8b 51 08 b0 01 48 39 f2 75 34 c3 48 c7 c7 55 d7 78 82 e8 4e 45 3b 00 <0f> 0b eb 31 48 c7 c7 27 a8 70 82 e8 3e 45 3b 00 0f 0b eb 21 48 c7
[ 5695.054919] RSP: 0018:ffffc90027aef4f0 EFLAGS: 00010246
[ 5695.065366] RAX: 41fe484987275300 RBX: ffff888008988180 RCX: 0000000000000000
[ 5695.079636] RDX: ffff88886006c280 RSI: ffff888860060480 RDI: ffff888860060480
[ 5695.093904] RBP: 0000000000000002 R08: 0000000000000000 R09: ffffc90027aef370
[ 5695.108175] R10: 0000000000000000 R11: ffffffff82fdf1c0 R12: 0000000010000002
[ 5695.122447] R13: ffff888014b6a448 R14: ffff888014b6a420 R15: 00000000138dc240
[ 5695.136717] FS: 00007f23a7d3f740(0000) GS:ffff888860040000(0000) knlGS:0000000000000000
[ 5695.152899] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 5695.164388] CR2: 0000560ceaab6ac0 CR3: 000000001c06c001 CR4: 00000000007706e0
[ 5695.178659] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 5695.192927] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 5695.207197] PKRU: 55555554
[ 5695.212602] Call Trace:
[ 5695.217486] <TASK>
[ 5695.221674] zs_map_object+0x91/0x270
[ 5695.229000] zswap_frontswap_store+0x33d/0x870
[ 5695.237885] ? do_raw_spin_lock+0x5d/0xa0
[ 5695.245899] __frontswap_store+0x51/0xb0
[ 5695.253742] swap_writepage+0x3c/0x60
[ 5695.261063] shrink_page_list+0x738/0x1230
[ 5695.269255] shrink_lruvec+0x5ec/0xcd0
[ 5695.276749] ? shrink_slab+0x187/0x5f0
[ 5695.284240] ? mem_cgroup_iter+0x6e/0x120
[ 5695.292255] shrink_node+0x293/0x7b0
[ 5695.299402] do_try_to_free_pages+0xea/0x550
[ 5695.307940] try_to_free_pages+0x19a/0x490
[ 5695.316126] __folio_alloc+0x19ff/0x3e40
[ 5695.323971] ? __filemap_get_folio+0x8a/0x4e0
[ 5695.332681] ? walk_component+0x2a8/0xb50
[ 5695.340697] ? generic_permission+0xda/0x2a0
[ 5695.349231] ? __filemap_get_folio+0x8a/0x4e0
[ 5695.357940] ? walk_component+0x2a8/0xb50
[ 5695.365955] vma_alloc_folio+0x10e/0x570
[ 5695.373796] ? walk_component+0x52/0xb50
[ 5695.381634] wp_page_copy+0x38c/0xc10
[ 5695.388953] ? filename_lookup+0x378/0xbc0
[ 5695.397140] handle_mm_fault+0x87f/0x1800
[ 5695.405157] do_user_addr_fault+0x1bd/0x570
[ 5695.413520] exc_page_fault+0x5d/0x110
[ 5695.421017] asm_exc_page_fault+0x22/0x30
After some investigation, I have found the following issue: unlike other
zswap backends, zsmalloc performs the LRU list update at the object
mapping time, rather than when the slot for the object is allocated.
This deviation was discussed and agreed upon during the review process
of the zsmalloc writeback patch series:
https://lore.kernel.org/lkml/Y3flcAXNxxrvy3ZH@cmpxchg.org/
Unfortunately, this introduces a subtle bug that occurs when there is a
concurrent store and reclaim, which interleave as follows:
zswap_frontswap_store() shrink_worker()
zs_malloc() zs_zpool_shrink()
spin_lock(&pool->lock) zs_reclaim_page()
zspage = find_get_zspage()
spin_unlock(&pool->lock)
spin_lock(&pool->lock)
zspage = list_first_entry(&pool->lru)
list_del(&zspage->lru)
zspage->lru.next = LIST_POISON1
zspage->lru.prev = LIST_POISON2
spin_unlock(&pool->lock)
zs_map_object()
spin_lock(&pool->lock)
if (!list_empty(&zspage->lru))
list_del(&zspage->lru)
CHECK_DATA_CORRUPTION(next == LIST_POISON1) /* BOOM */
With the current upstream code, this issue rarely happens. zswap only
triggers writeback when the pool is already full, at which point all
further store attempts are short-circuited. This creates an implicit
pseudo-serialization between reclaim and store. I am working on a new
zswap shrinking mechanism, which makes interleaving reclaim and store
more likely, exposing this bug.
zbud and z3fold do not have this problem, because they perform the LRU
list update in the alloc function, while still holding the pool's lock.
This patch fixes the aforementioned bug by moving the LRU update back to
zs_malloc(), analogous to zbud and z3fold.
Link: https://lkml.kernel.org/r/20230505185054.2417128-1-nphamcs@gmail.com
Fixes: 64f768c6b32e ("zsmalloc: add a LRU to zs_pool to keep track of zspages in LRU order")
Signed-off-by: Nhat Pham <nphamcs(a)gmail.com>
Suggested-by: Johannes Weiner <hannes(a)cmpxchg.org>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Acked-by: Minchan Kim <minchan(a)kernel.org>
Cc: Dan Streetman <ddstreet(a)ieee.org>
Cc: Nitin Gupta <ngupta(a)vflare.org>
Cc: Seth Jennings <sjenning(a)redhat.com>
Cc: Vitaly Wool <vitaly.wool(a)konsulko.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/zsmalloc.c | 36 +++++++++---------------------------
1 file changed, 9 insertions(+), 27 deletions(-)
--- a/mm/zsmalloc.c~zsmalloc-move-lru-update-from-zs_map_object-to-zs_malloc
+++ a/mm/zsmalloc.c
@@ -1331,31 +1331,6 @@ void *zs_map_object(struct zs_pool *pool
obj_to_location(obj, &page, &obj_idx);
zspage = get_zspage(page);
-#ifdef CONFIG_ZPOOL
- /*
- * Move the zspage to front of pool's LRU.
- *
- * Note that this is swap-specific, so by definition there are no ongoing
- * accesses to the memory while the page is swapped out that would make
- * it "hot". A new entry is hot, then ages to the tail until it gets either
- * written back or swaps back in.
- *
- * Furthermore, map is also called during writeback. We must not put an
- * isolated page on the LRU mid-reclaim.
- *
- * As a result, only update the LRU when the page is mapped for write
- * when it's first instantiated.
- *
- * This is a deviation from the other backends, which perform this update
- * in the allocation function (zbud_alloc, z3fold_alloc).
- */
- if (mm == ZS_MM_WO) {
- if (!list_empty(&zspage->lru))
- list_del(&zspage->lru);
- list_add(&zspage->lru, &pool->lru);
- }
-#endif
-
/*
* migration cannot move any zpages in this zspage. Here, pool->lock
* is too heavy since callers would take some time until they calls
@@ -1525,9 +1500,8 @@ unsigned long zs_malloc(struct zs_pool *
fix_fullness_group(class, zspage);
record_obj(handle, obj);
class_stat_inc(class, ZS_OBJS_INUSE, 1);
- spin_unlock(&pool->lock);
- return handle;
+ goto out;
}
spin_unlock(&pool->lock);
@@ -1550,6 +1524,14 @@ unsigned long zs_malloc(struct zs_pool *
/* We completely set up zspage so mark them as movable */
SetZsPageMovable(pool, zspage);
+out:
+#ifdef CONFIG_ZPOOL
+ /* Add/move zspage to beginning of LRU */
+ if (!list_empty(&zspage->lru))
+ list_del(&zspage->lru);
+ list_add(&zspage->lru, &pool->lru);
+#endif
+
spin_unlock(&pool->lock);
return handle;
_
Patches currently in -mm which might be from nphamcs(a)gmail.com are
workingset-refactor-lru-refault-to-expose-refault-recency-check.patch
cachestat-implement-cachestat-syscall.patch
cachestat-implement-cachestat-syscall-fix.patch
cachestat-wire-up-cachestat-for-other-architectures.patch
cachestat-wire-up-cachestat-for-other-architectures-fix.patch
selftests-add-selftests-for-cachestat.patch
The quilt patch titled
Subject: maple_tree: make maple state reusable after mas_empty_area()
has been removed from the -mm tree. Its filename was
maple_tree-make-maple-state-reusable-after-mas_empty_area.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Peng Zhang <zhangpeng.00(a)bytedance.com>
Subject: maple_tree: make maple state reusable after mas_empty_area()
Date: Fri, 5 May 2023 22:58:29 +0800
Make mas->min and mas->max point to a node range instead of a leaf entry
range. This allows mas to still be usable after mas_empty_area() returns.
Users would get unexpected results from other operations on the maple
state after calling the affected function.
For example, x86 MAP_32BIT mmap() acts as if there is no suitable gap when
there should be one.
Link: https://lkml.kernel.org/r/20230505145829.74574-1-zhangpeng.00@bytedance.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Peng Zhang <zhangpeng.00(a)bytedance.com>
Reported-by: "Edgecombe, Rick P" <rick.p.edgecombe(a)intel.com>
Reported-by: Tad <support(a)spotco.us>
Reported-by: Michael Keyes <mgkeyes(a)vigovproductions.net>
Link: https://lore.kernel.org/linux-mm/32f156ba80010fd97dbaf0a0cdfc84366608624d.c…
Link: https://lore.kernel.org/linux-mm/e6108286ac025c268964a7ead3aab9899f9bc6e9.c…
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Tested-by: Rick Edgecombe <rick.p.edgecombe(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/maple_tree.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
--- a/lib/maple_tree.c~maple_tree-make-maple-state-reusable-after-mas_empty_area
+++ a/lib/maple_tree.c
@@ -5317,15 +5317,9 @@ int mas_empty_area(struct ma_state *mas,
mt = mte_node_type(mas->node);
pivots = ma_pivots(mas_mn(mas), mt);
- if (offset)
- mas->min = pivots[offset - 1] + 1;
-
- if (offset < mt_pivots[mt])
- mas->max = pivots[offset];
-
- if (mas->index < mas->min)
- mas->index = mas->min;
-
+ min = mas_safe_min(mas, pivots, offset);
+ if (mas->index < min)
+ mas->index = min;
mas->last = mas->index + size - 1;
return 0;
}
_
Patches currently in -mm which might be from zhangpeng.00(a)bytedance.com are
maple_tree-fix-potential-out-of-bounds-access-in-mas_wr_end_piv.patch
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: ce0b15d11ad837fbacc5356941712218e38a0a83
Gitweb: https://git.kernel.org/tip/ce0b15d11ad837fbacc5356941712218e38a0a83
Author: Dave Hansen <dave.hansen(a)linux.intel.com>
AuthorDate: Tue, 16 May 2023 12:24:25 -07:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Wed, 17 May 2023 08:55:02 -07:00
x86/mm: Avoid incomplete Global INVLPG flushes
The INVLPG instruction is used to invalidate TLB entries for a
specified virtual address. When PCIDs are enabled, INVLPG is supposed
to invalidate TLB entries for the specified address for both the
current PCID *and* Global entries. (Note: Only kernel mappings set
Global=1.)
Unfortunately, some INVLPG implementations can leave Global
translations unflushed when PCIDs are enabled.
As a workaround, never enable PCIDs on affected processors.
I expect there to eventually be microcode mitigations to replace this
software workaround. However, the exact version numbers where that
will happen are not known today. Once the version numbers are set in
stone, the processor list can be tweaked to only disable PCIDs on
affected processors with affected microcode.
Note: if anyone wants a quick fix that doesn't require patching, just
stick 'nopcid' on your kernel command-line.
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
---
arch/x86/mm/init.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 3cdac0f..8192452 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -9,6 +9,7 @@
#include <linux/sched/task.h>
#include <asm/set_memory.h>
+#include <asm/cpu_device_id.h>
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/page.h>
@@ -261,6 +262,24 @@ static void __init probe_page_size_mask(void)
}
}
+#define INTEL_MATCH(_model) { .vendor = X86_VENDOR_INTEL, \
+ .family = 6, \
+ .model = _model, \
+ }
+/*
+ * INVLPG may not properly flush Global entries
+ * on these CPUs when PCIDs are enabled.
+ */
+static const struct x86_cpu_id invlpg_miss_ids[] = {
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE ),
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
+ {}
+};
+
static void setup_pcid(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
@@ -269,6 +288,12 @@ static void setup_pcid(void)
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
+ if (x86_match_cpu(invlpg_miss_ids)) {
+ pr_info("Incomplete global flushes, disabling PCID");
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ return;
+ }
+
if (boot_cpu_has(X86_FEATURE_PGE)) {
/*
* This can't be cr4_set_bits_and_update_boot() -- the
It seems vma merging with uffd paths is broken with either
register/unregister, where right now we can feed wrong parameters to
vma_merge() and it's found by recent patch which moved asserts upwards in
vma_merge() by Lorenzo Stoakes:
https://lore.kernel.org/all/ZFunF7DmMdK05MoF@FVFF77S0Q05N.cambridge.arm.com/
It's possible that "start" is contained within vma but not clamped to its
start. We need to convert this into either "cannot merge" case or "can
merge" case 4 which permits subdivision of prev by assigning vma to
prev. As we loop, each subsequent VMA will be clamped to the start.
This patch will eliminate the report and make sure vma_merge() calls will
become legal again.
One thing to mention is that the "Fixes: 29417d292bd0" below is there only
to help explain where the warning can start to trigger, the real commit to
fix should be 69dbe6daf104. Commit 29417d292bd0 helps us to identify the
issue, but unfortunately we may want to keep it in Fixes too just to ease
kernel backporters for easier tracking.
Cc: Lorenzo Stoakes <lstoakes(a)gmail.com>
Cc: Mike Rapoport (IBM) <rppt(a)kernel.org>
Cc: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Reported-by: Mark Rutland <mark.rutland(a)arm.com>
Reviewed-by: Lorenzo Stoakes <lstoakes(a)gmail.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Fixes: 29417d292bd0 ("mm/mmap/vma_merge: always check invariants")
Fixes: 69dbe6daf104 ("userfaultfd: use maple tree iterator to iterate VMAs")
Closes: https://lore.kernel.org/all/ZFunF7DmMdK05MoF@FVFF77S0Q05N.cambridge.arm.com/
Cc: linux-stable <stable(a)vger.kernel.org>
Signed-off-by: Peter Xu <peterx(a)redhat.com>
---
fs/userfaultfd.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 0fd96d6e39ce..17c8c345dac4 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1459,6 +1459,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
+ if (vma->vm_start < start)
+ prev = vma;
ret = 0;
for_each_vma_range(vmi, vma, end) {
@@ -1625,6 +1627,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
+ if (vma->vm_start < start)
+ prev = vma;
+
ret = 0;
for_each_vma_range(vmi, vma, end) {
cond_resched();
--
2.39.1
v1 -> v2:
- Move "Write to VIDC_CTRL_INIT after unmasking interrupts" up and add
a Fixes tag & Cc stable
- Reword the comment in "Correct IS_V6() checks"
- Move up "media: venus: Remap bufreq fields on HFI6XX", add Fixes and
Cc stable
- Use better English in "Use newly-introduced hfi_buffer_requirements
accessors" commit message
- Mention "Restrict writing SCIACMDARG3 to Venus V1/V2" doesn't seem to
regress SM8250 in the commit message
- Pick up tags (note: I capitalized the R in Dikshita's 'reviewed-by'
and removed one occurrence of random '**' to make sure review tools
like b4 don't go crazy)
- Handle AR50_LITE in "Assign registers based on VPU version"
- Drop /* VPUn */ comments, they're invalid as explained by Vikash
- Take a different approach to the sys_idle problem in patch 1
v1: https://lore.kernel.org/r/20230228-topic-venus-v1-0-58c2c88384e9@linaro.org
Currently upstream assumes all (well, almost all - see 7280 or CrOS
specific checks) Venus implementations using the same version of the
Hardware Firmware Interface can be treated the same way. This is
however not the case.
This series tries to introduce the groundwork to start differentiating
them based on the VPU (Video Processing Unit) hardware type, fixes a
couple of issues that were an effect of that generalized assumption
and lays the foundation for supporting 8150 (IRIS1) and SM6115/QCM2290
(AR50 Lite), which will hopefully come soon.
Tested on 8250, but pretty please test it on your boards too!
Signed-off-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
---
Konrad Dybcio (18):
media: venus: hfi_venus: Only consider sys_idle_indicator on V1
media: venus: hfi_venus: Write to VIDC_CTRL_INIT after unmasking interrupts
media: venus: Remap bufreq fields on HFI6XX
media: venus: Introduce VPU version distinction
media: venus: Add vpu_version to most SoCs
media: venus: firmware: Leave a clue for homegrown porters
media: venus: hfi_venus: Sanitize venus_boot_core() per-VPU-version
media: venus: core: Assign registers based on VPU version
media: venus: hfi_venus: Fix version checks in venus_halt_axi()
media: venus: hfi_venus: Fix version checks in venus_isr()
media: venus: hfi_venus: Fix version check in venus_cpu_and_video_core_idle()
media: venus: hfi_venus: Fix version check in venus_cpu_idle_and_pc_ready()
media: venus: firmware: Correct IS_V6() checks
media: venus: hfi_platform: Check vpu_version instead of device compatible
media: venus: vdec: Fix version check in vdec_set_work_route()
media: venus: Introduce accessors for remapped hfi_buffer_reqs members
media: venus: Use newly-introduced hfi_buffer_requirements accessors
media: venus: hfi_venus: Restrict writing SCIACMDARG3 to Venus V1/V2
drivers/media/platform/qcom/venus/core.c | 11 ++--
drivers/media/platform/qcom/venus/core.h | 15 ++++++
drivers/media/platform/qcom/venus/firmware.c | 19 +++++--
drivers/media/platform/qcom/venus/helpers.c | 7 +--
drivers/media/platform/qcom/venus/hfi_helper.h | 61 +++++++++++++++++++---
drivers/media/platform/qcom/venus/hfi_msgs.c | 2 +-
.../media/platform/qcom/venus/hfi_plat_bufs_v6.c | 22 ++++----
drivers/media/platform/qcom/venus/hfi_platform.c | 2 +-
drivers/media/platform/qcom/venus/hfi_venus.c | 45 ++++++++--------
drivers/media/platform/qcom/venus/vdec.c | 10 ++--
drivers/media/platform/qcom/venus/vdec_ctrls.c | 2 +-
drivers/media/platform/qcom/venus/venc.c | 4 +-
drivers/media/platform/qcom/venus/venc_ctrls.c | 2 +-
13 files changed, 138 insertions(+), 64 deletions(-)
---
base-commit: 92e815cf07ed24ee1c51b122f24ffcf2964b4b13
change-id: 20230228-topic-venus-70ea3bc76688
Best regards,
--
Konrad Dybcio <konrad.dybcio(a)linaro.org>
The patch titled
Subject: mm/uffd: fix vma operation where start addr cuts part of vma
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-uffd-fix-vma-operation-where-start-addr-cuts-part-of-vma.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/uffd: fix vma operation where start addr cuts part of vma
Date: Wed, 17 May 2023 15:09:15 -0400
Patch series "mm/uffd: Fix vma merge/split", v2.
This series contains two patches that fix vma merge/split for userfaultfd
on two separate issues.
Patch 1 fixes a regression since 6.1+ due to something we overlooked when
converting to maple tree apis. The plan is we use patch 1 to replace the
commit "2f628010799e (mm: userfaultfd: avoid passing an invalid range to
vma_merge())" in mm-hostfixes-unstable tree if possible, so as to bring
uffd vma operations back aligned with the rest code again.
Patch 2 fixes a long standing issue that vma can be left unmerged even if
we can for either uffd register or unregister.
Many thanks to Lorenzo on either noticing this issue from the assert
movement patch, looking at this problem, and also provided a reproducer on
the unmerged vma issue [1].
[1] https://gist.github.com/lorenzo-stoakes/a11a10f5f479e7a977fc456331266e0e
This patch (of 2):
It seems vma merging with uffd paths is broken with either
register/unregister, where right now we can feed wrong parameters to
vma_merge() and it's found by recent patch which moved asserts upwards in
vma_merge() by Lorenzo Stoakes:
https://lore.kernel.org/all/ZFunF7DmMdK05MoF@FVFF77S0Q05N.cambridge.arm.com/
It's possible that "start" is contained within vma but not clamped to its
start. We need to convert this into either "cannot merge" case or "can
merge" case 4 which permits subdivision of prev by assigning vma to prev.
As we loop, each subsequent VMA will be clamped to the start.
This patch will eliminate the report and make sure vma_merge() calls will
become legal again.
One thing to mention is that the "Fixes: 29417d292bd0" below is there only
to help explain where the warning can start to trigger, the real commit to
fix should be 69dbe6daf104. Commit 29417d292bd0 helps us to identify the
issue, but unfortunately we may want to keep it in Fixes too just to ease
kernel backporters for easier tracking.
Link: https://lkml.kernel.org/r/20230517190916.3429499-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20230517190916.3429499-2-peterx@redhat.com
Fixes: 29417d292bd0 ("mm/mmap/vma_merge: always check invariants")
Fixes: 69dbe6daf104 ("userfaultfd: use maple tree iterator to iterate VMAs")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reported-by: Mark Rutland <mark.rutland(a)arm.com>
Reviewed-by: Lorenzo Stoakes <lstoakes(a)gmail.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Closes: https://lore.kernel.org/all/ZFunF7DmMdK05MoF@FVFF77S0Q05N.cambridge.arm.com/
Cc: Lorenzo Stoakes <lstoakes(a)gmail.com>
Cc: Mike Rapoport (IBM) <rppt(a)kernel.org>
Cc: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/userfaultfd.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/fs/userfaultfd.c~mm-uffd-fix-vma-operation-where-start-addr-cuts-part-of-vma
+++ a/fs/userfaultfd.c
@@ -1459,6 +1459,8 @@ static int userfaultfd_register(struct u
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
+ if (vma->vm_start < start)
+ prev = vma;
ret = 0;
for_each_vma_range(vmi, vma, end) {
@@ -1625,6 +1627,9 @@ static int userfaultfd_unregister(struct
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
+ if (vma->vm_start < start)
+ prev = vma;
+
ret = 0;
for_each_vma_range(vmi, vma, end) {
cond_resched();
_
Patches currently in -mm which might be from peterx(a)redhat.com are
mm-uffd-fix-vma-operation-where-start-addr-cuts-part-of-vma.patch
mm-uffd-allow-vma-to-merge-as-much-as-possible.patch
We used to not pass in the pgoff correctly when register/unregister uffd
regions, it caused incorrect behavior on vma merging and can cause
mergeable vmas being separate after ioctls return.
For example, when we have:
vma1(range 0-9, with uffd), vma2(range 10-19, no uffd)
Then someone unregisters uffd on range (5-9), it should logically become:
vma1(range 0-4, with uffd), vma2(range 5-19, no uffd)
But with current code we'll have:
vma1(range 0-4, with uffd), vma3(range 5-9, no uffd), vma2(range 10-19, no uffd)
This patch allows such merge to happen correctly before ioctl returns.
This behavior seems to have existed since the 1st day of uffd. Since pgoff
for vma_merge() is only used to identify the possibility of vma merging,
meanwhile here what we did was always passing in a pgoff smaller than what
we should, so there should have no other side effect besides not merging
it. Let's still tentatively copy stable for this, even though I don't see
anything will go wrong besides vma being split (which is mostly not user
visible).
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Rapoport (IBM) <rppt(a)kernel.org>
Cc: linux-stable <stable(a)vger.kernel.org>
Reported-by: Lorenzo Stoakes <lstoakes(a)gmail.com>
Acked-by: Lorenzo Stoakes <lstoakes(a)gmail.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
---
fs/userfaultfd.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 17c8c345dac4..4e800bb7d2ab 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1332,6 +1332,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
bool basic_ioctls;
unsigned long start, end, vma_end;
struct vma_iterator vmi;
+ pgoff_t pgoff;
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1484,8 +1485,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma_end = min(end, vma->vm_end);
new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
- vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+ vma->anon_vma, vma->vm_file, pgoff,
vma_policy(vma),
((struct vm_userfaultfd_ctx){ ctx }),
anon_vma_name(vma));
@@ -1565,6 +1567,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
unsigned long start, end, vma_end;
const void __user *buf = (void __user *)arg;
struct vma_iterator vmi;
+ pgoff_t pgoff;
ret = -EFAULT;
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
@@ -1667,8 +1670,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
uffd_wp_range(vma, start, vma_end - start, false);
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
- vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+ vma->anon_vma, vma->vm_file, pgoff,
vma_policy(vma),
NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev) {
--
2.39.1
It seems vma merging with uffd paths is broken with either
register/unregister, where right now we can feed wrong parameters to
vma_merge() and it's found by recent patch which moved asserts upwards in
vma_merge() by Lorenzo Stoakes:
https://lore.kernel.org/all/ZFunF7DmMdK05MoF@FVFF77S0Q05N.cambridge.arm.com/
The problem is in the current code base we didn't fixup "prev" for the case
where "start" address can be within the "prev" vma section. In that case
we should have "prev" points to the current vma rather than the previous
one when feeding to vma_merge().
This patch will eliminate the report and make sure vma_merge() calls will
become legal again.
One thing to mention is that the "Fixes: 29417d292bd0" below is there only
to help explain where the warning can start to trigger, the real commit to
fix should be 69dbe6daf104. Commit 29417d292bd0 helps us to identify the
issue, but unfortunately we may want to keep it in Fixes too just to ease
kernel backporters for easier tracking.
Cc: Lorenzo Stoakes <lstoakes(a)gmail.com>
Cc: Mike Rapoport (IBM) <rppt(a)kernel.org>
Cc: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Reported-by: Mark Rutland <mark.rutland(a)arm.com>
Fixes: 29417d292bd0 ("mm/mmap/vma_merge: always check invariants")
Fixes: 69dbe6daf104 ("userfaultfd: use maple tree iterator to iterate VMAs")
Closes: https://lore.kernel.org/all/ZFunF7DmMdK05MoF@FVFF77S0Q05N.cambridge.arm.com/
Cc: linux-stable <stable(a)vger.kernel.org>
Signed-off-by: Peter Xu <peterx(a)redhat.com>
---
fs/userfaultfd.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 0fd96d6e39ce..17c8c345dac4 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1459,6 +1459,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
+ if (vma->vm_start < start)
+ prev = vma;
ret = 0;
for_each_vma_range(vmi, vma, end) {
@@ -1625,6 +1627,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
+ if (vma->vm_start < start)
+ prev = vma;
+
ret = 0;
for_each_vma_range(vmi, vma, end) {
cond_resched();
--
2.39.1
We used to not pass in the pgoff correctly when register/unregister uffd
regions, it caused incorrect behavior on vma merging and can cause
mergeable vmas being separate after ioctls return.
For example, when we have:
vma1(range 0-9, with uffd), vma2(range 10-19, no uffd)
Then someone unregisters uffd on range (5-9), it should logically become:
vma1(range 0-4, with uffd), vma2(range 5-19, no uffd)
But with current code we'll have:
vma1(range 0-4, with uffd), vma3(range 5-9, no uffd), vma2(range 10-19, no uffd)
This patch allows such merge to happen correctly before ioctl returns.
This behavior seems to have existed since the 1st day of uffd. Since pgoff
for vma_merge() is only used to identify the possibility of vma merging,
meanwhile here what we did was always passing in a pgoff smaller than what
we should, so there should have no other side effect besides not merging
it. Let's still tentatively copy stable for this, even though I don't see
anything will go wrong besides vma being split (which is mostly not user
visible).
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Rapoport (IBM) <rppt(a)kernel.org>
Cc: linux-stable <stable(a)vger.kernel.org>
Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
---
fs/userfaultfd.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 17c8c345dac4..4e800bb7d2ab 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1332,6 +1332,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
bool basic_ioctls;
unsigned long start, end, vma_end;
struct vma_iterator vmi;
+ pgoff_t pgoff;
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1484,8 +1485,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma_end = min(end, vma->vm_end);
new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
- vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+ vma->anon_vma, vma->vm_file, pgoff,
vma_policy(vma),
((struct vm_userfaultfd_ctx){ ctx }),
anon_vma_name(vma));
@@ -1565,6 +1567,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
unsigned long start, end, vma_end;
const void __user *buf = (void __user *)arg;
struct vma_iterator vmi;
+ pgoff_t pgoff;
ret = -EFAULT;
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
@@ -1667,8 +1670,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
uffd_wp_range(vma, start, vma_end - start, false);
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
- vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+ vma->anon_vma, vma->vm_file, pgoff,
vma_policy(vma),
NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev) {
--
2.39.1
txs may be dropped if the frame is aggregated in AMSDU. When the problem
shows up, some SKBs would be hold in driver to cause network stopped
temporarily. Even if the problem can be recovered by txs timeout handling,
mt7921 still need to disable txs in AMSDU to avoid this issue.
Cc: stable(a)vger.kernel.org
Fixes: 163f4d22c118 ("mt76: mt7921: add MAC support")
Reviewed-by: Shayne Chen <shayne.chen(a)mediatek.com>
Signed-off-by: Deren Wu <deren.wu(a)mediatek.com>
---
v2: * update operation for booleans, not bitfields (suggested by Simon Horman)
* rectify fixes tag for downstream backport
---
drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index ee0fbfcd07d6..a548b8debf5f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -495,6 +495,7 @@ void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi,
BSS_CHANGED_BEACON_ENABLED));
bool inband_disc = !!(changed & (BSS_CHANGED_UNSOL_BCAST_PROBE_RESP |
BSS_CHANGED_FILS_DISCOVERY));
+ bool amsdu_en = wcid->amsdu;
if (vif) {
struct mt76_vif *mvif = (struct mt76_vif *)vif->drv_priv;
@@ -554,12 +555,14 @@ void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi,
txwi[4] = 0;
val = FIELD_PREP(MT_TXD5_PID, pid);
- if (pid >= MT_PACKET_ID_FIRST)
+ if (pid >= MT_PACKET_ID_FIRST) {
val |= MT_TXD5_TX_STATUS_HOST;
+ amsdu_en = amsdu_en && !is_mt7921(dev);
+ }
txwi[5] = cpu_to_le32(val);
txwi[6] = 0;
- txwi[7] = wcid->amsdu ? cpu_to_le32(MT_TXD7_HW_AMSDU) : 0;
+ txwi[7] = amsdu_en ? cpu_to_le32(MT_TXD7_HW_AMSDU) : 0;
if (is_8023)
mt76_connac2_mac_write_txwi_8023(txwi, skb, wcid);
--
2.18.0
Many places in the kernel write the Link Control and Root Control PCI
Express Capability Registers without proper concurrency control and
this could result in losing the changes one of the writers intended to
make.
Add pcie_cap_lock spinlock into the struct pci_dev and use it to
protect bit changes made in the RMW capability accessors. Protect only
a selected set of registers by differentiating the RMW accessor
internally to locked/unlocked variants using a wrapper which has the
same signature as pcie_capability_clear_and_set_word(). As the
Capability Register (pos) given to the wrapper is always a constant,
the compiler should be able to simplify all the dead-code away.
The RMW locking is only added to pcie_capability_clear_and_set_word()
because so far only the Link Control Register (ASPM, hotplug, various
drivers) and the Root Control Register (AER & PME) require RMW locking.
Fixes: c7f486567c1d ("PCI PM: PCIe PME root port service driver")
Fixes: f12eb72a268b ("PCI/ASPM: Use PCI Express Capability accessors")
Fixes: 7d715a6c1ae5 ("PCI: add PCI Express ASPM support")
Fixes: affa48de8417 ("staging/rdma/hfi1: Add support for enabling/disabling PCIe ASPM")
Fixes: 849a9366cba9 ("misc: rtsx: Add support new chip rts5228 mmc: rtsx: Add support MMC_CAP2_NO_MMC")
Fixes: 3d1e7aa80d1c ("misc: rtsx: Use pcie_capability_clear_and_set_word() for PCI_EXP_LNKCTL")
Fixes: c0e5f4e73a71 ("misc: rtsx: Add support for RTS5261")
Fixes: 3df4fce739e2 ("misc: rtsx: separate aspm mode into MODE_REG and MODE_CFG")
Fixes: 121e9c6b5c4c ("misc: rtsx: modify and fix init_hw function")
Fixes: 19f3bd548f27 ("mfd: rtsx: Remove LCTLR defination")
Fixes: 773ccdfd9cc6 ("mfd: rtsx: Read vendor setting from config space")
Fixes: 8275b77a1513 ("mfd: rts5249: Add support for RTS5250S power saving")
Fixes: 5da4e04ae480 ("misc: rtsx: Add support for RTS5260")
Fixes: 0f49bfbd0f2e ("tg3: Use PCI Express Capability accessors")
Fixes: 5e7dfd0fb94a ("tg3: Prevent corruption at 10 / 100Mbps w CLKREQ")
Fixes: b726e493e8dc ("r8169: sync existing 8168 device hardware start sequences with vendor driver")
Fixes: e6de30d63eb1 ("r8169: more 8168dp support.")
Fixes: 8a06127602de ("Bluetooth: hci_bcm4377: Add new driver for BCM4377 PCIe boards")
Fixes: 6f461f6c7c96 ("e1000e: enable/disable ASPM L0s and L1 and ERT according to hardware errata")
Fixes: 1eae4eb2a1c7 ("e1000e: Disable L1 ASPM power savings for 82573 mobile variants")
Fixes: 8060e169e02f ("ath9k: Enable extended synch for AR9485 to fix L0s recovery issue")
Fixes: 69ce674bfa69 ("ath9k: do btcoex ASPM disabling at initialization time")
Fixes: f37f05503575 ("mt76: mt76x2e: disable pcie_aspm by default")
Suggested-by: Lukas Wunner <lukas(a)wunner.de>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
drivers/pci/access.c | 20 +++++++++++++++++---
drivers/pci/probe.c | 1 +
include/linux/pci.h | 34 ++++++++++++++++++++++++++++++++--
3 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 3c230ca3de58..0b2e90d2f04f 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -497,8 +497,8 @@ int pcie_capability_write_dword(struct pci_dev *dev, int pos, u32 val)
}
EXPORT_SYMBOL(pcie_capability_write_dword);
-int pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
- u16 clear, u16 set)
+int pcie_capability_clear_and_set_word_unlocked(struct pci_dev *dev, int pos,
+ u16 clear, u16 set)
{
int ret;
u16 val;
@@ -512,7 +512,21 @@ int pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
return ret;
}
-EXPORT_SYMBOL(pcie_capability_clear_and_set_word);
+EXPORT_SYMBOL(pcie_capability_clear_and_set_word_unlocked);
+
+int pcie_capability_clear_and_set_word_locked(struct pci_dev *dev, int pos,
+ u16 clear, u16 set)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&dev->pcie_cap_lock, flags);
+ ret = pcie_capability_clear_and_set_word_unlocked(dev, pos, clear, set);
+ spin_unlock_irqrestore(&dev->pcie_cap_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(pcie_capability_clear_and_set_word_locked);
int pcie_capability_clear_and_set_dword(struct pci_dev *dev, int pos,
u32 clear, u32 set)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 0b2826c4a832..53ac0d3287a8 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2318,6 +2318,7 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
.end = -1,
};
+ spin_lock_init(&dev->pcie_cap_lock);
#ifdef CONFIG_PCI_MSI
raw_spin_lock_init(&dev->msi_lock);
#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 60b8772b5bd4..ab7682ed172f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -467,6 +467,7 @@ struct pci_dev {
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
+ spinlock_t pcie_cap_lock; /* Protects RMW ops in capability accessors */
u32 saved_config_space[16]; /* Config space saved at suspend time */
struct hlist_head saved_cap_space;
int rom_attr_enabled; /* Display of ROM attribute enabled? */
@@ -1217,11 +1218,40 @@ int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val);
int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val);
int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val);
int pcie_capability_write_dword(struct pci_dev *dev, int pos, u32 val);
-int pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
- u16 clear, u16 set);
+int pcie_capability_clear_and_set_word_unlocked(struct pci_dev *dev, int pos,
+ u16 clear, u16 set);
+int pcie_capability_clear_and_set_word_locked(struct pci_dev *dev, int pos,
+ u16 clear, u16 set);
int pcie_capability_clear_and_set_dword(struct pci_dev *dev, int pos,
u32 clear, u32 set);
+/**
+ * pcie_capability_clear_and_set_word - RMW accessor for PCI Express Capability Registers
+ * @dev: PCI device structure of the PCI Express device
+ * @pos: PCI Express Capability Register
+ * @clear: Clear bitmask
+ * @set: Set bitmask
+ *
+ * Perform a Read-Modify-Write (RMW) operation using @clear and @set
+ * bitmasks on PCI Express Capability Register at @pos. Certain PCI Express
+ * Capability Registers are accessed concurrently in RMW fashion, hence
+ * require locking which is handled transparently to the caller.
+ */
+static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev,
+ int pos,
+ u16 clear, u16 set)
+{
+ switch (pos) {
+ case PCI_EXP_LNKCTL:
+ case PCI_EXP_RTCTL:
+ return pcie_capability_clear_and_set_word_locked(dev, pos,
+ clear, set);
+ default:
+ return pcie_capability_clear_and_set_word_unlocked(dev, pos,
+ clear, set);
+ }
+}
+
static inline int pcie_capability_set_word(struct pci_dev *dev, int pos,
u16 set)
{
--
2.30.2