There have been reports of races in evtchn_from_irq() where the info pointer has been NULL.
Avoid that case by testing info before dereferencing it.
In order to avoid accessing a just freed info structure do the kfree() via kfree_rcu().
Cc: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Signed-off-by: Juergen Gross jgross@suse.com --- drivers/xen/events/events_base.c | 10 ++++++++-- drivers/xen/events/events_internal.h | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 499eff7d3f65..838762fe3d6e 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -247,10 +247,16 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ unsigned int evtchn_from_irq(unsigned irq) { + struct irq_info *info; + if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) return 0;
- return info_for_irq(irq)->evtchn; + info = info_for_irq(irq); + if (info == NULL) + return 0; + + return info->evtchn; }
unsigned irq_from_evtchn(unsigned int evtchn) @@ -436,7 +442,7 @@ static void xen_free_irq(unsigned irq)
WARN_ON(info->refcnt > 0);
- kfree(info); + kfree_rcu(info, rcu);
/* Legacy IRQ descriptors are managed by the arch. */ if (irq < nr_legacy_irqs()) diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 82938cff6c7a..c421055843c8 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -7,6 +7,8 @@ #ifndef __EVENTS_INTERNAL_H__ #define __EVENTS_INTERNAL_H__
+#include <linux/rcupdate.h> + /* Interrupt types. */ enum xen_irq_type { IRQT_UNBOUND = 0, @@ -30,6 +32,7 @@ enum xen_irq_type { */ struct irq_info { struct list_head list; + struct rcu_head rcu; int refcnt; enum xen_irq_type type; /* type */ unsigned irq;
On 3/19/20 3:14 AM, Juergen Gross wrote:
There have been reports of races in evtchn_from_irq() where the info pointer has been NULL.
Do you have an example of how this race happens (and how moving kfree to rcu will help)?
(I am mostly wondering whether we'd be masking a more serious problem)
-boris
On 3/19/20 3:14 AM, Juergen Gross wrote:
There have been reports of races in evtchn_from_irq() where the info pointer has been NULL.
Avoid that case by testing info before dereferencing it.
In order to avoid accessing a just freed info structure do the kfree() via kfree_rcu().
Looks like noone ever responded to this.
This change looks fine but is there a background on the problem? I looked in the archives and didn't find the relevant discussion.
-boris
Cc: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Signed-off-by: Juergen Gross jgross@suse.com
drivers/xen/events/events_base.c | 10 ++++++++-- drivers/xen/events/events_internal.h | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 499eff7d3f65..838762fe3d6e 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -247,10 +247,16 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ unsigned int evtchn_from_irq(unsigned irq) {
- struct irq_info *info;
- if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) return 0;
- return info_for_irq(irq)->evtchn;
- info = info_for_irq(irq);
- if (info == NULL)
return 0;
- return info->evtchn;
} unsigned irq_from_evtchn(unsigned int evtchn) @@ -436,7 +442,7 @@ static void xen_free_irq(unsigned irq) WARN_ON(info->refcnt > 0);
- kfree(info);
- kfree_rcu(info, rcu);
/* Legacy IRQ descriptors are managed by the arch. */ if (irq < nr_legacy_irqs()) diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 82938cff6c7a..c421055843c8 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -7,6 +7,8 @@ #ifndef __EVENTS_INTERNAL_H__ #define __EVENTS_INTERNAL_H__ +#include <linux/rcupdate.h>
/* Interrupt types. */ enum xen_irq_type { IRQT_UNBOUND = 0, @@ -30,6 +32,7 @@ enum xen_irq_type { */ struct irq_info { struct list_head list;
- struct rcu_head rcu; int refcnt; enum xen_irq_type type; /* type */ unsigned irq;
On Thu, May 21, 2020 at 01:22:03PM -0400, Boris Ostrovsky wrote:
On 3/19/20 3:14 AM, Juergen Gross wrote:
There have been reports of races in evtchn_from_irq() where the info pointer has been NULL.
Avoid that case by testing info before dereferencing it.
In order to avoid accessing a just freed info structure do the kfree() via kfree_rcu().
Looks like noone ever responded to this.
This change looks fine but is there a background on the problem? I looked in the archives and didn't find the relevant discussion.
Here is the original bug report: https://xen.markmail.org/thread/44apwkwzeme4uavo
On 5/21/20 2:46 PM, Marek Marczykowski-Górecki wrote:
On Thu, May 21, 2020 at 01:22:03PM -0400, Boris Ostrovsky wrote:
On 3/19/20 3:14 AM, Juergen Gross wrote:
There have been reports of races in evtchn_from_irq() where the info pointer has been NULL.
Avoid that case by testing info before dereferencing it.
In order to avoid accessing a just freed info structure do the kfree() via kfree_rcu().
Looks like noone ever responded to this.
This change looks fine but is there a background on the problem? I looked in the archives and didn't find the relevant discussion.
Here is the original bug report: https://xen.markmail.org/thread/44apwkwzeme4uavo
Thanks. Do we know what the race is? Is it because an event is being delivered from a dying guest who is in the process of tearing down event channels?
-boris
On 21.05.20 23:57, Boris Ostrovsky wrote:
On 5/21/20 2:46 PM, Marek Marczykowski-Górecki wrote:
On Thu, May 21, 2020 at 01:22:03PM -0400, Boris Ostrovsky wrote:
On 3/19/20 3:14 AM, Juergen Gross wrote:
There have been reports of races in evtchn_from_irq() where the info pointer has been NULL.
Avoid that case by testing info before dereferencing it.
In order to avoid accessing a just freed info structure do the kfree() via kfree_rcu().
Looks like noone ever responded to this.
This change looks fine but is there a background on the problem? I looked in the archives and didn't find the relevant discussion.
Here is the original bug report: https://xen.markmail.org/thread/44apwkwzeme4uavo
Thanks. Do we know what the race is? Is it because an event is being delivered from a dying guest who is in the process of tearing down event channels?
Missing synchronization between event channel (de-)allocation and handling.
I have a patch sitting here, just didn't have the time to do some proper testing and sending it out.
Juergen
linux-stable-mirror@lists.linaro.org