This driver's remove path calls cancel_delayed_work().
However, that function does not wait until the work function
finishes. This could mean that the work function is still
running after the driver's remove function has finished,
which would result in a use-after-free.
Fix by calling cancel_delayed_work_sync(), which ensures that
that the work is properly cancelled, no longer running, and
unable to re-schedule itself.
This issue was detected with the help of Coccinelle.
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Sven Van Asbroeck <TheSven73(a)gmail.com>
---
drivers/power/supply/ltc2941-battery-gauge.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c
index da49436176cd..30a9014b2f95 100644
--- a/drivers/power/supply/ltc2941-battery-gauge.c
+++ b/drivers/power/supply/ltc2941-battery-gauge.c
@@ -449,7 +449,7 @@ static int ltc294x_i2c_remove(struct i2c_client *client)
{
struct ltc294x_info *info = i2c_get_clientdata(client);
- cancel_delayed_work(&info->work);
+ cancel_delayed_work_sync(&info->work);
power_supply_unregister(info->supply);
return 0;
}
--
2.17.1
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07928d9bfc81640bab36f5190e8725894d93b659 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert(a)gondor.apana.org.au>
Date: Tue, 19 Nov 2019 13:17:31 +0800
Subject: [PATCH] padata: Remove broken queue flushing
The function padata_flush_queues is fundamentally broken because
it cannot force padata users to complete the request that is
underway. IOW padata has to passively wait for the completion
of any outstanding work.
As it stands flushing is used in two places. Its use in padata_stop
is simply unnecessary because nothing depends on the queues to
be flushed afterwards.
The other use in padata_replace is more substantial as we depend
on it to free the old pd structure. This patch instead uses the
pd->refcnt to dynamically free the pd structure once all requests
are complete.
Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Reviewed-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/kernel/padata.c b/kernel/padata.c
index c3fec1413295..da56a235a255 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -35,6 +35,8 @@
#define MAX_OBJ_NUM 1000
+static void padata_free_pd(struct parallel_data *pd);
+
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
int cpu, target_cpu;
@@ -283,6 +285,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
struct padata_serial_queue *squeue;
struct parallel_data *pd;
LIST_HEAD(local_list);
+ int cnt;
local_bh_disable();
squeue = container_of(serial_work, struct padata_serial_queue, work);
@@ -292,6 +295,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_replace_init(&squeue->serial.list, &local_list);
spin_unlock(&squeue->serial.lock);
+ cnt = 0;
+
while (!list_empty(&local_list)) {
struct padata_priv *padata;
@@ -301,9 +306,12 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_del_init(&padata->list);
padata->serial(padata);
- atomic_dec(&pd->refcnt);
+ cnt++;
}
local_bh_enable();
+
+ if (atomic_sub_and_test(cnt, &pd->refcnt))
+ padata_free_pd(pd);
}
/**
@@ -440,7 +448,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
padata_init_squeues(pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
- atomic_set(&pd->refcnt, 0);
+ atomic_set(&pd->refcnt, 1);
spin_lock_init(&pd->lock);
pd->cpu = cpumask_first(pd->cpumask.pcpu);
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -466,29 +474,6 @@ static void padata_free_pd(struct parallel_data *pd)
kfree(pd);
}
-/* Flush all objects out of the padata queues. */
-static void padata_flush_queues(struct parallel_data *pd)
-{
- int cpu;
- struct padata_parallel_queue *pqueue;
- struct padata_serial_queue *squeue;
-
- for_each_cpu(cpu, pd->cpumask.pcpu) {
- pqueue = per_cpu_ptr(pd->pqueue, cpu);
- flush_work(&pqueue->work);
- }
-
- if (atomic_read(&pd->reorder_objects))
- padata_reorder(pd);
-
- for_each_cpu(cpu, pd->cpumask.cbcpu) {
- squeue = per_cpu_ptr(pd->squeue, cpu);
- flush_work(&squeue->work);
- }
-
- BUG_ON(atomic_read(&pd->refcnt) != 0);
-}
-
static void __padata_start(struct padata_instance *pinst)
{
pinst->flags |= PADATA_INIT;
@@ -502,10 +487,6 @@ static void __padata_stop(struct padata_instance *pinst)
pinst->flags &= ~PADATA_INIT;
synchronize_rcu();
-
- get_online_cpus();
- padata_flush_queues(pinst->pd);
- put_online_cpus();
}
/* Replace the internal control structure with a new one. */
@@ -526,8 +507,8 @@ static void padata_replace(struct padata_instance *pinst,
if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
notification_mask |= PADATA_CPU_SERIAL;
- padata_flush_queues(pd_old);
- padata_free_pd(pd_old);
+ if (atomic_dec_and_test(&pd_old->refcnt))
+ padata_free_pd(pd_old);
if (notification_mask)
blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07928d9bfc81640bab36f5190e8725894d93b659 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert(a)gondor.apana.org.au>
Date: Tue, 19 Nov 2019 13:17:31 +0800
Subject: [PATCH] padata: Remove broken queue flushing
The function padata_flush_queues is fundamentally broken because
it cannot force padata users to complete the request that is
underway. IOW padata has to passively wait for the completion
of any outstanding work.
As it stands flushing is used in two places. Its use in padata_stop
is simply unnecessary because nothing depends on the queues to
be flushed afterwards.
The other use in padata_replace is more substantial as we depend
on it to free the old pd structure. This patch instead uses the
pd->refcnt to dynamically free the pd structure once all requests
are complete.
Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Reviewed-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/kernel/padata.c b/kernel/padata.c
index c3fec1413295..da56a235a255 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -35,6 +35,8 @@
#define MAX_OBJ_NUM 1000
+static void padata_free_pd(struct parallel_data *pd);
+
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
int cpu, target_cpu;
@@ -283,6 +285,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
struct padata_serial_queue *squeue;
struct parallel_data *pd;
LIST_HEAD(local_list);
+ int cnt;
local_bh_disable();
squeue = container_of(serial_work, struct padata_serial_queue, work);
@@ -292,6 +295,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_replace_init(&squeue->serial.list, &local_list);
spin_unlock(&squeue->serial.lock);
+ cnt = 0;
+
while (!list_empty(&local_list)) {
struct padata_priv *padata;
@@ -301,9 +306,12 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_del_init(&padata->list);
padata->serial(padata);
- atomic_dec(&pd->refcnt);
+ cnt++;
}
local_bh_enable();
+
+ if (atomic_sub_and_test(cnt, &pd->refcnt))
+ padata_free_pd(pd);
}
/**
@@ -440,7 +448,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
padata_init_squeues(pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
- atomic_set(&pd->refcnt, 0);
+ atomic_set(&pd->refcnt, 1);
spin_lock_init(&pd->lock);
pd->cpu = cpumask_first(pd->cpumask.pcpu);
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -466,29 +474,6 @@ static void padata_free_pd(struct parallel_data *pd)
kfree(pd);
}
-/* Flush all objects out of the padata queues. */
-static void padata_flush_queues(struct parallel_data *pd)
-{
- int cpu;
- struct padata_parallel_queue *pqueue;
- struct padata_serial_queue *squeue;
-
- for_each_cpu(cpu, pd->cpumask.pcpu) {
- pqueue = per_cpu_ptr(pd->pqueue, cpu);
- flush_work(&pqueue->work);
- }
-
- if (atomic_read(&pd->reorder_objects))
- padata_reorder(pd);
-
- for_each_cpu(cpu, pd->cpumask.cbcpu) {
- squeue = per_cpu_ptr(pd->squeue, cpu);
- flush_work(&squeue->work);
- }
-
- BUG_ON(atomic_read(&pd->refcnt) != 0);
-}
-
static void __padata_start(struct padata_instance *pinst)
{
pinst->flags |= PADATA_INIT;
@@ -502,10 +487,6 @@ static void __padata_stop(struct padata_instance *pinst)
pinst->flags &= ~PADATA_INIT;
synchronize_rcu();
-
- get_online_cpus();
- padata_flush_queues(pinst->pd);
- put_online_cpus();
}
/* Replace the internal control structure with a new one. */
@@ -526,8 +507,8 @@ static void padata_replace(struct padata_instance *pinst,
if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
notification_mask |= PADATA_CPU_SERIAL;
- padata_flush_queues(pd_old);
- padata_free_pd(pd_old);
+ if (atomic_dec_and_test(&pd_old->refcnt))
+ padata_free_pd(pd_old);
if (notification_mask)
blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07928d9bfc81640bab36f5190e8725894d93b659 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert(a)gondor.apana.org.au>
Date: Tue, 19 Nov 2019 13:17:31 +0800
Subject: [PATCH] padata: Remove broken queue flushing
The function padata_flush_queues is fundamentally broken because
it cannot force padata users to complete the request that is
underway. IOW padata has to passively wait for the completion
of any outstanding work.
As it stands flushing is used in two places. Its use in padata_stop
is simply unnecessary because nothing depends on the queues to
be flushed afterwards.
The other use in padata_replace is more substantial as we depend
on it to free the old pd structure. This patch instead uses the
pd->refcnt to dynamically free the pd structure once all requests
are complete.
Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Reviewed-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/kernel/padata.c b/kernel/padata.c
index c3fec1413295..da56a235a255 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -35,6 +35,8 @@
#define MAX_OBJ_NUM 1000
+static void padata_free_pd(struct parallel_data *pd);
+
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
int cpu, target_cpu;
@@ -283,6 +285,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
struct padata_serial_queue *squeue;
struct parallel_data *pd;
LIST_HEAD(local_list);
+ int cnt;
local_bh_disable();
squeue = container_of(serial_work, struct padata_serial_queue, work);
@@ -292,6 +295,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_replace_init(&squeue->serial.list, &local_list);
spin_unlock(&squeue->serial.lock);
+ cnt = 0;
+
while (!list_empty(&local_list)) {
struct padata_priv *padata;
@@ -301,9 +306,12 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_del_init(&padata->list);
padata->serial(padata);
- atomic_dec(&pd->refcnt);
+ cnt++;
}
local_bh_enable();
+
+ if (atomic_sub_and_test(cnt, &pd->refcnt))
+ padata_free_pd(pd);
}
/**
@@ -440,7 +448,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
padata_init_squeues(pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
- atomic_set(&pd->refcnt, 0);
+ atomic_set(&pd->refcnt, 1);
spin_lock_init(&pd->lock);
pd->cpu = cpumask_first(pd->cpumask.pcpu);
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -466,29 +474,6 @@ static void padata_free_pd(struct parallel_data *pd)
kfree(pd);
}
-/* Flush all objects out of the padata queues. */
-static void padata_flush_queues(struct parallel_data *pd)
-{
- int cpu;
- struct padata_parallel_queue *pqueue;
- struct padata_serial_queue *squeue;
-
- for_each_cpu(cpu, pd->cpumask.pcpu) {
- pqueue = per_cpu_ptr(pd->pqueue, cpu);
- flush_work(&pqueue->work);
- }
-
- if (atomic_read(&pd->reorder_objects))
- padata_reorder(pd);
-
- for_each_cpu(cpu, pd->cpumask.cbcpu) {
- squeue = per_cpu_ptr(pd->squeue, cpu);
- flush_work(&squeue->work);
- }
-
- BUG_ON(atomic_read(&pd->refcnt) != 0);
-}
-
static void __padata_start(struct padata_instance *pinst)
{
pinst->flags |= PADATA_INIT;
@@ -502,10 +487,6 @@ static void __padata_stop(struct padata_instance *pinst)
pinst->flags &= ~PADATA_INIT;
synchronize_rcu();
-
- get_online_cpus();
- padata_flush_queues(pinst->pd);
- put_online_cpus();
}
/* Replace the internal control structure with a new one. */
@@ -526,8 +507,8 @@ static void padata_replace(struct padata_instance *pinst,
if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
notification_mask |= PADATA_CPU_SERIAL;
- padata_flush_queues(pd_old);
- padata_free_pd(pd_old);
+ if (atomic_dec_and_test(&pd_old->refcnt))
+ padata_free_pd(pd_old);
if (notification_mask)
blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
The "Intel 64 and IA-32 Architectures Software Developer’s Manual
Volume 4: Model-Specific Registers" has the following table for the
values from freq_desc_byt:
000B: 083.3 MHz
001B: 100.0 MHz
010B: 133.3 MHz
011B: 116.7 MHz
100B: 080.0 MHz
Notice how for e.g the 83.3 MHz value there are 3 significant digits,
which translates to an accuracy of a 1000 ppm, where as your typical
crystal oscillator is 20 - 100 ppm, so the accuracy of the frequency
format used in the Software Developer’s Manual is not really helpful.
As far as we know Bay Trail SoCs use a 25 MHz crystal and Cherry Trail
uses a 19.2 MHz crystal, the crystal is the source clk for a root PLL
which outputs 1600 and 100 MHz. It is unclear if the root PLL outputs are
used directly by the CPU clock PLL or if there is another PLL in between.
This does not matter though, we can model the chain of PLLs as a single
PLL with a quotient equal to the quotients of all PLLs in the chain
multiplied.
So we can create a simplified model of the CPU clock setup using a
reference clock of 100 MHz plus a quotient which gets us as close to the
frequency from the SDM as possible.
For the 83.3 MHz example from above this would give us 100 MHz * 5 / 6 =
83 and 1/3 MHz, which matches exactly what has been measured on actual hw.
This commit makes the tsc_msr.c code use a simplified PLL model with a
reference clock of 100 MHz for all Bay and Cherry Trail models.
This has been tested on the following models:
CPU freq before: CPU freq after this commit:
Intel N2840 2165.800 MHz 2166.667 MHz
Intel Z3736 1332.800 MHz 1333.333 MHz
Intel Z3775 1466.300 MHz 1466.667 MHz
Intel Z8350 1440.000 MHz 1440.000 MHz
Intel Z8750 1600.000 MHz 1600.000 MHz
This fixes the time drifting by about 1 second per hour (20 - 30 seconds
per day) on (some) devices which rely on the tsc_msr.c code to determine
the TSC frequency.
Cc: stable(a)vger.kernel.org
Reported-by: Vipul Kumar <vipulk0511(a)gmail.com>
Suggested-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes in v2:
-s/DSM/SDM/
-Do not refer to Merrifield / Moorefield as BYT / CHT, they only share the
CPU core design and otherwise are significantly different
---
arch/x86/kernel/tsc_msr.c | 90 ++++++++++++++++++++++++++++++++++-----
1 file changed, 80 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 95030895fffa..b10be7f4d760 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -17,6 +17,23 @@
#define MAX_NUM_FREQS 16 /* 4 bits to select the frequency */
+/*
+ * The frequency numbers in the SDM are e.g. 83.3 MHz, which does not contain a
+ * lot of accuracy which leads to clock drift. As far as we know Bay Trail SoCs
+ * use a 25 MHz crystal and Cherry Trail uses a 19.2 MHz crystal, the crystal
+ * is the source clk for a root PLL which outputs 1600 and 100 MHz. It is
+ * unclear if the root PLL outputs are used directly by the CPU clock PLL or
+ * if there is another PLL in between.
+ * This does not matter though, we can model the chain of PLLs as a single PLL
+ * with a quotient equal to the quotients of all PLLs in the chain multiplied.
+ * So we can create a simplified model of the CPU clock setup using a reference
+ * clock of 100 MHz plus a quotient which gets us as close to the frequency
+ * from the SDM as possible.
+ * For the 83.3 MHz example from above this would give us 100 MHz * 5 / 6 =
+ * 83 and 1/3 MHz, which matches exactly what has been measured on actual hw.
+ */
+#define REFERENCE_KHZ 100000
+
/*
* If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be
* read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40].
@@ -26,6 +43,14 @@
*/
struct freq_desc {
bool use_msr_plat;
+ struct {
+ u32 multiplier;
+ u32 divider;
+ } pairs[MAX_NUM_FREQS];
+ /*
+ * Some CPU frequencies in the SDM do not map to known PLL freqs, in
+ * that case the pairs arrays is empty and the freqs array is used.
+ */
u32 freqs[MAX_NUM_FREQS];
u32 mask;
};
@@ -47,31 +72,64 @@ static const struct freq_desc freq_desc_clv = {
.mask = 0x07,
};
+/*
+ * Bay Trail SDM MSR_FSB_FREQ frequencies simplified PLL model:
+ * 000: 100 * 5 / 6 = 83.3333 MHz
+ * 001: 100 * 1 / 1 = 100.0000 MHz
+ * 010: 100 * 4 / 3 = 133.3333 MHz
+ * 011: 100 * 7 / 6 = 116.6667 MHz
+ * 100: 100 * 4 / 5 = 80.0000 MHz
+ */
static const struct freq_desc freq_desc_byt = {
.use_msr_plat = true,
- .freqs = { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 },
+ .pairs = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 7, 6 }, { 4, 5 } },
.mask = 0x07,
};
+/*
+ * Cherry Trail SDM MSR_FSB_FREQ frequencies simplified PLL model:
+ * 0000: 100 * 5 / 6 = 83.3333 MHz
+ * 0001: 100 * 1 / 1 = 100.0000 MHz
+ * 0010: 100 * 4 / 3 = 133.3333 MHz
+ * 0011: 100 * 7 / 6 = 116.6667 MHz
+ * 0100: 100 * 4 / 5 = 80.0000 MHz
+ * 0101: 100 * 14 / 15 = 93.3333 MHz
+ * 0110: 100 * 9 / 10 = 90.0000 MHz
+ * 0111: 100 * 8 / 9 = 88.8889 MHz
+ * 1000: 100 * 7 / 8 = 87.5000 MHz
+ */
static const struct freq_desc freq_desc_cht = {
.use_msr_plat = true,
- .freqs = { 83300, 100000, 133300, 116700, 80000, 93300, 90000,
- 88900, 87500 },
+ .pairs = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 7, 6 }, { 4, 5 },
+ { 14, 15 }, { 9, 10 }, { 8, 9 }, { 7, 8 } },
.mask = 0x0f,
};
+/*
+ * Merriefield SDM MSR_FSB_FREQ frequencies simplified PLL model:
+ * 0001: 100 * 1 / 1 = 100.0000 MHz
+ * 0010: 100 * 4 / 3 = 133.3333 MHz
+ */
static const struct freq_desc freq_desc_tng = {
.use_msr_plat = true,
- .freqs = { 0, 100000, 133300, 0, 0, 0, 0, 0 },
+ .pairs = { { 0, 0 }, { 1, 1 }, { 4, 3 } },
.mask = 0x07,
};
+/*
+ * Moorefield SDM MSR_FSB_FREQ frequencies simplified PLL model:
+ * 0000: 100 * 5 / 6 = 83.3333 MHz
+ * 0001: 100 * 1 / 1 = 100.0000 MHz
+ * 0010: 100 * 4 / 3 = 133.3333 MHz
+ * 0011: 100 * 1 / 1 = 100.0000 MHz
+ */
static const struct freq_desc freq_desc_ann = {
.use_msr_plat = true,
- .freqs = { 83300, 100000, 133300, 100000, 0, 0, 0, 0 },
+ .pairs = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 1, 1 } },
.mask = 0x0f,
};
+/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */
static const struct freq_desc freq_desc_lgm = {
.use_msr_plat = true,
.freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
@@ -120,11 +178,23 @@ unsigned long cpu_khz_from_msr(void)
rdmsr(MSR_FSB_FREQ, lo, hi);
index = lo & freq_desc->mask;
- /* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
- freq = freq_desc->freqs[index];
-
- /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
- res = freq * ratio;
+ /*
+ * Note this also catches cases where the index points to an unpopulated
+ * part of pairs, in that case the else will set freq and res to 0.
+ */
+ if (freq_desc->pairs[index].divider) {
+ freq = DIV_ROUND_CLOSEST(REFERENCE_KHZ *
+ freq_desc->pairs[index].multiplier,
+ freq_desc->pairs[index].divider);
+ /* Multiply by ratio before the divide for better accuracy */
+ res = DIV_ROUND_CLOSEST(REFERENCE_KHZ *
+ freq_desc->pairs[index].multiplier *
+ ratio,
+ freq_desc->pairs[index].divider);
+ } else {
+ freq = freq_desc->freqs[index];
+ res = freq * ratio;
+ }
if (freq == 0)
pr_err("Error MSR_FSB_FREQ index %d is unknown\n", index);
--
2.24.1
Hey all,
I wanted to send these out for comment and thoughts.
Since ~4.20, when the functionfs gadget enabled scatter-gather
support, we have seen problems with adb connections stalling and
stopping to function on hardware with dwc3 usb controllers.
Specifically, HiKey960, Dragonboard 845c, and Pixel3 devices.
Initally the workaround we used was to simply disable scatter
gather support on the dwc3 by commenting out the
"dwc->gadget.sg_supported = true;" line.
After working with Fei Yang, who was seeing similar trouble on
Intel dwc3 based hardare, Thinh Nguyen mentioned that a fix had
already been found and pointed me to one of Anurag's patches.
This solved the issue on HiKey960 and I sent it out to the list
but didn't get any feedback.
Additional testing with the Dragonboard 845c found that that
first fix was not sufficient, and so I've sat on the fix
thinking something deeper was amiss and went back to the hack
of disabling sg_supported on all dwc3 platforms.
In the following months Fei's continued and repeated efforts
didn't seem to get enough review to result in a fix, and they've
since moved on to other work.
Recently, I found that folks at qcom have seen similer issues
and pointed me to the second patch in this series, which does
seem to resolve the issue on the Dragonboard 845c, but not the
HiKey960 on its own.
So I wanted to send these patches out for comment. There's
clearly a number of folks seeing broken behavior for ahwile on
dwc3 hardware, and we're all seeemingly working around it in our
own ways, so either those individual fixes need to get upstream
or we need to figure out some deeper solution to the issue.
So I wanted to send these two out for review and feedback.
thanks
-john
Cc: Felipe Balbi <felipe.balbi(a)linux.intel.com>
Cc: Yang Fei <fei.yang(a)intel.com>
Cc: Thinh Nguyen <thinhn(a)synopsys.com>
Cc: Tejas Joglekar <tejas.joglekar(a)synopsys.com>
Cc: Andrzej Pietrasiewicz <andrzej.p(a)collabora.com>
Cc: Jack Pham <jackp(a)codeaurora.org>
Cc: Todd Kjos <tkjos(a)google.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: Linux USB List <linux-usb(a)vger.kernel.org>
Cc: stable <stable(a)vger.kernel.org>
Anurag Kumar Vulisha (2):
usb: dwc3: gadget: Check for IOC/LST bit in both event->status and
TRB->ctrl fields
usb: dwc3: gadget: Correct the logic for finding last SG entry
drivers/usb/dwc3/gadget.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--
2.17.1
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f5de5b83303e61b1f3fb09bd77ce3ac2d7a475f2 Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1(a)huawei.com>
Date: Sat, 11 Jan 2020 17:50:36 +0800
Subject: [PATCH] ubifs: Fix deadlock in concurrent bulk-read and writepage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In ubifs, concurrent execution of writepage and bulk read on the same file
may cause ABBA deadlock, for example (Reproduce method see Link):
Process A(Bulk-read starts from page4) Process B(write page4 back)
vfs_read wb_workfn or fsync
... ...
generic_file_buffered_read write_cache_pages
ubifs_readpage LOCK(page4)
ubifs_bulk_read ubifs_writepage
LOCK(ui->ui_mutex) ubifs_write_inode
ubifs_do_bulk_read LOCK(ui->ui_mutex)
find_or_create_page(alloc page4) ↑
LOCK(page4) <-- ABBA deadlock occurs!
In order to ensure the serialization execution of bulk read, we can't
remove the big lock 'ui->ui_mutex' in ubifs_bulk_read(). Instead, we
allow ubifs_do_bulk_read() to lock page failed by replacing
find_or_create_page(FGP_LOCK) with
pagecache_get_page(FGP_LOCK | FGP_NOWAIT).
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
Suggested-by: zhangyi (F) <yi.zhang(a)huawei.com>
Cc: <Stable(a)vger.kernel.org>
Fixes: 4793e7c5e1c ("UBIFS: add bulk-read facility")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206153
Signed-off-by: Richard Weinberger <richard(a)nod.at>
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index cd52585c8f4f..c649048fcc81 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -786,7 +786,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
if (page_offset > end_index)
break;
- page = find_or_create_page(mapping, page_offset, ra_gfp_mask);
+ page = pagecache_get_page(mapping, page_offset,
+ FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
+ ra_gfp_mask);
if (!page)
break;
if (!PageUptodate(page))
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f0d07a98a070bb5e443df19c3aa55693cbca9341 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)google.com>
Date: Mon, 20 Jan 2020 14:31:59 -0800
Subject: [PATCH] ubifs: don't trigger assertion on invalid no-key filename
If userspace provides an invalid fscrypt no-key filename which encodes a
hash value with any of the UBIFS node type bits set (i.e. the high 3
bits), gracefully report ENOENT rather than triggering ubifs_assert().
Test case with kvm-xfstests shell:
. fs/ubifs/config
. ~/xfstests/common/encrypt
dev=$(__blkdev_to_ubi_volume /dev/vdc)
ubiupdatevol $dev -t
mount $dev /mnt -t ubifs
mkdir /mnt/edir
xfs_io -c set_encpolicy /mnt/edir
rm /mnt/edir/_,,,,,DAAAAAAAAAAAAAAAAAAAAAAAAAA
With the bug, the following assertion fails on the 'rm' command:
[ 19.066048] UBIFS error (ubi0:0 pid 379): ubifs_assert_failed: UBIFS assert failed: !(hash & ~UBIFS_S_KEY_HASH_MASK), in fs/ubifs/key.h:170
Fixes: f4f61d2cc6d8 ("ubifs: Implement encrypted filenames")
Cc: <stable(a)vger.kernel.org> # v4.10+
Link: https://lore.kernel.org/r/20200120223201.241390-5-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 636c3222c230..5f937226976a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -228,6 +228,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
if (nm.hash) {
ubifs_assert(c, fname_len(&nm) == 0);
ubifs_assert(c, fname_name(&nm) == NULL);
+ if (nm.hash & ~UBIFS_S_KEY_HASH_MASK)
+ goto done; /* ENOENT */
dent_key_init_hash(c, &key, dir->i_ino, nm.hash);
err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash);
} else {
After a treclaim, we expect to be in non-transactional state. If we don't
immediately clear the current thread's MSR[TS] and we get preempted, then
tm_recheckpoint_new_task() will recheckpoint and we get rescheduled in
suspended transaction state.
When handling a signal caught in transactional state, handle_rt_signal64()
calls get_tm_stackpointer() that treclaims the transaction using
tm_reclaim_current() but without clearing the thread's MSR[TS]. This can cause
the TM Bad Thing exception below if later we pagefault and get preempted trying
to access the user's sigframe, using __put_user(). Afterwards, when we are
rescheduled back into do_page_fault() (but now in suspended state since the
thread's MSR[TS] was not cleared), upon executing 'rfid' after completion of
the page fault handling, the exception is raised because a transition from
suspended to non-transactional state is invalid.
Unexpected TM Bad Thing exception at c00000000000de44 (msr 0x8000000302a03031) tm_scratch=800000010280b033
Oops: Unrecoverable exception, sig: 6 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
Modules linked in: nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip6_tables ip_tables nft_compat ip_set nf_tables nfnetlink xts vmx_crypto sg virtio_balloon
r_mod cdrom virtio_net net_failover virtio_blk virtio_scsi failover dm_mirror dm_region_hash dm_log dm_mod
CPU: 25 PID: 15547 Comm: a.out Not tainted 5.4.0-rc2 #32
NIP: c00000000000de44 LR: c000000000034728 CTR: 0000000000000000
REGS: c00000003fe7bd70 TRAP: 0700 Not tainted (5.4.0-rc2)
MSR: 8000000302a03031 <SF,VEC,VSX,FP,ME,IR,DR,LE,TM[SE]> CR: 44000884 XER: 00000000
CFAR: c00000000000dda4 IRQMASK: 0
PACATMSCRATCH: 800000010280b033
GPR00: c000000000034728 c000000f65a17c80 c000000001662800 00007fffacf3fd78
GPR04: 0000000000001000 0000000000001000 0000000000000000 c000000f611f8af0
GPR08: 0000000000000000 0000000078006001 0000000000000000 000c000000000000
GPR12: c000000f611f84b0 c00000003ffcb200 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000f611f8140
GPR24: 0000000000000000 00007fffacf3fd68 c000000f65a17d90 c000000f611f7800
GPR28: c000000f65a17e90 c000000f65a17e90 c000000001685e18 00007fffacf3f000
NIP [c00000000000de44] fast_exception_return+0xf4/0x1b0
LR [c000000000034728] handle_rt_signal64+0x78/0xc50
Call Trace:
[c000000f65a17c80] [c000000000034710] handle_rt_signal64+0x60/0xc50 (unreliable)
[c000000f65a17d30] [c000000000023640] do_notify_resume+0x330/0x460
[c000000f65a17e20] [c00000000000dcc4] ret_from_except_lite+0x70/0x74
Instruction dump:
7c4ff120 e8410170 7c5a03a6 38400000 f8410060 e8010070 e8410080 e8610088
60000000 60000000 e8810090 e8210078 <4c000024> 48000000 e8610178 88ed0989
---[ end trace 93094aa44b442f87 ]---
The simplified sequence of events that triggers the above exception is:
... # userspace in NON-TRANSACTIONAL state
tbegin # userspace in TRANSACTIONAL state
signal delivery # kernelspace in SUSPENDED state
handle_rt_signal64()
get_tm_stackpointer()
treclaim # kernelspace in NON-TRANSACTIONAL state
__put_user()
page fault happens. We will never get back here because of the TM Bad Thing exception.
page fault handling kicks in and we voluntarily preempt ourselves
do_page_fault()
__schedule()
__switch_to(other_task)
our task is rescheduled and we recheckpoint because the thread's MSR[TS] was not cleared
__switch_to(our_task)
switch_to_tm()
tm_recheckpoint_new_task()
trechkpt # kernelspace in SUSPENDED state
The page fault handling resumes, but now we are in suspended transaction state
do_page_fault() completes
rfid <----- trying to get back where the page fault happened (we were non-transactional back then)
TM Bad Thing # illegal transition from suspended to non-transactional
This patch fixes that issue by clearing the current thread's MSR[TS] just after
treclaim in get_tm_stackpointer() so that we stay in non-transactional state in
case we are preempted. In order to make treclaim and clearing the thread's
MSR[TS] atomic from a preemption perspective when CONFIG_PREEMPT is set,
preempt_disable/enable() is used. It's also necessary to save the previous
value of the thread's MSR before get_tm_stackpointer() is called so that it can
be exposed to the signal handler later in setup_tm_sigcontexts() to inform the
userspace MSR at the moment of the signal delivery.
Found with tm-signal-context-force-tm kernel selftest on P8 KVM.
v2: Fix build failure when tm is disabled.
Fixes: 2b0a576d15e0 ("powerpc: Add new transactional memory state to the signal context")
Cc: stable(a)vger.kernel.org # v3.9
Signed-off-by: Gustavo Luiz Duarte <gustavold(a)linux.ibm.com>
---
arch/powerpc/kernel/signal.c | 17 +++++++++++++++--
arch/powerpc/kernel/signal_32.c | 28 ++++++++++++++--------------
arch/powerpc/kernel/signal_64.c | 22 ++++++++++------------
3 files changed, 39 insertions(+), 28 deletions(-)
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index e6c30cee6abf..1660be1061ac 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -200,14 +200,27 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk)
* normal/non-checkpointed stack pointer.
*/
+ unsigned long ret = tsk->thread.regs->gpr[1];
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BUG_ON(tsk != current);
if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) {
+ preempt_disable();
tm_reclaim_current(TM_CAUSE_SIGNAL);
if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr))
- return tsk->thread.ckpt_regs.gpr[1];
+ ret = tsk->thread.ckpt_regs.gpr[1];
+
+ /* If we treclaim, we must immediately clear the current
+ * thread's TM bits. Otherwise we might be preempted and have
+ * the live MSR[TS] changed behind our back
+ * (tm_recheckpoint_new_task() would recheckpoint).
+ * Besides, we enter the signal handler in non-transactional
+ * state.
+ */
+ tsk->thread.regs->msr &= ~MSR_TS_MASK;
+ preempt_enable();
}
#endif
- return tsk->thread.regs->gpr[1];
+ return ret;
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 98600b276f76..1b090a76b444 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -489,19 +489,11 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
*/
static int save_tm_user_regs(struct pt_regs *regs,
struct mcontext __user *frame,
- struct mcontext __user *tm_frame, int sigret)
+ struct mcontext __user *tm_frame, int sigret,
+ unsigned long msr)
{
- unsigned long msr = regs->msr;
-
WARN_ON(tm_suspend_disabled);
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state. This also ensures
- * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */
- regs->msr &= ~MSR_TS_MASK;
-
/* Save both sets of general registers */
if (save_general_regs(¤t->thread.ckpt_regs, frame)
|| save_general_regs(regs, tm_frame))
@@ -912,6 +904,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
int sigret;
unsigned long tramp;
struct pt_regs *regs = tsk->thread.regs;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
+#endif
BUG_ON(tsk != current);
@@ -944,13 +940,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_frame = &rt_sf->uc_transact.uc_mcontext;
- if (MSR_TM_ACTIVE(regs->msr)) {
+ if (MSR_TM_ACTIVE(msr)) {
if (__put_user((unsigned long)&rt_sf->uc_transact,
&rt_sf->uc.uc_link) ||
__put_user((unsigned long)tm_frame,
&rt_sf->uc_transact.uc_regs))
goto badframe;
- if (save_tm_user_regs(regs, frame, tm_frame, sigret))
+ if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr))
goto badframe;
}
else
@@ -1369,6 +1365,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
int sigret;
unsigned long tramp;
struct pt_regs *regs = tsk->thread.regs;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
+#endif
BUG_ON(tsk != current);
@@ -1402,9 +1402,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_mctx = &frame->mctx_transact;
- if (MSR_TM_ACTIVE(regs->msr)) {
+ if (MSR_TM_ACTIVE(msr)) {
if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
- sigret))
+ sigret, msr))
goto badframe;
}
else
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 117515564ec7..84ed2e77ef9c 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -192,7 +192,8 @@ static long setup_sigcontext(struct sigcontext __user *sc,
static long setup_tm_sigcontexts(struct sigcontext __user *sc,
struct sigcontext __user *tm_sc,
struct task_struct *tsk,
- int signr, sigset_t *set, unsigned long handler)
+ int signr, sigset_t *set, unsigned long handler,
+ unsigned long msr)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -207,12 +208,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
#endif
struct pt_regs *regs = tsk->thread.regs;
- unsigned long msr = tsk->thread.regs->msr;
long err = 0;
BUG_ON(tsk != current);
- BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+ BUG_ON(!MSR_TM_ACTIVE(msr));
WARN_ON(tm_suspend_disabled);
@@ -222,13 +222,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
*/
msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state. This also ensures
- * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */
- regs->msr &= ~MSR_TS_MASK;
-
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
err |= __put_user(tm_v_regs, &tm_sc->v_regs);
@@ -824,6 +817,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsigned long newsp = 0;
long err = 0;
struct pt_regs *regs = tsk->thread.regs;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
+#endif
BUG_ON(tsk != current);
@@ -841,7 +838,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
err |= __put_user(0, &frame->uc.uc_flags);
err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(regs->msr)) {
+ if (MSR_TM_ACTIVE(msr)) {
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
@@ -849,7 +846,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
&frame->uc_transact.uc_mcontext,
tsk, ksig->sig, NULL,
- (unsigned long)ksig->ka.sa.sa_handler);
+ (unsigned long)ksig->ka.sa.sa_handler,
+ msr);
} else
#endif
{
--
2.21.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6df19872d881641e6394f93ef2938cffcbdae5bb Mon Sep 17 00:00:00 2001
From: Yurii Monakov <monakov.y(a)gmail.com>
Date: Tue, 17 Dec 2019 14:38:36 +0300
Subject: [PATCH] PCI: keystone: Fix link training retries initiation
ks_pcie_stop_link() function does not clear LTSSM_EN_VAL bit so
link training was not triggered more than once after startup.
In configurations where link can be unstable during early boot,
for example, under low temperature, it will never be established.
Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver")
Signed-off-by: Yurii Monakov <monakov.y(a)gmail.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Acked-by: Andrew Murray <andrew.murray(a)arm.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index af677254a072..d4de4f6cff8b 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -510,7 +510,7 @@ static void ks_pcie_stop_link(struct dw_pcie *pci)
/* Disable Link training */
val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
val &= ~LTSSM_EN_VAL;
- ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val);
+ ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
}
static int ks_pcie_start_link(struct dw_pcie *pci)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f5de5b83303e61b1f3fb09bd77ce3ac2d7a475f2 Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1(a)huawei.com>
Date: Sat, 11 Jan 2020 17:50:36 +0800
Subject: [PATCH] ubifs: Fix deadlock in concurrent bulk-read and writepage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In ubifs, concurrent execution of writepage and bulk read on the same file
may cause ABBA deadlock, for example (Reproduce method see Link):
Process A(Bulk-read starts from page4) Process B(write page4 back)
vfs_read wb_workfn or fsync
... ...
generic_file_buffered_read write_cache_pages
ubifs_readpage LOCK(page4)
ubifs_bulk_read ubifs_writepage
LOCK(ui->ui_mutex) ubifs_write_inode
ubifs_do_bulk_read LOCK(ui->ui_mutex)
find_or_create_page(alloc page4) ↑
LOCK(page4) <-- ABBA deadlock occurs!
In order to ensure the serialization execution of bulk read, we can't
remove the big lock 'ui->ui_mutex' in ubifs_bulk_read(). Instead, we
allow ubifs_do_bulk_read() to lock page failed by replacing
find_or_create_page(FGP_LOCK) with
pagecache_get_page(FGP_LOCK | FGP_NOWAIT).
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
Suggested-by: zhangyi (F) <yi.zhang(a)huawei.com>
Cc: <Stable(a)vger.kernel.org>
Fixes: 4793e7c5e1c ("UBIFS: add bulk-read facility")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206153
Signed-off-by: Richard Weinberger <richard(a)nod.at>
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index cd52585c8f4f..c649048fcc81 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -786,7 +786,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
if (page_offset > end_index)
break;
- page = find_or_create_page(mapping, page_offset, ra_gfp_mask);
+ page = pagecache_get_page(mapping, page_offset,
+ FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
+ ra_gfp_mask);
if (!page)
break;
if (!PageUptodate(page))
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a04184ce777b46e92c2b3c93c6dcb2754cb005e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux(a)rere.qmqm.pl>
Date: Thu, 2 Jan 2020 11:42:16 +0100
Subject: [PATCH] mmc: sdhci-of-at91: fix memleak on clk_get failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
sdhci_alloc_host() does its work not using managed infrastructure, so
needs explicit free on error path. Add it where needed.
Cc: <stable(a)vger.kernel.org>
Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC")
Signed-off-by: Michał Mirosław <mirq-linux(a)rere.qmqm.pl>
Acked-by: Ludovic Desroches <ludovic.desroches(a)microchip.com>
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Link: https://lore.kernel.org/r/b2a44d5be2e06ff075f32477e466598bb0f07b36.15779616…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index b2a8c45c9c23..ab2bd314a390 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -345,20 +345,23 @@ static int sdhci_at91_probe(struct platform_device *pdev)
priv->mainck = NULL;
} else {
dev_err(&pdev->dev, "failed to get baseclk\n");
- return PTR_ERR(priv->mainck);
+ ret = PTR_ERR(priv->mainck);
+ goto sdhci_pltfm_free;
}
}
priv->hclock = devm_clk_get(&pdev->dev, "hclock");
if (IS_ERR(priv->hclock)) {
dev_err(&pdev->dev, "failed to get hclock\n");
- return PTR_ERR(priv->hclock);
+ ret = PTR_ERR(priv->hclock);
+ goto sdhci_pltfm_free;
}
priv->gck = devm_clk_get(&pdev->dev, "multclk");
if (IS_ERR(priv->gck)) {
dev_err(&pdev->dev, "failed to get multclk\n");
- return PTR_ERR(priv->gck);
+ ret = PTR_ERR(priv->gck);
+ goto sdhci_pltfm_free;
}
ret = sdhci_at91_set_clks_presets(&pdev->dev);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a04184ce777b46e92c2b3c93c6dcb2754cb005e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux(a)rere.qmqm.pl>
Date: Thu, 2 Jan 2020 11:42:16 +0100
Subject: [PATCH] mmc: sdhci-of-at91: fix memleak on clk_get failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
sdhci_alloc_host() does its work not using managed infrastructure, so
needs explicit free on error path. Add it where needed.
Cc: <stable(a)vger.kernel.org>
Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC")
Signed-off-by: Michał Mirosław <mirq-linux(a)rere.qmqm.pl>
Acked-by: Ludovic Desroches <ludovic.desroches(a)microchip.com>
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Link: https://lore.kernel.org/r/b2a44d5be2e06ff075f32477e466598bb0f07b36.15779616…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index b2a8c45c9c23..ab2bd314a390 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -345,20 +345,23 @@ static int sdhci_at91_probe(struct platform_device *pdev)
priv->mainck = NULL;
} else {
dev_err(&pdev->dev, "failed to get baseclk\n");
- return PTR_ERR(priv->mainck);
+ ret = PTR_ERR(priv->mainck);
+ goto sdhci_pltfm_free;
}
}
priv->hclock = devm_clk_get(&pdev->dev, "hclock");
if (IS_ERR(priv->hclock)) {
dev_err(&pdev->dev, "failed to get hclock\n");
- return PTR_ERR(priv->hclock);
+ ret = PTR_ERR(priv->hclock);
+ goto sdhci_pltfm_free;
}
priv->gck = devm_clk_get(&pdev->dev, "multclk");
if (IS_ERR(priv->gck)) {
dev_err(&pdev->dev, "failed to get multclk\n");
- return PTR_ERR(priv->gck);
+ ret = PTR_ERR(priv->gck);
+ goto sdhci_pltfm_free;
}
ret = sdhci_at91_set_clks_presets(&pdev->dev);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a04184ce777b46e92c2b3c93c6dcb2754cb005e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux(a)rere.qmqm.pl>
Date: Thu, 2 Jan 2020 11:42:16 +0100
Subject: [PATCH] mmc: sdhci-of-at91: fix memleak on clk_get failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
sdhci_alloc_host() does its work not using managed infrastructure, so
needs explicit free on error path. Add it where needed.
Cc: <stable(a)vger.kernel.org>
Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC")
Signed-off-by: Michał Mirosław <mirq-linux(a)rere.qmqm.pl>
Acked-by: Ludovic Desroches <ludovic.desroches(a)microchip.com>
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Link: https://lore.kernel.org/r/b2a44d5be2e06ff075f32477e466598bb0f07b36.15779616…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index b2a8c45c9c23..ab2bd314a390 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -345,20 +345,23 @@ static int sdhci_at91_probe(struct platform_device *pdev)
priv->mainck = NULL;
} else {
dev_err(&pdev->dev, "failed to get baseclk\n");
- return PTR_ERR(priv->mainck);
+ ret = PTR_ERR(priv->mainck);
+ goto sdhci_pltfm_free;
}
}
priv->hclock = devm_clk_get(&pdev->dev, "hclock");
if (IS_ERR(priv->hclock)) {
dev_err(&pdev->dev, "failed to get hclock\n");
- return PTR_ERR(priv->hclock);
+ ret = PTR_ERR(priv->hclock);
+ goto sdhci_pltfm_free;
}
priv->gck = devm_clk_get(&pdev->dev, "multclk");
if (IS_ERR(priv->gck)) {
dev_err(&pdev->dev, "failed to get multclk\n");
- return PTR_ERR(priv->gck);
+ ret = PTR_ERR(priv->gck);
+ goto sdhci_pltfm_free;
}
ret = sdhci_at91_set_clks_presets(&pdev->dev);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a04184ce777b46e92c2b3c93c6dcb2754cb005e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux(a)rere.qmqm.pl>
Date: Thu, 2 Jan 2020 11:42:16 +0100
Subject: [PATCH] mmc: sdhci-of-at91: fix memleak on clk_get failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
sdhci_alloc_host() does its work not using managed infrastructure, so
needs explicit free on error path. Add it where needed.
Cc: <stable(a)vger.kernel.org>
Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC")
Signed-off-by: Michał Mirosław <mirq-linux(a)rere.qmqm.pl>
Acked-by: Ludovic Desroches <ludovic.desroches(a)microchip.com>
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Link: https://lore.kernel.org/r/b2a44d5be2e06ff075f32477e466598bb0f07b36.15779616…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index b2a8c45c9c23..ab2bd314a390 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -345,20 +345,23 @@ static int sdhci_at91_probe(struct platform_device *pdev)
priv->mainck = NULL;
} else {
dev_err(&pdev->dev, "failed to get baseclk\n");
- return PTR_ERR(priv->mainck);
+ ret = PTR_ERR(priv->mainck);
+ goto sdhci_pltfm_free;
}
}
priv->hclock = devm_clk_get(&pdev->dev, "hclock");
if (IS_ERR(priv->hclock)) {
dev_err(&pdev->dev, "failed to get hclock\n");
- return PTR_ERR(priv->hclock);
+ ret = PTR_ERR(priv->hclock);
+ goto sdhci_pltfm_free;
}
priv->gck = devm_clk_get(&pdev->dev, "multclk");
if (IS_ERR(priv->gck)) {
dev_err(&pdev->dev, "failed to get multclk\n");
- return PTR_ERR(priv->gck);
+ ret = PTR_ERR(priv->gck);
+ goto sdhci_pltfm_free;
}
ret = sdhci_at91_set_clks_presets(&pdev->dev);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a04184ce777b46e92c2b3c93c6dcb2754cb005e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux(a)rere.qmqm.pl>
Date: Thu, 2 Jan 2020 11:42:16 +0100
Subject: [PATCH] mmc: sdhci-of-at91: fix memleak on clk_get failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
sdhci_alloc_host() does its work not using managed infrastructure, so
needs explicit free on error path. Add it where needed.
Cc: <stable(a)vger.kernel.org>
Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC")
Signed-off-by: Michał Mirosław <mirq-linux(a)rere.qmqm.pl>
Acked-by: Ludovic Desroches <ludovic.desroches(a)microchip.com>
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Link: https://lore.kernel.org/r/b2a44d5be2e06ff075f32477e466598bb0f07b36.15779616…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index b2a8c45c9c23..ab2bd314a390 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -345,20 +345,23 @@ static int sdhci_at91_probe(struct platform_device *pdev)
priv->mainck = NULL;
} else {
dev_err(&pdev->dev, "failed to get baseclk\n");
- return PTR_ERR(priv->mainck);
+ ret = PTR_ERR(priv->mainck);
+ goto sdhci_pltfm_free;
}
}
priv->hclock = devm_clk_get(&pdev->dev, "hclock");
if (IS_ERR(priv->hclock)) {
dev_err(&pdev->dev, "failed to get hclock\n");
- return PTR_ERR(priv->hclock);
+ ret = PTR_ERR(priv->hclock);
+ goto sdhci_pltfm_free;
}
priv->gck = devm_clk_get(&pdev->dev, "multclk");
if (IS_ERR(priv->gck)) {
dev_err(&pdev->dev, "failed to get multclk\n");
- return PTR_ERR(priv->gck);
+ ret = PTR_ERR(priv->gck);
+ goto sdhci_pltfm_free;
}
ret = sdhci_at91_set_clks_presets(&pdev->dev);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 811ba67632aae618985209cb9455f1d342426e6d Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas(a)ti.com>
Date: Fri, 6 Dec 2019 17:13:26 +0530
Subject: [PATCH] mmc: sdhci: Update the tuning failed messages to pr_debug
level
Tuning support in DDR50 speed mode was added in SD Specifications Part1
Physical Layer Specification v3.01. Its not possible to distinguish
between v3.00 and v3.01 from the SCR and that is why since
commit 4324f6de6d2e ("mmc: core: enable CMD19 tuning for DDR50 mode")
tuning failures are ignored in DDR50 speed mode.
Cards compatible with v3.00 don't respond to CMD19 in DDR50 and this
error gets printed during enumeration and also if retune is triggered at
any time during operation. Update the printk level to pr_debug so that
these errors don't lead to false error reports.
Signed-off-by: Faiz Abbas <faiz_abbas(a)ti.com>
Cc: stable(a)vger.kernel.org # v4.4+
Link: https://lore.kernel.org/r/20191206114326.15856-1-faiz_abbas@ti.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 012b1d9145b9..d8f1d87c58c6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2408,8 +2408,8 @@ static int __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
sdhci_send_tuning(host, opcode);
if (!host->tuning_done) {
- pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
- mmc_hostname(host->mmc));
+ pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n",
+ mmc_hostname(host->mmc));
sdhci_abort_tuning(host, opcode);
return -ETIMEDOUT;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 811ba67632aae618985209cb9455f1d342426e6d Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas(a)ti.com>
Date: Fri, 6 Dec 2019 17:13:26 +0530
Subject: [PATCH] mmc: sdhci: Update the tuning failed messages to pr_debug
level
Tuning support in DDR50 speed mode was added in SD Specifications Part1
Physical Layer Specification v3.01. Its not possible to distinguish
between v3.00 and v3.01 from the SCR and that is why since
commit 4324f6de6d2e ("mmc: core: enable CMD19 tuning for DDR50 mode")
tuning failures are ignored in DDR50 speed mode.
Cards compatible with v3.00 don't respond to CMD19 in DDR50 and this
error gets printed during enumeration and also if retune is triggered at
any time during operation. Update the printk level to pr_debug so that
these errors don't lead to false error reports.
Signed-off-by: Faiz Abbas <faiz_abbas(a)ti.com>
Cc: stable(a)vger.kernel.org # v4.4+
Link: https://lore.kernel.org/r/20191206114326.15856-1-faiz_abbas@ti.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 012b1d9145b9..d8f1d87c58c6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2408,8 +2408,8 @@ static int __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
sdhci_send_tuning(host, opcode);
if (!host->tuning_done) {
- pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
- mmc_hostname(host->mmc));
+ pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n",
+ mmc_hostname(host->mmc));
sdhci_abort_tuning(host, opcode);
return -ETIMEDOUT;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 811ba67632aae618985209cb9455f1d342426e6d Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas(a)ti.com>
Date: Fri, 6 Dec 2019 17:13:26 +0530
Subject: [PATCH] mmc: sdhci: Update the tuning failed messages to pr_debug
level
Tuning support in DDR50 speed mode was added in SD Specifications Part1
Physical Layer Specification v3.01. Its not possible to distinguish
between v3.00 and v3.01 from the SCR and that is why since
commit 4324f6de6d2e ("mmc: core: enable CMD19 tuning for DDR50 mode")
tuning failures are ignored in DDR50 speed mode.
Cards compatible with v3.00 don't respond to CMD19 in DDR50 and this
error gets printed during enumeration and also if retune is triggered at
any time during operation. Update the printk level to pr_debug so that
these errors don't lead to false error reports.
Signed-off-by: Faiz Abbas <faiz_abbas(a)ti.com>
Cc: stable(a)vger.kernel.org # v4.4+
Link: https://lore.kernel.org/r/20191206114326.15856-1-faiz_abbas@ti.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 012b1d9145b9..d8f1d87c58c6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2408,8 +2408,8 @@ static int __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
sdhci_send_tuning(host, opcode);
if (!host->tuning_done) {
- pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
- mmc_hostname(host->mmc));
+ pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n",
+ mmc_hostname(host->mmc));
sdhci_abort_tuning(host, opcode);
return -ETIMEDOUT;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 811ba67632aae618985209cb9455f1d342426e6d Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas(a)ti.com>
Date: Fri, 6 Dec 2019 17:13:26 +0530
Subject: [PATCH] mmc: sdhci: Update the tuning failed messages to pr_debug
level
Tuning support in DDR50 speed mode was added in SD Specifications Part1
Physical Layer Specification v3.01. Its not possible to distinguish
between v3.00 and v3.01 from the SCR and that is why since
commit 4324f6de6d2e ("mmc: core: enable CMD19 tuning for DDR50 mode")
tuning failures are ignored in DDR50 speed mode.
Cards compatible with v3.00 don't respond to CMD19 in DDR50 and this
error gets printed during enumeration and also if retune is triggered at
any time during operation. Update the printk level to pr_debug so that
these errors don't lead to false error reports.
Signed-off-by: Faiz Abbas <faiz_abbas(a)ti.com>
Cc: stable(a)vger.kernel.org # v4.4+
Link: https://lore.kernel.org/r/20191206114326.15856-1-faiz_abbas@ti.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 012b1d9145b9..d8f1d87c58c6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2408,8 +2408,8 @@ static int __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
sdhci_send_tuning(host, opcode);
if (!host->tuning_done) {
- pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
- mmc_hostname(host->mmc));
+ pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n",
+ mmc_hostname(host->mmc));
sdhci_abort_tuning(host, opcode);
return -ETIMEDOUT;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 811ba67632aae618985209cb9455f1d342426e6d Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas(a)ti.com>
Date: Fri, 6 Dec 2019 17:13:26 +0530
Subject: [PATCH] mmc: sdhci: Update the tuning failed messages to pr_debug
level
Tuning support in DDR50 speed mode was added in SD Specifications Part1
Physical Layer Specification v3.01. Its not possible to distinguish
between v3.00 and v3.01 from the SCR and that is why since
commit 4324f6de6d2e ("mmc: core: enable CMD19 tuning for DDR50 mode")
tuning failures are ignored in DDR50 speed mode.
Cards compatible with v3.00 don't respond to CMD19 in DDR50 and this
error gets printed during enumeration and also if retune is triggered at
any time during operation. Update the printk level to pr_debug so that
these errors don't lead to false error reports.
Signed-off-by: Faiz Abbas <faiz_abbas(a)ti.com>
Cc: stable(a)vger.kernel.org # v4.4+
Link: https://lore.kernel.org/r/20191206114326.15856-1-faiz_abbas@ti.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 012b1d9145b9..d8f1d87c58c6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2408,8 +2408,8 @@ static int __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
sdhci_send_tuning(host, opcode);
if (!host->tuning_done) {
- pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
- mmc_hostname(host->mmc));
+ pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n",
+ mmc_hostname(host->mmc));
sdhci_abort_tuning(host, opcode);
return -ETIMEDOUT;
}
The patch below does not apply to the 5.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 811ba67632aae618985209cb9455f1d342426e6d Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas(a)ti.com>
Date: Fri, 6 Dec 2019 17:13:26 +0530
Subject: [PATCH] mmc: sdhci: Update the tuning failed messages to pr_debug
level
Tuning support in DDR50 speed mode was added in SD Specifications Part1
Physical Layer Specification v3.01. Its not possible to distinguish
between v3.00 and v3.01 from the SCR and that is why since
commit 4324f6de6d2e ("mmc: core: enable CMD19 tuning for DDR50 mode")
tuning failures are ignored in DDR50 speed mode.
Cards compatible with v3.00 don't respond to CMD19 in DDR50 and this
error gets printed during enumeration and also if retune is triggered at
any time during operation. Update the printk level to pr_debug so that
these errors don't lead to false error reports.
Signed-off-by: Faiz Abbas <faiz_abbas(a)ti.com>
Cc: stable(a)vger.kernel.org # v4.4+
Link: https://lore.kernel.org/r/20191206114326.15856-1-faiz_abbas@ti.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 012b1d9145b9..d8f1d87c58c6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2408,8 +2408,8 @@ static int __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
sdhci_send_tuning(host, opcode);
if (!host->tuning_done) {
- pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
- mmc_hostname(host->mmc));
+ pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n",
+ mmc_hostname(host->mmc));
sdhci_abort_tuning(host, opcode);
return -ETIMEDOUT;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6df19872d881641e6394f93ef2938cffcbdae5bb Mon Sep 17 00:00:00 2001
From: Yurii Monakov <monakov.y(a)gmail.com>
Date: Tue, 17 Dec 2019 14:38:36 +0300
Subject: [PATCH] PCI: keystone: Fix link training retries initiation
ks_pcie_stop_link() function does not clear LTSSM_EN_VAL bit so
link training was not triggered more than once after startup.
In configurations where link can be unstable during early boot,
for example, under low temperature, it will never be established.
Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver")
Signed-off-by: Yurii Monakov <monakov.y(a)gmail.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Acked-by: Andrew Murray <andrew.murray(a)arm.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index af677254a072..d4de4f6cff8b 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -510,7 +510,7 @@ static void ks_pcie_stop_link(struct dw_pcie *pci)
/* Disable Link training */
val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
val &= ~LTSSM_EN_VAL;
- ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val);
+ ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
}
static int ks_pcie_start_link(struct dw_pcie *pci)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6df19872d881641e6394f93ef2938cffcbdae5bb Mon Sep 17 00:00:00 2001
From: Yurii Monakov <monakov.y(a)gmail.com>
Date: Tue, 17 Dec 2019 14:38:36 +0300
Subject: [PATCH] PCI: keystone: Fix link training retries initiation
ks_pcie_stop_link() function does not clear LTSSM_EN_VAL bit so
link training was not triggered more than once after startup.
In configurations where link can be unstable during early boot,
for example, under low temperature, it will never be established.
Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver")
Signed-off-by: Yurii Monakov <monakov.y(a)gmail.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Acked-by: Andrew Murray <andrew.murray(a)arm.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index af677254a072..d4de4f6cff8b 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -510,7 +510,7 @@ static void ks_pcie_stop_link(struct dw_pcie *pci)
/* Disable Link training */
val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
val &= ~LTSSM_EN_VAL;
- ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val);
+ ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
}
static int ks_pcie_start_link(struct dw_pcie *pci)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6df19872d881641e6394f93ef2938cffcbdae5bb Mon Sep 17 00:00:00 2001
From: Yurii Monakov <monakov.y(a)gmail.com>
Date: Tue, 17 Dec 2019 14:38:36 +0300
Subject: [PATCH] PCI: keystone: Fix link training retries initiation
ks_pcie_stop_link() function does not clear LTSSM_EN_VAL bit so
link training was not triggered more than once after startup.
In configurations where link can be unstable during early boot,
for example, under low temperature, it will never be established.
Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver")
Signed-off-by: Yurii Monakov <monakov.y(a)gmail.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Acked-by: Andrew Murray <andrew.murray(a)arm.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index af677254a072..d4de4f6cff8b 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -510,7 +510,7 @@ static void ks_pcie_stop_link(struct dw_pcie *pci)
/* Disable Link training */
val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
val &= ~LTSSM_EN_VAL;
- ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val);
+ ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
}
static int ks_pcie_start_link(struct dw_pcie *pci)
There's two references floating around here (for the object reference,
not the handle_count reference, that's a different thing):
- The temporary reference held by vgem_gem_create, acquired by
creating the object and released by calling
drm_gem_object_put_unlocked.
- The reference held by the object handle, created by
drm_gem_handle_create. This one generally outlives the function,
except if a 2nd thread races with a GEM_CLOSE ioctl call.
So usually everything is correct, except in that race case, where the
access to gem_object->size could be looking at freed data already.
Which again isn't a real problem (userspace shot its feet off already
with the race, we could return garbage), but maybe someone can exploit
this as an information leak.
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
Cc: Hillf Danton <hdanton(a)sina.com>
Cc: Reported-by: syzbot+0dc4444774d419e916c8(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Cc: Emil Velikov <emil.velikov(a)collabora.com>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Sean Paul <seanpaul(a)chromium.org>
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Eric Anholt <eric(a)anholt.net>
Cc: Sam Ravnborg <sam(a)ravnborg.org>
Cc: Rob Clark <robdclark(a)chromium.org>
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
---
drivers/gpu/drm/vgem/vgem_drv.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 5bd60ded3d81..909eba43664a 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -196,9 +196,10 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
return ERR_CAST(obj);
ret = drm_gem_handle_create(file, &obj->base, handle);
- drm_gem_object_put_unlocked(&obj->base);
- if (ret)
+ if (ret) {
+ drm_gem_object_put_unlocked(&obj->base);
return ERR_PTR(ret);
+ }
return &obj->base;
}
@@ -221,7 +222,9 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
args->size = gem_object->size;
args->pitch = pitch;
- DRM_DEBUG("Created object of size %lld\n", size);
+ drm_gem_object_put_unlocked(gem_object);
+
+ DRM_DEBUG("Created object of size %llu\n", args->size);
return 0;
}
--
2.24.1
There's no logged information about tree-log replay although this is
something that points to previous unclean unmount. Other filesystems
report that as well.
Suggested-by: Chris Murphy <lists(a)colorremedies.com>
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/disk-io.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 28622de9e642..295d5ebc9d5e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3146,6 +3146,7 @@ int __cold open_ctree(struct super_block *sb,
/* do not make disk changes in broken FS or nologreplay is given */
if (btrfs_super_log_root(disk_super) != 0 &&
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
+ btrfs_info("start tree-log replay");
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
--
2.24.0
When all CPUs in the system implement the SSBS instructions, we
advertise this via an HWCAP and allow EL0 to toggle the SSBS field
in PSTATE directly. Consequently, the state of the mitigation is not
accurately tracked by the TIF_SSBD thread flag and the PSTATE value
is authoritative.
Avoid forcing the SSBS field in context-switch on such a system, and
simply rely on the PSTATE register instead.
Cc: <stable(a)vger.kernel.org>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Srinivas Ramana <sramana(a)codeaurora.org>
Fixes: cbdf8a189a66 ("arm64: Force SSBS on context switch")
Signed-off-by: Will Deacon <will(a)kernel.org>
---
arch/arm64/kernel/process.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index d54586d5b031..45e867f40a7a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next)
if (unlikely(next->flags & PF_KTHREAD))
return;
+ /*
+ * If all CPUs implement the SSBS instructions, then we just
+ * need to context-switch the PSTATE field.
+ */
+ if (cpu_have_feature(cpu_feature(SSBS)))
+ return;
+
/* If the mitigation is enabled, then we leave SSBS clear. */
if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
test_tsk_thread_flag(next, TIF_SSBD))
--
2.25.0.341.g760bfbb309-goog
boot_aggregate is the first entry of IMA measurement list. Its purpose is
to link pre-boot measurements to IMA measurements. As IMA was designed to
work with a TPM 1.2, the SHA1 PCR bank was always selected.
Currently, even if a TPM 2.0 is used, the SHA1 PCR bank is selected.
However, the assumption that the SHA1 PCR bank is always available is not
correct, as PCR banks can be selected with the PCR_Allocate() TPM command.
This patch tries to use ima_hash_algo as hash algorithm for boot_aggregate.
If no PCR bank uses that algorithm, the patch tries to find the SHA256 PCR
bank (which is mandatory in the TCG PC Client specification). If also this
bank is not found, the patch selects the first one. If the TPM algorithm
of that bank is not mapped to a crypto ID, boot_aggregate is set to zero.
Changelog
v1:
- add Mimi's comments
- if there is no PCR bank for the IMA default algorithm use SHA256 or the
first bank (suggested by James Bottomley)
Reported-by: Jerry Snitselaar <jsnitsel(a)redhat.com>
Suggested-by: James Bottomley <James.Bottomley(a)HansenPartnership.com>
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Cc: stable(a)vger.kernel.org
---
security/integrity/ima/ima_crypto.c | 45 +++++++++++++++++++++++++----
security/integrity/ima/ima_init.c | 22 ++++++++++----
2 files changed, 56 insertions(+), 11 deletions(-)
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 73044fc6a952..f2f41a2bc3d4 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -655,18 +655,29 @@ static void __init ima_pcrread(u32 idx, struct tpm_digest *d)
}
/*
- * Calculate the boot aggregate hash
+ * The boot_aggregate is a cumulative hash over TPM registers 0 - 7. With
+ * TPM 1.2 the boot_aggregate was based on reading the SHA1 PCRs, but with
+ * TPM 2.0 hash agility, TPM chips could support multiple TPM PCR banks,
+ * allowing firmware to configure and enable different banks.
+ *
+ * Knowing which TPM bank is read to calculate the boot_aggregate digest
+ * needs to be conveyed to a verifier. For this reason, use the same
+ * hash algorithm for reading the TPM PCRs as for calculating the boot
+ * aggregate digest as stored in the measurement list.
*/
-static int __init ima_calc_boot_aggregate_tfm(char *digest,
+static int __init ima_calc_boot_aggregate_tfm(char *digest, u16 alg_id,
struct crypto_shash *tfm)
{
- struct tpm_digest d = { .alg_id = TPM_ALG_SHA1, .digest = {0} };
+ struct tpm_digest d = { .alg_id = alg_id, .digest = {0} };
int rc;
u32 i;
SHASH_DESC_ON_STACK(shash, tfm);
shash->tfm = tfm;
+ pr_devel("calculating the boot-aggregate based on TPM bank: %04x\n",
+ d.alg_id);
+
rc = crypto_shash_init(shash);
if (rc != 0)
return rc;
@@ -675,7 +686,8 @@ static int __init ima_calc_boot_aggregate_tfm(char *digest,
for (i = TPM_PCR0; i < TPM_PCR8; i++) {
ima_pcrread(i, &d);
/* now accumulate with current aggregate */
- rc = crypto_shash_update(shash, d.digest, TPM_DIGEST_SIZE);
+ rc = crypto_shash_update(shash, d.digest,
+ crypto_shash_digestsize(tfm));
}
if (!rc)
crypto_shash_final(shash, digest);
@@ -685,14 +697,35 @@ static int __init ima_calc_boot_aggregate_tfm(char *digest,
int __init ima_calc_boot_aggregate(struct ima_digest_data *hash)
{
struct crypto_shash *tfm;
- int rc;
+ u16 crypto_id, alg_id;
+ int rc, i, bank_idx = 0;
+
+ for (i = 0; i < ima_tpm_chip->nr_allocated_banks; i++) {
+ crypto_id = ima_tpm_chip->allocated_banks[i].crypto_id;
+ if (crypto_id == hash->algo) {
+ bank_idx = i;
+ break;
+ }
+
+ if (crypto_id == HASH_ALGO_SHA256)
+ bank_idx = i;
+ }
+
+ if (bank_idx == 0 &&
+ ima_tpm_chip->allocated_banks[0].crypto_id == HASH_ALGO__LAST) {
+ pr_err("No suitable TPM algorithm for boot aggregate\n");
+ return 0;
+ }
+
+ hash->algo = ima_tpm_chip->allocated_banks[bank_idx].crypto_id;
tfm = ima_alloc_tfm(hash->algo);
if (IS_ERR(tfm))
return PTR_ERR(tfm);
hash->length = crypto_shash_digestsize(tfm);
- rc = ima_calc_boot_aggregate_tfm(hash->digest, tfm);
+ alg_id = ima_tpm_chip->allocated_banks[bank_idx].alg_id;
+ rc = ima_calc_boot_aggregate_tfm(hash->digest, alg_id, tfm);
ima_free_tfm(tfm);
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 5d55ade5f3b9..fbd7a8e28a6b 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -27,7 +27,7 @@ struct tpm_chip *ima_tpm_chip;
/* Add the boot aggregate to the IMA measurement list and extend
* the PCR register.
*
- * Calculate the boot aggregate, a SHA1 over tpm registers 0-7,
+ * Calculate the boot aggregate, a hash over tpm registers 0-7,
* assuming a TPM chip exists, and zeroes if the TPM chip does not
* exist. Add the boot aggregate measurement to the measurement
* list and extend the PCR register.
@@ -51,15 +51,27 @@ static int __init ima_add_boot_aggregate(void)
int violation = 0;
struct {
struct ima_digest_data hdr;
- char digest[TPM_DIGEST_SIZE];
+ char digest[TPM_MAX_DIGEST_SIZE];
} hash;
memset(iint, 0, sizeof(*iint));
memset(&hash, 0, sizeof(hash));
iint->ima_hash = &hash.hdr;
- iint->ima_hash->algo = HASH_ALGO_SHA1;
- iint->ima_hash->length = SHA1_DIGEST_SIZE;
-
+ iint->ima_hash->algo = ima_hash_algo;
+ iint->ima_hash->length = hash_digest_size[ima_hash_algo];
+
+ /*
+ * With TPM 2.0 hash agility, TPM chips could support multiple TPM
+ * PCR banks, allowing firmware to configure and enable different
+ * banks. The SHA1 bank is not necessarily enabled.
+ *
+ * Use the same hash algorithm for reading the TPM PCRs as for
+ * calculating the boot aggregate digest. Preference is given to
+ * the configured IMA default hash algorithm. Otherwise, use the
+ * TPM required banks - SHA256 for TPM 2.0, SHA1 for TPM 1.2. If
+ * SHA256 is not available, use the first PCR bank and if that is
+ * not mapped to a crypto ID, set boot_aggregate to zero.
+ */
if (ima_tpm_chip) {
result = ima_calc_boot_aggregate(&hash.hdr);
if (result < 0) {
--
2.17.1
A remount to a read-write filesystem is not safe when there's tree-log
to be replayed. Files that could be opened until now might be affected
by the changes in the tree-log.
A regular mount is needed to replay the log so the filesystem presents
the consistent view with the pending changes included.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/super.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 580ba7db67e5..a2554c651548 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1834,6 +1834,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
if (btrfs_super_log_root(fs_info->super_copy) != 0) {
+ btrfs_warn(f_info,
+ "mount required to replay tree-log, cannot remount read-write");
ret = -EINVAL;
goto restore;
}
--
2.24.0
[ This is a fix specific to 4.4.y and 4.9.y stable trees;
4.14.y and older are not affected ]
The of-node refcount fixes were made in commit 8d1667200850 ("ASoC: qcom:
Fix of-node refcount unbalance in apq8016_sbc_parse_of()"), but not enough
in 4.4.y and 4.9.y. The modification of link->codec_of_node is missing.
This fixes of-node refcount unbalance to link->codec_of_node.
Fixes: 8d1667200850 ("ASoC: qcom: Fix of-node refcount unbalance in apq8016_sbc_parse_of()")
Cc: Patrick Lai <plai(a)codeaurora.org>
Cc: Banajit Goswami <bgoswami(a)codeaurora.org>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu(a)toshiba.co.jp>
---
sound/soc/qcom/apq8016_sbc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c
index 886f2027e671..f2c71bcd06fa 100644
--- a/sound/soc/qcom/apq8016_sbc.c
+++ b/sound/soc/qcom/apq8016_sbc.c
@@ -112,7 +112,8 @@ static struct apq8016_sbc_data *apq8016_sbc_parse_of(struct snd_soc_card *card)
link->codec_of_node = of_parse_phandle(codec, "sound-dai", 0);
if (!link->codec_of_node) {
dev_err(card->dev, "error getting codec phandle\n");
- return ERR_PTR(-EINVAL);
+ ret = -EINVAL;
+ goto error;
}
ret = snd_soc_of_get_dai_name(cpu, &link->cpu_dai_name);
--
2.23.0
From: Michael Ellerman <mpe(a)ellerman.id.au>
There's an OF helper called of_dma_is_coherent(), which checks if a
device has a "dma-coherent" property to see if the device is coherent
for DMA.
But on some platforms devices are coherent by default, and on some
platforms it's not possible to update existing device trees to add the
"dma-coherent" property.
So add a Kconfig symbol to allow arch code to tell
of_dma_is_coherent() that devices are coherent by default, regardless
of the presence of the property.
Select that symbol on powerpc when NOT_COHERENT_CACHE is not set, ie.
when the system has a coherent cache.
Fixes: 92ea637edea3 ("of: introduce of_dma_is_coherent() helper")
Cc: stable(a)vger.kernel.org # v3.16+
Reported-by: Christian Zigotzky <chzigotzky(a)xenosoft.de>
Tested-by: Christian Zigotzky <chzigotzky(a)xenosoft.de>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Reviewed-by: Ulf Hansson <ulf.hansson(a)linaro.org>
Signed-off-by: Rob Herring <robh(a)kernel.org>
---
arch/powerpc/Kconfig | 1 +
drivers/of/Kconfig | 4 ++++
drivers/of/address.c | 6 +++++-
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2a70433..fd05ff4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -237,6 +237,7 @@ config PPC
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_SG_DMA_LENGTH
select OF
+ select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
select OF_EARLY_FLATTREE
select OLD_SIGACTION if PPC32
select OLD_SIGSUSPEND
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 37c2ccb..d916186 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -103,4 +103,8 @@ config OF_OVERLAY
config OF_NUMA
bool
+config OF_DMA_DEFAULT_COHERENT
+ # arches should select this if DMA is coherent by default for OF devices
+ bool
+
endif # OF
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 978427a..8f74c46 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -998,12 +998,16 @@ EXPORT_SYMBOL_GPL(of_dma_get_range);
* @np: device node
*
* It returns true if "dma-coherent" property was found
- * for this device in DT.
+ * for this device in the DT, or if DMA is coherent by
+ * default for OF devices on the current platform.
*/
bool of_dma_is_coherent(struct device_node *np)
{
struct device_node *node = of_node_get(np);
+ if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT))
+ return true;
+
while (node) {
if (of_property_read_bool(node, "dma-coherent")) {
of_node_put(node);
--
2.7.4
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: bd0c6624a110 - Linux 5.4.18-rc4
The results of these automated tests are provided below.
Overall result: FAILED (see details below)
Merge: OK
Compile: OK
Tests: FAILED
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/421403
One or more kernel tests failed:
ppc64le:
❌ Boot test
We hope that these logs can help you find the problem quickly. For the full
detail on our testing procedures, please scroll to the bottom of this message.
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking MACsec: sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking sctp-auth: sockopts test
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ audit: audit testsuite test
⚡⚡⚡ httpd: mod_ssl smoke sanity
⚡⚡⚡ tuned: tune-processes-through-perf
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: SCSI VPD
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm test suite
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ LTP: openposix test suite
🚧 ⚡⚡⚡ Networking vnic: ipvlan/basic
🚧 ⚡⚡⚡ iotop: sanity
🚧 ⚡⚡⚡ Usex - version 1.9-29
🚧 ⚡⚡⚡ storage: dm/common
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ lvm thinp sanity
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ stress: stress-ng
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ lvm thinp sanity
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ stress: stress-ng
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking MACsec: sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking sctp-auth: sockopts test
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ audit: audit testsuite test
⚡⚡⚡ httpd: mod_ssl smoke sanity
⚡⚡⚡ tuned: tune-processes-through-perf
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: SCSI VPD
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm test suite
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ LTP: openposix test suite
🚧 ⚡⚡⚡ Networking vnic: ipvlan/basic
🚧 ⚡⚡⚡ iotop: sanity
🚧 ⚡⚡⚡ Usex - version 1.9-29
🚧 ⚡⚡⚡ storage: dm/common
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
❌ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking MACsec: sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking sctp-auth: sockopts test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ audit: audit testsuite test
⚡⚡⚡ httpd: mod_ssl smoke sanity
⚡⚡⚡ tuned: tune-processes-through-perf
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm test suite
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ LTP: openposix test suite
🚧 ⚡⚡⚡ Networking vnic: ipvlan/basic
🚧 ⚡⚡⚡ iotop: sanity
🚧 ⚡⚡⚡ Usex - version 1.9-29
🚧 ⚡⚡⚡ storage: dm/common
x86_64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Storage SAN device stress - mpt3sas driver
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ pciutils: sanity smoke test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 4:
✅ Boot test
✅ Storage SAN device stress - megaraid_sas
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running are marked with ⏱. Reports for non-upstream kernels have
a Beaker recipe linked to next to each host.
This is the start of the stable review cycle for the 4.4.213 release.
There are 53 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 05 Feb 2020 16:17:59 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.213-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.213-rc1
Praveen Chaudhary <praveen5582(a)gmail.com>
net: Fix skb->csum update in inet_proto_csum_replace16().
Vasily Averin <vvs(a)virtuozzo.com>
l2t_seq_next should increase position index
Vasily Averin <vvs(a)virtuozzo.com>
seq_tab_next() should increase position index
Finn Thain <fthain(a)telegraphics.com.au>
net/sonic: Quiesce SONIC before re-initializing descriptor memory
Finn Thain <fthain(a)telegraphics.com.au>
net/sonic: Fix receive buffer handling
Finn Thain <fthain(a)telegraphics.com.au>
net/sonic: Use MMIO accessors
Finn Thain <fthain(a)telegraphics.com.au>
net/sonic: Add mutual exclusion for accessing shared state
Madalin Bucur <madalin.bucur(a)oss.nxp.com>
net/fsl: treat fsl,erratum-a011043
Manish Chopra <manishc(a)marvell.com>
qlcnic: Fix CPU soft lockup while collecting firmware dump
Hayes Wang <hayeswang(a)realtek.com>
r8152: get default setting of WOL before initializing
Michael Ellerman <mpe(a)ellerman.id.au>
airo: Add missing CAP_NET_ADMIN check in AIROOLDIOCTL/SIOCDEVPRIVATE
Michael Ellerman <mpe(a)ellerman.id.au>
airo: Fix possible info leak in AIROOLDIOCTL/SIOCDEVPRIVATE
Vladimir Murzin <vladimir.murzin(a)arm.com>
ARM: 8955/1: virt: Relax arch timer version check during early boot
Hannes Reinecke <hare(a)suse.de>
scsi: fnic: do not queue commands during fwreset
Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
vti[6]: fix packet tx through bpf_redirect()
Johan Hovold <johan(a)kernel.org>
Input: aiptek - use descriptors of current altsetting
Arnd Bergmann <arnd(a)arndb.de>
wireless: wext: avoid gcc -O3 warning
Cambda Zhu <cambda(a)linux.alibaba.com>
ixgbe: Fix calculation of queue with VFs and flow director on interface flap
Radoslaw Tyl <radoslawx.tyl(a)intel.com>
ixgbevf: Remove limit of 10 entries for unicast filter list
Lubomir Rintel <lkundrak(a)v3.sk>
clk: mmp2: Fix the order of timer mux parents
Lee Jones <lee.jones(a)linaro.org>
media: si470x-i2c: Move free() past last use of 'radio'
Bin Liu <b-liu(a)ti.com>
usb: dwc3: turn off VBUS when leaving host mode
Zhenzhong Duan <zhenzhong.duan(a)gmail.com>
ttyprintk: fix a potential deadlock in interrupt context issue
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: dvb-usb/dvb-usb-urb.c: initialize actlen to 0
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: gspca: zero usb_buf
Sean Young <sean(a)mess.org>
media: digitv: don't continue if remote control state can't be read
Jan Kara <jack(a)suse.cz>
reiserfs: Fix memory leak of journal device string
Dan Carpenter <dan.carpenter(a)oracle.com>
mm/mempolicy.c: fix out of bounds write in mpol_parse_str()
Dirk Behme <dirk.behme(a)de.bosch.com>
arm64: kbuild: remove compressed images on 'make ARCH=arm64 (dist)clean'
Herbert Xu <herbert(a)gondor.apana.org.au>
crypto: pcrypt - Fix user-after-free on module unload
Al Viro <viro(a)zeniv.linux.org.uk>
vfs: fix do_last() regression
Herbert Xu <herbert(a)gondor.apana.org.au>
crypto: af_alg - Use bh_lock_sock in sk_destruct
Eric Dumazet <edumazet(a)google.com>
net_sched: ematch: reject invalid TCF_EM_SIMPLE
Laura Abbott <labbott(a)fedoraproject.org>
usb-storage: Disable UAS on JMicron SATA enclosure
Arnd Bergmann <arnd(a)arndb.de>
atm: eni: fix uninitialized variable warning
Krzysztof Kozlowski <krzk(a)kernel.org>
net: wan: sdla: Fix cast from pointer to integer of different size
Fenghua Yu <fenghua.yu(a)intel.com>
drivers/net/b44: Change to non-atomic bit operations on pwol_mask
Andreas Kemnade <andreas(a)kemnade.info>
watchdog: rn5t618_wdt: fix module aliases
Johan Hovold <johan(a)kernel.org>
zd1211rw: fix storage endpoint lookup
Johan Hovold <johan(a)kernel.org>
rtl8xxxu: fix interface sanity check
Johan Hovold <johan(a)kernel.org>
brcmfmac: fix interface sanity check
Johan Hovold <johan(a)kernel.org>
ath9k: fix storage endpoint lookup
Malcolm Priestley <tvboxspy(a)gmail.com>
staging: vt6656: Fix false Tx excessive retries reporting.
Malcolm Priestley <tvboxspy(a)gmail.com>
staging: vt6656: use NULLFUCTION stack on mac80211
Malcolm Priestley <tvboxspy(a)gmail.com>
staging: vt6656: correct packet types for CTS protect, mode.
Colin Ian King <colin.king(a)canonical.com>
staging: wlan-ng: ensure error return is actually returned
Andrey Shvetsov <andrey.shvetsov(a)k2l.de>
staging: most: net: fix buffer overflow
Johan Hovold <johan(a)kernel.org>
USB: serial: ir-usb: fix IrLAP framing
Johan Hovold <johan(a)kernel.org>
USB: serial: ir-usb: fix link-speed handling
Johan Hovold <johan(a)kernel.org>
USB: serial: ir-usb: add missing endpoint sanity check
Johan Hovold <johan(a)kernel.org>
rsi_91x_usb: fix interface sanity check
Johan Hovold <johan(a)kernel.org>
orinoco_usb: fix interface sanity check
Takashi Iwai <tiwai(a)suse.de>
ALSA: pcm: Add missing copy ops check before clearing buffer
-------------
Diffstat:
Makefile | 4 +-
arch/arm/kernel/hyp-stub.S | 7 +-
arch/arm64/boot/Makefile | 2 +-
crypto/af_alg.c | 6 +-
crypto/pcrypt.c | 3 +-
drivers/atm/eni.c | 4 +-
drivers/char/ttyprintk.c | 15 ++-
drivers/clk/mmp/clk-of-mmp2.c | 2 +-
drivers/input/tablet/aiptek.c | 2 +-
drivers/media/radio/si470x/radio-si470x-i2c.c | 2 +-
drivers/media/usb/dvb-usb/digitv.c | 10 +-
drivers/media/usb/dvb-usb/dvb-usb-urb.c | 2 +-
drivers/media/usb/gspca/gspca.c | 2 +-
drivers/net/ethernet/broadcom/b44.c | 9 +-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 3 +-
drivers/net/ethernet/chelsio/cxgb4/l2t.c | 3 +-
drivers/net/ethernet/freescale/xgmac_mdio.c | 7 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 37 ++++--
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 5 -
drivers/net/ethernet/natsemi/sonic.c | 109 ++++++++++++++---
drivers/net/ethernet/natsemi/sonic.h | 25 ++--
.../net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 1 +
.../net/ethernet/qlogic/qlcnic/qlcnic_minidump.c | 2 +
drivers/net/usb/r8152.c | 9 +-
drivers/net/wan/sdla.c | 2 +-
drivers/net/wireless/airo.c | 20 ++-
drivers/net/wireless/ath/ath9k/hif_usb.c | 2 +-
drivers/net/wireless/brcm80211/brcmfmac/usb.c | 4 +-
drivers/net/wireless/orinoco/orinoco_usb.c | 4 +-
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.c | 2 +-
drivers/net/wireless/rsi/rsi_91x_usb.c | 2 +-
drivers/net/wireless/zd1211rw/zd_usb.c | 2 +-
drivers/scsi/fnic/fnic_scsi.c | 3 +
drivers/staging/most/aim-network/networking.c | 10 ++
drivers/staging/vt6656/device.h | 2 +
drivers/staging/vt6656/int.c | 6 +-
drivers/staging/vt6656/main_usb.c | 1 +
drivers/staging/vt6656/rxtx.c | 26 ++--
drivers/staging/wlan-ng/prism2mgmt.c | 2 +-
drivers/usb/dwc3/core.c | 3 +
drivers/usb/serial/ir-usb.c | 136 ++++++++++++++++-----
drivers/usb/storage/unusual_uas.h | 7 +-
drivers/watchdog/rn5t618_wdt.c | 1 +
fs/namei.c | 4 +-
fs/reiserfs/super.c | 2 +
include/linux/usb/irda.h | 13 +-
mm/mempolicy.c | 6 +-
net/core/utils.c | 20 ++-
net/ipv4/ip_vti.c | 13 +-
net/ipv6/ip6_vti.c | 13 +-
net/sched/ematch.c | 3 +
net/wireless/wext-core.c | 3 +-
sound/core/pcm_native.c | 2 +-
53 files changed, 418 insertions(+), 167 deletions(-)
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
As function_graph tracer can run when RCU is not "watching", it can not be
protected by synchronize_rcu() it requires running a task on each CPU before
it can be freed. Calling schedule_on_each_cpu(ftrace_sync) needs to be used.
Link: https://lore.kernel.org/r/20200205131110.GT2935@paulmck-ThinkPad-P72
Cc: stable(a)vger.kernel.org
Fixes: b9b0c831bed26 ("ftrace: Convert graph filter to use hash tables")
Reported-by: "Paul E. McKenney" <paulmck(a)kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 11 +++++++++--
kernel/trace/trace.h | 2 ++
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 481ede3eac13..3f7ee102868a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5867,8 +5867,15 @@ ftrace_graph_release(struct inode *inode, struct file *file)
mutex_unlock(&graph_lock);
- /* Wait till all users are no longer using the old hash */
- synchronize_rcu();
+ /*
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
+ schedule_on_each_cpu(ftrace_sync);
free_ftrace_hash(old_hash);
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8c52f5de9384..3c75d29bd861 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -979,6 +979,7 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
* Have to open code "rcu_dereference_sched()" because the
* function graph tracer can be called when RCU is not
* "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
*/
hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible());
@@ -1031,6 +1032,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
* Have to open code "rcu_dereference_sched()" because the
* function graph tracer can be called when RCU is not
* "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
*/
notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
!preemptible());
--
2.24.1
chip->allocated_banks, an array of tpm_bank_info structures, contains the
list of TPM algorithm IDs of allocated PCR banks. It also contains the
corresponding ID of the crypto subsystem, so that users of the TPM driver
can calculate a digest for a PCR extend operation.
However, if there is no mapping between TPM algorithm ID and crypto ID, the
crypto_id field of tpm_bank_info remains set to zero (the array is
allocated and initialized with kcalloc() in tpm2_get_pcr_allocation()).
Zero should not be used as value for unknown mappings, as it is a valid
crypto ID (HASH_ALGO_MD4).
Thus, initialize crypto_id to HASH_ALGO__LAST.
Fixes: 879b589210a9 ("tpm: retrieve digest size of unknown algorithms with PCR read")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reviewed-by: Petr Vorel <pvorel(a)suse.cz>
Cc: stable(a)vger.kernel.org
---
drivers/char/tpm/tpm2-cmd.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 13696deceae8..760329598b99 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -525,6 +525,8 @@ static int tpm2_init_bank_info(struct tpm_chip *chip, u32 bank_index)
return 0;
}
+ bank->crypto_id = HASH_ALGO__LAST;
+
return tpm2_pcr_read(chip, 0, &digest, &bank->digest_size);
}
--
2.17.1
Sasha Levin <sashal(a)kernel.org> writes:
> Hi,
>
> [This is an automated email]
>
>
> NOTE: The patch will not be queued to stable trees until it is upstream.
>
> How should we proceed with this patch?
Use V2 :)
> -----Original Message-----
> From: Sasha Levin [mailto:sashal@kernel.org]
> Sent: Wednesday, February 5, 2020 3:45 PM
> To: Sasha Levin <sashal(a)kernel.org>; Roberto Sassu
> <roberto.sassu(a)huawei.com>; zohar(a)linux.ibm.com;
> James.Bottomley(a)HansenPartnership.com
> Cc: linux-integrity(a)vger.kernel.org; stable(a)vger.kernel.org;
> stable(a)vger.kernel.org
> Subject: Re: [PATCH v2 2/8] ima: Switch to ima_hash_algo for boot
> aggregate
>
> Hi,
>
> [This is an automated email]
>
> This commit has been processed because it contains a -stable tag.
> The stable tag indicates that it's relevant for the following trees: all
>
> The bot has tested the following trees: v5.5.1, v5.4.17, v4.19.101, v4.14.169,
> v4.9.212, v4.4.212.
>
> v5.5.1: Build OK!
> v5.4.17: Build OK!
> v4.19.101: Failed to apply! Possible dependencies:
> 100b16a6f290 ("tpm: sort objects in the Makefile")
> 1ad6640cd614 ("tpm: move tpm1_pcr_extend to tpm1-cmd.c")
> 70a3199a7101 ("tpm: factor out tpm_get_timeouts()")
> 879b589210a9 ("tpm: retrieve digest size of unknown algorithms with PCR
> read")
Hi Sasha
this patch is necessary. However, backporting it won't be that easy
as it was part of a patch set. Before this patch, users of the TPM driver
could only read the SHA1 PCR bank. The IMA patch needs to read also
other PCR banks.
> NOTE: The patch will not be queued to stable trees until it is upstream.
>
> How should we proceed with this patch?
This question should be for Jarkko (added in CC), as some patches for the
TPM driver must be backported to apply the IMA patch.
Roberto
HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
Managing Director: Li Peng, Li Jian, Shi Yanli
According to the "Intel 64 and IA-32 Architectures Software Developer’s
Manual Volume 4: Model-Specific Registers" on Cherry Trail (Airmont)
devices the 4 lowest bits of the MSR_FSB_FREQ mask indicate the bus freq
unlike on e.g. Bay Trail where only the lowest 3 bits are used.
This is also the reason why MAX_NUM_FREQS is defined as 9, since
Cherry Trail SoCs have 9 possible frequencies, so we need to mask
the lo value from the MSR with 0x0f, not with 0x07 otherwise the
9th frequency will get interpreted as the 1st.
This commits bumps MAX_NUM_FREQS to 16 to avoid any possibility of
addressing the array out of bounds and makes the mask part of
the cpufreq struct so that we can set it per model.
While at it also log an error when the index points to an uninitialized
part of the freqs lookup-table.
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
arch/x86/kernel/tsc_msr.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 5fa41ac3feb1..95030895fffa 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -15,7 +15,7 @@
#include <asm/param.h>
#include <asm/tsc.h>
-#define MAX_NUM_FREQS 9
+#define MAX_NUM_FREQS 16 /* 4 bits to select the frequency */
/*
* If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be
@@ -27,6 +27,7 @@
struct freq_desc {
bool use_msr_plat;
u32 freqs[MAX_NUM_FREQS];
+ u32 mask;
};
/*
@@ -37,37 +38,44 @@ struct freq_desc {
static const struct freq_desc freq_desc_pnw = {
.use_msr_plat = false,
.freqs = { 0, 0, 0, 0, 0, 99840, 0, 83200 },
+ .mask = 0x07,
};
static const struct freq_desc freq_desc_clv = {
.use_msr_plat = false,
.freqs = { 0, 133200, 0, 0, 0, 99840, 0, 83200 },
+ .mask = 0x07,
};
static const struct freq_desc freq_desc_byt = {
.use_msr_plat = true,
.freqs = { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 },
+ .mask = 0x07,
};
static const struct freq_desc freq_desc_cht = {
.use_msr_plat = true,
.freqs = { 83300, 100000, 133300, 116700, 80000, 93300, 90000,
88900, 87500 },
+ .mask = 0x0f,
};
static const struct freq_desc freq_desc_tng = {
.use_msr_plat = true,
.freqs = { 0, 100000, 133300, 0, 0, 0, 0, 0 },
+ .mask = 0x07,
};
static const struct freq_desc freq_desc_ann = {
.use_msr_plat = true,
.freqs = { 83300, 100000, 133300, 100000, 0, 0, 0, 0 },
+ .mask = 0x0f,
};
static const struct freq_desc freq_desc_lgm = {
.use_msr_plat = true,
.freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
+ .mask = 0x0f,
};
static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
@@ -93,6 +101,7 @@ unsigned long cpu_khz_from_msr(void)
const struct freq_desc *freq_desc;
const struct x86_cpu_id *id;
unsigned long res;
+ int index;
id = x86_match_cpu(tsc_msr_cpu_ids);
if (!id)
@@ -109,13 +118,17 @@ unsigned long cpu_khz_from_msr(void)
/* Get FSB FREQ ID */
rdmsr(MSR_FSB_FREQ, lo, hi);
+ index = lo & freq_desc->mask;
/* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
- freq = freq_desc->freqs[lo & 0x7];
+ freq = freq_desc->freqs[index];
/* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
res = freq * ratio;
+ if (freq == 0)
+ pr_err("Error MSR_FSB_FREQ index %d is unknown\n", index);
+
#ifdef CONFIG_X86_LOCAL_APIC
lapic_timer_period = (freq * 1000) / HZ;
#endif
--
2.24.1
Hi,
[This is an automated email]
This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: 4.4+
The bot has tested the following trees: v5.5.1, v5.4.17, v4.19.101, v4.14.169, v4.9.212, v4.4.212.
v5.5.1: Build OK!
v5.4.17: Build OK!
v4.19.101: Build OK!
v4.14.169: Build OK!
v4.9.212: Build failed! Errors:
fs/btrfs/extent_map.c:238:6: error: implicit declaration of function ‘refcount_read’ [-Werror=implicit-function-declaration]
v4.4.212: Build failed! Errors:
fs/btrfs/extent_map.c:238:6: error: implicit declaration of function ‘refcount_read’ [-Werror=implicit-function-declaration]
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks,
Sasha
Congratulations,
Your email was randomly selected for the 2020 first quarter reimbursement via certified ATM CARD. Please reach Mrs. Sarah Buchiri with your code:U.N.D.C/2020/10/0109 for more information.
Contact Name: Mrs. Sarah Buchiri
Email: sarah.buchiri(a)gmail.com
Robert Andrew Piper
Assistant Secretary-General for Development Coordination
The driver was checking the number of endpoints of the first alternate
setting instead of the current one, something which could be used by a
malicious device (or USB descriptor fuzzer) to trigger a NULL-pointer
dereference.
Fixes: 1afca2b66aac ("Input: add Pegasus Notetaker tablet driver")
Cc: stable <stable(a)vger.kernel.org> # 4.8
Cc: Martin Kepplinger <martink(a)posteo.de>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/input/tablet/pegasus_notetaker.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/input/tablet/pegasus_notetaker.c b/drivers/input/tablet/pegasus_notetaker.c
index a1f3a0cb197e..38f087404f7a 100644
--- a/drivers/input/tablet/pegasus_notetaker.c
+++ b/drivers/input/tablet/pegasus_notetaker.c
@@ -275,7 +275,7 @@ static int pegasus_probe(struct usb_interface *intf,
return -ENODEV;
/* Sanity check that the device has an endpoint */
- if (intf->altsetting[0].desc.bNumEndpoints < 1) {
+ if (intf->cur_altsetting->desc.bNumEndpoints < 1) {
dev_err(&intf->dev, "Invalid number of endpoints\n");
return -EINVAL;
}
--
2.24.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/sashal/linux-stable.git
Commit: 0e9e3bd889e5 - iwlwifi: don't throw error when trying to remove IGTK
The results of these automated tests are provided below.
Overall result: FAILED (see details below)
Merge: OK
Compile: FAILED
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/420812
We attempted to compile the kernel for multiple architectures, but the compile
failed on one or more architectures:
ppc64le: FAILED (see build-ppc64le.log.xz attachment)
We hope that these logs can help you find the problem quickly. For the full
detail on our testing procedures, please scroll to the bottom of this message.
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j6 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j6 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j6 INSTALL_MOD_STRIP=1 targz-pkg
I'm announcing the release of the 5.5.2 kernel.
All users of the 5.5 kernel series must upgrade.
The updated 5.5.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.5.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/ABI/testing/sysfs-class-devfreq | 7 ++
Makefile | 2
arch/arm64/boot/Makefile | 2
arch/powerpc/kvm/book3s_pr.c | 1
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 32 +++++++-----
drivers/char/ttyprintk.c | 15 +++--
drivers/devfreq/devfreq.c | 9 +++
drivers/media/usb/dvb-usb/af9005.c | 2
drivers/media/usb/dvb-usb/digitv.c | 10 ++-
drivers/media/usb/dvb-usb/dvb-usb-urb.c | 2
drivers/media/usb/dvb-usb/vp7045.c | 21 +++++---
drivers/media/usb/gspca/gspca.c | 2
fs/btrfs/super.c | 10 +++
fs/cifs/smb2pdu.c | 2
fs/gfs2/lops.c | 68 ++++++++++++++++----------
fs/namei.c | 4 -
fs/reiserfs/super.c | 2
kernel/cgroup/cgroup.c | 11 ++--
lib/test_bitmap.c | 9 +--
mm/mempolicy.c | 6 +-
net/bluetooth/hci_sock.c | 3 +
security/tomoyo/common.c | 11 +---
tools/include/linux/string.h | 8 +++
tools/lib/string.c | 7 ++
tools/perf/builtin-c2c.c | 10 ++-
25 files changed, 172 insertions(+), 84 deletions(-)
Al Viro (1):
vfs: fix do_last() regression
Andreas Gruenbacher (1):
gfs2: Another gfs2_find_jhead fix
Andres Freund (1):
perf c2c: Fix return type for histogram sorting comparision functions
Andy Shevchenko (1):
lib/test_bitmap: correct test data offsets for 32-bit
Chanwoo Choi (1):
PM / devfreq: Add new name attribute for sysfs
Dan Carpenter (2):
mm/mempolicy.c: fix out of bounds write in mpol_parse_str()
Bluetooth: Fix race condition in hci_release_sock()
David Michael (1):
KVM: PPC: Book3S PR: Fix -Werror=return-type build failure
Dirk Behme (1):
arm64: kbuild: remove compressed images on 'make ARCH=arm64 (dist)clean'
Greg Kroah-Hartman (1):
Linux 5.5.2
Hans Verkuil (2):
media: gspca: zero usb_buf
media: dvb-usb/dvb-usb-urb.c: initialize actlen to 0
Jan Kara (1):
reiserfs: Fix memory leak of journal device string
Josef Bacik (1):
btrfs: do not zero f_bavail if we have available space
Michal Koutný (1):
cgroup: Prevent double killing of css when enabling threaded cgroup
Ronnie Sahlberg (1):
cifs: fix soft mounts hanging in the reconnect code
Sean Young (3):
media: digitv: don't continue if remote control state can't be read
media: af9005: uninitialized variable printked
media: vp7045: do not read uninitialized values if usb transfer fails
Tetsuo Handa (1):
tomoyo: Use atomic_t for statistics counter
Vitaly Chikunov (1):
tools lib: Fix builds when glibc contains strlcpy()
Xiaochen Shen (3):
x86/resctrl: Fix a deadlock due to inaccurate reference
x86/resctrl: Fix use-after-free when deleting resource groups
x86/resctrl: Fix use-after-free due to inaccurate refcount of rdtgroup
Zhenzhong Duan (1):
ttyprintk: fix a potential deadlock in interrupt context issue
The driver currently creates a broken topology,
with a source-to-source link and a sink-to-sink
link instead of two source-to-sink links.
Reported-by: Nicolas Dufresne <nicolas(a)ndufresne.ca>
Cc: <stable(a)vger.kernel.org> # for v5.3 and up
Signed-off-by: Ezequiel Garcia <ezequiel(a)collabora.com>
---
drivers/staging/media/hantro/hantro_drv.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 448493a08805..840b96bee082 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -556,13 +556,13 @@ static int hantro_attach_func(struct hantro_dev *vpu,
goto err_rel_entity1;
/* Connect the three entities */
- ret = media_create_pad_link(&func->vdev.entity, 0, &func->proc, 1,
+ ret = media_create_pad_link(&func->vdev.entity, 0, &func->proc, 0,
MEDIA_LNK_FL_IMMUTABLE |
MEDIA_LNK_FL_ENABLED);
if (ret)
goto err_rel_entity2;
- ret = media_create_pad_link(&func->proc, 0, &func->sink, 0,
+ ret = media_create_pad_link(&func->proc, 1, &func->sink, 0,
MEDIA_LNK_FL_IMMUTABLE |
MEDIA_LNK_FL_ENABLED);
if (ret)
--
2.25.0
The patch titled
Subject: ARM: dma-api: fix max_pfn off-by-one error in __dma_supported()
has been removed from the -mm tree. Its filename was
arm-dma-api-fix-max_pfn-off-by-one-error-in-__dma_supported.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Chen-Yu Tsai <wens(a)csie.org>
Subject: ARM: dma-api: fix max_pfn off-by-one error in __dma_supported()
max_pfn, as set in arch/arm/mm/init.c:
static void __init find_limits(unsigned long *min,
unsigned long *max_low,
unsigned long *max_high)
{
*max_low = PFN_DOWN(memblock_get_current_limit());
*min = PFN_UP(memblock_start_of_DRAM());
*max_high = PFN_DOWN(memblock_end_of_DRAM());
}
with memblock_end_of_DRAM() pointing to the next byte after DRAM. As
such, max_pfn points to the PFN after the end of DRAM.
Thus when using max_pfn to check DMA masks, we should subtract one when
checking DMA ranges against it.
Commit 8bf1268f48ad ("ARM: dma-api: fix off-by-one error in
__dma_supported()") fixed the same issue, but missed this spot.
This issue was found while working on the sun4i-csi v4l2 driver on the
Allwinner R40 SoC. On Allwinner SoCs, DRAM is offset at 0x40000000, and
we are starting to use of_dma_configure() with the "dma-ranges" property
in the device tree to have the DMA API handle the offset.
In this particular instance, dma-ranges was set to the same range as the
actual available (2 GiB) DRAM. The following error appeared when the
driver attempted to allocate a buffer:
sun4i-csi 1c09000.csi: Coherent DMA mask 0x7fffffff (pfn 0x40000-0xc0000)
covers a smaller range of system memory than the DMA zone pfn 0x0-0xc0001
sun4i-csi 1c09000.csi: dma_alloc_coherent of size 307200 failed
Fixing the off-by-one error makes things work.
Link: http://lkml.kernel.org/r/20191224030239.5656-1-wens@kernel.org
Fixes: 11a5aa32562e ("ARM: dma-mapping: check DMA mask against available memory")
Fixes: 9f28cde0bc64 ("ARM: another fix for the DMA mapping checks")
Fixes: ab746573c405 ("ARM: dma-mapping: allow larger DMA mask than supported")
Signed-off-by: Chen-Yu Tsai <wens(a)csie.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Cc: Russell King <linux(a)armlinux.org.uk>
Cc: Robin Murphy <robin.murphy(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm/mm/dma-mapping.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/arm/mm/dma-mapping.c~arm-dma-api-fix-max_pfn-off-by-one-error-in-__dma_supported
+++ a/arch/arm/mm/dma-mapping.c
@@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops);
static int __dma_supported(struct device *dev, u64 mask, bool warn)
{
- unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
+ unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit);
/*
* Translate the device's DMA mask to a PFN limit. This
_
Patches currently in -mm which might be from wens(a)csie.org are
The patch titled
Subject: mm/mmu_gather: invalidate TLB correctly on batch allocation failure and flush
has been removed from the -mm tree. Its filename was
mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Peter Zijlstra <peterz(a)infradead.org>
Subject: mm/mmu_gather: invalidate TLB correctly on batch allocation failure and flush
Architectures for which we have hardware walkers of Linux page table
should flush TLB on mmu gather batch allocation failures and batch flush.
Some architectures like POWER supports multiple translation modes (hash
and radix) and in the case of POWER only radix translation mode needs the
above TLBI. This is because for hash translation mode kernel wants to
avoid this extra flush since there are no hardware walkers of linux page
table. With radix translation, the hardware also walks linux page table
and with that, kernel needs to make sure to TLB invalidate page walk cache
before page table pages are freed.
More details in commit d86564a2f085 ("mm/tlb, x86/mm: Support invalidating
TLB caches for RCU_TABLE_FREE")
The changes to sparc are to make sure we keep the old behavior since we
are now removing HAVE_RCU_TABLE_NO_INVALIDATE. The default value for
tlb_needs_table_invalidate is to always force an invalidate and sparc can
avoid the table invalidate. Hence we define tlb_needs_table_invalidate to
false for sparc architecture.
Link: http://lkml.kernel.org/r/20200116064531.483522-3-aneesh.kumar@linux.ibm.com
Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Acked-by: Michael Ellerman <mpe(a)ellerman.id.au> [powerpc]
Cc: <stable(a)vger.kernel.org> [4.14+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/Kconfig | 3 ---
arch/powerpc/Kconfig | 1 -
arch/powerpc/include/asm/tlb.h | 11 +++++++++++
arch/sparc/Kconfig | 1 -
arch/sparc/include/asm/tlb_64.h | 9 +++++++++
include/asm-generic/tlb.h | 22 +++++++++++++++-------
mm/mmu_gather.c | 16 ++++++++--------
7 files changed, 43 insertions(+), 20 deletions(-)
--- a/arch/Kconfig~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/Kconfig
@@ -396,9 +396,6 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
config HAVE_RCU_TABLE_FREE
bool
-config HAVE_RCU_TABLE_NO_INVALIDATE
- bool
-
config HAVE_MMU_GATHER_PAGE_SIZE
bool
--- a/arch/powerpc/include/asm/tlb.h~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/powerpc/include/asm/tlb.h
@@ -26,6 +26,17 @@
#define tlb_flush tlb_flush
extern void tlb_flush(struct mmu_gather *tlb);
+/*
+ * book3s:
+ * Hash does not use the linux page-tables, so we can avoid
+ * the TLB invalidate for page-table freeing, Radix otoh does use the
+ * page-tables and needs the TLBI.
+ *
+ * nohash:
+ * We still do TLB invalidate in the __pte_free_tlb routine before we
+ * add the page table pages to mmu gather table batch.
+ */
+#define tlb_needs_table_invalidate() radix_enabled()
/* Get the generic bits... */
#include <asm-generic/tlb.h>
--- a/arch/powerpc/Kconfig~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/powerpc/Kconfig
@@ -223,7 +223,6 @@ config PPC
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
--- a/arch/sparc/include/asm/tlb_64.h~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/sparc/include/asm/tlb_64.h
@@ -28,6 +28,15 @@ void flush_tlb_pending(void);
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#define tlb_flush(tlb) flush_tlb_pending()
+/*
+ * SPARC64's hardware TLB fill does not use the Linux page-tables
+ * and therefore we don't need a TLBI when freeing page-table pages.
+ */
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define tlb_needs_table_invalidate() (false)
+#endif
+
#include <asm-generic/tlb.h>
#endif /* _SPARC64_TLB_H */
--- a/arch/sparc/Kconfig~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/sparc/Kconfig
@@ -65,7 +65,6 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
--- a/include/asm-generic/tlb.h~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/include/asm-generic/tlb.h
@@ -137,13 +137,6 @@
* When used, an architecture is expected to provide __tlb_remove_table()
* which does the actual freeing of these pages.
*
- * HAVE_RCU_TABLE_NO_INVALIDATE
- *
- * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
- * freeing the page-table pages. This can be avoided if you use
- * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
- * page-tables natively.
- *
* MMU_GATHER_NO_RANGE
*
* Use this if your architecture lacks an efficient flush_tlb_range().
@@ -189,8 +182,23 @@ struct mmu_table_batch {
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+/*
+ * This allows an architecture that does not use the linux page-tables for
+ * hardware to skip the TLBI when freeing page tables.
+ */
+#ifndef tlb_needs_table_invalidate
+#define tlb_needs_table_invalidate() (true)
#endif
+#else
+
+#ifdef tlb_needs_table_invalidate
+#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE
+#endif
+
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+
+
#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
/*
* If we can't allocate a page to make a big batch of page pointers
--- a/mm/mmu_gather.c~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/mm/mmu_gather.c
@@ -102,14 +102,14 @@ bool __tlb_remove_page_size(struct mmu_g
*/
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
-#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
- /*
- * Invalidate page-table caches used by hardware walkers. Then we still
- * need to RCU-sched wait while freeing the pages because software
- * walkers can still be in-flight.
- */
- tlb_flush_mmu_tlbonly(tlb);
-#endif
+ if (tlb_needs_table_invalidate()) {
+ /*
+ * Invalidate page-table caches used by hardware walkers. Then
+ * we still need to RCU-sched wait while freeing the pages
+ * because software walkers can still be in-flight.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+ }
}
static void tlb_remove_table_smp_sync(void *arg)
_
Patches currently in -mm which might be from peterz(a)infradead.org are
The patch titled
Subject: powerpc/mmu_gather: enable RCU_TABLE_FREE even for !SMP case
has been removed from the -mm tree. Its filename was
powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
Subject: powerpc/mmu_gather: enable RCU_TABLE_FREE even for !SMP case
Patch series "Fixup page directory freeing", v4.
This is a repost of patch series from Peter with the arch specific changes
except ppc64 dropped. ppc64 changes are added here because we are redoing
the patch series on top of ppc64 changes. This makes it easy to backport
these changes. Only the first 2 patches need to be backported to stable.
The thing is, on anything SMP, freeing page directories should observe the
exact same order as normal page freeing:
1) unhook page/directory
2) TLB invalidate
3) free page/directory
Without this, any concurrent page-table walk could end up with a
Use-after-Free. This is esp. trivial for anything that has software
page-table walkers (HAVE_FAST_GUP / software TLB fill) or the hardware
caches partial page-walks (ie. caches page directories).
Even on UP this might give issues since mmu_gather is preemptible these
days. An interrupt or preempted task accessing user pages might stumble
into the free page if the hardware caches page directories.
This patch series fixes ppc64 and add generic MMU_GATHER changes to
support the conversion of other architectures. I haven't added patches
w.r.t other architecture because they are yet to be acked.
This patch (of 9):
A followup patch is going to make sure we correctly invalidate page walk
cache before we free page table pages. In order to keep things simple
enable RCU_TABLE_FREE even for !SMP so that we don't have to fixup the
!SMP case differently in the followup patch
!SMP case is right now broken for radix translation w.r.t page walk
cache flush. We can get interrupted in between page table free and
that would imply we have page walk cache entries pointing to tables
which got freed already. Michael said "both our platforms that run on
Power9 force SMP on in Kconfig, so the !SMP case is unlikely to be a
problem for anyone in practice, unless they've hacked their kernel to
build it !SMP."
Link: http://lkml.kernel.org/r/20200116064531.483522-2-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/powerpc/Kconfig | 2 +-
arch/powerpc/include/asm/book3s/32/pgalloc.h | 8 --------
arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 --
arch/powerpc/include/asm/nohash/pgalloc.h | 8 --------
arch/powerpc/mm/book3s64/pgtable.c | 7 -------
5 files changed, 1 insertion(+), 26 deletions(-)
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -49,7 +49,6 @@ static inline void pgtable_free(void *ta
#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb,
void *table, int shift)
{
@@ -66,13 +65,6 @@ static inline void __tlb_remove_table(vo
pgtable_free(table, shift);
}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_l
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
extern void __tlb_remove_table(void *_table);
-#endif
void pte_frag_destroy(void *pte_frag);
static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
--- a/arch/powerpc/include/asm/nohash/pgalloc.h~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -46,7 +46,6 @@ static inline void pgtable_free(void *ta
#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
unsigned long pgf = (unsigned long)table;
@@ -64,13 +63,6 @@ static inline void __tlb_remove_table(vo
pgtable_free(table, shift);
}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
--- a/arch/powerpc/Kconfig~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/Kconfig
@@ -222,7 +222,7 @@ config PPC
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_FREE
select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
--- a/arch/powerpc/mm/book3s64/pgtable.c~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/mm/book3s64/pgtable.c
@@ -378,7 +378,6 @@ static inline void pgtable_free(void *ta
}
}
-#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
{
unsigned long pgf = (unsigned long)table;
@@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table)
return pgtable_free(table, index);
}
-#else
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
-{
- return pgtable_free(table, index);
-}
-#endif
#ifdef CONFIG_PROC_FS
atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
_
Patches currently in -mm which might be from aneesh.kumar(a)linux.ibm.com are
The patch titled
Subject: mm/page_alloc.c: fix uninitialized memmaps on a partially populated last section
has been removed from the -mm tree. Its filename was
mm-fix-uninitialized-memmaps-on-a-partially-populated-last-section.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/page_alloc.c: fix uninitialized memmaps on a partially populated last section
Patch series "mm: fix max_pfn not falling on section boundary", v2.
Playing with different memory sizes for a x86-64 guest, I discovered that
some memmaps (highest section if max_mem does not fall on the section
boundary) are marked as being valid and online, but contain garbage. We
have to properly initialize these memmaps.
Looking at /proc/kpageflags and friends, I found some more issues,
partially related to this.
This patch (of 3):
If max_pfn is not aligned to a section boundary, we can easily run into
BUGs. This can e.g., be triggered on x86-64 under QEMU by specifying a
memory size that is not a multiple of 128MB (e.g., 4097MB, but also
4160MB). I was told that on real HW, we can easily have this scenario
(esp., one of the main reasons sub-section hotadd of devmem was added).
The issue is, that we have a valid memmap (pfn_valid()) for the whole
section, and the whole section will be marked "online".
pfn_to_online_page() will succeed, but the memmap contains garbage.
E.g., doing a "./page-types -r -a 0x144001" when QEMU was started with "-m
4160M" - (see tools/vm/page-types.c):
[ 200.476376] BUG: unable to handle page fault for address: fffffffffffffffe
[ 200.477500] #PF: supervisor read access in kernel mode
[ 200.478334] #PF: error_code(0x0000) - not-present page
[ 200.479076] PGD 59614067 P4D 59614067 PUD 59616067 PMD 0
[ 200.479557] Oops: 0000 [#4] SMP NOPTI
[ 200.479875] CPU: 0 PID: 603 Comm: page-types Tainted: G D W 5.5.0-rc1-next-20191209 #93
[ 200.480646] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu4
[ 200.481648] RIP: 0010:stable_page_flags+0x4d/0x410
[ 200.482061] Code: f3 ff 41 89 c0 48 b8 00 00 00 00 01 00 00 00 45 84 c0 0f 85 cd 02 00 00 48 8b 53 08 48 8b 2b 48f
[ 200.483644] RSP: 0018:ffffb139401cbe60 EFLAGS: 00010202
[ 200.484091] RAX: fffffffffffffffe RBX: fffffbeec5100040 RCX: 0000000000000000
[ 200.484697] RDX: 0000000000000001 RSI: ffffffff9535c7cd RDI: 0000000000000246
[ 200.485313] RBP: ffffffffffffffff R08: 0000000000000000 R09: 0000000000000000
[ 200.485917] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000144001
[ 200.486523] R13: 00007ffd6ba55f48 R14: 00007ffd6ba55f40 R15: ffffb139401cbf08
[ 200.487130] FS: 00007f68df717580(0000) GS:ffff9ec77fa00000(0000) knlGS:0000000000000000
[ 200.487804] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 200.488295] CR2: fffffffffffffffe CR3: 0000000135d48000 CR4: 00000000000006f0
[ 200.488897] Call Trace:
[ 200.489115] kpageflags_read+0xe9/0x140
[ 200.489447] proc_reg_read+0x3c/0x60
[ 200.489755] vfs_read+0xc2/0x170
[ 200.490037] ksys_pread64+0x65/0xa0
[ 200.490352] do_syscall_64+0x5c/0xa0
[ 200.490665] entry_SYSCALL_64_after_hwframe+0x49/0xbe
But it can be triggered much easier via "cat /proc/kpageflags > /dev/null"
after cold/hot plugging a DIMM to such a system:
[root@localhost ~]# cat /proc/kpageflags > /dev/null
[ 111.517275] BUG: unable to handle page fault for address: fffffffffffffffe
[ 111.517907] #PF: supervisor read access in kernel mode
[ 111.518333] #PF: error_code(0x0000) - not-present page
[ 111.518771] PGD a240e067 P4D a240e067 PUD a2410067 PMD 0
This patch fixes that by at least zero-ing out that memmap (so e.g.,
page_to_pfn() will not crash). Commit 907ec5fca3dc ("mm: zero remaining
unavailable struct pages") tried to fix a similar issue, but forgot to
consider this special case.
After this patch, there are still problems to solve. E.g., not all of
these pages falling into a memory hole will actually get initialized later
and set PageReserved - they are only zeroed out - but at least the
immediate crashes are gone. A follow-up patch will take care of this.
Link: http://lkml.kernel.org/r/20191211163201.17179-2-david@redhat.com
Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: Pavel Tatashin <pasha.tatashin(a)oracle.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Steven Sistare <steven.sistare(a)oracle.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Cc: Bob Picco <bob.picco(a)oracle.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Alexey Dobriyan <adobriyan(a)gmail.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Stephen Rothwell <sfr(a)canb.auug.org.au>
Cc: <stable(a)vger.kernel.org> [4.15+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
--- a/mm/page_alloc.c~mm-fix-uninitialized-memmaps-on-a-partially-populated-last-section
+++ a/mm/page_alloc.c
@@ -6947,7 +6947,8 @@ static u64 zero_pfn_range(unsigned long
* This function also addresses a similar issue where struct pages are left
* uninitialized because the physical address range is not covered by
* memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=.
+ * layout is manually configured via memmap=, or when the highest physical
+ * address (max_pfn) does not end on a section boundary.
*/
void __init zero_resv_unavail(void)
{
@@ -6965,7 +6966,16 @@ void __init zero_resv_unavail(void)
pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start));
next = end;
}
- pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn);
+
+ /*
+ * Early sections always have a fully populated memmap for the whole
+ * section - see pfn_valid(). If the last section has holes at the
+ * end and that section is marked "online", the memmap will be
+ * considered initialized. Make sure that memmap has a well defined
+ * state.
+ */
+ pgcnt += zero_pfn_range(PFN_DOWN(next),
+ round_up(max_pfn, PAGES_PER_SECTION));
/*
* Struct pages that do not have backing memory. This could be because
_
Patches currently in -mm which might be from david(a)redhat.com are
drivers-base-memoryc-cache-memory-blocks-in-xarray-to-accelerate-lookup-fix.patch
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 6213eed0e444 - Linux 5.4.18-rc1
The results of these automated tests are provided below.
Overall result: FAILED (see details below)
Merge: OK
Compile: OK
Tests: FAILED
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/419091
One or more kernel tests failed:
ppc64le:
❌ LTP
❌ ALSA PCM loopback test
We hope that these logs can help you find the problem quickly. For the full
detail on our testing procedures, please scroll to the bottom of this message.
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
❌ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
❌ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 2:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ Storage SAN device stress - mpt3sas driver
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests: ext4
⚡⚡⚡ xfstests: xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ lvm thinp sanity
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ stress: stress-ng
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
✅ Boot test
✅ Storage SAN device stress - megaraid_sas
Host 4:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ pciutils: sanity smoke test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 5:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running are marked with ⏱. Reports for non-upstream kernels have
a Beaker recipe linked to next to each host.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 9028ac4fc837 - Linux 5.4.18-rc3
The results of these automated tests are provided below.
Overall result: FAILED (see details below)
Merge: OK
Compile: OK
Tests: FAILED
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/419366
One or more kernel tests failed:
ppc64le:
❌ LTP
aarch64:
❌ Boot test
We hope that these logs can help you find the problem quickly. For the full
detail on our testing procedures, please scroll to the bottom of this message.
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
❌ Boot test
⚡⚡⚡ xfstests: ext4
⚡⚡⚡ xfstests: xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ lvm thinp sanity
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ stress: stress-ng
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
ppc64le:
Host 1:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
❌ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
x86_64:
Host 1:
✅ Boot test
✅ Storage SAN device stress - megaraid_sas
Host 2:
✅ Boot test
✅ Storage SAN device stress - mpt3sas driver
Host 3:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ pciutils: sanity smoke test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 4:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running are marked with ⏱. Reports for non-upstream kernels have
a Beaker recipe linked to next to each host.
From: Chen-Yu Tsai <wens(a)csie.org>
Subject: ARM: dma-api: fix max_pfn off-by-one error in __dma_supported()
max_pfn, as set in arch/arm/mm/init.c:
static void __init find_limits(unsigned long *min,
unsigned long *max_low,
unsigned long *max_high)
{
*max_low = PFN_DOWN(memblock_get_current_limit());
*min = PFN_UP(memblock_start_of_DRAM());
*max_high = PFN_DOWN(memblock_end_of_DRAM());
}
with memblock_end_of_DRAM() pointing to the next byte after DRAM. As
such, max_pfn points to the PFN after the end of DRAM.
Thus when using max_pfn to check DMA masks, we should subtract one when
checking DMA ranges against it.
Commit 8bf1268f48ad ("ARM: dma-api: fix off-by-one error in
__dma_supported()") fixed the same issue, but missed this spot.
This issue was found while working on the sun4i-csi v4l2 driver on the
Allwinner R40 SoC. On Allwinner SoCs, DRAM is offset at 0x40000000, and
we are starting to use of_dma_configure() with the "dma-ranges" property
in the device tree to have the DMA API handle the offset.
In this particular instance, dma-ranges was set to the same range as the
actual available (2 GiB) DRAM. The following error appeared when the
driver attempted to allocate a buffer:
sun4i-csi 1c09000.csi: Coherent DMA mask 0x7fffffff (pfn 0x40000-0xc0000)
covers a smaller range of system memory than the DMA zone pfn 0x0-0xc0001
sun4i-csi 1c09000.csi: dma_alloc_coherent of size 307200 failed
Fixing the off-by-one error makes things work.
Link: http://lkml.kernel.org/r/20191224030239.5656-1-wens@kernel.org
Fixes: 11a5aa32562e ("ARM: dma-mapping: check DMA mask against available memory")
Fixes: 9f28cde0bc64 ("ARM: another fix for the DMA mapping checks")
Fixes: ab746573c405 ("ARM: dma-mapping: allow larger DMA mask than supported")
Signed-off-by: Chen-Yu Tsai <wens(a)csie.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Cc: Russell King <linux(a)armlinux.org.uk>
Cc: Robin Murphy <robin.murphy(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm/mm/dma-mapping.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/arm/mm/dma-mapping.c~arm-dma-api-fix-max_pfn-off-by-one-error-in-__dma_supported
+++ a/arch/arm/mm/dma-mapping.c
@@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops);
static int __dma_supported(struct device *dev, u64 mask, bool warn)
{
- unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
+ unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit);
/*
* Translate the device's DMA mask to a PFN limit. This
_
From: Peter Zijlstra <peterz(a)infradead.org>
Subject: mm/mmu_gather: invalidate TLB correctly on batch allocation failure and flush
Architectures for which we have hardware walkers of Linux page table
should flush TLB on mmu gather batch allocation failures and batch flush.
Some architectures like POWER supports multiple translation modes (hash
and radix) and in the case of POWER only radix translation mode needs the
above TLBI. This is because for hash translation mode kernel wants to
avoid this extra flush since there are no hardware walkers of linux page
table. With radix translation, the hardware also walks linux page table
and with that, kernel needs to make sure to TLB invalidate page walk cache
before page table pages are freed.
More details in commit d86564a2f085 ("mm/tlb, x86/mm: Support invalidating
TLB caches for RCU_TABLE_FREE")
The changes to sparc are to make sure we keep the old behavior since we
are now removing HAVE_RCU_TABLE_NO_INVALIDATE. The default value for
tlb_needs_table_invalidate is to always force an invalidate and sparc can
avoid the table invalidate. Hence we define tlb_needs_table_invalidate to
false for sparc architecture.
Link: http://lkml.kernel.org/r/20200116064531.483522-3-aneesh.kumar@linux.ibm.com
Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Acked-by: Michael Ellerman <mpe(a)ellerman.id.au> [powerpc]
Cc: <stable(a)vger.kernel.org> [4.14+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/Kconfig | 3 ---
arch/powerpc/Kconfig | 1 -
arch/powerpc/include/asm/tlb.h | 11 +++++++++++
arch/sparc/Kconfig | 1 -
arch/sparc/include/asm/tlb_64.h | 9 +++++++++
include/asm-generic/tlb.h | 22 +++++++++++++++-------
mm/mmu_gather.c | 16 ++++++++--------
7 files changed, 43 insertions(+), 20 deletions(-)
--- a/arch/Kconfig~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/Kconfig
@@ -396,9 +396,6 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
config HAVE_RCU_TABLE_FREE
bool
-config HAVE_RCU_TABLE_NO_INVALIDATE
- bool
-
config HAVE_MMU_GATHER_PAGE_SIZE
bool
--- a/arch/powerpc/include/asm/tlb.h~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/powerpc/include/asm/tlb.h
@@ -26,6 +26,17 @@
#define tlb_flush tlb_flush
extern void tlb_flush(struct mmu_gather *tlb);
+/*
+ * book3s:
+ * Hash does not use the linux page-tables, so we can avoid
+ * the TLB invalidate for page-table freeing, Radix otoh does use the
+ * page-tables and needs the TLBI.
+ *
+ * nohash:
+ * We still do TLB invalidate in the __pte_free_tlb routine before we
+ * add the page table pages to mmu gather table batch.
+ */
+#define tlb_needs_table_invalidate() radix_enabled()
/* Get the generic bits... */
#include <asm-generic/tlb.h>
--- a/arch/powerpc/Kconfig~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/powerpc/Kconfig
@@ -223,7 +223,6 @@ config PPC
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
--- a/arch/sparc/include/asm/tlb_64.h~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/sparc/include/asm/tlb_64.h
@@ -28,6 +28,15 @@ void flush_tlb_pending(void);
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#define tlb_flush(tlb) flush_tlb_pending()
+/*
+ * SPARC64's hardware TLB fill does not use the Linux page-tables
+ * and therefore we don't need a TLBI when freeing page-table pages.
+ */
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define tlb_needs_table_invalidate() (false)
+#endif
+
#include <asm-generic/tlb.h>
#endif /* _SPARC64_TLB_H */
--- a/arch/sparc/Kconfig~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/arch/sparc/Kconfig
@@ -65,7 +65,6 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
--- a/include/asm-generic/tlb.h~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/include/asm-generic/tlb.h
@@ -137,13 +137,6 @@
* When used, an architecture is expected to provide __tlb_remove_table()
* which does the actual freeing of these pages.
*
- * HAVE_RCU_TABLE_NO_INVALIDATE
- *
- * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
- * freeing the page-table pages. This can be avoided if you use
- * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
- * page-tables natively.
- *
* MMU_GATHER_NO_RANGE
*
* Use this if your architecture lacks an efficient flush_tlb_range().
@@ -189,8 +182,23 @@ struct mmu_table_batch {
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+/*
+ * This allows an architecture that does not use the linux page-tables for
+ * hardware to skip the TLBI when freeing page tables.
+ */
+#ifndef tlb_needs_table_invalidate
+#define tlb_needs_table_invalidate() (true)
#endif
+#else
+
+#ifdef tlb_needs_table_invalidate
+#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE
+#endif
+
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+
+
#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
/*
* If we can't allocate a page to make a big batch of page pointers
--- a/mm/mmu_gather.c~mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush
+++ a/mm/mmu_gather.c
@@ -102,14 +102,14 @@ bool __tlb_remove_page_size(struct mmu_g
*/
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
-#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
- /*
- * Invalidate page-table caches used by hardware walkers. Then we still
- * need to RCU-sched wait while freeing the pages because software
- * walkers can still be in-flight.
- */
- tlb_flush_mmu_tlbonly(tlb);
-#endif
+ if (tlb_needs_table_invalidate()) {
+ /*
+ * Invalidate page-table caches used by hardware walkers. Then
+ * we still need to RCU-sched wait while freeing the pages
+ * because software walkers can still be in-flight.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+ }
}
static void tlb_remove_table_smp_sync(void *arg)
_
From: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
Subject: powerpc/mmu_gather: enable RCU_TABLE_FREE even for !SMP case
Patch series "Fixup page directory freeing", v4.
This is a repost of patch series from Peter with the arch specific changes
except ppc64 dropped. ppc64 changes are added here because we are redoing
the patch series on top of ppc64 changes. This makes it easy to backport
these changes. Only the first 2 patches need to be backported to stable.
The thing is, on anything SMP, freeing page directories should observe the
exact same order as normal page freeing:
1) unhook page/directory
2) TLB invalidate
3) free page/directory
Without this, any concurrent page-table walk could end up with a
Use-after-Free. This is esp. trivial for anything that has software
page-table walkers (HAVE_FAST_GUP / software TLB fill) or the hardware
caches partial page-walks (ie. caches page directories).
Even on UP this might give issues since mmu_gather is preemptible these
days. An interrupt or preempted task accessing user pages might stumble
into the free page if the hardware caches page directories.
This patch series fixes ppc64 and add generic MMU_GATHER changes to
support the conversion of other architectures. I haven't added patches
w.r.t other architecture because they are yet to be acked.
This patch (of 9):
A followup patch is going to make sure we correctly invalidate page walk
cache before we free page table pages. In order to keep things simple
enable RCU_TABLE_FREE even for !SMP so that we don't have to fixup the
!SMP case differently in the followup patch
!SMP case is right now broken for radix translation w.r.t page walk
cache flush. We can get interrupted in between page table free and
that would imply we have page walk cache entries pointing to tables
which got freed already. Michael said "both our platforms that run on
Power9 force SMP on in Kconfig, so the !SMP case is unlikely to be a
problem for anyone in practice, unless they've hacked their kernel to
build it !SMP."
Link: http://lkml.kernel.org/r/20200116064531.483522-2-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/powerpc/Kconfig | 2 +-
arch/powerpc/include/asm/book3s/32/pgalloc.h | 8 --------
arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 --
arch/powerpc/include/asm/nohash/pgalloc.h | 8 --------
arch/powerpc/mm/book3s64/pgtable.c | 7 -------
5 files changed, 1 insertion(+), 26 deletions(-)
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -49,7 +49,6 @@ static inline void pgtable_free(void *ta
#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb,
void *table, int shift)
{
@@ -66,13 +65,6 @@ static inline void __tlb_remove_table(vo
pgtable_free(table, shift);
}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_l
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
extern void __tlb_remove_table(void *_table);
-#endif
void pte_frag_destroy(void *pte_frag);
static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
--- a/arch/powerpc/include/asm/nohash/pgalloc.h~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -46,7 +46,6 @@ static inline void pgtable_free(void *ta
#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
unsigned long pgf = (unsigned long)table;
@@ -64,13 +63,6 @@ static inline void __tlb_remove_table(vo
pgtable_free(table, shift);
}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
--- a/arch/powerpc/Kconfig~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/Kconfig
@@ -222,7 +222,7 @@ config PPC
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_FREE
select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
--- a/arch/powerpc/mm/book3s64/pgtable.c~powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case
+++ a/arch/powerpc/mm/book3s64/pgtable.c
@@ -378,7 +378,6 @@ static inline void pgtable_free(void *ta
}
}
-#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
{
unsigned long pgf = (unsigned long)table;
@@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table)
return pgtable_free(table, index);
}
-#else
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
-{
- return pgtable_free(table, index);
-}
-#endif
#ifdef CONFIG_PROC_FS
atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
_
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/page_alloc.c: fix uninitialized memmaps on a partially populated last section
Patch series "mm: fix max_pfn not falling on section boundary", v2.
Playing with different memory sizes for a x86-64 guest, I discovered that
some memmaps (highest section if max_mem does not fall on the section
boundary) are marked as being valid and online, but contain garbage. We
have to properly initialize these memmaps.
Looking at /proc/kpageflags and friends, I found some more issues,
partially related to this.
This patch (of 3):
If max_pfn is not aligned to a section boundary, we can easily run into
BUGs. This can e.g., be triggered on x86-64 under QEMU by specifying a
memory size that is not a multiple of 128MB (e.g., 4097MB, but also
4160MB). I was told that on real HW, we can easily have this scenario
(esp., one of the main reasons sub-section hotadd of devmem was added).
The issue is, that we have a valid memmap (pfn_valid()) for the whole
section, and the whole section will be marked "online".
pfn_to_online_page() will succeed, but the memmap contains garbage.
E.g., doing a "./page-types -r -a 0x144001" when QEMU was started with "-m
4160M" - (see tools/vm/page-types.c):
[ 200.476376] BUG: unable to handle page fault for address: fffffffffffffffe
[ 200.477500] #PF: supervisor read access in kernel mode
[ 200.478334] #PF: error_code(0x0000) - not-present page
[ 200.479076] PGD 59614067 P4D 59614067 PUD 59616067 PMD 0
[ 200.479557] Oops: 0000 [#4] SMP NOPTI
[ 200.479875] CPU: 0 PID: 603 Comm: page-types Tainted: G D W 5.5.0-rc1-next-20191209 #93
[ 200.480646] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu4
[ 200.481648] RIP: 0010:stable_page_flags+0x4d/0x410
[ 200.482061] Code: f3 ff 41 89 c0 48 b8 00 00 00 00 01 00 00 00 45 84 c0 0f 85 cd 02 00 00 48 8b 53 08 48 8b 2b 48f
[ 200.483644] RSP: 0018:ffffb139401cbe60 EFLAGS: 00010202
[ 200.484091] RAX: fffffffffffffffe RBX: fffffbeec5100040 RCX: 0000000000000000
[ 200.484697] RDX: 0000000000000001 RSI: ffffffff9535c7cd RDI: 0000000000000246
[ 200.485313] RBP: ffffffffffffffff R08: 0000000000000000 R09: 0000000000000000
[ 200.485917] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000144001
[ 200.486523] R13: 00007ffd6ba55f48 R14: 00007ffd6ba55f40 R15: ffffb139401cbf08
[ 200.487130] FS: 00007f68df717580(0000) GS:ffff9ec77fa00000(0000) knlGS:0000000000000000
[ 200.487804] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 200.488295] CR2: fffffffffffffffe CR3: 0000000135d48000 CR4: 00000000000006f0
[ 200.488897] Call Trace:
[ 200.489115] kpageflags_read+0xe9/0x140
[ 200.489447] proc_reg_read+0x3c/0x60
[ 200.489755] vfs_read+0xc2/0x170
[ 200.490037] ksys_pread64+0x65/0xa0
[ 200.490352] do_syscall_64+0x5c/0xa0
[ 200.490665] entry_SYSCALL_64_after_hwframe+0x49/0xbe
But it can be triggered much easier via "cat /proc/kpageflags > /dev/null"
after cold/hot plugging a DIMM to such a system:
[root@localhost ~]# cat /proc/kpageflags > /dev/null
[ 111.517275] BUG: unable to handle page fault for address: fffffffffffffffe
[ 111.517907] #PF: supervisor read access in kernel mode
[ 111.518333] #PF: error_code(0x0000) - not-present page
[ 111.518771] PGD a240e067 P4D a240e067 PUD a2410067 PMD 0
This patch fixes that by at least zero-ing out that memmap (so e.g.,
page_to_pfn() will not crash). Commit 907ec5fca3dc ("mm: zero remaining
unavailable struct pages") tried to fix a similar issue, but forgot to
consider this special case.
After this patch, there are still problems to solve. E.g., not all of
these pages falling into a memory hole will actually get initialized later
and set PageReserved - they are only zeroed out - but at least the
immediate crashes are gone. A follow-up patch will take care of this.
Link: http://lkml.kernel.org/r/20191211163201.17179-2-david@redhat.com
Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: Pavel Tatashin <pasha.tatashin(a)oracle.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Steven Sistare <steven.sistare(a)oracle.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Cc: Bob Picco <bob.picco(a)oracle.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Alexey Dobriyan <adobriyan(a)gmail.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Stephen Rothwell <sfr(a)canb.auug.org.au>
Cc: <stable(a)vger.kernel.org> [4.15+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
--- a/mm/page_alloc.c~mm-fix-uninitialized-memmaps-on-a-partially-populated-last-section
+++ a/mm/page_alloc.c
@@ -6947,7 +6947,8 @@ static u64 zero_pfn_range(unsigned long
* This function also addresses a similar issue where struct pages are left
* uninitialized because the physical address range is not covered by
* memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=.
+ * layout is manually configured via memmap=, or when the highest physical
+ * address (max_pfn) does not end on a section boundary.
*/
void __init zero_resv_unavail(void)
{
@@ -6965,7 +6966,16 @@ void __init zero_resv_unavail(void)
pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start));
next = end;
}
- pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn);
+
+ /*
+ * Early sections always have a fully populated memmap for the whole
+ * section - see pfn_valid(). If the last section has holes at the
+ * end and that section is marked "online", the memmap will be
+ * considered initialized. Make sure that memmap has a well defined
+ * state.
+ */
+ pgcnt += zero_pfn_range(PFN_DOWN(next),
+ round_up(max_pfn, PAGES_PER_SECTION));
/*
* Struct pages that do not have backing memory. This could be because
_
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: bd2c938b1aeb - Linux 5.4.18-rc2
The results of these automated tests are provided below.
Overall result: FAILED (see details below)
Merge: OK
Compile: FAILED
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/419306
We attempted to compile the kernel for multiple architectures, but the compile
failed on one or more architectures:
ppc64le: FAILED (see build-ppc64le.log.xz attachment)
We hope that these logs can help you find the problem quickly. For the full
detail on our testing procedures, please scroll to the bottom of this message.
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hi,
I'd like to ask the stable team to add the patch
d55966c4279bfc6a0cf0b32bf13f5df228a1eeb6
btrfs: do not zero f_bavail if we have available space
to 5.4 and 5.5 stable trees as early as possible.
I'm not familiar with your release schedules but I saw the large patch
sets for review and I hope I could squeeze that one in the upcoming
release.
The commit fixes a problem in 'df' that causes false alerts and there
are a lot of users hitting it. The patch itself is a one-liner, but
with a high impact on usability.
Thanks in advance.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 19cb499e8335 - Linux 5.4.18-rc1
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/418718
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 2:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ pciutils: sanity smoke test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests: ext4
⚡⚡⚡ xfstests: xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ lvm thinp sanity
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ stress: stress-ng
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
✅ Boot test
✅ Storage SAN device stress - mpt3sas driver
Host 4:
✅ Boot test
✅ Storage SAN device stress - megaraid_sas
Host 5:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running are marked with ⏱. Reports for non-upstream kernels have
a Beaker recipe linked to next to each host.
Intel Software Developer's Manual, volume 3, chapter 9.11.6 says:
"Note that the microcode update must be aligned on a 16-byte
boundary and the size of the microcode update must be 1-KByte
granular"
When early-load Intel microcode is loaded from initramfs,
userspace tool 'iucode_tool' has already 16-byte aligned those
microcode bits in that initramfs image. Image that was created
something like this:
iucode_tool --write-earlyfw=FOO.cpio microcode-files...
However, when early-load Intel microcode is loaded from built-in
firmware BLOB using CONFIG_EXTRA_FIRMWARE= kernel config option,
that 16-byte alignment is not guaranteed.
Fix this by forcing all built-in firmware BLOBs to 16-byte
alignment.
Signed-off-by: Jari Ruusu <jari.ruusu(a)gmail.com>
--- a/drivers/base/firmware_loader/builtin/Makefile
+++ b/drivers/base/firmware_loader/builtin/Makefile
@@ -17,7 +17,7 @@
filechk_fwbin = \
echo "/* Generated by $(src)/Makefile */" ;\
echo " .section .rodata" ;\
- echo " .p2align $(ASM_ALIGN)" ;\
+ echo " .p2align 4" ;\
echo "_fw_$(FWSTR)_bin:" ;\
echo " .incbin \"$(fwdir)/$(FWNAME)\"" ;\
echo "_fw_end:" ;\
--
Jari Ruusu 4096R/8132F189 12D6 4C3A DCDA 0AA4 27BD ACDF F073 3C80 8132 F189
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d55966c4279bfc6a0cf0b32bf13f5df228a1eeb6 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Fri, 31 Jan 2020 09:31:05 -0500
Subject: [PATCH] btrfs: do not zero f_bavail if we have available space
There was some logic added a while ago to clear out f_bavail in statfs()
if we did not have enough free metadata space to satisfy our global
reserve. This was incorrect at the time, however didn't really pose a
problem for normal file systems because we would often allocate chunks
if we got this low on free metadata space, and thus wouldn't really hit
this case unless we were actually full.
Fast forward to today and now we are much better about not allocating
metadata chunks all of the time. Couple this with d792b0f19711 ("btrfs:
always reserve our entire size for the global reserve") which now means
we'll easily have a larger global reserve than our free space, we are
now more likely to trip over this while still having plenty of space.
Fix this by skipping this logic if the global rsv's space_info is not
full. space_info->full is 0 unless we've attempted to allocate a chunk
for that space_info and that has failed. If this happens then the space
for the global reserve is definitely sacred and we need to report
b_avail == 0, but before then we can just use our calculated b_avail.
Reported-by: Martin Steigerwald <martin(a)lichtvoll.de>
Fixes: ca8a51b3a979 ("btrfs: statfs: report zero available if metadata are exhausted")
CC: stable(a)vger.kernel.org # 4.5+
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Tested-By: Martin Steigerwald <martin(a)lichtvoll.de>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a906315efd19..0616a5434793 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2135,7 +2135,15 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
*/
thresh = SZ_4M;
- if (!mixed && total_free_meta - thresh < block_rsv->size)
+ /*
+ * We only want to claim there's no available space if we can no longer
+ * allocate chunks for our metadata profile and our global reserve will
+ * not fit in the free metadata space. If we aren't ->full then we
+ * still can allocate chunks and thus are fine using the currently
+ * calculated f_bavail.
+ */
+ if (!mixed && block_rsv->space_info->full &&
+ total_free_meta - thresh < block_rsv->size)
buf->f_bavail = 0;
buf->f_type = BTRFS_SUPER_MAGIC;
Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
dock") added the USB id for the Acer SW5-012's keyboard dock to the
hid-ite driver to fix the rfkill driver not working.
Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
"Wireless Radio Control" bits which need the special hid-ite driver on the
second USB interface (the mouse interface) and their touchpad only supports
mouse emulation, so using generic hid-input handling for anything but
the "Wireless Radio Control" bits is fine. On these devices we simply bind
to all USB interfaces.
But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
(SW5-012)'s touchpad not only does mouse emulation it also supports
HID-multitouch and all the keys including the "Wireless Radio Control"
bits have been moved to the first USB interface (the keyboard intf).
So we need hid-ite to handle the first (keyboard) USB interface and have
it NOT bind to the second (mouse) USB interface so that that can be
handled by hid-multitouch.c and we get proper multi-touch support.
This commit changes the hid_device_id for the SW5-012 keyboard dock to
only match on hid devices from the HID_GROUP_GENERIC group, this way
hid-ite will not bind the the mouse/multi-touch interface which has
HID_GROUP_MULTITOUCH_WIN_8 as group.
This fixes the regression to mouse-emulation mode introduced by adding
the keyboard dock USB id.
Cc: stable(a)vger.kernel.org
Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes in v2:
- Extend hid_device_id to also match on the HID_GROUP_GENERIC group,
instead of adding a match callback which peeks at the USB descriptors
---
drivers/hid/hid-ite.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index c436e12feb23..6c55682c5974 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -41,8 +41,9 @@ static const struct hid_device_id ite_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
{ HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
/* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
- { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
- USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_SYNAPTICS,
+ USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
{ }
};
MODULE_DEVICE_TABLE(hid, ite_devices);
--
2.23.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/sashal/linux-stable.git
Commit: b3e3082be48b - ARM: dma-api: fix max_pfn off-by-one error in __dma_supported()
The results of these automated tests are provided below.
Overall result: FAILED (see details below)
Merge: OK
Compile: OK
Tests: FAILED
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/417813
One or more kernel tests failed:
ppc64le:
❌ Boot test
We hope that these logs can help you find the problem quickly. For the full
detail on our testing procedures, please scroll to the bottom of this message.
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 2:
❌ Boot test
⚡⚡⚡ xfstests: ext4
⚡⚡⚡ xfstests: xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ lvm thinp sanity
⚡⚡⚡ storage: software RAID testing
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ Storage SAN device stress - mpt3sas driver
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ pciutils: sanity smoke test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 3:
✅ Boot test
✅ Storage SAN device stress - megaraid_sas
Host 4:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running are marked with ⏱. Reports for non-upstream kernels have
a Beaker recipe linked to next to each host.
On Mon, Feb 03, 2020 at 10:12:16AM -0500, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> btrfs: dev-replace: remove warning for unknown return codes when finished
>
> to the 5.4-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> btrfs-dev-replace-remove-warning-for-unknown-return-.patch
> and it can be found in the queue-5.4 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
> commit 87058e8dca6c3ecb0ae52d1275ee0e775fac06b9
> Author: David Sterba <dsterba(a)suse.com>
> Date: Sat Jan 25 12:35:38 2020 +0100
>
> btrfs: dev-replace: remove warning for unknown return codes when finished
Please remove the commit from stable-queue, the patch makes sense with
1bbb97b8ce7ddf3a56 ("btrfs: scrub: Require mandatory block group RO for
dev-replace") which was a regression fix in 5.5.
A pointer to 'struct si470x_device' is currently used after free:
drivers/media/radio/si470x/radio-si470x-i2c.c:462:25-30: ERROR: reference
preceded by free on line 460
Shift the call to free() down past its final use.
NB: Not sending to Mainline, since the problem does not exist there.
Cc: <stable(a)vger.kernel.org> # v3.18+
Reported-by: kbuild test robot <lkp(a)intel.com>
Reported-by: Julia Lawall <julia.lawall(a)lip6.fr>
Signed-off-by: Lee Jones <lee.jones(a)linaro.org>
---
drivers/media/radio/si470x/radio-si470x-i2c.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c
index ae7540b765e1d..aa12fd2663895 100644
--- a/drivers/media/radio/si470x/radio-si470x-i2c.c
+++ b/drivers/media/radio/si470x/radio-si470x-i2c.c
@@ -483,10 +483,10 @@ static int si470x_i2c_remove(struct i2c_client *client)
free_irq(client->irq, radio);
video_unregister_device(&radio->videodev);
- kfree(radio);
v4l2_ctrl_handler_free(&radio->hdl);
v4l2_device_unregister(&radio->v4l2_dev);
+ kfree(radio);
return 0;
}
--
2.24.0
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 5.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
On Mon, 03 Feb 2020 06:51:22 +0100,
Hillf Danton wrote:
>
>
> Sun, 02 Feb 2020 04:56:10 -0800 (PST)
> > syzbot has found a reproducer for the following crash on:
> >
> > HEAD commit: 2747d5fd Add linux-next specific files for 20200116
This looks like a fairly old head, and missing the fix that is already
included in 5.5 release:
60adcfde92fa40fcb2dbf7cc52f9b096e0cd109a
ALSA: seq: Fix racy access for queue timer in proc read
thanks,
Takashi
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 09ed259fac621634d51cd986aa8d65f035662658 Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu(a)ti.com>
Date: Wed, 11 Dec 2019 10:10:03 -0600
Subject: [PATCH] usb: dwc3: turn off VBUS when leaving host mode
VBUS should be turned off when leaving the host mode.
Set GCTL_PRTCAP to device mode in teardown to de-assert DRVVBUS pin to
turn off VBUS power.
Fixes: 5f94adfeed97 ("usb: dwc3: core: refactor mode initialization to its own function")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bin Liu <b-liu(a)ti.com>
Signed-off-by: Felipe Balbi <balbi(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index f561c6c9e8a9..1d85c42b9c67 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1246,6 +1246,9 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc)
/* do nothing */
break;
}
+
+ /* de-assert DRVVBUS for HOST and OTG mode */
+ dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE);
}
static void dwc3_get_properties(struct dwc3 *dwc)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e93cd35101b61e4c79149be2cfc927c4b28dc60c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Thu, 28 Nov 2019 18:22:00 +0100
Subject: [PATCH] rsi: fix use-after-free on failed probe and unbind
Make sure to stop both URBs before returning after failed probe as well
as on disconnect to avoid use-after-free in the completion handler.
Reported-by: syzbot+b563b7f8dbe8223a51e8(a)syzkaller.appspotmail.com
Fixes: a4302bff28e2 ("rsi: add bluetooth rx endpoint")
Fixes: dad0d04fa7ba ("rsi: Add RS9113 wireless driver")
Cc: stable <stable(a)vger.kernel.org> # 3.15
Cc: Siva Rebbagondla <siva.rebbagondla(a)redpinesignals.com>
Cc: Prameela Rani Garnepudi <prameela.j04cs(a)gmail.com>
Cc: Amitkumar Karwar <amit.karwar(a)redpinesignals.com>
Cc: Fariya Fatima <fariyaf(a)gmail.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 53f41fc2cadf..30bed719486e 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -292,6 +292,15 @@ static void rsi_rx_done_handler(struct urb *urb)
dev_kfree_skb(rx_cb->rx_skb);
}
+static void rsi_rx_urb_kill(struct rsi_hw *adapter, u8 ep_num)
+{
+ struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
+ struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1];
+ struct urb *urb = rx_cb->rx_urb;
+
+ usb_kill_urb(urb);
+}
+
/**
* rsi_rx_urb_submit() - This function submits the given URB to the USB stack.
* @adapter: Pointer to the adapter structure.
@@ -823,10 +832,13 @@ static int rsi_probe(struct usb_interface *pfunction,
if (adapter->priv->coex_mode > 1) {
status = rsi_rx_urb_submit(adapter, BT_EP);
if (status)
- goto err1;
+ goto err_kill_wlan_urb;
}
return 0;
+
+err_kill_wlan_urb:
+ rsi_rx_urb_kill(adapter, WLAN_EP);
err1:
rsi_deinit_usb_interface(adapter);
err:
@@ -857,6 +869,10 @@ static void rsi_disconnect(struct usb_interface *pfunction)
adapter->priv->bt_adapter = NULL;
}
+ if (adapter->priv->coex_mode > 1)
+ rsi_rx_urb_kill(adapter, BT_EP);
+ rsi_rx_urb_kill(adapter, WLAN_EP);
+
rsi_reset_card(adapter);
rsi_deinit_usb_interface(adapter);
rsi_91x_deinit(adapter);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07bfd9bdf568a38d9440c607b72342036011f727 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert(a)gondor.apana.org.au>
Date: Tue, 19 Nov 2019 17:41:31 +0800
Subject: [PATCH] crypto: pcrypt - Fix user-after-free on module unload
On module unload of pcrypt we must unregister the crypto algorithms
first and then tear down the padata structure. As otherwise the
crypto algorithms are still alive and can be used while the padata
structure is being freed.
Fixes: 5068c7a883d1 ("crypto: pcrypt - Add pcrypt crypto...")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 543792e0ebf0..81bbea7f2ba6 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -362,11 +362,12 @@ static int __init pcrypt_init(void)
static void __exit pcrypt_exit(void)
{
+ crypto_unregister_template(&pcrypt_tmpl);
+
pcrypt_fini_padata(pencrypt);
pcrypt_fini_padata(pdecrypt);
kset_unregister(pcrypt_kset);
- crypto_unregister_template(&pcrypt_tmpl);
}
subsys_initcall(pcrypt_init);
commit b8511ccc75c033f6d54188ea4df7bf1e85778740 upstream.
A resource group (rdtgrp) contains a reference count (rdtgrp->waitcount)
that indicates how many waiters expect this rdtgrp to exist. Waiters
could be waiting on rdtgroup_mutex or some work sitting on a task's
workqueue for when the task returns from kernel mode or exits.
The deletion of a rdtgrp is intended to have two phases:
(1) while holding rdtgroup_mutex the necessary cleanup is done and
rdtgrp->flags is set to RDT_DELETED,
(2) after releasing the rdtgroup_mutex, the rdtgrp structure is freed
only if there are no waiters and its flag is set to RDT_DELETED. Upon
gaining access to rdtgroup_mutex or rdtgrp, a waiter is required to check
for the RDT_DELETED flag.
When unmounting the resctrl file system or deleting ctrl_mon groups,
all of the subdirectories are removed and the data structure of rdtgrp
is forcibly freed without checking rdtgrp->waitcount. If at this point
there was a waiter on rdtgrp then a use-after-free issue occurs when the
waiter starts running and accesses the rdtgrp structure it was waiting
on.
See kfree() calls in [1], [2] and [3] in these two call paths in
following scenarios:
(1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp()
(2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
There are several scenarios that result in use-after-free issue in
following:
Scenario 1:
-----------
In Thread 1, rdtgroup_tasks_write() adds a task_work callback
move_myself(). If move_myself() is scheduled to execute after Thread 2
rdt_kill_sb() is finished, referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_tasks_write) Thread 2 (rdt_kill_sb)
------------------------------- ----------------------
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_move_task
__rdtgroup_move_task
/*
* Take an extra refcount, so rdtgrp cannot be freed
* before the call back move_myself has been invoked
*/
atomic_inc(&rdtgrp->waitcount)
/* Callback move_myself will be scheduled for later */
task_work_add(move_myself)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
mutex_lock
rmdir_all_sub
/*
* sentry and rdtgrp are freed
* without checking refcount
*/
free_all_child_rdtgrp
kfree(sentry)*[1]
kfree(rdtgrp)*[2]
mutex_unlock
/*
* Callback is scheduled to execute
* after rdt_kill_sb is finished
*/
move_myself
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [1] or [2].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
Scenario 2:
-----------
In Thread 1, rdtgroup_tasks_write() adds a task_work callback
move_myself(). If move_myself() is scheduled to execute after Thread 2
rdtgroup_rmdir() is finished, referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_tasks_write) Thread 2 (rdtgroup_rmdir)
------------------------------- -------------------------
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_move_task
__rdtgroup_move_task
/*
* Take an extra refcount, so rdtgrp cannot be freed
* before the call back move_myself has been invoked
*/
atomic_inc(&rdtgrp->waitcount)
/* Callback move_myself will be scheduled for later */
task_work_add(move_myself)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
/*
* sentry is freed without
* checking refcount
*/
kfree(sentry)*[3]
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(
&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
/*
* Callback is scheduled to execute
* after rdt_kill_sb is finished
*/
move_myself
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [3].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
If CONFIG_DEBUG_SLAB=y, Slab corruption on kmalloc-2k can be observed
like following. Note that "0x6b" is POISON_FREE after kfree(). The
corrupted bits "0x6a", "0x64" at offset 0x424 correspond to
waitcount member of struct rdtgroup which was freed:
Slab corruption (Not tainted): kmalloc-2k start=ffff9504c5b0d000, len=2048
420: 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkjkkkkkkkkkkk
Single bit error detected. Probably bad RAM.
Run memtest86+ or a similar memory test tool.
Next obj: start=ffff9504c5b0d800, len=2048
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
Slab corruption (Not tainted): kmalloc-2k start=ffff9504c58ab800, len=2048
420: 6b 6b 6b 6b 64 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkdkkkkkkkkkkk
Prev obj: start=ffff9504c58ab000, len=2048
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
Fix this by taking reference count (waitcount) of rdtgrp into account in
the two call paths that currently do not do so. Instead of always
freeing the resource group it will only be freed if there are no waiters
on it. If there are waiters, the resource group will have its flags set
to RDT_DELETED.
It will be left to the waiter to free the resource group when it starts
running and finding that it was the last waiter and the resource group
has been removed (rdtgrp->flags & RDT_DELETED) since. (1) rdt_kill_sb()
-> rmdir_all_sub() -> free_all_child_rdtgrp() (2) rdtgroup_rmdir() ->
rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
in older stable trees.
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-2-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 841a024..db22ba0 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -2167,7 +2167,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
free_rmid(sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);
- kfree(sentry);
+
+ if (atomic_read(&sentry->waitcount) != 0)
+ sentry->flags = RDT_DELETED;
+ else
+ kfree(sentry);
}
}
@@ -2205,7 +2209,11 @@ static void rmdir_all_sub(void)
kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
- kfree(rdtgrp);
+
+ if (atomic_read(&rdtgrp->waitcount) != 0)
+ rdtgrp->flags = RDT_DELETED;
+ else
+ kfree(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
update_closid_rmid(cpu_online_mask, &rdtgroup_default);
--
1.8.3.1
Commit f3ee99087c8ca0ecfdd549ef5a94f557c42d5428 ("ASoC: tegra: Allow
24bit and 32bit samples") added 24-bit and 32-bit support for to the
Tegra30 I2S driver. However, there are two additional commits that are
also needed to get 24-bit and 32-bit support to work correctly. These
commits are not yet applied because there are still some review comments
that need to be addressed. With only this change applied, 24-bit and
32-bit support is advertised by the I2S driver, but it does not work and
the audio is distorted. Therefore, revert this patch for now until the
other changes are also ready.
Furthermore, a clock issue with 24-bit support has been identified with
this change and so if we revert this now, we can also fix that in the
updated version.
Cc: stable(a)vger.kernel.org
Reported-by: Dmitry Osipenko <digetx(a)gmail.com>
Signed-off-by: Jon Hunter <jonathanh(a)nvidia.com>
---
sound/soc/tegra/tegra30_i2s.c | 25 +++++--------------------
1 file changed, 5 insertions(+), 20 deletions(-)
diff --git a/sound/soc/tegra/tegra30_i2s.c b/sound/soc/tegra/tegra30_i2s.c
index dbed3c5408e7..d59882ec48f1 100644
--- a/sound/soc/tegra/tegra30_i2s.c
+++ b/sound/soc/tegra/tegra30_i2s.c
@@ -127,7 +127,7 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream,
struct device *dev = dai->dev;
struct tegra30_i2s *i2s = snd_soc_dai_get_drvdata(dai);
unsigned int mask, val, reg;
- int ret, sample_size, srate, i2sclock, bitcnt, audio_bits;
+ int ret, sample_size, srate, i2sclock, bitcnt;
struct tegra30_ahub_cif_conf cif_conf;
if (params_channels(params) != 2)
@@ -137,19 +137,8 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream,
switch (params_format(params)) {
case SNDRV_PCM_FORMAT_S16_LE:
val = TEGRA30_I2S_CTRL_BIT_SIZE_16;
- audio_bits = TEGRA30_AUDIOCIF_BITS_16;
sample_size = 16;
break;
- case SNDRV_PCM_FORMAT_S24_LE:
- val = TEGRA30_I2S_CTRL_BIT_SIZE_24;
- audio_bits = TEGRA30_AUDIOCIF_BITS_24;
- sample_size = 24;
- break;
- case SNDRV_PCM_FORMAT_S32_LE:
- val = TEGRA30_I2S_CTRL_BIT_SIZE_32;
- audio_bits = TEGRA30_AUDIOCIF_BITS_32;
- sample_size = 32;
- break;
default:
return -EINVAL;
}
@@ -181,8 +170,8 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream,
cif_conf.threshold = 0;
cif_conf.audio_channels = 2;
cif_conf.client_channels = 2;
- cif_conf.audio_bits = audio_bits;
- cif_conf.client_bits = audio_bits;
+ cif_conf.audio_bits = TEGRA30_AUDIOCIF_BITS_16;
+ cif_conf.client_bits = TEGRA30_AUDIOCIF_BITS_16;
cif_conf.expand = 0;
cif_conf.stereo_conv = 0;
cif_conf.replicate = 0;
@@ -317,18 +306,14 @@ static const struct snd_soc_dai_driver tegra30_i2s_dai_template = {
.channels_min = 2,
.channels_max = 2,
.rates = SNDRV_PCM_RATE_8000_96000,
- .formats = SNDRV_PCM_FMTBIT_S32_LE |
- SNDRV_PCM_FMTBIT_S24_LE |
- SNDRV_PCM_FMTBIT_S16_LE,
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
},
.capture = {
.stream_name = "Capture",
.channels_min = 2,
.channels_max = 2,
.rates = SNDRV_PCM_RATE_8000_96000,
- .formats = SNDRV_PCM_FMTBIT_S32_LE |
- SNDRV_PCM_FMTBIT_S24_LE |
- SNDRV_PCM_FMTBIT_S16_LE,
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
},
.ops = &tegra30_i2s_dai_ops,
.symmetric_rates = 1,
--
2.17.1
commit 334b0f4e9b1b4a1d475f803419d202f6c5e4d18e upstream.
There is a race condition which results in a deadlock when rmdir and
mkdir execute concurrently:
$ ls /sys/fs/resctrl/c1/mon_groups/m1/
cpus cpus_list mon_data tasks
Thread 1: rmdir /sys/fs/resctrl/c1
Thread 2: mkdir /sys/fs/resctrl/c1/mon_groups/m1
3 locks held by mkdir/48649:
#0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
#1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c13b>] filename_create+0x7b/0x170
#2: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
4 locks held by rmdir/48652:
#0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
#1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c3cf>] do_rmdir+0x13f/0x1e0
#2: (&type->i_mutex_dir_key#8){++++}, at: [<ffffffffb4c86d5d>] vfs_rmdir+0x4d/0x120
#3: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
Thread 1 is deleting control group "c1". Holding rdtgroup_mutex,
kernfs_remove() removes all kernfs nodes under directory "c1"
recursively, then waits for sub kernfs node "mon_groups" to drop active
reference.
Thread 2 is trying to create a subdirectory "m1" in the "mon_groups"
directory. The wrapper kernfs_iop_mkdir() takes an active reference to
the "mon_groups" directory but the code drops the active reference to
the parent directory "c1" instead.
As a result, Thread 1 is blocked on waiting for active reference to drop
and never release rdtgroup_mutex, while Thread 2 is also blocked on
trying to get rdtgroup_mutex.
Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir)
(rmdir /sys/fs/resctrl/c1) (mkdir /sys/fs/resctrl/c1/mon_groups/m1)
------------------------- -------------------------
kernfs_iop_mkdir
/*
* kn: "m1", parent_kn: "mon_groups",
* prgrp_kn: parent_kn->parent: "c1",
*
* "mon_groups", parent_kn->active++: 1
*/
kernfs_get_active(parent_kn)
kernfs_iop_rmdir
/* "c1", kn->active++ */
kernfs_get_active(kn)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
/* "c1", kn->active-- */
kernfs_break_active_protection(kn)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
sentry->flags = RDT_DELETED
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
kernfs_get(kn)
kernfs_remove(rdtgrp->kn)
__kernfs_remove
/* "mon_groups", sub_kn */
atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active)
kernfs_drain(sub_kn)
/*
* sub_kn->active == KN_DEACTIVATED_BIAS + 1,
* waiting on sub_kn->active to drop, but it
* never drops in Thread 2 which is blocked
* on getting rdtgroup_mutex.
*/
Thread 1 hangs here ---->
wait_event(sub_kn->active == KN_DEACTIVATED_BIAS)
...
rdtgroup_mkdir
rdtgroup_mkdir_mon(parent_kn, prgrp_kn)
mkdir_rdt_prepare(parent_kn, prgrp_kn)
rdtgroup_kn_lock_live(prgrp_kn)
atomic_inc(&rdtgrp->waitcount)
/*
* "c1", prgrp_kn->active--
*
* The active reference on "c1" is
* dropped, but not matching the
* actual active reference taken
* on "mon_groups", thus causing
* Thread 1 to wait forever while
* holding rdtgroup_mutex.
*/
kernfs_break_active_protection(
prgrp_kn)
/*
* Trying to get rdtgroup_mutex
* which is held by Thread 1.
*/
Thread 2 hangs here ----> mutex_lock
...
The problem is that the creation of a subdirectory in the "mon_groups"
directory incorrectly releases the active protection of its parent
directory instead of itself before it starts waiting for rdtgroup_mutex.
This is triggered by the rdtgroup_mkdir() flow calling
rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the
parent control group ("c1") as argument. It should be called with kernfs
node "mon_groups" instead. What is currently missing is that the
kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp.
Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then
it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock()
instead. And then it operates on the same rdtgroup structure but handles
the active reference of kernfs node "mon_groups" to prevent deadlock.
The same changes are also made to the "mon_data" directories.
This results in some unused function parameters that will be cleaned up
in follow-up patch as the focus here is on the fix only in support of
backporting efforts.
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
for older stable trees.
Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-4-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 77770ca..11c5acc 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -2005,7 +2005,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
- NULL, "mon_groups",
+ &rdtgroup_default, "mon_groups",
&kn_mongrp);
if (ret) {
dentry = ERR_PTR(ret);
@@ -2415,7 +2415,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
/*
* Create the mon_data directory first.
*/
- ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+ ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
if (ret)
return ret;
@@ -2568,7 +2568,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
uint files = 0;
int ret;
- prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+ prdtgrp = rdtgroup_kn_lock_live(parent_kn);
rdt_last_cmd_clear();
if (!prdtgrp) {
ret = -ENODEV;
@@ -2643,7 +2643,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kernfs_activate(kn);
/*
- * The caller unlocks the prgrp_kn upon success.
+ * The caller unlocks the parent_kn upon success.
*/
return 0;
@@ -2654,7 +2654,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
out_free_rgrp:
kfree(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -2692,7 +2692,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
*/
list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -2735,7 +2735,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
* Create an empty mon_groups directory to hold the subset
* of tasks and cpus to monitor.
*/
- ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+ ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
if (ret) {
rdt_last_cmd_puts("kernfs subdir error\n");
goto out_del_list;
@@ -2751,7 +2751,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
out_common_fail:
mkdir_rdt_prepare_clean(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
--
1.8.3.1
commit 074fadee59ee7a9d2b216e9854bd4efb5dad679f upstream.
There is a race condition in the following scenario which results in an
use-after-free issue when reading a monitoring file and deleting the
parent ctrl_mon group concurrently:
Thread 1 calls atomic_inc() to take refcount of rdtgrp and then calls
kernfs_break_active_protection() to drop the active reference of kernfs
node in rdtgroup_kn_lock_live().
In Thread 2, kernfs_remove() is a blocking routine. It waits on all sub
kernfs nodes to drop the active reference when removing all subtree
kernfs nodes recursively. Thread 2 could block on kernfs_remove() until
Thread 1 calls kernfs_break_active_protection(). Only after
kernfs_remove() completes the refcount of rdtgrp could be trusted.
Before Thread 1 calls atomic_inc() and kernfs_break_active_protection(),
Thread 2 could call kfree() when the refcount of rdtgrp (sentry) is 0
instead of 1 due to the race.
In Thread 1, in rdtgroup_kn_unlock(), referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_mondata_show) Thread 2 (rdtgroup_rmdir)
-------------------------------- -------------------------
rdtgroup_kn_lock_live
/*
* kn active protection until
* kernfs_break_active_protection(kn)
*/
rdtgrp = kernfs_to_rdtgroup(kn)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
/*
* sentry->waitcount should be 1
* but is 0 now due to the race.
*/
kfree(sentry)*[1]
/*
* Only after kernfs_remove()
* completes, the refcount of
* rdtgrp could be trusted.
*/
atomic_inc(&rdtgrp->waitcount)
/* kn->active-- */
kernfs_break_active_protection(kn)
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
/*
* Blocking routine, wait for
* all sub kernfs nodes to drop
* active reference in
* kernfs_break_active_protection.
*/
kernfs_remove(rdtgrp->kn)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(
&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kernfs_unbreak_active_protection(kn)
kfree(rdtgrp)
mutex_lock
mon_event_read
rdtgroup_kn_unlock
mutex_unlock
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [1].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
/* kn->active++ */
kernfs_unbreak_active_protection(kn)
kfree(rdtgrp)
Fix it by moving free_all_child_rdtgrp() to after kernfs_remove() in
rdtgroup_rmdir_ctrl() to ensure it has the accurate refcount of rdtgrp.
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
for older stable trees.
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-3-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index db22ba0..77770ca 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -2877,13 +2877,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
closid_free(rdtgrp->closid);
free_rmid(rdtgrp->mon.rmid);
+ rdtgroup_ctrl_remove(kn, rdtgrp);
+
/*
* Free all the child monitor group rmids.
*/
free_all_child_rdtgrp(rdtgrp);
- rdtgroup_ctrl_remove(kn, rdtgrp);
-
return 0;
}
--
1.8.3.1
commit 334b0f4e9b1b4a1d475f803419d202f6c5e4d18e upstream.
There is a race condition which results in a deadlock when rmdir and
mkdir execute concurrently:
$ ls /sys/fs/resctrl/c1/mon_groups/m1/
cpus cpus_list mon_data tasks
Thread 1: rmdir /sys/fs/resctrl/c1
Thread 2: mkdir /sys/fs/resctrl/c1/mon_groups/m1
3 locks held by mkdir/48649:
#0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
#1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c13b>] filename_create+0x7b/0x170
#2: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
4 locks held by rmdir/48652:
#0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
#1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c3cf>] do_rmdir+0x13f/0x1e0
#2: (&type->i_mutex_dir_key#8){++++}, at: [<ffffffffb4c86d5d>] vfs_rmdir+0x4d/0x120
#3: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
Thread 1 is deleting control group "c1". Holding rdtgroup_mutex,
kernfs_remove() removes all kernfs nodes under directory "c1"
recursively, then waits for sub kernfs node "mon_groups" to drop active
reference.
Thread 2 is trying to create a subdirectory "m1" in the "mon_groups"
directory. The wrapper kernfs_iop_mkdir() takes an active reference to
the "mon_groups" directory but the code drops the active reference to
the parent directory "c1" instead.
As a result, Thread 1 is blocked on waiting for active reference to drop
and never release rdtgroup_mutex, while Thread 2 is also blocked on
trying to get rdtgroup_mutex.
Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir)
(rmdir /sys/fs/resctrl/c1) (mkdir /sys/fs/resctrl/c1/mon_groups/m1)
------------------------- -------------------------
kernfs_iop_mkdir
/*
* kn: "m1", parent_kn: "mon_groups",
* prgrp_kn: parent_kn->parent: "c1",
*
* "mon_groups", parent_kn->active++: 1
*/
kernfs_get_active(parent_kn)
kernfs_iop_rmdir
/* "c1", kn->active++ */
kernfs_get_active(kn)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
/* "c1", kn->active-- */
kernfs_break_active_protection(kn)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
sentry->flags = RDT_DELETED
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
kernfs_get(kn)
kernfs_remove(rdtgrp->kn)
__kernfs_remove
/* "mon_groups", sub_kn */
atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active)
kernfs_drain(sub_kn)
/*
* sub_kn->active == KN_DEACTIVATED_BIAS + 1,
* waiting on sub_kn->active to drop, but it
* never drops in Thread 2 which is blocked
* on getting rdtgroup_mutex.
*/
Thread 1 hangs here ---->
wait_event(sub_kn->active == KN_DEACTIVATED_BIAS)
...
rdtgroup_mkdir
rdtgroup_mkdir_mon(parent_kn, prgrp_kn)
mkdir_rdt_prepare(parent_kn, prgrp_kn)
rdtgroup_kn_lock_live(prgrp_kn)
atomic_inc(&rdtgrp->waitcount)
/*
* "c1", prgrp_kn->active--
*
* The active reference on "c1" is
* dropped, but not matching the
* actual active reference taken
* on "mon_groups", thus causing
* Thread 1 to wait forever while
* holding rdtgroup_mutex.
*/
kernfs_break_active_protection(
prgrp_kn)
/*
* Trying to get rdtgroup_mutex
* which is held by Thread 1.
*/
Thread 2 hangs here ----> mutex_lock
...
The problem is that the creation of a subdirectory in the "mon_groups"
directory incorrectly releases the active protection of its parent
directory instead of itself before it starts waiting for rdtgroup_mutex.
This is triggered by the rdtgroup_mkdir() flow calling
rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the
parent control group ("c1") as argument. It should be called with kernfs
node "mon_groups" instead. What is currently missing is that the
kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp.
Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then
it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock()
instead. And then it operates on the same rdtgroup structure but handles
the active reference of kernfs node "mon_groups" to prevent deadlock.
The same changes are also made to the "mon_data" directories.
This results in some unused function parameters that will be cleaned up
in follow-up patch as the focus here is on the fix only in support of
backporting efforts.
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
for older stable trees.
Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-4-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 0157496..0ec30b2 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1107,7 +1107,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
- NULL, "mon_groups",
+ &rdtgroup_default, "mon_groups",
&kn_mongrp);
if (ret) {
dentry = ERR_PTR(ret);
@@ -1499,7 +1499,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
/*
* Create the mon_data directory first.
*/
- ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+ ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
if (ret)
return ret;
@@ -1533,7 +1533,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
uint files = 0;
int ret;
- prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+ prdtgrp = rdtgroup_kn_lock_live(parent_kn);
if (!prdtgrp) {
ret = -ENODEV;
goto out_unlock;
@@ -1589,7 +1589,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kernfs_activate(kn);
/*
- * The caller unlocks the prgrp_kn upon success.
+ * The caller unlocks the parent_kn upon success.
*/
return 0;
@@ -1600,7 +1600,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
out_free_rgrp:
kfree(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -1638,7 +1638,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
*/
list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -1675,7 +1675,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
* Create an empty mon_groups directory to hold the subset
* of tasks and cpus to monitor.
*/
- ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+ ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
if (ret)
goto out_id_free;
}
@@ -1688,7 +1688,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
out_common_fail:
mkdir_rdt_prepare_clean(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
--
1.8.3.1
commit 074fadee59ee7a9d2b216e9854bd4efb5dad679f upstream.
There is a race condition in the following scenario which results in an
use-after-free issue when reading a monitoring file and deleting the
parent ctrl_mon group concurrently:
Thread 1 calls atomic_inc() to take refcount of rdtgrp and then calls
kernfs_break_active_protection() to drop the active reference of kernfs
node in rdtgroup_kn_lock_live().
In Thread 2, kernfs_remove() is a blocking routine. It waits on all sub
kernfs nodes to drop the active reference when removing all subtree
kernfs nodes recursively. Thread 2 could block on kernfs_remove() until
Thread 1 calls kernfs_break_active_protection(). Only after
kernfs_remove() completes the refcount of rdtgrp could be trusted.
Before Thread 1 calls atomic_inc() and kernfs_break_active_protection(),
Thread 2 could call kfree() when the refcount of rdtgrp (sentry) is 0
instead of 1 due to the race.
In Thread 1, in rdtgroup_kn_unlock(), referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_mondata_show) Thread 2 (rdtgroup_rmdir)
-------------------------------- -------------------------
rdtgroup_kn_lock_live
/*
* kn active protection until
* kernfs_break_active_protection(kn)
*/
rdtgrp = kernfs_to_rdtgroup(kn)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
/*
* sentry->waitcount should be 1
* but is 0 now due to the race.
*/
kfree(sentry)*[1]
/*
* Only after kernfs_remove()
* completes, the refcount of
* rdtgrp could be trusted.
*/
atomic_inc(&rdtgrp->waitcount)
/* kn->active-- */
kernfs_break_active_protection(kn)
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
/*
* Blocking routine, wait for
* all sub kernfs nodes to drop
* active reference in
* kernfs_break_active_protection.
*/
kernfs_remove(rdtgrp->kn)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(
&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kernfs_unbreak_active_protection(kn)
kfree(rdtgrp)
mutex_lock
mon_event_read
rdtgroup_kn_unlock
mutex_unlock
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [1].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
/* kn->active++ */
kernfs_unbreak_active_protection(kn)
kfree(rdtgrp)
Fix it by moving free_all_child_rdtgrp() to after kernfs_remove() in
rdtgroup_rmdir_ctrl() to ensure it has the accurate refcount of rdtgrp.
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
for older stable trees.
Upstream commit 17eafd076291 ("x86/intel_rdt: Split resource group
removal in two") moved part of resource group removal code from
rdtgroup_rmdir_mon() into a separate function rdtgroup_ctrl_remove().
Apply the change against original code base of rdtgroup_rmdir_mon() for
older stable trees.
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-3-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 7349969..0157496 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1800,11 +1800,6 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
closid_free(rdtgrp->closid);
free_rmid(rdtgrp->mon.rmid);
- /*
- * Free all the child monitor group rmids.
- */
- free_all_child_rdtgrp(rdtgrp);
-
list_del(&rdtgrp->rdtgroup_list);
/*
@@ -1814,6 +1809,11 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
kernfs_get(kn);
kernfs_remove(rdtgrp->kn);
+ /*
+ * Free all the child monitor group rmids.
+ */
+ free_all_child_rdtgrp(rdtgrp);
+
return 0;
}
--
1.8.3.1
commit b8511ccc75c033f6d54188ea4df7bf1e85778740 upstream.
A resource group (rdtgrp) contains a reference count (rdtgrp->waitcount)
that indicates how many waiters expect this rdtgrp to exist. Waiters
could be waiting on rdtgroup_mutex or some work sitting on a task's
workqueue for when the task returns from kernel mode or exits.
The deletion of a rdtgrp is intended to have two phases:
(1) while holding rdtgroup_mutex the necessary cleanup is done and
rdtgrp->flags is set to RDT_DELETED,
(2) after releasing the rdtgroup_mutex, the rdtgrp structure is freed
only if there are no waiters and its flag is set to RDT_DELETED. Upon
gaining access to rdtgroup_mutex or rdtgrp, a waiter is required to check
for the RDT_DELETED flag.
When unmounting the resctrl file system or deleting ctrl_mon groups,
all of the subdirectories are removed and the data structure of rdtgrp
is forcibly freed without checking rdtgrp->waitcount. If at this point
there was a waiter on rdtgrp then a use-after-free issue occurs when the
waiter starts running and accesses the rdtgrp structure it was waiting
on.
See kfree() calls in [1], [2] and [3] in these two call paths in
following scenarios:
(1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp()
(2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
There are several scenarios that result in use-after-free issue in
following:
Scenario 1:
-----------
In Thread 1, rdtgroup_tasks_write() adds a task_work callback
move_myself(). If move_myself() is scheduled to execute after Thread 2
rdt_kill_sb() is finished, referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_tasks_write) Thread 2 (rdt_kill_sb)
------------------------------- ----------------------
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_move_task
__rdtgroup_move_task
/*
* Take an extra refcount, so rdtgrp cannot be freed
* before the call back move_myself has been invoked
*/
atomic_inc(&rdtgrp->waitcount)
/* Callback move_myself will be scheduled for later */
task_work_add(move_myself)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
mutex_lock
rmdir_all_sub
/*
* sentry and rdtgrp are freed
* without checking refcount
*/
free_all_child_rdtgrp
kfree(sentry)*[1]
kfree(rdtgrp)*[2]
mutex_unlock
/*
* Callback is scheduled to execute
* after rdt_kill_sb is finished
*/
move_myself
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [1] or [2].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
Scenario 2:
-----------
In Thread 1, rdtgroup_tasks_write() adds a task_work callback
move_myself(). If move_myself() is scheduled to execute after Thread 2
rdtgroup_rmdir() is finished, referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_tasks_write) Thread 2 (rdtgroup_rmdir)
------------------------------- -------------------------
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_move_task
__rdtgroup_move_task
/*
* Take an extra refcount, so rdtgrp cannot be freed
* before the call back move_myself has been invoked
*/
atomic_inc(&rdtgrp->waitcount)
/* Callback move_myself will be scheduled for later */
task_work_add(move_myself)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
/*
* sentry is freed without
* checking refcount
*/
kfree(sentry)*[3]
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(
&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
/*
* Callback is scheduled to execute
* after rdt_kill_sb is finished
*/
move_myself
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [3].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
If CONFIG_DEBUG_SLAB=y, Slab corruption on kmalloc-2k can be observed
like following. Note that "0x6b" is POISON_FREE after kfree(). The
corrupted bits "0x6a", "0x64" at offset 0x424 correspond to
waitcount member of struct rdtgroup which was freed:
Slab corruption (Not tainted): kmalloc-2k start=ffff9504c5b0d000, len=2048
420: 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkjkkkkkkkkkkk
Single bit error detected. Probably bad RAM.
Run memtest86+ or a similar memory test tool.
Next obj: start=ffff9504c5b0d800, len=2048
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
Slab corruption (Not tainted): kmalloc-2k start=ffff9504c58ab800, len=2048
420: 6b 6b 6b 6b 64 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkdkkkkkkkkkkk
Prev obj: start=ffff9504c58ab000, len=2048
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
Fix this by taking reference count (waitcount) of rdtgrp into account in
the two call paths that currently do not do so. Instead of always
freeing the resource group it will only be freed if there are no waiters
on it. If there are waiters, the resource group will have its flags set
to RDT_DELETED.
It will be left to the waiter to free the resource group when it starts
running and finding that it was the last waiter and the resource group
has been removed (rdtgrp->flags & RDT_DELETED) since. (1) rdt_kill_sb()
-> rmdir_all_sub() -> free_all_child_rdtgrp() (2) rdtgroup_rmdir() ->
rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
in older stable trees.
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-2-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 2dae1b3..7349969 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1260,7 +1260,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
free_rmid(sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);
- kfree(sentry);
+
+ if (atomic_read(&sentry->waitcount) != 0)
+ sentry->flags = RDT_DELETED;
+ else
+ kfree(sentry);
}
}
@@ -1294,7 +1298,11 @@ static void rmdir_all_sub(void)
kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
- kfree(rdtgrp);
+
+ if (atomic_read(&rdtgrp->waitcount) != 0)
+ rdtgrp->flags = RDT_DELETED;
+ else
+ kfree(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
update_closid_rmid(cpu_online_mask, &rdtgroup_default);
--
1.8.3.1
Please pull the following change since commit
68353984d63d8d7ea728819dbdb7aecc5f32d360:
Merge tag '5.6-smb3-fixes-and-dfs-and-readdir-improvements' of
git://git.samba.org/sfrench/cifs-2.6 (2020-01-28 15:34:03 -0800)
are available in the Git repository at:
git://git.samba.org/sfrench/cifs-2.6.git tags/5.6-rc-small-smb3-fix-for-stable
for you to fetch changes up to b581098482e6f177a4f64ea021fd5a9327ea08d5:
cifs: update internal module version number (2020-01-31 15:13:22 -0600)
----------------------------------------------------------------
Small SMB3 fix for stable (fixes problem with reconnect for soft mounts)
----------------------------------------------------------------
Ronnie Sahlberg (1):
cifs: fix soft mounts hanging in the reconnect code
Steve French (1):
cifs: update internal module version number
fs/cifs/cifsfs.h | 2 +-
fs/cifs/smb2pdu.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--
Thanks,
Steve
In year 2007 high performance SSD was still expensive, in order to
save more space for real workload or meta data, the readahead I/Os
for non-meta data was bypassed and not cached on SSD.
In now days, SSD price drops a lot and people can find larger size
SSD with more comfortable price. It is unncessary to alway bypass
normal readahead I/Os to save SSD space for now.
This patch adds options for readahead data cache policies via sysfs
file /sys/block/bcache<N>/readahead_cache_policy, the options are,
- "all": cache all readahead data I/Os.
- "meta-only": only cache meta data, and bypass other regular I/Os.
If users want to make bcache continue to only cache readahead request
for metadata and bypass regular data readahead, please set "meta-only"
to this sysfs file. By default, bcache will back to cache all read-
ahead requests now.
Cc: stable(a)vger.kernel.org
Signed-off-by: Coly Li <colyli(a)suse.de>
Acked-by: Eric Wheeler <bcache(a)linux.ewheeler.net>
Cc: Michael Lyle <mlyle(a)lyle.org>
---
drivers/md/bcache/bcache.h | 3 +++
drivers/md/bcache/request.c | 17 ++++++++++++-----
drivers/md/bcache/sysfs.c | 22 ++++++++++++++++++++++
3 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index adf26a21fcd1..74a9849ea164 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -330,6 +330,9 @@ struct cached_dev {
*/
atomic_t has_dirty;
+#define BCH_CACHE_READA_ALL 0
+#define BCH_CACHE_READA_META_ONLY 1
+ unsigned int cache_readahead_policy;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 73478a91a342..820d8402a1dc 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -379,13 +379,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
goto skip;
/*
- * Flag for bypass if the IO is for read-ahead or background,
- * unless the read-ahead request is for metadata
+ * If the bio is for read-ahead or background IO, bypass it or
+ * not depends on the following situations,
+ * - If the IO is for meta data, always cache it and no bypass
+ * - If the IO is not meta data, check dc->cache_reada_policy,
+ * BCH_CACHE_READA_ALL: cache it and not bypass
+ * BCH_CACHE_READA_META_ONLY: not cache it and bypass
+ * That is, read-ahead request for metadata always get cached
* (eg, for gfs2 or xfs).
*/
- if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
- !(bio->bi_opf & (REQ_META|REQ_PRIO)))
- goto skip;
+ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
+ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
+ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
+ goto skip;
+ }
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 733e2ddf3c78..3470fae4eabc 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = {
NULL
};
+static const char * const bch_reada_cache_policies[] = {
+ "all",
+ "meta-only",
+ NULL
+};
+
/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
"auto",
@@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
+rw_attribute(readahead_cache_policy);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
@@ -168,6 +175,11 @@ SHOW(__bch_cached_dev)
bch_cache_modes,
BDEV_CACHE_MODE(&dc->sb));
+ if (attr == &sysfs_readahead_cache_policy)
+ return bch_snprint_string_list(buf, PAGE_SIZE,
+ bch_reada_cache_policies,
+ dc->cache_readahead_policy);
+
if (attr == &sysfs_stop_when_cache_set_failed)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_stop_on_failure_modes,
@@ -353,6 +365,15 @@ STORE(__cached_dev)
}
}
+ if (attr == &sysfs_readahead_cache_policy) {
+ v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
+ if (v < 0)
+ return v;
+
+ if ((unsigned int) v != dc->cache_readahead_policy)
+ dc->cache_readahead_policy = v;
+ }
+
if (attr == &sysfs_stop_when_cache_set_failed) {
v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
if (v < 0)
@@ -467,6 +488,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
+ &sysfs_readahead_cache_policy,
&sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
--
2.16.4
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: febac332a819f0e764aa4da62757ba21d18c182b
Gitweb: https://git.kernel.org/tip/febac332a819f0e764aa4da62757ba21d18c182b
Author: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
AuthorDate: Fri, 31 Jan 2020 19:08:59 +03:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Sat, 01 Feb 2020 11:07:56 +01:00
clocksource: Prevent double add_timer_on() for watchdog_timer
Kernel crashes inside QEMU/KVM are observed:
kernel BUG at kernel/time/timer.c:1154!
BUG_ON(timer_pending(timer) || !timer->function) in add_timer_on().
At the same time another cpu got:
general protection fault: 0000 [#1] SMP PTI of poinson pointer 0xdead000000000200 in:
__hlist_del at include/linux/list.h:681
(inlined by) detach_timer at kernel/time/timer.c:818
(inlined by) expire_timers at kernel/time/timer.c:1355
(inlined by) __run_timers at kernel/time/timer.c:1686
(inlined by) run_timer_softirq at kernel/time/timer.c:1699
Unfortunately kernel logs are badly scrambled, stacktraces are lost.
Printing the timer->function before the BUG_ON() pointed to
clocksource_watchdog().
The execution of clocksource_watchdog() can race with a sequence of
clocksource_stop_watchdog() .. clocksource_start_watchdog():
expire_timers()
detach_timer(timer, true);
timer->entry.pprev = NULL;
raw_spin_unlock_irq(&base->lock);
call_timer_fn
clocksource_watchdog()
clocksource_watchdog_kthread() or
clocksource_unbind()
spin_lock_irqsave(&watchdog_lock, flags);
clocksource_stop_watchdog();
del_timer(&watchdog_timer);
watchdog_running = 0;
spin_unlock_irqrestore(&watchdog_lock, flags);
spin_lock_irqsave(&watchdog_lock, flags);
clocksource_start_watchdog();
add_timer_on(&watchdog_timer, ...);
watchdog_running = 1;
spin_unlock_irqrestore(&watchdog_lock, flags);
spin_lock(&watchdog_lock);
add_timer_on(&watchdog_timer, ...);
BUG_ON(timer_pending(timer) || !timer->function);
timer_pending() -> true
BUG()
I.e. inside clocksource_watchdog() watchdog_timer could be already armed.
Check timer_pending() before calling add_timer_on(). This is sufficient as
all operations are synchronized by watchdog_lock.
Fixes: 75c5158f70c0 ("timekeeping: Update clocksource with stop_machine")
Signed-off-by: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/158048693917.4378.13823603769948933793.stgit@buzz
---
kernel/time/clocksource.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index fff5f64..428beb6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused)
next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(cpu_online_mask);
- watchdog_timer.expires += WATCHDOG_INTERVAL;
- add_timer_on(&watchdog_timer, next_cpu);
+
+ /*
+ * Arm timer if not already pending: could race with concurrent
+ * pair clocksource_stop_watchdog() clocksource_start_watchdog().
+ */
+ if (!timer_pending(&watchdog_timer)) {
+ watchdog_timer.expires += WATCHDOG_INTERVAL;
+ add_timer_on(&watchdog_timer, next_cpu);
+ }
out:
spin_unlock(&watchdog_lock);
}
Hi,
On 1/31/20 6:17 PM, Z R wrote:
> Hi Benjamin,
> in last log touchpad.log (omg, should be keyboard.log), I pressed fn-f3 multiple times. I got one two liner:
>
> # ReportID: 3 / Wireless Radio Button: 0 | #
> E: 000007.606583 2 03 00
>
> every time i release f3. Does not matter what is happening with fn-key. (could be released already or still pushed down). This log was collected with last patch from Hans applied.
>
> Sorry for the mess I caused :-) I still don't get how you guys manage to have your emails so well polished and readable.
I don't think you've, caused a mess. Thank you both for the bug report and for your
help in testing this.
One last test request, can you run evemu-record on a kernel with my latest simplified patch,
select the keyboard device and then press (and release) the "rfkill" key and see it generates
a RF_KILL key press + release evdev event?
What would also be helpful is the output of:
ls -l /sys/bus/hid/devices/0003*/driver
Regards,
Hans
> pá 31. 1. 2020 v 18:00 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com>> napsal:
>
> On Fri, Jan 31, 2020 at 5:09 PM Z R <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com>> wrote:
> >
> > I believe I pressed wifi button on both replays for keyboard.
>
> Yep, I can see that. Just to double check, on the last log, you
> pressed the wifi button twice?
>
> Anyway, thanks for all the logs, I should have enough to implement the
> regression tests.
>
> Cheers,
> Benjamin
>
> >
> > With latest patch from Hans on top of v5.5.0 touchpads "two finger scrolling" is working again. Attaching current hid-record for keyboard with Wifi button pressed. Events in log appeared after f3 button was "released".
> >
> > Thanks
> >
> > Zdeněk
> >
> > pá 31. 1. 2020 v 16:45 odesílatel Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> napsal:
> >>
> >> Hi,
> >>
> >> On 1/31/20 4:38 PM, Z R wrote:
> >> > Hi Benjamin,
> >> > hid-record for keyboard and touchpad. With Commit 8f18eca9ebc5 reverted and from unmodified kernel.
> >> >
> >> > I hope it is what you asked for :-)
> >> >
> >> > Currently waiting for reworked patch from Hans.
> >>
> >> I just send you the reworked patch.
> >>
> >> Does the recordning include pressing of the wlan on/off key (Fn + F3 I believe) ?
> >> That is the whole reason why the special hid-ite driver is necessary.
> >>
> >> Benjamin about the wlan on/off key. AFAICR on a press + release of the key a
> >> single hid input report for the generic-desktop Wireless Radio Controls group
> >> is send. This input-report only has the one button with usage code HID_GD_RFKILL_BTN
> >> in there and it is always 0. It is as if the input-report is only send on release
> >> and not on press. So the hid-ite code emulates a press + release whenever the
> >> input-report is send.
> >>
> >> IOW the receiving of the input report is (ab)used as indication of the button
> >> having been pressed.
> >>
> >> Regards,
> >>
> >> Hans
> >>
> >>
> >> >
> >> > Bye for now
> >> > Zdeněk
> >> >
> >> > pá 31. 1. 2020 v 15:12 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com> <mailto:benjamin.tissoires@redhat.com <mailto:benjamin.tissoires@redhat.com>>> napsal:
> >> >
> >> > On Fri, Jan 31, 2020 at 3:04 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com> <mailto:hdegoede@redhat.com <mailto:hdegoede@redhat.com>>> wrote:
> >> > >
> >> > > Hi,
> >> > >
> >> > > On 1/31/20 2:54 PM, Benjamin Tissoires wrote:
> >> > > > On Fri, Jan 31, 2020 at 2:41 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com> <mailto:hdegoede@redhat.com <mailto:hdegoede@redhat.com>>> wrote:
> >> > > >>
> >> > > >> Hi,
> >> > > >>
> >> > > >> On 1/31/20 2:10 PM, Benjamin Tissoires wrote:
> >> > > >>> Hi Hans,
> >> > > >>>
> >> > > >>> On Fri, Jan 31, 2020 at 1:46 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com> <mailto:hdegoede@redhat.com <mailto:hdegoede@redhat.com>>> wrote:
> >> > > >>>>
> >> > > >>>> Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
> >> > > >>>> dock") added the USB id for the Acer SW5-012's keyboard dock to the
> >> > > >>>> hid-ite driver to fix the rfkill driver not working.
> >> > > >>>>
> >> > > >>>> Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
> >> > > >>>> "Wireless Radio Control" bits which need the special hid-ite driver on the
> >> > > >>>> second USB interface (the mouse interface) and their touchpad only supports
> >> > > >>>> mouse emulation, so using generic hid-input handling for anything but
> >> > > >>>> the "Wireless Radio Control" bits is fine. On these devices we simply bind
> >> > > >>>> to all USB interfaces.
> >> > > >>>>
> >> > > >>>> But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
> >> > > >>>> (SW5-012)'s touchpad not only does mouse emulation it also supports
> >> > > >>>> HID-multitouch and all the keys including the "Wireless Radio Control"
> >> > > >>>> bits have been moved to the first USB interface (the keyboard intf).
> >> > > >>>>
> >> > > >>>> So we need hid-ite to handle the first (keyboard) USB interface and have
> >> > > >>>> it NOT bind to the second (mouse) USB interface so that that can be
> >> > > >>>> handled by hid-multitouch.c and we get proper multi-touch support.
> >> > > >>>>
> >> > > >>>> This commit adds a match callback to hid-ite which makes it only
> >> > > >>>> match the first USB interface when running on the Acer SW5-012,
> >> > > >>>> fixing the regression to mouse-emulation mode introduced by adding the
> >> > > >>>> keyboard dock USB id.
> >> > > >>>>
> >> > > >>>> Note the match function only does the special only bind to the first
> >> > > >>>> USB interface on the Acer SW5-012, on other devices the hid-ite driver
> >> > > >>>> actually must bind to the second interface as that is where the
> >> > > >>>> "Wireless Radio Control" bits are.
> >> > > >>>
> >> > > >>> This is not a full review, but a couple of things that popped out
> >> > > >>> while scrolling through the patch.
> >> > > >>>
> >> > > >>>>
> >> > > >>>> Cc: stable(a)vger.kernel.org <mailto:stable@vger.kernel.org> <mailto:stable@vger.kernel.org <mailto:stable@vger.kernel.org>>
> >> > > >>>> Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
> >> > > >>>> Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com> <mailto:zdenda.rampas@gmail.com <mailto:zdenda.rampas@gmail.com>>>
> >> > > >>>> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com> <mailto:hdegoede@redhat.com <mailto:hdegoede@redhat.com>>>
> >> > > >>>> ---
> >> > > >>>> drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
> >> > > >>>> 1 file changed, 34 insertions(+)
> >> > > >>>>
> >> > > >>>> diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
> >> > > >>>> index c436e12feb23..69a4ddfd033d 100644
> >> > > >>>> --- a/drivers/hid/hid-ite.c
> >> > > >>>> +++ b/drivers/hid/hid-ite.c
> >> > > >>>> @@ -8,9 +8,12 @@
> >> > > >>>> #include <linux/input.h>
> >> > > >>>> #include <linux/hid.h>
> >> > > >>>> #include <linux/module.h>
> >> > > >>>> +#include <linux/usb.h>
> >> > > >>>>
> >> > > >>>> #include "hid-ids.h"
> >> > > >>>>
> >> > > >>>> +#define ITE8595_KBD_USB_INTF 0
> >> > > >>>> +
> >> > > >>>> static int ite_event(struct hid_device *hdev, struct hid_field *field,
> >> > > >>>> struct hid_usage *usage, __s32 value)
> >> > > >>>> {
> >> > > >>>> @@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
> >> > > >>>> return 0;
> >> > > >>>> }
> >> > > >>>>
> >> > > >>>> +static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
> >> > > >>>> +{
> >> > > >>>> + struct usb_interface *intf;
> >> > > >>>> +
> >> > > >>>> + if (ignore_special_driver)
> >> > > >>>> + return false;
> >> > > >>>> +
> >> > > >>>> + /*
> >> > > >>>> + * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
> >> > > >>>> + * have the "Wireless Radio Control" bits which need this special
> >> > > >>>> + * driver on the second USB interface (the mouse interface). On
> >> > > >>>> + * these devices we simply bind to all USB interfaces.
> >> > > >>>> + *
> >> > > >>>> + * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
> >> > > >>>> + * not only does mouse emulation it also supports HID-multitouch
> >> > > >>>> + * and all the keys including the "Wireless Radio Control" bits
> >> > > >>>> + * have been moved to the first USB interface (the keyboard intf).
> >> > > >>>> + *
> >> > > >>>> + * We want the hid-multitouch driver to bind to the touchpad, so on
> >> > > >>>> + * the Acer SW5-012 we should only bind to the keyboard USB intf.
> >> > > >>>> + */
> >> > > >>>> + if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
> >> > > >>>> + hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
> >> > > >>>
> >> > > >>> Isn't there an existing matching function we can use here, instead of
> >> > > >>> checking each individual field?
> >> > > >>
> >> > > >> There is hid_match_one_id() but that is not exported (can be fixed) and it
> >> > > >> requires a struct hid_device_id, which either requires declaring an extra
> >> > > >> standalone struct hid_device_id for the SW5-012 kbd-dock, or hardcoding an
> >> > > >> index into the existing hid_device_id array for the driver (with the hardcoding
> >> > > >> being error prone, so not a good idea).
> >> > > >>
> >> > > >> Given the problems with using hid_match_one_id() I decided to just go with
> >> > > >> the above.
> >> > > >
> >> > > > right. An other solution would be to have a local macro/function that
> >> > > > does that. Because as soon as you start adding a quirk, an other comes
> >> > > > right after.
> >> > > >
> >> > > >>
> >> > > >> But see below.
> >> > > >>
> >> > > >>>
> >> > > >>>> + return true;
> >> > > >>>> +
> >> > > >>>> + intf = to_usb_interface(hdev->dev.parent);
> >> > > >>>
> >> > > >>> And this is oops-prone. You need:
> >> > > >>> - ensure hid_is_using_ll_driver(hdev, &usb_hid_driver) returns true.
> >> > > >>> - add a dependency on USBHID in the KConfig now that you are checking
> >> > > >>> on the USB transport layer.
> >> > > >>>
> >> > > >>> That being said, I would love instead:
> >> > > >>> - to have a non USB version of this match, where you decide which
> >> > > >>> component needs to be handled based on the report descriptor
> >> > > >>
> >> > > >> Actually your idea to use the desciptors is not bad, but since what
> >> > > >> we really want is to not bind to the interface which is marked for the
> >> > > >> hid-multitouch driver I just realized we can just check that.
> >> > > >>
> >> > > >> So how about:
> >> > > >>
> >> > > >> static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
> >> > > >> {
> >> > > >> if (ignore_special_driver)
> >> > > >> return false;
> >> > > >>
> >> > > >> /*
> >> > > >> * Some keyboard docks with an ITE 8595 keyboard/touchpad controller
> >> > > >> * support the HID multitouch protocol for the touchpad, in that
> >> > > >> * case the "Wireless Radio Control" bits which we care about are
> >> > > >> * on the other interface; and we should not bind to the multitouch
> >> > > >> * capable interface as that breaks multitouch support.
> >> > > >> */
> >> > > >> return hdev->group != HID_GROUP_MULTITOUCH_WIN_8;
> >> > > >> }
> >> > > >
> >> > > > Yep, I like that very much :)
> >> > >
> >> > > Actually if we want to check the group and there are only 2 interfaces we do
> >> > > not need to use the match callback at all, w e can simply match on the
> >> > > group of the interface which we do want:
> >> > >
> >> > > diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
> >> > > index db0f35be5a8b..21bd48f16033 100644
> >> > > --- a/drivers/hid/hid-ite.c
> >> > > +++ b/drivers/hid/hid-ite.c
> >> > > @@ -56,8 +56,9 @@ static const struct hid_device_id ite_devices[] = {
> >> > > { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
> >> > > { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
> >> > > /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
> >> > > - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
> >> > > - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
> >> > > + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
> >> > > + USB_VENDOR_ID_SYNAPTICS,
> >> > > + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
> >> > > { }
> >> > > };
> >> > > MODULE_DEVICE_TABLE(hid, ite_devices);
> >> > >
> >> > > Much cleaner
> >> >
> >> > yep
> >> >
> >> > > (and now I don't need to write a test, which is always
> >> > > a good motivation to come up with a cleaner solution :)
> >> >
> >> > Hehe, too bad, you already picked up my curiosity on this one, and I
> >> > really would like to see the report descriptors and some events of the
> >> > keys that are fixed by hid-ite.c.
> >> > <with a low voice>This will be a hard requirement to accept this patch </joke>.
> >> >
> >> > More seriously, Zdeněk, can you run hid-recorder from
> >> > https://gitlab.freedesktop.org/libevdev/hid-tools/ and provide me the
> >> > report descriptor for all of your ITE HID devices? I'll add the
> >> > matching tests in hid-tools and be sure we do not regress in the
> >> > future.
> >> >
> >> > >
> >> > > Let me turn this into a proper patch and then I will send that to
> >> > > Zdeněk (off-list) for him to test (note don't worry if you do
> >> > > not have time to test this weekend, then I'll do it on Monday).
> >> > >
> >> > > Regards,
> >> > >
> >> > > Hans
> >> > >
> >> > > p.s.
> >> > >
> >> > > 1. My train is approaching Brussels (Fosdem) so my email response
> >> > > time will soon become irregular.
> >> >
> >> > How dare you? :)
> >> >
> >> > >
> >> > > 2. Benjamin will you be at Fosdem too ?
> >> > >
> >> >
> >> > Unfortunately no. Already got my quota of meeting people for this year
> >> > between Kernel Recipes in September, XDC in October and LCA last week.
> >> > So I need to keep in a quiet environment for a little bit :)
> >> >
> >> > Cheers,
> >> > Benjamin
> >> >
> >>
>
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 6f1a4891a5928a5969c87fa5a584844c983ec823
Gitweb: https://git.kernel.org/tip/6f1a4891a5928a5969c87fa5a584844c983ec823
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Fri, 31 Jan 2020 15:26:52 +01:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Sat, 01 Feb 2020 09:31:47 +01:00
x86/apic/msi: Plug non-maskable MSI affinity race
Evan tracked down a subtle race between the update of the MSI message and
the device raising an interrupt internally on PCI devices which do not
support MSI masking. The update of the MSI message is non-atomic and
consists of either 2 or 3 sequential 32bit wide writes to the PCI config
space.
- Write address low 32bits
- Write address high 32bits (If supported by device)
- Write data
When an interrupt is migrated then both address and data might change, so
the kernel attempts to mask the MSI interrupt first. But for MSI masking is
optional, so there exist devices which do not provide it. That means that
if the device raises an interrupt internally between the writes then a MSI
message is sent built from half updated state.
On x86 this can lead to spurious interrupts on the wrong interrupt
vector when the affinity setting changes both address and data. As a
consequence the device interrupt can be lost causing the device to
become stuck or malfunctioning.
Evan tried to handle that by disabling MSI accross an MSI message
update. That's not feasible because disabling MSI has issues on its own:
If MSI is disabled the PCI device is routing an interrupt to the legacy
INTx mechanism. The INTx delivery can be disabled, but the disablement is
not working on all devices.
Some devices lose interrupts when both MSI and INTx delivery are disabled.
Another way to solve this would be to enforce the allocation of the same
vector on all CPUs in the system for this kind of screwed devices. That
could be done, but it would bring back the vector space exhaustion problems
which got solved a few years ago.
Fortunately the high address (if supported by the device) is only relevant
when X2APIC is enabled which implies interrupt remapping. In the interrupt
remapping case the affinity setting is happening at the interrupt remapping
unit and the PCI MSI message is programmed only once when the PCI device is
initialized.
That makes it possible to solve it with a two step update:
1) Target the MSI msg to the new vector on the current target CPU
2) Target the MSI msg to the new vector on the new target CPU
In both cases writing the MSI message is only changing a single 32bit word
which prevents the issue of inconsistency.
After writing the final destination it is necessary to check whether the
device issued an interrupt while the intermediate state #1 (new vector,
current CPU) was in effect.
This is possible because the affinity change is always happening on the
current target CPU. The code runs with interrupts disabled, so the
interrupt can be detected by checking the IRR of the local APIC. If the
vector is pending in the IRR then the interrupt is retriggered on the new
target CPU by sending an IPI for the associated vector on the target CPU.
This can cause spurious interrupts on both the local and the new target
CPU.
1) If the new vector is not in use on the local CPU and the device
affected by the affinity change raised an interrupt during the
transitional state (step #1 above) then interrupt entry code will
ignore that spurious interrupt. The vector is marked so that the
'No irq handler for vector' warning is supressed once.
2) If the new vector is in use already on the local CPU then the IRR check
might see an pending interrupt from the device which is using this
vector. The IPI to the new target CPU will then invoke the handler of
the device, which got the affinity change, even if that device did not
issue an interrupt
3) If the new vector is in use already on the local CPU and the device
affected by the affinity change raised an interrupt during the
transitional state (step #1 above) then the handler of the device which
uses that vector on the local CPU will be invoked.
expose issues in device driver interrupt handlers which are not prepared to
handle a spurious interrupt correctly. This not a regression, it's just
exposing something which was already broken as spurious interrupts can
happen for a lot of reasons and all driver handlers need to be able to deal
with them.
Reported-by: Evan Green <evgreen(a)chromium.org>
Debugged-by: Evan Green <evgreen(a)chromium.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Evan Green <evgreen(a)chromium.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/87imkr4s7n.fsf@nanos.tec.linutronix.de
---
arch/x86/include/asm/apic.h | 8 ++-
arch/x86/kernel/apic/msi.c | 128 ++++++++++++++++++++++++++++++++++-
include/linux/irq.h | 18 +++++-
include/linux/irqdomain.h | 7 ++-
kernel/irq/debugfs.c | 1 +-
kernel/irq/msi.c | 5 +-
6 files changed, 163 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index be0b9cf..19e94af 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -454,6 +454,14 @@ static inline void ack_APIC_irq(void)
apic_eoi();
}
+
+static inline bool lapic_vector_set_in_irr(unsigned int vector)
+{
+ u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+
+ return !!(irr & (1U << (vector % 32)));
+}
+
static inline unsigned default_get_apic_id(unsigned long x)
{
unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 7f75334..159bd0c 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,10 +23,8 @@
static struct irq_domain *msi_default_domain;
-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
{
- struct irq_cfg *cfg = irqd_cfg(data);
-
msg->address_hi = MSI_ADDR_BASE_HI;
if (x2apic_enabled())
@@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
MSI_DATA_VECTOR(cfg->vector);
}
+static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ __irq_msi_compose_msg(irqd_cfg(data), msg);
+}
+
+static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
+{
+ struct msi_msg msg[2] = { [1] = { }, };
+
+ __irq_msi_compose_msg(cfg, msg);
+ irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
+}
+
+static int
+msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
+{
+ struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd);
+ struct irq_data *parent = irqd->parent_data;
+ unsigned int cpu;
+ int ret;
+
+ /* Save the current configuration */
+ cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
+ old_cfg = *cfg;
+
+ /* Allocate a new target vector */
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
+
+ /*
+ * For non-maskable and non-remapped MSI interrupts the migration
+ * to a different destination CPU and a different vector has to be
+ * done careful to handle the possible stray interrupt which can be
+ * caused by the non-atomic update of the address/data pair.
+ *
+ * Direct update is possible when:
+ * - The MSI is maskable (remapped MSI does not use this code path)).
+ * The quirk bit is not set in this case.
+ * - The new vector is the same as the old vector
+ * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+ * - The new destination CPU is the same as the old destination CPU
+ */
+ if (!irqd_msi_nomask_quirk(irqd) ||
+ cfg->vector == old_cfg.vector ||
+ old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+ cfg->dest_apicid == old_cfg.dest_apicid) {
+ irq_msi_update_msg(irqd, cfg);
+ return ret;
+ }
+
+ /*
+ * Paranoia: Validate that the interrupt target is the local
+ * CPU.
+ */
+ if (WARN_ON_ONCE(cpu != smp_processor_id())) {
+ irq_msi_update_msg(irqd, cfg);
+ return ret;
+ }
+
+ /*
+ * Redirect the interrupt to the new vector on the current CPU
+ * first. This might cause a spurious interrupt on this vector if
+ * the device raises an interrupt right between this update and the
+ * update to the final destination CPU.
+ *
+ * If the vector is in use then the installed device handler will
+ * denote it as spurious which is no harm as this is a rare event
+ * and interrupt handlers have to cope with spurious interrupts
+ * anyway. If the vector is unused, then it is marked so it won't
+ * trigger the 'No irq handler for vector' warning in do_IRQ().
+ *
+ * This requires to hold vector lock to prevent concurrent updates to
+ * the affected vector.
+ */
+ lock_vector_lock();
+
+ /*
+ * Mark the new target vector on the local CPU if it is currently
+ * unused. Reuse the VECTOR_RETRIGGERED state which is also used in
+ * the CPU hotplug path for a similar purpose. This cannot be
+ * undone here as the current CPU has interrupts disabled and
+ * cannot handle the interrupt before the whole set_affinity()
+ * section is done. In the CPU unplug case, the current CPU is
+ * about to vanish and will not handle any interrupts anymore. The
+ * vector is cleaned up when the CPU comes online again.
+ */
+ if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector])))
+ this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED);
+
+ /* Redirect it to the new vector on the local CPU temporarily */
+ old_cfg.vector = cfg->vector;
+ irq_msi_update_msg(irqd, &old_cfg);
+
+ /* Now transition it to the target CPU */
+ irq_msi_update_msg(irqd, cfg);
+
+ /*
+ * All interrupts after this point are now targeted at the new
+ * vector/CPU.
+ *
+ * Drop vector lock before testing whether the temporary assignment
+ * to the local CPU was hit by an interrupt raised in the device,
+ * because the retrigger function acquires vector lock again.
+ */
+ unlock_vector_lock();
+
+ /*
+ * Check whether the transition raced with a device interrupt and
+ * is pending in the local APICs IRR. It is safe to do this outside
+ * of vector lock as the irq_desc::lock of this interrupt is still
+ * held and interrupts are disabled: The check is not accessing the
+ * underlying vector store. It's just checking the local APIC's
+ * IRR.
+ */
+ if (lapic_vector_set_in_irr(cfg->vector))
+ irq_data_get_irq_chip(irqd)->irq_retrigger(irqd);
+
+ return ret;
+}
+
/*
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
* which implement the MSI or MSI-X Capability Structure.
@@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = {
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = irq_msi_compose_msg,
+ .irq_set_affinity = msi_set_affinity,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
}
if (!msi_default_domain)
pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
+ else
+ msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
}
#ifdef CONFIG_IRQ_REMAP
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7853eb9..3ed5a05 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -209,6 +209,8 @@ struct irq_data {
* IRQD_SINGLE_TARGET - IRQ allows only a single affinity target
* IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set
* IRQD_CAN_RESERVE - Can use reservation mode
+ * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change
+ * required
*/
enum {
IRQD_TRIGGER_MASK = 0xf,
@@ -231,6 +233,7 @@ enum {
IRQD_SINGLE_TARGET = (1 << 24),
IRQD_DEFAULT_TRIGGER_SET = (1 << 25),
IRQD_CAN_RESERVE = (1 << 26),
+ IRQD_MSI_NOMASK_QUIRK = (1 << 27),
};
#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -390,6 +393,21 @@ static inline bool irqd_can_reserve(struct irq_data *d)
return __irqd_to_state(d) & IRQD_CAN_RESERVE;
}
+static inline void irqd_set_msi_nomask_quirk(struct irq_data *d)
+{
+ __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK;
+}
+
+static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d)
+{
+ __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK;
+}
+
+static inline bool irqd_msi_nomask_quirk(struct irq_data *d)
+{
+ return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
+}
+
#undef __irqd_to_state
static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 3c340db..4da8df5 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -207,6 +207,13 @@ enum {
IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5),
/*
+ * Quirk to handle MSI implementations which do not provide
+ * masking. Currently known to affect x86, but partially
+ * handled in core code.
+ */
+ IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6),
+
+ /*
* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
* for implementation specific purposes and ignored by the
* core code.
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index c1eccd4..a949bd3 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -114,6 +114,7 @@ static const struct irq_bit_descr irqdata_states[] = {
BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
BIT_MASK_DESCR(IRQD_CAN_RESERVE),
+ BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK),
BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index ad26fbc..eb95f61 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -453,8 +453,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
continue;
irq_data = irq_domain_get_irq_data(domain, desc->irq);
- if (!can_reserve)
+ if (!can_reserve) {
irqd_clr_can_reserve(irq_data);
+ if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
+ irqd_set_msi_nomask_quirk(irq_data);
+ }
ret = irq_domain_activate_irq(irq_data, can_reserve);
if (ret)
goto cleanup;
The patch titled
Subject: lib/test_kasan.c: fix memory leak in kmalloc_oob_krealloc_more()
has been removed from the -mm tree. Its filename was
lib-test_kasanc-fix-memory-leak-in-kmalloc_oob_krealloc_more.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: "Gustavo A. R. Silva" <gustavo(a)embeddedor.com>
Subject: lib/test_kasan.c: fix memory leak in kmalloc_oob_krealloc_more()
In case memory resources for _ptr2_ were allocated, release them before
return.
Notice that in case _ptr1_ happens to be NULL, krealloc() behaves exactly
like kmalloc().
Addresses-Coverity-ID: 1490594 ("Resource leak")
Link: http://lkml.kernel.org/r/20200123160115.GA4202@embeddedor
Fixes: 3f15801cdc23 ("lib: add kasan test module")
Signed-off-by: Gustavo A. R. Silva <gustavo(a)embeddedor.com>
Reviewed-by: Dmitry Vyukov <dvyukov(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/test_kasan.c | 1 +
1 file changed, 1 insertion(+)
--- a/lib/test_kasan.c~lib-test_kasanc-fix-memory-leak-in-kmalloc_oob_krealloc_more
+++ a/lib/test_kasan.c
@@ -158,6 +158,7 @@ static noinline void __init kmalloc_oob_
if (!ptr1 || !ptr2) {
pr_err("Allocation failed\n");
kfree(ptr1);
+ kfree(ptr2);
return;
}
_
Patches currently in -mm which might be from gustavo(a)embeddedor.com are
The patch titled
Subject: media/v4l2-core: set pages dirty upon releasing DMA buffers
has been removed from the -mm tree. Its filename was
media-v4l2-core-set-pages-dirty-upon-releasing-dma-buffers.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: John Hubbard <jhubbard(a)nvidia.com>
Subject: media/v4l2-core: set pages dirty upon releasing DMA buffers
After DMA is complete, and the device and CPU caches are synchronized,
it's still required to mark the CPU pages as dirty, if the data was coming
from the device. However, this driver was just issuing a bare put_page()
call, without any set_page_dirty*() call.
Fix the problem, by calling set_page_dirty_lock() if the CPU pages were
potentially receiving data from the device.
Link: http://lkml.kernel.org/r/20200107224558.2362728-11-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Acked-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Cc: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Cc: Alex Williamson <alex.williamson(a)redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Cc: Björn Töpel <bjorn.topel(a)intel.com>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: Jason Gunthorpe <jgg(a)ziepe.ca>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Kirill A. Shutemov <kirill(a)shutemov.name>
Cc: Leon Romanovsky <leonro(a)mellanox.com>
Cc: Mike Rapoport <rppt(a)linux.ibm.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/media/v4l2-core/videobuf-dma-sg.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c~media-v4l2-core-set-pages-dirty-upon-releasing-dma-buffers
+++ a/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -349,8 +349,11 @@ int videobuf_dma_free(struct videobuf_dm
BUG_ON(dma->sglen);
if (dma->pages) {
- for (i = 0; i < dma->nr_pages; i++)
+ for (i = 0; i < dma->nr_pages; i++) {
+ if (dma->direction == DMA_FROM_DEVICE)
+ set_page_dirty_lock(dma->pages[i]);
put_page(dma->pages[i]);
+ }
kfree(dma->pages);
dma->pages = NULL;
}
_
Patches currently in -mm which might be from jhubbard(a)nvidia.com are
The patch titled
Subject: mm: move_pages: report the number of non-attempted pages
has been removed from the -mm tree. Its filename was
mm-move_pages-report-the-number-of-non-attempted-pages.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Yang Shi <yang.shi(a)linux.alibaba.com>
Subject: mm: move_pages: report the number of non-attempted pages
Since commit a49bd4d71637 ("mm, numa: rework do_pages_move"), the semantic
of move_pages() has changed to return the number of non-migrated pages if
they were result of a non-fatal reasons (usually a busy page). This was
an unintentional change that hasn't been noticed except for LTP tests
which checked for the documented behavior.
There are two ways to go around this change. We can even get back to the
original behavior and return -EAGAIN whenever migrate_pages is not able to
migrate pages due to non-fatal reasons. Another option would be to simply
continue with the changed semantic and extend move_pages documentation to
clarify that -errno is returned on an invalid input or when migration
simply cannot succeed (e.g. -ENOMEM, -EBUSY) or the number of pages that
couldn't have been migrated due to ephemeral reasons (e.g. page is pinned
or locked for other reasons).
This patch implements the second option because this behavior is in place
for some time without anybody complaining and possibly new users depending
on it. Also it allows to have a slightly easier error handling as the
caller knows that it is worth to retry when err > 0.
But since the new semantic would be aborted immediately if migration is
failed due to ephemeral reasons, need include the number of non-attempted
pages in the return value too.
Link: http://lkml.kernel.org/r/1580160527-109104-1-git-send-email-yang.shi@linux.…
Fixes: a49bd4d71637 ("mm, numa: rework do_pages_move")
Signed-off-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Suggested-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Wei Yang <richardw.yang(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [4.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
--- a/mm/migrate.c~mm-move_pages-report-the-number-of-non-attempted-pages
+++ a/mm/migrate.c
@@ -1627,8 +1627,19 @@ static int do_pages_move(struct mm_struc
start = i;
} else if (node != current_node) {
err = do_move_pages_to_node(mm, &pagelist, current_node);
- if (err)
+ if (err) {
+ /*
+ * Positive err means the number of failed
+ * pages to migrate. Since we are going to
+ * abort and return the number of non-migrated
+ * pages, so need to incude the rest of the
+ * nr_pages that have not been attempted as
+ * well.
+ */
+ if (err > 0)
+ err += nr_pages - i - 1;
goto out;
+ }
err = store_status(status, start, current_node, i - start);
if (err)
goto out;
@@ -1659,8 +1670,11 @@ static int do_pages_move(struct mm_struc
goto out_flush;
err = do_move_pages_to_node(mm, &pagelist, current_node);
- if (err)
+ if (err) {
+ if (err > 0)
+ err += nr_pages - i - 1;
goto out;
+ }
if (i > start) {
err = store_status(status, start, current_node, i - start);
if (err)
@@ -1674,6 +1688,13 @@ out_flush:
/* Make sure we do not overwrite the existing error */
err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+ /*
+ * Don't have to report non-attempted pages here since:
+ * - If the above loop is done gracefully all pages have been
+ * attempted.
+ * - If the above loop is aborted it means a fatal error
+ * happened, should return ret.
+ */
if (!err1)
err1 = store_status(status, start, current_node, i - start);
if (err >= 0)
_
Patches currently in -mm which might be from yang.shi(a)linux.alibaba.com are
The patch titled
Subject: mm: thp: don't need care deferred split queue in memcg charge move path
has been removed from the -mm tree. Its filename was
mm-thp-remove-the-defer-list-related-code-since-this-will-not-happen.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Wei Yang <richardw.yang(a)linux.intel.com>
Subject: mm: thp: don't need care deferred split queue in memcg charge move path
If compound is true, this means it is a PMD mapped THP. Which implies
the page is not linked to any defer list. So the first code chunk will
not be executed.
Also with this reason, it would not be proper to add this page to a
defer list. So the second code chunk is not correct.
Based on this, we should remove the defer list related code.
[yang.shi(a)linux.alibaba.com: better patch title]
Link: http://lkml.kernel.org/r/20200117233836.3434-1-richardw.yang@linux.intel.com
Fixes: 87eaceb3faa5 ("mm: thp: make deferred split shrinker memcg aware")
Signed-off-by: Wei Yang <richardw.yang(a)linux.intel.com>
Suggested-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Acked-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 18 ------------------
1 file changed, 18 deletions(-)
--- a/mm/memcontrol.c~mm-thp-remove-the-defer-list-related-code-since-this-will-not-happen
+++ a/mm/memcontrol.c
@@ -5340,14 +5340,6 @@ static int mem_cgroup_move_account(struc
__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (compound && !list_empty(page_deferred_list(page))) {
- spin_lock(&from->deferred_split_queue.split_queue_lock);
- list_del_init(page_deferred_list(page));
- from->deferred_split_queue.split_queue_len--;
- spin_unlock(&from->deferred_split_queue.split_queue_lock);
- }
-#endif
/*
* It is safe to change page->mem_cgroup here because the page
* is referenced, charged, and isolated - we can't race with
@@ -5357,16 +5349,6 @@ static int mem_cgroup_move_account(struc
/* caller should have done css_get */
page->mem_cgroup = to;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (compound && list_empty(page_deferred_list(page))) {
- spin_lock(&to->deferred_split_queue.split_queue_lock);
- list_add_tail(page_deferred_list(page),
- &to->deferred_split_queue.split_queue);
- to->deferred_split_queue.split_queue_len++;
- spin_unlock(&to->deferred_split_queue.split_queue_lock);
- }
-#endif
-
spin_unlock_irqrestore(&from->move_lock, flags);
ret = 0;
_
Patches currently in -mm which might be from richardw.yang(a)linux.intel.com are
The patch titled
Subject: mm/memory_hotplug: fix remove_memory() lockdep splat
has been removed from the -mm tree. Its filename was
mm-memory_hotplug-fix-remove_memory-lockdep-splat.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Dan Williams <dan.j.williams(a)intel.com>
Subject: mm/memory_hotplug: fix remove_memory() lockdep splat
The daxctl unit test for the dax_kmem driver currently triggers the (false
positive) lockdep splat below. It results from the fact that
remove_memory_block_devices() is invoked under the mem_hotplug_lock()
causing lockdep entanglements with cpu_hotplug_lock() and sysfs (kernfs
active state tracking). It is a false positive because the sysfs
attribute path triggering the memory remove is not the same attribute path
associated with memory-block device.
sysfs_break_active_protection() is not applicable since there is no real
deadlock conflict, instead move memory-block device removal outside the
lock. The mem_hotplug_lock() is not needed to synchronize the
memory-block device removal vs the page online state, that is already
handled by lock_device_hotplug(). Specifically, lock_device_hotplug() is
sufficient to allow try_remove_memory() to check the offline state of the
memblocks and be assured that any in progress online attempts are flushed
/ blocked by kernfs_drain() / attribute removal.
The add_memory() path safely creates memblock devices under the
mem_hotplug_lock(). There is no kernfs active state synchronization in
the memblock device_register() path, so nothing to fix there.
This change is only possible thanks to the recent change that refactored
memory block device removal out of arch_remove_memory() (commit
4c4b7f9ba948 mm/memory_hotplug: remove memory block devices before
arch_remove_memory()), and David's due diligence tracking down the
guarantees afforded by kernfs_drain(). Not flagged for -stable since this
only impacts ongoing development and lockdep validation, not a runtime
issue.
======================================================
WARNING: possible circular locking dependency detected
5.5.0-rc3+ #230 Tainted: G OE
------------------------------------------------------
lt-daxctl/6459 is trying to acquire lock:
ffff99c7f0003510 (kn->count#241){++++}, at: kernfs_remove_by_name_ns+0x41/0x80
but task is already holding lock:
ffffffffa76a5450 (mem_hotplug_lock.rw_sem){++++}, at: percpu_down_write+0x20/0xe0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (mem_hotplug_lock.rw_sem){++++}:
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
get_online_mems+0x3e/0xb0
kmem_cache_create_usercopy+0x2e/0x260
kmem_cache_create+0x12/0x20
ptlock_cache_init+0x20/0x28
start_kernel+0x243/0x547
secondary_startup_64+0xb6/0xc0
-> #1 (cpu_hotplug_lock.rw_sem){++++}:
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
cpus_read_lock+0x3e/0xb0
online_pages+0x37/0x300
memory_subsys_online+0x17d/0x1c0
device_online+0x60/0x80
state_store+0x65/0xd0
kernfs_fop_write+0xcf/0x1c0
vfs_write+0xdb/0x1d0
ksys_write+0x65/0xe0
do_syscall_64+0x5c/0xa0
entry_SYSCALL_64_after_hwframe+0x49/0xbe
-> #0 (kn->count#241){++++}:
check_prev_add+0x98/0xa40
validate_chain+0x576/0x860
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
__kernfs_remove+0x25f/0x2e0
kernfs_remove_by_name_ns+0x41/0x80
remove_files.isra.0+0x30/0x70
sysfs_remove_group+0x3d/0x80
sysfs_remove_groups+0x29/0x40
device_remove_attrs+0x39/0x70
device_del+0x16a/0x3f0
device_unregister+0x16/0x60
remove_memory_block_devices+0x82/0xb0
try_remove_memory+0xb5/0x130
remove_memory+0x26/0x40
dev_dax_kmem_remove+0x44/0x6a [kmem]
device_release_driver_internal+0xe4/0x1c0
unbind_store+0xef/0x120
kernfs_fop_write+0xcf/0x1c0
vfs_write+0xdb/0x1d0
ksys_write+0x65/0xe0
do_syscall_64+0x5c/0xa0
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Chain exists of:
kn->count#241 --> cpu_hotplug_lock.rw_sem --> mem_hotplug_lock.rw_sem
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(mem_hotplug_lock.rw_sem);
lock(cpu_hotplug_lock.rw_sem);
lock(mem_hotplug_lock.rw_sem);
lock(kn->count#241);
*** DEADLOCK ***
No fixes tag as this has been a long standing issue that predated the
addition of kernfs lockdep annotations.
Link: http://lkml.kernel.org/r/157991441887.2763922.4770790047389427325.stgit@dwi…
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
--- a/mm/memory_hotplug.c~mm-memory_hotplug-fix-remove_memory-lockdep-splat
+++ a/mm/memory_hotplug.c
@@ -1764,8 +1764,6 @@ static int __ref try_remove_memory(int n
BUG_ON(check_hotplug_memory_range(start, size));
- mem_hotplug_begin();
-
/*
* All memory blocks must be offlined before removing memory. Check
* whether all memory blocks in question are offline and return error
@@ -1778,9 +1776,14 @@ static int __ref try_remove_memory(int n
/* remove memmap entry */
firmware_map_remove(start, start + size, "System RAM");
- /* remove memory block devices before removing memory */
+ /*
+ * Memory block device removal under the device_hotplug_lock is
+ * a barrier against racing online attempts.
+ */
remove_memory_block_devices(start, size);
+ mem_hotplug_begin();
+
arch_remove_memory(nid, start, size, NULL);
memblock_free(start, size);
memblock_remove(start, size);
_
Patches currently in -mm which might be from dan.j.williams(a)intel.com are
The patch titled
Subject: mm/migrate.c: also overwrite error when it is bigger than zero
has been removed from the -mm tree. Its filename was
mm-migratec-also-overwrite-error-when-it-is-bigger-than-zero.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Wei Yang <richardw.yang(a)linux.intel.com>
Subject: mm/migrate.c: also overwrite error when it is bigger than zero
If we get here after successfully adding page to list, err would be 1 to
indicate the page is queued in the list.
Current code has two problems:
* on success, 0 is not returned
* on error, if add_page_for_migratioin() return 1, and the following err1
from do_move_pages_to_node() is set, the err1 is not returned since err
is 1
And these behaviors break the user interface.
Link: http://lkml.kernel.org/r/20200119065753.21694-1-richardw.yang@linux.intel.c…
Fixes: e0153fc2c760 ("mm: move_pages: return valid node id in status if the page is already on the target node").
Signed-off-by: Wei Yang <richardw.yang(a)linux.intel.com>
Acked-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/migrate.c~mm-migratec-also-overwrite-error-when-it-is-bigger-than-zero
+++ a/mm/migrate.c
@@ -1676,7 +1676,7 @@ out_flush:
err1 = do_move_pages_to_node(mm, &pagelist, current_node);
if (!err1)
err1 = store_status(status, start, current_node, i - start);
- if (!err)
+ if (err >= 0)
err = err1;
out:
return err;
_
Patches currently in -mm which might be from richardw.yang(a)linux.intel.com are
The patch titled
Subject: mm/sparse.c: reset section's mem_map when fully deactivated
has been removed from the -mm tree. Its filename was
mm-sparse-reset-sections-mem_map-when-fully-deactivated.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Pingfan Liu <kernelfans(a)gmail.com>
Subject: mm/sparse.c: reset section's mem_map when fully deactivated
After commit ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug"),
when a mem section is fully deactivated, section_mem_map still records the
section's start pfn, which is not used any more and will be reassigned
during re-addition.
In analogy with alloc/free pattern, it is better to clear all fields of
section_mem_map.
Beside this, it breaks the user space tool "makedumpfile" [1], which makes
assumption that a hot-removed section has mem_map as NULL, instead of
checking directly against SECTION_MARKED_PRESENT bit. (makedumpfile will
be better to change the assumption, and need a patch)
The bug can be reproduced on IBM POWERVM by "drmgr -c mem -r -q 5" ,
trigger a crash, and save vmcore by makedumpfile
[1]: makedumpfile, commit e73016540293 ("[v1.6.7] Update version")
Link: http://lkml.kernel.org/r/1579487594-28889-1-git-send-email-kernelfans@gmail…
Signed-off-by: Pingfan Liu <kernelfans(a)gmail.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Qian Cai <cai(a)lca.pw>
Cc: Kazuhito Hagio <k-hagio(a)ab.jp.nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/sparse.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/sparse.c~mm-sparse-reset-sections-mem_map-when-fully-deactivated
+++ a/mm/sparse.c
@@ -789,7 +789,7 @@ static void section_deactivate(unsigned
ms->usage = NULL;
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
+ ms->section_mem_map = (unsigned long)NULL;
}
if (section_is_early && memmap)
_
Patches currently in -mm which might be from kernelfans(a)gmail.com are
The patch titled
Subject: mm/mempolicy.c: fix out of bounds write in mpol_parse_str()
has been removed from the -mm tree. Its filename was
mm-mempolicyc-fix-out-of-bounds-write-in-mpol_parse_str.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Dan Carpenter <dan.carpenter(a)oracle.com>
Subject: mm/mempolicy.c: fix out of bounds write in mpol_parse_str()
What we are trying to do is change the '=' character to a NUL terminator
and then at the end of the function we restore it back to an '='. The
problem is there are two error paths where we jump to the end of the
function before we have replaced the '=' with NUL. We end up putting the
'=' in the wrong place (possibly one element before the start of the
buffer).
Link: http://lkml.kernel.org/r/20200115055426.vdjwvry44nfug7yy@kili.mountain
Reported-by: syzbot+e64a13c5369a194d67df(a)syzkaller.appspotmail.com
Fixes: 095f1fc4ebf3 ("mempolicy: rework shmem mpol parsing and display")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Dmitry Vyukov <dvyukov(a)google.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
Cc: Lee Schermerhorn <lee.schermerhorn(a)hp.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mempolicy.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/mempolicy.c~mm-mempolicyc-fix-out-of-bounds-write-in-mpol_parse_str
+++ a/mm/mempolicy.c
@@ -2821,6 +2821,9 @@ int mpol_parse_str(char *str, struct mem
char *flags = strchr(str, '=');
int err = 1, mode;
+ if (flags)
+ *flags++ = '\0'; /* terminate mode string */
+
if (nodelist) {
/* NUL-terminate mode or flags string */
*nodelist++ = '\0';
@@ -2831,9 +2834,6 @@ int mpol_parse_str(char *str, struct mem
} else
nodes_clear(nodes);
- if (flags)
- *flags++ = '\0'; /* terminate mode string */
-
mode = match_string(policy_modes, MPOL_MAX, str);
if (mode < 0)
goto out;
_
Patches currently in -mm which might be from dan.carpenter(a)oracle.com are
The patch titled
Subject: memcg: fix a crash in wb_workfn when a device disappears
has been removed from the -mm tree. Its filename was
memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: "Theodore Ts'o" <tytso(a)mit.edu>
Subject: memcg: fix a crash in wb_workfn when a device disappears
Without memcg, there is a one-to-one mapping between the bdi and
bdi_writeback structures. In this world, things are fairly
straightforward; the first thing bdi_unregister() does is to shutdown the
bdi_writeback structure (or wb), and part of that writeback ensures that
no other work queued against the wb, and that the wb is fully drained.
With memcg, however, there is a one-to-many relationship between the bdi
and bdi_writeback structures; that is, there are multiple wb objects which
can all point to a single bdi. There is a refcount which prevents the bdi
object from being released (and hence, unregistered). So in theory, the
bdi_unregister() *should* only get called once its refcount goes to zero
(bdi_put will drop the refcount, and when it is zero, release_bdi gets
called, which calls bdi_unregister).
Unfortunately, del_gendisk() in block/gen_hd.c never got the memo about
the Brave New memcg World, and calls bdi_unregister directly. It does
this without informing the file system, or the memcg code, or anything
else. This causes the root wb associated with the bdi to be unregistered,
but none of the memcg-specific wb's are shutdown. So when one of these
wb's are woken up to do delayed work, they try to dereference their
wb->bdi->dev to fetch the device name, but unfortunately bdi->dev is now
NULL, thanks to the bdi_unregister() called by del_gendisk(). As a
result, *boom*.
Fortunately, it looks like the rest of the writeback path is perfectly
happy with bdi->dev and bdi->owner being NULL, so the simplest fix is to
create a bdi_dev_name() function which can handle bdi->dev being NULL.
This also allows us to bulletproof the writeback tracepoints to prevent
them from dereferencing a NULL pointer and crashing the kernel if one is
tracing with memcg's enabled, and an iSCSI device dies or a USB storage
stick is pulled.
The most common way of triggering this will be hotremoval of a device
while writeback with memcg enabled is going on. It was triggering several
times a day in a heavily loaded production environment.
Google Bug Id: 145475544
Link: https://lore.kernel.org/r/20191227194829.150110-1-tytso@mit.edu
Link: http://lkml.kernel.org/r/20191228005211.163952-1-tytso@mit.edu
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: Chris Mason <clm(a)fb.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/fs-writeback.c | 2 -
include/linux/backing-dev.h | 10 +++++++
include/trace/events/writeback.h | 37 +++++++++++++----------------
mm/backing-dev.c | 1
4 files changed, 29 insertions(+), 21 deletions(-)
--- a/fs/fs-writeback.c~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/fs/fs-writeback.c
@@ -2063,7 +2063,7 @@ void wb_workfn(struct work_struct *work)
struct bdi_writeback, dwork);
long pages_written;
- set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
+ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
--- a/include/linux/backing-dev.h~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/include/linux/backing-dev.h
@@ -13,6 +13,7 @@
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
+#include <linux/device.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
@@ -504,4 +505,13 @@ static inline int bdi_rw_congested(struc
(1 << WB_async_congested));
}
+extern const char *bdi_unknown_name;
+
+static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+ if (!bdi || !bdi->dev)
+ return bdi_unknown_name;
+ return dev_name(bdi->dev);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
--- a/include/trace/events/writeback.h~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/include/trace/events/writeback.h
@@ -67,8 +67,8 @@ DECLARE_EVENT_CLASS(writeback_page_templ
TP_fast_assign(
strscpy_pad(__entry->name,
- mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)",
- 32);
+ bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
+ NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
__entry->index = page->index;
),
@@ -111,8 +111,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inod
struct backing_dev_info *bdi = inode_to_bdi(inode);
/* may be called for files on pseudo FSes w/ unregistered bdi */
- strscpy_pad(__entry->name,
- bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->flags = flags;
@@ -193,7 +192,7 @@ TRACE_EVENT(inode_foreign_history,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
__entry->history = history;
@@ -222,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(old_wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32);
__entry->ino = inode->i_ino;
__entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
__entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
@@ -255,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty,
struct address_space *mapping = page_mapping(page);
struct inode *inode = mapping ? mapping->host : NULL;
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->bdi_id = wb->bdi->id;
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
@@ -288,7 +287,7 @@ TRACE_EVENT(flush_foreign,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
__entry->frn_bdi_id = frn_bdi_id;
__entry->frn_memcg_id = frn_memcg_id;
@@ -318,7 +317,7 @@ DECLARE_EVENT_CLASS(writeback_write_inod
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
@@ -361,9 +360,7 @@ DECLARE_EVENT_CLASS(writeback_work_class
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
- strscpy_pad(__entry->name,
- wb->bdi->dev ? dev_name(wb->bdi->dev) :
- "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
__entry->sync_mode = work->sync_mode;
@@ -416,7 +413,7 @@ DECLARE_EVENT_CLASS(writeback_class,
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: cgroup_ino=%lu",
@@ -438,7 +435,7 @@ TRACE_EVENT(writeback_bdi_register,
__array(char, name, 32)
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
),
TP_printk("bdi %s",
__entry->name
@@ -463,7 +460,7 @@ DECLARE_EVENT_CLASS(wbc_class,
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->sync_mode = wbc->sync_mode;
@@ -514,7 +511,7 @@ TRACE_EVENT(writeback_queue_io,
),
TP_fast_assign(
unsigned long *older_than_this = work->older_than_this;
- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->older = older_than_this ? *older_than_this : 0;
__entry->age = older_than_this ?
(jiffies - *older_than_this) * 1000 / HZ : -1;
@@ -600,7 +597,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
),
TP_fast_assign(
- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->write_bw = KBps(wb->write_bandwidth);
__entry->avg_write_bw = KBps(wb->avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
@@ -665,7 +662,7 @@ TRACE_EVENT(balance_dirty_pages,
TP_fast_assign(
unsigned long freerun = (thresh + bg_thresh) / 2;
- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
@@ -726,7 +723,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
@@ -800,7 +797,7 @@ DECLARE_EVENT_CLASS(writeback_single_ino
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
--- a/mm/backing-dev.c~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/mm/backing-dev.c
@@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class;
+const char *bdi_unknown_name = "(unknown)";
/*
* bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
_
Patches currently in -mm which might be from tytso(a)mit.edu are
On Fri, Jan 31, 2020 at 6:17 PM Z R <zdenda.rampas(a)gmail.com> wrote:
>
> Hi Benjamin,
> in last log touchpad.log (omg, should be keyboard.log), I pressed fn-f3 multiple times. I got one two liner:
>
> # ReportID: 3 / Wireless Radio Button: 0 | #
> E: 000007.606583 2 03 00
great, thanks.
>
> every time i release f3. Does not matter what is happening with fn-key. (could be released already or still pushed down). This log was collected with last patch from Hans applied.
Actually, it doesn't really matter if the patch from Hans was applied
or not. I am looking at the raw event from the device before kernel
processing :)
May I ask you one more thing: can you run `libinput record` (as root)
on the touchpad? I need to know how many fingers the touchpad is
capable to detect, as right now, my tests are failing because of that.
>
> Sorry for the mess I caused :-) I still don't get how you guys manage to have your emails so well polished and readable.
No worries. Having a responsive user is much more valuable than
someone who takes ages to reply even in a clear and well polished
message :)
Cheers,
Benjamin
>
> Bye
> Zdeněk
>
> pá 31. 1. 2020 v 18:00 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com> napsal:
>>
>> On Fri, Jan 31, 2020 at 5:09 PM Z R <zdenda.rampas(a)gmail.com> wrote:
>> >
>> > I believe I pressed wifi button on both replays for keyboard.
>>
>> Yep, I can see that. Just to double check, on the last log, you
>> pressed the wifi button twice?
>>
>> Anyway, thanks for all the logs, I should have enough to implement the
>> regression tests.
>>
>> Cheers,
>> Benjamin
>>
>> >
>> > With latest patch from Hans on top of v5.5.0 touchpads "two finger scrolling" is working again. Attaching current hid-record for keyboard with Wifi button pressed. Events in log appeared after f3 button was "released".
>> >
>> > Thanks
>> >
>> > Zdeněk
>> >
>> > pá 31. 1. 2020 v 16:45 odesílatel Hans de Goede <hdegoede(a)redhat.com> napsal:
>> >>
>> >> Hi,
>> >>
>> >> On 1/31/20 4:38 PM, Z R wrote:
>> >> > Hi Benjamin,
>> >> > hid-record for keyboard and touchpad. With Commit 8f18eca9ebc5 reverted and from unmodified kernel.
>> >> >
>> >> > I hope it is what you asked for :-)
>> >> >
>> >> > Currently waiting for reworked patch from Hans.
>> >>
>> >> I just send you the reworked patch.
>> >>
>> >> Does the recordning include pressing of the wlan on/off key (Fn + F3 I believe) ?
>> >> That is the whole reason why the special hid-ite driver is necessary.
>> >>
>> >> Benjamin about the wlan on/off key. AFAICR on a press + release of the key a
>> >> single hid input report for the generic-desktop Wireless Radio Controls group
>> >> is send. This input-report only has the one button with usage code HID_GD_RFKILL_BTN
>> >> in there and it is always 0. It is as if the input-report is only send on release
>> >> and not on press. So the hid-ite code emulates a press + release whenever the
>> >> input-report is send.
>> >>
>> >> IOW the receiving of the input report is (ab)used as indication of the button
>> >> having been pressed.
>> >>
>> >> Regards,
>> >>
>> >> Hans
>> >>
>> >>
>> >> >
>> >> > Bye for now
>> >> > Zdeněk
>> >> >
>> >> > pá 31. 1. 2020 v 15:12 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com>> napsal:
>> >> >
>> >> > On Fri, Jan 31, 2020 at 3:04 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> >> > >
>> >> > > Hi,
>> >> > >
>> >> > > On 1/31/20 2:54 PM, Benjamin Tissoires wrote:
>> >> > > > On Fri, Jan 31, 2020 at 2:41 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> >> > > >>
>> >> > > >> Hi,
>> >> > > >>
>> >> > > >> On 1/31/20 2:10 PM, Benjamin Tissoires wrote:
>> >> > > >>> Hi Hans,
>> >> > > >>>
>> >> > > >>> On Fri, Jan 31, 2020 at 1:46 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> >> > > >>>>
>> >> > > >>>> Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
>> >> > > >>>> dock") added the USB id for the Acer SW5-012's keyboard dock to the
>> >> > > >>>> hid-ite driver to fix the rfkill driver not working.
>> >> > > >>>>
>> >> > > >>>> Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
>> >> > > >>>> "Wireless Radio Control" bits which need the special hid-ite driver on the
>> >> > > >>>> second USB interface (the mouse interface) and their touchpad only supports
>> >> > > >>>> mouse emulation, so using generic hid-input handling for anything but
>> >> > > >>>> the "Wireless Radio Control" bits is fine. On these devices we simply bind
>> >> > > >>>> to all USB interfaces.
>> >> > > >>>>
>> >> > > >>>> But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
>> >> > > >>>> (SW5-012)'s touchpad not only does mouse emulation it also supports
>> >> > > >>>> HID-multitouch and all the keys including the "Wireless Radio Control"
>> >> > > >>>> bits have been moved to the first USB interface (the keyboard intf).
>> >> > > >>>>
>> >> > > >>>> So we need hid-ite to handle the first (keyboard) USB interface and have
>> >> > > >>>> it NOT bind to the second (mouse) USB interface so that that can be
>> >> > > >>>> handled by hid-multitouch.c and we get proper multi-touch support.
>> >> > > >>>>
>> >> > > >>>> This commit adds a match callback to hid-ite which makes it only
>> >> > > >>>> match the first USB interface when running on the Acer SW5-012,
>> >> > > >>>> fixing the regression to mouse-emulation mode introduced by adding the
>> >> > > >>>> keyboard dock USB id.
>> >> > > >>>>
>> >> > > >>>> Note the match function only does the special only bind to the first
>> >> > > >>>> USB interface on the Acer SW5-012, on other devices the hid-ite driver
>> >> > > >>>> actually must bind to the second interface as that is where the
>> >> > > >>>> "Wireless Radio Control" bits are.
>> >> > > >>>
>> >> > > >>> This is not a full review, but a couple of things that popped out
>> >> > > >>> while scrolling through the patch.
>> >> > > >>>
>> >> > > >>>>
>> >> > > >>>> Cc: stable(a)vger.kernel.org <mailto:stable@vger.kernel.org>
>> >> > > >>>> Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
>> >> > > >>>> Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com>>
>> >> > > >>>> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>>
>> >> > > >>>> ---
>> >> > > >>>> drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
>> >> > > >>>> 1 file changed, 34 insertions(+)
>> >> > > >>>>
>> >> > > >>>> diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
>> >> > > >>>> index c436e12feb23..69a4ddfd033d 100644
>> >> > > >>>> --- a/drivers/hid/hid-ite.c
>> >> > > >>>> +++ b/drivers/hid/hid-ite.c
>> >> > > >>>> @@ -8,9 +8,12 @@
>> >> > > >>>> #include <linux/input.h>
>> >> > > >>>> #include <linux/hid.h>
>> >> > > >>>> #include <linux/module.h>
>> >> > > >>>> +#include <linux/usb.h>
>> >> > > >>>>
>> >> > > >>>> #include "hid-ids.h"
>> >> > > >>>>
>> >> > > >>>> +#define ITE8595_KBD_USB_INTF 0
>> >> > > >>>> +
>> >> > > >>>> static int ite_event(struct hid_device *hdev, struct hid_field *field,
>> >> > > >>>> struct hid_usage *usage, __s32 value)
>> >> > > >>>> {
>> >> > > >>>> @@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
>> >> > > >>>> return 0;
>> >> > > >>>> }
>> >> > > >>>>
>> >> > > >>>> +static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
>> >> > > >>>> +{
>> >> > > >>>> + struct usb_interface *intf;
>> >> > > >>>> +
>> >> > > >>>> + if (ignore_special_driver)
>> >> > > >>>> + return false;
>> >> > > >>>> +
>> >> > > >>>> + /*
>> >> > > >>>> + * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
>> >> > > >>>> + * have the "Wireless Radio Control" bits which need this special
>> >> > > >>>> + * driver on the second USB interface (the mouse interface). On
>> >> > > >>>> + * these devices we simply bind to all USB interfaces.
>> >> > > >>>> + *
>> >> > > >>>> + * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
>> >> > > >>>> + * not only does mouse emulation it also supports HID-multitouch
>> >> > > >>>> + * and all the keys including the "Wireless Radio Control" bits
>> >> > > >>>> + * have been moved to the first USB interface (the keyboard intf).
>> >> > > >>>> + *
>> >> > > >>>> + * We want the hid-multitouch driver to bind to the touchpad, so on
>> >> > > >>>> + * the Acer SW5-012 we should only bind to the keyboard USB intf.
>> >> > > >>>> + */
>> >> > > >>>> + if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
>> >> > > >>>> + hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
>> >> > > >>>
>> >> > > >>> Isn't there an existing matching function we can use here, instead of
>> >> > > >>> checking each individual field?
>> >> > > >>
>> >> > > >> There is hid_match_one_id() but that is not exported (can be fixed) and it
>> >> > > >> requires a struct hid_device_id, which either requires declaring an extra
>> >> > > >> standalone struct hid_device_id for the SW5-012 kbd-dock, or hardcoding an
>> >> > > >> index into the existing hid_device_id array for the driver (with the hardcoding
>> >> > > >> being error prone, so not a good idea).
>> >> > > >>
>> >> > > >> Given the problems with using hid_match_one_id() I decided to just go with
>> >> > > >> the above.
>> >> > > >
>> >> > > > right. An other solution would be to have a local macro/function that
>> >> > > > does that. Because as soon as you start adding a quirk, an other comes
>> >> > > > right after.
>> >> > > >
>> >> > > >>
>> >> > > >> But see below.
>> >> > > >>
>> >> > > >>>
>> >> > > >>>> + return true;
>> >> > > >>>> +
>> >> > > >>>> + intf = to_usb_interface(hdev->dev.parent);
>> >> > > >>>
>> >> > > >>> And this is oops-prone. You need:
>> >> > > >>> - ensure hid_is_using_ll_driver(hdev, &usb_hid_driver) returns true.
>> >> > > >>> - add a dependency on USBHID in the KConfig now that you are checking
>> >> > > >>> on the USB transport layer.
>> >> > > >>>
>> >> > > >>> That being said, I would love instead:
>> >> > > >>> - to have a non USB version of this match, where you decide which
>> >> > > >>> component needs to be handled based on the report descriptor
>> >> > > >>
>> >> > > >> Actually your idea to use the desciptors is not bad, but since what
>> >> > > >> we really want is to not bind to the interface which is marked for the
>> >> > > >> hid-multitouch driver I just realized we can just check that.
>> >> > > >>
>> >> > > >> So how about:
>> >> > > >>
>> >> > > >> static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
>> >> > > >> {
>> >> > > >> if (ignore_special_driver)
>> >> > > >> return false;
>> >> > > >>
>> >> > > >> /*
>> >> > > >> * Some keyboard docks with an ITE 8595 keyboard/touchpad controller
>> >> > > >> * support the HID multitouch protocol for the touchpad, in that
>> >> > > >> * case the "Wireless Radio Control" bits which we care about are
>> >> > > >> * on the other interface; and we should not bind to the multitouch
>> >> > > >> * capable interface as that breaks multitouch support.
>> >> > > >> */
>> >> > > >> return hdev->group != HID_GROUP_MULTITOUCH_WIN_8;
>> >> > > >> }
>> >> > > >
>> >> > > > Yep, I like that very much :)
>> >> > >
>> >> > > Actually if we want to check the group and there are only 2 interfaces we do
>> >> > > not need to use the match callback at all, w e can simply match on the
>> >> > > group of the interface which we do want:
>> >> > >
>> >> > > diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
>> >> > > index db0f35be5a8b..21bd48f16033 100644
>> >> > > --- a/drivers/hid/hid-ite.c
>> >> > > +++ b/drivers/hid/hid-ite.c
>> >> > > @@ -56,8 +56,9 @@ static const struct hid_device_id ite_devices[] = {
>> >> > > { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
>> >> > > { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
>> >> > > /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
>> >> > > - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
>> >> > > - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
>> >> > > + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
>> >> > > + USB_VENDOR_ID_SYNAPTICS,
>> >> > > + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
>> >> > > { }
>> >> > > };
>> >> > > MODULE_DEVICE_TABLE(hid, ite_devices);
>> >> > >
>> >> > > Much cleaner
>> >> >
>> >> > yep
>> >> >
>> >> > > (and now I don't need to write a test, which is always
>> >> > > a good motivation to come up with a cleaner solution :)
>> >> >
>> >> > Hehe, too bad, you already picked up my curiosity on this one, and I
>> >> > really would like to see the report descriptors and some events of the
>> >> > keys that are fixed by hid-ite.c.
>> >> > <with a low voice>This will be a hard requirement to accept this patch </joke>.
>> >> >
>> >> > More seriously, Zdeněk, can you run hid-recorder from
>> >> > https://gitlab.freedesktop.org/libevdev/hid-tools/ and provide me the
>> >> > report descriptor for all of your ITE HID devices? I'll add the
>> >> > matching tests in hid-tools and be sure we do not regress in the
>> >> > future.
>> >> >
>> >> > >
>> >> > > Let me turn this into a proper patch and then I will send that to
>> >> > > Zdeněk (off-list) for him to test (note don't worry if you do
>> >> > > not have time to test this weekend, then I'll do it on Monday).
>> >> > >
>> >> > > Regards,
>> >> > >
>> >> > > Hans
>> >> > >
>> >> > > p.s.
>> >> > >
>> >> > > 1. My train is approaching Brussels (Fosdem) so my email response
>> >> > > time will soon become irregular.
>> >> >
>> >> > How dare you? :)
>> >> >
>> >> > >
>> >> > > 2. Benjamin will you be at Fosdem too ?
>> >> > >
>> >> >
>> >> > Unfortunately no. Already got my quota of meeting people for this year
>> >> > between Kernel Recipes in September, XDC in October and LCA last week.
>> >> > So I need to keep in a quiet environment for a little bit :)
>> >> >
>> >> > Cheers,
>> >> > Benjamin
>> >> >
>> >>
>>
On Fri, Jan 31, 2020 at 5:09 PM Z R <zdenda.rampas(a)gmail.com> wrote:
>
> I believe I pressed wifi button on both replays for keyboard.
Yep, I can see that. Just to double check, on the last log, you
pressed the wifi button twice?
Anyway, thanks for all the logs, I should have enough to implement the
regression tests.
Cheers,
Benjamin
>
> With latest patch from Hans on top of v5.5.0 touchpads "two finger scrolling" is working again. Attaching current hid-record for keyboard with Wifi button pressed. Events in log appeared after f3 button was "released".
>
> Thanks
>
> Zdeněk
>
> pá 31. 1. 2020 v 16:45 odesílatel Hans de Goede <hdegoede(a)redhat.com> napsal:
>>
>> Hi,
>>
>> On 1/31/20 4:38 PM, Z R wrote:
>> > Hi Benjamin,
>> > hid-record for keyboard and touchpad. With Commit 8f18eca9ebc5 reverted and from unmodified kernel.
>> >
>> > I hope it is what you asked for :-)
>> >
>> > Currently waiting for reworked patch from Hans.
>>
>> I just send you the reworked patch.
>>
>> Does the recordning include pressing of the wlan on/off key (Fn + F3 I believe) ?
>> That is the whole reason why the special hid-ite driver is necessary.
>>
>> Benjamin about the wlan on/off key. AFAICR on a press + release of the key a
>> single hid input report for the generic-desktop Wireless Radio Controls group
>> is send. This input-report only has the one button with usage code HID_GD_RFKILL_BTN
>> in there and it is always 0. It is as if the input-report is only send on release
>> and not on press. So the hid-ite code emulates a press + release whenever the
>> input-report is send.
>>
>> IOW the receiving of the input report is (ab)used as indication of the button
>> having been pressed.
>>
>> Regards,
>>
>> Hans
>>
>>
>> >
>> > Bye for now
>> > Zdeněk
>> >
>> > pá 31. 1. 2020 v 15:12 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com>> napsal:
>> >
>> > On Fri, Jan 31, 2020 at 3:04 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> > >
>> > > Hi,
>> > >
>> > > On 1/31/20 2:54 PM, Benjamin Tissoires wrote:
>> > > > On Fri, Jan 31, 2020 at 2:41 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> > > >>
>> > > >> Hi,
>> > > >>
>> > > >> On 1/31/20 2:10 PM, Benjamin Tissoires wrote:
>> > > >>> Hi Hans,
>> > > >>>
>> > > >>> On Fri, Jan 31, 2020 at 1:46 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> > > >>>>
>> > > >>>> Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
>> > > >>>> dock") added the USB id for the Acer SW5-012's keyboard dock to the
>> > > >>>> hid-ite driver to fix the rfkill driver not working.
>> > > >>>>
>> > > >>>> Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
>> > > >>>> "Wireless Radio Control" bits which need the special hid-ite driver on the
>> > > >>>> second USB interface (the mouse interface) and their touchpad only supports
>> > > >>>> mouse emulation, so using generic hid-input handling for anything but
>> > > >>>> the "Wireless Radio Control" bits is fine. On these devices we simply bind
>> > > >>>> to all USB interfaces.
>> > > >>>>
>> > > >>>> But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
>> > > >>>> (SW5-012)'s touchpad not only does mouse emulation it also supports
>> > > >>>> HID-multitouch and all the keys including the "Wireless Radio Control"
>> > > >>>> bits have been moved to the first USB interface (the keyboard intf).
>> > > >>>>
>> > > >>>> So we need hid-ite to handle the first (keyboard) USB interface and have
>> > > >>>> it NOT bind to the second (mouse) USB interface so that that can be
>> > > >>>> handled by hid-multitouch.c and we get proper multi-touch support.
>> > > >>>>
>> > > >>>> This commit adds a match callback to hid-ite which makes it only
>> > > >>>> match the first USB interface when running on the Acer SW5-012,
>> > > >>>> fixing the regression to mouse-emulation mode introduced by adding the
>> > > >>>> keyboard dock USB id.
>> > > >>>>
>> > > >>>> Note the match function only does the special only bind to the first
>> > > >>>> USB interface on the Acer SW5-012, on other devices the hid-ite driver
>> > > >>>> actually must bind to the second interface as that is where the
>> > > >>>> "Wireless Radio Control" bits are.
>> > > >>>
>> > > >>> This is not a full review, but a couple of things that popped out
>> > > >>> while scrolling through the patch.
>> > > >>>
>> > > >>>>
>> > > >>>> Cc: stable(a)vger.kernel.org <mailto:stable@vger.kernel.org>
>> > > >>>> Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
>> > > >>>> Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com>>
>> > > >>>> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>>
>> > > >>>> ---
>> > > >>>> drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
>> > > >>>> 1 file changed, 34 insertions(+)
>> > > >>>>
>> > > >>>> diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
>> > > >>>> index c436e12feb23..69a4ddfd033d 100644
>> > > >>>> --- a/drivers/hid/hid-ite.c
>> > > >>>> +++ b/drivers/hid/hid-ite.c
>> > > >>>> @@ -8,9 +8,12 @@
>> > > >>>> #include <linux/input.h>
>> > > >>>> #include <linux/hid.h>
>> > > >>>> #include <linux/module.h>
>> > > >>>> +#include <linux/usb.h>
>> > > >>>>
>> > > >>>> #include "hid-ids.h"
>> > > >>>>
>> > > >>>> +#define ITE8595_KBD_USB_INTF 0
>> > > >>>> +
>> > > >>>> static int ite_event(struct hid_device *hdev, struct hid_field *field,
>> > > >>>> struct hid_usage *usage, __s32 value)
>> > > >>>> {
>> > > >>>> @@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
>> > > >>>> return 0;
>> > > >>>> }
>> > > >>>>
>> > > >>>> +static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
>> > > >>>> +{
>> > > >>>> + struct usb_interface *intf;
>> > > >>>> +
>> > > >>>> + if (ignore_special_driver)
>> > > >>>> + return false;
>> > > >>>> +
>> > > >>>> + /*
>> > > >>>> + * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
>> > > >>>> + * have the "Wireless Radio Control" bits which need this special
>> > > >>>> + * driver on the second USB interface (the mouse interface). On
>> > > >>>> + * these devices we simply bind to all USB interfaces.
>> > > >>>> + *
>> > > >>>> + * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
>> > > >>>> + * not only does mouse emulation it also supports HID-multitouch
>> > > >>>> + * and all the keys including the "Wireless Radio Control" bits
>> > > >>>> + * have been moved to the first USB interface (the keyboard intf).
>> > > >>>> + *
>> > > >>>> + * We want the hid-multitouch driver to bind to the touchpad, so on
>> > > >>>> + * the Acer SW5-012 we should only bind to the keyboard USB intf.
>> > > >>>> + */
>> > > >>>> + if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
>> > > >>>> + hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
>> > > >>>
>> > > >>> Isn't there an existing matching function we can use here, instead of
>> > > >>> checking each individual field?
>> > > >>
>> > > >> There is hid_match_one_id() but that is not exported (can be fixed) and it
>> > > >> requires a struct hid_device_id, which either requires declaring an extra
>> > > >> standalone struct hid_device_id for the SW5-012 kbd-dock, or hardcoding an
>> > > >> index into the existing hid_device_id array for the driver (with the hardcoding
>> > > >> being error prone, so not a good idea).
>> > > >>
>> > > >> Given the problems with using hid_match_one_id() I decided to just go with
>> > > >> the above.
>> > > >
>> > > > right. An other solution would be to have a local macro/function that
>> > > > does that. Because as soon as you start adding a quirk, an other comes
>> > > > right after.
>> > > >
>> > > >>
>> > > >> But see below.
>> > > >>
>> > > >>>
>> > > >>>> + return true;
>> > > >>>> +
>> > > >>>> + intf = to_usb_interface(hdev->dev.parent);
>> > > >>>
>> > > >>> And this is oops-prone. You need:
>> > > >>> - ensure hid_is_using_ll_driver(hdev, &usb_hid_driver) returns true.
>> > > >>> - add a dependency on USBHID in the KConfig now that you are checking
>> > > >>> on the USB transport layer.
>> > > >>>
>> > > >>> That being said, I would love instead:
>> > > >>> - to have a non USB version of this match, where you decide which
>> > > >>> component needs to be handled based on the report descriptor
>> > > >>
>> > > >> Actually your idea to use the desciptors is not bad, but since what
>> > > >> we really want is to not bind to the interface which is marked for the
>> > > >> hid-multitouch driver I just realized we can just check that.
>> > > >>
>> > > >> So how about:
>> > > >>
>> > > >> static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
>> > > >> {
>> > > >> if (ignore_special_driver)
>> > > >> return false;
>> > > >>
>> > > >> /*
>> > > >> * Some keyboard docks with an ITE 8595 keyboard/touchpad controller
>> > > >> * support the HID multitouch protocol for the touchpad, in that
>> > > >> * case the "Wireless Radio Control" bits which we care about are
>> > > >> * on the other interface; and we should not bind to the multitouch
>> > > >> * capable interface as that breaks multitouch support.
>> > > >> */
>> > > >> return hdev->group != HID_GROUP_MULTITOUCH_WIN_8;
>> > > >> }
>> > > >
>> > > > Yep, I like that very much :)
>> > >
>> > > Actually if we want to check the group and there are only 2 interfaces we do
>> > > not need to use the match callback at all, w e can simply match on the
>> > > group of the interface which we do want:
>> > >
>> > > diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
>> > > index db0f35be5a8b..21bd48f16033 100644
>> > > --- a/drivers/hid/hid-ite.c
>> > > +++ b/drivers/hid/hid-ite.c
>> > > @@ -56,8 +56,9 @@ static const struct hid_device_id ite_devices[] = {
>> > > { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
>> > > { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
>> > > /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
>> > > - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
>> > > - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
>> > > + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
>> > > + USB_VENDOR_ID_SYNAPTICS,
>> > > + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
>> > > { }
>> > > };
>> > > MODULE_DEVICE_TABLE(hid, ite_devices);
>> > >
>> > > Much cleaner
>> >
>> > yep
>> >
>> > > (and now I don't need to write a test, which is always
>> > > a good motivation to come up with a cleaner solution :)
>> >
>> > Hehe, too bad, you already picked up my curiosity on this one, and I
>> > really would like to see the report descriptors and some events of the
>> > keys that are fixed by hid-ite.c.
>> > <with a low voice>This will be a hard requirement to accept this patch </joke>.
>> >
>> > More seriously, Zdeněk, can you run hid-recorder from
>> > https://gitlab.freedesktop.org/libevdev/hid-tools/ and provide me the
>> > report descriptor for all of your ITE HID devices? I'll add the
>> > matching tests in hid-tools and be sure we do not regress in the
>> > future.
>> >
>> > >
>> > > Let me turn this into a proper patch and then I will send that to
>> > > Zdeněk (off-list) for him to test (note don't worry if you do
>> > > not have time to test this weekend, then I'll do it on Monday).
>> > >
>> > > Regards,
>> > >
>> > > Hans
>> > >
>> > > p.s.
>> > >
>> > > 1. My train is approaching Brussels (Fosdem) so my email response
>> > > time will soon become irregular.
>> >
>> > How dare you? :)
>> >
>> > >
>> > > 2. Benjamin will you be at Fosdem too ?
>> > >
>> >
>> > Unfortunately no. Already got my quota of meeting people for this year
>> > between Kernel Recipes in September, XDC in October and LCA last week.
>> > So I need to keep in a quiet environment for a little bit :)
>> >
>> > Cheers,
>> > Benjamin
>> >
>>
Hi,
On 1/31/20 4:38 PM, Z R wrote:
> Hi Benjamin,
> hid-record for keyboard and touchpad. With Commit 8f18eca9ebc5 reverted and from unmodified kernel.
>
> I hope it is what you asked for :-)
>
> Currently waiting for reworked patch from Hans.
I just send you the reworked patch.
Does the recordning include pressing of the wlan on/off key (Fn + F3 I believe) ?
That is the whole reason why the special hid-ite driver is necessary.
Benjamin about the wlan on/off key. AFAICR on a press + release of the key a
single hid input report for the generic-desktop Wireless Radio Controls group
is send. This input-report only has the one button with usage code HID_GD_RFKILL_BTN
in there and it is always 0. It is as if the input-report is only send on release
and not on press. So the hid-ite code emulates a press + release whenever the
input-report is send.
IOW the receiving of the input report is (ab)used as indication of the button
having been pressed.
Regards,
Hans
>
> Bye for now
> Zdeněk
>
> pá 31. 1. 2020 v 15:12 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com>> napsal:
>
> On Fri, Jan 31, 2020 at 3:04 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
> >
> > Hi,
> >
> > On 1/31/20 2:54 PM, Benjamin Tissoires wrote:
> > > On Fri, Jan 31, 2020 at 2:41 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
> > >>
> > >> Hi,
> > >>
> > >> On 1/31/20 2:10 PM, Benjamin Tissoires wrote:
> > >>> Hi Hans,
> > >>>
> > >>> On Fri, Jan 31, 2020 at 1:46 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
> > >>>>
> > >>>> Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
> > >>>> dock") added the USB id for the Acer SW5-012's keyboard dock to the
> > >>>> hid-ite driver to fix the rfkill driver not working.
> > >>>>
> > >>>> Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
> > >>>> "Wireless Radio Control" bits which need the special hid-ite driver on the
> > >>>> second USB interface (the mouse interface) and their touchpad only supports
> > >>>> mouse emulation, so using generic hid-input handling for anything but
> > >>>> the "Wireless Radio Control" bits is fine. On these devices we simply bind
> > >>>> to all USB interfaces.
> > >>>>
> > >>>> But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
> > >>>> (SW5-012)'s touchpad not only does mouse emulation it also supports
> > >>>> HID-multitouch and all the keys including the "Wireless Radio Control"
> > >>>> bits have been moved to the first USB interface (the keyboard intf).
> > >>>>
> > >>>> So we need hid-ite to handle the first (keyboard) USB interface and have
> > >>>> it NOT bind to the second (mouse) USB interface so that that can be
> > >>>> handled by hid-multitouch.c and we get proper multi-touch support.
> > >>>>
> > >>>> This commit adds a match callback to hid-ite which makes it only
> > >>>> match the first USB interface when running on the Acer SW5-012,
> > >>>> fixing the regression to mouse-emulation mode introduced by adding the
> > >>>> keyboard dock USB id.
> > >>>>
> > >>>> Note the match function only does the special only bind to the first
> > >>>> USB interface on the Acer SW5-012, on other devices the hid-ite driver
> > >>>> actually must bind to the second interface as that is where the
> > >>>> "Wireless Radio Control" bits are.
> > >>>
> > >>> This is not a full review, but a couple of things that popped out
> > >>> while scrolling through the patch.
> > >>>
> > >>>>
> > >>>> Cc: stable(a)vger.kernel.org <mailto:stable@vger.kernel.org>
> > >>>> Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
> > >>>> Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com>>
> > >>>> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>>
> > >>>> ---
> > >>>> drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
> > >>>> 1 file changed, 34 insertions(+)
> > >>>>
> > >>>> diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
> > >>>> index c436e12feb23..69a4ddfd033d 100644
> > >>>> --- a/drivers/hid/hid-ite.c
> > >>>> +++ b/drivers/hid/hid-ite.c
> > >>>> @@ -8,9 +8,12 @@
> > >>>> #include <linux/input.h>
> > >>>> #include <linux/hid.h>
> > >>>> #include <linux/module.h>
> > >>>> +#include <linux/usb.h>
> > >>>>
> > >>>> #include "hid-ids.h"
> > >>>>
> > >>>> +#define ITE8595_KBD_USB_INTF 0
> > >>>> +
> > >>>> static int ite_event(struct hid_device *hdev, struct hid_field *field,
> > >>>> struct hid_usage *usage, __s32 value)
> > >>>> {
> > >>>> @@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
> > >>>> return 0;
> > >>>> }
> > >>>>
> > >>>> +static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
> > >>>> +{
> > >>>> + struct usb_interface *intf;
> > >>>> +
> > >>>> + if (ignore_special_driver)
> > >>>> + return false;
> > >>>> +
> > >>>> + /*
> > >>>> + * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
> > >>>> + * have the "Wireless Radio Control" bits which need this special
> > >>>> + * driver on the second USB interface (the mouse interface). On
> > >>>> + * these devices we simply bind to all USB interfaces.
> > >>>> + *
> > >>>> + * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
> > >>>> + * not only does mouse emulation it also supports HID-multitouch
> > >>>> + * and all the keys including the "Wireless Radio Control" bits
> > >>>> + * have been moved to the first USB interface (the keyboard intf).
> > >>>> + *
> > >>>> + * We want the hid-multitouch driver to bind to the touchpad, so on
> > >>>> + * the Acer SW5-012 we should only bind to the keyboard USB intf.
> > >>>> + */
> > >>>> + if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
> > >>>> + hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
> > >>>
> > >>> Isn't there an existing matching function we can use here, instead of
> > >>> checking each individual field?
> > >>
> > >> There is hid_match_one_id() but that is not exported (can be fixed) and it
> > >> requires a struct hid_device_id, which either requires declaring an extra
> > >> standalone struct hid_device_id for the SW5-012 kbd-dock, or hardcoding an
> > >> index into the existing hid_device_id array for the driver (with the hardcoding
> > >> being error prone, so not a good idea).
> > >>
> > >> Given the problems with using hid_match_one_id() I decided to just go with
> > >> the above.
> > >
> > > right. An other solution would be to have a local macro/function that
> > > does that. Because as soon as you start adding a quirk, an other comes
> > > right after.
> > >
> > >>
> > >> But see below.
> > >>
> > >>>
> > >>>> + return true;
> > >>>> +
> > >>>> + intf = to_usb_interface(hdev->dev.parent);
> > >>>
> > >>> And this is oops-prone. You need:
> > >>> - ensure hid_is_using_ll_driver(hdev, &usb_hid_driver) returns true.
> > >>> - add a dependency on USBHID in the KConfig now that you are checking
> > >>> on the USB transport layer.
> > >>>
> > >>> That being said, I would love instead:
> > >>> - to have a non USB version of this match, where you decide which
> > >>> component needs to be handled based on the report descriptor
> > >>
> > >> Actually your idea to use the desciptors is not bad, but since what
> > >> we really want is to not bind to the interface which is marked for the
> > >> hid-multitouch driver I just realized we can just check that.
> > >>
> > >> So how about:
> > >>
> > >> static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
> > >> {
> > >> if (ignore_special_driver)
> > >> return false;
> > >>
> > >> /*
> > >> * Some keyboard docks with an ITE 8595 keyboard/touchpad controller
> > >> * support the HID multitouch protocol for the touchpad, in that
> > >> * case the "Wireless Radio Control" bits which we care about are
> > >> * on the other interface; and we should not bind to the multitouch
> > >> * capable interface as that breaks multitouch support.
> > >> */
> > >> return hdev->group != HID_GROUP_MULTITOUCH_WIN_8;
> > >> }
> > >
> > > Yep, I like that very much :)
> >
> > Actually if we want to check the group and there are only 2 interfaces we do
> > not need to use the match callback at all, w e can simply match on the
> > group of the interface which we do want:
> >
> > diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
> > index db0f35be5a8b..21bd48f16033 100644
> > --- a/drivers/hid/hid-ite.c
> > +++ b/drivers/hid/hid-ite.c
> > @@ -56,8 +56,9 @@ static const struct hid_device_id ite_devices[] = {
> > { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
> > { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
> > /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
> > - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
> > - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
> > + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
> > + USB_VENDOR_ID_SYNAPTICS,
> > + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
> > { }
> > };
> > MODULE_DEVICE_TABLE(hid, ite_devices);
> >
> > Much cleaner
>
> yep
>
> > (and now I don't need to write a test, which is always
> > a good motivation to come up with a cleaner solution :)
>
> Hehe, too bad, you already picked up my curiosity on this one, and I
> really would like to see the report descriptors and some events of the
> keys that are fixed by hid-ite.c.
> <with a low voice>This will be a hard requirement to accept this patch </joke>.
>
> More seriously, Zdeněk, can you run hid-recorder from
> https://gitlab.freedesktop.org/libevdev/hid-tools/ and provide me the
> report descriptor for all of your ITE HID devices? I'll add the
> matching tests in hid-tools and be sure we do not regress in the
> future.
>
> >
> > Let me turn this into a proper patch and then I will send that to
> > Zdeněk (off-list) for him to test (note don't worry if you do
> > not have time to test this weekend, then I'll do it on Monday).
> >
> > Regards,
> >
> > Hans
> >
> > p.s.
> >
> > 1. My train is approaching Brussels (Fosdem) so my email response
> > time will soon become irregular.
>
> How dare you? :)
>
> >
> > 2. Benjamin will you be at Fosdem too ?
> >
>
> Unfortunately no. Already got my quota of meeting people for this year
> between Kernel Recipes in September, XDC in October and LCA last week.
> So I need to keep in a quiet environment for a little bit :)
>
> Cheers,
> Benjamin
>
sd->devnode is released after calling
v4l2_subdev_release. Therefore it should be set
to NULL so that the subdev won't hold a pointer
to a released object. This fixes a reference
after free bug in function
v4l2_device_unregister_subdev
Cc: stable(a)vger.kernel.org
Fixes: 0e43734d4c46e ("media: v4l2-subdev: add release() internal op")
Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld(a)collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel(a)collabora.com>
---
changes since v2:
- since this is a regresion fix, I added Fixes and Cc to stable tags,
- change the commit title and log to be more clear.
drivers/media/v4l2-core/v4l2-device.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/media/v4l2-core/v4l2-device.c b/drivers/media/v4l2-core/v4l2-device.c
index 63d6b147b21e..2b3595671d62 100644
--- a/drivers/media/v4l2-core/v4l2-device.c
+++ b/drivers/media/v4l2-core/v4l2-device.c
@@ -177,6 +177,7 @@ static void v4l2_subdev_release(struct v4l2_subdev *sd)
{
struct module *owner = !sd->owner_v4l2_dev ? sd->owner : NULL;
+ sd->devnode = NULL;
if (sd->internal_ops && sd->internal_ops->release)
sd->internal_ops->release(sd);
module_put(owner);
--
2.20.1
Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
dock") added the USB id for the Acer SW5-012's keyboard dock to the
hid-ite driver to fix the rfkill driver not working.
Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
"Wireless Radio Control" bits which need the special hid-ite driver on the
second USB interface (the mouse interface) and their touchpad only supports
mouse emulation, so using generic hid-input handling for anything but
the "Wireless Radio Control" bits is fine. On these devices we simply bind
to all USB interfaces.
But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
(SW5-012)'s touchpad not only does mouse emulation it also supports
HID-multitouch and all the keys including the "Wireless Radio Control"
bits have been moved to the first USB interface (the keyboard intf).
So we need hid-ite to handle the first (keyboard) USB interface and have
it NOT bind to the second (mouse) USB interface so that that can be
handled by hid-multitouch.c and we get proper multi-touch support.
This commit adds a match callback to hid-ite which makes it only
match the first USB interface when running on the Acer SW5-012,
fixing the regression to mouse-emulation mode introduced by adding the
keyboard dock USB id.
Note the match function only does the special only bind to the first
USB interface on the Acer SW5-012, on other devices the hid-ite driver
actually must bind to the second interface as that is where the
"Wireless Radio Control" bits are.
Cc: stable(a)vger.kernel.org
Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index c436e12feb23..69a4ddfd033d 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -8,9 +8,12 @@
#include <linux/input.h>
#include <linux/hid.h>
#include <linux/module.h>
+#include <linux/usb.h>
#include "hid-ids.h"
+#define ITE8595_KBD_USB_INTF 0
+
static int ite_event(struct hid_device *hdev, struct hid_field *field,
struct hid_usage *usage, __s32 value)
{
@@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
return 0;
}
+static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
+{
+ struct usb_interface *intf;
+
+ if (ignore_special_driver)
+ return false;
+
+ /*
+ * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
+ * have the "Wireless Radio Control" bits which need this special
+ * driver on the second USB interface (the mouse interface). On
+ * these devices we simply bind to all USB interfaces.
+ *
+ * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
+ * not only does mouse emulation it also supports HID-multitouch
+ * and all the keys including the "Wireless Radio Control" bits
+ * have been moved to the first USB interface (the keyboard intf).
+ *
+ * We want the hid-multitouch driver to bind to the touchpad, so on
+ * the Acer SW5-012 we should only bind to the keyboard USB intf.
+ */
+ if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
+ hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
+ return true;
+
+ intf = to_usb_interface(hdev->dev.parent);
+
+ return intf->altsetting->desc.bInterfaceNumber == ITE8595_KBD_USB_INTF;
+}
+
static const struct hid_device_id ite_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
{ HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
@@ -50,6 +83,7 @@ MODULE_DEVICE_TABLE(hid, ite_devices);
static struct hid_driver ite_driver = {
.name = "itetech",
.id_table = ite_devices,
+ .match = ite_match,
.event = ite_event,
};
module_hid_driver(ite_driver);
--
2.23.0