From: Rabin Vincent <rabinv(a)axis.com>
softirq time accounting is broken on v4.9.x if ksoftirqd runs.
With
CONFIG_IRQ_TIME_ACCOUNTING=y
# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set
this test code:
#include <linux/interrupt.h>
#include <linux/delay.h>

static struct tasklet_struct tasklet;

static void delay_tasklet(unsigned long data)
{
	udelay(10);
	tasklet_schedule(&tasklet);
}

/* kicked off once, e.g. from a module init function: */
tasklet_init(&tasklet, delay_tasklet, 0);
tasklet_schedule(&tasklet);
results in:
$ while :; do grep cpu0 /proc/stat; done
cpu0 5 0 80 25 16 107 1 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 81 25 16 107 0 0 0 0
cpu0 5 0 81 25 16 107 1 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 0 0 0 0
cpu0 6 0 81 25 16 108 0 0 0 0
cpu0 6 0 81 25 16 108 0 0 0 0
As can be seen, the softirq numbers (the seventh field) are totally bogus.
When ksoftirqd is running, irqtime_account_process_tick() increments
cpustat[CPUTIME_SOFTIRQ]. This causes the "nsecs_to_cputime64(irqtime)
- cpustat[CPUTIME_SOFTIRQ]" calculation in irqtime_account_update() to
underflow the next time a softirq is handled, leading to the above
values.
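For reference, here is a minimal user-space sketch (the values are made
up, chosen to match the output above) of the u64 wrap-around that
produces readings like 18446744073709551615:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* assumed values: the accumulated cpustat count was inflated by
	 * ticks charged while ksoftirqd ran, so it exceeds the irqtime
	 * actually measured in softirq context */
	uint64_t irqtime_cputime = 107;	/* nsecs_to_cputime64(irqtime) */
	uint64_t cpustat_softirq = 108;	/* cpustat[CPUTIME_SOFTIRQ] */

	/* unsigned subtraction cannot go negative; it wraps around */
	uint64_t delta = irqtime_cputime - cpustat_softirq;

	printf("%llu\n", (unsigned long long)delta);
	/* prints 18446744073709551615, i.e. (u64)-1, the same wrapped
	 * value that shows up in the softirq column of /proc/stat */
	return 0;
}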
The underflow bug was introduced by commit 57430218317e5b280
("sched/cputime: Count actually elapsed irq & softirq time").
But ksoftirqd accounting was wrong even in earlier kernels: after a
ksoftirqd run, the kernel would simply stop accounting softirq time
spent outside of ksoftirqd until that (accumulated) time exceeded the
time for which ksoftirqd had previously run.
Fix both the underflow and the wrong accounting by using a counter
specifically for the non-ksoftirqd softirq case.
This has been fixed in current mainline by a499a5a14db
("sched/cputime: Increment kcpustat directly on irqtime account") [note
also the follow-up 25e2d8c1b9e327e ("sched/cputime: Fix ksoftirqd cputime
accounting regression")], but that patch is part of the many changes
involved in eliminating cputime_t, so it does not seem suitable for
backport.
Signed-off-by: Rabin Vincent <rabinv(a)axis.com>
---
include/linux/kernel_stat.h | 1 +
kernel/sched/cputime.c | 9 ++++++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44fda64..d0826f1 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -33,6 +33,7 @@ enum cpu_usage_stat {
struct kernel_cpustat {
u64 cpustat[NR_STATS];
+ u64 softirq_no_ksoftirqd;
};
struct kernel_stat {
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5ebee31..1b5a9e6 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -73,12 +73,19 @@ EXPORT_SYMBOL_GPL(irqtime_account_irq);
static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
+ u64 base = cpustat[idx];
cputime_t irq_cputime;
- irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
+ if (idx == CPUTIME_SOFTIRQ)
+ base = kcpustat_this_cpu->softirq_no_ksoftirqd;
+
+ irq_cputime = nsecs_to_cputime64(irqtime) - base;
irq_cputime = min(irq_cputime, maxtime);
cpustat[idx] += irq_cputime;
+ if (idx == CPUTIME_SOFTIRQ)
+ kcpustat_this_cpu->softirq_no_ksoftirqd += irq_cputime;
+
return irq_cputime;
}
--
2.1.4
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma(a)intel.com>
Date: Mon, 18 Dec 2017 09:28:39 -0700
Subject: [PATCH] libnvdimm, btt: Fix an incompatibility in the log layout
Due to a spec misinterpretation, the Linux implementation of the BTT log
area had a different padding scheme from other implementations, such as
UEFI and NVML.
This fixes the padding scheme, and defaults to it for new BTT layouts.
We attempt to detect the padding scheme in use when probing for an
existing BTT. If we detect the older/incompatible scheme, we continue
using it.
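As a rough illustration, here is a stand-alone sketch (the struct
definitions mirror the ones this patch adds to btt.h): both padding
schemes occupy the same 64 bytes per lane; only which two of the four
16-byte slots hold live entries differs.

#include <stdio.h>
#include <stdint.h>

/* mirrors the on-media layout from drivers/nvdimm/btt.h */
struct log_entry {
	uint32_t lba;
	uint32_t old_map;
	uint32_t new_map;
	uint32_t seq;
};				/* 16 bytes per slot */

struct log_group {
	struct log_entry ent[4];
};				/* 64 bytes per lane in both schemes */

int main(void)
{
	/* old/incompatible scheme: live entries in slots (0, 2)
	 * correct/updated scheme:  live entries in slots (0, 1) */
	printf("entry: %zu bytes, group: %zu bytes\n",
	       sizeof(struct log_entry), sizeof(struct log_group));
	return 0;
}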
Reported-by: Juston Li <juston.li(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Fixes: 5212e11fde4d ("nd_btt: atomic sector updates")
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index e949e3302af4..c586bcdb5190 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
return ret;
}
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
- struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+ struct log_group *log)
{
return arena_read_bytes(arena,
- arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
- 2 * LOG_ENT_SIZE, 0);
+ arena->logoff + (lane * LOG_GRP_SIZE), log,
+ LOG_GRP_SIZE, 0);
}
static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+ debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+ debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
}
static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
}
}
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+ return le32_to_cpu(log->ent[log_idx].seq);
+}
+
/*
* This function accepts two log entries, and uses the
* sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
*
* TODO The logic feels a bit kludge-y. make it better..
*/
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
{
+ int idx0 = a->log_index[0];
+ int idx1 = a->log_index[1];
int old;
/*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
* the next time, the following logic works out to put this
* (next) entry into [1]
*/
- if (ent[0].seq == 0) {
- ent[0].seq = cpu_to_le32(1);
+ if (log_seq(log, idx0) == 0) {
+ log->ent[idx0].seq = cpu_to_le32(1);
return 0;
}
- if (ent[0].seq == ent[1].seq)
+ if (log_seq(log, idx0) == log_seq(log, idx1))
return -EINVAL;
- if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+ if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
return -EINVAL;
- if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
- if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+ if (log_seq(log, idx0) < log_seq(log, idx1)) {
+ if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
old = 0;
else
old = 1;
} else {
- if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+ if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
old = 1;
else
old = 0;
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
{
int ret;
int old_ent, ret_ent;
- struct log_entry log[2];
+ struct log_group log;
- ret = btt_log_read_pair(arena, lane, log);
+ ret = btt_log_group_read(arena, lane, &log);
if (ret)
return -EIO;
- old_ent = btt_log_get_old(log);
+ old_ent = btt_log_get_old(arena, &log);
if (old_ent < 0 || old_ent > 1) {
dev_err(to_dev(arena),
"log corruption (%d): lane %d seq [%d, %d]\n",
- old_ent, lane, log[0].seq, log[1].seq);
+ old_ent, lane, log.ent[arena->log_index[0]].seq,
+ log.ent[arena->log_index[1]].seq);
/* TODO set error state? */
return -EIO;
}
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
ret_ent = (old_flag ? old_ent : (1 - old_ent));
if (ent != NULL)
- memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+ memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
return ret_ent;
}
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
u32 sub, struct log_entry *ent, unsigned long flags)
{
int ret;
- /*
- * Ignore the padding in log_entry for calculating log_half.
- * The entry is 'committed' when we write the sequence number,
- * and we want to ensure that that is the last thing written.
- * We don't bother writing the padding as that would be extra
- * media wear and write amplification
- */
- unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
- u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+ u32 group_slot = arena->log_index[sub];
+ unsigned int log_half = LOG_ENT_SIZE / 2;
void *src = ent;
+ u64 ns_off;
+ ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+ (group_slot * LOG_ENT_SIZE);
/* split the 16B write into atomic, durable halves */
ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
if (ret)
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
{
size_t logsize = arena->info2off - arena->logoff;
size_t chunk_size = SZ_4K, offset = 0;
- struct log_entry log;
+ struct log_entry ent;
void *zerobuf;
int ret;
u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
}
for (i = 0; i < arena->nfree; i++) {
- log.lba = cpu_to_le32(i);
- log.old_map = cpu_to_le32(arena->external_nlba + i);
- log.new_map = cpu_to_le32(arena->external_nlba + i);
- log.seq = cpu_to_le32(LOG_SEQ_INIT);
- ret = __btt_log_write(arena, i, 0, &log, 0);
+ ent.lba = cpu_to_le32(i);
+ ent.old_map = cpu_to_le32(arena->external_nlba + i);
+ ent.new_map = cpu_to_le32(arena->external_nlba + i);
+ ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+ ret = __btt_log_write(arena, i, 0, &ent, 0);
if (ret)
goto free;
}
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
return 0;
}
+static bool ent_is_padding(struct log_entry *ent)
+{
+ return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+ && (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+ bool idx_set = false, initial_state = true;
+ int ret, log_index[2] = {-1, -1};
+ u32 i, j, next_idx = 0;
+ struct log_group log;
+ u32 pad_count = 0;
+
+ for (i = 0; i < arena->nfree; i++) {
+ ret = btt_log_group_read(arena, i, &log);
+ if (ret < 0)
+ return ret;
+
+ for (j = 0; j < 4; j++) {
+ if (!idx_set) {
+ if (ent_is_padding(&log.ent[j])) {
+ pad_count++;
+ continue;
+ } else {
+ /* Skip if index has been recorded */
+ if ((next_idx == 1) &&
+ (j == log_index[0]))
+ continue;
+ /* valid entry, record index */
+ log_index[next_idx] = j;
+ next_idx++;
+ }
+ if (next_idx == 2) {
+ /* two valid entries found */
+ idx_set = true;
+ } else if (next_idx > 2) {
+ /* too many valid indices */
+ return -ENXIO;
+ }
+ } else {
+ /*
+ * once the indices have been set, just verify
+ * that all subsequent log groups are either in
+ * their initial state or follow the same
+ * indices.
+ */
+ if (j == log_index[0]) {
+ /* entry must be 'valid' */
+ if (ent_is_padding(&log.ent[j]))
+ return -ENXIO;
+ } else if (j == log_index[1]) {
+ ;
+ /*
+ * log_index[1] can be padding if the
+ * lane never got used and it is still
+ * in the initial state (three 'padding'
+ * entries)
+ */
+ } else {
+ /* entry must be invalid (padding) */
+ if (!ent_is_padding(&log.ent[j]))
+ return -ENXIO;
+ }
+ }
+ }
+ /*
+ * If any of the log_groups have more than one valid,
+ * non-padding entry, then the we are no longer in the
+ * initial_state
+ */
+ if (pad_count < 3)
+ initial_state = false;
+ pad_count = 0;
+ }
+
+ if (!initial_state && !idx_set)
+ return -ENXIO;
+
+ /*
+ * If all the entries in the log were in the initial state,
+ * assume new padding scheme
+ */
+ if (initial_state)
+ log_index[1] = 1;
+
+ /*
+ * Only allow the known permutations of log/padding indices,
+ * i.e. (0, 1), and (0, 2)
+ */
+ if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+ ; /* known index possibilities */
+ else {
+ dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+ return -ENXIO;
+ }
+
+ arena->log_index[0] = log_index[0];
+ arena->log_index[1] = log_index[1];
+ dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+ dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+ return 0;
+}
+
static int btt_rtt_init(struct arena_info *arena)
{
arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
available -= 2 * BTT_PG_SIZE;
/* The log takes a fixed amount of space based on nfree */
- logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
- BTT_PG_SIZE);
+ logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
available -= logsize;
/* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
arena->mapoff = arena->dataoff + datasize;
arena->logoff = arena->mapoff + mapsize;
arena->info2off = arena->logoff + logsize;
+
+ /* Default log indices are (0,1) */
+ arena->log_index[0] = 0;
+ arena->log_index[1] = 1;
return arena;
}
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
arena->external_lba_start = cur_nlba;
parse_arena_meta(arena, super, cur_off);
+ ret = log_set_indices(arena);
+ if (ret) {
+ dev_err(to_dev(arena),
+ "Unable to deduce log/padding indices\n");
+ goto out;
+ }
+
mutex_init(&arena->err_lock);
ret = btt_freelist_init(arena);
if (ret)
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 884fbbbdd18a..db3cb6d4d0d4 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -27,6 +27,7 @@
#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
#define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
#define LOG_ENT_SIZE sizeof(struct log_entry)
#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
INIT_READY
};
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), where as the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * | ent[0] | ent[1] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | pad |
+ * +-----------------------------------+
+ * | ent[2] | ent[3] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | pad |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * | ent[0] | ent[1] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * | ent[2] | ent[3] |
+ * | 16B | 16B |
+ * | pad | pad |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
struct log_entry {
__le32 lba;
__le32 old_map;
__le32 new_map;
__le32 seq;
- __le64 padding[2];
+};
+
+struct log_group {
+ struct log_entry ent[4];
};
struct btt_sb {
@@ -126,6 +167,7 @@ struct aligned_lock {
* @debugfs_dir: Debugfs dentry
* @flags: Arena flags - may signify error states.
* @err_lock: Mutex for synchronizing error clearing.
+ * @log_index: Indices of the valid log entries in a log_group
*
* arena_info is a per-arena handle. Once an arena is narrowed down for an
* IO, this struct is passed around for the duration of the IO.
@@ -158,6 +200,7 @@ struct arena_info {
/* Arena flags */
u32 flags;
struct mutex err_lock;
+ int log_index[2];
};
/**
On older versions of binutils, \sym points to an aligned address. On
newer versions of binutils, \sym sometimes points to the unaligned thumb
address in mysterious and buggy circumstances. In order to homogenize
this behavior, rather than adding 1, we simply OR in 1, so that already
unaligned addresses don't change. This fix is required for even a
pedestrian THUMB2_KERNEL to boot without crashing when built with newer
binutils.
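To illustrate the idempotence argument, a small user-space sketch (the
addresses are the ones from the disassembly further down):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t aligned = 0x16a;	/* \sym resolved without Thumb bit */
	uint32_t thumb   = 0x16b;	/* \sym already carries Thumb bit */

	/* "+ 1" corrupts the already-unaligned case: 0x16b + 1 == 0x16c */
	printf("add: %#x %#x\n", aligned + 1, thumb + 1);

	/* "| 1" is idempotent, so both cases end up at 0x16b */
	printf("orr: %#x %#x\n", aligned | 1, thumb | 1);
	return 0;
}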
While it works, the downside is that we have to add an `orr` instruction
to a fast path. The assembler can't do this at assemble time via "|1",
as that fails with "invalid operands (.text and *ABS* sections) for `|'",
so we're forced to do it at run time. A better solution would be to have
consistent
binutils behavior, or to have some kind of \sym feature detection that
won't turn into a maze of version comparisons. However, it's at the
moment unclear how to achieve this.
The rest of this commit message contains all of the relevant
information.
My tests concerned these versions:
broken: GNU ld (Gentoo 2.29.1 p3) 2.29.1
working: GNU ld (GNU Binutils for Ubuntu) 2.26.1
These produced the following code:
--- broken 2017-11-21 17:44:14.523416082 +0100
+++ working 2017-11-21 17:44:44.548461234 +0100
@@ -133,7 +133,7 @@
160: f01a 0ff0 tst.w sl, #240 ; 0xf0
164: d111 bne.n 18a <__sys_trace>
166: f5b7 7fc8 cmp.w r7, #400 ; 0x190
- 16a: f2af 1e6a subw lr, pc, #362 ; 0x16a
+ 16a: f2af 1e6b subw lr, pc, #363 ; 0x16b
16e: bf38 it cc
170: f858 f027 ldrcc.w pc, [r8, r7, lsl #2]
174: a902 add r1, sp, #8
The differing instruction corresponds with this actual line in
arch/arm/kernel/entry-common.S:
badr lr, ret_fast_syscall @ return address
Running the broken kernel results in a runtime OOPS with:
PC is at ret_fast_syscall+0x4/0x52
LR is at ret_fast_syscall+0x2/0x52
The disassembly of that function for the crashing kernel is:
.text:00000000 ret_fast_syscall ; CODE XREF: sys_syscall+1C↓j
.text:00000000 CPSID I ; jumptable 00000840 cases 15,18-376
.text:00000002
.text:00000002 loc_2 ; DATA XREF: sys_syscall-6BA↓o
.text:00000002 LDR.W R2, [R9,#8]
.text:00000006 CMP.W R2, #0xBF000000
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Cc: stable(a)vger.kernel.org
---
arch/arm/include/asm/assembler.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index ad301f107dd2..c62a3b6b0a3e 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -194,10 +194,9 @@
*/
.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
.macro badr\c, rd, sym
-#ifdef CONFIG_THUMB2_KERNEL
- adr\c \rd, \sym + 1
-#else
adr\c \rd, \sym
+#ifdef CONFIG_THUMB2_KERNEL
+ orr\c \rd, \rd, 1
#endif
.endm
.endr
--
2.15.0
From: Vincent Wang <vincent.wang(a)spreadtrum.com>
list_del_rcu() should be used in place of list_del() in the function
_remove_list_dev(), since the opp is an RCU-protected pointer.
For example, on a Spreadtrum ARM big.LITTLE platform, the little
cluster, the big cluster and the GPU all use pm_opp, and the opp_table
of the big cluster is removed when the big cluster goes offline, which
is implemented in the cpufreq driver. Sometimes the following issue
occurs:
[ 237.647758] c0 Unable to handle kernel paging request at virtual address dead000000000110
[ 237.647776] c0 pgd = ffffffc073e78000
[ 237.647786] c0 [dead000000000110] *pgd=0000000000000000, *pud=0000000000000000
[ 237.647808] c0 Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 237.653535] c0 Modules linked in: sprdwl_ng(O) mtty marlin2_fm mali_kbase(O)
[ 237.653569] c0 CPU: 0 PID: 38 Comm: kworker/u12:1 Tainted: G S W O 4.4.83+ #1
[ 237.653578] c0 Hardware name: Spreadtrum SP9850KHsmt 1h10 Board (DT)
[ 237.653594] c0 Workqueue: devfreq_wq devfreq_monitor
[ 237.653605] c0 task: ffffffc0babd0d80 task.stack: ffffffc0badbc000
[ 237.653619] c0 PC is at _find_device_opp+0x58/0xac
[ 237.653629] c0 LR is at dev_pm_opp_find_freq_ceil+0x2c/0xb8
[ 237.921294] c0 Call trace:
[ 237.921425] c0 [<ffffff80085362b0>] _find_device_opp+0x58/0xac
[ 237.921437] c0 [<ffffff8008536560>] dev_pm_opp_find_freq_ceil+0x2c/0xb8
[ 237.921452] c0 [<ffffff80088760f4>] devfreq_recommended_opp+0x54/0x7c
[ 237.921494] c0 [<ffffff8000b6a96c>] kbase_wait_write_flush+0x164/0x358 [mali_kbase]
[ 237.921504] c0 [<ffffff800887485c>] update_devfreq+0x8c/0xf8
[ 237.921514] c0 [<ffffff80088749e4>] devfreq_monitor+0x34/0x94
[ 237.921529] c0 [<ffffff80080bd75c>] process_one_work+0x154/0x458
[ 237.921539] c0 [<ffffff80080be428>] worker_thread+0x134/0x4a4
[ 237.921551] c0 [<ffffff80080c4bec>] kthread+0xdc/0xf0
[ 237.921564] c0 [<ffffff8008085f20>] ret_from_fork+0x10/0x30
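A note on the faulting address: dead000000000110 is LIST_POISON1
(0xdead000000000100) plus a small field offset, i.e. a lockless reader
followed the ->next pointer of an entry that list_del() had already
poisoned. Abridged from include/linux/list.h and include/linux/rculist.h:

static inline void list_del(struct list_head *entry)
{
	__list_del_entry(entry);
	entry->next = LIST_POISON1;	/* readers still holding the old
					 * pointer fault on dereference */
	entry->prev = LIST_POISON2;
}

static inline void list_del_rcu(struct list_head *entry)
{
	__list_del_entry(entry);
	/* ->next is deliberately left valid so concurrent RCU readers
	 * can keep traversing until the grace period has elapsed */
	entry->prev = LIST_POISON2;
}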
Cc: stable <stable(a)vger.kernel.org> # 4.4
Signed-off-by: Vincent Wang <vincent.wang(a)spreadtrum.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang(a)spreadtrum.com>
Acked-by: Viresh Kumar <viresh.kumar(a)linaro.org>
---
This patch is for the 4.4 stable branch only.
Once this patch is accepted, I can cook a similar patch for the 4.9
stable branch.
This fix can't go upstream, as the OPP code there no longer uses RCU.
---
drivers/base/power/opp/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index db6e7e5..e0eac53 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -463,7 +463,7 @@ static void _kfree_list_dev_rcu(struct rcu_head *head)
static void _remove_list_dev(struct device_list_opp *list_dev,
struct device_opp *dev_opp)
{
- list_del(&list_dev->node);
+ list_del_rcu(&list_dev->node);
call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head,
_kfree_list_dev_rcu);
}
--
1.9.1
HiSilicon Hip06/Hip07 can work in RC mode and EP mode. The PCIe Root Port
in Hip06/Hip07 SoCs advertises an MSI capability, but it cannot generate
MSIs. The device can generate MSIs when it works in EP mode, but the
vendor ID and device ID of the RP and the EP are the same. The current
code also disables MSI in EP mode; we only need to disable MSI for the
Root Port.
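A sketch of what the CLASS variant adds (the matching test is
paraphrased from the fixup logic in drivers/pci/quirks.c; the helper
name here is illustrative, not the kernel's):

#include <stdbool.h>
#include <stdint.h>

#define PCI_CLASS_BRIDGE_PCI	0x0604	/* base class 06, sub-class 04 */

/* illustrative: the class test applied by DECLARE_PCI_FIXUP_CLASS_* */
static bool fixup_class_matches(uint32_t dev_class, uint32_t quirk_class,
				int class_shift)
{
	/* dev_class is the 24-bit class/sub-class/prog-if register; a
	 * class_shift of 8 drops the prog-if byte, so only the base
	 * class and sub-class are compared */
	return (dev_class >> class_shift) == quirk_class;
}

/*
 * Root Port (class 0x060400): (0x060400 >> 8) == 0x0604 -> matches,
 * so MSI gets disabled. The EP-mode function has the same vendor and
 * device ID but a non-bridge class code, so it no longer matches and
 * keeps its MSI capability.
 */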
Cc: <stable(a)vger.kernel.org>
Fixes: 72f2ff0deb87 ("PCI: Disable MSI for HiSilicon Hip06/Hip07 Root Ports")
Signed-off-by: Dongdong Liu <liudongdong3(a)huawei.com>
---
drivers/pci/quirks.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 10684b1..d22750e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1636,8 +1636,8 @@ static void quirk_pcie_mch(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_pcie_mch);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_pcie_mch);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_pcie_mch);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0x1610, quirk_pcie_mch);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_HUAWEI, 0x1610, PCI_CLASS_BRIDGE_PCI, 8, quirk_pcie_mch);
/*
* It's possible for the MSI to get corrupted if shpc and acpi
--
1.9.1
The patch below was submitted to be applied to the 4.14-stable tree.
I fail to see how this patch meets the stable kernel rules as found at
Documentation/process/stable-kernel-rules.rst.
I could be totally wrong, and if so, please respond to
<stable(a)vger.kernel.org> and let me know why this patch should be
applied. Otherwise, it is now dropped from my patch queues, never to be
seen again.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3572f04c69ed4369da5d3c65d84fb18774aa60b6 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Date: Thu, 16 Nov 2017 18:02:15 +0200
Subject: [PATCH] drm/i915: Fix init_clock_gating for resume
Moving the init_clock_gating() call from intel_modeset_init_hw() to
intel_modeset_gem_init() had an unintended effect of not applying
some workarounds on resume. This, for example, caused some kind of
corruption to appear at the top of my IVB Thinkpad X1 Carbon's LVDS
screen after hibernation. Fix the problem by explicitly calling
init_clock_gating() from the resume path.
I really hope this doesn't break something else again. At least
the problems reported at https://bugs.freedesktop.org/show_bug.cgi?id=103549
didn't make a comeback, even after a hibernate cycle.
v2: Reorder the init_clock_gating vs. modeset_init_hw to match
the display reset path (Rodrigo)
Cc: stable(a)vger.kernel.org
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Fixes: 6ac43272768c ("drm/i915: Move init_clock_gating() back to where it was")
Reviewed-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Reviewed-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20171116160215.25715-1-ville.…
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
(cherry picked from commit 675f7ff35bd256e65d3d0f52718d8babf5d1002a)
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 34191028bbad..7d9b07df32fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1714,6 +1714,7 @@ static int i915_drm_resume(struct drm_device *dev)
intel_guc_resume(dev_priv);
intel_modeset_init_hw(dev);
+ intel_init_clock_gating(dev_priv);
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display.hpd_irq_setup)
CFL was missing from intel_early_ids[]. The PCI ID needs to be there so
that the stolen memory region gets reserved; otherwise RAM could be
arbitrarily overwritten if, for example, we keep using the UEFI
framebuffer, depending on how the BIOS has set up the e820 map.
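For context, a schematic sketch of what the early quirk does for a
matched ID (all names below are assumptions for illustration, not the
real early-quirks.c API): it computes the stolen-memory range for the
device and reserves it in the e820 map, so a device ID missing from
intel_early_ids[] means the stolen range stays allocatable as ordinary
RAM.

#include <stdint.h>
#include <stdbool.h>

/* hypothetical helpers standing in for the real early-quirks code */
extern bool device_in_early_ids(uint16_t device);
extern uint64_t read_stolen_base(uint16_t device);	/* GPU config space */
extern uint64_t read_stolen_size(uint16_t device);
extern void e820_reserve_range(uint64_t base, uint64_t size);

static void intel_stolen_quirk_sketch(uint16_t device)
{
	if (!device_in_early_ids(device))	/* CFL IDs were missing */
		return;		/* stolen RAM left "free" -> corruption */

	/* reserve the range so the allocator never hands it out */
	e820_reserve_range(read_stolen_base(device),
			   read_stolen_size(device));
}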
Fixes: b056f8f3d6b9 ("drm/i915/cfl: Add Coffee Lake PCI IDs for S Skus.")
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: Anusha Srivatsa <anusha.srivatsa(a)intel.com>
Cc: Jani Nikula <jani.nikula(a)linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: David Airlie <airlied(a)linux.ie>
Cc: intel-gfx(a)lists.freedesktop.org
Cc: dri-devel(a)lists.freedesktop.org
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: x86(a)kernel.org
Cc: <stable(a)vger.kernel.org> # v4.13+ 0890540e21cf drm/i915: add GT number to intel_device_info
Cc: <stable(a)vger.kernel.org> # v4.13+ 41693fd52373 drm/i915/kbl: Change a KBL pci id to GT2 from GT1.5
Cc: <stable(a)vger.kernel.org> # v4.13+
Reviewed-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
---
v2: improve commit message, add Fixes tag and CC stable
arch/x86/kernel/early-quirks.c | 1 +
include/drm/i915_pciids.h | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3cbb2c78a9df..bae0d32e327b 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -528,6 +528,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_SKL_IDS(&gen9_early_ops),
INTEL_BXT_IDS(&gen9_early_ops),
INTEL_KBL_IDS(&gen9_early_ops),
+ INTEL_CFL_IDS(&gen9_early_ops),
INTEL_GLK_IDS(&gen9_early_ops),
INTEL_CNL_IDS(&gen9_early_ops),
};
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 972a25633525..c65e4489006d 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -392,6 +392,12 @@
INTEL_VGA_DEVICE(0x3EA8, info), /* ULT GT3 */ \
INTEL_VGA_DEVICE(0x3EA5, info) /* ULT GT3 */
+#define INTEL_CFL_IDS(info) \
+ INTEL_CFL_S_GT1_IDS(info), \
+ INTEL_CFL_S_GT2_IDS(info), \
+ INTEL_CFL_H_GT2_IDS(info), \
+ INTEL_CFL_U_GT3_IDS(info)
+
/* CNL U 2+2 */
#define INTEL_CNL_U_GT2_IDS(info) \
INTEL_VGA_DEVICE(0x5A52, info), \
--
2.14.3