usbip host lists devices attached to vhci_hcd on the same server
when user does attach over localhost or specifies the server as the
remote.
usbip attach -r localhost -b busid
or
usbip attach -r servername (or server IP)
Fix it to check and not list devices that are attached to vhci_hcd.
Cc: stable(a)vger.kernel.org
Signed-off-by: Shuah Khan <shuahkh(a)osg.samsung.com>
---
tools/usb/usbip/src/usbip_list.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c
index f1b38e866dd7..d65a9f444174 100644
--- a/tools/usb/usbip/src/usbip_list.c
+++ b/tools/usb/usbip/src/usbip_list.c
@@ -187,6 +187,7 @@ static int list_devices(bool parsable)
const char *busid;
char product_name[128];
int ret = -1;
+ const char *devpath;
/* Create libudev context. */
udev = udev_new();
@@ -209,6 +210,14 @@ static int list_devices(bool parsable)
path = udev_list_entry_get_name(dev_list_entry);
dev = udev_device_new_from_syspath(udev, path);
+ /* Ignore devices attached to vhci_hcd */
+ devpath = udev_device_get_devpath(dev);
+ if (strstr(devpath, USBIP_VHCI_DRV_NAME)) {
+ dbg("Skip the device %s already attached to %s\n",
+ devpath, USBIP_VHCI_DRV_NAME);
+ continue;
+ }
+
/* Get device information. */
idVendor = udev_device_get_sysattr_value(dev, "idVendor");
idProduct = udev_device_get_sysattr_value(dev, "idProduct");
--
2.14.1
usbip host binds to devices attached to vhci_hcd on the same server
when user does attach over localhost or specifies the server as the
remote.
usbip attach -r localhost -b busid
or
usbip attach -r servername (or server IP)
Unbind followed by bind works, however device is left in a bad state with
accesses via the attached busid result in errors and system hangs during
shutdown.
Fix it to check and bail out if the device is already attached to vhci_hcd.
Cc: stable(a)vger.kernel.org
Signed-off-by: Shuah Khan <shuahkh(a)osg.samsung.com>
---
tools/usb/usbip/src/usbip_bind.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c
index fa46141ae68b..e121cfb1746a 100644
--- a/tools/usb/usbip/src/usbip_bind.c
+++ b/tools/usb/usbip/src/usbip_bind.c
@@ -144,6 +144,7 @@ static int bind_device(char *busid)
int rc;
struct udev *udev;
struct udev_device *dev;
+ const char *devpath;
/* Check whether the device with this bus ID exists. */
udev = udev_new();
@@ -152,8 +153,16 @@ static int bind_device(char *busid)
err("device with the specified bus ID does not exist");
return -1;
}
+ devpath = udev_device_get_devpath(dev);
udev_unref(udev);
+ /* If the device is already attached to vhci_hcd - bail out */
+ if (strstr(devpath, USBIP_VHCI_DRV_NAME)) {
+ err("bind loop detected: device: %s is attached to %s\n",
+ devpath, USBIP_VHCI_DRV_NAME);
+ return -1;
+ }
+
rc = unbind_other(busid);
if (rc == UNBIND_ST_FAILED) {
err("could not unbind driver from device on busid %s", busid);
--
2.14.1
If ubifs_wbuf_sync() fails we must not write a master node with the
dirty marker cleared.
Otherwise it is possible that in case of an IO error while syncing we
mark the filesystem as clean and UBIFS refuses to recover upon next
mount.
Cc: <stable(a)vger.kernel.org>
Fixes: 1e51764a3c2a ("UBIFS: add new flash file system")
Signed-off-by: Richard Weinberger <richard(a)nod.at>
---
fs/ubifs/super.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 0beb285b143d..468bca452d0a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1739,8 +1739,11 @@ static void ubifs_remount_ro(struct ubifs_info *c)
dbg_save_space_info(c);
- for (i = 0; i < c->jhead_cnt; i++)
- ubifs_wbuf_sync(&c->jheads[i].wbuf);
+ for (i = 0; i < c->jhead_cnt; i++) {
+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+ if (err)
+ ubifs_ro_mode(c, err);
+ }
c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
@@ -1806,8 +1809,11 @@ static void ubifs_put_super(struct super_block *sb)
int err;
/* Synchronize write-buffers */
- for (i = 0; i < c->jhead_cnt; i++)
- ubifs_wbuf_sync(&c->jheads[i].wbuf);
+ for (i = 0; i < c->jhead_cnt; i++) {
+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+ if (err)
+ ubifs_ro_mode(c, err);
+ }
/*
* We are being cleanly unmounted which means the
--
2.13.6
Hi Greg,
Could you please pick up commit 1b5c7ef3d0d0 ("drm/nouveau/disp/gf119:
add missing drive vfunc ptr") for the 4.14 series? It fixes
https://bugs.freedesktop.org/show_bug.cgi?id=103421 which seems to break
nouveau for everyone with a GF119 card.
This problem has also been reported twice in Debian[1,2], and the Debian
kernel team has already applied the patch.
Cheers,
Sven
1. https://bugs.debian.org/880660
2. https://bugs.debian.org/886727
This is a note to let you know that I've just added the patch titled
drm/nouveau/disp/gf119: add missing drive vfunc ptr
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
drm-nouveau-disp-gf119-add-missing-drive-vfunc-ptr.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 1b5c7ef3d0d0610bda9b63263f7c5b7178d11015 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark(a)gmail.com>
Date: Sat, 6 Jan 2018 10:59:41 -0500
Subject: drm/nouveau/disp/gf119: add missing drive vfunc ptr
From: Rob Clark <robdclark(a)gmail.com>
commit 1b5c7ef3d0d0610bda9b63263f7c5b7178d11015 upstream.
Fixes broken dp on GF119:
Call Trace:
? nvkm_dp_train_drive+0x183/0x2c0 [nouveau]
nvkm_dp_acquire+0x4f3/0xcd0 [nouveau]
nv50_disp_super_2_2+0x5d/0x470 [nouveau]
? nvkm_devinit_pll_set+0xf/0x20 [nouveau]
gf119_disp_super+0x19c/0x2f0 [nouveau]
process_one_work+0x193/0x3c0
worker_thread+0x35/0x3b0
kthread+0x125/0x140
? process_one_work+0x3c0/0x3c0
? kthread_park+0x60/0x60
ret_from_fork+0x25/0x30
Code: Bad RIP value.
RIP: (null) RSP: ffffb1e243e4bc38
CR2: 0000000000000000
Fixes: af85389c614a drm/nouveau/disp: shuffle functions around
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103421
Signed-off-by: Rob Clark <robdclark(a)gmail.com>
Signed-off-by: Ben Skeggs <bskeggs(a)redhat.com>
Cc: Sven Joachim <svenjoac(a)gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -174,6 +174,7 @@ gf119_sor = {
.links = gf119_sor_dp_links,
.power = g94_sor_dp_power,
.pattern = gf119_sor_dp_pattern,
+ .drive = gf119_sor_dp_drive,
.vcpi = gf119_sor_dp_vcpi,
.audio = gf119_sor_dp_audio,
.audio_sym = gf119_sor_dp_audio_sym,
Patches currently in stable-queue which might be from robdclark(a)gmail.com are
queue-4.14/drm-nouveau-disp-gf119-add-missing-drive-vfunc-ptr.patch
Patch 3 fixes the userspace segfaults caused by the PVCLOCK_FIXMAP user
mapping (which I've copied from the 3.2 kaiser patchset). I don't claim I fully
understand this so the fix might be too broad.
Andrea Arcangeli (1):
x86/mm/kaiser: remove paravirt clock warning
Juerg Haefliger (3):
Revert "x86: kvmclock: Disable use from vDSO if KPTI is enabled"
x86/kaiser: Add PVCLOCK_FIXMAP user mapping
x86/kaiser: Fix segfaults caused by the PVCLOCK_FIXMAP user mapping
Marcelo Tosatti (1):
kvmclock: export kvmclock clocksource and data pointers
arch/x86/include/asm/kvmclock.h | 6 ++++++
arch/x86/kernel/kvmclock.c | 9 +++------
arch/x86/mm/kaiser.c | 12 +++++++++++-
3 files changed, 20 insertions(+), 7 deletions(-)
create mode 100644 arch/x86/include/asm/kvmclock.h
--
2.14.1
This is a note to let you know that I've just added the patch titled
libnvdimm, btt: Fix an incompatibility in the log layout
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
libnvdimm-btt-fix-an-incompatibility-in-the-log-layout.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma(a)intel.com>
Date: Mon, 18 Dec 2017 09:28:39 -0700
Subject: libnvdimm, btt: Fix an incompatibility in the log layout
From: Vishal Verma <vishal.l.verma(a)intel.com>
commit 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 upstream.
Due to a spec misinterpretation, the Linux implementation of the BTT log
area had different padding scheme from other implementations, such as
UEFI and NVML.
This fixes the padding scheme, and defaults to it for new BTT layouts.
We attempt to detect the padding scheme in use when probing for an
existing BTT. If we detect the older/incompatible scheme, we continue
using it.
Reported-by: Juston Li <juston.li(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Fixes: 5212e11fde4d ("nd_btt: atomic sector updates")
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/nvdimm/btt.c | 203 ++++++++++++++++++++++++++++++++++++++++++---------
drivers/nvdimm/btt.h | 45 +++++++++++
2 files changed, 212 insertions(+), 36 deletions(-)
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -183,13 +183,13 @@ static int btt_map_read(struct arena_inf
return ret;
}
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
- struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+ struct log_group *log)
{
- WARN_ON(!ent);
+ WARN_ON(!log);
return arena_read_bytes(arena,
- arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
- 2 * LOG_ENT_SIZE);
+ arena->logoff + (lane * LOG_GRP_SIZE), log,
+ LOG_GRP_SIZE);
}
static struct dentry *debugfs_root;
@@ -229,6 +229,8 @@ static void arena_debugfs_init(struct ar
debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+ debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+ debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
}
static void btt_debugfs_init(struct btt *btt)
@@ -247,6 +249,11 @@ static void btt_debugfs_init(struct btt
}
}
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+ return le32_to_cpu(log->ent[log_idx].seq);
+}
+
/*
* This function accepts two log entries, and uses the
* sequence number to find the 'older' entry.
@@ -256,8 +263,10 @@ static void btt_debugfs_init(struct btt
*
* TODO The logic feels a bit kludge-y. make it better..
*/
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
{
+ int idx0 = a->log_index[0];
+ int idx1 = a->log_index[1];
int old;
/*
@@ -265,23 +274,23 @@ static int btt_log_get_old(struct log_en
* the next time, the following logic works out to put this
* (next) entry into [1]
*/
- if (ent[0].seq == 0) {
- ent[0].seq = cpu_to_le32(1);
+ if (log_seq(log, idx0) == 0) {
+ log->ent[idx0].seq = cpu_to_le32(1);
return 0;
}
- if (ent[0].seq == ent[1].seq)
+ if (log_seq(log, idx0) == log_seq(log, idx1))
return -EINVAL;
- if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+ if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
return -EINVAL;
- if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
- if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+ if (log_seq(log, idx0) < log_seq(log, idx1)) {
+ if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
old = 0;
else
old = 1;
} else {
- if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+ if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
old = 1;
else
old = 0;
@@ -306,17 +315,18 @@ static int btt_log_read(struct arena_inf
{
int ret;
int old_ent, ret_ent;
- struct log_entry log[2];
+ struct log_group log;
- ret = btt_log_read_pair(arena, lane, log);
+ ret = btt_log_group_read(arena, lane, &log);
if (ret)
return -EIO;
- old_ent = btt_log_get_old(log);
+ old_ent = btt_log_get_old(arena, &log);
if (old_ent < 0 || old_ent > 1) {
dev_info(to_dev(arena),
"log corruption (%d): lane %d seq [%d, %d]\n",
- old_ent, lane, log[0].seq, log[1].seq);
+ old_ent, lane, log.ent[arena->log_index[0]].seq,
+ log.ent[arena->log_index[1]].seq);
/* TODO set error state? */
return -EIO;
}
@@ -324,7 +334,7 @@ static int btt_log_read(struct arena_inf
ret_ent = (old_flag ? old_ent : (1 - old_ent));
if (ent != NULL)
- memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+ memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
return ret_ent;
}
@@ -338,17 +348,13 @@ static int __btt_log_write(struct arena_
u32 sub, struct log_entry *ent)
{
int ret;
- /*
- * Ignore the padding in log_entry for calculating log_half.
- * The entry is 'committed' when we write the sequence number,
- * and we want to ensure that that is the last thing written.
- * We don't bother writing the padding as that would be extra
- * media wear and write amplification
- */
- unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
- u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+ u32 group_slot = arena->log_index[sub];
+ unsigned int log_half = LOG_ENT_SIZE / 2;
void *src = ent;
+ u64 ns_off;
+ ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+ (group_slot * LOG_ENT_SIZE);
/* split the 16B write into atomic, durable halves */
ret = arena_write_bytes(arena, ns_off, src, log_half);
if (ret)
@@ -419,16 +425,16 @@ static int btt_log_init(struct arena_inf
{
int ret;
u32 i;
- struct log_entry log, zerolog;
+ struct log_entry ent, zerolog;
memset(&zerolog, 0, sizeof(zerolog));
for (i = 0; i < arena->nfree; i++) {
- log.lba = cpu_to_le32(i);
- log.old_map = cpu_to_le32(arena->external_nlba + i);
- log.new_map = cpu_to_le32(arena->external_nlba + i);
- log.seq = cpu_to_le32(LOG_SEQ_INIT);
- ret = __btt_log_write(arena, i, 0, &log);
+ ent.lba = cpu_to_le32(i);
+ ent.old_map = cpu_to_le32(arena->external_nlba + i);
+ ent.new_map = cpu_to_le32(arena->external_nlba + i);
+ ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+ ret = __btt_log_write(arena, i, 0, &ent);
if (ret)
return ret;
ret = __btt_log_write(arena, i, 1, &zerolog);
@@ -490,6 +496,123 @@ static int btt_freelist_init(struct aren
return 0;
}
+static bool ent_is_padding(struct log_entry *ent)
+{
+ return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+ && (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+ bool idx_set = false, initial_state = true;
+ int ret, log_index[2] = {-1, -1};
+ u32 i, j, next_idx = 0;
+ struct log_group log;
+ u32 pad_count = 0;
+
+ for (i = 0; i < arena->nfree; i++) {
+ ret = btt_log_group_read(arena, i, &log);
+ if (ret < 0)
+ return ret;
+
+ for (j = 0; j < 4; j++) {
+ if (!idx_set) {
+ if (ent_is_padding(&log.ent[j])) {
+ pad_count++;
+ continue;
+ } else {
+ /* Skip if index has been recorded */
+ if ((next_idx == 1) &&
+ (j == log_index[0]))
+ continue;
+ /* valid entry, record index */
+ log_index[next_idx] = j;
+ next_idx++;
+ }
+ if (next_idx == 2) {
+ /* two valid entries found */
+ idx_set = true;
+ } else if (next_idx > 2) {
+ /* too many valid indices */
+ return -ENXIO;
+ }
+ } else {
+ /*
+ * once the indices have been set, just verify
+ * that all subsequent log groups are either in
+ * their initial state or follow the same
+ * indices.
+ */
+ if (j == log_index[0]) {
+ /* entry must be 'valid' */
+ if (ent_is_padding(&log.ent[j]))
+ return -ENXIO;
+ } else if (j == log_index[1]) {
+ ;
+ /*
+ * log_index[1] can be padding if the
+ * lane never got used and it is still
+ * in the initial state (three 'padding'
+ * entries)
+ */
+ } else {
+ /* entry must be invalid (padding) */
+ if (!ent_is_padding(&log.ent[j]))
+ return -ENXIO;
+ }
+ }
+ }
+ /*
+ * If any of the log_groups have more than one valid,
+ * non-padding entry, then the we are no longer in the
+ * initial_state
+ */
+ if (pad_count < 3)
+ initial_state = false;
+ pad_count = 0;
+ }
+
+ if (!initial_state && !idx_set)
+ return -ENXIO;
+
+ /*
+ * If all the entries in the log were in the initial state,
+ * assume new padding scheme
+ */
+ if (initial_state)
+ log_index[1] = 1;
+
+ /*
+ * Only allow the known permutations of log/padding indices,
+ * i.e. (0, 1), and (0, 2)
+ */
+ if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+ ; /* known index possibilities */
+ else {
+ dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+ return -ENXIO;
+ }
+
+ arena->log_index[0] = log_index[0];
+ arena->log_index[1] = log_index[1];
+ dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+ dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+ return 0;
+}
+
static int btt_rtt_init(struct arena_info *arena)
{
arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -545,8 +668,7 @@ static struct arena_info *alloc_arena(st
available -= 2 * BTT_PG_SIZE;
/* The log takes a fixed amount of space based on nfree */
- logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
- BTT_PG_SIZE);
+ logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
available -= logsize;
/* Calculate optimal split between map and data area */
@@ -563,6 +685,10 @@ static struct arena_info *alloc_arena(st
arena->mapoff = arena->dataoff + datasize;
arena->logoff = arena->mapoff + mapsize;
arena->info2off = arena->logoff + logsize;
+
+ /* Default log indices are (0,1) */
+ arena->log_index[0] = 0;
+ arena->log_index[1] = 1;
return arena;
}
@@ -653,6 +779,13 @@ static int discover_arenas(struct btt *b
arena->external_lba_start = cur_nlba;
parse_arena_meta(arena, super, cur_off);
+ ret = log_set_indices(arena);
+ if (ret) {
+ dev_err(to_dev(arena),
+ "Unable to deduce log/padding indices\n");
+ goto out;
+ }
+
ret = btt_freelist_init(arena);
if (ret)
goto out;
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -26,6 +26,7 @@
#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
#define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
#define LOG_ENT_SIZE sizeof(struct log_entry)
#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
@@ -44,12 +45,52 @@ enum btt_init_state {
INIT_READY
};
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), where as the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * | ent[0] | ent[1] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | pad |
+ * +-----------------------------------+
+ * | ent[2] | ent[3] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | pad |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * | ent[0] | ent[1] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * | ent[2] | ent[3] |
+ * | 16B | 16B |
+ * | pad | pad |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
struct log_entry {
__le32 lba;
__le32 old_map;
__le32 new_map;
__le32 seq;
- __le64 padding[2];
+};
+
+struct log_group {
+ struct log_entry ent[4];
};
struct btt_sb {
@@ -117,6 +158,7 @@ struct aligned_lock {
* @list: List head for list of arenas
* @debugfs_dir: Debugfs dentry
* @flags: Arena flags - may signify error states.
+ * @log_index: Indices of the valid log entries in a log_group
*
* arena_info is a per-arena handle. Once an arena is narrowed down for an
* IO, this struct is passed around for the duration of the IO.
@@ -147,6 +189,7 @@ struct arena_info {
struct dentry *debugfs_dir;
/* Arena flags */
u32 flags;
+ int log_index[2];
};
/**
Patches currently in stable-queue which might be from vishal.l.verma(a)intel.com are
queue-4.9/libnvdimm-btt-fix-an-incompatibility-in-the-log-layout.patch
From: Rabin Vincent <rabinv(a)axis.com>
softirq time accounting is broken on v4.9.x if ksoftirqd runs.
With
CONFIG_IRQ_TIME_ACCOUNTING=y
# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set
this test code:
struct tasklet_struct tasklet;
static void delay_tasklet(unsigned long data)
{
udelay(10);
tasklet_schedule(&tasklet);
}
tasklet_init(&tasklet, delay_tasklet, 0);
tasklet_schedule(&tasklet);
results in:
$ while :; do grep cpu0 /proc/stat; done
cpu0 5 0 80 25 16 107 1 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 80 25 16 107 0 0 0 0
cpu0 5 0 81 25 16 107 0 0 0 0
cpu0 5 0 81 25 16 107 1 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 18446744073709551615 0 0 0
cpu0 5 0 81 25 16 108 0 0 0 0
cpu0 6 0 81 25 16 108 0 0 0 0
cpu0 6 0 81 25 16 108 0 0 0 0
As can be seen, the softirq numbers are totally bogus.
When ksoftirq is running, irqtime_account_process_tick() increments
cpustat[CPUSTAT_SOFTIRQ]. This causes the "nsecs_to_cputime64(irqtime)
- cpustat[CPUSTAT_SOFTIRQ]" calculation in irqtime_account_update() to
underflow the next time a softirq is handled leading to the above
values.
The underflow bug was added by 57430218317e5b280 ("sched/cputime: Count
actually elapsed irq & softirq time").
But ksoftirqd accounting was wrong even in earlier kernels. In earlier
kernels, after a ksoftirq run, the kernel would simply stop accounting
softirq time spent outside of ksoftirqd until that (accumulated) time
exceeded the time for which ksofirqd previously had run.
Fix both the underflow and the wrong accounting by using a counter
specifically for the non-ksoftirqd softirq case.
This code has been fixed in current mainline by a499a5a14db
("sched/cputime: Increment kcpustat directly on irqtime account") [note
also the followup 25e2d8c1b9e327e ("sched/cputime: Fix ksoftirqd cputime
accounting regression")], but that patch is a part of the many changes
for eliminating of cputime_t so it does not seem suitable for backport.
Signed-off-by: Rabin Vincent <rabinv(a)axis.com>
---
include/linux/kernel_stat.h | 1 +
kernel/sched/cputime.c | 9 ++++++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44fda64..d0826f1 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -33,6 +33,7 @@ enum cpu_usage_stat {
struct kernel_cpustat {
u64 cpustat[NR_STATS];
+ u64 softirq_no_ksoftirqd;
};
struct kernel_stat {
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5ebee31..1b5a9e6 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -73,12 +73,19 @@ EXPORT_SYMBOL_GPL(irqtime_account_irq);
static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
+ u64 base = cpustat[idx];
cputime_t irq_cputime;
- irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
+ if (idx == CPUTIME_SOFTIRQ)
+ base = kcpustat_this_cpu->softirq_no_ksoftirqd;
+
+ irq_cputime = nsecs_to_cputime64(irqtime) - base;
irq_cputime = min(irq_cputime, maxtime);
cpustat[idx] += irq_cputime;
+ if (idx == CPUTIME_SOFTIRQ)
+ kcpustat_this_cpu->softirq_no_ksoftirqd += irq_cputime;
+
return irq_cputime;
}
--
2.1.4
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma(a)intel.com>
Date: Mon, 18 Dec 2017 09:28:39 -0700
Subject: [PATCH] libnvdimm, btt: Fix an incompatibility in the log layout
Due to a spec misinterpretation, the Linux implementation of the BTT log
area had different padding scheme from other implementations, such as
UEFI and NVML.
This fixes the padding scheme, and defaults to it for new BTT layouts.
We attempt to detect the padding scheme in use when probing for an
existing BTT. If we detect the older/incompatible scheme, we continue
using it.
Reported-by: Juston Li <juston.li(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Fixes: 5212e11fde4d ("nd_btt: atomic sector updates")
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index e949e3302af4..c586bcdb5190 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
return ret;
}
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
- struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+ struct log_group *log)
{
return arena_read_bytes(arena,
- arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
- 2 * LOG_ENT_SIZE, 0);
+ arena->logoff + (lane * LOG_GRP_SIZE), log,
+ LOG_GRP_SIZE, 0);
}
static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+ debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+ debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
}
static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
}
}
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+ return le32_to_cpu(log->ent[log_idx].seq);
+}
+
/*
* This function accepts two log entries, and uses the
* sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
*
* TODO The logic feels a bit kludge-y. make it better..
*/
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
{
+ int idx0 = a->log_index[0];
+ int idx1 = a->log_index[1];
int old;
/*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
* the next time, the following logic works out to put this
* (next) entry into [1]
*/
- if (ent[0].seq == 0) {
- ent[0].seq = cpu_to_le32(1);
+ if (log_seq(log, idx0) == 0) {
+ log->ent[idx0].seq = cpu_to_le32(1);
return 0;
}
- if (ent[0].seq == ent[1].seq)
+ if (log_seq(log, idx0) == log_seq(log, idx1))
return -EINVAL;
- if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+ if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
return -EINVAL;
- if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
- if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+ if (log_seq(log, idx0) < log_seq(log, idx1)) {
+ if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
old = 0;
else
old = 1;
} else {
- if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+ if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
old = 1;
else
old = 0;
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
{
int ret;
int old_ent, ret_ent;
- struct log_entry log[2];
+ struct log_group log;
- ret = btt_log_read_pair(arena, lane, log);
+ ret = btt_log_group_read(arena, lane, &log);
if (ret)
return -EIO;
- old_ent = btt_log_get_old(log);
+ old_ent = btt_log_get_old(arena, &log);
if (old_ent < 0 || old_ent > 1) {
dev_err(to_dev(arena),
"log corruption (%d): lane %d seq [%d, %d]\n",
- old_ent, lane, log[0].seq, log[1].seq);
+ old_ent, lane, log.ent[arena->log_index[0]].seq,
+ log.ent[arena->log_index[1]].seq);
/* TODO set error state? */
return -EIO;
}
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
ret_ent = (old_flag ? old_ent : (1 - old_ent));
if (ent != NULL)
- memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+ memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
return ret_ent;
}
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
u32 sub, struct log_entry *ent, unsigned long flags)
{
int ret;
- /*
- * Ignore the padding in log_entry for calculating log_half.
- * The entry is 'committed' when we write the sequence number,
- * and we want to ensure that that is the last thing written.
- * We don't bother writing the padding as that would be extra
- * media wear and write amplification
- */
- unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
- u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+ u32 group_slot = arena->log_index[sub];
+ unsigned int log_half = LOG_ENT_SIZE / 2;
void *src = ent;
+ u64 ns_off;
+ ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+ (group_slot * LOG_ENT_SIZE);
/* split the 16B write into atomic, durable halves */
ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
if (ret)
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
{
size_t logsize = arena->info2off - arena->logoff;
size_t chunk_size = SZ_4K, offset = 0;
- struct log_entry log;
+ struct log_entry ent;
void *zerobuf;
int ret;
u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
}
for (i = 0; i < arena->nfree; i++) {
- log.lba = cpu_to_le32(i);
- log.old_map = cpu_to_le32(arena->external_nlba + i);
- log.new_map = cpu_to_le32(arena->external_nlba + i);
- log.seq = cpu_to_le32(LOG_SEQ_INIT);
- ret = __btt_log_write(arena, i, 0, &log, 0);
+ ent.lba = cpu_to_le32(i);
+ ent.old_map = cpu_to_le32(arena->external_nlba + i);
+ ent.new_map = cpu_to_le32(arena->external_nlba + i);
+ ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+ ret = __btt_log_write(arena, i, 0, &ent, 0);
if (ret)
goto free;
}
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
return 0;
}
+static bool ent_is_padding(struct log_entry *ent)
+{
+ return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+ && (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+ bool idx_set = false, initial_state = true;
+ int ret, log_index[2] = {-1, -1};
+ u32 i, j, next_idx = 0;
+ struct log_group log;
+ u32 pad_count = 0;
+
+ for (i = 0; i < arena->nfree; i++) {
+ ret = btt_log_group_read(arena, i, &log);
+ if (ret < 0)
+ return ret;
+
+ for (j = 0; j < 4; j++) {
+ if (!idx_set) {
+ if (ent_is_padding(&log.ent[j])) {
+ pad_count++;
+ continue;
+ } else {
+ /* Skip if index has been recorded */
+ if ((next_idx == 1) &&
+ (j == log_index[0]))
+ continue;
+ /* valid entry, record index */
+ log_index[next_idx] = j;
+ next_idx++;
+ }
+ if (next_idx == 2) {
+ /* two valid entries found */
+ idx_set = true;
+ } else if (next_idx > 2) {
+ /* too many valid indices */
+ return -ENXIO;
+ }
+ } else {
+ /*
+ * once the indices have been set, just verify
+ * that all subsequent log groups are either in
+ * their initial state or follow the same
+ * indices.
+ */
+ if (j == log_index[0]) {
+ /* entry must be 'valid' */
+ if (ent_is_padding(&log.ent[j]))
+ return -ENXIO;
+ } else if (j == log_index[1]) {
+ ;
+ /*
+ * log_index[1] can be padding if the
+ * lane never got used and it is still
+ * in the initial state (three 'padding'
+ * entries)
+ */
+ } else {
+ /* entry must be invalid (padding) */
+ if (!ent_is_padding(&log.ent[j]))
+ return -ENXIO;
+ }
+ }
+ }
+ /*
+ * If any of the log_groups have more than one valid,
+ * non-padding entry, then the we are no longer in the
+ * initial_state
+ */
+ if (pad_count < 3)
+ initial_state = false;
+ pad_count = 0;
+ }
+
+ if (!initial_state && !idx_set)
+ return -ENXIO;
+
+ /*
+ * If all the entries in the log were in the initial state,
+ * assume new padding scheme
+ */
+ if (initial_state)
+ log_index[1] = 1;
+
+ /*
+ * Only allow the known permutations of log/padding indices,
+ * i.e. (0, 1), and (0, 2)
+ */
+ if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+ ; /* known index possibilities */
+ else {
+ dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+ return -ENXIO;
+ }
+
+ arena->log_index[0] = log_index[0];
+ arena->log_index[1] = log_index[1];
+ dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+ dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+ return 0;
+}
+
static int btt_rtt_init(struct arena_info *arena)
{
arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
available -= 2 * BTT_PG_SIZE;
/* The log takes a fixed amount of space based on nfree */
- logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
- BTT_PG_SIZE);
+ logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
available -= logsize;
/* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
arena->mapoff = arena->dataoff + datasize;
arena->logoff = arena->mapoff + mapsize;
arena->info2off = arena->logoff + logsize;
+
+ /* Default log indices are (0,1) */
+ arena->log_index[0] = 0;
+ arena->log_index[1] = 1;
return arena;
}
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
arena->external_lba_start = cur_nlba;
parse_arena_meta(arena, super, cur_off);
+ ret = log_set_indices(arena);
+ if (ret) {
+ dev_err(to_dev(arena),
+ "Unable to deduce log/padding indices\n");
+ goto out;
+ }
+
mutex_init(&arena->err_lock);
ret = btt_freelist_init(arena);
if (ret)
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 884fbbbdd18a..db3cb6d4d0d4 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -27,6 +27,7 @@
#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
#define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
#define LOG_ENT_SIZE sizeof(struct log_entry)
#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
INIT_READY
};
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), where as the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * | ent[0] | ent[1] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | pad |
+ * +-----------------------------------+
+ * | ent[2] | ent[3] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | pad |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * | ent[0] | ent[1] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * | ent[2] | ent[3] |
+ * | 16B | 16B |
+ * | pad | pad |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
struct log_entry {
__le32 lba;
__le32 old_map;
__le32 new_map;
__le32 seq;
- __le64 padding[2];
+};
+
+struct log_group {
+ struct log_entry ent[4];
};
struct btt_sb {
@@ -126,6 +167,7 @@ struct aligned_lock {
* @debugfs_dir: Debugfs dentry
* @flags: Arena flags - may signify error states.
* @err_lock: Mutex for synchronizing error clearing.
+ * @log_index: Indices of the valid log entries in a log_group
*
* arena_info is a per-arena handle. Once an arena is narrowed down for an
* IO, this struct is passed around for the duration of the IO.
@@ -158,6 +200,7 @@ struct arena_info {
/* Arena flags */
u32 flags;
struct mutex err_lock;
+ int log_index[2];
};
/**
This is a note to let you know that I've just added the patch titled
x86/retpoline/xen: Convert Xen hypercall indirect jumps
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From ea08816d5b185ab3d09e95e393f265af54560350 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw(a)amazon.co.uk>
Date: Thu, 11 Jan 2018 21:46:31 +0000
Subject: x86/retpoline/xen: Convert Xen hypercall indirect jumps
From: David Woodhouse <dwmw(a)amazon.co.uk>
commit ea08816d5b185ab3d09e95e393f265af54560350 upstream.
Convert indirect call in Xen hypercall to use non-speculative sequence,
when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Arjan van de Ven <arjan(a)linux.intel.com>
Acked-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Cc: gnomes(a)lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: thomas.lendacky(a)amd.com
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Kees Cook <keescook(a)google.com>
Cc: Tim Chen <tim.c.chen(a)linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linux-foundation.org>
Cc: Paul Turner <pjt(a)google.com>
Link: https://lkml.kernel.org/r/1515707194-20531-10-git-send-email-dwmw@amazon.co…
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/xen/hypercall.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
@@ -215,9 +216,9 @@ privcmd_call(unsigned call,
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
stac();
- asm volatile("call *%[call]"
+ asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
- : [call] "a" (&hypercall_page[call])
+ : [thunk_target] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
clac();
Patches currently in stable-queue which might be from dwmw(a)amazon.co.uk are
queue-4.4/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
queue-4.4/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
queue-4.4/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
queue-4.4/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
queue-4.4/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
queue-4.4/x86-retpoline-remove-compile-time-warning.patch
queue-4.4/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
queue-4.4/x86-retpoline-add-initial-retpoline-support.patch
This is a note to let you know that I've just added the patch titled
x86/retpoline: Remove compile time warning
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-retpoline-remove-compile-time-warning.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Sun, 14 Jan 2018 22:13:29 +0100
Subject: x86/retpoline: Remove compile time warning
From: Thomas Gleixner <tglx(a)linutronix.de>
commit b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 upstream.
Remove the compile time warning when CONFIG_RETPOLINE=y and the compiler
does not have retpoline support. Linus rationale for this is:
It's wrong because it will just make people turn off RETPOLINE, and the
asm updates - and return stack clearing - that are independent of the
compiler are likely the most important parts because they are likely the
ones easiest to target.
And it's annoying because most people won't be able to do anything about
it. The number of people building their own compiler? Very small. So if
their distro hasn't got a compiler yet (and pretty much nobody does), the
warning is just annoying crap.
It is already properly reported as part of the sysfs interface. The
compile-time warning only encourages bad things.
Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support")
Requested-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: David Woodhouse <dwmw(a)amazon.co.uk>
Cc: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: gnomes(a)lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: thomas.lendacky(a)amd.com
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Kees Cook <keescook(a)google.com>
Cc: Tim Chen <tim.c.chen(a)linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linux-foundation.org>
Link: https://lkml.kernel.org/r/CA+55aFzWgquv4i6Mab6bASqYXg3ErV3XDFEYf=GEcCDQg5uA…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/Makefile | 2 --
1 file changed, 2 deletions(-)
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -194,8 +194,6 @@ ifdef CONFIG_RETPOLINE
RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
ifneq ($(RETPOLINE_CFLAGS),)
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
- else
- $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.)
endif
endif
Patches currently in stable-queue which might be from tglx(a)linutronix.de are
queue-4.4/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
queue-4.4/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
queue-4.4/x86-asm-use-register-variable-to-get-stack-pointer-value.patch
queue-4.4/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
queue-4.4/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
queue-4.4/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
queue-4.4/x86-retpoline-remove-compile-time-warning.patch
queue-4.4/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
queue-4.4/x86-retpoline-add-initial-retpoline-support.patch
queue-4.4/x86-asm-make-asm-alternative.h-safe-from-assembly.patch
This is a note to let you know that I've just added the patch titled
x86/retpoline/hyperv: Convert assembler indirect jumps
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From e70e5892b28c18f517f29ab6e83bd57705104b31 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw(a)amazon.co.uk>
Date: Thu, 11 Jan 2018 21:46:30 +0000
Subject: x86/retpoline/hyperv: Convert assembler indirect jumps
From: David Woodhouse <dwmw(a)amazon.co.uk>
commit e70e5892b28c18f517f29ab6e83bd57705104b31 upstream.
Convert all indirect jumps in hyperv inline asm code to use non-speculative
sequences when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Arjan van de Ven <arjan(a)linux.intel.com>
Acked-by: Ingo Molnar <mingo(a)kernel.org>
Cc: gnomes(a)lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: thomas.lendacky(a)amd.com
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Kees Cook <keescook(a)google.com>
Cc: Tim Chen <tim.c.chen(a)linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linux-foundation.org>
Cc: Paul Turner <pjt(a)google.com>
Link: https://lkml.kernel.org/r/1515707194-20531-9-git-send-email-dwmw@amazon.co.…
[ backport to 4.4, hopefully correct, not tested... - gregkh ]
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/hv/hv.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -31,6 +31,7 @@
#include <linux/clockchips.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
+#include <asm/nospec-branch.h>
#include "hyperv_vmbus.h"
/* The one and only */
@@ -103,9 +104,10 @@ static u64 do_hypercall(u64 control, voi
return (u64)ULLONG_MAX;
__asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
- __asm__ __volatile__("call *%3" : "=a" (hv_status) :
+ __asm__ __volatile__(CALL_NOSPEC :
+ "=a" (hv_status) :
"c" (control), "d" (input_address),
- "m" (hypercall_page));
+ THUNK_TARGET(hypercall_page));
return hv_status;
@@ -123,11 +125,12 @@ static u64 do_hypercall(u64 control, voi
if (!hypercall_page)
return (u64)ULLONG_MAX;
- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
+ __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi),
"=a"(hv_status_lo) : "d" (control_hi),
"a" (control_lo), "b" (input_address_hi),
"c" (input_address_lo), "D"(output_address_hi),
- "S"(output_address_lo), "m" (hypercall_page));
+ "S"(output_address_lo),
+ THUNK_TARGET(hypercall_page));
return hv_status_lo | ((u64)hv_status_hi << 32);
#endif /* !x86_64 */
Patches currently in stable-queue which might be from dwmw(a)amazon.co.uk are
queue-4.4/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
queue-4.4/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
queue-4.4/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
queue-4.4/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
queue-4.4/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
queue-4.4/x86-retpoline-remove-compile-time-warning.patch
queue-4.4/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
queue-4.4/x86-retpoline-add-initial-retpoline-support.patch
This is a note to let you know that I've just added the patch titled
x86/retpoline/ftrace: Convert ftrace assembler indirect jumps
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 9351803bd803cdbeb9b5a7850b7b6f464806e3db Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw(a)amazon.co.uk>
Date: Thu, 11 Jan 2018 21:46:29 +0000
Subject: x86/retpoline/ftrace: Convert ftrace assembler indirect jumps
From: David Woodhouse <dwmw(a)amazon.co.uk>
commit 9351803bd803cdbeb9b5a7850b7b6f464806e3db upstream.
Convert all indirect jumps in ftrace assembler code to use non-speculative
sequences when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Arjan van de Ven <arjan(a)linux.intel.com>
Acked-by: Ingo Molnar <mingo(a)kernel.org>
Cc: gnomes(a)lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: thomas.lendacky(a)amd.com
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Kees Cook <keescook(a)google.com>
Cc: Tim Chen <tim.c.chen(a)linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linux-foundation.org>
Cc: Paul Turner <pjt(a)google.com>
Link: https://lkml.kernel.org/r/1515707194-20531-8-git-send-email-dwmw@amazon.co.…
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Razvan Ghitulete <rga(a)amazon.de>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/entry/entry_32.S | 5 +++--
arch/x86/kernel/mcount_64.S | 7 ++++---
2 files changed, 7 insertions(+), 5 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -862,7 +862,8 @@ trace:
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
- call *ftrace_trace_function
+ movl ftrace_trace_function, %ecx
+ CALL_NOSPEC %ecx
popl %edx
popl %ecx
@@ -897,7 +898,7 @@ return_to_handler:
movl %eax, %ecx
popl %edx
popl %eax
- jmp *%ecx
+ JMP_NOSPEC %ecx
#endif
#ifdef CONFIG_TRACING
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -7,7 +7,7 @@
#include <linux/linkage.h>
#include <asm/ptrace.h>
#include <asm/ftrace.h>
-
+#include <asm/nospec-branch.h>
.code64
.section .entry.text, "ax"
@@ -285,8 +285,9 @@ trace:
* ip and parent ip are used and the list function is called when
* function tracing is enabled.
*/
- call *ftrace_trace_function
+ movq ftrace_trace_function, %r8
+ CALL_NOSPEC %r8
restore_mcount_regs
jmp fgraph_trace
@@ -329,5 +330,5 @@ GLOBAL(return_to_handler)
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- jmp *%rdi
+ JMP_NOSPEC %rdi
#endif
Patches currently in stable-queue which might be from dwmw(a)amazon.co.uk are
queue-4.4/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
queue-4.4/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
queue-4.4/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
queue-4.4/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
queue-4.4/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
queue-4.4/x86-retpoline-remove-compile-time-warning.patch
queue-4.4/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
queue-4.4/x86-retpoline-add-initial-retpoline-support.patch
This is a note to let you know that I've just added the patch titled
x86/retpoline: Fill return stack buffer on vmexit
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 117cc7a908c83697b0b737d15ae1eb5943afe35b Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw(a)amazon.co.uk>
Date: Fri, 12 Jan 2018 11:11:27 +0000
Subject: x86/retpoline: Fill return stack buffer on vmexit
From: David Woodhouse <dwmw(a)amazon.co.uk>
commit 117cc7a908c83697b0b737d15ae1eb5943afe35b upstream.
In accordance with the Intel and AMD documentation, we need to overwrite
all entries in the RSB on exiting a guest, to prevent malicious branch
target predictions from affecting the host kernel. This is needed both
for retpoline and for IBRS.
[ak: numbers again for the RSB stuffing labels]
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: gnomes(a)lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: thomas.lendacky(a)amd.com
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Kees Cook <keescook(a)google.com>
Cc: Tim Chen <tim.c.chen(a)linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linux-foundation.org>
Cc: Paul Turner <pjt(a)google.com>
Link: https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Razvan Ghitulete <rga(a)amazon.de>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/nospec-branch.h | 76 ++++++++++++++++++++++++++++++++++-
arch/x86/kvm/svm.c | 4 +
arch/x86/kvm/vmx.c | 4 +
3 files changed, 83 insertions(+), 1 deletion(-)
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -7,6 +7,48 @@
#include <asm/alternative-asm.h>
#include <asm/cpufeature.h>
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
#ifdef __ASSEMBLY__
/*
@@ -61,6 +103,19 @@
#endif
.endm
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
#else /* __ASSEMBLY__ */
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
@@ -97,7 +152,7 @@
X86_FEATURE_RETPOLINE)
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
-#else /* No retpoline */
+#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
@@ -112,5 +167,24 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+ unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+ asm volatile (ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=&r" (loops), ASM_CALL_CONSTRAINT
+ : "r" (loops) : "memory" );
+#endif
+}
#endif /* __ASSEMBLY__ */
#endif /* __NOSPEC_BRANCH_H__ */
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -37,6 +37,7 @@
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -3904,6 +3905,9 @@ static void svm_vcpu_run(struct kvm_vcpu
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -47,6 +47,7 @@
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include "trace.h"
#include "pmu.h"
@@ -8701,6 +8702,9 @@ static void __noclone vmx_vcpu_run(struc
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
update_debugctlmsr(debugctlmsr);
Patches currently in stable-queue which might be from dwmw(a)amazon.co.uk are
queue-4.4/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
queue-4.4/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
queue-4.4/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
queue-4.4/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
queue-4.4/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
queue-4.4/x86-retpoline-remove-compile-time-warning.patch
queue-4.4/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
queue-4.4/x86-retpoline-add-initial-retpoline-support.patch
This is a note to let you know that I've just added the patch titled
x86/retpoline/entry: Convert entry assembler indirect jumps
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 2641f08bb7fc63a636a2b18173221d7040a3512e Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw(a)amazon.co.uk>
Date: Thu, 11 Jan 2018 21:46:28 +0000
Subject: x86/retpoline/entry: Convert entry assembler indirect jumps
From: David Woodhouse <dwmw(a)amazon.co.uk>
commit 2641f08bb7fc63a636a2b18173221d7040a3512e upstream.
Convert indirect jumps in core 32/64bit entry assembler code to use
non-speculative sequences when CONFIG_RETPOLINE is enabled.
Don't use CALL_NOSPEC in entry_SYSCALL_64_fastpath because the return
address after the 'call' instruction must be *precisely* at the
.Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work,
and the use of alternatives will mess that up unless we play horrid
games to prepend with NOPs and make the variants the same length. It's
not worth it; in the case where we ALTERNATIVE out the retpoline, the
first instruction at __x86.indirect_thunk.rax is going to be a bare
jmp *%rax anyway.
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Ingo Molnar <mingo(a)kernel.org>
Acked-by: Arjan van de Ven <arjan(a)linux.intel.com>
Cc: gnomes(a)lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: thomas.lendacky(a)amd.com
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Kees Cook <keescook(a)google.com>
Cc: Tim Chen <tim.c.chen(a)linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linux-foundation.org>
Cc: Paul Turner <pjt(a)google.com>
Link: https://lkml.kernel.org/r/1515707194-20531-7-git-send-email-dwmw@amazon.co.…
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Razvan Ghitulete <rga(a)amazon.de>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/entry/entry_32.S | 5 +++--
arch/x86/entry/entry_64.S | 14 +++++++++++++-
2 files changed, 16 insertions(+), 3 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,6 +44,7 @@
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
.section .entry.text, "ax"
@@ -226,7 +227,7 @@ ENTRY(ret_from_kernel_thread)
pushl $0x0202 # Reset kernel eflags
popfl
movl PT_EBP(%esp), %eax
- call *PT_EBX(%esp)
+ CALL_NOSPEC PT_EBX(%esp)
movl $0, PT_EAX(%esp)
/*
@@ -938,7 +939,7 @@ error_code:
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call *%edi
+ CALL_NOSPEC %edi
jmp ret_from_exception
END(page_fault)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/kaiser.h>
+#include <asm/nospec-branch.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -184,7 +185,13 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax
+ call __x86_indirect_thunk_rax
+#else
call *sys_call_table(, %rax, 8)
+#endif
+
movq %rax, RAX(%rsp)
1:
/*
@@ -276,7 +283,12 @@ tracesys_phase2:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx /* fixup for C */
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax
+ call __x86_indirect_thunk_rax
+#else
call *sys_call_table(, %rax, 8)
+#endif
movq %rax, RAX(%rsp)
1:
/* Use IRET because user could have changed pt_regs->foo */
@@ -491,7 +503,7 @@ ENTRY(ret_from_fork)
* nb: we depend on RESTORE_EXTRA_REGS above
*/
movq %rbp, %rdi
- call *%rbx
+ CALL_NOSPEC %rbx
movl $0, RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
Patches currently in stable-queue which might be from dwmw(a)amazon.co.uk are
queue-4.4/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
queue-4.4/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
queue-4.4/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
queue-4.4/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
queue-4.4/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
queue-4.4/x86-retpoline-remove-compile-time-warning.patch
queue-4.4/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
queue-4.4/x86-retpoline-add-initial-retpoline-support.patch
This is a note to let you know that I've just added the patch titled
x86/retpoline/checksum32: Convert assembler indirect jumps
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 5096732f6f695001fa2d6f1335a2680b37912c69 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw(a)amazon.co.uk>
Date: Thu, 11 Jan 2018 21:46:32 +0000
Subject: x86/retpoline/checksum32: Convert assembler indirect jumps
From: David Woodhouse <dwmw(a)amazon.co.uk>
commit 5096732f6f695001fa2d6f1335a2680b37912c69 upstream.
Convert all indirect jumps in 32bit checksum assembler code to use
non-speculative sequences when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Arjan van de Ven <arjan(a)linux.intel.com>
Acked-by: Ingo Molnar <mingo(a)kernel.org>
Cc: gnomes(a)lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: thomas.lendacky(a)amd.com
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Kees Cook <keescook(a)google.com>
Cc: Tim Chen <tim.c.chen(a)linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linux-foundation.org>
Cc: Paul Turner <pjt(a)google.com>
Link: https://lkml.kernel.org/r/1515707194-20531-11-git-send-email-dwmw@amazon.co…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/lib/checksum_32.S | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -28,7 +28,8 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
-
+#include <asm/nospec-branch.h>
+
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
@@ -155,7 +156,7 @@ ENTRY(csum_partial)
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -437,7 +438,7 @@ ENTRY(csum_partial_copy_generic)
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
Patches currently in stable-queue which might be from dwmw(a)amazon.co.uk are
queue-4.4/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
queue-4.4/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
queue-4.4/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
queue-4.4/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
queue-4.4/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
queue-4.4/x86-retpoline-remove-compile-time-warning.patch
queue-4.4/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
queue-4.4/x86-retpoline-add-initial-retpoline-support.patch
This is a note to let you know that I've just added the patch titled
x86/retpoline: Add initial retpoline support
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-retpoline-add-initial-retpoline-support.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 76b043848fd22dbf7f8bf3a1452f8c70d557b860 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw(a)amazon.co.uk>
Date: Thu, 11 Jan 2018 21:46:25 +0000
Subject: x86/retpoline: Add initial retpoline support
From: David Woodhouse <dwmw(a)amazon.co.uk>
commit 76b043848fd22dbf7f8bf3a1452f8c70d557b860 upstream.
Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide
the corresponding thunks. Provide assembler macros for invoking the thunks
in the same way that GCC does, from native and inline assembler.
This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In
some circumstances, IBRS microcode features may be used instead, and the
retpoline can be disabled.
On AMD CPUs if lfence is serialising, the retpoline can be dramatically
simplified to a simple "lfence; jmp *\reg". A future patch, after it has
been verified that lfence really is serialising in all circumstances, can
enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition
to X86_FEATURE_RETPOLINE.
Do not align the retpoline in the altinstr section, because there is no
guarantee that it stays aligned when it's copied over the oldinstr during
alternative patching.
[ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks]
[ tglx: Put actual function CALL/JMP in front of the macros, convert to
symbolic labels ]
[ dwmw2: Convert back to numeric labels, merge objtool fixes ]
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Arjan van de Ven <arjan(a)linux.intel.com>
Acked-by: Ingo Molnar <mingo(a)kernel.org>
Cc: gnomes(a)lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: thomas.lendacky(a)amd.com
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Kees Cook <keescook(a)google.com>
Cc: Tim Chen <tim.c.chen(a)linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linux-foundation.org>
Cc: Paul Turner <pjt(a)google.com>
Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.…
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
[ 4.4 backport: removed objtool annotation since there is no objtool ]
Signed-off-by: Razvan Ghitulete <rga(a)amazon.de>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/Kconfig | 13 ++++
arch/x86/Makefile | 10 +++
arch/x86/include/asm/asm-prototypes.h | 25 ++++++++
arch/x86/include/asm/cpufeature.h | 2
arch/x86/include/asm/nospec-branch.h | 106 ++++++++++++++++++++++++++++++++++
arch/x86/kernel/cpu/common.c | 4 +
arch/x86/lib/Makefile | 1
arch/x86/lib/retpoline.S | 48 +++++++++++++++
8 files changed, 209 insertions(+)
create mode 100644 arch/x86/include/asm/nospec-branch.h
create mode 100644 arch/x86/lib/retpoline.S
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -379,6 +379,19 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ ---help---
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
if X86_32
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -189,6 +189,16 @@ KBUILD_CFLAGS += -fno-asynchronous-unwin
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ else
+ $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.)
+ endif
+endif
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -10,7 +10,32 @@
#include <asm/pgtable.h>
#include <asm/special_insns.h>
#include <asm/preempt.h>
+#include <asm/asm.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
#endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -200,6 +200,8 @@
#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_RETPOLINE ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeature.h>
+
+#ifdef __ASSEMBLY__
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ pause
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov \reg, (%_ASM_SP)
+ ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+ jmp .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+ RETPOLINE_JMP \reg
+.Ldo_call_\@:
+ call .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(jmp *\reg), \
+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ jmp *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(call *\reg), \
+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ call *\reg
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC \
+ ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+ " jmp 904f;\n" \
+ " .align 16\n" \
+ "901: call 903f;\n" \
+ "902: pause;\n" \
+ " jmp 902b;\n" \
+ " .align 16\n" \
+ "903: addl $4, %%esp;\n" \
+ " pushl %[thunk_target];\n" \
+ " ret;\n" \
+ " .align 16\n" \
+ "904: call 901b;\n", \
+ X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -837,6 +837,10 @@ static void __init early_identify_cpu(st
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+#ifdef CONFIG_RETPOLINE
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+#endif
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -21,6 +21,7 @@ lib-y += usercopy_$(BITS).o usercopy.o g
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
obj-y += msr.o msr-reg.o msr-reg-export.o
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
+#include <asm-generic/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86_indirect_thunk_\reg)
+ CFI_STARTPROC
+ JMP_NOSPEC %\reg
+ CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
Patches currently in stable-queue which might be from dwmw(a)amazon.co.uk are
queue-4.4/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
queue-4.4/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
queue-4.4/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
queue-4.4/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
queue-4.4/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
queue-4.4/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
queue-4.4/x86-retpoline-remove-compile-time-warning.patch
queue-4.4/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
queue-4.4/x86-retpoline-add-initial-retpoline-support.patch
On Wed, Jan 17, 2018 at 10:38:30AM +0000, Harsh Shandilya wrote:
> On Wed 17 Jan, 2018, 3:11 PM Greg KH, <gregkh(a)linuxfoundation.org> wrote:
>
> > I'm announcing the release of the 3.18.92 kernel.
> >
> > All users of the 3.18 kernel series must upgrade.
> >
> > The updated 3.18.y git tree can be found at:
> > git://
> > git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git
> > linux-3.18.y
> > and can be browsed at the normal kernel.org git web browser:
> >
> > http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
> >
> > thanks,
> >
> > greg k-h
> >
>
> Builds and boots on the OnePlus 3T, no regressions noticed in general usage.
Great, thanks for testing and letting me know.
greg k-h
This is a note to let you know that I've just added the patch titled
x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From b8b7abaed7a49b350f8ba659ddc264b04931d581 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Sun, 17 Sep 2017 09:03:50 -0700
Subject: x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier
From: Andy Lutomirski <luto(a)kernel.org>
commit b8b7abaed7a49b350f8ba659ddc264b04931d581 upstream.
Otherwise we might have the PCID feature bit set during cpu_init().
This is just for robustness. I haven't seen any actual bugs here.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels")
Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.150566353…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/kernel/cpu/bugs.c | 8 --------
arch/x86/kernel/cpu/common.c | 8 ++++++++
2 files changed, 8 insertions(+), 8 deletions(-)
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -22,14 +22,6 @@
void __init check_bugs(void)
{
-#ifdef CONFIG_X86_32
- /*
- * Regardless of whether PCID is enumerated, the SDM says
- * that it can't be enabled in 32-bit mode.
- */
- setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
identify_boot_cpu();
if (!IS_ENABLED(CONFIG_SMP)) {
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -838,6 +838,14 @@ static void __init early_identify_cpu(st
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
}
void __init early_cpu_init(void)
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.4/x86-asm-use-register-variable-to-get-stack-pointer-value.patch
queue-4.4/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
queue-4.4/x86-asm-make-asm-alternative.h-safe-from-assembly.patch