From: Theodore Ts'o <tytso(a)mit.edu>
CVE-2018-1092
If the root directory has an i_links_count of zero, then when the file
system is mounted, then when ext4_fill_super() notices the problem and
tries to call iput() the root directory in the error return path,
ext4_evict_inode() will try to free the inode on disk, before all of
the file system structures are set up, and this will result in an OOPS
caused by a NULL pointer dereference.
This issue has been assigned CVE-2018-1092.
https://bugzilla.kernel.org/show_bug.cgi?id=199179https://bugzilla.redhat.com/show_bug.cgi?id=1560777
Reported-by: Wen Xu <wen.xu(a)gatech.edu>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)vger.kernel.org
(cherry-picked from 8e4b5eae5decd9dfe5a4ee369c22028f90ab4c44)
Signed-off-by: Khalid Elmously <khalid.elmously(a)canonical.com>
---
fs/ext4/inode.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3d4d1dccc8a1..398faeff1938 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4745,6 +4745,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
goto bad_inode;
raw_inode = ext4_raw_inode(&iloc);
+ if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
+ EXT4_ERROR_INODE(inode, "root inode unallocated");
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
--
2.17.0
commit c3856aeb29402e94ad9b3879030165cc6a4fdc56 upstream.
This fixes several bugs in the radix page fault handler relating to
the way large pages in the memory backing the guest were handled.
First, the check for large pages only checked for explicit huge pages
and missed transparent huge pages. Then the check that the addresses
(host virtual vs. guest physical) had appropriate alignment was
wrong, meaning that the code never put a large page in the partition
scoped radix tree; it was always demoted to a small page.
Fixing this exposed bugs in kvmppc_create_pte(). We were never
invalidating a 2MB PTE, which meant that if a page was initially
faulted in without write permission and the guest then attempted
to store to it, we would never update the PTE to have write permission.
If we find a valid 2MB PTE in the PMD, we need to clear it and
do a TLB invalidation before installing either the new 2MB PTE or
a pointer to a page table page.
This also corrects an assumption that get_user_pages_fast would set
the _PAGE_DIRTY bit if we are writing, which is not true. Instead we
mark the page dirty explicitly with set_page_dirty_lock(). This
also means we don't need the dirty bit set on the host PTE when
providing write access on a read fault.
[paulus(a)ozlabs.org - use mark_pages_dirty instead of
kvmppc_update_dirty_map]
Signed-off-by: Paul Mackerras <paulus(a)ozlabs.org>
---
arch/powerpc/kvm/book3s_64_mmu_radix.c | 72 ++++++++++++++++++++++------------
1 file changed, 46 insertions(+), 26 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index c5d7435455f1..27a41695fcfd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -19,6 +19,9 @@
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
+static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn, unsigned int order);
+
/*
* Supported radix tree geometry.
* Like p9, we support either 5 or 9 bits at the first (lowest) level,
@@ -195,6 +198,12 @@ static void kvmppc_pte_free(pte_t *ptep)
kmem_cache_free(kvm_pte_cache, ptep);
}
+/* Like pmd_huge() and pmd_large(), but works regardless of config options */
+static inline int pmd_is_leaf(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
unsigned int level, unsigned long mmu_seq)
{
@@ -219,7 +228,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
else
new_pmd = pmd_alloc_one(kvm->mm, gpa);
- if (level == 0 && !(pmd && pmd_present(*pmd)))
+ if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
new_ptep = kvmppc_pte_alloc();
/* Check if we might have been invalidated; let the guest retry if so */
@@ -244,12 +253,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
new_pmd = NULL;
}
pmd = pmd_offset(pud, gpa);
- if (pmd_large(*pmd)) {
- /* Someone else has instantiated a large page here; retry */
- ret = -EAGAIN;
- goto out_unlock;
- }
- if (level == 1 && !pmd_none(*pmd)) {
+ if (pmd_is_leaf(*pmd)) {
+ unsigned long lgpa = gpa & PMD_MASK;
+
+ /*
+ * If we raced with another CPU which has just put
+ * a 2MB pte in after we saw a pte page, try again.
+ */
+ if (level == 0 && !new_ptep) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ /* Valid 2MB page here already, remove it */
+ old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+ ~0UL, 0, lgpa, PMD_SHIFT);
+ kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
+ if (old & _PAGE_DIRTY) {
+ unsigned long gfn = lgpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (memslot)
+ mark_pages_dirty(kvm, memslot, gfn,
+ PMD_SHIFT - PAGE_SHIFT);
+ }
+ } else if (level == 1 && !pmd_none(*pmd)) {
/*
* There's a page table page here, but we wanted
* to install a large page. Tell the caller and let
@@ -412,28 +439,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
} else {
page = pages[0];
pfn = page_to_pfn(page);
- if (PageHuge(page)) {
- page = compound_head(page);
- pte_size <<= compound_order(page);
+ if (PageCompound(page)) {
+ pte_size <<= compound_order(compound_head(page));
/* See if we can insert a 2MB large-page PTE here */
if (pte_size >= PMD_SIZE &&
- (gpa & PMD_MASK & PAGE_MASK) ==
- (hva & PMD_MASK & PAGE_MASK)) {
+ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+ (hva & (PMD_SIZE - PAGE_SIZE))) {
level = 1;
pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
}
}
/* See if we can provide write access */
if (writing) {
- /*
- * We assume gup_fast has set dirty on the host PTE.
- */
pgflags |= _PAGE_WRITE;
} else {
local_irq_save(flags);
ptep = find_current_mm_pte(current->mm->pgd,
hva, NULL, NULL);
- if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
+ if (ptep && pte_write(*ptep))
pgflags |= _PAGE_WRITE;
local_irq_restore(flags);
}
@@ -459,18 +482,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pte = pfn_pte(pfn, __pgprot(pgflags));
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
}
- if (ret == 0 || ret == -EAGAIN)
- ret = RESUME_GUEST;
if (page) {
- /*
- * We drop pages[0] here, not page because page might
- * have been set to the head page of a compound, but
- * we have to drop the reference on the correct tail
- * page to match the get inside gup()
- */
- put_page(pages[0]);
+ if (!ret && (pgflags & _PAGE_WRITE))
+ set_page_dirty_lock(page);
+ put_page(page);
}
+
+ if (ret == 0 || ret == -EAGAIN)
+ ret = RESUME_GUEST;
return ret;
}
@@ -676,7 +696,7 @@ void kvmppc_free_radix(struct kvm *kvm)
continue;
pmd = pmd_offset(pud, 0);
for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
- if (pmd_huge(*pmd)) {
+ if (pmd_is_leaf(*pmd)) {
pmd_clear(pmd);
continue;
}
--
2.11.0
From: Finley Xiao <finley.xiao(a)rock-chips.com>
Solve the pd could only ever turn off but never turn them on again,
If the pd registers have the writemask bits.
Fix up the code error for commit:
commit 79bb17ce8edb3141339b5882e372d0ec7346217c
Author: Elaine Zhang <zhangqing(a)rock-chips.com>
Date: Fri Dec 23 11:47:52 2016 +0800
soc: rockchip: power-domain: Support domain control in hiword-registers
New Rockchips SoCs may have their power-domain control in registers
using a writemask-based access scheme (upper 16bit being the write
mask). So add a DOMAIN_M type and handle this case accordingly.
Signed-off-by: Elaine Zhang <zhangqing(a)rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko(a)sntech.de>
Signed-off-by: Finley Xiao <finley.xiao(a)rock-chips.com>
Signed-off-by: Elaine Zhang <zhangqing(a)rock-chips.com>
---
drivers/soc/rockchip/pm_domains.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c
index ebd7c41898c0..01d4ba26a054 100644
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -264,7 +264,7 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
return;
else if (pd->info->pwr_w_mask)
regmap_write(pmu->regmap, pmu->info->pwr_offset,
- on ? pd->info->pwr_mask :
+ on ? pd->info->pwr_w_mask :
(pd->info->pwr_mask | pd->info->pwr_w_mask));
else
regmap_update_bits(pmu->regmap, pmu->info->pwr_offset,
--
1.9.1
commit a8b48a4dccea77e29462e59f1dbf0d5aa1ff167c upstream.
This fixes a bug where the trap number that is returned by
__kvmppc_vcore_entry gets corrupted. The effect of the corruption
is that IPIs get ignored on POWER9 systems when the IPI is sent via
a doorbell interrupt to a CPU which is executing in a KVM guest.
The effect of the IPI being ignored is often that another CPU locks
up inside smp_call_function_many() (and if that CPU is holding a
spinlock, other CPUs then lock up inside raw_spin_lock()).
The trap number is currently held in register r12 for most of the
assembly-language part of the guest exit path. In that path, we
call kvmppc_subcore_exit_guest(), which is a C function, without
restoring r12 afterwards. Depending on the kernel config and the
compiler, it may modify r12 or it may not, so some config/compiler
combinations see the bug and others don't.
To fix this, we arrange for the trap number to be stored on the
stack from the point where kvmhv_commence_exit is called until the
end of the function, then the trap number is loaded and returned in
r12 as before.
Cc: stable(a)vger.kernel.org # v4.8+
Fixes: fd7bacbca47a ("KVM: PPC: Book3S HV: Fix TB corruption in guest exit path on HMI interrupt")
Signed-off-by: Paul Mackerras <paulus(a)ozlabs.org>
---
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2b3194b9608f..663a398449b7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -308,7 +308,6 @@ kvm_novcpu_exit:
stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
- lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
@@ -1136,6 +1135,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
secondary_too_late:
li r12, 0
+ stw r12, STACK_SLOT_TRAP(r1)
cmpdi r4, 0
beq 11f
stw r12, VCPU_TRAP(r4)
@@ -1445,12 +1445,12 @@ mc_cont:
1:
#endif /* CONFIG_KVM_XICS */
+ stw r12, STACK_SLOT_TRAP(r1)
mr r3, r12
/* Increment exit count, poke other threads to exit */
bl kvmhv_commence_exit
nop
ld r9, HSTATE_KVM_VCPU(r13)
- lwz r12, VCPU_TRAP(r9)
/* Stop others sending VCPU interrupts to this physical CPU */
li r0, -1
@@ -1816,6 +1816,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
* have to coordinate the hardware threads.
+ * Here STACK_SLOT_TRAP(r1) contains the trap number.
*/
kvmhv_switch_to_host:
/* Secondary threads wait for primary to do partition switch */
@@ -1868,11 +1869,11 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* If HMI, call kvmppc_realmode_hmi_handler() */
+ lwz r12, STACK_SLOT_TRAP(r1)
cmpwi r12, BOOK3S_INTERRUPT_HMI
bne 27f
bl kvmppc_realmode_hmi_handler
nop
- li r12, BOOK3S_INTERRUPT_HMI
/*
* At this point kvmppc_realmode_hmi_handler would have resync-ed
* the TB. Hence it is not required to subtract guest timebase
@@ -1950,6 +1951,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
+ lwz r12, STACK_SLOT_TRAP(r1) /* return trap # in r12 */
ld r0, SFS+PPC_LR_STKOFF(r1)
addi r1, r1, SFS
mtlr r0
--
2.11.0
Currently in ext4_punch_hole we're going to skip the mtime update if
there are no actual blocks to release. However we've actually modified
the file by zeroing the partial block so the mtime should be updated.
Moreover the sync and datasync handling is skipped as well, which is
also wrong. Fix it.
Signed-off-by: Lukas Czerner <lczerner(a)redhat.com>
Reported-by: Joe Habermann <joe.habermann(a)quantum.com>
Cc: <stable(a)vger.kernel.org>
---
fs/ext4/inode.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1e50c5e..6b4c5c0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4298,28 +4298,28 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
EXT4_BLOCK_SIZE_BITS(sb);
stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
- /* If there are no blocks to remove, return now */
- if (first_block >= stop_block)
- goto out_stop;
+ /* If there are blocks to remove, do it */
+ if (stop_block > first_block) {
- down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode);
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_preallocations(inode);
- ret = ext4_es_remove_extent(inode, first_block,
- stop_block - first_block);
- if (ret) {
- up_write(&EXT4_I(inode)->i_data_sem);
- goto out_stop;
- }
+ ret = ext4_es_remove_extent(inode, first_block,
+ stop_block - first_block);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_data_sem);
+ goto out_stop;
+ }
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- ret = ext4_ext_remove_space(inode, first_block,
- stop_block - 1);
- else
- ret = ext4_ind_remove_space(handle, inode, first_block,
- stop_block);
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+ ret = ext4_ext_remove_space(inode, first_block,
+ stop_block - 1);
+ else
+ ret = ext4_ind_remove_space(handle, inode, first_block,
+ stop_block);
- up_write(&EXT4_I(inode)->i_data_sem);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ }
if (IS_SYNC(inode))
ext4_handle_sync(handle);
--
2.7.5
When ext4_ind_map_blocks() computes a length of a hole, it doesn't count
with the fact that mapped offset may be somewhere in the middle of the
completely empty subtree. In such case it will return too large length
of the hole which then results in lseek(SEEK_DATA) to end up returning
an incorrect offset beyond the end of the hole.
Fix the problem by correctly taking offset within a subtree into account
when computing a length of a hole.
Fixes: facab4d9711e7aa3532cb82643803e8f1b9518e8
CC: stable(a)vger.kernel.org
Reported-by: Jeff Mahoney <jeffm(a)suse.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/ext4/indirect.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
I'll submit corresponding fstest shortly.
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index c32802c956d5..bf7fa1507e81 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -561,10 +561,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
int i;
- /* Count number blocks in a subtree under 'partial' */
- count = 1;
- for (i = 0; partial + i != chain + depth - 1; i++)
- count *= epb;
+ /*
+ * Count number blocks in a subtree under 'partial'. At each
+ * level we count number of complete empty subtrees beyond
+ * current offset and then descend into the subtree only
+ * partially beyond current offset.
+ */
+ count = 0;
+ for (i = partial - chain + 1; i < depth; i++)
+ count = count * epb + (epb - offsets[i] - 1);
+ count++;
/* Fill in size of a hole we found */
map->m_pblk = 0;
map->m_len = min_t(unsigned int, map->m_len, count);
--
2.13.6
Hi,
This is the 3rd version of bugfix series for kprobes on arm.
This series fixes 4 different issues which I found.
- Fix to use smp_processor_id() after disabling preemption.
- Prohibit probing on optimized_callback() for avoiding
recursive probe.
- Prohibit kprobes on do_undefinstr() by same reason.
- Prohibit kprobes on get_user() by same reason.
>From v2, I included another 2 bugfixes (1/4 and 2/4)
which are not merged yet, and added "Cc: stable(a)vger.kernel.org",
since there are obvious bugs.
Thanks,
---
Masami Hiramatsu (4):
arm: kprobes: Fix to use get_kprobe_ctlblk after irq-disabed
arm: kprobes: Prohibit probing on optimized_callback
arm: kprobes: Prohibit kprobes on do_undefinstr
arm: kprobes: Prohibit kprobes on get_user functions
arch/arm/include/asm/assembler.h | 10 ++++++++++
arch/arm/kernel/traps.c | 5 ++++-
arch/arm/lib/getuser.S | 10 ++++++++++
arch/arm/probes/kprobes/opt-arm.c | 4 +++-
4 files changed, 27 insertions(+), 2 deletions(-)
--
Masami Hiramatsu (Linaro) <mhiramat(a)kernel.org>
This is the start of the stable review cycle for the 4.4.130 release.
There are 50 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun Apr 29 13:56:42 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.130-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.130-rc1
Heiko Carstens <heiko.carstens(a)de.ibm.com>
s390/uprobes: implement arch_uretprobe_is_alive()
Sebastian Ott <sebott(a)linux.ibm.com>
s390/cio: update chpid descriptor after resource accessibility event
Dan Carpenter <dan.carpenter(a)oracle.com>
cdrom: information leak in cdrom_ioctl_media_changed()
Martin K. Petersen <martin.petersen(a)oracle.com>
scsi: mptsas: Disable WRITE SAME
Eric Dumazet <edumazet(a)google.com>
ipv6: add RTA_TABLE and RTA_PREFSRC to rtm_ipv6_policy
Eric Dumazet <edumazet(a)google.com>
net: af_packet: fix race in PACKET_{R|T}X_RING
Eric Dumazet <edumazet(a)google.com>
tcp: md5: reject TCP_MD5SIG or TCP_MD5SIG_EXT on established sockets
Wolfgang Bumiller <w.bumiller(a)proxmox.com>
net: fix deadlock while clearing neighbor proxy table
Eric Dumazet <edumazet(a)google.com>
tipc: add policy for TIPC_NLA_NET_ADDR
Cong Wang <xiyou.wangcong(a)gmail.com>
llc: fix NULL pointer deref for SOCK_ZAPPED
Cong Wang <xiyou.wangcong(a)gmail.com>
llc: hold llc_sap before release_sock()
Xin Long <lucien.xin(a)gmail.com>
sctp: do not check port in sctp_inet6_cmp_addr
Toshiaki Makita <makita.toshiaki(a)lab.ntt.co.jp>
vlan: Fix reading memory beyond skb->tail in skb_vlan_tagged_multi
Guillaume Nault <g.nault(a)alphalink.fr>
pppoe: check sockaddr length in pppoe_connect()
Willem de Bruijn <willemb(a)google.com>
packet: fix bitfield update race
Xin Long <lucien.xin(a)gmail.com>
team: fix netconsole setup over team
Paolo Abeni <pabeni(a)redhat.com>
team: avoid adding twice the same option to the event list
Jann Horn <jannh(a)google.com>
tcp: don't read out-of-bounds opsize
Cong Wang <xiyou.wangcong(a)gmail.com>
llc: delete timers synchronously in llc_sk_free()
Eric Dumazet <edumazet(a)google.com>
net: validate attribute sizes in neigh_dump_table()
Guillaume Nault <g.nault(a)alphalink.fr>
l2tp: check sockaddr length in pppol2tp_connect()
Eric Biggers <ebiggers(a)google.com>
KEYS: DNS: limit the length of option strings
Xin Long <lucien.xin(a)gmail.com>
bonding: do not set slave_dev npinfo before slave_enable_netpoll in bond_enslave
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: correct module section names for expoline code revert
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: correct nospec auto detection init order
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: add sysfs attributes for spectre
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: report spectre mitigation via syslog
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: add automatic detection of the spectre defense
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: move nobp parameter functions to nospec-branch.c
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390/entry.S: fix spurious zeroing of r0
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: do not bypass BPENTER for interrupt system calls
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: Replace IS_ENABLED(EXPOLINE_*) with IS_ENABLED(CONFIG_EXPOLINE_*)
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: introduce execute-trampolines for branches
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: run user space and KVM guests with modified branch prediction
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: add options to change branch prediction behaviour for the kernel
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390/alternative: use a copy of the facility bit mask
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: add optimized array_index_mask_nospec
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: scrub registers on kernel entry and KVM exit
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
KVM: s390: wire up bpb feature
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: enable CPU alternatives unconditionally
Martin Schwidefsky <schwidefsky(a)de.ibm.com>
s390: introduce CPU alternatives
Karthikeyan Periyasamy <periyasa(a)codeaurora.org>
Revert "ath10k: send (re)assoc peer command when NSS changed"
Sahitya Tummala <stummala(a)codeaurora.org>
jbd2: fix use after free in kjournald2()
Felix Fietkau <nbd(a)nbd.name>
ath9k_hw: check if the chip failed to wake up
Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
Input: drv260x - fix initializing overdrive voltage
Grant Grundler <grundler(a)chromium.org>
r8152: add Linksys USB3GIGV1 id
Chen Feng <puck.chen(a)hisilicon.com>
staging: ion : Donnot wakeup kswapd in ion system alloc
Jiri Olsa <jolsa(a)kernel.org>
perf: Return proper values for user stack errors
Xiaoming Gao <gxm.linux.kernel(a)gmail.com>
x86/tsc: Prevent 32bit truncation in calc_hpet_ref()
Steve French <smfrench(a)gmail.com>
cifs: do not allow creating sockets except with SMB1 posix exensions
-------------
Diffstat:
Documentation/kernel-parameters.txt | 3 +
Makefile | 4 +-
arch/s390/Kconfig | 47 +++++
arch/s390/Makefile | 10 ++
arch/s390/include/asm/alternative.h | 149 +++++++++++++++
arch/s390/include/asm/barrier.h | 24 +++
arch/s390/include/asm/facility.h | 18 ++
arch/s390/include/asm/kvm_host.h | 3 +-
arch/s390/include/asm/lowcore.h | 7 +-
arch/s390/include/asm/nospec-branch.h | 17 ++
arch/s390/include/asm/processor.h | 4 +
arch/s390/include/asm/thread_info.h | 4 +
arch/s390/include/uapi/asm/kvm.h | 3 +
arch/s390/kernel/Makefile | 5 +-
arch/s390/kernel/alternative.c | 112 ++++++++++++
arch/s390/kernel/early.c | 5 +
arch/s390/kernel/entry.S | 250 +++++++++++++++++++++++---
arch/s390/kernel/ipl.c | 1 +
arch/s390/kernel/module.c | 65 ++++++-
arch/s390/kernel/nospec-branch.c | 169 +++++++++++++++++
arch/s390/kernel/processor.c | 18 ++
arch/s390/kernel/setup.c | 14 +-
arch/s390/kernel/smp.c | 7 +-
arch/s390/kernel/uprobes.c | 9 +
arch/s390/kernel/vmlinux.lds.S | 37 ++++
arch/s390/kvm/kvm-s390.c | 13 +-
arch/x86/kernel/tsc.c | 2 +-
drivers/cdrom/cdrom.c | 2 +-
drivers/input/misc/drv260x.c | 2 +-
drivers/message/fusion/mptsas.c | 1 +
drivers/net/bonding/bond_main.c | 3 +-
drivers/net/ppp/pppoe.c | 4 +
drivers/net/team/team.c | 38 +++-
drivers/net/usb/cdc_ether.c | 10 ++
drivers/net/usb/r8152.c | 2 +
drivers/net/wireless/ath/ath10k/mac.c | 5 +-
drivers/net/wireless/ath/ath9k/hw.c | 4 +
drivers/s390/char/Makefile | 2 +
drivers/s390/cio/chsc.c | 14 +-
drivers/staging/android/ion/ion_system_heap.c | 2 +-
fs/cifs/dir.c | 9 +-
fs/jbd2/journal.c | 2 +-
include/linux/if_vlan.h | 7 +-
include/net/llc_conn.h | 1 +
include/uapi/linux/kvm.h | 1 +
kernel/events/core.c | 4 +-
net/core/dev.c | 2 +-
net/core/neighbour.c | 40 +++--
net/dns_resolver/dns_key.c | 13 +-
net/ipv4/tcp.c | 6 +-
net/ipv4/tcp_input.c | 7 +-
net/ipv6/route.c | 2 +
net/l2tp/l2tp_ppp.c | 7 +
net/llc/af_llc.c | 14 +-
net/llc/llc_c_ac.c | 9 +-
net/llc/llc_conn.c | 22 ++-
net/packet/af_packet.c | 88 ++++++---
net/packet/internal.h | 10 +-
net/sctp/ipv6.c | 60 +++----
net/tipc/net.c | 3 +-
60 files changed, 1228 insertions(+), 168 deletions(-)
commit ece1397cbc89c51914fae1aec729539cfd8bd62b upstream
Some variants of the Arm Cortex-55 cores (r0p0, r0p1, r1p0) suffer
from an erratum 1024718, which causes incorrect updates when DBM/AP
bits in a page table entry is modified without a break-before-make
sequence. The work around is to disable the hardware DBM feature
on the affected cores. The hardware Access Flag management features
is not affected.
The hardware DBM feature is a non-conflicting capability, i.e, the
kernel could handle cores using the feature and those without having
the features running at the same time. So this work around is detected
at early boot time, rather than delaying it until the CPUs are brought
up into the kernel with MMU turned on. This also avoids other complexities
with late CPUs turning online, with or without the hardware DBM features.
Cc: stable(a)vger.kernel.org # v4.9
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
---
Note: The upstream commit is on top of a reworked capability
infrastructure for arm64 heterogeneous systems, which allows
delaying the CPU model checks. This backport is based on the
original version of the patch [0], which checks the affected
CPU models during the early boot.
[0] https://lkml.kernel.org/r/20180116102323.3470-1-suzuki.poulose@arm.com
---
Documentation/arm64/silicon-errata.txt | 1 +
arch/arm64/Kconfig | 14 ++++++++++++
arch/arm64/include/asm/assembler.h | 40 ++++++++++++++++++++++++++++++++++
arch/arm64/include/asm/cputype.h | 5 +++++
arch/arm64/mm/proc.S | 5 +++++
5 files changed, 65 insertions(+)
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index d11af52..ac9489f 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -54,6 +54,7 @@ stable kernels.
| ARM | Cortex-A57 | #852523 | N/A |
| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 |
| ARM | Cortex-A72 | #853709 | N/A |
+| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 |
| ARM | MMU-500 | #841119,#826419 | N/A |
| | | | |
| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 90e58bb..d0df361 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -427,6 +427,20 @@ config ARM64_ERRATUM_843419
If unsure, say Y.
+config ARM64_ERRATUM_1024718
+ bool "Cortex-A55: 1024718: Update of DBM/AP bits without break before make might result in incorrect update"
+ default y
+ help
+ This option adds work around for Arm Cortex-A55 Erratum 1024718.
+
+ Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect
+ update of the hardware dirty bit when the DBM/AP bits are updated
+ without a break-before-make. The work around is to disable the usage
+ of hardware DBM locally on the affected cores. CPUs not affected by
+ erratum will continue to use the feature.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index e60375c..bfcfec3 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -25,6 +25,7 @@
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
+#include <asm/cputype.h>
#include <asm/page.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
@@ -435,4 +436,43 @@ alternative_endif
and \phys, \pte, #(((1 << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
.endm
+/*
+ * Check the MIDR_EL1 of the current CPU for a given model and a range of
+ * variant/revision. See asm/cputype.h for the macros used below.
+ *
+ * model: MIDR_CPU_MODEL of CPU
+ * rv_min: Minimum of MIDR_CPU_VAR_REV()
+ * rv_max: Maximum of MIDR_CPU_VAR_REV()
+ * res: Result register.
+ * tmp1, tmp2, tmp3: Temporary registers
+ *
+ * Corrupts: res, tmp1, tmp2, tmp3
+ * Returns: 0, if the CPU id doesn't match. Non-zero otherwise
+ */
+ .macro cpu_midr_match model, rv_min, rv_max, res, tmp1, tmp2, tmp3
+ mrs \res, midr_el1
+ mov_q \tmp1, (MIDR_REVISION_MASK | MIDR_VARIANT_MASK)
+ mov_q \tmp2, MIDR_CPU_MODEL_MASK
+ and \tmp3, \res, \tmp2 // Extract model
+ and \tmp1, \res, \tmp1 // rev & variant
+ mov_q \tmp2, \model
+ cmp \tmp3, \tmp2
+ cset \res, eq
+ cbz \res, .Ldone\@ // Model matches ?
+
+ .if (\rv_min != 0) // Skip min check if rv_min == 0
+ mov_q \tmp3, \rv_min
+ cmp \tmp1, \tmp3
+ cset \res, ge
+ .endif // \rv_min != 0
+ /* Skip rv_max check if rv_min == rv_max && rv_min != 0 */
+ .if ((\rv_min != \rv_max) || \rv_min == 0)
+ mov_q \tmp2, \rv_max
+ cmp \tmp1, \tmp2
+ cset \tmp2, le
+ and \res, \res, \tmp2
+ .endif
+.Ldone\@:
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 9ee3038..39d1db6 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -56,6 +56,9 @@
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
+#define MIDR_CPU_VAR_REV(var, rev) \
+ (((var) << MIDR_VARIANT_SHIFT) | (rev))
+
#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
MIDR_ARCHITECTURE_MASK)
@@ -74,6 +77,7 @@
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
+#define ARM_CPU_PART_CORTEX_A55 0xD05
#define ARM_CPU_PART_CORTEX_A57 0xD07
#define ARM_CPU_PART_CORTEX_A72 0xD08
#define ARM_CPU_PART_CORTEX_A53 0xD03
@@ -89,6 +93,7 @@
#define BRCM_CPU_PART_VULCAN 0x516
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 619da1c..66cce21 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -425,6 +425,11 @@ ENTRY(__cpu_setup)
cbz x9, 2f
cmp x9, #2
b.lt 1f
+#ifdef CONFIG_ARM64_ERRATUM_1024718
+ /* Disable hardware DBM on Cortex-A55 r0p0, r0p1 & r1p0 */
+ cpu_midr_match MIDR_CORTEX_A55, MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(1, 0), x1, x2, x3, x4
+ cbnz x1, 1f
+#endif
orr x10, x10, #TCR_HD // hardware Dirty flag update
1: orr x10, x10, #TCR_HA // hardware Access flag update
2:
--
2.7.4
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9d219554d9bf59875b4e571a0392d620e8954879 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Date: Wed, 2 May 2018 10:52:55 -0700
Subject: [PATCH] drm/i915: Adjust eDP's logical vco in a reliable place.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
On intel_dp_compute_config() we were calculating the needed vco
for eDP on gen9 and we stashing it in
intel_atomic_state.cdclk.logical.vco
However few moments later on intel_modeset_checks() we fully
replace entire intel_atomic_state.cdclk.logical with
dev_priv->cdclk.logical fully overwriting the logical desired
vco for eDP on gen9.
So, with wrong VCO value we end up with wrong desired cdclk, but
also it will raise a lot of WARNs: On gen9, when we read
CDCLK_CTL to verify if we configured properly the desired
frequency the CD Frequency Select bits [27:26] == 10b can mean
337.5 or 308.57 MHz depending on the VCO. So if we have wrong
VCO value stashed we will believe the frequency selection didn't
stick and start to raise WARNs of cdclk mismatch.
[ 42.857519] [drm:intel_dump_cdclk_state [i915]] Changing CDCLK to 308571 kHz, VCO 8640000 kHz, ref 24000 kHz, bypass 24000 kHz, voltage level 0
[ 42.897269] cdclk state doesn't match!
[ 42.901052] WARNING: CPU: 5 PID: 1116 at drivers/gpu/drm/i915/intel_cdclk.c:2084 intel_set_cdclk+0x5d/0x110 [i915]
[ 42.938004] RIP: 0010:intel_set_cdclk+0x5d/0x110 [i915]
[ 43.155253] WARNING: CPU: 5 PID: 1116 at drivers/gpu/drm/i915/intel_cdclk.c:2084 intel_set_cdclk+0x5d/0x110 [i915]
[ 43.170277] [drm:intel_dump_cdclk_state [i915]] [hw state] 337500 kHz, VCO 8100000 kHz, ref 24000 kHz, bypass 24000 kHz, voltage level 0
[ 43.182566] [drm:intel_dump_cdclk_state [i915]] [sw state] 308571 kHz, VCO 8640000 kHz, ref 24000 kHz, bypass 24000 kHz, voltage level 0
v2: Move the entire eDP's vco logical adjustment to inside
the skl_modeset_calc_cdclk as suggested by Ville.
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Fixes: bb0f4aab0e76 ("drm/i915: Track full cdclk state for the logical and actual cdclk frequencies")
Cc: <stable(a)vger.kernel.org> # v4.12+
Link: https://patchwork.freedesktop.org/patch/msgid/20180502175255.5344-1-rodrigo…
(cherry picked from commit 3297234a05ab1e90091b0574db4c397ef0e90d5f)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 32d24c69da3c..704ddb4d3ca7 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -2302,9 +2302,44 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state)
return 0;
}
+static int skl_dpll0_vco(struct intel_atomic_state *intel_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev);
+ struct intel_crtc *crtc;
+ struct intel_crtc_state *crtc_state;
+ int vco, i;
+
+ vco = intel_state->cdclk.logical.vco;
+ if (!vco)
+ vco = dev_priv->skl_preferred_vco_freq;
+
+ for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
+ if (!crtc_state->base.enable)
+ continue;
+
+ if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP))
+ continue;
+
+ /*
+ * DPLL0 VCO may need to be adjusted to get the correct
+ * clock for eDP. This will affect cdclk as well.
+ */
+ switch (crtc_state->port_clock / 2) {
+ case 108000:
+ case 216000:
+ vco = 8640000;
+ break;
+ default:
+ vco = 8100000;
+ break;
+ }
+ }
+
+ return vco;
+}
+
static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
{
- struct drm_i915_private *dev_priv = to_i915(state->dev);
struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
int min_cdclk, cdclk, vco;
@@ -2312,9 +2347,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
if (min_cdclk < 0)
return min_cdclk;
- vco = intel_state->cdclk.logical.vco;
- if (!vco)
- vco = dev_priv->skl_preferred_vco_freq;
+ vco = skl_dpll0_vco(intel_state);
/*
* FIXME should also account for plane ratio
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 9a4a51e79fa1..b7b4cfdeb974 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1881,26 +1881,6 @@ intel_dp_compute_config(struct intel_encoder *encoder,
reduce_m_n);
}
- /*
- * DPLL0 VCO may need to be adjusted to get the correct
- * clock for eDP. This will affect cdclk as well.
- */
- if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) {
- int vco;
-
- switch (pipe_config->port_clock / 2) {
- case 108000:
- case 216000:
- vco = 8640000;
- break;
- default:
- vco = 8100000;
- break;
- }
-
- to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco;
- }
-
if (!HAS_DDI(dev_priv))
intel_dp_set_clock(encoder, pipe_config);