UBIFS's recovery code strictly assumes that a deleted inode will never
come back, therefore it removes all data which belongs to that inode
as soon as it faces an inode with link count 0 in the replay list.
Before O_TMPFILE this assumption was perfectly fine. With O_TMPFILE
it can lead to data loss upon a power-cut.
Consider a journal with entries like:
0: inode X (nlink = 0) /* O_TMPFILE was created */
1: data for inode X /* Someone writes to the temp file */
2: inode X (nlink = 0) /* inode was changed, xattr, chmod, … */
3: inode X (nlink = 1) /* inode was re-linked via linkat() */
Upon replay of entry #2 UBIFS will drop all data that belongs to inode X,
this will lead to an empty file after mounting.
As a solution to this problem, scan the replay list for a re-link entry
before dropping data.
Fixes: 474b93704f32 ("ubifs: Implement O_TMPFILE")
Cc: stable(a)vger.kernel.org
Reported-by: Russell Senior <russell(a)personaltelco.net>
Reported-by: Rafał Miłecki <zajec5(a)gmail.com>
Signed-off-by: Richard Weinberger <richard(a)nod.at>
---
Russell, Rafał,
please give this patch another testing.
I'll also run it on different test systems before merging.
Thanks,
//richard
---
fs/ubifs/replay.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 4844538eb926..65a780685b82 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -209,6 +209,34 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
return ubifs_tnc_remove_range(c, &min_key, &max_key);
}
+/**
+ * inode_relinked - check whether inode in question will be re-linked.
+ * @c: UBIFS file-system description object
+ * @rino: replay entry to test
+ *
+ * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1.
+ * This case needs special care, otherwise all references to the inode will
+ * be removed upon the first replay entry of an inode with link count 0
+ * is found.
+ */
+static bool inode_relinked(struct ubifs_info *c, struct replay_entry *rino)
+{
+ struct replay_entry *r = rino;
+
+ ubifs_assert(c, rino->deletion);
+ ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY);
+
+ list_for_each_entry_from(r, &c->replay_list, list) {
+ if (key_inum(c, &r->key) == key_inum(c, &rino->key) &&
+ r->deletion == 0) {
+ ubifs_assert(c, r->sqnum > rino->sqnum);
+ return true;
+ }
+ }
+
+ return false;
+}
+
/**
* apply_replay_entry - apply a replay entry to the TNC.
* @c: UBIFS file-system description object
@@ -236,6 +264,11 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
{
ino_t inum = key_inum(c, &r->key);
+ if (inode_relinked(c, r)) {
+ err = 0;
+ break;
+ }
+
err = ubifs_tnc_remove_ino(c, inum);
break;
}
--
2.19.1
commit e259221763a40403d5bb232209998e8c45804ab8 ("fs: simplify the
generic_write_sync prototype") reworked callers of generic_write_sync(),
and ended up dropping the error return for the directio path. Prior to
that commit, in dio_complete(), an error would be bubbled up the stack,
but after that commit, errors passed on to dio_complete were eaten up.
This was reported on the list earlier, and a fix was proposed in
https://lore.kernel.org/lkml/20160921141539.GA17898@infradead.org/, but
never followed up with. We recently hit this bug in our testing where
fencing io errors, which were previously erroring out with EIO, were
being returned as successful operations after this commit.
The fix proposed on the list earlier was a little short -- it would have
still called generic_write_sync() in case `ret` already contained an
error. This fix ensures generic_write_sync() is only called when
there's no pending error in the write.
CC: stable(a)vger.kernel.org
Reported-by: Ravi Nankani <rnankani(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
Signed-off-by: Torsten Mehlan <tomeh(a)amazon.de>
Signed-off-by: Uwe Dannowski <uwed(a)amazon.de>
Signed-off-by: Amit Shah <aams(a)amazon.de>
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
---
fs/direct-io.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 093fb54cd316..199146036093 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -325,8 +325,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
*/
dio->iocb->ki_pos += transferred;
- if (dio->op == REQ_OP_WRITE)
- ret = generic_write_sync(dio->iocb, transferred);
+ if (ret > 0 && dio->op == REQ_OP_WRITE)
+ ret = generic_write_sync(dio->iocb, ret);
dio->iocb->ki_complete(dio->iocb, ret, 0);
}
--
2.16.2
Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B
PageTransCompoundMap() returns true for hugetlbfs and THP
hugepages. This behaviour incorrectly leads to stage 2 faults for
unsupported hugepage sizes (e.g., 64K hugepage with 4K pages) to be
treated as THP faults.
Tighten the check to filter out hugetlbfs pages. This also leads to
consistently mapping all unsupported hugepage sizes as PTE level
entries at stage 2.
Signed-off-by: Punit Agrawal <punit.agrawal(a)arm.com>
Reviewed-by: Suzuki Poulose <suzuki.poulose(a)arm.com>
Cc: Christoffer Dall <christoffer.dall(a)arm.com>
Cc: Marc Zyngier <marc.zyngier(a)arm.com>
Cc: stable(a)vger.kernel.org # v4.13+
---
virt/kvm/arm/mmu.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 7e477b3cae5b..c23a1b323aad 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1231,8 +1231,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
kvm_pfn_t pfn = *pfnp;
gfn_t gfn = *ipap >> PAGE_SHIFT;
+ struct page *page = pfn_to_page(pfn);
- if (PageTransCompoundMap(pfn_to_page(pfn))) {
+ /*
+ * PageTransCompoundMap() returns true for THP and
+ * hugetlbfs. Make sure the adjustment is done only for THP
+ * pages.
+ */
+ if (!PageHuge(page) && PageTransCompoundMap(page)) {
unsigned long mask;
/*
* The address we faulted on is backed by a transparent huge
--
2.18.0
From: Waiman Long <longman(a)redhat.com>
[ Upstream commit 9506a7425b094d2f1d9c877ed5a78f416669269b ]
It was found that when debug_locks was turned off because of a problem
found by the lockdep code, the system performance could drop quite
significantly when the lock_stat code was also configured into the
kernel. For instance, parallel kernel build time on a 4-socket x86-64
server nearly doubled.
Further analysis into the cause of the slowdown traced back to the
frequent call to debug_locks_off() from the __lock_acquired() function
probably due to some inconsistent lockdep states with debug_locks
off. The debug_locks_off() function did an unconditional atomic xchg
to write a 0 value into debug_locks which had already been set to 0.
This led to severe cacheline contention in the cacheline that held
debug_locks. As debug_locks is being referenced in quite a few different
places in the kernel, this greatly slows down system performance.
To prevent that thrashing of the debug_locks cacheline, lock_acquired()
and lock_contended() now check the state of debug_locks before
proceeding. The debug_locks_off() function is also modified to check
debug_locks before calling __debug_locks_off().
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Paul E. McKenney <paulmck(a)linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Will Deacon <will.deacon(a)arm.com>
Link: http://lkml.kernel.org/r/1539913518-15598-1-git-send-email-longman@redhat.c…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/locking/lockdep.c | 4 ++--
lib/debug_locks.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index f99008534275..fb90ca3a296e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3808,7 +3808,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat))
+ if (unlikely(!lock_stat || !debug_locks))
return;
if (unlikely(current->lockdep_recursion))
@@ -3828,7 +3828,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat))
+ if (unlikely(!lock_stat || !debug_locks))
return;
if (unlikely(current->lockdep_recursion))
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 96c4c633d95e..124fdf238b3d 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
*/
int debug_locks_off(void)
{
- if (__debug_locks_off()) {
+ if (debug_locks && __debug_locks_off()) {
if (!debug_locks_silent) {
console_verbose();
return 1;
--
2.17.1
From: Masahisa Kojima <masahisa.kojima(a)linaro.org>
[ Upstream commit 8d5b0bf611ec5b7618d5b772dddc93b8afa78cb8 ]
We observed that packets and bytes count are not reset
when user performs interface down. Eventually, tx queue is
exhausted and packets will not be sent out.
To avoid this problem, reset the tx queue in ndo_stop.
Fixes: 533dd11a12f6 ("net: socionext: Add Synquacer NetSec driver")
Signed-off-by: Masahisa Kojima <masahisa.kojima(a)linaro.org>
Signed-off-by: Yoshitoyo Osaki <osaki.yoshitoyo(a)socionext.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/ethernet/socionext/netsec.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index e080d3e7c582..4d7d53fbc0ef 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -945,6 +945,9 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
dring->head = 0;
dring->tail = 0;
dring->pkt_cnt = 0;
+
+ if (id == NETSEC_RING_TX)
+ netdev_reset_queue(priv->ndev);
}
static void netsec_free_dring(struct netsec_priv *priv, int id)
--
2.17.1
We need to make sure that the carrier check polling is disabled
while suspending. Otherwise we can end up with usbnet_read_cmd()
being issued when only usbnet_read_cmd_nopm() is allowed. If this
happens, read operations lock up.
Fixes: d69d169493 ("usbnet: smsc95xx: fix link detection for disabled autonegotiation")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Frieder Schrempf <frieder.schrempf(a)kontron.de>
---
drivers/net/usb/smsc95xx.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 262e7a3..3bc9633 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1592,6 +1592,8 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ cancel_delayed_work_sync(&pdata->carrier_check);
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
@@ -1840,6 +1842,11 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
*/
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+
+ if (ret)
+ schedule_delayed_work(&pdata->carrier_check,
+ CARRIER_CHECK_DELAY);
+
return ret;
}
--
2.7.4
Dear Friend,
My name is Mr. Edward Yuan, a consultant/broker. I know you might be a bit apprehensive because you do not know me. Nevertheless, I have a proposal on behalf of a client, a lucrative business that might be of mutual benefit to you.
If interested in this proposition please kindly and urgently contact me for more details.
Best Regards.
Mr. Edward Yuan.
---
This email has been checked for viruses by AVG.
https://www.avg.com
The patch titled
Subject: memory_hotplug: cond_resched in __remove_pages
has been added to the -mm tree. Its filename is
memory_hotplug-cond_resched-in-__remove_pages.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/memory_hotplug-cond_resched-in-__r…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/memory_hotplug-cond_resched-in-__r…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Michal Hocko <mhocko(a)suse.com>
Subject: memory_hotplug: cond_resched in __remove_pages
We have received a bug report that unbinding a large pmem (>1TB) can
result in a soft lockup:
[ 380.339203] NMI watchdog: BUG: soft lockup - CPU#9 stuck for 23s! [ndctl:4365]
[...]
[ 380.339316] Supported: Yes
[ 380.339318] CPU: 9 PID: 4365 Comm: ndctl Not tainted 4.12.14-94.40-default #1 SLE12-SP4
[ 380.339318] Hardware name: Intel Corporation S2600WFD/S2600WFD, BIOS SE5C620.86B.01.00.0833.051120182255 05/11/2018
[ 380.339319] task: ffff9cce7d4410c0 task.stack: ffffbe9eb1bc4000
[ 380.339325] RIP: 0010:__put_page+0x62/0x80
[ 380.339326] RSP: 0018:ffffbe9eb1bc7d30 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10
[ 380.339327] RAX: 000040540081c0d3 RBX: ffffeb8f03557200 RCX: 000063af40000000
[ 380.339328] RDX: 0000000000000002 RSI: ffff9cce75bff498 RDI: ffff9e4a76072ff8
[ 380.339329] RBP: 0000000a43557200 R08: 0000000000000000 R09: ffffbe9eb1bc7bb0
[ 380.339329] R10: ffffbe9eb1bc7d08 R11: 0000000000000000 R12: ffff9e194a22a0e0
[ 380.339330] R13: ffff9cce7062fc10 R14: ffff9e194a22a0a0 R15: ffff9cce6559c0e0
[ 380.339331] FS: 00007fd132368880(0000) GS:ffff9cce7ea40000(0000) knlGS:0000000000000000
[ 380.339332] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 380.339332] CR2: 00000000020820a0 CR3: 000000017ef7a003 CR4: 00000000007606e0
[ 380.339333] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 380.339334] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 380.339334] PKRU: 55555554
[ 380.339334] Call Trace:
[ 380.339338] devm_memremap_pages_release+0x152/0x260
[ 380.339342] release_nodes+0x18d/0x1d0
[ 380.339347] device_release_driver_internal+0x160/0x210
[ 380.339350] unbind_store+0xb3/0xe0
[ 380.339355] kernfs_fop_write+0x102/0x180
[ 380.339358] __vfs_write+0x26/0x150
[ 380.339363] ? security_file_permission+0x3c/0xc0
[ 380.339364] vfs_write+0xad/0x1a0
[ 380.339366] SyS_write+0x42/0x90
[ 380.339370] do_syscall_64+0x74/0x150
[ 380.339375] entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[ 380.339377] RIP: 0033:0x7fd13166b3d0
It has been reported on an older (4.12) kernel but the current upstream
code doesn't cond_resched in the hot remove code at all and the given
range to remove might be really large. Fix the issue by calling
cond_resched once per memory section.
Link: http://lkml.kernel.org/r/20181031125840.23982-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Johannes Thumshirn <jthumshirn(a)suse.de>
Cc: Dan Williams <dan.j.williams(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/memory_hotplug.c~memory_hotplug-cond_resched-in-__remove_pages
+++ a/mm/memory_hotplug.c
@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, un
for (i = 0; i < sections_to_remove; i++) {
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ cond_resched();
ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
altmap);
map_offset = 0;
_
Patches currently in -mm which might be from mhocko(a)suse.com are
memory_hotplug-cond_resched-in-__remove_pages.patch
mm-thp-consolidate-thp-gfp-handling-into-alloc_hugepage_direct_gfpmask.patch
The patch titled
Subject: kbuild: fix kernel/bounds.c 'W=1' warning
has been removed from the -mm tree. Its filename was
kbuild-fix-kernel-boundsc-w=1-warning.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Arnd Bergmann <arnd(a)arndb.de>
Subject: kbuild: fix kernel/bounds.c 'W=1' warning
Building any configuration with 'make W=1' produces a warning:
kernel/bounds.c:16:6: warning: no previous prototype for 'foo' [-Wmissing-prototypes]
When also passing -Werror, this prevents us from building any other files.
Nobody ever calls the function, but we can't make it 'static' either
since we want the compiler output.
Calling it 'main' instead however avoids the warning, because gcc
does not insist on having a declaration for main.
Link: http://lkml.kernel.org/r/20181005083313.2088252-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Reported-by: Kieran Bingham <kieran.bingham+renesas(a)ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas(a)ideasonboard.com>
Cc: David Laight <David.Laight(a)ACULAB.COM>
Cc: Masahiro Yamada <yamada.masahiro(a)socionext.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/bounds.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/kernel/bounds.c~kbuild-fix-kernel-boundsc-w=1-warning
+++ a/kernel/bounds.c
@@ -13,7 +13,7 @@
#include <linux/log2.h>
#include <linux/spinlock_types.h>
-void foo(void)
+int main(void)
{
/* The enum constants to put into include/generated/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
@@ -23,4 +23,6 @@ void foo(void)
#endif
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
/* End of constants */
+
+ return 0;
}
_
Patches currently in -mm which might be from arnd(a)arndb.de are
ocfs2-dlmglue-clean-up-timestamp-handling.patch
vfs-replace-current_kernel_time64-with-ktime-equivalent.patch
The patch titled
Subject: mm/hmm: fix race between hmm_mirror_unregister() and mmu_notifier callback
has been removed from the -mm tree. Its filename was
mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/hmm: fix race between hmm_mirror_unregister() and mmu_notifier callback
In hmm_mirror_unregister(), mm->hmm is set to NULL and then
mmu_notifier_unregister_no_release() is called. That creates a small
window where mmu_notifier can call mmu_notifier_ops with mm->hmm equal to
NULL. Fix this by first unregistering mmu notifier callbacks and then
setting mm->hmm to NULL.
Similarly in hmm_register(), set mm->hmm before registering mmu_notifier
callbacks so callback functions always see mm->hmm set.
Link: http://lkml.kernel.org/r/20181019160442.18723-4-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hmm.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
--- a/mm/hmm.c~mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback
+++ a/mm/hmm.c
@@ -91,16 +91,6 @@ static struct hmm *hmm_register(struct m
spin_lock_init(&hmm->lock);
hmm->mm = mm;
- /*
- * We should only get here if hold the mmap_sem in write mode ie on
- * registration of first mirror through hmm_mirror_register()
- */
- hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
- if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
- kfree(hmm);
- return NULL;
- }
-
spin_lock(&mm->page_table_lock);
if (!mm->hmm)
mm->hmm = hmm;
@@ -108,12 +98,27 @@ static struct hmm *hmm_register(struct m
cleanup = true;
spin_unlock(&mm->page_table_lock);
- if (cleanup) {
- mmu_notifier_unregister(&hmm->mmu_notifier, mm);
- kfree(hmm);
- }
+ if (cleanup)
+ goto error;
+
+ /*
+ * We should only get here if hold the mmap_sem in write mode ie on
+ * registration of first mirror through hmm_mirror_register()
+ */
+ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
+ goto error_mm;
return mm->hmm;
+
+error_mm:
+ spin_lock(&mm->page_table_lock);
+ if (mm->hmm == hmm)
+ mm->hmm = NULL;
+ spin_unlock(&mm->page_table_lock);
+error:
+ kfree(hmm);
+ return NULL;
}
void hmm_mm_destroy(struct mm_struct *mm)
@@ -278,12 +283,13 @@ void hmm_mirror_unregister(struct hmm_mi
if (!should_unregister || mm == NULL)
return;
+ mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+
spin_lock(&mm->page_table_lock);
if (mm->hmm == hmm)
mm->hmm = NULL;
spin_unlock(&mm->page_table_lock);
- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
kfree(hmm);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
_
Patches currently in -mm which might be from rcampbell(a)nvidia.com are
The patch titled
Subject: mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly
has been removed from the -mm tree. Its filename was
mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly
Private ZONE_DEVICE pages use a special pte entry and thus are not
present. Properly handle this case in map_pte(), it is already handled in
check_pte(), the map_pte() part was lost in some rebase most probably.
Without this patch the slow migration path cannot migrate any private
ZONE_DEVICE memory back to regular memory. This was found after stress
testing migration back to system memory. This can ultimately lead to the
CPU constantly looping on page faults for the special swap entry.
Link: http://lkml.kernel.org/r/20181019160442.18723-3-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_vma_mapped.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
--- a/mm/page_vma_mapped.c~mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3
+++ a/mm/page_vma_mapped.c
@@ -21,7 +21,29 @@ static bool map_pte(struct page_vma_mapp
if (!is_swap_pte(*pvmw->pte))
return false;
} else {
- if (!pte_present(*pvmw->pte))
+ /*
+ * We get here when we are trying to unmap a private
+ * device page from the process address space. Such
+ * page is not CPU accessible and thus is mapped as
+ * a special swap entry, nonetheless it still does
+ * count as a valid regular mapping for the page (and
+ * is accounted as such in page maps count).
+ *
+ * So handle this special case as if it was a normal
+ * page mapping ie lock CPU page table and returns
+ * true.
+ *
+ * For more details on device private memory see HMM
+ * (include/linux/hmm.h or mm/hmm.c).
+ */
+ if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
+
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (!is_device_private_entry(entry))
+ return false;
+ } else if (!pte_present(*pvmw->pte))
return false;
}
}
_
Patches currently in -mm which might be from rcampbell(a)nvidia.com are
From: "David S. Miller" <davem(a)davemloft.net>
When processing using 'perf report -g caller', which is the default, we
ended up reversing the callchain entries received from the kernel, but
simply reversing throws away the information that tells that from a
point onwards the addresses are for userspace, kernel, guest kernel,
guest user, hypervisor.
The idea is that if we are walking backwards, for each cluster of
non-cpumode entries we have to first scan backwards for the next one and
use that for the cluster.
This seems silly and more expensive than it needs to be but it is enough
for an initial fix.
The code here is really complicated because it is intimately intertwined
with the lbr and branch handling, as well as this callchain order,
further fixes will be needed to properly take into account the cpumode
in those cases.
Another problem with ORDER_CALLER is that the NULL "0" IP that is at the
end of most callchains shows up at the top of the histogram because
every callchain contains it and with ORDER_CALLER it is the first entry.
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Tested-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
Cc: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: David Ahern <dsahern(a)gmail.com>
Cc: Jiri Olsa <jolsa(a)kernel.org>
Cc: Namhyung Kim <namhyung(a)kernel.org>
Cc: Souvik Banerjee <souvik1997(a)gmail.com>
Cc: Wang Nan <wangnan0(a)huawei.com>
Cc: stable(a)vger.kernel.org # 4.19
Link: https://lkml.kernel.org/n/tip-2wt3ayp6j2y2f2xowixa8y6y@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
---
tools/perf/util/machine.c | 35 ++++++++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 111ae858cbcb..8ee8ab39d8ac 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2140,6 +2140,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
return 0;
}
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u8 *cpumode, int ent)
+{
+ int err = 0;
+
+ while (--ent >= 0) {
+ u64 ip = chain->ips[ent];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, cpumode, ip,
+ false, NULL, NULL, 0);
+ break;
+ }
+ }
+ return err;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct perf_evsel *evsel,
@@ -2246,6 +2267,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
}
check_calls:
+ if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+ &cpumode, chain->nr - first_call);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
for (i = first_call, nr_entries = 0;
i < chain_nr && nr_entries < max_stack; i++) {
u64 ip;
@@ -2260,9 +2287,15 @@ static int thread__resolve_callchain_sample(struct thread *thread,
continue;
#endif
ip = chain->ips[j];
-
if (ip < PERF_CONTEXT_MAX)
++nr_entries;
+ else if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent,
+ root_al, &cpumode, j);
+ if (err)
+ return (err < 0) ? err : 0;
+ continue;
+ }
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
--
2.14.4
Initially we bumped into problem with 32-bit aligned atomic64_t
on ARC, see [1]. And then during quite a lengthy discussion Peter Z.
mentioned ARCH_KMALLOC_MINALIGN which IMHO makes perfect sense.
If allocation is done by plain kmalloc(), the obtained buffer will be
ARCH_KMALLOC_MINALIGN aligned, so why should a buffer obtained via
devm_kmalloc() have any other alignment?
This way we at least get the same behavior for both types of
allocation.
[1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004009.html
[2] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004036.html
Signed-off-by: Alexey Brodkin <abrodkin(a)synopsys.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Geert Uytterhoeven <geert(a)linux-m68k.org>
Cc: David Laight <David.Laight(a)ACULAB.COM>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Vineet Gupta <vgupta(a)synopsys.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: Greg KH <greg(a)kroah.com>
Cc: <stable(a)vger.kernel.org> # 4.8+
---
Changes v3 -> v4:
* Use ARCH_KMALLOC_MINALIGN for alignment instead of "8" [Peter]
Changes v2 -> v3:
* Align explicitly to 8 bytes [David]
* Rephrased in-line comment [David]
* Added more technical details to commit message [Greg]
* Mention more alignment options in commit message [Geert]
Changes v1 -> v2:
* Reworded commit message
* Inserted comment right in source [Thomas]
drivers/base/devres.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 4aaf00d2098b..e038e2b3b7ea 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -26,8 +26,14 @@ struct devres_node {
struct devres {
struct devres_node node;
- /* -- 3 pointers */
- unsigned long long data[]; /* guarantee ull alignment */
+ /*
+ * Some archs want to perform DMA into kmalloc caches
+ * and need a guaranteed alignment larger than
+ * the alignment of a 64-bit integer.
+ * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
+ * buffer alignment as if it was allocated by plain kmalloc().
+ */
+ u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
};
struct devres_group {
--
2.17.2
Hi Greg and other -stable maintainers
Please consider adding da15fc2fa9c (perf tools: Disable parallelism for
'make clean') to your -stable trees. Spurious build failures like that
make it harder to do automatic tests of new -stable (and -stable-rc)
versions.
Thanks,
Rasmus
Hi,
please consider reverting
commit 84379c9afe011020e797e3f50a662b08a6355dcf
netfilter: ipv6: nf_defrag: drop skb dst before queueing
It causes kernel crash for locally generated ipv6 fragments
when netfilter ipv6 defragmentation is used.
The faulty commit is not essential for -stable, it only
delays netns teardown for longer than needed when that netns
still has ipv6 frags queued. Much better than crash :-/
commit ids are:
4.4.y: not affected (not backported)
4.9.y: backported as ad8b1ffc3efae2f65080bdb11145c87d299b8f9a
4.14.y: backported as 28c74ff85efd192aeca9005499ca50c24d795f61
4.18.y: (first affected kernel): 84379c9afe011020e797e3f50a662b08a6355dcf
For 4.19.y, you could also wait for a bug fix to hit Linus' tree,
I can ping you again once it's in:
https://patchwork.ozlabs.org/patch/988233/
Thanks,
Florian
Typing 'btc' in kdb results in all sorts of failures. Sometimes it would
crash, sometimes display nothing, and sometimes hang.
Bisect tracked this down to the commit ad67b74d2469 ("printk: hash
addresses printed with %p"), suggesting an obvious fix. The pointer
used internally in kdb shouldn't be hashed, so switch it to %px.
Fixes: ad67b74d2469 ("printk: hash addresses printed with %p")
Cc: stable(a)vger.kernel.org
Signed-off-by: Douglas Anderson <dianders(a)chromium.org>
---
kernel/debug/kdb/kdb_bt.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 6ad4a9fcbd6f..7921ae4fca8d 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -179,14 +179,14 @@ kdb_bt(int argc, const char **argv)
kdb_printf("no process for cpu %ld\n", cpu);
return 0;
}
- sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
+ sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu));
kdb_parse(buf);
return 0;
}
kdb_printf("btc: cpu status: ");
kdb_parse("cpu\n");
for_each_online_cpu(cpu) {
- sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
+ sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu));
kdb_parse(buf);
touch_nmi_watchdog();
}
--
2.19.1.568.g152ad8e336-goog
This reverts commit 62aad93f09c1952ede86405894df1b22012fd5ab.
Which was upstream commit 172b06c32b94 ("mm: slowly shrink slabs with a
relatively small number of objects").
The upstream commit was found to cause regressions. While there is a
proposed fix upstream, revert this patch from stable trees for now as
testing the fix will take some time.
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
mm/vmscan.c | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fc0436407471..03822f86f288 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -386,17 +386,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
delta = freeable >> priority;
delta *= 4;
do_div(delta, shrinker->seeks);
-
- /*
- * Make sure we apply some minimal pressure on default priority
- * even on small cgroups. Stale objects are not only consuming memory
- * by themselves, but can also hold a reference to a dying cgroup,
- * preventing it from being reclaimed. A dying cgroup with all
- * corresponding structures like per-cpu stats and kmem caches
- * can be really big, so it may lead to a significant waste of memory.
- */
- delta = max_t(unsigned long long, delta, min(freeable, batch_size));
-
total_scan += delta;
if (total_scan < 0) {
pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
--
2.17.1
Hello there,
I am working on a project at Red Hat where we do quick testing on patches for internal kernels before they merge. The goal is to catch bugs or issues before they merge into kernel trees and avoid situations where kernels need time-consuming bisects when lots of patches are merged at once. We aim to put valuable feedback into a kernel developer's inbox within four hours.
Our team has built a pipeline where we merge patches, compile kernels (for various architectures), and run tests on real hardware (various architectures). The current test set is fairly basic and it includes LTP plus some additional open source tests. We are looking to gradually expand those over time as we evaluate which tests provide the most value and find the most problems.
We would love to bring this to upstream kernel repositories and we thought that linux-stable might be a good place to start. The developer/maintainer experience would look something like this:
1) Developer submits a patchset
2) Those patches end up in Patchwork
3) We pull patches from patchwork, compile kernels, and test them
4) We reply to the thread on the mailing list with a brief set of results (one time per patchset)
Developers do not need to change any existing workflows. We gather the patches, test them, and reply in the appropriate place.
Is this something that the linux-stable community and maintainers would find valuable? If so, feel free to ask any questions about our process and we can go over any of those parts in more detail. If not, please let me know anyway! Our team is always looking for ways to improve. :)
Thanks a bunch for reading this far and I look forward to hearing from you.
--
Major Hayden
Hi,
On 31-10-18 07:02, Mogens Jensen wrote:
> ‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
> On Tuesday, October 30, 2018 7:10 PM, Hans de Goede <hdegoede(a)redhat.com> wrote:
>
>> Hi,
>>
>> On 30-10-18 19:56, Mogens Jensen wrote:
>>
>>> ‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
>>> On Tuesday, October 30, 2018 4:04 PM, Hans de Goede hdegoede(a)redhat.com wrote:
>>>
>>>> Hi,
>>>> On 30-10-18 16:46, Hans de Goede wrote:
>>>>
>>>>> Hi,
>>>>> On 30-10-18 16:04, Pierre-Louis Bossart wrote:
>>>>>
>>>>>> In addition I am not aware of any baytrail device using plt_clk_0, so moving a common machine driver such a cht_bsw_max98090_ti to use plt_clk0 only would break other devices (e.g. Rambi/Orco). Asking for both clocks to be on might work though,
>>>>>
>>>>> Ok, so we need to have a DMI based quirk for the Swanky and maybe also
>>>>> the clapper to use plt_clk_0 there. Asking for 2 clks if we only need
>>>>> one does not seem like a good plan.
>>>>
>>>> Dean, Mogens,
>>>> To write a proper patch for this I'm going to need DMI strings
>>>> from your devices.
>>>> Can you please run (as normal user):
>>>> grep . /sys/class/dmi/id/* 2> /dev/null
>>>> And reply with the output of this command?
>>>> I have attached the output from a coreboot seabios based clapper.
>>
>> Thank you.
>>
>>> Should I still test 0001-ASoC-intel-cht_bsw_max98090_ti-Use-pmc_plt_clk_0-ins.patch with SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH and asoundrc from Dean? There seems to have been some development in the case since that request was made.
>>
>> Yes please test that, I expect that to also fix things for the
>> Clapper, but I need to have that confirmed before submitting a
>> patch upstream adding a quirk for the Clapper to use pmc_plt_clk_0
>> instead of pmc_plt_clk_3.
>>
>> Regards,
>>
>> Hans
>>
> Unfortunately I only have access to longterm kernel 4.14 for building/running on this system, and 0001-ASoC-intel-cht_bsw_max98090_ti-Use-pmc_plt_clk_0-ins.patch does not patch against 4.14.78. Can a test patch for 4.14 be created?
Can you run (as root):
for i in /sys/kernel/debug/clk/pmc_plt_clk_?; do echo -n "$i: "; cat $i/clk_flags; echo; done
When running a kernel with working audio?
Then I can confirm that the Clapper is also using pmc_plt_clk_0, so that I can
fix this for the clapper for 4.18+
I've just checked the 4.14 sources and in 4.14 the SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH
driver does not support mclk control yet, so for the 4.14 kernel the only way to
fix this is to revert the 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL")
commit.
Regards,
Hans
Test ptrace-tm-spd-gpr fails on current kernel (4.19) due to a segmentation
fault that happens on the child process prior to setting cptr[2] = 1. This
causes the parent process to wait forever at 'while (!pptr[2])' and the test to
be killed by the test harness framework by timeout, thus, failing.
The segmentation fault happens because of an inline assembly being
generated as:
0x10000355c <tm_spd_gpr+492> lfs f0, 0(0)
This is reading memory position 0x0 and causing the segmentation fault.
This code is being generated by ASM_LOAD_FPR_SINGLE_PRECISION(flt_4), where
flt_4 is passed to the inline assembly block as:
[flt_4] "r" (&d)
Since the inline assembly 'r' constraint means any GPR, gpr0 is being
chosen, thus causing this issue when issuing a Load Floating-Point Single
instruction.
This patch simply changes the constraint to 'b', which specifies that this
register will be used as base, and r0 is not allowed to be used, avoiding
this issue.
Other than that, remove the flt_2 register from the input operands, since it
is not used by the inline assembly code at all.
Cc: stable(a)vger.kernel.org
Signed-off-by: Breno Leitao <leitao(a)debian.org>
---
tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index 327fa943c7f3..dbdffa2e2c82 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -67,8 +67,8 @@ void tm_spd_gpr(void)
"3: ;"
: [res] "=r" (result), [texasr] "=r" (texasr)
: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
- [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a),
- [flt_2] "r" (&b), [flt_4] "r" (&d)
+ [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+ [flt_4] "b" (&d)
: "memory", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
--
2.19.0
From: Arnd Bergmann <arnd(a)arndb.de>
Subject: kbuild: fix kernel/bounds.c 'W=1' warning
Building any configuration with 'make W=1' produces a warning:
kernel/bounds.c:16:6: warning: no previous prototype for 'foo' [-Wmissing-prototypes]
When also passing -Werror, this prevents us from building any other files.
Nobody ever calls the function, but we can't make it 'static' either
since we want the compiler output.
Calling it 'main' instead however avoids the warning, because gcc
does not insist on having a declaration for main.
Link: http://lkml.kernel.org/r/20181005083313.2088252-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Reported-by: Kieran Bingham <kieran.bingham+renesas(a)ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas(a)ideasonboard.com>
Cc: David Laight <David.Laight(a)ACULAB.COM>
Cc: Masahiro Yamada <yamada.masahiro(a)socionext.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/bounds.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/kernel/bounds.c~kbuild-fix-kernel-boundsc-w=1-warning
+++ a/kernel/bounds.c
@@ -13,7 +13,7 @@
#include <linux/log2.h>
#include <linux/spinlock_types.h>
-void foo(void)
+int main(void)
{
/* The enum constants to put into include/generated/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
@@ -23,4 +23,6 @@ void foo(void)
#endif
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
/* End of constants */
+
+ return 0;
}
_
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/hmm: fix race between hmm_mirror_unregister() and mmu_notifier callback
In hmm_mirror_unregister(), mm->hmm is set to NULL and then
mmu_notifier_unregister_no_release() is called. That creates a small
window where mmu_notifier can call mmu_notifier_ops with mm->hmm equal to
NULL. Fix this by first unregistering mmu notifier callbacks and then
setting mm->hmm to NULL.
Similarly in hmm_register(), set mm->hmm before registering mmu_notifier
callbacks so callback functions always see mm->hmm set.
Link: http://lkml.kernel.org/r/20181019160442.18723-4-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hmm.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
--- a/mm/hmm.c~mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback
+++ a/mm/hmm.c
@@ -91,16 +91,6 @@ static struct hmm *hmm_register(struct m
spin_lock_init(&hmm->lock);
hmm->mm = mm;
- /*
- * We should only get here if hold the mmap_sem in write mode ie on
- * registration of first mirror through hmm_mirror_register()
- */
- hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
- if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
- kfree(hmm);
- return NULL;
- }
-
spin_lock(&mm->page_table_lock);
if (!mm->hmm)
mm->hmm = hmm;
@@ -108,12 +98,27 @@ static struct hmm *hmm_register(struct m
cleanup = true;
spin_unlock(&mm->page_table_lock);
- if (cleanup) {
- mmu_notifier_unregister(&hmm->mmu_notifier, mm);
- kfree(hmm);
- }
+ if (cleanup)
+ goto error;
+
+ /*
+ * We should only get here if hold the mmap_sem in write mode ie on
+ * registration of first mirror through hmm_mirror_register()
+ */
+ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
+ goto error_mm;
return mm->hmm;
+
+error_mm:
+ spin_lock(&mm->page_table_lock);
+ if (mm->hmm == hmm)
+ mm->hmm = NULL;
+ spin_unlock(&mm->page_table_lock);
+error:
+ kfree(hmm);
+ return NULL;
}
void hmm_mm_destroy(struct mm_struct *mm)
@@ -278,12 +283,13 @@ void hmm_mirror_unregister(struct hmm_mi
if (!should_unregister || mm == NULL)
return;
+ mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+
spin_lock(&mm->page_table_lock);
if (mm->hmm == hmm)
mm->hmm = NULL;
spin_unlock(&mm->page_table_lock);
- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
kfree(hmm);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
_
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly
Private ZONE_DEVICE pages use a special pte entry and thus are not
present. Properly handle this case in map_pte(), it is already handled in
check_pte(), the map_pte() part was lost in some rebase most probably.
Without this patch the slow migration path cannot migrate any
private ZONE_DEVICE memory back to regular memory. This was found after stress
testing migration back to system memory. This ultimately can lead to the
CPU constantly page fault looping on the special swap entry.
Link: http://lkml.kernel.org/r/20181019160442.18723-3-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_vma_mapped.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
--- a/mm/page_vma_mapped.c~mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3
+++ a/mm/page_vma_mapped.c
@@ -21,7 +21,29 @@ static bool map_pte(struct page_vma_mapp
if (!is_swap_pte(*pvmw->pte))
return false;
} else {
- if (!pte_present(*pvmw->pte))
+ /*
+ * We get here when we are trying to unmap a private
+ * device page from the process address space. Such
+ * page is not CPU accessible and thus is mapped as
+ * a special swap entry, nonetheless it still does
+ * count as a valid regular mapping for the page (and
+ * is accounted as such in page maps count).
+ *
+ * So handle this special case as if it was a normal
+ * page mapping ie lock CPU page table and returns
+ * true.
+ *
+ * For more details on device private memory see HMM
+ * (include/linux/hmm.h or mm/hmm.c).
+ */
+ if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
+
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (!is_device_private_entry(entry))
+ return false;
+ } else if (!pte_present(*pvmw->pte))
return false;
}
}
_
Hi,
On 30-10-18 19:56, Mogens Jensen wrote:
> ‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
> On Tuesday, October 30, 2018 4:04 PM, Hans de Goede <hdegoede(a)redhat.com> wrote:
>
>> Hi,
>>
>> On 30-10-18 16:46, Hans de Goede wrote:
>>
>>> Hi,
>>> On 30-10-18 16:04, Pierre-Louis Bossart wrote:
>>>
>>>> In addition I am not aware of any baytrail device using plt_clk_0, so moving a common machine driver such a cht_bsw_max98090_ti to use plt_clk0 only would break other devices (e.g. Rambi/Orco). Asking for both clocks to be on might work though,
>>>
>>> Ok, so we need to have a DMI based quirk for the Swanky and maybe also
>>> the clapper to use plt_clk_0 there. Asking for 2 clks if we only need
>>> one does not seem like a good plan.
>>
>> Dean, Mogens,
>>
>> To write a proper patch for this I'm going to need DMI strings
>> from your devices.
>>
>> Can you please run (as normal user):
>>
>> grep . /sys/class/dmi/id/* 2> /dev/null
>>
>> And reply with the output of this command?
> I have attached the output from a coreboot seabios based clapper.
Thank you.
> Should I still test 0001-ASoC-intel-cht_bsw_max98090_ti-Use-pmc_plt_clk_0-ins.patch with SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH and asoundrc from Dean? There seems to have been some development in the case since that request was made.
Yes please test that, I expect that to also fix things for the
Clapper, but I need to have that confirmed before submitting a
patch upstream adding a quirk for the Clapper to use pmc_plt_clk_0
instead of pmc_plt_clk_3.
Regards,
Hans
Hi Arnd, Olof,
On 10/30/2018 4:11 AM, Marc Zyngier wrote:
> The Keystone QMSS driver is pretty damaged, in the sense that it
> does things like this:
>
> irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
>
> where cpu_map is a local variable. As we leave the function, this
> will point to nowhere-land, and things will end-up badly.
>
> Instead, let's use a proper cpumask that gets allocated, giving
> the driver a chance to actually work with things like irqbalance
> as well as have a hypothetical 64bit future.
>
> Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
> ---
Could you please add this patch to your fixes branch ?
FWIW,
Acked-by: Santosh Shilimkar <ssantosh(a)kernel.org>
From: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
If dwc3_core_init_mode() fails with deferred probe,
next probe fails on sysfs with
sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:11.0/dwc3.0.auto/dwc3.0.auto.ulpi'
To avoid this failure, clean up ULPI device.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi(a)linux.intel.com>
---
drivers/usb/dwc3/core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 88c80fcc39f5..fec97465ccac 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1499,6 +1499,7 @@ static int dwc3_probe(struct platform_device *pdev)
err5:
dwc3_event_buffers_cleanup(dwc);
+ dwc3_ulpi_exit(dwc);
err4:
dwc3_free_scratch_buffers(dwc);
--
2.19.1
---------------------------------------------------------------------
Intel Finland Oy
Registered Address: PL 281, 00181 Helsinki
Business Identity Code: 0357606 - 4
Domiciled in Helsinki
This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
From: "David S. Miller" <davem(a)davemloft.net>
[ Upstream commit cfdc3170d214046b9509183fe9b9544dc644d40b ]
It is important to clear the hw->state value for non-stopped events
when they are added into the PMU. Otherwise when the event is
scheduled out, we won't read the counter because HES_UPTODATE is still
set. This breaks 'perf stat' and similar use cases, causing all the
events to show zero.
This worked for multi-pcr because we make explicit sparc_pmu_start()
calls in calculate_multiple_pcrs(). calculate_single_pcr() doesn't do
this because the idea there is to accumulate all of the counter
settings into the single pcr value. So we have to add explicit
hw->state handling there.
Like x86, we use the PERF_HES_ARCH bit to track truly stopped events
so that we don't accidentally start them on a reload.
Related to all of this, sparc_pmu_start() is missing a userpage update
so add it.
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/sparc/kernel/perf_event.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index af53c25da2e7..8536532970bb 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -919,6 +919,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc)
sparc_perf_event_update(cp, &cp->hw,
cpuc->current_idx[i]);
cpuc->current_idx[i] = PIC_NO_INDEX;
+ if (cp->hw.state & PERF_HES_STOPPED)
+ cp->hw.state |= PERF_HES_ARCH;
}
}
}
@@ -951,10 +953,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc)
enc = perf_event_get_enc(cpuc->events[i]);
cpuc->pcr[0] &= ~mask_for_index(idx);
- if (hwc->state & PERF_HES_STOPPED)
+ if (hwc->state & PERF_HES_ARCH) {
cpuc->pcr[0] |= nop_for_index(idx);
- else
+ } else {
cpuc->pcr[0] |= event_encoding(enc, idx);
+ hwc->state = 0;
+ }
}
out:
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
@@ -980,6 +984,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
cpuc->current_idx[i] = idx;
+ if (cp->hw.state & PERF_HES_ARCH)
+ continue;
+
sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
@@ -1071,6 +1078,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags)
event->hw.state = 0;
sparc_pmu_enable_event(cpuc, &event->hw, idx);
+
+ perf_event_update_userpage(event);
}
static void sparc_pmu_stop(struct perf_event *event, int flags)
@@ -1363,9 +1372,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
cpuc->events[n0] = event->hw.event_base;
cpuc->current_idx[n0] = PIC_NO_INDEX;
- event->hw.state = PERF_HES_UPTODATE;
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(ef_flags & PERF_EF_START))
- event->hw.state |= PERF_HES_STOPPED;
+ event->hw.state |= PERF_HES_ARCH;
/*
* If group events scheduling transaction was started,
--
2.17.1
From: "David S. Miller" <davem(a)davemloft.net>
[ Upstream commit cfdc3170d214046b9509183fe9b9544dc644d40b ]
It is important to clear the hw->state value for non-stopped events
when they are added into the PMU. Otherwise when the event is
scheduled out, we won't read the counter because HES_UPTODATE is still
set. This breaks 'perf stat' and similar use cases, causing all the
events to show zero.
This worked for multi-pcr because we make explicit sparc_pmu_start()
calls in calculate_multiple_pcrs(). calculate_single_pcr() doesn't do
this because the idea there is to accumulate all of the counter
settings into the single pcr value. So we have to add explicit
hw->state handling there.
Like x86, we use the PERF_HES_ARCH bit to track truly stopped events
so that we don't accidentally start them on a reload.
Related to all of this, sparc_pmu_start() is missing a userpage update
so add it.
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/sparc/kernel/perf_event.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 6596f66ce112..a5d0c2f08110 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -926,6 +926,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc)
sparc_perf_event_update(cp, &cp->hw,
cpuc->current_idx[i]);
cpuc->current_idx[i] = PIC_NO_INDEX;
+ if (cp->hw.state & PERF_HES_STOPPED)
+ cp->hw.state |= PERF_HES_ARCH;
}
}
}
@@ -958,10 +960,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc)
enc = perf_event_get_enc(cpuc->events[i]);
cpuc->pcr[0] &= ~mask_for_index(idx);
- if (hwc->state & PERF_HES_STOPPED)
+ if (hwc->state & PERF_HES_ARCH) {
cpuc->pcr[0] |= nop_for_index(idx);
- else
+ } else {
cpuc->pcr[0] |= event_encoding(enc, idx);
+ hwc->state = 0;
+ }
}
out:
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
@@ -987,6 +991,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
cpuc->current_idx[i] = idx;
+ if (cp->hw.state & PERF_HES_ARCH)
+ continue;
+
sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
@@ -1078,6 +1085,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags)
event->hw.state = 0;
sparc_pmu_enable_event(cpuc, &event->hw, idx);
+
+ perf_event_update_userpage(event);
}
static void sparc_pmu_stop(struct perf_event *event, int flags)
@@ -1370,9 +1379,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
cpuc->events[n0] = event->hw.event_base;
cpuc->current_idx[n0] = PIC_NO_INDEX;
- event->hw.state = PERF_HES_UPTODATE;
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(ef_flags & PERF_EF_START))
- event->hw.state |= PERF_HES_STOPPED;
+ event->hw.state |= PERF_HES_ARCH;
/*
* If group events scheduling transaction was started,
--
2.17.1
From: Tomi Valkeinen <tomi.valkeinen(a)ti.com>
[ Upstream commit 064253c1c0625efd0362a0b7ecdbe8bee2a2904d ]
drm_mode_setcrtc() retries modesetting in case one of the functions it
calls returns -EDEADLK. connector_set, mode and fb are freed before
retrying, but they are not set to NULL. This can cause
drm_mode_setcrtc() to use those variables.
For example: On the first try __drm_mode_set_config_internal() returns
-EDEADLK. connector_set, mode and fb are freed. Next retry starts, and
drm_modeset_lock_all_ctx() returns -EDEADLK, and we jump to 'out'. The
code will happily try to release all three again.
This leads to crashes of different kinds, depending on the sequence the
EDEADLKs happen.
Fix this by setting the three variables to NULL at the start of the
retry loop.
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ti.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20180917110054.4053-1-tomi.va…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/drm_crtc.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 98a36e6c69ad..bd207857a964 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -560,9 +560,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
struct drm_mode_crtc *crtc_req = data;
struct drm_crtc *crtc;
struct drm_plane *plane;
- struct drm_connector **connector_set = NULL, *connector;
- struct drm_framebuffer *fb = NULL;
- struct drm_display_mode *mode = NULL;
+ struct drm_connector **connector_set, *connector;
+ struct drm_framebuffer *fb;
+ struct drm_display_mode *mode;
struct drm_mode_set set;
uint32_t __user *set_connectors_ptr;
struct drm_modeset_acquire_ctx ctx;
@@ -591,6 +591,10 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
mutex_lock(&crtc->dev->mode_config.mutex);
drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
retry:
+ connector_set = NULL;
+ fb = NULL;
+ mode = NULL;
+
ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx);
if (ret)
goto out;
--
2.17.1
From: Masami Hiramatsu <mhiramat(a)kernel.org>
Fix the synthetic event test case to remove event correctly.
If redirecting command to synthetic_event file without append
mode, it cleans up all existing events and executes (parses) the
command. This means "delete event" always fails to find the
target event.
Since the previous synthetic event code had a bug where it didn't return
-ENOENT even if it failed to find the event to delete, this test
passed. But with that bug fixed, this test fails because the test
itself has a bug.
This fixes that bug by trying to delete event right after
adding an event, and use append mode redirection ('>>') instead
of normal redirection ('>').
Link: http://lkml.kernel.org/r/154013452832.25576.2305459545429386517.stgit@devbox
Acked-by: Shuah Khan <shuah(a)kernel.org>
Acked-by: Tom Zanussi <zanussi(a)linux.intel.com>
Tested-by: Tom Zanussi <zanussi(a)linux.intel.com>
Cc: Tom Zanussi <zanussi(a)kernel.org>
Cc: Tom Zanussi <tom.zanussi(a)linux.intel.com>
Cc: Rajvi Jingar <rajvi.jingar(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: stable(a)vger.kernel.org
Fixes: f06eec4d0f2c ('selftests: ftrace: Add inter-event hist triggers testcases')
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
.../trigger-synthetic-event-createremove.tc | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
index cef11377dcbd..c604438df13b 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -35,18 +35,18 @@ fi
reset_trigger
-echo "Test create synthetic event with an error"
-echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null
+echo "Test remove synthetic event"
+echo '!wakeup_latency u64 lat pid_t pid char comm[16]' >> synthetic_events
if [ -d events/synthetic/wakeup_latency ]; then
- fail "Created wakeup_latency synthetic event with an invalid format"
+ fail "Failed to delete wakeup_latency synthetic event"
fi
reset_trigger
-echo "Test remove synthetic event"
-echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null
if [ -d events/synthetic/wakeup_latency ]; then
- fail "Failed to delete wakeup_latency synthetic event"
+ fail "Created wakeup_latency synthetic event with an invalid format"
fi
do_reset
--
2.19.0
This section collects all source .note.* sections together in the
vmlinux image. Without it .note.Linux section may be placed at address
0, while the rest of the kernel is at its normal address, resulting in a
huge vmlinux.bin image that may not be linked into the xtensa Image.elf.
Cc: stable(a)vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
---
arch/xtensa/boot/Makefile | 2 +-
arch/xtensa/kernel/vmlinux.lds.S | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile
index dc9e0ba7122c..294846117fc2 100644
--- a/arch/xtensa/boot/Makefile
+++ b/arch/xtensa/boot/Makefile
@@ -33,7 +33,7 @@ uImage: $(obj)/uImage
boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y))
$(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS)
-OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary
+OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary
vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index a1c3edb8ad56..fa926995d2a3 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -131,6 +131,7 @@ SECTIONS
.fixup : { *(.fixup) }
EXCEPTION_TABLE(16)
+ NOTES
/* Data section */
_sdata = .;
--
2.11.0
The patch titled
Subject: hugetlbfs: dirty pages as they are added to pagecache
has been removed from the -mm tree. Its filename was
hugetlbfs-dirty-pages-as-they-are-added-to-pagecache-v2.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlbfs: dirty pages as they are added to pagecache
Some test systems were experiencing negative huge page reserve counts and
incorrect file block counts. This was traced to /proc/sys/vm/drop_caches
removing clean pages from hugetlbfs file pagecaches. When non-hugetlbfs
explicit code removes the pages, the appropriate accounting is not
performed.
This can be recreated as follows:
fallocate -l 2M /dev/hugepages/foo
echo 1 > /proc/sys/vm/drop_caches
fallocate -l 2M /dev/hugepages/foo
grep -i huge /proc/meminfo
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
HugePages_Total: 2048
HugePages_Free: 2047
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 4194304 kB
ls -lsh /dev/hugepages/foo
4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo
To address this issue, dirty pages as they are added to pagecache. This
can easily be reproduced with fallocate as shown above. Read faulted
pages will eventually end up being marked dirty. But there is a window
where they are clean and could be impacted by code such as drop_caches.
So, just dirty them all as they are added to the pagecache.
Link: http://lkml.kernel.org/r/b5be45b8-5afe-56cd-9482-28384699a049@oracle.com
Fixes: 6bda666a03f0 ("hugepages: fold find_or_alloc_pages into huge_no_page()")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Khalid Aziz <khalid.aziz(a)oracle.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/mm/hugetlb.c~hugetlbfs-dirty-pages-as-they-are-added-to-pagecache-v2
+++ a/mm/hugetlb.c
@@ -3690,6 +3690,12 @@ int huge_add_to_page_cache(struct page *
return err;
ClearPagePrivate(page);
+ /*
+ * set page dirty so that it will not be removed from cache/file
+ * by non-hugetlbfs specific code paths.
+ */
+ set_page_dirty(page);
+
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
The patch titled
Subject: userfaultfd: disable irqs when taking the waitqueue lock
has been removed from the -mm tree. Its filename was
userfaultfd-disable-irqs-when-taking-the-waitqueue-lock.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Christoph Hellwig <hch(a)lst.de>
Subject: userfaultfd: disable irqs when taking the waitqueue lock
userfaultfd contains howe-grown locking of the waitqueue lock, and does
not disable interrupts. This relies on the fact that no one else takes it
from interrupt context and violates an invariant of the normal waitqueue
locking scheme. With aio poll it is easy to trigger other locks that
disable interrupts (or are called from interrupt context).
Link: http://lkml.kernel.org/r/20181018154101.18750-1-hch@lst.de
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Andrea Arcangeli <aarcange(a)redhat.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: <stable(a)vger.kernel.org> [4.19.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/userfaultfd.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/fs/userfaultfd.c~userfaultfd-disable-irqs-when-taking-the-waitqueue-lock
+++ a/fs/userfaultfd.c
@@ -1026,7 +1026,7 @@ static ssize_t userfaultfd_ctx_read(stru
struct userfaultfd_ctx *fork_nctx = NULL;
/* always take the fd_wqh lock before the fault_pending_wqh lock */
- spin_lock(&ctx->fd_wqh.lock);
+ spin_lock_irq(&ctx->fd_wqh.lock);
__add_wait_queue(&ctx->fd_wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -1112,13 +1112,13 @@ static ssize_t userfaultfd_ctx_read(stru
ret = -EAGAIN;
break;
}
- spin_unlock(&ctx->fd_wqh.lock);
+ spin_unlock_irq(&ctx->fd_wqh.lock);
schedule();
- spin_lock(&ctx->fd_wqh.lock);
+ spin_lock_irq(&ctx->fd_wqh.lock);
}
__remove_wait_queue(&ctx->fd_wqh, &wait);
__set_current_state(TASK_RUNNING);
- spin_unlock(&ctx->fd_wqh.lock);
+ spin_unlock_irq(&ctx->fd_wqh.lock);
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
_
Patches currently in -mm which might be from hch(a)lst.de are
The patch titled
Subject: mm: /proc/pid/smaps_rollup: fix NULL pointer deref in smaps_pte_range()
has been removed from the -mm tree. Its filename was
mm-proc-pid-smaps_rollup-fix-null-pointer-deref-in-smaps_pte_range.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Vlastimil Babka <vbabka(a)suse.cz>
Subject: mm: /proc/pid/smaps_rollup: fix NULL pointer deref in smaps_pte_range()
Leonardo reports an apparent regression in 4.19-rc7:
BUG: unable to handle kernel NULL pointer dereference at 00000000000000f0
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 3 PID: 6032 Comm: python Not tainted 4.19.0-041900rc7-lowlatency #201810071631
Hardware name: LENOVO 80UG/Toronto 4A2, BIOS 0XCN45WW 08/09/2018
RIP: 0010:smaps_pte_range+0x32d/0x540
Code: 80 00 00 00 00 74 a9 48 89 de 41 f6 40 52 40 0f 85 04 02 00 00 49 2b 30 48 c1 ee 0c 49 03 b0 98 00 00 00 49 8b 80 a0 00 00 00 <48> 8b b8 f0 00 00 00 e8 b7 ef ec ff 48 85 c0 0f 84 71 ff ff ff a8
RSP: 0018:ffffb0cbc484fb88 EFLAGS: 00010202
RAX: 0000000000000000 RBX: 0000560ddb9e9000 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000560ddb9e9 RDI: 0000000000000001
RBP: ffffb0cbc484fbc0 R08: ffff94a5a227a578 R09: ffff94a5a227a578
R10: 0000000000000000 R11: 0000560ddbbe7000 R12: ffffe903098ba728
R13: ffffb0cbc484fc78 R14: ffffb0cbc484fcf8 R15: ffff94a5a2e9cf48
FS: 00007f6dfb683740(0000) GS:ffff94a5aaf80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000000f0 CR3: 000000011c118001 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
__walk_page_range+0x3c2/0x6f0
walk_page_vma+0x42/0x60
smap_gather_stats+0x79/0xe0
? gather_pte_stats+0x320/0x320
? gather_hugetlb_stats+0x70/0x70
show_smaps_rollup+0xcd/0x1c0
seq_read+0x157/0x400
__vfs_read+0x3a/0x180
? security_file_permission+0x93/0xc0
? security_file_permission+0x93/0xc0
vfs_read+0x8f/0x140
ksys_read+0x55/0xc0
__x64_sys_read+0x1a/0x20
do_syscall_64+0x5a/0x110
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Decoded code matched to local compilation+disassembly points to
smaps_pte_entry():
} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
&& pte_none(*pte))) {
page = find_get_entry(vma->vm_file->f_mapping,
linear_page_index(vma, addr));
Here, vma->vm_file is NULL. mss->check_shmem_swap should be false in that
case, however for smaps_rollup, smap_gather_stats() can set the flag true
for one vma and leave it true for subsequent vma's where it should be
false.
To fix, reset the check_shmem_swap flag to false. There's also a related
bug which sets mss->swap to shmem_swapped, which in the context of
smaps_rollup overwrites any value accumulated from previous vma's. Fix
that as well.
Note that the report suggests a regression between 4.17.19 and 4.19-rc7,
which makes the 4.19 series ending with commit 258f669e7e88 ("mm:
/proc/pid/smaps_rollup: convert to single value seq_file") suspicious.
But the mss was reused for rollup since 493b0e9d945f ("mm: add
/proc/pid/smaps_rollup") so let's play it safe with the stable backport.
Link: http://lkml.kernel.org/r/555fbd1f-4ac9-0b58-dcd4-5dc4380ff7ca@suse.cz
Link: https://bugzilla.kernel.org/show_bug.cgi?id=201377
Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Reported-by: Leonardo Soares Müller <leozinho29_eu(a)hotmail.com>
Tested-by: Leonardo Soares Müller <leozinho29_eu(a)hotmail.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Daniel Colascione <dancol(a)google.com>
Cc: Alexey Dobriyan <adobriyan(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/fs/proc/task_mmu.c~mm-proc-pid-smaps_rollup-fix-null-pointer-deref-in-smaps_pte_range
+++ a/fs/proc/task_mmu.c
@@ -713,6 +713,8 @@ static void smap_gather_stats(struct vm_
smaps_walk.private = mss;
#ifdef CONFIG_SHMEM
+ /* In case of smaps_rollup, reset the value from previous vma */
+ mss->check_shmem_swap = false;
if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
/*
* For shared or readonly shmem mappings we know that all
@@ -728,7 +730,7 @@ static void smap_gather_stats(struct vm_
if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
!(vma->vm_flags & VM_WRITE)) {
- mss->swap = shmem_swapped;
+ mss->swap += shmem_swapped;
} else {
mss->check_shmem_swap = true;
smaps_walk.pte_hole = smaps_pte_hole;
_
Patches currently in -mm which might be from vbabka(a)suse.cz are
From: Ravi Bangoria <ravi.bangoria(a)linux.vnet.ibm.com>
[ Upstream commit 331c7cb307971eac38e9470340e10c87855bf4bc ]
Perf top is often crashing at very random locations on powerpc. After
investigating, I found the crash only happens when sample is of zero
length symbol. Powerpc kernel has many such symbols which do not
contain length details in vmlinux binary and thus start and end
addresses of such symbols are same.
Structure
struct sym_hist {
u64 nr_samples;
u64 period;
struct sym_hist_entry addr[0];
};
has last member 'addr[]' of size zero. 'addr[]' is an array of addresses
that belong to one symbol (function). If a function consists of 100
instructions, 'addr' points to an array of 100 'struct sym_hist_entry'
elements. For zero length symbol, it points to the *empty* array, i.e.
no members in the array and thus offset 0 is also invalid for such
array.
static int __symbol__inc_addr_samples(...)
{
...
offset = addr - sym->start;
h = annotation__histogram(notes, evidx);
h->nr_samples++;
h->addr[offset].nr_samples++;
h->period += sample->period;
h->addr[offset].period += sample->period;
...
}
Here, when 'addr' is same as 'sym->start', 'offset' becomes 0, which is
valid for normal symbols but *invalid* for zero length symbols and thus
updating h->addr[offset] causes memory corruption.
Fix this by adding one dummy element for zero length symbols.
Link: https://lkml.org/lkml/2016/10/10/148
Fixes: edee44be5919 ("perf annotate: Don't throw error for zero length symbols")
Signed-off-by: Ravi Bangoria <ravi.bangoria(a)linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa(a)kernel.org>
Acked-by: Namhyung Kim <namhyung(a)kernel.org>
Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Cc: Jin Yao <yao.jin(a)linux.intel.com>
Cc: Kim Phillips <kim.phillips(a)arm.com>
Cc: Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Taeung Song <treeze.taeung(a)gmail.com>
Link: http://lkml.kernel.org/r/1508854806-10542-1-git-send-email-ravi.bangoria@li…
Signed-off-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/perf/util/annotate.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index a38227eb5450..3336cbc6ec48 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -495,9 +495,19 @@ static struct ins *ins__find(const char *name)
int symbol__alloc_hist(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
- const size_t size = symbol__size(sym);
+ size_t size = symbol__size(sym);
size_t sizeof_sym_hist;
+ /*
+ * Add buffer of one element for zero length symbol.
+ * When sample is taken from first instruction of
+ * zero length symbol, perf still resolves it and
+ * shows symbol name in perf report and allows to
+ * annotate it.
+ */
+ if (size == 0)
+ size = 1;
+
/* Check for overflow when calculating sizeof_sym_hist */
if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(u64))
return -1;
--
2.17.1
On s390 the CPU Measurement Facility for counters now supports
2 PMUs named cpum_cf (CPU Measurement Facility for counters) and
cpum_cf_diag (CPU Measurement Facility for diagnostic counters)
for one and the same CPU.
Running command
[root@s35lp76 perf]# ./perf stat -e tx_c_tend \
-- ~/mytests/cf-tx-events 1
Measuring transactions
TX_C_TABORT_NO_SPECIAL: 0 expected:0
TX_C_TABORT_SPECIAL: 0 expected:0
TX_C_TEND: 1 expected:1
TX_NC_TABORT: 11 expected:11
TX_NC_TEND: 1 expected:1
Performance counter stats for '/root/mytests/cf-tx-events 1':
2 tx_c_tend
0.002120091 seconds time elapsed
0.000121000 seconds user
0.002127000 seconds sys
[root@s35lp76 perf]#
displays output which is unexpected (and wrong):
2 tx_c_tend
The test program definitely triggers only one transaction, as shown
in line 'TX_C_TEND: 1 expected:1'.
This is caused by the following call sequence:
pmu_lookup() scans and installs a PMU.
+--> pmu_aliases() parses all aliases in directory
.../<pmu-name>/events/* which are file names.
+--> pmu_aliases_parse() Read each file in directory and create
a new alias entry. This is done with
+--> perf_pmu__new_alias() and
+--> __perf_pmu__new_alias() which also check for
identical alias names.
After pmu_aliases() returns, a complete list of event names
for this pmu has been created. Now function
pmu_add_cpu_aliases() is called to add the events listed in the json
| files to the alias list of the cpu.
+--> perf_pmu__find_map() Returns a pointer to the json events.
Now function pmu_add_cpu_aliases() scans through all events listed
in the JSON files for this CPU.
Each json event pmu name is compared with the current PMU being
built up and if they mismatch, the json event is added to the
current PMUs alias list.
To avoid duplicate entries the following comparison is done:
if (!is_arm_pmu_core(name)) {
pname = pe->pmu ? pe->pmu : "cpu";
if (strncmp(pname, name, strlen(pname)))
continue;
}
The culprit is the strncmp() function.
Using current s390 PMU naming, the first PMU is 'cpum_cf'
and a long list of events is added, among them 'tx_c_tend'
When the second PMU named 'cpum_cf_diag' is added, only one event
named 'CF_DIAG' is added by the pmu_aliases() function.
Now function pmu_add_cpu_aliases() is invoked for PMU 'cpum_cf_diag'.
Since the CPUID string is the same for both PMUs, json file events
for PMU named 'cpum_cf' are added to the PMU 'cpum_cf_diag'.
This happens because the strncmp() actually compares:
strncmp("cpum_cf", "cpum_cf_diag", 7);
The first parameter is the pmu name taken from the event in
the json file. The second parameter is the pmu name of the PMU
currently being built.
They are different, but the length of the compare only tests the
common prefix and this returns 0(true) when it should return false.
Now all events for PMU cpum_cf are added to the alias list for pmu
cpum_cf_diag.
Later on in function parse_events_add_pmu() the event 'tx_c_tend' is
searched in all available PMUs and found twice, adding it two
times to the evsel_list global variable which is the root
of all events. This results in a counter value of 2 instead
of 1.
Output with this patch:
[root@s35lp76 perf]# ./perf stat -e tx_c_tend \
-- ~/mytests/cf-tx-events 1
Measuring transactions
TX_C_TABORT_NO_SPECIAL: 0 expected:0
TX_C_TABORT_SPECIAL: 0 expected:0
TX_C_TEND: 1 expected:1
TX_NC_TABORT: 11 expected:11
TX_NC_TEND: 1 expected:1
Performance counter stats for '/root/mytests/cf-tx-events 1':
1 tx_c_tend
0.001815365 seconds time elapsed
0.000123000 seconds user
0.001756000 seconds sys
[root@s35lp76 perf]#
Fixes: 292c34c10249 ("perf pmu: Fix core PMU alias list for X86 platform")
Signed-off-by: Thomas Richter <tmricht(a)linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner(a)linux.ibm.com>
Cc: Kan Liang <kan.liang(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # 4.18+
---
tools/perf/util/pmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7799788f662f..7e49baad304d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -773,7 +773,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
if (!is_arm_pmu_core(name)) {
pname = pe->pmu ? pe->pmu : "cpu";
- if (strncmp(pname, name, strlen(pname)))
+ if (strcmp(pname, name))
continue;
}
--
2.14.3
From: Andrea Arcangeli <aarcange(a)redhat.com>
THP allocation might be really disruptive when allocated on NUMA system
with the local node full or hard to reclaim. Stefan has posted an
allocation stall report on 4.12 based SLES kernel which suggests the
same issue:
[245513.362669] kvm: page allocation stalls for 194572ms, order:9, mode:0x4740ca(__GFP_HIGHMEM|__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|__GFP_MOVABLE|__GFP_DIRECT_RECLAIM), nodemask=(null)
[245513.363983] kvm cpuset=/ mems_allowed=0-1
[245513.364604] CPU: 10 PID: 84752 Comm: kvm Tainted: G W 4.12.0+98-ph #1 SLE15 (unreleased)
[245513.365258] Hardware name: Supermicro SYS-1029P-WTRT/X11DDW-NT, BIOS 2.0 12/05/2017
[245513.365905] Call Trace:
[245513.366535] dump_stack+0x5c/0x84
[245513.367148] warn_alloc+0xe0/0x180
[245513.367769] __alloc_pages_slowpath+0x820/0xc90
[245513.368406] ? __slab_free+0xa9/0x2f0
[245513.369048] ? __slab_free+0xa9/0x2f0
[245513.369671] __alloc_pages_nodemask+0x1cc/0x210
[245513.370300] alloc_pages_vma+0x1e5/0x280
[245513.370921] do_huge_pmd_wp_page+0x83f/0xf00
[245513.371554] ? set_huge_zero_page.isra.52.part.53+0x9b/0xb0
[245513.372184] ? do_huge_pmd_anonymous_page+0x631/0x6d0
[245513.372812] __handle_mm_fault+0x93d/0x1060
[245513.373439] handle_mm_fault+0xc6/0x1b0
[245513.374042] __do_page_fault+0x230/0x430
[245513.374679] ? get_vtime_delta+0x13/0xb0
[245513.375411] do_page_fault+0x2a/0x70
[245513.376145] ? page_fault+0x65/0x80
[245513.376882] page_fault+0x7b/0x80
[...]
[245513.382056] Mem-Info:
[245513.382634] active_anon:126315487 inactive_anon:1612476 isolated_anon:5
active_file:60183 inactive_file:245285 isolated_file:0
unevictable:15657 dirty:286 writeback:1 unstable:0
slab_reclaimable:75543 slab_unreclaimable:2509111
mapped:81814 shmem:31764 pagetables:370616 bounce:0
free:32294031 free_pcp:6233 free_cma:0
[245513.386615] Node 0 active_anon:254680388kB inactive_anon:1112760kB active_file:240648kB inactive_file:981168kB unevictable:13368kB isolated(anon):0kB isolated(file):0kB mapped:280240kB dirty:1144kB writeback:0kB shmem:95832kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 81225728kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
[245513.388650] Node 1 active_anon:250583072kB inactive_anon:5337144kB active_file:84kB inactive_file:0kB unevictable:49260kB isolated(anon):20kB isolated(file):0kB mapped:47016kB dirty:0kB writeback:4kB shmem:31224kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 31897600kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
The defrag mode is "madvise" and from the above report it is clear that
the THP has been allocated for MADV_HUGEPAGE vma.
Andrea has identified that the main source of the problem is
__GFP_THISNODE usage:
: The problem is that direct compaction combined with the NUMA
: __GFP_THISNODE logic in mempolicy.c is telling reclaim to swap very
: hard the local node, instead of failing the allocation if there's no
: THP available in the local node.
:
: Such logic was ok until __GFP_THISNODE was added to the THP allocation
: path even with MPOL_DEFAULT.
:
: The idea behind the __GFP_THISNODE addition, is that it is better to
: provide local memory in PAGE_SIZE units than to use remote NUMA THP
: backed memory. That largely depends on the remote latency though, on
: threadrippers for example the overhead is relatively low in my
: experience.
:
: The combination of __GFP_THISNODE and __GFP_DIRECT_RECLAIM results in
: extremely slow qemu startup with vfio, if the VM is larger than the
: size of one host NUMA node. This is because it will try very hard to
: unsuccessfully swapout get_user_pages pinned pages as result of the
: __GFP_THISNODE being set, instead of falling back to PAGE_SIZE
: allocations and instead of trying to allocate THP on other nodes (it
: would be even worse without vfio type1 GUP pins of course, except it'd
: be swapping heavily instead).
Fix this by removing __GFP_THISNODE for THP requests which are
requesting the direct reclaim. This effectively reverts 5265047ac301 on
the grounds that the zone/node reclaim was known to be disruptive due
to premature reclaim when there was memory free. While it made sense at
the time for HPC workloads without NUMA awareness on rare machines, it
was ultimately harmful in the majority of cases. The existing behaviour
is similar, if not as widespread as it applies to a corner case but
crucially, it cannot be tuned around like zone_reclaim_mode can. The
default behaviour should always be to cause the least harm for the
common case.
If there are specialised use cases out there that want zone_reclaim_mode
in specific cases, then it can be built on top. Longterm we should
consider a memory policy which allows for the node reclaim like behavior
for the specific memory ranges which would allow a
[1] http://lkml.kernel.org/r/20180820032204.9591-1-aarcange@redhat.com
[mhocko(a)suse.com: rewrote the changelog based on the one from Andrea]
Fixes: 5265047ac301 ("mm, thp: really limit transparent hugepage allocation to local node")
Cc: Zi Yan <zi.yan(a)cs.rutgers.edu>
Cc: stable # 4.1+
Reported-by: Stefan Priebe <s.priebe(a)profihost.ag>
Debugged-by: Andrea Arcangeli <aarcange(a)redhat.com>
Reported-by: Alex Williamson <alex.williamson(a)redhat.com>
Signed-off-by: Andrea Arcangeli <aarcange(a)redhat.com>
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
---
mm/mempolicy.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index da858f794eb6..149b6f4cf023 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2046,8 +2046,36 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
- page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE, order);
+ /*
+ * We cannot invoke reclaim if __GFP_THISNODE
+ * is set. Invoking reclaim with
+ * __GFP_THISNODE set, would cause THP
+ * allocations to trigger heavy swapping
+ * despite there may be tons of free memory
+ * (including potentially plenty of THP
+ * already available in the buddy) on all the
+ * other NUMA nodes.
+ *
+ * At most we could invoke compaction when
+ * __GFP_THISNODE is set (but we would need to
+ * refrain from invoking reclaim even if
+ * compaction returned COMPACT_SKIPPED because
+ * there wasn't not enough memory to succeed
+ * compaction). For now just avoid
+ * __GFP_THISNODE instead of limiting the
+ * allocation path to a strict and single
+ * compaction invocation.
+ *
+ * Supposedly if direct reclaim was enabled by
+ * the caller, the app prefers THP regardless
+ * of the node it comes from so this would be
+ * more desiderable behavior than only
+ * providing THP originated from the local
+ * node in such case.
+ */
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ gfp |= __GFP_THISNODE;
+ page = __alloc_pages_node(hpage_node, gfp, order);
goto out;
}
}
--
2.18.0
Hello Dear
Do you have the passion for humanitarian welfare?
Can you devote your time and be totally committed and devoted
to run multi-million pounds humanitarian charity project sponsored
totally by me; with an incentive/compensation accrual to you for
your time and effort and at no cost to you.
If interested, reply me for the full details
Thanks
Les Scadding
Hi,
The 1st & 3rd patches fix a bio size alignment issue.
The 2nd patch cleans up __blkdev_issue_discard() a bit.
Thanks,
Ming Lei (3):
block: make sure discard bio is aligned with logical block size
block: cleanup __blkdev_issue_discard()
block: make sure writesame bio is aligned with logical block size
block/blk-lib.c | 25 ++++++-------------------
1 file changed, 6 insertions(+), 19 deletions(-)
Cc: Rui Salvaterra <rsalvaterra(a)gmail.com>
Cc: stable(a)vger.kernel.org
Cc: Mike Snitzer <snitzer(a)redhat.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Xiao Ni <xni(a)redhat.com>
Cc: Mariusz Dabrowski <mariusz.dabrowski(a)intel.com>
--
2.9.5
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlbfs: dirty pages as they are added to pagecache
Some test systems were experiencing negative huge page reserve counts and
incorrect file block counts. This was traced to /proc/sys/vm/drop_caches
removing clean pages from hugetlbfs file pagecaches. When non-hugetlbfs
explicit code removes the pages, the appropriate accounting is not
performed.
This can be recreated as follows:
fallocate -l 2M /dev/hugepages/foo
echo 1 > /proc/sys/vm/drop_caches
fallocate -l 2M /dev/hugepages/foo
grep -i huge /proc/meminfo
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
HugePages_Total: 2048
HugePages_Free: 2047
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 4194304 kB
ls -lsh /dev/hugepages/foo
4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo
To address this issue, dirty pages as they are added to pagecache. This
can easily be reproduced with fallocate as shown above. Read faulted
pages will eventually end up being marked dirty. But there is a window
where they are clean and could be impacted by code such as drop_caches.
So, just dirty them all as they are added to the pagecache.
Link: http://lkml.kernel.org/r/b5be45b8-5afe-56cd-9482-28384699a049@oracle.com
Fixes: 6bda666a03f0 ("hugepages: fold find_or_alloc_pages into huge_no_page()")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Khalid Aziz <khalid.aziz(a)oracle.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/mm/hugetlb.c~hugetlbfs-dirty-pages-as-they-are-added-to-pagecache-v2
+++ a/mm/hugetlb.c
@@ -3690,6 +3690,12 @@ int huge_add_to_page_cache(struct page *
return err;
ClearPagePrivate(page);
+ /*
+ * set page dirty so that it will not be removed from cache/file
+ * by non-hugetlbfs specific code paths.
+ */
+ set_page_dirty(page);
+
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
_
From: Christoph Hellwig <hch(a)lst.de>
Subject: userfaultfd: disable irqs when taking the waitqueue lock
userfaultfd contains home-grown locking of the waitqueue lock, and does
not disable interrupts. This relies on the fact that no one else takes it
from interrupt context and violates an invariant of the normal waitqueue
locking scheme. With aio poll it is easy to trigger other locks that
disable interrupts (or are called from interrupt context).
Link: http://lkml.kernel.org/r/20181018154101.18750-1-hch@lst.de
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Andrea Arcangeli <aarcange(a)redhat.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: <stable(a)vger.kernel.org> [4.19.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/userfaultfd.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/fs/userfaultfd.c~userfaultfd-disable-irqs-when-taking-the-waitqueue-lock
+++ a/fs/userfaultfd.c
@@ -1026,7 +1026,7 @@ static ssize_t userfaultfd_ctx_read(stru
struct userfaultfd_ctx *fork_nctx = NULL;
/* always take the fd_wqh lock before the fault_pending_wqh lock */
- spin_lock(&ctx->fd_wqh.lock);
+ spin_lock_irq(&ctx->fd_wqh.lock);
__add_wait_queue(&ctx->fd_wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -1112,13 +1112,13 @@ static ssize_t userfaultfd_ctx_read(stru
ret = -EAGAIN;
break;
}
- spin_unlock(&ctx->fd_wqh.lock);
+ spin_unlock_irq(&ctx->fd_wqh.lock);
schedule();
- spin_lock(&ctx->fd_wqh.lock);
+ spin_lock_irq(&ctx->fd_wqh.lock);
}
__remove_wait_queue(&ctx->fd_wqh, &wait);
__set_current_state(TASK_RUNNING);
- spin_unlock(&ctx->fd_wqh.lock);
+ spin_unlock_irq(&ctx->fd_wqh.lock);
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
_
uref->usage_index can be indirectly controlled by userspace, hence leading
to a potential exploitation of the Spectre variant 1 vulnerability.
This field is used as an array index by the hiddev_ioctl_usage() function,
when 'cmd' is either HIDIOCGCOLLECTIONINDEX, HIDIOCGUSAGES or
HIDIOCSUSAGES.
For cmd == HIDIOCGCOLLECTIONINDEX case, uref->usage_index is compared to
field->maxusage and then used as an index to dereference field->usage
array. The same thing happens to the cmd == HIDIOC{G,S}USAGES cases, where
uref->usage_index is checked against an array maximum value and then it is
used as an index in an array.
This is a summary of the HIDIOCGCOLLECTIONINDEX case, which matches the
traditional Spectre V1 first load:
copy_from_user(uref, user_arg, sizeof(*uref))
if (uref->usage_index >= field->maxusage)
goto inval;
i = field->usage[uref->usage_index].collection_index;
return i;
This patch fixes this by sanitizing field uref->usage_index before using it
to index field->usage (HIDIOCGCOLLECTIONINDEX) or field->value in
HIDIOC{G,S}USAGES arrays, thus, avoiding speculation in the first load.
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Cc: <stable(a)vger.kernel.org>
--
v2: Contemplate cmd == HIDIOC{G,S}USAGES case
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 23872d08308c..a746017fac17 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -512,14 +512,24 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
if (cmd == HIDIOCGCOLLECTIONINDEX) {
if (uref->usage_index >= field->maxusage)
goto inval;
+ uref->usage_index =
+ array_index_nospec(uref->usage_index,
+ field->maxusage);
} else if (uref->usage_index >= field->report_count)
goto inval;
}
- if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
- (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
- uref->usage_index + uref_multi->num_values > field->report_count))
- goto inval;
+ if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) {
+ if (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
+ uref->usage_index + uref_multi->num_values >
+ field->report_count)
+ goto inval;
+
+ uref->usage_index =
+ array_index_nospec(uref->usage_index,
+ field->report_count -
+ uref_multi->num_values);
+ }
switch (cmd) {
case HIDIOCGUSAGE:
--
2.17.1
We are a team of 12 image editors and we are here to edit your photos.
We mainly provide images cut out and images clipping path, masking.
Such as for ecommerce photos, and it is also for beauty portraits and skin
images
We provide test editing if you send us 1 or 2 photos.
Thanks,
Katie
From: Sascha Hauer <s.hauer(a)pengutronix.de>
[ Upstream commit eea96566c189c77e5272585984eb2729881a2f1d ]
The maximum CPU frequency for the i.MX53 QSB is 1GHz, so disable the
1.2GHz OPP. This makes the board work again with configs that have
cpufreq enabled like imx_v6_v7_defconfig on which the board stopped
working with the addition of cpufreq-dt support.
Fixes: 791f416608 ("ARM: dts: imx53: add cpufreq-dt support")
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/arm/boot/dts/imx53-qsb-common.dtsi | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index 683dcbe27cbd..8c11190c5218 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi
@@ -130,6 +130,17 @@
};
};
+&cpu0 {
+ /* CPU rated to 1GHz, not 1.2GHz as per the default settings */
+ operating-points = <
+ /* kHz uV */
+ 166666 850000
+ 400000 900000
+ 800000 1050000
+ 1000000 1200000
+ >;
+};
+
&esdhc1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_esdhc1>;
--
2.17.1
From: Mike Snitzer <snitzer(a)redhat.com>
[ Upstream commit 172c238612ebf81cabccc86b788c9209af591f61 ]
A thin-pool that is in out-of-data-space (OODS) mode may transition back
to write mode -- without the admin adding more space to the thin-pool --
if/when blocks are released (either by deleting thin devices or
discarding provisioned blocks).
But as part of the thin-pool's earlier transition to out-of-data-space
mode the thin-pool may have set the 'error_if_no_space' flag to true if
the no_space_timeout expires without more space having been made
available. That implementation detail, of changing the pool's
error_if_no_space setting, needs to be reset back to the default that
the user specified when the thin-pool's table was loaded.
Otherwise we'll drop the user requested behaviour on the floor when this
out-of-data-space to write mode transition occurs.
Reported-by: Vivek Goyal <vgoyal(a)redhat.com>
Signed-off-by: Mike Snitzer <snitzer(a)redhat.com>
Acked-by: Joe Thornber <ejt(a)redhat.com>
Fixes: 2c43fd26e4 ("dm thin: fix missing out-of-data-space to write mode transition if blocks are released")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/md/dm-thin.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 68c7102a64c8..936c57b57539 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1909,6 +1909,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
case PM_WRITE:
if (old_mode != new_mode)
notify_of_pool_mode_change(pool, "write");
+ pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
dm_pool_metadata_read_write(pool->pmd);
pool->process_bio = process_bio;
pool->process_discard = process_discard;
--
2.17.1
From: Eric Biggers <ebiggers(a)google.com>
[ Upstream commit d636bd9f12a66ea3775c9fabbf3f8e118253467a ]
In join_session_keyring(), if install_session_keyring_to_cred() were to
fail, we would leak the keyring reference, just like in the bug fixed by
commit 23567fd052a9 ("KEYS: Fix keyring ref leak in
join_session_keyring()"). Fortunately this cannot happen currently, but
we really should be more careful. Do this by adding and using a new
error label at which the keyring reference is dropped.
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: David Howells <dhowells(a)redhat.com>
Signed-off-by: James Morris <james.l.morris(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
security/keys/process_keys.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index ac1d5b2b1626..a7095372701e 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -808,15 +808,14 @@ long join_session_keyring(const char *name)
ret = PTR_ERR(keyring);
goto error2;
} else if (keyring == new->session_keyring) {
- key_put(keyring);
ret = 0;
- goto error2;
+ goto error3;
}
/* we've got a keyring - now to install it */
ret = install_session_keyring_to_cred(new, keyring);
if (ret < 0)
- goto error2;
+ goto error3;
commit_creds(new);
mutex_unlock(&key_session_mutex);
@@ -826,6 +825,8 @@ long join_session_keyring(const char *name)
okay:
return ret;
+error3:
+ key_put(keyring);
error2:
mutex_unlock(&key_session_mutex);
error:
--
2.17.1
Dear stable(a)vger.kernel.org ,
I am sorry for invading your privacy because we do not know each
other
but I am contacting you because of my late client who died
without a
will.
This is to notify you that your are the beneficiary to the
bequest of
the sum of £10,500,000.00 GBP in the intent of my deceased
client.
If you accept please forward your full names, current address and
your
direct cell for the court documentations and so that we can
obtain the
probate division of court papers required for you to claim the
funds
from the holding bank.
Thanks,
Steven Walter
Disclaimer: Adobe® reserves the rights to protect sensitive
documents
against fraudsters and spyware. Adobe® secured files are
encrypted with
the receivers' email and can only be viewed by the recipient. For
more
details visit http://www.adobe.com/legal/terms.html for details.
Dear stable(a)vger.kernel.org ,
I am sorry for invading your privacy because we do not know each
other
but I am contacting you because of my late client who died
without a
will.
This is to notify you that your are the beneficiary to the
bequest of
the sum of £10,500,000.00 GBP in the intent of my deceased
client.
If you accept please forward your full names, current address and
your
direct cell for the court documentations and so that we can
obtain the
probate division of court papers required for you to claim the
funds
from the holding bank.
Thanks,
Steven Walter
Disclaimer: Adobe® reserves the rights to protect sensitive
documents
against fraudsters and spyware. Adobe® secured files are
encrypted with
the receivers' email and can only be viewed by the recipient. For
more
details visit http://www.adobe.com/legal/terms.html for details.
Dear stable(a)vger.kernel.org ,
I am sorry for invading your privacy because we do not know each
other
but I am contacting you because of my late client who died
without a
will.
This is to notify you that your are the beneficiary to the
bequest of
the sum of £10,500,000.00 GBP in the intent of my deceased
client.
If you accept please forward your full names, current address and
your
direct cell for the court documentations and so that we can
obtain the
probate division of court papers required for you to claim the
funds
from the holding bank.
Thanks,
Steven Walter
Disclaimer: Adobe® reserves the rights to protect sensitive
documents
against fraudsters and spyware. Adobe® secured files are
encrypted with
the receivers' email and can only be viewed by the recipient. For
more
details visit http://www.adobe.com/legal/terms.html for details.
Dear stable(a)vger.kernel.org ,
I am sorry for invading your privacy because we do not know each
other
but I am contacting you because of my late client who died
without a
will.
This is to notify you that your are the beneficiary to the
bequest of
the sum of £10,500,000.00 GBP in the intent of my deceased
client.
If you accept please forward your full names, current address and
your
direct cell for the court documentations and so that we can
obtain the
probate division of court papers required for you to claim the
funds
from the holding bank.
Thanks,
Steven Walter
Disclaimer: Adobe® reserves the rights to protect sensitive
documents
against fraudsters and spyware. Adobe® secured files are
encrypted with
the receivers' email and can only be viewed by the recipient. For
more
details visit http://www.adobe.com/legal/terms.html for details.
This commit fixes incorrect properties because they were different
from the actual ones.
The parameters of '#address-cells' and '#size-cells' were removed,
and 'interrupts', 'pinctrl-names' and 'pinctrl-0' were added.
Fixes: 4dcd5c2781f3 ("spi: add DT bindings for UniPhier SPI controller")
Signed-off-by: Keiji Hayashibara <hayashibara.keiji(a)socionext.com>
---
Documentation/devicetree/bindings/spi/spi-uniphier.txt | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/Documentation/devicetree/bindings/spi/spi-uniphier.txt b/Documentation/devicetree/bindings/spi/spi-uniphier.txt
index 504a4ec..b04e66a 100644
--- a/Documentation/devicetree/bindings/spi/spi-uniphier.txt
+++ b/Documentation/devicetree/bindings/spi/spi-uniphier.txt
@@ -5,18 +5,20 @@ UniPhier SoCs have SCSSI which supports SPI single channel.
Required properties:
- compatible: should be "socionext,uniphier-scssi"
- reg: address and length of the spi master registers
- - #address-cells: must be <1>, see spi-bus.txt
- - #size-cells: must be <0>, see spi-bus.txt
- - clocks: A phandle to the clock for the device.
- - resets: A phandle to the reset control for the device.
+ - interrupts: a single interrupt specifier
+ - pinctrl-names: should be "default"
+ - pinctrl-0: pin control state for the default mode
+ - clocks: a phandle to the clock for the device
+ - resets: a phandle to the reset control for the device
Example:
spi0: spi@54006000 {
compatible = "socionext,uniphier-scssi";
reg = <0x54006000 0x100>;
- #address-cells = <1>;
- #size-cells = <0>;
+ interrupts = <0 39 4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_spi0>;
clocks = <&peri_clk 11>;
resets = <&peri_rst 11>;
};
--
2.7.4
From: Andi Kleen <ak(a)linux.intel.com>
The Intel microcode revision space is unsigned. Inside Intel there are special
microcode revisions that have the highest bit set, and they are considered to have
a higher revision than any microcodes that don't have this bit set.
The function comparing the microcode revision in the Linux driver compares
u32 with int, which ends up being sign-extended to long on 64-bit
systems. This results in microcode revisions with the highest bit set not
loading, because their revision appears negative and smaller than that of
the existing microcode.
Change the comparison to unsigned. With that the loading works
as expected.
Cc: stable(a)vger.kernel.org # Any supported stable
Signed-off-by: Andi Kleen <ak(a)linux.intel.com>
--
v2: White space changes.
v3: Be more verbose
---
arch/x86/kernel/cpu/microcode/intel.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 16936a24795c..e54d402500d3 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -93,7 +93,8 @@ static int find_matching_signature(void *mc, unsigned int csig, int cpf)
/*
* Returns 1 if update has been found, 0 otherwise.
*/
-static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev)
+static int has_newer_microcode(void *mc, unsigned int csig, int cpf,
+ unsigned new_rev)
{
struct microcode_header_intel *mc_hdr = mc;
--
2.17.2
From: Dave Jiang <dave.jiang(a)intel.com>
The numa_emulation() routine in the 'uniform' case walks through all the
physical 'memblk' instances and divides them into N emulated nodes with
split_nodes_size_interleave_uniform(). As each physical node is consumed
it is removed from the physical memblk array in the
numa_remove_memblk_from() helper. Since
split_nodes_size_interleave_uniform() handles advancing the array as the
'memblk' is consumed it is expected that the base of the array is always
specified as the argument.
Otherwise, on multi-socket (> 2) configurations the uniform-split
capability can generate an invalid numa configuration leading to boot
failures with signatures like the following:
rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
Sending NMI from CPU 0 to CPUs 2:
NMI backtrace for cpu 2
CPU: 2 PID: 1332 Comm: pgdatinit0 Not tainted 4.19.0-rc8-next-20181019-baseline #59
RIP: 0010:__init_single_page.isra.74+0x81/0x90
[..]
Call Trace:
deferred_init_pages+0xaa/0xe3
deferred_init_memmap+0x18f/0x318
kthread+0xf8/0x130
? deferred_free_pages.isra.105+0xc9/0xc9
? kthread_stop+0x110/0x110
ret_from_fork+0x35/0x40
Cc: x86(a)kernel.org
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Fixes: 1f6a2c6d9f121 ("x86/numa_emulation: Introduce uniform split capability")
Signed-off-by: Dave Jiang <dave.jiang(a)intel.com>
Tested-by: Alexander Duyck <alexander.h.duyck(a)linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
Changes since v2: https://lore.kernel.org/patchwork/patch/988541/
* Update the changelog with details from testing by Alex
arch/x86/mm/numa_emulation.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index b54d52a2d00a..d71d72cf6c66 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -400,9 +400,17 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
ret = -1;
for_each_node_mask(i, physnode_mask) {
+ /*
+ * The reason we pass in blk[0] is due to
+ * numa_remove_memblk_from() called by
+ * emu_setup_memblk() will delete entry 0
+ * and then move everything else up in the pi.blk
+ * array. Therefore we should always be looking
+ * at blk[0].
+ */
ret = split_nodes_size_interleave_uniform(&ei, &pi,
- pi.blk[i].start, pi.blk[i].end, 0,
- n, &pi.blk[i], nid);
+ pi.blk[0].start, pi.blk[0].end, 0,
+ n, &pi.blk[0], nid);
if (ret < 0)
break;
if (ret < n) {
The mce handler for 'nfit' devices is called for memory errors on a
Non-Volatile DIMM, and adds the error location to a 'badblocks' list.
This list is used by the various NVDIMM drivers to avoid consuming known
poison locations during IO.
The mce handler gets called for both corrected and uncorrectable errors.
Until now, both kinds of errors have been added to the badblocks list.
However, corrected memory errors indicate that the problem has already
been fixed by hardware, and the resulting interrupt is merely a
notification to Linux. As far as future accesses to that location are
concerned, it is perfectly fine to use, and thus doesn't need to be
included in the above badblocks list.
Add a check in the nfit mce handler to filter out corrected mce events,
and only process uncorrectable errors.
Reported-by: Omar Avelar <omar.avelar(a)intel.com>
Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error")
Cc: stable(a)vger.kernel.org
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Tony Luck <tony.luck(a)intel.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
---
arch/x86/include/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 3 ++-
drivers/acpi/nfit/mce.c | 4 ++--
3 files changed, 5 insertions(+), 3 deletions(-)
v2: Reword the changelog to explain the motivation for this patch better
(Borislav)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3a17107594c8..3111b3cee2ee 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -216,6 +216,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s
int mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
+bool mce_is_correctable(struct mce *m);
DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 953b3ce92dcc..27015948bc41 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -534,7 +534,7 @@ bool mce_is_memory_error(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);
-static bool mce_is_correctable(struct mce *m)
+bool mce_is_correctable(struct mce *m)
{
if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
return false;
@@ -544,6 +544,7 @@ static bool mce_is_correctable(struct mce *m)
return true;
}
+EXPORT_SYMBOL_GPL(mce_is_correctable);
static bool cec_add_mce(struct mce *m)
{
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index e9626bf6ca29..7a51707f87e9 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -25,8 +25,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
struct acpi_nfit_desc *acpi_desc;
struct nfit_spa *nfit_spa;
- /* We only care about memory errors */
- if (!mce_is_memory_error(mce))
+ /* We only care about uncorrectable memory errors */
+ if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
return NOTIFY_DONE;
/*
--
2.17.1
This is required for the OpenWrt Project to resolve the flash write issue,
as in the patch below.
<https://git.openwrt.org/?p=openwrt/openwrt.git;a=commitdiff;h=ddc11c3932c7b…>
Also the original patch in OpenWRT is below.
<https://github.com/openwrt/openwrt/blob/v18.06.0/target/linux/ar71xx/patche…>
So change to use chip_good() instead of chip_ready().
The reason to use chip_good() is simply that it actually fixes the issue.
In the past I also fixed the erase function in the same way with the
patch below.
<https://patchwork.ozlabs.org/patch/922656/>
Note: The reason for the patch for erase is same.
In my understanding, chip_ready() just reads the value from the flash
twice and compares the results.
So I think that sometimes an incorrect value is read twice, and that this
depends on the flash device behavior, but I am not sure.
So change to use chip_good() instead of chip_ready().
Signed-off-by: Tokunori Ikegami <ikegami(a)allied-telesis.co.jp>
Signed-off-by: Hauke Mehrtens <hauke(a)hauke-m.de>
Signed-off-by: Koen Vandeputte <koen.vandeputte(a)ncentric.com>
Signed-off-by: Fabio Bettoni <fbettoni(a)gmail.com>
Co-Developed-by: Hauke Mehrtens <hauke(a)hauke-m.de>
Co-Developed-by: Koen Vandeputte <koen.vandeputte(a)ncentric.com>
Co-Developed-by: Fabio Bettoni <fbettoni(a)gmail.com>
Reported-by: Fabio Bettoni <fbettoni(a)gmail.com>
Cc: Chris Packham <chris.packham(a)alliedtelesis.co.nz>
Cc: Joakim Tjernlund <Joakim.Tjernlund(a)infinera.com>
Cc: Boris Brezillon <boris.brezillon(a)free-electrons.com>
Cc: linux-mtd(a)lists.infradead.org
Cc: stable(a)vger.kernel.org
---
Changes since v1:
- Just update the commit message.
drivers/mtd/chips/cfi_cmdset_0002.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 72428b6bfc47..251c9e1675bd 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1627,31 +1627,37 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
continue;
}
- if (time_after(jiffies, timeo) && !chip_ready(map, adr)){
+ if (chip_good(map, adr, datum))
+ break;
+
+ if (time_after(jiffies, timeo)){
xip_enable(map, chip, adr);
printk(KERN_WARNING "MTD %s(): software timeout\n", __func__);
xip_disable(map, chip, adr);
+ ret = -EIO;
break;
}
- if (chip_ready(map, adr))
- break;
-
/* Latency issues. Drop the lock, wait a while and retry */
UDELAY(map, chip, adr, 1);
}
+
/* Did we succeed? */
- if (!chip_good(map, adr, datum)) {
+ if (ret) {
/* reset on all failures. */
map_write(map, CMD(0xF0), chip->start);
/* FIXME - should have reset delay before continuing */
- if (++retry_cnt <= MAX_RETRIES)
+ if (++retry_cnt <= MAX_RETRIES) {
+ ret = 0;
goto retry;
+ }
ret = -EIO;
}
+
xip_enable(map, chip, adr);
+
op_done:
if (mode == FL_OTP_WRITE)
otp_exit(map, chip, adr, map_bankwidth(map));
--
2.18.0
Dear friend,
Greetings,
I am Abel Brent, a NATO soldier serving in Afghanistan. I and my
comrades we are seeking your assistance to help us receive/invest
our funds in your country in any lucrative business. Please if
this proposal is acceptable by you, kindly respond back to me for
more details.
Thanks and waiting to hear from you
Abel.
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
We're no longer programming any watermarks when we're disabling
a pipe. That means ilk_wm_merge() & co. will keep considering
any pipe that is getting disabled as still enabled. Thus we
either get no LP1+ watermarks (ilk-ivb), or we get suboptimal
ones (hsw-bdw).
This seems to have been broken by commit b6b178a77210 ("drm/i915:
Calculate ironlake intermediate watermarks correctly, v2."). Before
that we apparently had some difference between the intermediate
and optimal watermarks and so we would program the optimal ones.
Now intermediate and optimal are identical for disabled pipes
and so we don't program either.
Fix this by programming the intermediate watermarks even for
disabled pipes. We were already doing that for skl+. We'll
leave out gmch platforms for now since those do the merging
in a different manner and should work as is. We'll want to
unify this eventually, but play it safe for now and just put
in a FIXME.
Cc: stable(a)vger.kernel.org
Cc: Matt Roper <matthew.d.roper(a)intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Fixes: b6b178a77210 ("drm/i915: Calculate ironlake intermediate watermarks correctly, v2.")
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/intel_display.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fe045abb6472..1e963dcebf2d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12818,17 +12818,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
intel_check_cpu_fifo_underruns(dev_priv);
intel_check_pch_fifo_underruns(dev_priv);
- if (!new_crtc_state->active) {
- /*
- * Make sure we don't call initial_watermarks
- * for ILK-style watermark updates.
- *
- * No clue what this is supposed to achieve.
- */
- if (INTEL_GEN(dev_priv) >= 9)
- dev_priv->display.initial_watermarks(intel_state,
- new_intel_crtc_state);
- }
+ /* FIXME unify this for all platforms */
+ if (!new_crtc_state->active &&
+ !HAS_GMCH_DISPLAY(dev_priv) &&
+ dev_priv->display.initial_watermarks)
+ dev_priv->display.initial_watermarks(intel_state,
+ new_intel_crtc_state);
}
}
--
2.18.1
A Xen PVH guest has no associated qemu device model, so trying to
unplug any emulated devices makes no sense at all.
Bail out early from xen_unplug_emulated_devices() when running as PVH
guest. This will avoid issuing the boot message:
[ 0.000000] Xen Platform PCI: unrecognised magic value
Cc: <stable(a)vger.kernel.org> # 4.11
Signed-off-by: Juergen Gross <jgross(a)suse.com>
---
arch/x86/xen/platform-pci-unplug.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 66ab96a4e2b3..96d7f7d39cb9 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -134,6 +134,10 @@ void xen_unplug_emulated_devices(void)
{
int r;
+ /* PVH guests don't have emulated devices. */
+ if (xen_pvh_domain())
+ return;
+
/* user explicitly requested no unplug */
if (xen_emul_unplug & XEN_UNPLUG_NEVER)
return;
--
2.16.4
If there's no entry to drop in bucket that corresponds to the hash,
early_drop() should look for it in other buckets. But since it increments
hash instead of bucket number, it actually looks in the same bucket 8
times: hsize is 16k by default (14 bits) and hash is 32-bit value, so
reciprocal_scale(hash, hsize) returns the same value for hash..hash+7 in
most cases.
Fix it by increasing bucket number instead of hash and rename _hash
to bucket to avoid future confusion.
Fixes: 3e86638e9a0b ("netfilter: conntrack: consider ct netns in early_drop logic")
Cc: <stable(a)vger.kernel.org> # v4.7+
Signed-off-by: Vasily Khoruzhick <vasilykh(a)arista.com>
---
net/netfilter/nf_conntrack_core.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ca1168d67fac..a04af246b184 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1073,19 +1073,22 @@ static unsigned int early_drop_list(struct net *net,
return drops;
}
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static noinline int early_drop(struct net *net, unsigned int hash)
{
unsigned int i;
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned int hash, hsize, drops;
+ unsigned int bucket, hsize, drops;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hsize);
- hash = reciprocal_scale(_hash++, hsize);
+ if (!i)
+ bucket = reciprocal_scale(hash, hsize);
+ else
+ bucket = (bucket + 1) % hsize;
- drops = early_drop_list(net, &ct_hash[hash]);
+ drops = early_drop_list(net, &ct_hash[bucket]);
rcu_read_unlock();
if (drops) {
--
2.19.1
The patch titled
Subject: mm/hmm: fix race between hmm_mirror_unregister() and mmu_notifier callback
has been added to the -mm tree. Its filename is
mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-hmm-fix-race-between-hmm_mirror…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-hmm-fix-race-between-hmm_mirror…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/hmm: fix race between hmm_mirror_unregister() and mmu_notifier callback
In hmm_mirror_unregister(), mm->hmm is set to NULL and then
mmu_notifier_unregister_no_release() is called. That creates a small
window where mmu_notifier can call mmu_notifier_ops with mm->hmm equal to
NULL. Fix this by first unregistering mmu notifier callbacks and then
setting mm->hmm to NULL.
Similarly in hmm_register(), set mm->hmm before registering mmu_notifier
callbacks so callback functions always see mm->hmm set.
Link: http://lkml.kernel.org/r/20181019160442.18723-4-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hmm.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
--- a/mm/hmm.c~mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback
+++ a/mm/hmm.c
@@ -91,16 +91,6 @@ static struct hmm *hmm_register(struct m
spin_lock_init(&hmm->lock);
hmm->mm = mm;
- /*
- * We should only get here if hold the mmap_sem in write mode ie on
- * registration of first mirror through hmm_mirror_register()
- */
- hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
- if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
- kfree(hmm);
- return NULL;
- }
-
spin_lock(&mm->page_table_lock);
if (!mm->hmm)
mm->hmm = hmm;
@@ -108,12 +98,27 @@ static struct hmm *hmm_register(struct m
cleanup = true;
spin_unlock(&mm->page_table_lock);
- if (cleanup) {
- mmu_notifier_unregister(&hmm->mmu_notifier, mm);
- kfree(hmm);
- }
+ if (cleanup)
+ goto error;
+
+ /*
+ * We should only get here if hold the mmap_sem in write mode ie on
+ * registration of first mirror through hmm_mirror_register()
+ */
+ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
+ goto error_mm;
return mm->hmm;
+
+error_mm:
+ spin_lock(&mm->page_table_lock);
+ if (mm->hmm == hmm)
+ mm->hmm = NULL;
+ spin_unlock(&mm->page_table_lock);
+error:
+ kfree(hmm);
+ return NULL;
}
void hmm_mm_destroy(struct mm_struct *mm)
@@ -278,12 +283,13 @@ void hmm_mirror_unregister(struct hmm_mi
if (!should_unregister || mm == NULL)
return;
+ mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+
spin_lock(&mm->page_table_lock);
if (mm->hmm == hmm)
mm->hmm = NULL;
spin_unlock(&mm->page_table_lock);
- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
kfree(hmm);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
_
Patches currently in -mm which might be from rcampbell(a)nvidia.com are
mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3.patch
mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback.patch
The patch titled
Subject: mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly
has been added to the -mm tree. Its filename is
mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-rmap-map_pte-was-not-handling-p…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-rmap-map_pte-was-not-handling-p…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly
Private ZONE_DEVICE pages use a special pte entry and thus are not
present. Properly handle this case in map_pte(), it is already handled in
check_pte(), the map_pte() part was lost in some rebase most probably.
Without this patch the slow migration path cannot migrate any private
ZONE_DEVICE memory back to regular memory. This was found after stress
testing migration back to system memory. This can ultimately lead to the
CPU constantly page-fault looping on the special swap entry.
Link: http://lkml.kernel.org/r/20181019160442.18723-3-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_vma_mapped.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
--- a/mm/page_vma_mapped.c~mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3
+++ a/mm/page_vma_mapped.c
@@ -21,7 +21,29 @@ static bool map_pte(struct page_vma_mapp
if (!is_swap_pte(*pvmw->pte))
return false;
} else {
- if (!pte_present(*pvmw->pte))
+ /*
+ * We get here when we are trying to unmap a private
+ * device page from the process address space. Such
+ * page is not CPU accessible and thus is mapped as
+ * a special swap entry, nonetheless it still does
+ * count as a valid regular mapping for the page (and
+ * is accounted as such in page maps count).
+ *
+ * So handle this special case as if it was a normal
+ * page mapping ie lock CPU page table and returns
+ * true.
+ *
+ * For more details on device private memory see HMM
+ * (include/linux/hmm.h or mm/hmm.c).
+ */
+ if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
+
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (!is_device_private_entry(entry))
+ return false;
+ } else if (!pte_present(*pvmw->pte))
return false;
}
}
_
Patches currently in -mm which might be from rcampbell(a)nvidia.com are
mm-rmap-map_pte-was-not-handling-private-zone_device-page-properly-v3.patch
mm-hmm-fix-race-between-hmm_mirror_unregister-and-mmu_notifier-callback.patch
From: Ralph Campbell <rcampbell(a)nvidia.com>
In hmm_mirror_unregister(), mm->hmm is set to NULL and then
mmu_notifier_unregister_no_release() is called. That creates a small
window where mmu_notifier can call mmu_notifier_ops with mm->hmm equal
to NULL. Fix this by first unregistering mmu notifier callbacks and
then setting mm->hmm to NULL.
Similarly in hmm_register(), set mm->hmm before registering mmu_notifier
callbacks so callback functions always see mm->hmm set.
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Reviewed-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
---
mm/hmm.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index 9a068a1da487..a16678d08127 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -91,16 +91,6 @@ static struct hmm *hmm_register(struct mm_struct *mm)
spin_lock_init(&hmm->lock);
hmm->mm = mm;
- /*
- * We should only get here if hold the mmap_sem in write mode ie on
- * registration of first mirror through hmm_mirror_register()
- */
- hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
- if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
- kfree(hmm);
- return NULL;
- }
-
spin_lock(&mm->page_table_lock);
if (!mm->hmm)
mm->hmm = hmm;
@@ -108,12 +98,27 @@ static struct hmm *hmm_register(struct mm_struct *mm)
cleanup = true;
spin_unlock(&mm->page_table_lock);
- if (cleanup) {
- mmu_notifier_unregister(&hmm->mmu_notifier, mm);
- kfree(hmm);
- }
+ if (cleanup)
+ goto error;
+
+ /*
+ * We should only get here if hold the mmap_sem in write mode ie on
+ * registration of first mirror through hmm_mirror_register()
+ */
+ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
+ goto error_mm;
return mm->hmm;
+
+error_mm:
+ spin_lock(&mm->page_table_lock);
+ if (mm->hmm == hmm)
+ mm->hmm = NULL;
+ spin_unlock(&mm->page_table_lock);
+error:
+ kfree(hmm);
+ return NULL;
}
void hmm_mm_destroy(struct mm_struct *mm)
@@ -278,12 +283,13 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror)
if (!should_unregister || mm == NULL)
return;
+ mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+
spin_lock(&mm->page_table_lock);
if (mm->hmm == hmm)
mm->hmm = NULL;
spin_unlock(&mm->page_table_lock);
- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
kfree(hmm);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
--
2.17.2
The patch titled
Subject: hugetlbfs: dirty pages as they are added to pagecache
has been removed from the -mm tree. Its filename was
hugetlbfs-dirty-pages-as-they-are-added-to-pagecache.patch
This patch was dropped because an alternative patch was merged
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlbfs: dirty pages as they are added to pagecache
Some test systems were experiencing negative huge page reserve counts and
incorrect file block counts. This was traced to /proc/sys/vm/drop_caches
removing clean pages from hugetlbfs file pagecaches. When non-hugetlbfs
explicit code removes the pages, the appropriate accounting is not
performed.
This can be recreated as follows:
fallocate -l 2M /dev/hugepages/foo
echo 1 > /proc/sys/vm/drop_caches
fallocate -l 2M /dev/hugepages/foo
grep -i huge /proc/meminfo
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
HugePages_Total: 2048
HugePages_Free: 2047
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 4194304 kB
ls -lsh /dev/hugepages/foo
4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo
To address this issue, dirty pages as they are added to pagecache. This
can easily be reproduced with fallocate as shown above. Read faulted
pages will eventually end up being marked dirty. But there is a window
where they are clean and could be impacted by code such as drop_caches.
So, just dirty them all as they are added to the pagecache.
In addition, it makes little sense to even try to drop hugetlbfs pagecache
pages, so disable calls to these filesystems in drop_caches code.
Link: http://lkml.kernel.org/r/20181018041022.4529-1-mike.kravetz@oracle.com
Fixes: 70c3547e36f5 ("hugetlbfs: add hugetlbfs_fallocate()")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/fs/drop_caches.c~hugetlbfs-dirty-pages-as-they-are-added-to-pagecache
+++ a/fs/drop_caches.c
@@ -9,6 +9,7 @@
#include <linux/writeback.h>
#include <linux/sysctl.h>
#include <linux/gfp.h>
+#include <linux/magic.h>
#include "internal.h"
/* A global variable is a bit ugly, but it keeps the code simple */
@@ -18,6 +19,12 @@ static void drop_pagecache_sb(struct sup
{
struct inode *inode, *toput_inode = NULL;
+ /*
+ * It makes no sense to try and drop hugetlbfs page cache pages.
+ */
+ if (sb->s_magic == HUGETLBFS_MAGIC)
+ return;
+
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
--- a/mm/hugetlb.c~hugetlbfs-dirty-pages-as-they-are-added-to-pagecache
+++ a/mm/hugetlb.c
@@ -3690,6 +3690,12 @@ int huge_add_to_page_cache(struct page *
return err;
ClearPagePrivate(page);
+ /*
+ * set page dirty so that it will not be removed from cache/file
+ * by non-hugetlbfs specific code paths.
+ */
+ set_page_dirty(page);
+
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlbfs-dirty-pages-as-they-are-added-to-pagecache-v2.patch
The patch titled
Subject: hugetlbfs: dirty pages as they are added to pagecache
has been added to the -mm tree. Its filename is
hugetlbfs-dirty-pages-as-they-are-added-to-pagecache-v2.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/hugetlbfs-dirty-pages-as-they-are-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/hugetlbfs-dirty-pages-as-they-are-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlbfs: dirty pages as they are added to pagecache
Some test systems were experiencing negative huge page reserve counts and
incorrect file block counts. This was traced to /proc/sys/vm/drop_caches
removing clean pages from hugetlbfs file pagecaches. When non-hugetlbfs
explicit code removes the pages, the appropriate accounting is not
performed.
This can be recreated as follows:
fallocate -l 2M /dev/hugepages/foo
echo 1 > /proc/sys/vm/drop_caches
fallocate -l 2M /dev/hugepages/foo
grep -i huge /proc/meminfo
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
HugePages_Total: 2048
HugePages_Free: 2047
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 4194304 kB
ls -lsh /dev/hugepages/foo
4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo
To address this issue, dirty pages as they are added to pagecache. This
can easily be reproduced with fallocate as shown above. Read faulted
pages will eventually end up being marked dirty. But there is a window
where they are clean and could be impacted by code such as drop_caches.
So, just dirty them all as they are added to the pagecache.
Link: http://lkml.kernel.org/r/b5be45b8-5afe-56cd-9482-28384699a049@oracle.com
Fixes: 6bda666a03f0 ("hugepages: fold find_or_alloc_pages into huge_no_page()")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Khalid Aziz <khalid.aziz(a)oracle.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/mm/hugetlb.c~hugetlbfs-dirty-pages-as-they-are-added-to-pagecache-v2
+++ a/mm/hugetlb.c
@@ -3690,6 +3690,12 @@ int huge_add_to_page_cache(struct page *
return err;
ClearPagePrivate(page);
+ /*
+ * set page dirty so that it will not be removed from cache/file
+ * by non-hugetlbfs specific code paths.
+ */
+ set_page_dirty(page);
+
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlbfs-dirty-pages-as-they-are-added-to-pagecache-v2.patch
hugetlbfs-dirty-pages-as-they-are-added-to-pagecache.patch
We are an imaging team who can process 300+ images daily.
If you need any image editing service, please contact us today.
We do mainly images cut out and clipping path, masking.
Such as for your ecommerce photos, jewelry photos retouching, also it is
for beauty portraits and skin images
and wedding photos.
We provide test editing if you send some photos.
Thanks,
Kate
We are an imaging team who can process 300+ images daily.
If you need any image editing service, please contact us today.
We do mainly images cut out and clipping path, masking.
Such as for your ecommerce photos, jewelry photos retouching, also it is
for beauty portraits and skin images
and wedding photos.
We provide test editing if you send some photos.
Thanks,
Kate
First, rename out_no_locality to out_locality for bailing out on
both tpm_cmd_ready() and tpm_request_locality() failure.
Second, ignore the return value of tpm_go_idle() as it may override
the return value of the actual tpm operation; a tpm_go_idle() error
will be caught on any subsequent command.
Last, fix the wrong 'goto out', that jumped back instead of forward.
Cc: stable(a)vger.kernel.org
Fixes: 627448e85c76 ("tpm: separate cmd_ready/go_idle from runtime_pm")
Signed-off-by: Tomas Winkler <tomas.winkler(a)intel.com>
---
drivers/char/tpm/tpm-interface.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 129f640424b7..95db630dd722 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -477,13 +477,15 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip,
if (need_locality) {
rc = tpm_request_locality(chip, flags);
- if (rc < 0)
- goto out_no_locality;
+ if (rc < 0) {
+ need_locality = false;
+ goto out_locality;
+ }
}
rc = tpm_cmd_ready(chip, flags);
if (rc)
- goto out;
+ goto out_locality;
rc = tpm2_prepare_space(chip, space, ordinal, buf);
if (rc)
@@ -547,14 +549,13 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip,
dev_err(&chip->dev, "tpm2_commit_space: error %d\n", rc);
out:
- rc = tpm_go_idle(chip, flags);
- if (rc)
- goto out;
+ /* may fail but do not override previous error value in rc */
+ tpm_go_idle(chip, flags);
+out_locality:
if (need_locality)
tpm_relinquish_locality(chip, flags);
-out_no_locality:
if (chip->ops->clk_enable != NULL)
chip->ops->clk_enable(chip, false);
--
2.14.4
Some test systems were experiencing negative huge page reserve
counts and incorrect file block counts. This was traced to
/proc/sys/vm/drop_caches removing clean pages from hugetlbfs
file pagecaches. When non-hugetlbfs explicit code removes the
pages, the appropriate accounting is not performed.
This can be recreated as follows:
fallocate -l 2M /dev/hugepages/foo
echo 1 > /proc/sys/vm/drop_caches
fallocate -l 2M /dev/hugepages/foo
grep -i huge /proc/meminfo
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
HugePages_Total: 2048
HugePages_Free: 2047
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 4194304 kB
ls -lsh /dev/hugepages/foo
4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo
To address this issue, dirty pages as they are added to pagecache.
This can easily be reproduced with fallocate as shown above. Read
faulted pages will eventually end up being marked dirty. But there
is a window where they are clean and could be impacted by code such
as drop_caches. So, just dirty them all as they are added to the
pagecache.
In addition, it makes little sense to even try to drop hugetlbfs
pagecache pages, so disable calls to these filesystems in drop_caches
code.
Fixes: 70c3547e36f5 ("hugetlbfs: add hugetlbfs_fallocate()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
---
fs/drop_caches.c | 7 +++++++
mm/hugetlb.c | 6 ++++++
2 files changed, 13 insertions(+)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 82377017130f..b72c5bc502a8 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -9,6 +9,7 @@
#include <linux/writeback.h>
#include <linux/sysctl.h>
#include <linux/gfp.h>
+#include <linux/magic.h>
#include "internal.h"
/* A global variable is a bit ugly, but it keeps the code simple */
@@ -18,6 +19,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
struct inode *inode, *toput_inode = NULL;
+ /*
+ * It makes no sense to try and drop hugetlbfs page cache pages.
+ */
+ if (sb->s_magic == HUGETLBFS_MAGIC)
+ return;
+
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5c390f5a5207..7b5c0ad9a6bd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3690,6 +3690,12 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
return err;
ClearPagePrivate(page);
+ /*
+ * set page dirty so that it will not be removed from cache/file
+ * by non-hugetlbfs specific code paths.
+ */
+ set_page_dirty(page);
+
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
--
2.17.2
This reverts commit 8b8f53af1ed9df88a4c0fbfdf3db58f62060edf3.
splice_dentry() is used by three places. For two places, req->r_dentry
is passed to splice_dentry(). In the case of error, req->r_dentry does
not get updated. So splice_dentry() should not drop reference.
Cc: stable(a)vger.kernel.org #4.18
Signed-off-by: "Yan, Zheng" <zyan(a)redhat.com>
---
fs/ceph/inode.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index c6bbb7aa99e4..375924b2bc86 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1140,7 +1140,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
if (IS_ERR(realdn)) {
pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
PTR_ERR(realdn), dn, in, ceph_vinop(in));
- dput(dn);
dn = realdn; /* note realdn contains the error */
goto out;
} else if (realdn) {
--
2.17.1
From: "H. Peter Anvin" <hpa(a)zytor.com>
On architectures with CBAUDEX == 0 (Alpha and PowerPC), the code in
tty_baudrate.c does not do any limit checking on the baud_table[] array,
and in fact a buffer overrun is possible on both architectures. Add a
limit check to prevent that situation.
This will be followed by a much bigger cleanup/simplification patch.
Signed-off-by: H. Peter Anvin (Intel) <hpa(a)zytor.com>
Requested-by: Johan Hovold <johan(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Jiri Slaby <jslaby(a)suse.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Richard Henderson <rth(a)twiddle.net>
Cc: Ivan Kokshaysky <ink(a)jurassic.park.msu.ru>
Cc: Matt Turner <mattst88(a)gmail.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Kate Stewart <kstewart(a)linuxfoundation.org>
Cc: Philippe Ombredanne <pombredanne(a)nexb.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Eugene Syromiatnikov <esyr(a)redhat.com>
Cc: <linux-alpha(a)vger.kernel.org>
Cc: <linux-serial(a)vger.kernel.org>
Cc: Alan Cox <alan(a)lxorguk.ukuu.org.uk>
Cc: <stable(a)vger.kernel.org>
---
drivers/tty/tty_baudrate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c
index 7576ceace571..f438eaa68246 100644
--- a/drivers/tty/tty_baudrate.c
+++ b/drivers/tty/tty_baudrate.c
@@ -77,7 +77,7 @@ speed_t tty_termios_baud_rate(struct ktermios *termios)
else
cbaud += 15;
}
- return baud_table[cbaud];
+ return cbaud >= n_baud_table ? 0 : baud_table[cbaud];
}
EXPORT_SYMBOL(tty_termios_baud_rate);
@@ -113,7 +113,7 @@ speed_t tty_termios_input_baud_rate(struct ktermios *termios)
else
cbaud += 15;
}
- return baud_table[cbaud];
+ return cbaud >= n_baud_table ? 0 : baud_table[cbaud];
#else /* IBSHIFT */
return tty_termios_baud_rate(termios);
#endif /* IBSHIFT */
--
2.14.4
Commit ea7e0480a4b695d0aa6b3 ("MIPS: VDSO: Always map near top of user
memory") set VDSO_RANDOMIZE_SIZE to 256MB for 64-bit kernels. However,
arch/mips/mm/mmap.c shows that MIN_GAP is 128MB, which means that
mmap_base may be at (user_address_top - 128MB). The stack can then end
up surrounded by mmapped areas, so stack expansion fails and causes a
segmentation fault. Therefore, VDSO_RANDOMIZE_SIZE should be less than
MIN_GAP, and this patch reduces it to 64MB.
By the way, not all of VDSO_RANDOMIZE_SIZE can be used for vdso_base()
randomization, because the VDSO needs some room to locate itself (this
patch reserves 64KB).
Cc: stable(a)vger.kernel.org
Fixes: ea7e0480a4b695d0aa ("MIPS: VDSO: Always map near top of user memory")
Signed-off-by: Huacai Chen <chenhc(a)lemote.com>
---
arch/mips/include/asm/processor.h | 2 +-
arch/mips/kernel/vdso.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 49d6046..c373eb6 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -81,7 +81,7 @@ extern unsigned int vced_count, vcei_count;
#endif
-#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M)
+#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_64M)
extern unsigned long mips_stack_top(void);
#define STACK_TOP mips_stack_top()
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 48a9c6b..d6232d9 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -106,7 +106,7 @@ static unsigned long vdso_base(void)
base = STACK_TOP + PAGE_SIZE;
if (current->flags & PF_RANDOMIZE) {
- base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1);
+ base += get_random_int() & (VDSO_RANDOMIZE_SIZE - SZ_64K - 1);
base = PAGE_ALIGN(base);
}
--
2.7.0
From: Sascha Hauer <s.hauer(a)pengutronix.de>
[ Upstream commit eea96566c189c77e5272585984eb2729881a2f1d ]
The maximum CPU frequency for the i.MX53 QSB is 1GHz, so disable the
1.2GHz OPP. This makes the board work again with configs that have
cpufreq enabled like imx_v6_v7_defconfig on which the board stopped
working with the addition of cpufreq-dt support.
Fixes: 791f416608 ("ARM: dts: imx53: add cpufreq-dt support")
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/arm/boot/dts/imx53-qsb-common.dtsi | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index ef7658a78836..c1548adee789 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi
@@ -123,6 +123,17 @@
};
};
+&cpu0 {
+ /* CPU rated to 1GHz, not 1.2GHz as per the default settings */
+ operating-points = <
+ /* kHz uV */
+ 166666 850000
+ 400000 900000
+ 800000 1050000
+ 1000000 1200000
+ >;
+};
+
&esdhc1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_esdhc1>;
--
2.17.1
UDL doesn't support vblank functionality, so we don't need to
initialize vblank here (we are able to send page flip
completion events even without vblank initialization).
Moreover, the current drm_vblank_init() call with num_crtcs > 0 causes
a DRM_EVENT_FLIP_COMPLETE event with a zero timestamp to be sent every
time. This breaks userspace apps (for example weston) which
rely on the timestamp value.
Cc: stable(a)vger.kernel.org
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev(a)synopsys.com>
---
drivers/gpu/drm/udl/udl_main.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index f455f095a146..1b014d92855b 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -350,15 +350,10 @@ int udl_driver_load(struct drm_device *dev, unsigned long flags)
if (ret)
goto err;
- ret = drm_vblank_init(dev, 1);
- if (ret)
- goto err_fb;
-
drm_kms_helper_poll_init(dev);
return 0;
-err_fb:
- udl_fbdev_cleanup(dev);
+
err:
if (udl->urbs.count)
udl_free_urb_list(dev);
--
2.14.4
This is the start of the stable review cycle for the 4.4.162 release.
There are 48 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat Oct 20 17:54:03 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.162-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.162-rc1
Long Li <longli(a)microsoft.com>
HV: properly delay KVP packets when negotiation is in progress
Vitaly Kuznetsov <vkuznets(a)redhat.com>
Drivers: hv: kvp: fix IP Failover
K. Y. Srinivasan <kys(a)microsoft.com>
Drivers: hv: util: Pass the channel information during the init call
K. Y. Srinivasan <kys(a)microsoft.com>
Drivers: hv: utils: Invoke the poll function after handshake
Stephen Warren <swarren(a)nvidia.com>
usb: gadget: serial: fix oops when data rx'd after close
Alexey Brodkin <abrodkin(a)synopsys.com>
ARC: build: Get rid of toolchain check
Michael Neuling <mikey(a)neuling.org>
powerpc/tm: Avoid possible userspace r1 corruption on reclaim
Michael Neuling <mikey(a)neuling.org>
powerpc/tm: Fix userspace r13 corruption
James Cowgill <jcowgill(a)debian.org>
RISC-V: include linux/ftrace.h in asm-prototypes.h
Nathan Chancellor <natechancellor(a)gmail.com>
net/mlx4: Use cpumask_available for eq->affinity_mask
Michael Schmitz <schmitzmic(a)gmail.com>
Input: atakbd - fix Atari CapsLock behaviour
Andreas Schwab <schwab(a)linux-m68k.org>
Input: atakbd - fix Atari keymap
Keerthy <j-keerthy(a)ti.com>
clocksource/drivers/ti-32k: Add CLOCK_SOURCE_SUSPEND_NONSTOP flag for non-am43 SoCs
Jozef Balga <jozef.balga(a)gmail.com>
media: af9035: prevent buffer overflow on write
Andy Lutomirski <luto(a)kernel.org>
x86/fpu: Finish excising 'eagerfpu'
Rik van Riel <riel(a)redhat.com>
x86/fpu: Remove struct fpu::counter
Andy Lutomirski <luto(a)kernel.org>
x86/fpu: Remove use_eager_fpu()
Paolo Bonzini <pbonzini(a)redhat.com>
KVM: x86: remove eager_fpu field of struct kvm_vcpu_arch
Eric Dumazet <edumazet(a)google.com>
rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096
Florian Fainelli <f.fainelli(a)gmail.com>
net: systemport: Fix wake-up interrupt race during resume
Maxime Chevallier <maxime.chevallier(a)bootlin.com>
net: mvpp2: Extract the correct ethtype from the skb for tx csum offload
Ido Schimmel <idosch(a)mellanox.com>
team: Forbid enslaving team device to itself
Shahed Shaikh <shahed.shaikh(a)cavium.com>
qlcnic: fix Tx descriptor corruption on 82xx devices
Yu Zhao <yuzhao(a)google.com>
net/usb: cancel pending work when unbinding smsc75xx
Sean Tranchetti <stranche(a)codeaurora.org>
netlabel: check for IPV4MASK in addrinfo_get
Jeff Barnhill <0xeffeff(a)gmail.com>
net/ipv6: Display all addresses in output of /proc/net/if_inet6
Sabrina Dubroca <sd(a)queasysnail.net>
net: ipv4: update fnhe_pmtu when first hop's MTU changes
Eric Dumazet <edumazet(a)google.com>
ipv4: fix use-after-free in ip_cmsg_recv_dstaddr()
Paolo Abeni <pabeni(a)redhat.com>
ip_tunnel: be careful when accessing the inner header
Paolo Abeni <pabeni(a)redhat.com>
ip6_tunnel: be careful when accessing the inner header
Mahesh Bandewar <maheshb(a)google.com>
bonding: avoid possible dead-lock
Michael Chan <michael.chan(a)broadcom.com>
bnxt_en: Fix TX timeout during netpoll.
Hou Tao <houtao1(a)huawei.com>
jffs2: return -ERANGE when xattr buffer is too small
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Don't print a warning when setting link state for disabled ports
Edgar Cherkasov <echerkasov(a)dev.rtsoft.ru>
i2c: i2c-scmi: fix for i2c_smbus_write_block_data
Adrian Hunter <adrian.hunter(a)intel.com>
perf script python: Fix export-to-postgresql.py occasional failure
Mikulas Patocka <mpatocka(a)redhat.com>
mach64: detect the dot clock divider correctly on sparc
Jann Horn <jannh(a)google.com>
mm/vmstat.c: fix outdated vmstat_text
Theodore Ts'o <tytso(a)mit.edu>
ext4: add corruption check in ext4_xattr_set_entry()
Amber Lin <Amber.Lin(a)amd.com>
drm/amdgpu: Fix SDMA HQD destroy error on gfx_v7
Nicolas Ferre <nicolas.ferre(a)microchip.com>
ARM: dts: at91: add new compatibility string for macb on sama5d3
Nicolas Ferre <nicolas.ferre(a)microchip.com>
net: macb: disable scatter-gather for macb on sama5d3
Jongsung Kim <neidhard.kim(a)lge.com>
stmmac: fix valid numbers of unicast filter entries
Yu Zhao <yuzhao(a)google.com>
sound: enable interrupt after dma buffer initialization
Tony Lindgren <tony(a)atomide.com>
mfd: omap-usb-host: Fix dts probe of children
Lei Yang <Lei.Yang(a)windriver.com>
selftests/efivarfs: add required kernel configs
Danny Smith <danny.smith(a)axis.com>
ASoC: sigmadsp: safeload should not have lower byte limit
Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
ASoC: wm8804: Add ACPI support
-------------
Diffstat:
Documentation/devicetree/bindings/net/macb.txt | 1 +
Documentation/kernel-parameters.txt | 5 --
Makefile | 4 +-
arch/arc/Makefile | 14 ----
arch/arm/boot/dts/sama5d3_emac.dtsi | 2 +-
arch/powerpc/kernel/tm.S | 20 +++++-
arch/riscv/include/asm/asm-prototypes.h | 7 ++
arch/x86/crypto/crc32c-intel_glue.c | 17 ++---
arch/x86/include/asm/cpufeatures.h | 1 -
arch/x86/include/asm/fpu/internal.h | 37 +----------
arch/x86/include/asm/fpu/types.h | 34 ----------
arch/x86/include/asm/kvm_host.h | 1 -
arch/x86/kernel/fpu/core.c | 41 ++----------
arch/x86/kernel/fpu/signal.c | 8 +--
arch/x86/kvm/cpuid.c | 5 +-
arch/x86/kvm/x86.c | 10 ---
drivers/clocksource/timer-ti-32k.c | 3 +
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 +-
drivers/hv/hv_fcopy.c | 2 +-
drivers/hv/hv_kvp.c | 40 +++++++++++-
drivers/hv/hv_snapshot.c | 4 +-
drivers/hv/hv_util.c | 1 +
drivers/hv/hyperv_vmbus.h | 5 ++
drivers/i2c/busses/i2c-scmi.c | 1 +
drivers/input/keyboard/atakbd.c | 74 ++++++++--------------
drivers/media/usb/dvb-usb-v2/af9035.c | 6 +-
drivers/mfd/omap-usb-host.c | 11 ++--
drivers/net/bonding/bond_main.c | 43 +++++--------
drivers/net/ethernet/broadcom/bcmsysport.c | 22 +++----
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++-
drivers/net/ethernet/cadence/macb.c | 8 +++
drivers/net/ethernet/marvell/mvpp2.c | 10 +--
drivers/net/ethernet/mellanox/mlx4/eq.c | 3 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 ++-
.../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 +-
.../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 ++--
.../net/ethernet/stmicro/stmmac/stmmac_platform.c | 5 +-
drivers/net/team/team.c | 5 ++
drivers/net/usb/smsc75xx.c | 1 +
drivers/usb/gadget/function/u_serial.c | 2 +-
drivers/usb/host/xhci-hub.c | 18 +++---
drivers/video/fbdev/aty/atyfb.h | 3 +-
drivers/video/fbdev/aty/atyfb_base.c | 7 +-
drivers/video/fbdev/aty/mach64_ct.c | 10 +--
fs/ext4/xattr.c | 22 ++++---
fs/jffs2/xattr.c | 6 +-
include/linux/hyperv.h | 1 +
include/linux/netdevice.h | 7 ++
include/net/bonding.h | 7 +-
include/net/ip_fib.h | 1 +
mm/vmstat.c | 1 -
net/core/dev.c | 28 +++++++-
net/core/rtnetlink.c | 6 ++
net/ipv4/fib_frontend.c | 12 ++--
net/ipv4/fib_semantics.c | 50 +++++++++++++++
net/ipv4/ip_sockglue.c | 3 +-
net/ipv4/ip_tunnel.c | 9 +++
net/ipv6/addrconf.c | 4 +-
net/ipv6/ip6_tunnel.c | 13 +++-
net/netlabel/netlabel_unlabeled.c | 3 +-
sound/hda/hdac_controller.c | 8 ++-
sound/soc/codecs/sigmadsp.c | 3 +-
sound/soc/codecs/wm8804-i2c.c | 15 ++++-
tools/perf/scripts/python/export-to-postgresql.py | 9 +++
tools/testing/selftests/efivarfs/config | 1 +
67 files changed, 404 insertions(+), 340 deletions(-)
This is the start of the stable review cycle for the 4.14.78 release.
There are 41 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat Oct 20 17:53:55 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.78-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.14.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.14.78-rc1
Michael J. Ruhl <michael.j.ruhl(a)intel.com>
IB/hfi1: Fix destroy_qp hang after a link down
Wolfram Sang <wsa+renesas(a)sang-engineering.com>
i2c: rcar: handle RXDMA HW behaviour on Gen3
Clint Taylor <clinton.a.taylor(a)intel.com>
drm/i915/glk: Add Quirk for GLK NUC HDMI port issues.
Dave Jiang <dave.jiang(a)intel.com>
mm: disallow mappings that conflict for devm_memremap_pages()
Gilad Ben-Yossef <gilad(a)benyossef.com>
staging: ccree: check DMA pool buf !NULL before free
Ville Syrjälä <ville.syrjala(a)linux.intel.com>
drm/i915: Nuke the LVDS lid notifier
Natanael Copa <ncopa(a)alpinelinux.org>
HID: quirks: fix support for Apple Magic Keyboards
Alexey Brodkin <abrodkin(a)synopsys.com>
ARC: build: Don't set CROSS_COMPILE in arch's Makefile
Alexey Brodkin <abrodkin(a)synopsys.com>
ARC: build: Get rid of toolchain check
Linus Torvalds <torvalds(a)linux-foundation.org>
mremap: properly flush TLB before releasing the page
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "vfs: fix freeze protection in mnt_want_write_file() for overlayfs"
Christophe Leroy <christophe.leroy(a)c-s.fr>
powerpc/lib/feature-fixups: use raw_patch_instruction()
Arindam Nath <arindam.nath(a)amd.com>
iommu/amd: Return devid as alias for ACPI HID devices
Michael Neuling <mikey(a)neuling.org>
powerpc/tm: Avoid possible userspace r1 corruption on reclaim
Michael Neuling <mikey(a)neuling.org>
powerpc/tm: Fix userspace r13 corruption
James Cowgill <jcowgill(a)debian.org>
RISC-V: include linux/ftrace.h in asm-prototypes.h
Tao Ren <taoren(a)fb.com>
clocksource/drivers/fttmr010: Fix set_next_event handler
Nathan Chancellor <natechancellor(a)gmail.com>
net/mlx4: Use cpumask_available for eq->affinity_mask
Johannes Thumshirn <jthumshirn(a)suse.de>
scsi: sd: don't crash the host on invalid commands
Wen Xiong <wenxiong(a)linux.vnet.ibm.com>
scsi: ipr: System hung while dlpar adding primary ipr adapter back
Alexandru Gheorghe <alexandru-cosmin.gheorghe(a)arm.com>
drm: mali-dp: Call drm_crtc_vblank_reset on device init
Jisheng Zhang <Jisheng.Zhang(a)synaptics.com>
PCI: dwc: Fix scheduling while atomic issues
Kazuya Mizuguchi <kazuya.mizuguchi.ks(a)renesas.com>
ravb: do not write 1 to reserved bits
Christian Lamparter <chunkeey(a)gmail.com>
net: emac: fix fixed-link setup for the RTL8363SB switch
Michael Schmitz <schmitzmic(a)gmail.com>
Input: atakbd - fix Atari CapsLock behaviour
Andreas Schwab <schwab(a)linux-m68k.org>
Input: atakbd - fix Atari keymap
Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
intel_th: pci: Add Ice Lake PCH support
Laura Abbott <labbott(a)redhat.com>
scsi: ibmvscsis: Ensure partition name is properly NUL terminated
Laura Abbott <labbott(a)redhat.com>
scsi: ibmvscsis: Fix a stringop-overflow warning
Keerthy <j-keerthy(a)ti.com>
clocksource/drivers/ti-32k: Add CLOCK_SOURCE_SUSPEND_NONSTOP flag for non-am43 SoCs
Marek Lindner <mareklindner(a)neomailbox.ch>
batman-adv: fix hardif_neigh refcount on queue_work() failure
Marek Lindner <mareklindner(a)neomailbox.ch>
batman-adv: fix backbone_gw refcount on queue_work() failure
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated tvlv handler
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated global TT entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated softif_vlan entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated nc_node entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated gateway_node entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Fix segfault when writing to sysfs elp_interval
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Fix segfault when writing to throughput_override
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Avoid probe ELP information leak
Jozef Balga <jozef.balga(a)gmail.com>
media: af9035: prevent buffer overflow on write
-------------
Diffstat:
Makefile | 4 +-
arch/arc/Makefile | 24 +-----
arch/powerpc/include/asm/code-patching.h | 1 +
arch/powerpc/kernel/tm.S | 20 ++++-
arch/powerpc/lib/code-patching.c | 4 +-
arch/powerpc/lib/feature-fixups.c | 8 +-
arch/riscv/include/asm/asm-prototypes.h | 7 ++
drivers/clocksource/timer-fttmr010.c | 18 ++--
drivers/clocksource/timer-ti-32k.c | 3 +
drivers/gpu/drm/arm/malidp_drv.c | 1 +
drivers/gpu/drm/i915/i915_drv.c | 10 ---
drivers/gpu/drm/i915/i915_drv.h | 9 +-
drivers/gpu/drm/i915/intel_ddi.c | 13 ++-
drivers/gpu/drm/i915/intel_display.c | 21 ++++-
drivers/gpu/drm/i915/intel_drv.h | 3 +-
drivers/gpu/drm/i915/intel_lvds.c | 136 +------------------------------
drivers/hid/hid-core.c | 3 +
drivers/hwtracing/intel_th/pci.c | 5 ++
drivers/i2c/busses/i2c-rcar.c | 54 +++++++++++-
drivers/infiniband/hw/hfi1/chip.c | 7 +-
drivers/infiniband/hw/hfi1/pio.c | 42 ++++++++--
drivers/infiniband/hw/hfi1/pio.h | 2 +
drivers/input/keyboard/atakbd.c | 74 +++++++----------
drivers/iommu/amd_iommu.c | 6 ++
drivers/media/usb/dvb-usb-v2/af9035.c | 6 +-
drivers/net/ethernet/ibm/emac/core.c | 15 ++--
drivers/net/ethernet/mellanox/mlx4/eq.c | 3 +-
drivers/net/ethernet/renesas/ravb.h | 5 ++
drivers/net/ethernet/renesas/ravb_main.c | 11 +--
drivers/net/ethernet/renesas/ravb_ptp.c | 2 +-
drivers/pci/dwc/pcie-designware.c | 8 +-
drivers/pci/dwc/pcie-designware.h | 3 +-
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 5 +-
drivers/scsi/ipr.c | 106 ++++++++++++++----------
drivers/scsi/ipr.h | 1 +
drivers/scsi/sd.c | 3 +-
drivers/staging/ccree/ssi_buffer_mgr.c | 3 +-
fs/namespace.c | 7 +-
include/linux/huge_mm.h | 2 +-
kernel/memremap.c | 18 +++-
mm/huge_memory.c | 10 +--
mm/mremap.c | 30 +++----
net/batman-adv/bat_v_elp.c | 10 ++-
net/batman-adv/bridge_loop_avoidance.c | 10 ++-
net/batman-adv/gateway_client.c | 11 ++-
net/batman-adv/network-coding.c | 27 +++---
net/batman-adv/soft-interface.c | 25 ++++--
net/batman-adv/sysfs.c | 30 ++++---
net/batman-adv/translation-table.c | 6 +-
net/batman-adv/tvlv.c | 8 +-
50 files changed, 445 insertions(+), 395 deletions(-)
Hi,
I didn’t know if you had received my email from last week?
Can you direct me to the person that handles your company promo items?
Do you have any upcoming events, tradeshows or promotional needs?
We manufacture ALL custom LOGO and branded products.
The most asked about product that we make, is the custom printed USB flash
drives!
We can print your logo on them and load your digital images, videos and
files!
If you need marketing, advertising, gifts or incentives, USB flash drives
are the solution!
Here is what we include:
-Any size memory you need: 64MB up to 128GB
-We will print your logo on both sides, just ask!
-Very Low Order Minimums
-Need them quickly? Not a problem, we offer Rush Service
NEW: We can make a custom shaped USB drive to look like your Logo or
product!
Email over a copy of your logo and we will create a design mock up for you
at no cost!
Our higher memory sizes are a really good option right now!
Ask about the “Double Your Memory” upgrade promotion going on right
now!
Pricing is low right now, so let us know what you need and we will get you
a quick quote.
We will beat any competitors pricing, send us your last invoice and we will
beat it!
We always offer great rates for schools and nonprofits as well.
Regards,
Lilly Koller
Logo USB Account Manager
From: Sascha Hauer <s.hauer(a)pengutronix.de>
[ Upstream commit eea96566c189c77e5272585984eb2729881a2f1d ]
The maximum CPU frequency for the i.MX53 QSB is 1GHz, so disable the
1.2GHz OPP. This makes the board work again with configs that have
cpufreq enabled like imx_v6_v7_defconfig on which the board stopped
working with the addition of cpufreq-dt support.
Fixes: 791f416608 ("ARM: dts: imx53: add cpufreq-dt support")
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/arm/boot/dts/imx53-qsb-common.dtsi | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index 53fd75c8ffcf..47894b41e4e2 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi
@@ -130,6 +130,17 @@
};
};
+&cpu0 {
+ /* CPU rated to 1GHz, not 1.2GHz as per the default settings */
+ operating-points = <
+ /* kHz uV */
+ 166666 850000
+ 400000 900000
+ 800000 1050000
+ 1000000 1200000
+ >;
+};
+
&esdhc1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_esdhc1>;
--
2.17.1
From: Sascha Hauer <s.hauer(a)pengutronix.de>
[ Upstream commit eea96566c189c77e5272585984eb2729881a2f1d ]
The maximum CPU frequency for the i.MX53 QSB is 1GHz, so disable the
1.2GHz OPP. This makes the board work again with configs that have
cpufreq enabled like imx_v6_v7_defconfig on which the board stopped
working with the addition of cpufreq-dt support.
Fixes: 791f416608 ("ARM: dts: imx53: add cpufreq-dt support")
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/arm/boot/dts/imx53-qsb-common.dtsi | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index c05e7cfd0cbc..c8a6a6868c46 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi
@@ -130,6 +130,17 @@
};
};
+&cpu0 {
+ /* CPU rated to 1GHz, not 1.2GHz as per the default settings */
+ operating-points = <
+ /* kHz uV */
+ 166666 850000
+ 400000 900000
+ 800000 1050000
+ 1000000 1200000
+ >;
+};
+
&esdhc1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_esdhc1>;
--
2.17.1
Tree/Branch: v3.16.60
Git describe: v3.16.60
Commit: d7d78c9371 Linux 3.16.60
Build Time: 33 min 53 sec
Passed: 10 / 10 (100.00 %)
Failed: 0 / 10 ( 0.00 %)
Errors: 0
Warnings: 17
Section Mismatches: 0
-------------------------------------------------------------------------------
defconfigs with issues (other than build errors):
5 warnings 0 mismatches : arm64-allmodconfig
2 warnings 0 mismatches : arm-multi_v5_defconfig
2 warnings 0 mismatches : arm-multi_v7_defconfig
1 warnings 0 mismatches : x86_64-defconfig
6 warnings 0 mismatches : arm-allmodconfig
1 warnings 0 mismatches : arm-allnoconfig
1 warnings 0 mismatches : x86_64-allnoconfig
9 warnings 0 mismatches : x86_64-allmodconfig
2 warnings 0 mismatches : arm64-defconfig
-------------------------------------------------------------------------------
Warnings Summary: 17
7 <stdin>:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
2 ../ipc/sem.c:385:6: warning: '___p1' may be used uninitialized in this function [-Wmaybe-uninitialized]
2 ../drivers/staging/vt6656/main_usb.c:1101:7: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
2 ../drivers/staging/vt6656/dpc.c:712:5: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
2 ../drivers/net/ethernet/broadcom/genet/bcmgenet.c:1347:17: warning: unused variable 'kdev' [-Wunused-variable]
2 ../drivers/media/dvb-frontends/drxk_hard.c:2223:3: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
2 ../drivers/media/dvb-frontends/drxd_hard.c:2631:3: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
1 ../drivers/staging/rtl8192ee/rtl8192ee/hw.c:529:5: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
1 ../drivers/staging/rtl8192ee/rtl8192ee/hw.c:524:4: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
1 ../drivers/staging/rtl8192ee/btcoexist/halbtc8821a2ant.c:2338:2: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
1 ../drivers/scsi/fnic/fnic_fcs.c:104:6: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
1 ../drivers/platform/x86/eeepc-laptop.c:279:10: warning: 'value' may be used uninitialized in this function [-Wmaybe-uninitialized]
1 ../drivers/net/wireless/rtlwifi/rtl8723be/hw.c:1132:2: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
1 ../drivers/mtd/nand/omap2.c:1318:12: warning: 'omap_calculate_ecc_bch_multi' defined but not used [-Wunused-function]
1 ../drivers/media/platform/davinci/vpfe_capture.c:291:12: warning: 'vpfe_get_ccdc_image_format' defined but not used [-Wunused-function]
1 ../drivers/media/platform/davinci/vpfe_capture.c:1718:1: warning: label 'unlock_out' defined but not used [-Wunused-label]
1 ../arch/x86/kernel/cpu/common.c:1160:13: warning: 'syscall32_cpu_init' defined but not used [-Wunused-function]
===============================================================================
Detailed per-defconfig build reports below:
-------------------------------------------------------------------------------
arm64-allmodconfig : PASS, 0 errors, 5 warnings, 0 section mismatches
Warnings:
../drivers/media/dvb-frontends/drxd_hard.c:2631:3: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
../drivers/media/dvb-frontends/drxk_hard.c:2223:3: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
../drivers/net/ethernet/broadcom/genet/bcmgenet.c:1347:17: warning: unused variable 'kdev' [-Wunused-variable]
../drivers/staging/vt6656/main_usb.c:1101:7: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
../drivers/staging/vt6656/dpc.c:712:5: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
-------------------------------------------------------------------------------
arm-multi_v5_defconfig : PASS, 0 errors, 2 warnings, 0 section mismatches
Warnings:
<stdin>:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
<stdin>:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
-------------------------------------------------------------------------------
arm-multi_v7_defconfig : PASS, 0 errors, 2 warnings, 0 section mismatches
Warnings:
<stdin>:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
<stdin>:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
-------------------------------------------------------------------------------
x86_64-defconfig : PASS, 0 errors, 1 warnings, 0 section mismatches
Warnings:
../drivers/platform/x86/eeepc-laptop.c:279:10: warning: 'value' may be used uninitialized in this function [-Wmaybe-uninitialized]
-------------------------------------------------------------------------------
arm-allmodconfig : PASS, 0 errors, 6 warnings, 0 section mismatches
Warnings:
<stdin>:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
../drivers/media/platform/davinci/vpfe_capture.c:1718:1: warning: label 'unlock_out' defined but not used [-Wunused-label]
../drivers/media/platform/davinci/vpfe_capture.c:291:12: warning: 'vpfe_get_ccdc_image_format' defined but not used [-Wunused-function]
../drivers/mtd/nand/omap2.c:1318:12: warning: 'omap_calculate_ecc_bch_multi' defined but not used [-Wunused-function]
../drivers/net/ethernet/broadcom/genet/bcmgenet.c:1347:17: warning: unused variable 'kdev' [-Wunused-variable]
<stdin>:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
-------------------------------------------------------------------------------
arm-allnoconfig : PASS, 0 errors, 1 warnings, 0 section mismatches
Warnings:
<stdin>:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
-------------------------------------------------------------------------------
x86_64-allnoconfig : PASS, 0 errors, 1 warnings, 0 section mismatches
Warnings:
../arch/x86/kernel/cpu/common.c:1160:13: warning: 'syscall32_cpu_init' defined but not used [-Wunused-function]
-------------------------------------------------------------------------------
x86_64-allmodconfig : PASS, 0 errors, 9 warnings, 0 section mismatches
Warnings:
../drivers/media/dvb-frontends/drxd_hard.c:2631:3: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
../drivers/media/dvb-frontends/drxk_hard.c:2223:3: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
../drivers/scsi/fnic/fnic_fcs.c:104:6: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
../drivers/staging/rtl8192ee/rtl8192ee/hw.c:524:4: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
../drivers/staging/rtl8192ee/rtl8192ee/hw.c:529:5: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
../drivers/staging/rtl8192ee/btcoexist/halbtc8821a2ant.c:2338:2: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
../drivers/net/wireless/rtlwifi/rtl8723be/hw.c:1132:2: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
../drivers/staging/vt6656/main_usb.c:1101:7: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
../drivers/staging/vt6656/dpc.c:712:5: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
-------------------------------------------------------------------------------
arm64-defconfig : PASS, 0 errors, 2 warnings, 0 section mismatches
Warnings:
../ipc/sem.c:385:6: warning: '___p1' may be used uninitialized in this function [-Wmaybe-uninitialized]
../ipc/sem.c:385:6: warning: '___p1' may be used uninitialized in this function [-Wmaybe-uninitialized]
-------------------------------------------------------------------------------
Passed with no errors, warnings or mismatches:
arm64-allnoconfig
Allow the PCI core to do runtime PM on devices without needing dummy
runtime PM callback functions when there is nothing device-specific to
do beyond PCI device power state management.
Implement this by letting the core change the device power state during
runtime PM transitions even if no callback functions are defined.
Fixes: a9c8088c7988 ("i2c: i801: Don't restore config registers on runtime PM")
Reported-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jarkko Nikula <jarkko.nikula(a)linux.intel.com>
---
This is related to my i2c-i801.c fix thread back in June which I completely
forgot till now: https://lkml.org/lkml/2018/6/27/642
Discussion back then was that it should be handled in the PCI PM instead
of having dummy functions in the drivers. I wanted to respin with a
patch.
---
drivers/pci/pci-driver.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index bef17c3fca67..6185b878ede1 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1239,7 +1239,7 @@ static int pci_pm_runtime_suspend(struct device *dev)
struct pci_dev *pci_dev = to_pci_dev(dev);
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
pci_power_t prev = pci_dev->current_state;
- int error;
+ int error = 0;
/*
* If pci_dev->driver is not set (unbound), we leave the device in D0,
@@ -1251,11 +1251,9 @@ static int pci_pm_runtime_suspend(struct device *dev)
return 0;
}
- if (!pm || !pm->runtime_suspend)
- return -ENOSYS;
-
pci_dev->state_saved = false;
- error = pm->runtime_suspend(dev);
+ if (pm && pm->runtime_suspend)
+ error = pm->runtime_suspend(dev);
if (error) {
/*
* -EBUSY and -EAGAIN is used to request the runtime PM core
@@ -1292,7 +1290,7 @@ static int pci_pm_runtime_suspend(struct device *dev)
static int pci_pm_runtime_resume(struct device *dev)
{
- int rc;
+ int rc = 0;
struct pci_dev *pci_dev = to_pci_dev(dev);
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
@@ -1306,14 +1304,12 @@ static int pci_pm_runtime_resume(struct device *dev)
if (!pci_dev->driver)
return 0;
- if (!pm || !pm->runtime_resume)
- return -ENOSYS;
-
pci_fixup_device(pci_fixup_resume_early, pci_dev);
pci_enable_wake(pci_dev, PCI_D0, false);
pci_fixup_device(pci_fixup_resume, pci_dev);
- rc = pm->runtime_resume(dev);
+ if (pm && pm->runtime_resume)
+ rc = pm->runtime_resume(dev);
pci_dev->runtime_d3cold = false;
--
2.19.1
Fix the synthetic event test case to remove event correctly.
If a command is redirected to the synthetic_events file without append
mode, all existing events are cleaned up before the command is executed
(parsed). This means "delete event" always fails to find the
target event.
Since the previous synthetic event code had a bug where it did not
return -ENOENT even when it failed to find the event to delete, this
test passed. With that bug fixed, this test fails because the test
itself has a bug.
This fixes the test by deleting the event right after adding it, and
by using append mode redirection ('>>') instead of normal
redirection ('>').
Fixes: f06eec4d0f2c ('selftests: ftrace: Add inter-event hist triggers testcases')
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Cc: Rajvi Jingar <rajvi.jingar(a)intel.com>
Cc: Tom Zanussi <tom.zanussi(a)linux.intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
.../trigger-synthetic-event-createremove.tc | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
index cef11377dcbd..c604438df13b 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -35,18 +35,18 @@ fi
reset_trigger
-echo "Test create synthetic event with an error"
-echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null
+echo "Test remove synthetic event"
+echo '!wakeup_latency u64 lat pid_t pid char comm[16]' >> synthetic_events
if [ -d events/synthetic/wakeup_latency ]; then
- fail "Created wakeup_latency synthetic event with an invalid format"
+ fail "Failed to delete wakeup_latency synthetic event"
fi
reset_trigger
-echo "Test remove synthetic event"
-echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null
if [ -d events/synthetic/wakeup_latency ]; then
- fail "Failed to delete wakeup_latency synthetic event"
+ fail "Created wakeup_latency synthetic event with an invalid format"
fi
do_reset
Hello,
on both my ppc32 systems I've tested (G4 PowerMac and G4 Mac-mini), I
see a regression after upgrading from 4.14.74 to 4.14.76: Very soon
after starting the kernel from yaboot, they drop me into an OF prompt,
message is "invalid memory access at" on the mini, and another one I
forgot on the PowerMac. FWIW, the ppc64 G5 PowerMac does fine.
Before I start bisecting - might take a few days -, is this already on
radar?
Christoph
On Sun, Oct 21, 2018 at 10:26 AM Monthero <rhmcruiser(a)gmail.com> wrote:
>
> Hi Amir thanks for the pointer.
> I will get in touch with maintainer of 3.16
> Yes it covers Neil's commit and apart from that it would need this replacement for dentry check in 3.16
>
> > - if (S_ISDIR(result->d_inode->i_mode)) {
> > + if (d_is_dir(result)) {
>
Fine, but why?
How is dereferencing dentry->d_flags any better than dereferencing
dentry->d_inode when dentry has an invalid value?
Thanks,
Amir.
From: Dexuan Cui <decui(a)microsoft.com>
In kvp_send_key(), we do need to call process_ib_ipinfo() if
message->kvp_hdr.operation is KVP_OP_GET_IP_INFO, because it turns out
the userland hv_kvp_daemon needs the info of operation, adapter_id and
addr_family. With the incorrect fc62c3b1977d, the host can't get the
VM's IP via KVP.
And, fc62c3b1977d added a "break;", but actually forgot to initialize
the key_size/value in the case of KVP_OP_SET, so the default key_size of
0 is passed to the kvp daemon, and the pool files
/var/lib/hyperv/.kvp_pool_* can't be updated.
This patch effectively rolls back the previous fc62c3b1977d, and
correctly fixes the "this statement may fall through" warnings.
This patch is tested on WS 2012 R2 and 2016.
Fixes: fc62c3b1977d ("Drivers: hv: kvp: Fix two "this statement may fall through" warnings")
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
Cc: Haiyang Zhang <haiyangz(a)microsoft.com>
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
---
drivers/hv/hv_kvp.c | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index a7513a8a8e37..9fbb15c62c6c 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -353,6 +353,9 @@ static void process_ib_ipinfo(void *in_msg, void *out_msg, int op)
out->body.kvp_ip_val.dhcp_enabled = in->kvp_ip_val.dhcp_enabled;
+ __attribute__ ((fallthrough));
+
+ case KVP_OP_GET_IP_INFO:
utf16s_to_utf8s((wchar_t *)in->kvp_ip_val.adapter_id,
MAX_ADAPTER_ID_SIZE,
UTF16_LITTLE_ENDIAN,
@@ -405,7 +408,11 @@ kvp_send_key(struct work_struct *dummy)
process_ib_ipinfo(in_msg, message, KVP_OP_SET_IP_INFO);
break;
case KVP_OP_GET_IP_INFO:
- /* We only need to pass on message->kvp_hdr.operation. */
+ /*
+ * We only need to pass on the info of operation, adapter_id
+ * and addr_family to the userland kvp daemon.
+ */
+ process_ib_ipinfo(in_msg, message, KVP_OP_GET_IP_INFO);
break;
case KVP_OP_SET:
switch (in_msg->body.kvp_set.data.value_type) {
@@ -446,9 +453,9 @@ kvp_send_key(struct work_struct *dummy)
}
- break;
-
- case KVP_OP_GET:
+ /*
+ * The key is always a string - utf16 encoding.
+ */
message->body.kvp_set.data.key_size =
utf16s_to_utf8s(
(wchar_t *)in_msg->body.kvp_set.data.key,
@@ -456,6 +463,17 @@ kvp_send_key(struct work_struct *dummy)
UTF16_LITTLE_ENDIAN,
message->body.kvp_set.data.key,
HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
+
+ break;
+
+ case KVP_OP_GET:
+ message->body.kvp_get.data.key_size =
+ utf16s_to_utf8s(
+ (wchar_t *)in_msg->body.kvp_get.data.key,
+ in_msg->body.kvp_get.data.key_size,
+ UTF16_LITTLE_ENDIAN,
+ message->body.kvp_get.data.key,
+ HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
break;
case KVP_OP_DELETE:
--
2.18.0
From: Andi Kleen <ak(a)linux.intel.com>
The Intel microcode revision space is unsigned. Inside Intel there are special
microcodes that have the highest bit set, and they are considered to have
a higher revision than any microcodes that don't have this bit set.
The function comparing the microcode revision in the Linux driver compares
u32 with int, which ends up being sign extended to long on 64bit
systems. This results in microcode revisions with the highest bit set not
loading because their revision appears negative and smaller than the
existing microcode.
Change the comparison to unsigned. With that the loading works
as expected.
Cc: stable(a)vger.kernel.org # Any supported stable
Signed-off-by: Andi Kleen <ak(a)linux.intel.com>
--
v2: White space changes.
---
arch/x86/kernel/cpu/microcode/intel.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 16936a24795c..e54d402500d3 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -93,7 +93,8 @@ static int find_matching_signature(void *mc, unsigned int csig, int cpf)
/*
* Returns 1 if update has been found, 0 otherwise.
*/
-static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev)
+static int has_newer_microcode(void *mc, unsigned int csig, int cpf,
+ unsigned new_rev)
{
struct microcode_header_intel *mc_hdr = mc;
--
2.17.1
We are one image studio who is able to process 300+ photos a day.
If you need any image editing, please let us know. We can do it for you
such as:
Image cut out for photos and clipping path, masking for your photos,
They are mostly used for ecommerce photos, jewelry photos retouching,
beauty and skin images
and wedding photos.
We do also different kind of beauty retouching, portraits retouching.
We can send editing for your photos if you send us one or two photos.
Thanks,
Linda
This is the start of the stable review cycle for the 4.18.16 release.
There are 53 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat Oct 20 17:53:52 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.18.16-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.18.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.18.16-rc1
Alexey Brodkin <abrodkin(a)synopsys.com>
ARC: build: Don't set CROSS_COMPILE in arch's Makefile
Alexey Brodkin <abrodkin(a)synopsys.com>
ARC: build: Get rid of toolchain check
Linus Torvalds <torvalds(a)linux-foundation.org>
mremap: properly flush TLB before releasing the page
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "vfs: fix freeze protection in mnt_want_write_file() for overlayfs"
Kairui Song <kasong(a)redhat.com>
x86/boot: Fix kexec booting failure in the SEV bit detection code
Arindam Nath <arindam.nath(a)amd.com>
iommu/amd: Return devid as alias for ACPI HID devices
Srikar Dronamraju <srikar(a)linux.vnet.ibm.com>
powerpc/numa: Use associativity if VPHN hcall is successful
Michael Neuling <mikey(a)neuling.org>
powerpc/tm: Avoid possible userspace r1 corruption on reclaim
Michael Neuling <mikey(a)neuling.org>
powerpc/tm: Fix userspace r13 corruption
Daniel Kurtz <djkurtz(a)chromium.org>
pinctrl/amd: poll InterruptEnable bits in amd_gpio_irq_set_type
Heiko Stuebner <heiko(a)sntech.de>
iommu/rockchip: Free irqs in shutdown handler
James Cowgill <jcowgill(a)debian.org>
RISC-V: include linux/ftrace.h in asm-prototypes.h
Selvin Xavier <selvin.xavier(a)broadcom.com>
RDMA/bnxt_re: Fix system crash during RDMA resource initialization
Tao Ren <taoren(a)fb.com>
clocksource/drivers/fttmr010: Fix set_next_event handler
Nathan Chancellor <natechancellor(a)gmail.com>
net/mlx4: Use cpumask_available for eq->affinity_mask
John Fastabend <john.fastabend(a)gmail.com>
bpf: test_maps, only support ESTABLISHED socks
John Fastabend <john.fastabend(a)gmail.com>
bpf: sockmap, fix transition through disconnect without close
John Fastabend <john.fastabend(a)gmail.com>
bpf: sockmap only allow ESTABLISHED sock state
Johannes Thumshirn <jthumshirn(a)suse.de>
scsi: sd: don't crash the host on invalid commands
Wen Xiong <wenxiong(a)linux.vnet.ibm.com>
scsi: ipr: System hung while dlpar adding primary ipr adapter back
Alexandru Gheorghe <alexandru-cosmin.gheorghe(a)arm.com>
drm: mali-dp: Call drm_crtc_vblank_reset on device init
James Smart <jsmart2021(a)gmail.com>
scsi: lpfc: Synchronize access to remoteport via rport
Majd Dibbiny <majd(a)mellanox.com>
RDMA/uverbs: Fix validity check for modify QP
Jisheng Zhang <Jisheng.Zhang(a)synaptics.com>
PCI: dwc: Fix scheduling while atomic issues
Sudarsana Reddy Kalluru <sudarsana.kalluru(a)cavium.com>
qed: Do not add VLAN 0 tag to untagged frames in multi-function mode.
Sudarsana Reddy Kalluru <sudarsana.kalluru(a)cavium.com>
qed: Fix populating the invalid stag value in multi function mode.
YueHaibing <yuehaibing(a)huawei.com>
net/smc: fix sizeof to int comparison
Ursula Braun <ubraun(a)linux.ibm.com>
net/smc: fix non-blocking connect problem
Kazuya Mizuguchi <kazuya.mizuguchi.ks(a)renesas.com>
ravb: do not write 1 to reserved bits
Christian Lamparter <chunkeey(a)gmail.com>
net: emac: fix fixed-link setup for the RTL8363SB switch
Sabrina Dubroca <sd(a)queasysnail.net>
selftests: pmtu: properly redirect stderr to /dev/null
Michael Schmitz <schmitzmic(a)gmail.com>
Input: atakbd - fix Atari CapsLock behaviour
Andreas Schwab <schwab(a)linux-m68k.org>
Input: atakbd - fix Atari keymap
Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
intel_th: pci: Add Ice Lake PCH support
Laura Abbott <labbott(a)redhat.com>
scsi: ibmvscsis: Ensure partition name is properly NUL terminated
Laura Abbott <labbott(a)redhat.com>
scsi: ibmvscsis: Fix a stringop-overflow warning
Keerthy <j-keerthy(a)ti.com>
clocksource/drivers/ti-32k: Add CLOCK_SOURCE_SUSPEND_NONSTOP flag for non-am43 SoCs
Steve Wise <swise(a)opengridcomputing.com>
cxgb4: fix abort_req_rss6 struct
Marek Lindner <mareklindner(a)neomailbox.ch>
batman-adv: fix hardif_neigh refcount on queue_work() failure
Marek Lindner <mareklindner(a)neomailbox.ch>
batman-adv: fix backbone_gw refcount on queue_work() failure
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated tvlv handler
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated global TT entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated softif_vlan entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated nc_node entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated gateway_node entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Fix segfault when writing to sysfs elp_interval
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Fix segfault when writing to throughput_override
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Avoid probe ELP information leak
Linus Walleij <linus.walleij(a)linaro.org>
spi: gpio: Fix copy-and-paste error
Jozef Balga <jozef.balga(a)gmail.com>
media: af9035: prevent buffer overflow on write
Sanyog Kale <sanyog.r.kale(a)intel.com>
soundwire: Fix acquiring bus lock twice during master release
Shreyas NC <shreyas.nc(a)intel.com>
soundwire: Fix incorrect exit after configuring stream
Shreyas NC <shreyas.nc(a)intel.com>
soundwire: Fix duplicate stream state assignment
-------------
Diffstat:
Makefile | 4 +-
arch/arc/Makefile | 24 +-----
arch/powerpc/kernel/tm.S | 20 ++++-
arch/powerpc/mm/numa.c | 4 +-
arch/riscv/include/asm/asm-prototypes.h | 7 ++
arch/x86/boot/compressed/mem_encrypt.S | 19 -----
drivers/clocksource/timer-fttmr010.c | 18 +++--
drivers/clocksource/timer-ti-32k.c | 3 +
drivers/gpu/drm/arm/malidp_drv.c | 1 +
drivers/hwtracing/intel_th/pci.c | 5 ++
drivers/infiniband/core/uverbs_cmd.c | 68 +++++++++++------
drivers/infiniband/hw/bnxt_re/main.c | 93 ++++++++++-------------
drivers/input/keyboard/atakbd.c | 74 +++++++------------
drivers/iommu/amd_iommu.c | 6 ++
drivers/iommu/rockchip-iommu.c | 6 ++
drivers/media/usb/dvb-usb-v2/af9035.c | 6 +-
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 1 -
drivers/net/ethernet/ibm/emac/core.c | 15 ++--
drivers/net/ethernet/mellanox/mlx4/eq.c | 3 +-
drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 9 ++-
drivers/net/ethernet/qlogic/qed/qed_dcbx.h | 1 +
drivers/net/ethernet/qlogic/qed/qed_dev.c | 15 +++-
drivers/net/ethernet/qlogic/qed/qed_hsi.h | 4 +
drivers/net/ethernet/renesas/ravb.h | 5 ++
drivers/net/ethernet/renesas/ravb_main.c | 11 +--
drivers/net/ethernet/renesas/ravb_ptp.c | 2 +-
drivers/pci/controller/dwc/pcie-designware.c | 8 +-
drivers/pci/controller/dwc/pcie-designware.h | 3 +-
drivers/pinctrl/pinctrl-amd.c | 33 ++++++---
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 5 +-
drivers/scsi/ipr.c | 106 +++++++++++++++------------
drivers/scsi/ipr.h | 1 +
drivers/scsi/lpfc/lpfc_attr.c | 15 ++--
drivers/scsi/lpfc/lpfc_debugfs.c | 10 +--
drivers/scsi/lpfc/lpfc_nvme.c | 11 ++-
drivers/scsi/sd.c | 3 +-
drivers/soundwire/stream.c | 23 ++++--
drivers/spi/spi-gpio.c | 4 +-
fs/namespace.c | 7 +-
include/linux/huge_mm.h | 2 +-
kernel/bpf/sockmap.c | 91 ++++++++++++++++++-----
mm/huge_memory.c | 10 +--
mm/mremap.c | 30 ++++----
net/batman-adv/bat_v_elp.c | 10 ++-
net/batman-adv/bridge_loop_avoidance.c | 10 ++-
net/batman-adv/gateway_client.c | 11 ++-
net/batman-adv/network-coding.c | 27 ++++---
net/batman-adv/soft-interface.c | 25 +++++--
net/batman-adv/sysfs.c | 30 +++++---
net/batman-adv/translation-table.c | 6 +-
net/batman-adv/tvlv.c | 8 +-
net/smc/af_smc.c | 7 +-
net/smc/smc_clc.c | 14 ++--
tools/testing/selftests/bpf/test_maps.c | 10 ++-
tools/testing/selftests/net/pmtu.sh | 4 +-
55 files changed, 563 insertions(+), 385 deletions(-)
From: Masami Hiramatsu <mhiramat(a)kernel.org>
Fix synthetic event to allow independent semicolon at end.
The synthetic_events interface accepts a semicolon after the
last word if there is no space.
# echo "myevent u64 var;" >> synthetic_events
But if there is a space, it returns an error.
# echo "myevent u64 var ;" > synthetic_events
sh: write error: Invalid argument
This behavior is difficult for users to understand. Let's
allow the last independent semicolon too.
Link: http://lkml.kernel.org/r/153986835420.18251.2191216690677025744.stgit@devbox
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Tom Zanussi <tom.zanussi(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Fixes: commit 4b147936fa50 ("tracing: Add support for 'synthetic' events")
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/trace_events_hist.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 6ff83941065a..d239004aaf29 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1088,7 +1088,7 @@ static int create_synth_event(int argc, char **argv)
i += consumed - 1;
}
- if (i < argc) {
+ if (i < argc && strcmp(argv[i], ";") != 0) {
ret = -EINVAL;
goto err;
}
--
2.19.0
This is the start of the stable review cycle for the 4.9.135 release.
There are 35 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat Oct 20 17:54:00 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.135-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.9.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.9.135-rc1
Long Li <longli(a)microsoft.com>
HV: properly delay KVP packets when negotiation is in progress
Theodore Ts'o <tytso(a)mit.edu>
ext4: avoid running out of journal credits when appending to an inline file
Frederic Weisbecker <fweisbec(a)gmail.com>
sched/cputime: Fix ksoftirqd cputime accounting regression
Frederic Weisbecker <fweisbec(a)gmail.com>
sched/cputime: Increment kcpustat directly on irqtime account
Frederic Weisbecker <fweisbec(a)gmail.com>
macintosh/rack-meter: Convert cputime64_t use to u64
Frederic Weisbecker <fweisbec(a)gmail.com>
sched/cputime: Convert kcpustat to nsecs
Stephen Warren <swarren(a)nvidia.com>
usb: gadget: serial: fix oops when data rx'd after close
Natanael Copa <ncopa(a)alpinelinux.org>
HID: quirks: fix support for Apple Magic Keyboards
Alexey Brodkin <abrodkin(a)synopsys.com>
ARC: build: Don't set CROSS_COMPILE in arch's Makefile
Alexey Brodkin <abrodkin(a)synopsys.com>
ARC: build: Get rid of toolchain check
Xin Long <lucien.xin(a)gmail.com>
netfilter: check for seqadj ext existence before adding it in nf_nat_setup_info
Jan Kara <jack(a)suse.cz>
mm: Preserve _PAGE_DEVMAP across mprotect() calls
Linus Torvalds <torvalds(a)linux-foundation.org>
mremap: properly flush TLB before releasing the page
Arindam Nath <arindam.nath(a)amd.com>
iommu/amd: Return devid as alias for ACPI HID devices
Michael Neuling <mikey(a)neuling.org>
powerpc/tm: Avoid possible userspace r1 corruption on reclaim
Michael Neuling <mikey(a)neuling.org>
powerpc/tm: Fix userspace r13 corruption
James Cowgill <jcowgill(a)debian.org>
RISC-V: include linux/ftrace.h in asm-prototypes.h
Nathan Chancellor <natechancellor(a)gmail.com>
net/mlx4: Use cpumask_available for eq->affinity_mask
Johannes Thumshirn <jthumshirn(a)suse.de>
scsi: sd: don't crash the host on invalid commands
Alexandru Gheorghe <alexandru-cosmin.gheorghe(a)arm.com>
drm: mali-dp: Call drm_crtc_vblank_reset on device init
Kazuya Mizuguchi <kazuya.mizuguchi.ks(a)renesas.com>
ravb: do not write 1 to reserved bits
Michael Schmitz <schmitzmic(a)gmail.com>
Input: atakbd - fix Atari CapsLock behaviour
Andreas Schwab <schwab(a)linux-m68k.org>
Input: atakbd - fix Atari keymap
Laura Abbott <labbott(a)redhat.com>
scsi: ibmvscsis: Ensure partition name is properly NUL terminated
Laura Abbott <labbott(a)redhat.com>
scsi: ibmvscsis: Fix a stringop-overflow warning
Keerthy <j-keerthy(a)ti.com>
clocksource/drivers/ti-32k: Add CLOCK_SOURCE_SUSPEND_NONSTOP flag for non-am43 SoCs
Marek Lindner <mareklindner(a)neomailbox.ch>
batman-adv: fix hardif_neigh refcount on queue_work() failure
Marek Lindner <mareklindner(a)neomailbox.ch>
batman-adv: fix backbone_gw refcount on queue_work() failure
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated tvlv handler
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated global TT entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated softif_vlan entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Prevent duplicated nc_node entry
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Fix segfault when writing to sysfs elp_interval
Sven Eckelmann <sven(a)narfation.org>
batman-adv: Fix segfault when writing to throughput_override
Jozef Balga <jozef.balga(a)gmail.com>
media: af9035: prevent buffer overflow on write
-------------
Diffstat:
Makefile | 4 +-
arch/arc/Makefile | 24 +---------
arch/powerpc/kernel/tm.S | 20 +++++++--
arch/riscv/include/asm/asm-prototypes.h | 7 +++
arch/s390/appldata/appldata_os.c | 16 +++----
arch/x86/include/asm/pgtable_types.h | 2 +-
drivers/clocksource/timer-ti-32k.c | 3 ++
drivers/cpufreq/cpufreq.c | 6 +--
drivers/cpufreq/cpufreq_governor.c | 2 +-
drivers/cpufreq/cpufreq_stats.c | 1 -
drivers/gpu/drm/arm/malidp_drv.c | 1 +
drivers/hid/hid-core.c | 3 ++
drivers/hv/hv_kvp.c | 13 +++---
drivers/input/keyboard/atakbd.c | 74 ++++++++++++-------------------
drivers/iommu/amd_iommu.c | 6 +++
drivers/macintosh/rack-meter.c | 28 ++++++------
drivers/media/usb/dvb-usb-v2/af9035.c | 6 ++-
drivers/net/ethernet/mellanox/mlx4/eq.c | 3 +-
drivers/net/ethernet/renesas/ravb.h | 5 +++
drivers/net/ethernet/renesas/ravb_main.c | 11 ++---
drivers/net/ethernet/renesas/ravb_ptp.c | 2 +-
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 5 +--
drivers/scsi/sd.c | 3 +-
drivers/usb/gadget/function/u_serial.c | 2 +-
fs/ext4/ext4.h | 3 --
fs/ext4/inline.c | 38 +---------------
fs/ext4/xattr.c | 18 +-------
fs/proc/stat.c | 68 ++++++++++++++---------------
fs/proc/uptime.c | 7 +--
include/linux/huge_mm.h | 2 +-
kernel/sched/cpuacct.c | 2 +-
kernel/sched/cputime.c | 75 ++++++++++++++------------------
kernel/sched/sched.h | 12 +++--
mm/huge_memory.c | 10 ++---
mm/mremap.c | 30 ++++++-------
net/batman-adv/bat_v_elp.c | 8 +++-
net/batman-adv/bridge_loop_avoidance.c | 10 ++++-
net/batman-adv/network-coding.c | 27 +++++++-----
net/batman-adv/soft-interface.c | 25 ++++++++---
net/batman-adv/sysfs.c | 30 ++++++++-----
net/batman-adv/translation-table.c | 6 ++-
net/batman-adv/tvlv.c | 8 +++-
net/netfilter/nf_nat_core.c | 2 +-
43 files changed, 303 insertions(+), 325 deletions(-)
From: Ralph Campbell <rcampbell(a)nvidia.com>
Private ZONE_DEVICE pages use a special pte entry and thus are not
present. Properly handle this case in map_pte(); it is already handled
in check_pte(), but the map_pte() part was most probably lost in some rebase.
Without this patch the slow migration path cannot migrate any private
ZONE_DEVICE memory back to regular memory. This was found after stress
testing migration back to system memory. This can ultimately lead to the CPU
constantly page-fault looping on the special swap entry.
Changes since v2:
- add comments explaining what is going on
Changes since v1:
- properly lock pte directory in map_pte()
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
mm/page_vma_mapped.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index ae3c2a35d61b..11df03e71288 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -21,7 +21,29 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
if (!is_swap_pte(*pvmw->pte))
return false;
} else {
- if (!pte_present(*pvmw->pte))
+ /*
+ * We get here when we are trying to unmap a private
+ * device page from the process address space. Such
+ * page is not CPU accessible and thus is mapped as
+ * a special swap entry, nonetheless it still does
+ * count as a valid regular mapping for the page (and
+ * is accounted as such in page maps count).
+ *
+ * So handle this special case as if it was a normal
+ * page mapping ie lock CPU page table and returns
+ * true.
+ *
+ * For more details on device private memory see HMM
+ * (include/linux/hmm.h or mm/hmm.c).
+ */
+ if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
+
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (!is_device_private_entry(entry))
+ return false;
+ } else if (!pte_present(*pvmw->pte))
return false;
}
}
--
2.17.2
Detaching of mark connector from fsnotify_put_mark() can race with
unmounting of the filesystem like:
CPU1 CPU2
fsnotify_put_mark()
spin_lock(&conn->lock);
...
inode = fsnotify_detach_connector_from_object(conn)
spin_unlock(&conn->lock);
generic_shutdown_super()
fsnotify_unmount_inodes()
sees connector detached for inode
-> nothing to do
evict_inode()
barfs on pending inode reference
iput(inode);
Resulting in "Busy inodes after unmount" message and possible kernel
oops. Make fsnotify_unmount_inodes() properly wait for outstanding inode
references from detached connectors.
Note that the accounting of outstanding inode references in the
superblock can cause some cacheline contention on the counter. OTOH it
happens only during deletion of the last notification mark from an inode
(or during unlinking of watched inode) and that is not too bad. I have
measured time to create & delete inotify watch 100000 times from 64
processes in parallel (each process having its own inotify group and its
own file on a shared superblock) on a 64 CPU machine. Average and
standard deviation of 15 runs look like:
Avg Stddev
Vanilla 9.817400 0.276165
Fixed 9.710467 0.228294
So there's no statistically significant difference.
Fixes: 6b3f05d24d35 ("fsnotify: Detach mark from object list when last reference is dropped")
CC: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/notify/fsnotify.c | 3 +++
fs/notify/mark.c | 39 +++++++++++++++++++++++++++++++--------
include/linux/fs.h | 3 +++
3 files changed, 37 insertions(+), 8 deletions(-)
Changes since v1:
* added Fixes tag
* improved fsnotify_drop_object to take object type
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index f174397b63a0..00d4f4357724 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -96,6 +96,9 @@ void fsnotify_unmount_inodes(struct super_block *sb)
if (iput_inode)
iput(iput_inode);
+ /* Wait for outstanding inode references from connectors */
+ wait_var_event(&sb->s_fsnotify_inode_refs,
+ !atomic_long_read(&sb->s_fsnotify_inode_refs));
}
/*
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 59cdb27826de..f4e330b5b379 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -179,17 +179,20 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
-static struct inode *fsnotify_detach_connector_from_object(
- struct fsnotify_mark_connector *conn)
+static void *fsnotify_detach_connector_from_object(
+ struct fsnotify_mark_connector *conn,
+ unsigned int *type)
{
struct inode *inode = NULL;
+ *type = conn->type;
if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
return NULL;
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0;
+ atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
}
@@ -211,10 +214,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
fsnotify_put_group(group);
}
+/* Drop object reference originally held by a connector */
+static void fsnotify_drop_object(unsigned int type, void *objp)
+{
+ struct inode *inode;
+ struct super_block *sb;
+
+ if (!objp)
+ return;
+ /* Currently only inode references are passed to be dropped */
+ if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
+ return;
+ inode = objp;
+ sb = inode->i_sb;
+ iput(inode);
+ if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
+ wake_up_var(&sb->s_fsnotify_inode_refs);
+}
+
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
struct fsnotify_mark_connector *conn;
- struct inode *inode = NULL;
+ void *objp = NULL;
+ unsigned int type;
bool free_conn = false;
/* Catch marks that were actually never attached to object */
@@ -234,7 +256,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
conn = mark->connector;
hlist_del_init_rcu(&mark->obj_list);
if (hlist_empty(&conn->list)) {
- inode = fsnotify_detach_connector_from_object(conn);
+ objp = fsnotify_detach_connector_from_object(conn, &type);
free_conn = true;
} else {
__fsnotify_recalc_mask(conn);
@@ -242,7 +264,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
mark->connector = NULL;
spin_unlock(&conn->lock);
- iput(inode);
+ fsnotify_drop_object(type, objp);
if (free_conn) {
spin_lock(&destroy_lock);
@@ -709,7 +731,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
{
struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark, *old_mark = NULL;
- struct inode *inode;
+ void *objp;
+ unsigned int type;
conn = fsnotify_grab_connector(connp);
if (!conn)
@@ -735,11 +758,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
* mark references get dropped. It would lead to strange results such
* as delaying inode deletion or blocking unmount.
*/
- inode = fsnotify_detach_connector_from_object(conn);
+ objp = fsnotify_detach_connector_from_object(conn, &type);
spin_unlock(&conn->lock);
if (old_mark)
fsnotify_put_mark(old_mark);
- iput(inode);
+ fsnotify_drop_object(type, objp);
}
/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 33322702c910..5090f3dcec3b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1428,6 +1428,9 @@ struct super_block {
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
+ /* Pending fsnotify inode refs */
+ atomic_long_t s_fsnotify_inode_refs;
+
/* Being remounted read-only */
int s_readonly_remount;
--
2.16.4
Detaching of mark connector from fsnotify_put_mark() can race with
unmounting of the filesystem like:
CPU1 CPU2
fsnotify_put_mark()
spin_lock(&conn->lock);
...
inode = fsnotify_detach_connector_from_object(conn)
spin_unlock(&conn->lock);
generic_shutdown_super()
fsnotify_unmount_inodes()
sees connector detached for inode
-> nothing to do
evict_inode()
barfs on pending inode reference
iput(inode);
Resulting in "Busy inodes after unmount" message and possible kernel
oops. Make fsnotify_unmount_inodes() properly wait for outstanding inode
references from detached connectors.
Note that the accounting of outstanding inode references in the
superblock can cause some cacheline contention on the counter. OTOH it
happens only during deletion of the last notification mark from an inode
(or during unlinking of watched inode) and that is not too bad. I have
measured time to create & delete inotify watch 100000 times from 64
processes in parallel (each process having its own inotify group and its
own file on a shared superblock) on a 64 CPU machine. Average and
standard deviation of 15 runs look like:
Avg Stddev
Vanilla 9.817400 0.276165
Fixed 9.710467 0.228294
So there's no statistically significant difference.
Fixes: 6b3f05d24d35 ("fsnotify: Detach mark from object list when last reference is dropped")
CC: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/notify/fsnotify.c | 3 +++
fs/notify/mark.c | 39 +++++++++++++++++++++++++++++++--------
include/linux/fs.h | 3 +++
3 files changed, 37 insertions(+), 8 deletions(-)
Changes since v2:
* fixed uninitialized warning
Changes since v1:
* added Fixes tag
* fsnotify_drop_object() now takes type
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index f174397b63a0..00d4f4357724 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -96,6 +96,9 @@ void fsnotify_unmount_inodes(struct super_block *sb)
if (iput_inode)
iput(iput_inode);
+ /* Wait for outstanding inode references from connectors */
+ wait_var_event(&sb->s_fsnotify_inode_refs,
+ !atomic_long_read(&sb->s_fsnotify_inode_refs));
}
/*
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 59cdb27826de..09535f6423fc 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -179,17 +179,20 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
-static struct inode *fsnotify_detach_connector_from_object(
- struct fsnotify_mark_connector *conn)
+static void *fsnotify_detach_connector_from_object(
+ struct fsnotify_mark_connector *conn,
+ unsigned int *type)
{
struct inode *inode = NULL;
+ *type = conn->type;
if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
return NULL;
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0;
+ atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
}
@@ -211,10 +214,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
fsnotify_put_group(group);
}
+/* Drop object reference originally held by a connector */
+static void fsnotify_drop_object(unsigned int type, void *objp)
+{
+ struct inode *inode;
+ struct super_block *sb;
+
+ if (!objp)
+ return;
+ /* Currently only inode references are passed to be dropped */
+ if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
+ return;
+ inode = objp;
+ sb = inode->i_sb;
+ iput(inode);
+ if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
+ wake_up_var(&sb->s_fsnotify_inode_refs);
+}
+
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
struct fsnotify_mark_connector *conn;
- struct inode *inode = NULL;
+ void *objp = NULL;
+ unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
bool free_conn = false;
/* Catch marks that were actually never attached to object */
@@ -234,7 +256,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
conn = mark->connector;
hlist_del_init_rcu(&mark->obj_list);
if (hlist_empty(&conn->list)) {
- inode = fsnotify_detach_connector_from_object(conn);
+ objp = fsnotify_detach_connector_from_object(conn, &type);
free_conn = true;
} else {
__fsnotify_recalc_mask(conn);
@@ -242,7 +264,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
mark->connector = NULL;
spin_unlock(&conn->lock);
- iput(inode);
+ fsnotify_drop_object(type, objp);
if (free_conn) {
spin_lock(&destroy_lock);
@@ -709,7 +731,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
{
struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark, *old_mark = NULL;
- struct inode *inode;
+ void *objp;
+ unsigned int type;
conn = fsnotify_grab_connector(connp);
if (!conn)
@@ -735,11 +758,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
* mark references get dropped. It would lead to strange results such
* as delaying inode deletion or blocking unmount.
*/
- inode = fsnotify_detach_connector_from_object(conn);
+ objp = fsnotify_detach_connector_from_object(conn, &type);
spin_unlock(&conn->lock);
if (old_mark)
fsnotify_put_mark(old_mark);
- iput(inode);
+ fsnotify_drop_object(type, objp);
}
/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 33322702c910..5090f3dcec3b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1428,6 +1428,9 @@ struct super_block {
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
+ /* Pending fsnotify inode refs */
+ atomic_long_t s_fsnotify_inode_refs;
+
/* Being remounted read-only */
int s_readonly_remount;
--
2.16.4
Since acpi_os_get_timer() may be called after the timer subsystem has
been suspended, use the jiffies counter instead of ktime_get(). This
patch prevents the following warning from being reported during hibernation:
WARNING: CPU: 0 PID: 612 at kernel/time/timekeeping.c:751 ktime_get+0x116/0x120
RIP: 0010:ktime_get+0x116/0x120
Call Trace:
acpi_os_get_timer+0xe/0x30
acpi_ds_exec_begin_control_op+0x175/0x1de
acpi_ds_exec_begin_op+0x2c7/0x39a
acpi_ps_create_op+0x573/0x5e4
acpi_ps_parse_loop+0x349/0x1220
acpi_ps_parse_aml+0x25b/0x6da
acpi_ps_execute_method+0x327/0x41b
acpi_ns_evaluate+0x4e9/0x6f5
acpi_ut_evaluate_object+0xd9/0x2f2
acpi_rs_get_method_data+0x8f/0x114
acpi_walk_resources+0x122/0x1b6
acpi_pci_link_get_current.isra.2+0x157/0x280
acpi_pci_link_set+0x32f/0x4a0
irqrouter_resume+0x58/0x80
syscore_resume+0x84/0x380
hibernation_snapshot+0x20c/0x4f0
hibernate+0x22d/0x3a6
state_store+0x99/0xa0
kobj_attr_store+0x37/0x50
sysfs_kf_write+0x87/0xa0
kernfs_fop_write+0x1a5/0x240
__vfs_write+0xd2/0x410
vfs_write+0x101/0x250
ksys_write+0xab/0x120
__x64_sys_write+0x43/0x50
do_syscall_64+0x71/0x220
entry_SYSCALL_64_after_hwframe+0x49/0xbe
Fixes: 164a08cee135 ("ACPICA: Dispatcher: Introduce timeout mechanism for infinite loop detection")
Reported-by: Fengguang Wu <fengguang.wu(a)intel.com>
References: https://lists.01.org/pipermail/lkp/2018-April/008406.html
Cc: Rafael J. Wysocki <rjw(a)rjwysocki.net>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Len Brown <lenb(a)kernel.org>
Cc: Yu Chen <yu.c.chen(a)intel.com>
Cc: Fengguang Wu <fengguang.wu(a)intel.com>
Cc: linux-acpi(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
drivers/acpi/osl.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 8df9abfa947b..ed73f6fb0779 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -617,15 +617,18 @@ void acpi_os_stall(u32 us)
}
/*
- * Support ACPI 3.0 AML Timer operand
- * Returns 64-bit free-running, monotonically increasing timer
- * with 100ns granularity
+ * Support ACPI 3.0 AML Timer operand. Returns a 64-bit free-running,
+ * monotonically increasing timer with 100ns granularity. Do not use
+ * ktime_get() to implement this function because this function may get
+ * called after timekeeping has been suspended. Note: calling this function
+ * after timekeeping has been suspended may lead to unexpected results
+ * because when timekeeping is suspended the jiffies counter is not
+ * incremented. See also timekeeping_suspend().
*/
u64 acpi_os_get_timer(void)
{
- u64 time_ns = ktime_to_ns(ktime_get());
- do_div(time_ns, 100);
- return time_ns;
+ return (get_jiffies_64() - INITIAL_JIFFIES) *
+ (ACPI_100NSEC_PER_SEC / HZ);
}
acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width)
--
2.19.1.568.g152ad8e336-goog
The calculated ideal rate can easily overflow an unsigned long, thus
making the best div selection buggy as soon as no ideal match is found
before the overflow occurs.
Fixes: 4731a72df273 ("drm/sun4i: request exact rates to our parents")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon(a)bootlin.com>
Acked-by: Maxime Ripard <maxime.ripard(a)bootlin.com>
---
Changes in v2:
- Add a comment to explain why we bail out after an overflow
- Add Maxime ack
- Use a goto instead of a break
---
drivers/gpu/drm/sun4i/sun4i_dotclock.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index e36004fbe453..2a15f2f9271e 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -81,9 +81,19 @@ static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
int i;
for (i = tcon->dclk_min_div; i <= tcon->dclk_max_div; i++) {
- unsigned long ideal = rate * i;
+ u64 ideal = (u64)rate * i;
unsigned long rounded;
+ /*
+ * ideal has overflowed the max value that can be stored in an
+ * unsigned long, and every clk operation we might do on a
+ * truncated u64 value will give us incorrect results.
+ * Let's just stop there since bigger dividers will result in
+ * the same overflow issue.
+ */
+ if (ideal > ULONG_MAX)
+ goto out;
+
rounded = clk_hw_round_rate(clk_hw_get_parent(hw),
ideal);
--
2.14.1
This is a note to let you know that I've just added the patch titled
usbip:vudc: BUG kmalloc-2048 (Not tainted): Poison overwritten
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From e28fd56ad5273be67d0fae5bedc7e1680e729952 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah(a)kernel.org>
Date: Thu, 18 Oct 2018 10:19:29 -0600
Subject: usbip:vudc: BUG kmalloc-2048 (Not tainted): Poison overwritten
In the rmmod path, usbip_vudc does platform_device_put() twice: once from
platform_device_unregister() and then again from put_vudc_device().
The second put results in:
BUG kmalloc-2048 (Not tainted): Poison overwritten error or
BUG: KASAN: use-after-free in kobject_put+0x1e/0x230 if KASAN is
enabled.
[ 169.042156] calling init+0x0/0x1000 [usbip_vudc] @ 1697
[ 169.042396] =============================================================================
[ 169.043678] probe of usbip-vudc.0 returned 1 after 350 usecs
[ 169.044508] BUG kmalloc-2048 (Not tainted): Poison overwritten
[ 169.044509] -----------------------------------------------------------------------------
...
[ 169.057849] INFO: Freed in device_release+0x2b/0x80 age=4223 cpu=3 pid=1693
[ 169.057852] kobject_put+0x86/0x1b0
[ 169.057853] 0xffffffffc0c30a96
[ 169.057855] __x64_sys_delete_module+0x157/0x240
Fix it to call platform_device_del() instead and let put_vudc_device() do
the platform_device_put().
Reported-by: Randy Dunlap <rdunlap(a)infradead.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/usbip/vudc_main.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/usbip/vudc_main.c b/drivers/usb/usbip/vudc_main.c
index 3fc22037a82f..390733e6937e 100644
--- a/drivers/usb/usbip/vudc_main.c
+++ b/drivers/usb/usbip/vudc_main.c
@@ -73,6 +73,10 @@ static int __init init(void)
cleanup:
list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) {
list_del(&udc_dev->dev_entry);
+ /*
+ * Just do platform_device_del() here, put_vudc_device()
+ * calls the platform_device_put()
+ */
platform_device_del(udc_dev->pdev);
put_vudc_device(udc_dev);
}
@@ -89,7 +93,11 @@ static void __exit cleanup(void)
list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) {
list_del(&udc_dev->dev_entry);
- platform_device_unregister(udc_dev->pdev);
+ /*
+ * Just do platform_device_del() here, put_vudc_device()
+ * calls the platform_device_put()
+ */
+ platform_device_del(udc_dev->pdev);
put_vudc_device(udc_dev);
}
platform_driver_unregister(&vudc_driver);
--
2.19.1
The patch titled
Subject: hugetlbfs: dirty pages as they are added to pagecache
has been added to the -mm tree. Its filename is
hugetlbfs-dirty-pages-as-they-are-added-to-pagecache.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/hugetlbfs-dirty-pages-as-they-are-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/hugetlbfs-dirty-pages-as-they-are-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlbfs: dirty pages as they are added to pagecache
Some test systems were experiencing negative huge page reserve counts and
incorrect file block counts. This was traced to /proc/sys/vm/drop_caches
removing clean pages from hugetlbfs file pagecaches. When code that is not
hugetlbfs-specific removes the pages, the appropriate accounting is not
performed.
This can be recreated as follows:
fallocate -l 2M /dev/hugepages/foo
echo 1 > /proc/sys/vm/drop_caches
fallocate -l 2M /dev/hugepages/foo
grep -i huge /proc/meminfo
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
HugePages_Total: 2048
HugePages_Free: 2047
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 4194304 kB
ls -lsh /dev/hugepages/foo
4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo
To address this issue, dirty pages as they are added to pagecache. This
can easily be reproduced with fallocate as shown above. Read faulted
pages will eventually end up being marked dirty. But there is a window
where they are clean and could be impacted by code such as drop_caches.
So, just dirty them all as they are added to the pagecache.
In addition, it makes little sense to even try to drop hugetlbfs pagecache
pages, so disable calls to these filesystems in drop_caches code.
Link: http://lkml.kernel.org/r/20181018041022.4529-1-mike.kravetz@oracle.com
Fixes: 70c3547e36f5 ("hugetlbfs: add hugetlbfs_fallocate()")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/fs/drop_caches.c~hugetlbfs-dirty-pages-as-they-are-added-to-pagecache
+++ a/fs/drop_caches.c
@@ -9,6 +9,7 @@
#include <linux/writeback.h>
#include <linux/sysctl.h>
#include <linux/gfp.h>
+#include <linux/magic.h>
#include "internal.h"
/* A global variable is a bit ugly, but it keeps the code simple */
@@ -18,6 +19,12 @@ static void drop_pagecache_sb(struct sup
{
struct inode *inode, *toput_inode = NULL;
+ /*
+ * It makes no sense to try and drop hugetlbfs page cache pages.
+ */
+ if (sb->s_magic == HUGETLBFS_MAGIC)
+ return;
+
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
--- a/mm/hugetlb.c~hugetlbfs-dirty-pages-as-they-are-added-to-pagecache
+++ a/mm/hugetlb.c
@@ -3690,6 +3690,12 @@ int huge_add_to_page_cache(struct page *
return err;
ClearPagePrivate(page);
+ /*
+ * set page dirty so that it will not be removed from cache/file
+ * by non-hugetlbfs specific code paths.
+ */
+ set_page_dirty(page);
+
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlbfs-dirty-pages-as-they-are-added-to-pagecache.patch
On Thu, 18 Oct 2018 11:23:12 PDT (-0700), merker(a)debian.org wrote:
> On Thu, Oct 18, 2018 at 11:13:02AM +0200, gregkh(a)linuxfoundation.org wrote:
>>
>> This is a note to let you know that I've just added the patch titled
>>
>> RISC-V: include linux/ftrace.h in asm-prototypes.h
>>
>> to the 4.4-stable tree which can be found at:
>> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>>
>> The filename of the patch is:
>> risc-v-include-linux-ftrace.h-in-asm-prototypes.h.patch
>> and it can be found in the queue-4.4 subdirectory.
>>
>> If you, or anyone else, feels it should not be added to the stable tree,
>> please let <stable(a)vger.kernel.org> know about it.
> [...]
>> From: James Cowgill <jcowgill(a)debian.org>
>> Date: Thu, 6 Sep 2018 22:57:56 +0100
>> Subject: RISC-V: include linux/ftrace.h in asm-prototypes.h
>>
>> From: James Cowgill <jcowgill(a)debian.org>
>>
>> [ Upstream commit 57a489786de9ec37d6e25ef1305dc337047f0236 ]
>
> I guess it doesn't make much sense to add this patch to the 4.4
> and 3.18 stable trees. The patch creates an arch-specific header
> (arch/riscv/include/asm/asm-prototypes.h), but the first mainline
> kernel with support for the RISC-V architecture was kernel
> 4.15.
I agree.
Recently Wang Jian reported some KVP issues on the v4.4 kernel:
https://github.com/LIS/lis-next/issues/593:
e.g. the /var/lib/hyperv/.kvp_pool_* files can not be updated, and
sometimes if the hv_kvp_daemon doesn't start in time, the host may not
be able to query the VM's IP address via KVP.
I identified these 4 mainline patches to fix the issues. The patches
can be applied cleanly to the latest 4.4.y branch (currently it's
v4.4.161).
The first 3 are simply cherry-picked from the mainline, and the 4th
has to be reworked for the v4.4 kernel.
Wang Jian tested the 4 patches, and the issues can be fixed.
I also did some tests and found no regression.
Thanks!
-- Dexuan
K. Y. Srinivasan (2):
Drivers: hv: utils: Invoke the poll function after handshake
Drivers: hv: util: Pass the channel information during the init call
Long Li (1): -- Reworked by Dexuan
HV: properly delay KVP packets when negotiation is in progress
Vitaly Kuznetsov (1):
Drivers: hv: kvp: fix IP Failover
drivers/hv/hv_fcopy.c | 2 +-
drivers/hv/hv_kvp.c | 40 +++++++++++++++++++++++++++++++++++++---
drivers/hv/hv_snapshot.c | 4 ++--
drivers/hv/hv_util.c | 1 +
drivers/hv/hyperv_vmbus.h | 5 +++++
include/linux/hyperv.h | 1 +
6 files changed, 47 insertions(+), 6 deletions(-)
--
2.7.4
The host may send multiple negotiation packets
(due to timeout) before the KVP user-mode daemon
is connected.
We need to defer processing those packets
until the daemon is negotiated and connected.
It's okay for guest to respond
to all negotiation packets.
In addition, the host may send multiple staged
KVP requests as soon as negotiation is done.
We need to properly process those packets using one
tasklet for exclusive access to ring buffer.
This patch is based on the work of
Nick Meier <Nick.Meier(a)microsoft.com>.
Signed-off-by: Long Li <longli(a)microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
The above is the original changelog of
a3ade8cc474d ("HV: properly delay KVP packets when negotiation is in progress").
Here I re-worked the original patch because the mainline version
can't work for the linux-4.4.y branch, on which channel->callback_event
doesn't exist yet. In the mainline, channel->callback_event was added by:
631e63a9f346 ("vmbus: change to per channel tasklet"). Here we don't want
to backport it to v4.4, as it requires extra supporting changes and fixes,
which are unnecessary for the KVP bug we're trying to resolve.
NOTE: before this patch is used, we should cherry-pick the other related
3 patches from the mainline first:
The background of this backport request is that: recently Wang Jian reported
some KVP issues: https://github.com/LIS/lis-next/issues/593:
e.g. the /var/lib/hyperv/.kvp_pool_* files can not be updated, and sometimes
if the hv_kvp_daemon doesn't start in time, the host may not be able to query
the VM's IP address via KVP.
Reported-by: Wang Jian <jianjian.wang1(a)gmail.com>
Tested-by: Wang Jian <jianjian.wang1(a)gmail.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
---
This is re-worked by me from the mainline:
a3ade8cc474d ("HV: properly delay KVP packets when negotiation is in progress").
I added my Signed-off-by as I identified and tested the patches.
If this is unnecessary, please feel free to remove it.
drivers/hv/hv_kvp.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index ff0a426..1771a96 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -612,21 +612,22 @@ void hv_kvp_onchannelcallback(void *context)
NEGO_IN_PROGRESS,
NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
- if (host_negotiatied == NEGO_NOT_STARTED &&
- kvp_transaction.state < HVUTIL_READY) {
+ if (kvp_transaction.state < HVUTIL_READY) {
/*
* If userspace daemon is not connected and host is asking
* us to negotiate we need to delay to not lose messages.
* This is important for Failover IP setting.
*/
- host_negotiatied = NEGO_IN_PROGRESS;
- schedule_delayed_work(&kvp_host_handshake_work,
+ if (host_negotiatied == NEGO_NOT_STARTED) {
+ host_negotiatied = NEGO_IN_PROGRESS;
+ schedule_delayed_work(&kvp_host_handshake_work,
HV_UTIL_NEGO_TIMEOUT * HZ);
+ }
return;
}
if (kvp_transaction.state > HVUTIL_READY)
return;
-
+recheck:
vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 4, &recvlen,
&requestid);
@@ -703,6 +704,8 @@ void hv_kvp_onchannelcallback(void *context)
VM_PKT_DATA_INBAND, 0);
host_negotiatied = NEGO_FINISHED;
+
+ goto recheck;
}
}
--
2.7.4
Hi Greg,
This series fixes issues we've seen with softirq time accounting in 4.9:
- when ksoftirqd is running at 100% on a CPU, none of the values
reported by /proc/stat for that CPU will change, sometimes for
dozens of seconds,
- large deviations in the total number of ticks accumulated over a
fixed time for a CPU, probably because of the first issue hitting
for shorter periods.
We found out that something pretty similar had been reported 9 months
ago, see the reference link below. In that discussion, Rabin Vincent had
made a 4.9 specific patch which fixes our first issue, but we were still
seeing some deviation from the total number of ticks (up to 1.7% from
expected, where we had only 0.2% on older kernels), and you had also
asked for a direct backport from the mainline series, if possible.
As mentioned in that thread, a lot of changes (probably 50+) went into
4.11 to remove cputime, but we could get something working with only the
4 attached patches to fix these two issues. Three of these patches apply
without change, and the second one in the series ("sched/cputime:
Convert kcpustat to nsecs") needed a minor change as a cast had been
added in 527b0a76f41d ("sched/cpuacct: Avoid %lld seq_printf warning")
to fix a build warning on s390. I guess we could also include that patch
in this series, let me know if this is the preferred way to handle this.
We ran our tests on 3.18, 4.4 and 4.9 and confirmed that only 4.9 would
need this series, and that this series indeed restores the behavior we
were seeing on those older kernels.
Thanks!
Reference: http://lkml.kernel.org/r/%3C1513159876-5125-1-git-send-email-rabin.vincent@…
v2: - drop "time: Introduce jiffies64_to_nsecs()" as it has already been
merged into v4.9.132,
- include backport of commit 564b733c899f ("macintosh/rack-meter:
Convert cputime64_t use to u64") to avoid introducing a build
failure on powerpc.
Frederic Weisbecker (4):
sched/cputime: Convert kcpustat to nsecs
macintosh/rack-meter: Convert cputime64_t use to u64
sched/cputime: Increment kcpustat directly on irqtime account
sched/cputime: Fix ksoftirqd cputime accounting regression
arch/s390/appldata/appldata_os.c | 16 +++----
drivers/cpufreq/cpufreq.c | 6 +--
drivers/cpufreq/cpufreq_governor.c | 2 +-
drivers/cpufreq/cpufreq_stats.c | 1 -
drivers/macintosh/rack-meter.c | 28 +++++------
fs/proc/stat.c | 68 +++++++++++++--------------
fs/proc/uptime.c | 7 +--
kernel/sched/cpuacct.c | 2 +-
kernel/sched/cputime.c | 75 +++++++++++++-----------------
kernel/sched/sched.h | 12 +++--
10 files changed, 104 insertions(+), 113 deletions(-)
--
2.19.1
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 05c72e77ccda89ff624108b1b59a0fc43843f343 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala(a)linux.intel.com>
Date: Tue, 17 Jul 2018 20:42:14 +0300
Subject: [PATCH] drm/i915: Nuke the LVDS lid notifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We broke the LVDS notifier resume thing in (presumably) commit
e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") as
we no longer duplicate the current state in the LVDS notifier and
thus we never resume it properly either.
Instead of trying to fix it again let's just kill off the lid
notifier entirely. None of the machines tested thus far have
apparently needed it. Originally the lid notifier was added to
work around cases where the VBIOS was clobbering some of the
hardware state behind the driver's back, mostly on Thinkpads.
We now have a few reports of Thinkpads working just fine without
the notifier. So maybe it was misdiagnosed originally, or
something else has changed (ACPI video stuff perhaps?).
If we do end up finding a machine where the VBIOS is still causing
problems I would suggest that we first try setting various bits in
the VBIOS scratch registers. There are several to choose from that
may instruct the VBIOS to steer clear.
With the notifier gone we'll also stop looking at the panel status
in ->detect().
v2: Nuke enum modeset_restore (Rodrigo)
Cc: stable(a)vger.kernel.org
Cc: Wolfgang Draxinger <wdraxinger.maillist(a)draxit.de>
Cc: Vito Caputo <vcaputo(a)pengaru.com>
Cc: kitsunyan <kitsunyan(a)airmail.cc>
Cc: Joonas Saarinen <jza(a)saunalahti.fi>
Tested-by: Vito Caputo <vcaputo(a)pengaru.com> # ThinkPad X61s
Tested-by: kitsunyan <kitsunyan(a)airmail.cc> # ThinkPad X200
Tested-by: Joonas Saarinen <jza(a)saunalahti.fi> # Fujitsu Siemens U9210
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105902
References: https://lists.freedesktop.org/archives/intel-gfx/2018-June/169315.html
References: https://bugs.freedesktop.org/show_bug.cgi?id=21230
Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.")
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180717174216.22252-1-ville.…
Reviewed-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 337b1aad5212..343e79a44abd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -900,7 +900,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
spin_lock_init(&dev_priv->uncore.lock);
mutex_init(&dev_priv->sb_lock);
- mutex_init(&dev_priv->modeset_restore_lock);
mutex_init(&dev_priv->av_mutex);
mutex_init(&dev_priv->wm.wm_mutex);
mutex_init(&dev_priv->pps_mutex);
@@ -1570,11 +1569,6 @@ static int i915_drm_suspend(struct drm_device *dev)
struct pci_dev *pdev = dev_priv->drm.pdev;
pci_power_t opregion_target_state;
- /* ignore lid events during suspend */
- mutex_lock(&dev_priv->modeset_restore_lock);
- dev_priv->modeset_restore = MODESET_SUSPENDED;
- mutex_unlock(&dev_priv->modeset_restore_lock);
-
disable_rpm_wakeref_asserts(dev_priv);
/* We do a lot of poking in a lot of registers, make sure they work
@@ -1770,10 +1764,6 @@ static int i915_drm_resume(struct drm_device *dev)
intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false);
- mutex_lock(&dev_priv->modeset_restore_lock);
- dev_priv->modeset_restore = MODESET_DONE;
- mutex_unlock(&dev_priv->modeset_restore_lock);
-
intel_opregion_notify_adapter(dev_priv, PCI_D0);
enable_rpm_wakeref_asserts(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 08d4303abb14..995656f51b57 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1002,12 +1002,6 @@ struct i915_gem_mm {
#define I915_ENGINE_WEDGED_TIMEOUT (60 * HZ) /* Reset but no recovery? */
-enum modeset_restore {
- MODESET_ON_LID_OPEN,
- MODESET_DONE,
- MODESET_SUSPENDED,
-};
-
#define DP_AUX_A 0x40
#define DP_AUX_B 0x10
#define DP_AUX_C 0x20
@@ -1730,8 +1724,6 @@ struct drm_i915_private {
unsigned long quirks;
- enum modeset_restore modeset_restore;
- struct mutex modeset_restore_lock;
struct drm_atomic_state *modeset_restore_state;
struct drm_modeset_acquire_ctx reset_ctx;
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index bb06744d28a4..a35404119257 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -44,8 +44,6 @@
/* Private structure for the integrated LVDS support */
struct intel_lvds_connector {
struct intel_connector base;
-
- struct notifier_block lid_notifier;
};
struct intel_lvds_pps {
@@ -452,26 +450,9 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder,
return true;
}
-/*
- * Detect the LVDS connection.
- *
- * Since LVDS doesn't have hotlug, we use the lid as a proxy. Open means
- * connected and closed means disconnected. We also send hotplug events as
- * needed, using lid status notification from the input layer.
- */
static enum drm_connector_status
intel_lvds_detect(struct drm_connector *connector, bool force)
{
- struct drm_i915_private *dev_priv = to_i915(connector->dev);
- enum drm_connector_status status;
-
- DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
- connector->base.id, connector->name);
-
- status = intel_panel_detect(dev_priv);
- if (status != connector_status_unknown)
- return status;
-
return connector_status_connected;
}
@@ -496,117 +477,6 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
return 1;
}
-static int intel_no_modeset_on_lid_dmi_callback(const struct dmi_system_id *id)
-{
- DRM_INFO("Skipping forced modeset for %s\n", id->ident);
- return 1;
-}
-
-/* The GPU hangs up on these systems if modeset is performed on LID open */
-static const struct dmi_system_id intel_no_modeset_on_lid[] = {
- {
- .callback = intel_no_modeset_on_lid_dmi_callback,
- .ident = "Toshiba Tecra A11",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
- DMI_MATCH(DMI_PRODUCT_NAME, "TECRA A11"),
- },
- },
-
- { } /* terminating entry */
-};
-
-/*
- * Lid events. Note the use of 'modeset':
- * - we set it to MODESET_ON_LID_OPEN on lid close,
- * and set it to MODESET_DONE on open
- * - we use it as a "only once" bit (ie we ignore
- * duplicate events where it was already properly set)
- * - the suspend/resume paths will set it to
- * MODESET_SUSPENDED and ignore the lid open event,
- * because they restore the mode ("lid open").
- */
-static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
- void *unused)
-{
- struct intel_lvds_connector *lvds_connector =
- container_of(nb, struct intel_lvds_connector, lid_notifier);
- struct drm_connector *connector = &lvds_connector->base.base;
- struct drm_device *dev = connector->dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
-
- if (dev->switch_power_state != DRM_SWITCH_POWER_ON)
- return NOTIFY_OK;
-
- mutex_lock(&dev_priv->modeset_restore_lock);
- if (dev_priv->modeset_restore == MODESET_SUSPENDED)
- goto exit;
- /*
- * check and update the status of LVDS connector after receiving
- * the LID nofication event.
- */
- connector->status = connector->funcs->detect(connector, false);
-
- /* Don't force modeset on machines where it causes a GPU lockup */
- if (dmi_check_system(intel_no_modeset_on_lid))
- goto exit;
- if (!acpi_lid_open()) {
- /* do modeset on next lid open event */
- dev_priv->modeset_restore = MODESET_ON_LID_OPEN;
- goto exit;
- }
-
- if (dev_priv->modeset_restore == MODESET_DONE)
- goto exit;
-
- /*
- * Some old platform's BIOS love to wreak havoc while the lid is closed.
- * We try to detect this here and undo any damage. The split for PCH
- * platforms is rather conservative and a bit arbitrary expect that on
- * those platforms VGA disabling requires actual legacy VGA I/O access,
- * and as part of the cleanup in the hw state restore we also redisable
- * the vga plane.
- */
- if (!HAS_PCH_SPLIT(dev_priv))
- intel_display_resume(dev);
-
- dev_priv->modeset_restore = MODESET_DONE;
-
-exit:
- mutex_unlock(&dev_priv->modeset_restore_lock);
- return NOTIFY_OK;
-}
-
-static int
-intel_lvds_connector_register(struct drm_connector *connector)
-{
- struct intel_lvds_connector *lvds = to_lvds_connector(connector);
- int ret;
-
- ret = intel_connector_register(connector);
- if (ret)
- return ret;
-
- lvds->lid_notifier.notifier_call = intel_lid_notify;
- if (acpi_lid_notifier_register(&lvds->lid_notifier)) {
- DRM_DEBUG_KMS("lid notifier registration failed\n");
- lvds->lid_notifier.notifier_call = NULL;
- }
-
- return 0;
-}
-
-static void
-intel_lvds_connector_unregister(struct drm_connector *connector)
-{
- struct intel_lvds_connector *lvds = to_lvds_connector(connector);
-
- if (lvds->lid_notifier.notifier_call)
- acpi_lid_notifier_unregister(&lvds->lid_notifier);
-
- intel_connector_unregister(connector);
-}
-
/**
* intel_lvds_destroy - unregister and free LVDS structures
* @connector: connector to free
@@ -639,8 +509,8 @@ static const struct drm_connector_funcs intel_lvds_connector_funcs = {
.fill_modes = drm_helper_probe_single_connector_modes,
.atomic_get_property = intel_digital_connector_atomic_get_property,
.atomic_set_property = intel_digital_connector_atomic_set_property,
- .late_register = intel_lvds_connector_register,
- .early_unregister = intel_lvds_connector_unregister,
+ .late_register = intel_connector_register,
+ .early_unregister = intel_connector_unregister,
.destroy = intel_lvds_destroy,
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
.atomic_duplicate_state = intel_digital_connector_duplicate_state,
@@ -1114,8 +984,6 @@ void intel_lvds_init(struct drm_i915_private *dev_priv)
* 2) check for VBT data
* 3) check to see if LVDS is already on
* if none of the above, no panel
- * 4) make sure lid is open
- * if closed, act like it's not there for now
*/
/*
This is a note to let you know that I've just added the patch titled
usbip:vudc: BUG kmalloc-2048 (Not tainted): Poison overwritten
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the usb-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
>From 55a9300cea58007741f7d6b4b132e37d84a4329e Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah(a)kernel.org>
Date: Thu, 18 Oct 2018 10:19:29 -0600
Subject: usbip:vudc: BUG kmalloc-2048 (Not tainted): Poison overwritten
In the rmmod path, usbip_vudc does platform_device_put() twice: once from
platform_device_unregister() and then from put_vudc_device().
The second put results in:
BUG kmalloc-2048 (Not tainted): Poison overwritten error or
BUG: KASAN: use-after-free in kobject_put+0x1e/0x230 if KASAN is
enabled.
[ 169.042156] calling init+0x0/0x1000 [usbip_vudc] @ 1697
[ 169.042396] =============================================================================
[ 169.043678] probe of usbip-vudc.0 returned 1 after 350 usecs
[ 169.044508] BUG kmalloc-2048 (Not tainted): Poison overwritten
[ 169.044509] -----------------------------------------------------------------------------
...
[ 169.057849] INFO: Freed in device_release+0x2b/0x80 age=4223 cpu=3 pid=1693
[ 169.057852] kobject_put+0x86/0x1b0
[ 169.057853] 0xffffffffc0c30a96
[ 169.057855] __x64_sys_delete_module+0x157/0x240
Fix it to call platform_device_del() instead and let put_vudc_device() do
the platform_device_put().
Reported-by: Randy Dunlap <rdunlap(a)infradead.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/usbip/vudc_main.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/usbip/vudc_main.c b/drivers/usb/usbip/vudc_main.c
index 3fc22037a82f..390733e6937e 100644
--- a/drivers/usb/usbip/vudc_main.c
+++ b/drivers/usb/usbip/vudc_main.c
@@ -73,6 +73,10 @@ static int __init init(void)
cleanup:
list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) {
list_del(&udc_dev->dev_entry);
+ /*
+ * Just do platform_device_del() here, put_vudc_device()
+ * calls the platform_device_put()
+ */
platform_device_del(udc_dev->pdev);
put_vudc_device(udc_dev);
}
@@ -89,7 +93,11 @@ static void __exit cleanup(void)
list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) {
list_del(&udc_dev->dev_entry);
- platform_device_unregister(udc_dev->pdev);
+ /*
+ * Just do platform_device_del() here, put_vudc_device()
+ * calls the platform_device_put()
+ */
+ platform_device_del(udc_dev->pdev);
put_vudc_device(udc_dev);
}
platform_driver_unregister(&vudc_driver);
--
2.19.1
Took the set of patches from 4.19 to handle IP fragmentation DoS
and applied them against 4.14.69. Most of these are from Eric.
In a couple of cases, it required some manual merge conflict resolution.
Tested normal IP fragmentation with iperf3 and malicious IP fragments
with fragmentsmack. Under fragmentation attack (700Kpps) the original
4.14.69 consumes 97% CPU; with this patch it drops to 5%.
v3 - send to wider audience
v2 - added patch from 4.19 linux-next to fix ip fragmentation crash
Dan Carpenter (1):
ipv4: frags: precedence bug in ip_expire()
Eric Dumazet (22):
inet: frags: change inet_frags_init_net() return value
inet: frags: add a pointer to struct netns_frags
inet: frags: refactor ipfrag_init()
inet: frags: refactor ipv6_frag_init()
inet: frags: refactor lowpan_net_frag_init()
ipv6: export ip6 fragments sysctl to unprivileged users
rhashtable: add schedule points
inet: frags: use rhashtables for reassembly units
inet: frags: remove some helpers
inet: frags: get rif of inet_frag_evicting()
inet: frags: remove inet_frag_maybe_warn_overflow()
inet: frags: break the 2GB limit for frags storage
inet: frags: do not clone skb in ip_expire()
ipv6: frags: rewrite ip6_expire_frag_queue()
rhashtable: reorganize struct rhashtable layout
inet: frags: reorganize struct netns_frags
inet: frags: get rid of ipfrag_skb_cb/FRAG_CB
inet: frags: fix ip6frag_low_thresh boundary
net: speed up skb_rbtree_purge()
net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends
net: add rb_to_skb() and other rb tree helpers
net: sk_buff rbnode reorg
Florian Westphal (1):
ipv6: defrag: drop non-last frags smaller than min mtu
Kees Cook (1):
inet: frags: Convert timers to use timer_setup()
Peter Oskolkov (4):
ip: discard IPv4 datagrams with overlapping segments.
net: modify skb_rbtree_purge to return the truesize of all purged
skbs.
ip: add helpers to process in-order fragments faster.
ip: process in-order fragments efficiently
Taehee Yoo (1):
ip: frags: fix crash in ip_do_fragment()
Documentation/networking/ip-sysctl.txt | 13 +-
include/linux/rhashtable.h | 8 +-
include/linux/skbuff.h | 50 +-
include/net/inet_frag.h | 135 +++---
include/net/ip.h | 1 -
include/net/ipv6.h | 26 +-
include/uapi/linux/snmp.h | 1 +
lib/rhashtable.c | 2 +
net/core/skbuff.c | 31 +-
net/ieee802154/6lowpan/6lowpan_i.h | 26 +-
net/ieee802154/6lowpan/reassembly.c | 153 ++++---
net/ipv4/inet_fragment.c | 378 ++++------------
net/ipv4/ip_fragment.c | 578 +++++++++++++-----------
net/ipv4/proc.c | 7 +-
net/ipv4/tcp_fastopen.c | 8 +-
net/ipv4/tcp_input.c | 33 +-
net/ipv6/netfilter/nf_conntrack_reasm.c | 105 ++---
net/ipv6/proc.c | 5 +-
net/ipv6/reassembly.c | 217 ++++-----
net/sched/sch_netem.c | 14 +-
20 files changed, 802 insertions(+), 989 deletions(-)
--
2.18.0
On Wed, Oct 17, 2018 at 5:55 PM Natanael Copa <ncopa(a)alpinelinux.org> wrote:
>
> On Wed, 17 Oct 2018 16:59:15 +0200
> Benjamin Tissoires <benjamin.tissoires(a)redhat.com> wrote:
>
> > Hi Natanael,
> >
> > On Wed, Oct 17, 2018 at 4:52 PM Natanael Copa <ncopa(a)alpinelinux.org> wrote:
> > >
> > > Commit ee3454924370 ("HID: add support for Apple Magic Keyboards") added
> > > support for the Magic Keyboard over Bluetooth, but did not add the
> > > BT_VENDOR_ID_APPLE to hid-quirks. Fix this so hid-apple driver is used
> > > over hid-generic.
> > >
> > > This fixes the Fn key, which does not work at all with hid-generic.
> > >
> > > Fixes: ee3454924370 ("HID: add support for Apple Magic Keyboards")
> > > Bugzilla-id: https://bugzilla.kernel.org/show_bug.cgi?id=99881
> > > Signed-off-by: Natanael Copa <ncopa(a)alpinelinux.org>
> > > ---
> > > This should be backported to stable too.
> > >
> > > drivers/hid/hid-quirks.c | 3 +++
> > > 1 file changed, 3 insertions(+)
> > >
> > > diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
> > > index 249d49b6b16c..a3b3aecf8628 100644
> > > --- a/drivers/hid/hid-quirks.c
> > > +++ b/drivers/hid/hid-quirks.c
> > > @@ -270,6 +270,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
> > > { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO) },
> > > { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_JIS) },
> > > { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI) },
> > > + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI) },
> > > + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI) },
> > > + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI) },
> >
> > NACK, this should not be required with kernels v4.17+ IIRC.
> >
> > If it doesn't work on a recent kernel, please raise the issue, but I
> > am actually chasing down the new inclusions of these when we add new
> > device support.
>
> Fair enough. I think it may be needed for 4.14.y kernels though, to fix
> commit b6cc0ba2cbf4 (HID: add support for Apple Magic Keyboards).
>
> Fn key did not work without this patch on 4.14.76 for me.
Right, b6cc0ba2cbf4 has been added to 4.14.75 and is not working
because tweaking hid_have_special_driver[] is not required in current
kernels anymore.
@stable folks, would it be possible to take this patch in the v4.9 and
v4.14 trees? It can't go into Linus' tree, but I'd be glad to give my
Acked-by for a stable backport.
>
> > There is even a high chance that we remove the list entirely as this
> > would tremendously help the distributions to just have to ship
> > hid-generic in the initramfs instead of a bunch of random hid drivers.
>
> I doubt that distros will want Bluetooth keyboards there though. (which
> this is about)
I was talking more generally, killing this list of devices, as some
are keyboard and useful, and some are not needed as you say. But the
point is that distro folks won't have to decide which module to ship:
only hid-generic will be sufficient.
Cheers,
Benjamin
>
> > By the way, if the driver is not autoloaded by udev, it is a problem
> > in udev likely.
> >
> > Cheers,
> > Benjamin
> >
> > > { HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
> > > USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
> > > { HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
> > > USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, #endif --
> > > 2.19.1
> > >
>
blk_queue_split() already respects this limit via bio splitting, so there
is no need to do that in blkdev_issue_discard(); this lets us align with
the normal bio submission path (bio_add_page() & submit_bio()).
More importantly, this patch fixes one issue introduced in a22c4d7e34402cc
("block: re-add discard_granularity and alignment checks"), in which
zero discard bio may be generated in case of zero alignment.
Fixes: a22c4d7e34402ccdf3 ("block: re-add discard_granularity and alignment checks")
Cc: stable(a)vger.kernel.org
Cc: Mariusz Dabrowski <mariusz.dabrowski(a)intel.com>
Cc: Ming Lin <ming.l(a)ssi.samsung.com>
Cc: Mike Snitzer <snitzer(a)redhat.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Xiao Ni <xni(a)redhat.com>
Signed-off-by: Ming Lei <ming.lei(a)redhat.com>
---
block/blk-lib.c | 28 ++--------------------------
1 file changed, 2 insertions(+), 26 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index d1b9dd03da25..bbd44666f2b5 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -29,9 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
{
struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = *biop;
- unsigned int granularity;
unsigned int op;
- int alignment;
sector_t bs_mask;
if (!q)
@@ -54,38 +52,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
if ((sector | nr_sects) & bs_mask)
return -EINVAL;
- /* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
- alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
while (nr_sects) {
- unsigned int req_sects;
- sector_t end_sect, tmp;
+ unsigned int req_sects = nr_sects;
+ sector_t end_sect;
- /*
- * Issue in chunks of the user defined max discard setting,
- * ensuring that bi_size doesn't overflow
- */
- req_sects = min_t(sector_t, nr_sects,
- q->limits.max_discard_sectors);
if (!req_sects)
goto fail;
if (req_sects > UINT_MAX >> 9)
req_sects = UINT_MAX >> 9;
- /*
- * If splitting a request, and the next starting sector would be
- * misaligned, stop the discard at the previous aligned sector.
- */
end_sect = sector + req_sects;
- tmp = end_sect;
- if (req_sects < nr_sects &&
- sector_div(tmp, granularity) != alignment) {
- end_sect = end_sect - alignment;
- sector_div(end_sect, granularity);
- end_sect = end_sect * granularity + alignment;
- req_sects = end_sect - sector;
- }
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
--
2.9.5
Fix synthetic event to allow independent semicolon at end.
The synthetic_events interface accepts a semicolon after the
last word if there is no space.
# echo "myevent u64 var;" >> synthetic_events
But if there is a space, it returns an error.
# echo "myevent u64 var ;" > synthetic_events
sh: write error: Invalid argument
This behavior is difficult for users to understand. Let's
allow the last independent semicolon too.
Fixes: 4b147936fa50 ("tracing: Add support for 'synthetic' events")
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Cc: Tom Zanussi <tom.zanussi(a)linux.intel.com>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
---
kernel/trace/trace_events_hist.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 6ff83941065a..d239004aaf29 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1088,7 +1088,7 @@ static int create_synth_event(int argc, char **argv)
i += consumed - 1;
}
- if (i < argc) {
+ if (i < argc && strcmp(argv[i], ";") != 0) {
ret = -EINVAL;
goto err;
}
Hello,
> -----Original Message-----
> From: Rob Herring [mailto:robh@kernel.org]
> Sent: Friday, September 14, 2018 12:04 AM
> To: Alexey.Brodkin(a)synopsys.com
> Cc: linux-snps-arc(a)lists.infradead.org; Linux Kernel Mailing List <linux-kernel(a)vger.kernel.org>; Vineet.Gupta1(a)synopsys.com
> Subject: Re: [PATCH] ARC: Get rid of toolchain check
>
> On Thu, Sep 13, 2018 at 3:24 PM Alexey Brodkin
> <Alexey.Brodkin(a)synopsys.com> wrote:
> >
> > This check is very naive: we simply test if GCC invoked without
> > "-mcpu=XXX" has ARC700 define set. In that case we think that GCC
> > was built with "--with-cpu=arc700" and has libgcc built for ARC700.
> >
> > Otherwise if ARC700 is not defined we think that everything was built
> > for ARCv2.
> >
> > But in reality our life is much more interesting.
> >
> > 1. Regardless of GCC configuration (i.e. what we pass in "--with-cpu")
> > it may generate code for any ARC core.
> >
> > 2. libgcc might be built with explicitly specified "--mcpu=YYY"
> >
> > That's exactly what happens in case of multilibbed toolchains:
> > - GCC is configured with default settings
> > - All the libs built for many different CPU flavors
> >
> > I.e. that check gets in the way of usage of multilibbed
> > toolchains. And even non-multilibbed toolchains are affected.
> > OpenEmbedded also builds GCC without "--with-cpu" because
> > each and every target component later is compiled with explicitly
> > set "-mcpu=ZZZ".
> >
> > Signed-off-by: Alexey Brodkin <abrodkin(a)synopsys.com>
> > ---
> > arch/arc/Makefile | 14 --------------
> > 1 file changed, 14 deletions(-)
>
> +1 for this. Removing it also helps with my work to be able to build
> all the .dts files with only a host compiler. That also needs the hunk
> setting CROSS_COMPILE removed and not having a built-in dtb by
> default, but this is a step in the right direction.
>
> Acked-by: Rob Herring <robh(a)kernel.org>
May we get this one back-ported to stable trees?
Upstream commit in Linus' tree is
615f64458ad8 ("ARC: build: Get rid of toolchain check").
This fixes kernel configuration for ARC in case of missing
ARC cross-tools in current PATH.
-Alexey
Hello,
> -----Original Message-----
> From: Alexey Brodkin [mailto:abrodkin@synopsys.com]
> Sent: Sunday, September 16, 2018 11:48 PM
> To: linux-snps-arc(a)lists.infradead.org
> Cc: linux-kernel(a)vger.kernel.org; Vineet Gupta <vgupta(a)synopsys.com>; Alexey Brodkin <abrodkin(a)synopsys.com>; Masahiro
> Yamada <yamada.masahiro(a)socionext.com>; Rob Herring <robh(a)kernel.org>
> Subject: [PATCH] ARC: Don't set CROSS_COMPILE in arch's Makefile
>
> There's not much sense in doing that because if user or
> his build-system didn't set CROSS_COMPILE we still may
> very well make incorrect guess.
>
> But as it turned out setting CROSS_COMPILE is not as harmless
> as one may think: with recent changes that implemented automatic
> discovery of __host__ gcc features unconditional setup of
> CROSS_COMPILE leads to failures on execution of "make xxx_defconfig"
> with absent cross-compiler, for more info see [1].
>
> Setting CROSS_COMPILE as well gets in the way if we want only to build
> .dtb's (again with absent cross-compiler which is not really needed
> for building .dtb's), see [2].
>
> Note, we had to change LIBGCC assignment type from ":=" to "="
> so that it is resolved on its usage, otherwise if it is resolved
> at declaration time with missing CROSS_COMPILE we're getting this
> error message from host GCC:
> ------------------------->8-------------------------
> gcc: error: unrecognized command line option ‘-mmedium-calls’
> gcc: error: unrecognized command line option ‘-mno-sdata’; did you mean ‘-fno-stats’?
> ------------------------->8-------------------------
>
> [1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-September/004308.h…
> [2] http://lists.infradead.org/pipermail/linux-snps-arc/2018-September/004320.h…
>
> Signed-off-by: Alexey Brodkin <abrodkin(a)synopsys.com>
> Cc: Masahiro Yamada <yamada.masahiro(a)socionext.com>
> Cc: Rob Herring <robh(a)kernel.org>
> ---
> arch/arc/Makefile | 10 +---------
> 1 file changed, 1 insertion(+), 9 deletions(-)
>
> diff --git a/arch/arc/Makefile b/arch/arc/Makefile
> index 99cce77ab98f..5f6b67917dc2 100644
> --- a/arch/arc/Makefile
> +++ b/arch/arc/Makefile
> @@ -6,14 +6,6 @@
> # published by the Free Software Foundation.
> #
>
> -ifeq ($(CROSS_COMPILE),)
> -ifndef CONFIG_CPU_BIG_ENDIAN
> -CROSS_COMPILE := arc-linux-
> -else
> -CROSS_COMPILE := arceb-linux-
> -endif
> -endif
> -
> KBUILD_DEFCONFIG := nsim_700_defconfig
>
> cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
> @@ -79,7 +71,7 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp
> cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian
> ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB
>
> -LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
> +LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
>
> # Modules with short calls might break for calls into builtin-kernel
> KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode
> --
> 2.17.1
May we have this one back-ported to stable branches?
Upstream commit in Linus' tree is:
40660f1fcee8 ("ARC: build: Don't set CROSS_COMPILE in arch's Makefile").
Regards,
Alexey
Hi Greg,
Could you enqueue the following patch for -stable 4.9.x?
commit ab6dd1beac7be3c17f8bf3d38bdf29ecb7293f1e
Author: Xin Long <lucien.xin(a)gmail.com>
Date: Thu Aug 10 10:22:24 2017 +0800
netfilter: check for seqadj ext existence before adding it in nf_nat_setup_info
Cc'ing Laura, combining SNAT+DNAT+ftp helper is currently broken with
4.9.x. The patch above cures the issues.
Thanks.
Fix synthetic event to allow independent semicolon at end.
The synthetic_events interface accepts a semicolon after the
last word if there is no space.
# echo "myevent u64 var;" >> synthetic_events
But if there is a space, it returns an error.
# echo "myevent u64 var ;" > synthetic_events
sh: write error: Invalid argument
This behavior is difficult for users to understand. Let's
allow the last independent semicolon too.
Fixes: commit 4b147936fa50 ("tracing: Add support for 'synthetic' events")
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Cc: Tom Zanussi <tom.zanussi(a)linux.intel.com>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
---
kernel/trace/trace_events_hist.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 6ff83941065a..d239004aaf29 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1088,7 +1088,7 @@ static int create_synth_event(int argc, char **argv)
i += consumed - 1;
}
- if (i < argc) {
+ if (i < argc && strcmp(argv[i], ";") != 0) {
ret = -EINVAL;
goto err;
}
We are an image team who can process 400+ images each day.
If you need any image editing service, please let us know.
Image cut out and clipping path, masking.
Such as for ecommerce photos, jewelry photos retouching, beauty and skin
images
and wedding photos.
We give test editing for your photos if you send us some.
Thanks,
Nancy
Attached are another couple of miscellaneous fixes for FS-Cache and
CacheFiles:
(1) Fix a race between object burial in cachefiles and external rmdir.
(2) Fix a race from a split atomic op.
(3) Fix incomplete initialisation of cookie key space.
(4) Fix out-of-bounds read.
The patches are tagged here:
git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git
fscache-fixes-20181017
and can also be found on the following branch:
http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=fs…
David
---
Al Viro (1):
cachefiles: fix the race between cachefiles_bury_object() and rmdir(2)
David Howells (1):
fscache: Fix incomplete initialisation of inline key space
Eric Sandeen (1):
fscache: Fix out of bound read in long cookie keys
kiran.modukuri (1):
fscache: Fix race in fscache_op_complete() due to split atomic_sub & read
fs/cachefiles/namei.c | 2 +-
fs/fscache/cookie.c | 31 ++++++++++---------------------
fs/fscache/internal.h | 1 -
fs/fscache/main.c | 4 +---
include/linux/fscache-cache.h | 4 ++--
5 files changed, 14 insertions(+), 28 deletions(-)
The calculated ideal rate can easily overflow an unsigned long, thus
making the best div selection buggy as soon as no ideal match is found
before the overflow occurs.
Fixes: 4731a72df273 ("drm/sun4i: request exact rates to our parents")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon(a)bootlin.com>
---
drivers/gpu/drm/sun4i/sun4i_dotclock.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index e36004fbe453..82132a9bd1d5 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -81,9 +81,12 @@ static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
int i;
for (i = tcon->dclk_min_div; i <= tcon->dclk_max_div; i++) {
- unsigned long ideal = rate * i;
+ u64 ideal = (u64)rate * i;
unsigned long rounded;
+ if (ideal > ULONG_MAX)
+ break;
+
rounded = clk_hw_round_rate(clk_hw_get_parent(hw),
ideal);
--
2.14.1
On Thu, Oct 18, 2018 at 08:51:46AM +0000, David Gounaris wrote:
> Hi, I can also confirm that it works after cherry-picking the proposed commit.
>
> Reported-and-tested-by: David Gounaris <david.gounaris(a)infinera.com<mailto:David.Gounaris@infinera.com>>
>
Now queued up, thanks.
greg k-h
AML opcodes come in two lengths: 1-byte opcodes and 2-byte, extended opcodes.
If an error occurs due to illegal opcodes during table load, the AML parser
needs to continue loading the table. In order to do this, it needs to skip
parsing of the offending opcode and operands associated with that opcode.
This change fixes the AML parse loop to correctly skip parsing of incorrect
extended opcodes. Previously, only the short opcodes were skipped correctly.
Signed-off-by: Erik Schmauss <erik.schmauss(a)intel.com>
---
drivers/acpi/acpica/psloop.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index 34fc2f7476ed..b0789c483b0f 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -417,6 +417,7 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
union acpi_parse_object *op = NULL; /* current op */
struct acpi_parse_state *parser_state;
u8 *aml_op_start = NULL;
+ u8 opcode_length;
ACPI_FUNCTION_TRACE_PTR(ps_parse_loop, walk_state);
@@ -540,8 +541,19 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
"Skip parsing opcode %s",
acpi_ps_get_opcode_name
(walk_state->opcode)));
+
+ /*
+ * Determine the opcode length before skipping the opcode.
+ * An opcode can be 1 byte or 2 bytes in length.
+ */
+ opcode_length = 1;
+ if ((walk_state->opcode & 0xFF00) ==
+ AML_EXTENDED_OPCODE) {
+ opcode_length = 2;
+ }
walk_state->parser_state.aml =
- walk_state->aml + 1;
+ walk_state->aml + opcode_length;
+
walk_state->parser_state.aml =
acpi_ps_get_next_package_end
(&walk_state->parser_state);
--
2.17.1
The table load process omitted adding the operation region address
range to the global list. This omission is problematic because the OS
queries the global list to check for address range conflicts before
deciding which drivers to load. This commit may result in warning
messages that look like the following:
[ 7.871761] ACPI Warning: system_IO range 0x00000428-0x0000042F conflicts with op_region 0x00000400-0x0000047F (\PMIO) (20180531/utaddress-213)
[ 7.871769] ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
However, these messages do not signify regressions. They are a result of
properly adding address ranges within the global address list.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=200011
Tested-by: Jean-Marc Lenoir <archlinux(a)jihemel.com>
Signed-off-by: Erik Schmauss <erik.schmauss(a)intel.com>
---
drivers/acpi/acpica/dsopcode.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index e9fb0bf3c8d2..78f9de260d5f 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -417,6 +417,10 @@ acpi_ds_eval_region_operands(struct acpi_walk_state *walk_state,
ACPI_FORMAT_UINT64(obj_desc->region.address),
obj_desc->region.length));
+ status = acpi_ut_add_address_range(obj_desc->region.space_id,
+ obj_desc->region.address,
+ obj_desc->region.length, node);
+
/* Now the address and length are valid for this opregion */
obj_desc->region.flags |= AOPOBJ_DATA_VALID;
--
2.17.1
Hi,
I can now confirm that the boot failure is due to the absence of commit
8183d99f4a22 ("powerpc/lib/feature-fixups: use raw_patch_instruction()")
Greg, could you please apply that patch to 4.14 stable ?
Thanks
Christophe
Le 17/10/2018 à 18:36, Christophe LEROY a écrit :
> Hi,
>
> Yes I discovered the same issue today on MPC8321E, I plan to look at it
> more closely tomorrow morning (Paris Time).
>
> I think we are missing commit 8183d99f4a22c2abbc543847a588df3666ef0c0c ,
> I didn't realise it when we applied the series to 4.14,
> patch_instruction() is called too early without that patch.
>
> If you have opportunity to test now, you are welcome, otherwise I'll
> test it tomorrow.
>
> Christophe
>
> Le 17/10/2018 à 17:18, David Gounaris a écrit :
>> Hello, I got into troubles when I upgraded to Linux kernel 4.14.76 on
>> boards with MPC8321.
>>
>>
>> The symptom that I see is that the boot process gets cyclic, and no
>> printouts are seen from the Linux kernel. It seems like it resets.
>>
>>
>> When I revert the following commits it works again.
>>
>> af1a8101794dfea897290e057f61086dabfe6c91, powerpc/lib: fix book3s/32
>> boot failure due to code patching
>> 609fbeddb24c4035d24fc32d82dc08b30ae3dfc0, powerpc: Avoid code patching
>> freed init sections
>>
>> Any ideas of how to continue?
>>
>> BR / David Gounaris
>>
>>
>>
Although the power management code never calls the system-wide and runtime
suspend callbacks concurrently, runtime power state changes can happen
while the system is being suspended or resumed. See also the dpm_suspend()
and dpm_resume() calls in hibernation_snapshot(). Make sure the sd driver
supports this. This patch prevents the following call trace from being
reported during system-wide suspend:
WARNING: CPU: 0 PID: 701 at drivers/scsi/scsi_lib.c:3047 scsi_device_quiesce+0x4b/0xd0
Workqueue: events_unbound async_run_entry_fn
RIP: 0010:scsi_device_quiesce+0x4b/0xd0
Call Trace:
scsi_bus_suspend_common+0x71/0xe0
scsi_bus_freeze+0x15/0x20
dpm_run_callback+0x88/0x360
__device_suspend+0x1c4/0x840
async_suspend+0x1f/0xb0
async_run_entry_fn+0x6e/0x2c0
process_one_work+0x4ae/0xa20
worker_thread+0x63/0x5a0
kthread+0x1cf/0x1f0
ret_from_fork+0x24/0x30
Fixes: cd84a62e0078 ("block, scsi: Change the preempt-only flag into a counter")
Cc: Lee Duncan <lduncan(a)suse.com>
Cc: Hannes Reinecke <hare(a)suse.com>
Cc: Luis Chamberlain <mcgrof(a)kernel.org>
Cc: Johannes Thumshirn <jthumshirn(a)suse.de>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
drivers/scsi/scsi_lib.c | 15 ++++++---------
include/scsi/scsi_device.h | 1 -
2 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7db3c5fae469..6c18a61176e5 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -3052,11 +3052,12 @@ scsi_device_quiesce(struct scsi_device *sdev)
int err;
/*
- * It is allowed to call scsi_device_quiesce() multiple times from
- * the same context but concurrent scsi_device_quiesce() calls are
- * not allowed.
+ * Since all scsi_device_quiesce() and scsi_device_resume() calls
+ * are serialized it is safe here to check the device state without
+ * holding the SCSI device state mutex.
*/
- WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
+ if (sdev->sdev_state == SDEV_QUIESCE)
+ return 0;
blk_set_preempt_only(q);
@@ -3072,9 +3073,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
mutex_lock(&sdev->state_mutex);
err = scsi_device_set_state(sdev, SDEV_QUIESCE);
- if (err == 0)
- sdev->quiesced_by = current;
- else
+ if (err)
blk_clear_preempt_only(q);
mutex_unlock(&sdev->state_mutex);
@@ -3098,8 +3097,6 @@ void scsi_device_resume(struct scsi_device *sdev)
* device deleted during suspend)
*/
mutex_lock(&sdev->state_mutex);
- WARN_ON_ONCE(!sdev->quiesced_by);
- sdev->quiesced_by = NULL;
blk_clear_preempt_only(sdev->request_queue);
if (sdev->sdev_state == SDEV_QUIESCE)
scsi_device_set_state(sdev, SDEV_RUNNING);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 202f4d6a4342..ef86c8adc5d5 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -226,7 +226,6 @@ struct scsi_device {
unsigned char access_state;
struct mutex state_mutex;
enum scsi_device_state sdev_state;
- struct task_struct *quiesced_by;
unsigned long sdev_data[0];
} __attribute__((aligned(sizeof(unsigned long))));
--
2.19.1.568.g152ad8e336-goog
Although the power management code never calls the system-wide and runtime
suspend callbacks concurrently, runtime power state changes can happen
while the system is being suspended or resumed. See also the dpm_suspend()
and dpm_resume() calls in hibernation_snapshot(). Make sure the sd driver
supports this. This patch prevents the following call trace from being
reported during system-wide suspend:
WARNING: CPU: 0 PID: 701 at drivers/scsi/scsi_lib.c:3047 scsi_device_quiesce+0x4b/0xd0
Workqueue: events_unbound async_run_entry_fn
RIP: 0010:scsi_device_quiesce+0x4b/0xd0
Call Trace:
scsi_bus_suspend_common+0x71/0xe0
scsi_bus_freeze+0x15/0x20
dpm_run_callback+0x88/0x360
__device_suspend+0x1c4/0x840
async_suspend+0x1f/0xb0
async_run_entry_fn+0x6e/0x2c0
process_one_work+0x4ae/0xa20
worker_thread+0x63/0x5a0
kthread+0x1cf/0x1f0
ret_from_fork+0x24/0x30
Fixes: cd84a62e0078 ("block, scsi: Change the preempt-only flag into a counter")
Cc: Lee Duncan <lduncan(a)suse.com>
Cc: Hannes Reinecke <hare(a)suse.com>
Cc: Luis Chamberlain <mcgrof(a)kernel.org>
Cc: Johannes Thumshirn <jthumshirn(a)suse.de>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
drivers/scsi/scsi_lib.c | 16 +++++-----------
include/scsi/scsi_device.h | 1 -
2 files changed, 5 insertions(+), 12 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 62348412ed1b..3106e910e766 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -3040,13 +3040,11 @@ scsi_device_quiesce(struct scsi_device *sdev)
int err;
/*
- * It is allowed to call scsi_device_quiesce() multiple times from
- * the same context but concurrent scsi_device_quiesce() calls are
- * not allowed.
+ * Since all scsi_device_quiesce() and scsi_device_resume() calls
+ * are serialized it is safe to check the device state without holding
+ * the SCSI device state mutex.
*/
- WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
-
- if (sdev->quiesced_by == current)
+ if (sdev->sdev_state == SDEV_QUIESCE)
return 0;
blk_set_pm_only(q);
@@ -3063,9 +3061,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
mutex_lock(&sdev->state_mutex);
err = scsi_device_set_state(sdev, SDEV_QUIESCE);
- if (err == 0)
- sdev->quiesced_by = current;
- else
+ if (err)
blk_clear_pm_only(q);
mutex_unlock(&sdev->state_mutex);
@@ -3089,8 +3085,6 @@ void scsi_device_resume(struct scsi_device *sdev)
* device deleted during suspend)
*/
mutex_lock(&sdev->state_mutex);
- WARN_ON_ONCE(!sdev->quiesced_by);
- sdev->quiesced_by = NULL;
blk_clear_pm_only(sdev->request_queue);
if (sdev->sdev_state == SDEV_QUIESCE)
scsi_device_set_state(sdev, SDEV_RUNNING);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 202f4d6a4342..ef86c8adc5d5 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -226,7 +226,6 @@ struct scsi_device {
unsigned char access_state;
struct mutex state_mutex;
enum scsi_device_state sdev_state;
- struct task_struct *quiesced_by;
unsigned long sdev_data[0];
} __attribute__((aligned(sizeof(unsigned long))));
--
2.19.1.568.g152ad8e336-goog
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
The preemptirq_delay_test module is used for the ftrace selftest code that
tests the latency tracers. The problem is that it uses ktime for the delay
loop, and then checks the tracer to see if the delay loop is caught, but the
tracer uses trace_clock_local() which uses various different other clocks to
measure the latency. As ktime uses the clock cycles, and the code then
converts that to nanoseconds, it causes rounding errors, and the preemptirq
latency tests are failing due to being off by 1 (it expects to see a delay
of 500000 us, but the delay is only 499999 us). This is happening due to a
rounding error in the ktime (which is totally legit). The purpose of the
test is to see if it can catch the delay, not to test the accuracy between
trace_clock_local() and ktime_get(). Best to use apples to apples, and have
the delay loop use the same clock as the latency tracer does.
Cc: stable(a)vger.kernel.org
Fixes: f96e8577da102 ("lib: Add module for testing preemptoff/irqsoff latency tracers")
Acked-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/preemptirq_delay_test.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index f704390db9fc..d8765c952fab 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -5,12 +5,12 @@
* Copyright (C) 2018 Joel Fernandes (Google) <joel(a)joelfernandes.org>
*/
+#include <linux/trace_clock.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
-#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/string.h>
@@ -25,13 +25,13 @@ MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt or irq (default ir
static void busy_wait(ulong time)
{
- ktime_t start, end;
- start = ktime_get();
+ u64 start, end;
+ start = trace_clock_local();
do {
- end = ktime_get();
+ end = trace_clock_local();
if (kthread_should_stop())
break;
- } while (ktime_to_ns(ktime_sub(end, start)) < (time * 1000));
+ } while ((end - start) < (time * 1000));
}
static int preemptirq_delay_run(void *data)
--
2.19.0
Attached are another couple of miscellaneous fixes for FS-Cache and
CacheFiles:
(1) Fix a race between object burial in cachefiles and external rmdir.
(2) Fix a race from a split atomic op.
(3) Fix incomplete initialisation of cookie key space.
(4) Fix out-of-bounds read.
The patches are tagged here:
git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git
fscache-fixes-20181017
and can also be found on the following branch:
http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=fs…
David
---
Al Viro (1):
cachefiles: fix the race between cachefiles_bury_object() and rmdir(2)
David Howells (1):
fscache: Fix incomplete initialisation of inline key space
Eric Sandeen (1):
fscache: Fix out of bound read in long cookie keys
kiran.modukuri (1):
fscache: Fix race in fscache_op_complete() due to split atomic_sub & read
fs/cachefiles/namei.c | 2 +-
fs/fscache/cookie.c | 31 ++++++++++---------------------
fs/fscache/internal.h | 1 -
fs/fscache/main.c | 4 +---
include/linux/fscache-cache.h | 4 ++--
5 files changed, 14 insertions(+), 28 deletions(-)
We provide photoshop services to some of the companies from around the
world.
Some online stores use our services for retouching portraits, jewelry,
apparels, furnitures etc.
Here are the details of what we provide:
Clipping path
Deep etching
Image masking
Portrait retouching
Jewelry retouching
Fashion retouching
Please reply back for further info.
We can provide testing for your photos if needed.
Thanks,
Jenny
If BIOS configured a Y tiled FB we failed to set up the backing object
tiling accordingly, leading to a lack of GT fence installed and a
garbled console.
The problem was bisected to
commit 011f22eb545a ("drm/i915: Do NOT skip the first 4k of stolen memory for pre-allocated buffers v2")
but it just revealed a pre-existing issue.
Kudos to Ville who suspected a missing fence looking at the corruption
on the screen.
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Cc: Hans de Goede <hdegoede(a)redhat.com>
Cc: ronald(a)innovation.ch
Cc: <stable(a)vger.kernel.org>
Reported-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Reported-by: ronald(a)innovation.ch
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108264
Fixes: bc8d7dffacb1 ("drm/i915/skl: Provide a Skylake version of get_plane_config()")
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
drivers/gpu/drm/i915/intel_display.c | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a2e729fa8d64..3d34b98c4634 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2674,6 +2674,17 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
if (size_aligned * 2 > dev_priv->stolen_usable_size)
return false;
+ switch (fb->modifier) {
+ case DRM_FORMAT_MOD_LINEAR:
+ case I915_FORMAT_MOD_X_TILED:
+ case I915_FORMAT_MOD_Y_TILED:
+ break;
+ default:
+ DRM_DEBUG_DRIVER("Unsupported modifier for initial FB: 0x%llx\n",
+ fb->modifier);
+ return false;
+ }
+
mutex_lock(&dev->struct_mutex);
obj = i915_gem_object_create_stolen_for_preallocated(dev_priv,
base_aligned,
@@ -2683,8 +2694,17 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
if (!obj)
return false;
- if (plane_config->tiling == I915_TILING_X)
- obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X;
+ switch (plane_config->tiling) {
+ case I915_TILING_NONE:
+ break;
+ case I915_TILING_X:
+ case I915_TILING_Y:
+ obj->tiling_and_stride = fb->pitches[0] | plane_config->tiling;
+ break;
+ default:
+ MISSING_CASE(plane_config->tiling);
+ return false;
+ }
mode_cmd.pixel_format = fb->format->format;
mode_cmd.width = fb->width;
@@ -8827,6 +8847,7 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
fb->modifier = I915_FORMAT_MOD_X_TILED;
break;
case PLANE_CTL_TILED_Y:
+ plane_config->tiling = I915_TILING_Y;
if (val & PLANE_CTL_RENDER_DECOMPRESSION_ENABLE)
fb->modifier = I915_FORMAT_MOD_Y_TILED_CCS;
else
--
2.13.2
Unfortunately, it appears our fix in:
commit b5d29843d8ef ("drm/atomic_helper: Allow DPMS On<->Off changes
for unregistered connectors")
Which attempted to work around the problems introduced by:
commit 4d80273976bf ("drm/atomic_helper: Disallow new modesets on
unregistered connectors")
Is still not the right solution, as modesets can still be triggered
outside of drm_atomic_set_crtc_for_connector().
So in order to fix this, while still being careful that we don't break
modesets that a driver may perform before being registered with
userspace, we replace connector->registered with a tristate member,
connector->registration_state. This allows us to keep track of whether
or not a connector is still initializing and hasn't been exposed to
userspace, is currently registered and exposed to userspace, or has been
legitimately removed from the system after having once been present.
Using this info, we can prevent userspace from performing new modesets
on unregistered connectors while still allowing the driver to perform
modesets on unregistered connectors before the driver has finished being
registered.
Changes since v1:
- Fix WARN_ON() in drm_connector_cleanup() that CI caught with this
patchset in igt@drv_module_reload@basic-reload-inject and
igt@drv_module_reload@basic-reload by checking if the connector is
registered instead of unregistered, as calling drm_connector_cleanup()
on a connector that hasn't been registered with userspace yet should
stay valid.
- Remove unregistered_connector_check(), and just go back to what we
were doing before in commit 4d80273976bf ("drm/atomic_helper: Disallow
new modesets on unregistered connectors") except replacing
READ_ONCE(connector->registered) with drm_connector_is_unregistered().
This gets rid of the behavior of allowing DPMS On<->Off, but that should
be fine as it's more consistent with the UAPI we had before - danvet
- s/drm_connector_unregistered/drm_connector_is_unregistered/ - danvet
- Update documentation, fix some typos.
Fixes: b5d29843d8ef ("drm/atomic_helper: Allow DPMS On<->Off changes for unregistered connectors")
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: stable(a)vger.kernel.org
Cc: David Airlie <airlied(a)linux.ie>
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
---
drivers/gpu/drm/drm_atomic_helper.c | 21 ++++++++-
drivers/gpu/drm/drm_atomic_uapi.c | 21 ---------
drivers/gpu/drm/drm_connector.c | 11 +++--
drivers/gpu/drm/i915/intel_dp_mst.c | 8 ++--
include/drm/drm_connector.h | 71 ++++++++++++++++++++++++++++-
5 files changed, 99 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 6f66777dca4b..ee6b2987a3c7 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -319,6 +319,26 @@ update_connector_routing(struct drm_atomic_state *state,
return 0;
}
+ crtc_state = drm_atomic_get_new_crtc_state(state,
+ new_connector_state->crtc);
+ /*
+ * For compatibility with legacy users, we want to make sure that
+ * we allow DPMS On->Off modesets on unregistered connectors. Modesets
+ * which would result in anything else must be considered invalid, to
+ * avoid turning on new displays on dead connectors.
+ *
+ * Since the connector can be unregistered at any point during an
+ * atomic check or commit, this is racy. But that's OK: all we care
+ * about is ensuring that userspace can't do anything but shut off the
+ * display on a connector that was destroyed after its been notified,
+ * not before.
+ */
+ if (drm_connector_is_unregistered(connector) && crtc_state->active) {
+ DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
+ connector->base.id, connector->name);
+ return -EINVAL;
+ }
+
funcs = connector->helper_private;
if (funcs->atomic_best_encoder)
@@ -363,7 +383,6 @@ update_connector_routing(struct drm_atomic_state *state,
set_best_encoder(state, new_connector_state, new_encoder);
- crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc);
crtc_state->connectors_changed = true;
DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n",
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index a22d6f269b07..d5b7f315098c 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -299,27 +299,6 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state,
struct drm_connector *connector = conn_state->connector;
struct drm_crtc_state *crtc_state;
- /*
- * For compatibility with legacy users, we want to make sure that
- * we allow DPMS On<->Off modesets on unregistered connectors, since
- * legacy modesetting users will not be expecting these to fail. We do
- * not however, want to allow legacy users to assign a connector
- * that's been unregistered from sysfs to another CRTC, since doing
- * this with a now non-existent connector could potentially leave us
- * in an invalid state.
- *
- * Since the connector can be unregistered at any point during an
- * atomic check or commit, this is racy. But that's OK: all we care
- * about is ensuring that userspace can't use this connector for new
- * configurations after it's been notified that the connector is no
- * longer present.
- */
- if (!READ_ONCE(connector->registered) && crtc) {
- DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
- connector->base.id, connector->name);
- return -EINVAL;
- }
-
if (conn_state->crtc == crtc)
return 0;
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 5d01414ec9f7..891f9458d29e 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -396,7 +396,8 @@ void drm_connector_cleanup(struct drm_connector *connector)
/* The connector should have been removed from userspace long before
* it is finally destroyed.
*/
- if (WARN_ON(connector->registered))
+ if (WARN_ON(connector->registration_state ==
+ DRM_CONNECTOR_REGISTERED))
drm_connector_unregister(connector);
if (connector->tile_group) {
@@ -453,7 +454,7 @@ int drm_connector_register(struct drm_connector *connector)
return 0;
mutex_lock(&connector->mutex);
- if (connector->registered)
+ if (connector->registration_state != DRM_CONNECTOR_INITIALIZING)
goto unlock;
ret = drm_sysfs_connector_add(connector);
@@ -473,7 +474,7 @@ int drm_connector_register(struct drm_connector *connector)
drm_mode_object_register(connector->dev, &connector->base);
- connector->registered = true;
+ connector->registration_state = DRM_CONNECTOR_REGISTERED;
goto unlock;
err_debugfs:
@@ -495,7 +496,7 @@ EXPORT_SYMBOL(drm_connector_register);
void drm_connector_unregister(struct drm_connector *connector)
{
mutex_lock(&connector->mutex);
- if (!connector->registered) {
+ if (connector->registration_state != DRM_CONNECTOR_REGISTERED) {
mutex_unlock(&connector->mutex);
return;
}
@@ -506,7 +507,7 @@ void drm_connector_unregister(struct drm_connector *connector)
drm_sysfs_connector_remove(connector);
drm_debugfs_connector_remove(connector);
- connector->registered = false;
+ connector->registration_state = DRM_CONNECTOR_UNREGISTERED;
mutex_unlock(&connector->mutex);
}
EXPORT_SYMBOL(drm_connector_unregister);
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index b268bdd71bd3..8b71d64ebd9d 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -78,7 +78,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
pipe_config->pbn = mst_pbn;
/* Zombie connectors can't have VCPI slots */
- if (READ_ONCE(connector->registered)) {
+ if (!drm_connector_is_unregistered(connector)) {
slots = drm_dp_atomic_find_vcpi_slots(state,
&intel_dp->mst_mgr,
port,
@@ -314,7 +314,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector)
struct edid *edid;
int ret;
- if (!READ_ONCE(connector->registered))
+ if (drm_connector_is_unregistered(connector))
return intel_connector_update_modes(connector, NULL);
edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port);
@@ -330,7 +330,7 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force)
struct intel_connector *intel_connector = to_intel_connector(connector);
struct intel_dp *intel_dp = intel_connector->mst_port;
- if (!READ_ONCE(connector->registered))
+ if (drm_connector_is_unregistered(connector))
return connector_status_disconnected;
return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr,
intel_connector->port);
@@ -361,7 +361,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
int bpp = 24; /* MST uses fixed bpp */
int max_rate, mode_rate, max_lanes, max_link_clock;
- if (!READ_ONCE(connector->registered))
+ if (drm_connector_is_unregistered(connector))
return MODE_ERROR;
if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 5b3cf909fd5e..dd0552cb7472 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -82,6 +82,53 @@ enum drm_connector_status {
connector_status_unknown = 3,
};
+/**
+ * enum drm_connector_registration_state - userspace registration status for
+ * a &drm_connector
+ *
+ * This enum is used to track the status of initializing a connector and
+ * registering it with userspace, so that DRM can prevent bogus modesets on
+ * connectors that no longer exist.
+ */
+enum drm_connector_registration_state {
+ /**
+ * @DRM_CONNECTOR_INITIALIZING: The connector has just been created,
+ * but has yet to be exposed to userspace. There should be no
+ * additional restrictions to how the state of this connector may be
+ * modified.
+ */
+ DRM_CONNECTOR_INITIALIZING = 0,
+
+ /**
+ * @DRM_CONNECTOR_REGISTERED: The connector has been fully initialized
+ * and registered with sysfs, as such it has been exposed to
+ * userspace. There should be no additional restrictions to how the
+ * state of this connector may be modified.
+ */
+ DRM_CONNECTOR_REGISTERED = 1,
+
+ /**
+ * @DRM_CONNECTOR_UNREGISTERED: The connector has either been exposed
+ * to userspace and has since been unregistered and removed from
+ * userspace, or the connector was unregistered before it had a chance
+ * to be exposed to userspace (e.g. still in the
+ * @DRM_CONNECTOR_INITIALIZING state). When a connector is
+ * unregistered, there are additional restrictions to how its state
+ * may be modified:
+ *
+ * - An unregistered connector may only have its DPMS changed from
+ * On->Off. Once DPMS is changed to Off, it may not be switched back
+ * to On.
+ * - Modesets are not allowed on unregistered connectors, unless they
+ * would result in disabling its assigned CRTCs. This means
+ * disabling a CRTC on an unregistered connector is OK, but enabling
+ * one is not.
+ * - Removing a CRTC from an unregistered connector is OK, but new
+ * CRTCs may never be assigned to an unregistered connector.
+ */
+ DRM_CONNECTOR_UNREGISTERED = 2,
+};
+
enum subpixel_order {
SubPixelUnknown = 0,
SubPixelHorizontalRGB,
@@ -853,10 +900,12 @@ struct drm_connector {
bool ycbcr_420_allowed;
/**
- * @registered: Is this connector exposed (registered) with userspace?
+ * @registration_state: Is this connector initializing, exposed
+ * (registered) with userspace, or unregistered?
+ *
* Protected by @mutex.
*/
- bool registered;
+ enum drm_connector_registration_state registration_state;
/**
* @modes:
@@ -1167,6 +1216,24 @@ static inline void drm_connector_unreference(struct drm_connector *connector)
drm_connector_put(connector);
}
+/**
+ * drm_connector_is_unregistered - has the connector been unregistered from
+ * userspace?
+ * @connector: DRM connector
+ *
+ * Checks whether or not @connector has been unregistered from userspace.
+ *
+ * Returns:
+ * True if the connector was unregistered, false if the connector is
+ * registered or has not yet been registered with userspace.
+ */
+static inline bool
+drm_connector_is_unregistered(struct drm_connector *connector)
+{
+ return READ_ONCE(connector->registration_state) ==
+ DRM_CONNECTOR_UNREGISTERED;
+}
+
const char *drm_get_connector_status_name(enum drm_connector_status status);
const char *drm_get_subpixel_order_name(enum subpixel_order order);
const char *drm_get_dpms_name(int val);
--
2.17.2
From: Dexuan Cui <decui(a)microsoft.com>
I didn't find a real issue. Let's just make it consistent with the
next "case REG_U64:" where %llu is used.
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
Cc: Haiyang Zhang <haiyangz(a)microsoft.com>
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
---
drivers/hv/hv_kvp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 9fbb15c62c6c..3b8590ff94ba 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -437,7 +437,7 @@ kvp_send_key(struct work_struct *dummy)
val32 = in_msg->body.kvp_set.data.value_u32;
message->body.kvp_set.data.value_size =
sprintf(message->body.kvp_set.data.value,
- "%d", val32) + 1;
+ "%u", val32) + 1;
break;
case REG_U64:
--
2.18.0
Hyper-V VMs can be replicated to other hosts and there is a feature to
set a different IP for replicas; it is called 'Failover TCP/IP'. When
such a guest starts, the Hyper-V host sends it a KVP_OP_SET_IP_INFO message as soon
as we finish negotiation procedure. The problem is that it can happen (and
it actually happens) before userspace daemon connects and we reply with
HV_E_FAIL to the message. As there are no repetitions we fail to set the
requested IP.
Solve the issue by postponing our reply to the negotiation message till
userspace daemon is connected. We can't wait too long as there is a
host-side timeout (cca. 75 seconds) and if we fail to reply in this time
frame the whole KVP service will become inactive. The solution is not
ideal - if it takes userspace daemon more than 60 seconds to connect
IP Failover will still fail but I don't see a solution with our current
separation between kernel and userspace parts.
Other two modules (VSS and FCOPY) don't require such delay, leave them
untouched.
Signed-off-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
---
This is cherry-picked from the mainline:
4dbfc2e ("Drivers: hv: kvp: fix IP Failover")
I added my Signed-off-by as I identified and tested the patches.
If this is unnecessary, please feel free to remove it.
drivers/hv/hv_kvp.c | 31 +++++++++++++++++++++++++++++++
drivers/hv/hyperv_vmbus.h | 5 +++++
2 files changed, 36 insertions(+)
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index cd3fb01..ff0a426 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -78,9 +78,11 @@ static void kvp_send_key(struct work_struct *dummy);
static void kvp_respond_to_host(struct hv_kvp_msg *msg, int error);
static void kvp_timeout_func(struct work_struct *dummy);
+static void kvp_host_handshake_func(struct work_struct *dummy);
static void kvp_register(int);
static DECLARE_DELAYED_WORK(kvp_timeout_work, kvp_timeout_func);
+static DECLARE_DELAYED_WORK(kvp_host_handshake_work, kvp_host_handshake_func);
static DECLARE_WORK(kvp_sendkey_work, kvp_send_key);
static const char kvp_devname[] = "vmbus/hv_kvp";
@@ -131,6 +133,11 @@ static void kvp_timeout_func(struct work_struct *dummy)
hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
}
+static void kvp_host_handshake_func(struct work_struct *dummy)
+{
+ hv_poll_channel(kvp_transaction.recv_channel, hv_kvp_onchannelcallback);
+}
+
static int kvp_handle_handshake(struct hv_kvp_msg *msg)
{
switch (msg->kvp_hdr.operation) {
@@ -155,6 +162,12 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg)
pr_debug("KVP: userspace daemon ver. %d registered\n",
KVP_OP_REGISTER);
kvp_register(dm_reg_value);
+
+ /*
+ * If we're still negotiating with the host cancel the timeout
+ * work to not poll the channel twice.
+ */
+ cancel_delayed_work_sync(&kvp_host_handshake_work);
hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
return 0;
@@ -595,7 +608,22 @@ void hv_kvp_onchannelcallback(void *context)
struct icmsg_negotiate *negop = NULL;
int util_fw_version;
int kvp_srv_version;
+ static enum {NEGO_NOT_STARTED,
+ NEGO_IN_PROGRESS,
+ NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
+ if (host_negotiatied == NEGO_NOT_STARTED &&
+ kvp_transaction.state < HVUTIL_READY) {
+ /*
+ * If userspace daemon is not connected and host is asking
+ * us to negotiate we need to delay to not lose messages.
+ * This is important for Failover IP setting.
+ */
+ host_negotiatied = NEGO_IN_PROGRESS;
+ schedule_delayed_work(&kvp_host_handshake_work,
+ HV_UTIL_NEGO_TIMEOUT * HZ);
+ return;
+ }
if (kvp_transaction.state > HVUTIL_READY)
return;
@@ -673,6 +701,8 @@ void hv_kvp_onchannelcallback(void *context)
vmbus_sendpacket(channel, recv_buffer,
recvlen, requestid,
VM_PKT_DATA_INBAND, 0);
+
+ host_negotiatied = NEGO_FINISHED;
}
}
@@ -711,6 +741,7 @@ hv_kvp_init(struct hv_util_service *srv)
void hv_kvp_deinit(void)
{
kvp_transaction.state = HVUTIL_DEVICE_DYING;
+ cancel_delayed_work_sync(&kvp_host_handshake_work);
cancel_delayed_work_sync(&kvp_timeout_work);
cancel_work_sync(&kvp_sendkey_work);
hvutil_transport_destroy(hvt);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 75e383e..15e0649 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -36,6 +36,11 @@
#define HV_UTIL_TIMEOUT 30
/*
+ * Timeout for guest-host handshake for services.
+ */
+#define HV_UTIL_NEGO_TIMEOUT 60
+
+/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
* is set by CPUID(HVCPUID_VERSION_FEATURES).
*/
--
2.7.4
Pass the channel information to the util drivers that need to defer
reading the channel while they are processing a request. This would address
the following issue reported by Vitaly:
Commit 3cace4a61610 ("Drivers: hv: utils: run polling callback always in
interrupt context") removed direct *_transaction.state = HVUTIL_READY
assignments from *_handle_handshake() functions introducing the following
race: if a userspace daemon connects before we get first non-negotiation
request from the server hv_poll_channel() won't set transaction state to
HVUTIL_READY as (!channel) condition will fail, we set it to non-NULL on
the first real request from the server.
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
Reported-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
---
This is cherry-picked from the mainline:
b9830d1 ("Drivers: hv: util: Pass the channel information during the init call")
I added my Signed-off-by as I identified and tested the patches.
If this is unnecessary, please feel free to remove it.
drivers/hv/hv_fcopy.c | 2 +-
drivers/hv/hv_kvp.c | 2 +-
drivers/hv/hv_snapshot.c | 2 +-
drivers/hv/hv_util.c | 1 +
include/linux/hyperv.h | 1 +
5 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 12dcbd8..2cce48d 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -256,7 +256,6 @@ void hv_fcopy_onchannelcallback(void *context)
*/
fcopy_transaction.recv_len = recvlen;
- fcopy_transaction.recv_channel = channel;
fcopy_transaction.recv_req_id = requestid;
fcopy_transaction.fcopy_msg = fcopy_msg;
@@ -323,6 +322,7 @@ static void fcopy_on_reset(void)
int hv_fcopy_init(struct hv_util_service *srv)
{
recv_buffer = srv->recv_buffer;
+ fcopy_transaction.recv_channel = srv->channel;
init_completion(&release_event);
/*
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index b97ef3e..cd3fb01 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -640,7 +640,6 @@ void hv_kvp_onchannelcallback(void *context)
*/
kvp_transaction.recv_len = recvlen;
- kvp_transaction.recv_channel = channel;
kvp_transaction.recv_req_id = requestid;
kvp_transaction.kvp_msg = kvp_msg;
@@ -690,6 +689,7 @@ int
hv_kvp_init(struct hv_util_service *srv)
{
recv_buffer = srv->recv_buffer;
+ kvp_transaction.recv_channel = srv->channel;
init_completion(&release_event);
/*
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index c5fb249..b0feddb 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -264,7 +264,6 @@ void hv_vss_onchannelcallback(void *context)
*/
vss_transaction.recv_len = recvlen;
- vss_transaction.recv_channel = channel;
vss_transaction.recv_req_id = requestid;
vss_transaction.msg = (struct hv_vss_msg *)vss_msg;
@@ -340,6 +339,7 @@ hv_vss_init(struct hv_util_service *srv)
return -ENOTSUPP;
}
recv_buffer = srv->recv_buffer;
+ vss_transaction.recv_channel = srv->channel;
/*
* When this driver loads, the user level daemon that
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 41f5896..9dc6372 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -326,6 +326,7 @@ static int util_probe(struct hv_device *dev,
srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
if (!srv->recv_buffer)
return -ENOMEM;
+ srv->channel = dev->channel;
if (srv->util_init) {
ret = srv->util_init(srv);
if (ret) {
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index ae6a711..281bb00 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1179,6 +1179,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
struct hv_util_service {
u8 *recv_buffer;
+ void *channel;
void (*util_cb)(void *);
int (*util_init)(struct hv_util_service *);
void (*util_deinit)(void);
--
2.7.4
When the handshake with daemon is complete, we should poll the channel since
during the handshake, we will not be processing any messages. This is a
potential bug if the host is waiting for a response from the guest.
I would like to thank Dexuan for pointing this out.
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
---
This is cherry-picked from the mainline:
2d0c3b5 ("Drivers: hv: utils: Invoke the poll function after handshake")
I added my Signed-off-by as I identified and tested the patches.
If this is unnecessary, please feel free to remove it.
drivers/hv/hv_kvp.c | 2 +-
drivers/hv/hv_snapshot.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index ce4d3a9..b97ef3e 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -155,7 +155,7 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg)
pr_debug("KVP: userspace daemon ver. %d registered\n",
KVP_OP_REGISTER);
kvp_register(dm_reg_value);
- kvp_transaction.state = HVUTIL_READY;
+ hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
return 0;
}
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index faad79a..c5fb249 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -114,7 +114,7 @@ static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
default:
return -EINVAL;
}
- vss_transaction.state = HVUTIL_READY;
+ hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
pr_debug("VSS: userspace daemon ver. %d registered\n", dm_reg_value);
return 0;
}
--
2.7.4
The host may send multiple negotiation packets
(due to timeout) before the KVP user-mode daemon
is connected.
We need to defer processing those packets
until the daemon is negotiated and connected.
It's okay for guest to respond
to all negotiation packets.
In addition, the host may send multiple staged
KVP requests as soon as negotiation is done.
We need to properly process those packets using one
tasklet for exclusive access to ring buffer.
This patch is based on the work of
Nick Meier <Nick.Meier(a)microsoft.com>.
Signed-off-by: Long Li <longli(a)microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
The above is the original changelog of
a3ade8cc474d ("HV: properly delay KVP packets when negotiation is in progress").
Here I re-worked the original patch because the mainline version
can't work for the linux-4.4.y branch, on which channel->callback_event
doesn't exist yet. In the mainline, channel->callback_event was added by:
631e63a9f346 ("vmbus: change to per channel tasklet"). Here we don't want
to backport it to v4.4, as it requires extra supporting changes and fixes,
which are unnecessary as to the KVP bug we're trying to resolve.
NOTE: before this patch is used, we should cherry-pick the other related
3 patches from the mainline first:
2d0c3b5 ("Drivers: hv: utils: Invoke the poll function after handshake")
b9830d1 ("Drivers: hv: util: Pass the channel information during the init call")
4dbfc2e ("Drivers: hv: kvp: fix IP Failover")
And actually, it would be better if we could cherry-pick more fixes from the
mainline first (the 3 above patches are also included in this 27-patch list):
01 b003596 Drivers: hv: utils: use memdup_user in hvt_op_write
02 2d0c3b5 Drivers: hv: utils: Invoke the poll function after handshake
03 1f75338 Drivers: hv: utils: fix memory leak on on_msg() failure
04 a72f3a4 Drivers: hv: utils: rename outmsg_lock
05 a150256 Drivers: hv: utils: introduce HVUTIL_TRANSPORT_DESTROY mode
06 9420098 Drivers: hv: utils: fix crash when device is removed from host side
07 77b744a Drivers: hv: utils: fix hvt_op_poll() return value on transport destroy
08 b9830d1 Drivers: hv: util: Pass the channel information during the init call
09 e66853b Drivers: hv: utils: Remove util transport handler from list if registration fails
10 4dbfc2e Drivers: hv: kvp: fix IP Failover
11 e0fa3e5 Drivers: hv: utils: fix a race on userspace daemons registration
12 497af84 Drivers: hv: utils: Continue to poll VSS channel after handling requests.
13 db886e4 Drivers: hv: utils: Check VSS daemon is listening before a hot backup
14 abeda47 Drivers: hv: utils: Rename version definitions to reflect protocol version.
15 2e338f7 Drivers: hv: utils: Use TimeSync samples to adjust the clock after boot.
16 8e1d260 Drivers: hv: utils: Support TimeSync version 4.0 protocol samples.
17 3ba1eb1 Drivers: hv: hv_util: Avoid dynamic allocation in time synch
18 3da0401b Drivers: hv: utils: Fix the mapping between host version and protocol to use
19 23d2cc0 Drivers: hv: vss: Improve log messages.
20 b357fd3 Drivers: hv: vss: Operation timeouts should match host expectation
21 1724462 hv_util: switch to using timespec64
22 a165645 Drivers: hv: vmbus: Use all supported IC versions to negotiate
23 1274a69 Drivers: hv: Log the negotiated IC versions.
24 bb6a4db Drivers: hv: util: Fix a typo
25 e9c18ae Drivers: hv: util: move waiting for release to hv_utils_transport itself
26 bdc1dd4 vmbus: fix spelling errors
27 ddce54b Drivers: hv: kvp: Use MAX_ADAPTER_ID_SIZE for translating adapter id
This is to say, we're requesting a backport of 4 patches or 28 patches.
If 28 patches seem too many, we hope at least the 4 patches can be backported.
The patches can be applied cleanly to the latest v4.4 branch (currently it's
v4.4.160).
The background of this backport request is that: recently Wang Jian reported
some KVP issues: https://github.com/LIS/lis-next/issues/593:
e.g. the /var/lib/hyperv/.kvp_pool_* files can not be updated, and sometimes
if the hv_kvp_daemon doesn't timely start, the host may not be able to query
the VM's IP address via KVP.
Wang Jian tested the 4 patches and the 28 patches, and the issues can be
fixed by the patches.
Reported-by: Wang Jian <jianjian.wang1(a)gmail.com>
Tested-by: Wang Jian <jianjian.wang1(a)gmail.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
---
drivers/hv/hv_kvp.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index f3d3d75ac913e..e4fbc17bbe190 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -627,21 +627,22 @@ void hv_kvp_onchannelcallback(void *context)
NEGO_IN_PROGRESS,
NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
- if (host_negotiatied == NEGO_NOT_STARTED &&
- kvp_transaction.state < HVUTIL_READY) {
+ if (kvp_transaction.state < HVUTIL_READY) {
/*
* If userspace daemon is not connected and host is asking
* us to negotiate we need to delay to not lose messages.
* This is important for Failover IP setting.
*/
- host_negotiatied = NEGO_IN_PROGRESS;
- schedule_delayed_work(&kvp_host_handshake_work,
+ if (host_negotiatied == NEGO_NOT_STARTED) {
+ host_negotiatied = NEGO_IN_PROGRESS;
+ schedule_delayed_work(&kvp_host_handshake_work,
HV_UTIL_NEGO_TIMEOUT * HZ);
+ }
return;
}
if (kvp_transaction.state > HVUTIL_READY)
return;
-
+recheck:
vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 4, &recvlen,
&requestid);
@@ -704,6 +705,8 @@ void hv_kvp_onchannelcallback(void *context)
VM_PKT_DATA_INBAND, 0);
host_negotiatied = NEGO_FINISHED;
+
+ goto recheck;
}
}
Hi Greg,
While looking at android-4.14, I found a NULL pointer deref with
stm32-dma driver using Coccicheck errors. I found that upstream had a
bunch of patches on stm32-dma that have fixed this and other issues, I
applied these patches cleanly onto Android 4.14. I believe these should
go to stable and flow into Android 4.14 from there, but I haven't tested
this since I have no hardware to do so.
At least I can say that the coccicheck error below goes away when running:
make coccicheck MODE=report
./drivers/dma/stm32-dma.c:567:18-24: ERROR: chan -> desc is NULL but dereferenced.
Anyway, please consider this series for 4.14 stable, I have CC'd the
author and others, thanks.
Pierre Yves MORDRET (7):
dmaengine: stm32-dma: threshold manages with bitfield feature
dmaengine: stm32-dma: fix incomplete configuration in cyclic mode
dmaengine: stm32-dma: fix typo and reported checkpatch warnings
dmaengine: stm32-dma: Improve memory burst management
dmaengine: stm32-dma: fix DMA IRQ status handling
dmaengine: stm32-dma: fix max items per transfer
dmaengine: stm32-dma: properly mask irq bits
drivers/dma/stm32-dma.c | 287 +++++++++++++++++++++++++++++++++-------
1 file changed, 240 insertions(+), 47 deletions(-)
--
2.19.0.605.g01d371f741-goog
Booting a 486 with "no387 nofxsr" ends with
| math_emulate: 0060:c101987d
| Kernel panic - not syncing: Math emulation needed in kernel
on the first context switch in user land. The reason is that
copy_fpregs_to_fpstate() tries `fnsave' which does not work. This
happens since commit f1c8cd0176078 ("x86/fpu: Change fpu->fpregs_active
users to fpu->fpstate_active").
Add a check for X86_FEATURE_FPU before trying to save FPU registers (we
have such a check in switch_fpu_finish() already).
Fixes: f1c8cd0176078 ("x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
---
arch/x86/include/asm/fpu/internal.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index a38bf5a1e37ad..69dcdf195b611 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu)
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (old_fpu->initialized) {
+ if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
--
2.19.1
When driver is built as module and DT node contains clocks compatible
(e.g. "samsung,s2mps11-clk"), the module will not be autoloaded because
module aliases won't match.
The modalias from uevent: of:NclocksT<NULL>Csamsung,s2mps11-clk
The modalias from driver: platform:s2mps11-clk
The devices are instantiated by parent's MFD. However both Device Tree
bindings and parent define the compatible for clocks devices. In case
of module matching this DT compatible will be used.
The issue will not happen if this is a built-in (no need for module
matching) or when clocks DT node does not contain compatible (not
correct from bindings perspective but working for driver).
Note when backporting to stable kernels: adjust the list of device ID
entries.
Cc: <stable(a)vger.kernel.org>
Fixes: 53c31b3437a6 ("mfd: sec-core: Add of_compatible strings for clock MFD cells")
Signed-off-by: Krzysztof Kozlowski <krzk(a)kernel.org>
Acked-by: Stephen Boyd <sboyd(a)kernel.org>
---
Changes since v1:
1. Add Stephen's ack.
2. Minor language changes to comment.
Stephen, can you apply it to clk tree? I think you acked it so I could take
it... but anyway I cannot combine it with DT changes.
---
drivers/clk/clk-s2mps11.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c
index d44e0eea31ec..0934d3724495 100644
--- a/drivers/clk/clk-s2mps11.c
+++ b/drivers/clk/clk-s2mps11.c
@@ -245,6 +245,36 @@ static const struct platform_device_id s2mps11_clk_id[] = {
};
MODULE_DEVICE_TABLE(platform, s2mps11_clk_id);
+#ifdef CONFIG_OF
+/*
+ * Device is instantiated through parent MFD device and device matching is done
+ * through platform_device_id.
+ *
+ * However if device's DT node contains proper clock compatible and driver is
+ * built as a module, then the *module* matching will be done through DT aliases.
+ * This requires of_device_id table. In the same time this will not change the
+ * actual *device* matching so do not add .of_match_table.
+ */
+static const struct of_device_id s2mps11_dt_match[] = {
+ {
+ .compatible = "samsung,s2mps11-clk",
+ .data = (void *)S2MPS11X,
+ }, {
+ .compatible = "samsung,s2mps13-clk",
+ .data = (void *)S2MPS13X,
+ }, {
+ .compatible = "samsung,s2mps14-clk",
+ .data = (void *)S2MPS14X,
+ }, {
+ .compatible = "samsung,s5m8767-clk",
+ .data = (void *)S5M8767X,
+ }, {
+ /* Sentinel */
+ },
+};
+MODULE_DEVICE_TABLE(of, s2mps11_dt_match);
+#endif
+
static struct platform_driver s2mps11_clk_driver = {
.driver = {
.name = "s2mps11-clk",
--
2.14.1
From: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
commit b4a4957d3d1c328b733fce783b7264996f866ad2 upstream.
rvt_destroy_qp() cannot complete until all in process packets have
been released from the underlying hardware. If a link down event
occurs, an application can hang with a kernel stack similar to:
cat /proc/<app PID>/stack
quiesce_qp+0x178/0x250 [hfi1]
rvt_reset_qp+0x23d/0x400 [rdmavt]
rvt_destroy_qp+0x69/0x210 [rdmavt]
ib_destroy_qp+0xba/0x1c0 [ib_core]
nvme_rdma_destroy_queue_ib+0x46/0x80 [nvme_rdma]
nvme_rdma_free_queue+0x3c/0xd0 [nvme_rdma]
nvme_rdma_destroy_io_queues+0x88/0xd0 [nvme_rdma]
nvme_rdma_error_recovery_work+0x52/0xf0 [nvme_rdma]
process_one_work+0x17a/0x440
worker_thread+0x126/0x3c0
kthread+0xcf/0xe0
ret_from_fork+0x58/0x90
0xffffffffffffffff
quiesce_qp() waits until all outstanding packets have been freed.
This wait should be momentary. During a link down event, the cleanup
handling does not ensure that all packets caught by the link down are
flushed properly.
This is caused by the fact that the freeze path and the link down
event are handled the same way. This is not correct. The freeze path
waits until the HFI is unfrozen and then restarts PIO. A link down
is not a freeze event. The link down path cannot restart the PIO
until link is restored. If the PIO path is restarted before the link
comes up, the application (QP) using the PIO path will hang (until
link is restored).
Fix by separating the linkdown path from the freeze path and use the
link down path for link down events.
Close a race condition in sc_disable() by acquiring both the progress
and release locks.
Close a race condition in sc_stop() by moving the setting of the flag
bits under the alloc lock.
Fixes: 7724105686e7 ("IB/hfi1: add driver files")
Cc: <stable(a)vger.kernel.org> # 4.14.x
Reviewed-by: Mike Marciniszyn <mike.marciniszyn(a)intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro(a)intel.com>
---
drivers/infiniband/hw/hfi1/chip.c | 7 +++++-
drivers/infiniband/hw/hfi1/pio.c | 42 ++++++++++++++++++++++++++++++-------
drivers/infiniband/hw/hfi1/pio.h | 2 ++
3 files changed, 42 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 33cf173..f9faacc 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6722,6 +6722,7 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
struct hfi1_devdata *dd = ppd->dd;
struct send_context *sc;
int i;
+ int sc_flags;
if (flags & FREEZE_SELF)
write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
@@ -6732,11 +6733,13 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
/* notify all SDMA engines that they are going into a freeze */
sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
+ sc_flags = SCF_FROZEN | SCF_HALTED | (flags & FREEZE_LINK_DOWN ?
+ SCF_LINK_DOWN : 0);
/* do halt pre-handling on all enabled send contexts */
for (i = 0; i < dd->num_send_contexts; i++) {
sc = dd->send_contexts[i].sc;
if (sc && (sc->flags & SCF_ENABLED))
- sc_stop(sc, SCF_FROZEN | SCF_HALTED);
+ sc_stop(sc, sc_flags);
}
/* Send context are frozen. Notify user space */
@@ -10646,6 +10649,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
handle_linkup_change(dd, 1);
+ pio_kernel_linkup(dd);
+
ppd->host_link_state = HLS_UP_INIT;
break;
case HLS_UP_ARMED:
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index a95ac62..44a8940 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -937,20 +937,18 @@ void sc_free(struct send_context *sc)
void sc_disable(struct send_context *sc)
{
u64 reg;
- unsigned long flags;
struct pio_buf *pbuf;
if (!sc)
return;
/* do all steps, even if already disabled */
- spin_lock_irqsave(&sc->alloc_lock, flags);
+ spin_lock_irq(&sc->alloc_lock);
reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
sc->flags &= ~SCF_ENABLED;
sc_wait_for_packet_egress(sc, 1);
write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);
- spin_unlock_irqrestore(&sc->alloc_lock, flags);
/*
* Flush any waiters. Once the context is disabled,
@@ -960,7 +958,7 @@ void sc_disable(struct send_context *sc)
* proceed with the flush.
*/
udelay(1);
- spin_lock_irqsave(&sc->release_lock, flags);
+ spin_lock(&sc->release_lock);
if (sc->sr) { /* this context has a shadow ring */
while (sc->sr_tail != sc->sr_head) {
pbuf = &sc->sr[sc->sr_tail].pbuf;
@@ -971,7 +969,8 @@ void sc_disable(struct send_context *sc)
sc->sr_tail = 0;
}
}
- spin_unlock_irqrestore(&sc->release_lock, flags);
+ spin_unlock(&sc->release_lock);
+ spin_unlock_irq(&sc->alloc_lock);
}
/* return SendEgressCtxtStatus.PacketOccupancy */
@@ -1194,11 +1193,39 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd)
sc = dd->send_contexts[i].sc;
if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
continue;
+ if (sc->flags & SCF_LINK_DOWN)
+ continue;
sc_enable(sc); /* will clear the sc frozen flag */
}
}
+/**
+ * pio_kernel_linkup() - Re-enable send contexts after linkup event
+ * @dd: valid device data
+ *
+ * When the link goes down, the freeze path is taken. However, a link down
+ * event is different from a freeze because if the send context is re-enabled
+ * whoever is sending data will start sending data again, which will hang
+ * any QP that is sending data.
+ *
+ * The freeze path now looks at the type of event that occurs and takes this
+ * path for link down event.
+ */
+void pio_kernel_linkup(struct hfi1_devdata *dd)
+{
+ struct send_context *sc;
+ int i;
+
+ for (i = 0; i < dd->num_send_contexts; i++) {
+ sc = dd->send_contexts[i].sc;
+ if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER)
+ continue;
+
+ sc_enable(sc); /* will clear the sc link down flag */
+ }
+}
+
/*
* Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
* Returns:
@@ -1398,11 +1425,10 @@ void sc_stop(struct send_context *sc, int flag)
{
unsigned long flags;
- /* mark the context */
- sc->flags |= flag;
-
/* stop buffer allocations */
spin_lock_irqsave(&sc->alloc_lock, flags);
+ /* mark the context */
+ sc->flags |= flag;
sc->flags &= ~SCF_ENABLED;
spin_unlock_irqrestore(&sc->alloc_lock, flags);
wake_up(&sc->halt_wait);
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 99ca5ed..c7c4e6e 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -145,6 +145,7 @@ struct send_context {
#define SCF_IN_FREE 0x02
#define SCF_HALTED 0x04
#define SCF_FROZEN 0x08
+#define SCF_LINK_DOWN 0x10
struct send_context_info {
struct send_context *sc; /* allocated working context */
@@ -312,6 +313,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
void pio_reset_all(struct hfi1_devdata *dd);
void pio_freeze(struct hfi1_devdata *dd);
void pio_kernel_unfreeze(struct hfi1_devdata *dd);
+void pio_kernel_linkup(struct hfi1_devdata *dd);
/* global PIO send control operations */
#define PSC_GLOBAL_ENABLE 0