The patch
ASoC: codecs: msm8916-wcd: Fix supported formats
has been applied to the asoc tree at
https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git
All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.
You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.
If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.
Please add any relevant lists and maintainers to the CCs when replying
to this mail.
Thanks,
Mark
>From 51f493ae71adc2c49a317a13c38e54e1cdf46005 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Date: Thu, 30 Nov 2017 10:15:02 +0000
Subject: [PATCH] ASoC: codecs: msm8916-wcd: Fix supported formats
This codec is configurable for only 16 bit and 32 bit samples, so reflect
this in the supported formats also remove 24bit sample from supported list.
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
sound/soc/codecs/msm8916-wcd-analog.c | 2 +-
sound/soc/codecs/msm8916-wcd-digital.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c
index 5f3c42c4f74a..066ea2f4ce7b 100644
--- a/sound/soc/codecs/msm8916-wcd-analog.c
+++ b/sound/soc/codecs/msm8916-wcd-analog.c
@@ -267,7 +267,7 @@
#define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000)
#define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
- SNDRV_PCM_FMTBIT_S24_LE)
+ SNDRV_PCM_FMTBIT_S32_LE)
static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4;
diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c
index a10a724eb448..13354d6304a8 100644
--- a/sound/soc/codecs/msm8916-wcd-digital.c
+++ b/sound/soc/codecs/msm8916-wcd-digital.c
@@ -194,7 +194,7 @@
SNDRV_PCM_RATE_32000 | \
SNDRV_PCM_RATE_48000)
#define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
- SNDRV_PCM_FMTBIT_S24_LE)
+ SNDRV_PCM_FMTBIT_S32_LE)
struct msm8916_wcd_digital_priv {
struct clk *ahbclk, *mclk;
@@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream,
RX_I2S_CTL_RX_I2S_MODE_MASK,
RX_I2S_CTL_RX_I2S_MODE_16);
break;
- case SNDRV_PCM_FORMAT_S24_LE:
+ case SNDRV_PCM_FORMAT_S32_LE:
snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL,
TX_I2S_CTL_TX_I2S_MODE_MASK,
TX_I2S_CTL_TX_I2S_MODE_32);
--
2.15.0
d_prune_aliases() does not prune dentry that has non-zero reference
count. If we want trim_caps() to trim 'N' caps, the first 'N' inodes
in session->s_caps are all directories with child dentry, trim_caps()
can fail to trim anything.
Cc: stable(a)vger.kernel.org
Signed-off-by: "Yan, Zheng" <zyan(a)redhat.com>
---
fs/ceph/mds_client.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 80339e278ad8..da181acd4a61 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1490,16 +1490,20 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
if ((used | wanted) & ~oissued & mine)
goto out; /* we need these caps */
- session->s_trim_caps--;
if (oissued) {
/* we aren't the only cap.. just remove us */
__ceph_remove_cap(cap, true);
+ session->s_trim_caps--;
} else {
+ int refs;
/* try dropping referring dentries */
spin_unlock(&ci->i_ceph_lock);
d_prune_aliases(inode);
+ refs = atomic_read(&inode->i_count);
+ if (refs == 1)
+ session->s_trim_caps--;
dout("trim_caps_cb %p cap %p pruned, count now %d\n",
- inode, cap, atomic_read(&inode->i_count));
+ inode, cap, refs);
return 0;
}
--
2.13.6
Tomas Charvat <tc(a)excello.cz> wrote:
[ CC stable, Steffen ]
> Hi Florian and David, I'm running several servers that use XFRM ipsec.
> It do work well on all kernels bellow 4.14.0.
>
> It doesnt work on 4.14.0-2. There is no any error in dmesg or in
> userspace when I do configure policies.
>
> Since there is not much info about XFRM in dmesg I have no clue, where
> to start when I want to debug this issue.
David, please consider picking up
94802151894d482e82c324edf2c658f8e6b96508
("Revert "xfrm: Fix stack-out-of-bounds read in xfrm_state_find.")
for the 4.14.y stable queue.
I think its a pretty safe bet that this fixes the problem, it broke
transport mode wildcard policy lookup.
> I have seen that you have removed flow-cache that we have fixed 2 time.
> Do you have clue where to start with debug of this issue ?
If the revert doesn't help, please do a bug report to
netdev(a)vger.kernel.org and provide /proc/net/xfrm_stat content
and the list of policies/SAs.
[BUG]
fstrim on some btrfs only trims the unallocated space, not trimming any
space in existing block groups.
[CAUSE]
Before fstrim_range passed to btrfs_trim_fs(), it get truncated to
range [0, super->total_bytes).
So later btrfs_trim_fs() will only be able to trim block groups in range
[0, super->total_bytes).
While for btrfs, any bytenr aligned to sector size is valid, since btrfs use
its logical address space, there is nothing limiting the location where
we put block groups.
For btrfs with routine balance, it's quite easy to relocate all
block groups and bytenr of block groups will start beyond super->total_bytes.
In that case, btrfs will not trim existing block groups.
[FIX]
Just remove the truncation in btrfs_ioctl_fitrim(), so btrfs_trim_fs()
can get the unmodified range, which is normally set to [0, U64_MAX].
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Fixes: f4c697e6406d ("btrfs: return EINVAL if start > total_bytes in fitrim ioctl")
Cc: <stable(a)vger.kernel.org> # v4.0+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
changelog:
v2:
Locate the root cause in btrfs_ioctl_fitrim(), remove the truncation
so we can still allow user to trim custom range.
---
fs/btrfs/ioctl.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fd172a93d11a..017fda31400d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -365,7 +365,6 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
struct fstrim_range range;
u64 minlen = ULLONG_MAX;
u64 num_devices = 0;
- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
int ret;
if (!capable(CAP_SYS_ADMIN))
@@ -389,11 +388,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
return -EOPNOTSUPP;
if (copy_from_user(&range, arg, sizeof(range)))
return -EFAULT;
- if (range.start > total_bytes ||
- range.len < fs_info->sb->s_blocksize)
+
+ /*
+ * NOTE: Don't truncate the range using super->total_bytes.
+ * Bytenr of btrfs block group is in btrfs logical address space,
+ * which can be any sector size aligned bytenr in [0, U64_MAX].
+ */
+ if (range.len < fs_info->sb->s_blocksize)
return -EINVAL;
- range.len = min(range.len, total_bytes - range.start);
range.minlen = max(range.minlen, minlen);
ret = btrfs_trim_fs(fs_info, &range);
if (ret < 0)
--
2.15.0
From: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Subject: mm/hugetlb: fix NULL-pointer dereference on 5-level paging machine
I made a mistake during converting hugetlb code to 5-level paging: in
huge_pte_alloc() we have to use p4d_alloc(), not p4d_offset(). Otherwise
it leads to crash -- NULL-pointer dereference in pud_alloc() if p4d table
is not yet allocated.
It only can happen in 5-level paging mode. In 4-level paging mode
p4d_offset() always returns pgd, so we are fine.
Link: http://lkml.kernel.org/r/20171122121921.64822-1-kirill.shutemov@linux.intel…
Fixes: c2febafc6773 ("mm: convert generic code to 5-level paging")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: <stable(a)vger.kernel.org> [4.11+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff -puN mm/hugetlb.c~mm-hugetlb-fix-null-pointer-dereference-on-5-level-paging-machine mm/hugetlb.c
--- a/mm/hugetlb.c~mm-hugetlb-fix-null-pointer-dereference-on-5-level-paging-machine
+++ a/mm/hugetlb.c
@@ -4635,7 +4635,9 @@ pte_t *huge_pte_alloc(struct mm_struct *
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
- p4d = p4d_offset(pgd, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return NULL;
pud = pud_alloc(mm, p4d, addr);
if (pud) {
if (sz == PUD_SIZE) {
_
From: Ian Kent <raven(a)themaw.net>
Subject: autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored"
42f4614821 ("autofs: fix AT_NO_AUTOMOUNT not being honored") allowed the
fstatat(2) system call to properly honor the AT_NO_AUTOMOUNT flag but
introduced a semantic change.
In order to honor AT_NO_AUTOMOUNT a semantic change was made to the
negative dentry case for stat family system calls in follow_automount().
This changed the unconditional triggering of an automount in this case to
no longer be done and an error returned instead.
This has caused more problems than I expected so reverting the change is
needed.
In a discussion with Neil Brown it was concluded that the automount(8)
daemon can implement this change without kernel modifications. So that
will be done instead and the autofs module documentation updated with a
description of the problem and what needs to be done by module users for
this specific case.
Link: http://lkml.kernel.org/r/151174730120.6162.3848002191530283984.stgit@pluto.…
Fixes: 42f4614821 ("autofs: fix AT_NO_AUTOMOUNT not being honored")
Signed-off-by: Ian Kent <raven(a)themaw.net>
Cc: Neil Brown <neilb(a)suse.com>
Cc: Al Viro <viro(a)ZenIV.linux.org.uk>
Cc: David Howells <dhowells(a)redhat.com>
Cc: Colin Walters <walters(a)redhat.com>
Cc: Ondrej Holy <oholy(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [4.11+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/namei.c | 15 +++------------
include/linux/fs.h | 3 ++-
2 files changed, 5 insertions(+), 13 deletions(-)
diff -puN fs/namei.c~autofs-revert-fix-at_no_automount-not-being-honored fs/namei.c
--- a/fs/namei.c~autofs-revert-fix-at_no_automount-not-being-honored
+++ a/fs/namei.c
@@ -1129,18 +1129,9 @@ static int follow_automount(struct path
* of the daemon to instantiate them before they can be used.
*/
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
- LOOKUP_OPEN | LOOKUP_CREATE |
- LOOKUP_AUTOMOUNT))) {
- /* Positive dentry that isn't meant to trigger an
- * automount, EISDIR will allow it to be used,
- * otherwise there's no mount here "now" so return
- * ENOENT.
- */
- if (path->dentry->d_inode)
- return -EISDIR;
- else
- return -ENOENT;
- }
+ LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
+ path->dentry->d_inode)
+ return -EISDIR;
if (path->dentry->d_sb->s_user_ns != &init_user_ns)
return -EACCES;
diff -puN include/linux/fs.h~autofs-revert-fix-at_no_automount-not-being-honored include/linux/fs.h
--- a/include/linux/fs.h~autofs-revert-fix-at_no_automount-not-being-honored
+++ a/include/linux/fs.h
@@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char _
static inline int vfs_fstatat(int dfd, const char __user *filename,
struct kstat *stat, int flags)
{
- return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
+ return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
+ stat, STATX_BASIC_STATS);
}
static inline int vfs_fstat(int fd, struct kstat *stat)
{
_
From: Ian Kent <raven(a)themaw.net>
Subject: autofs: revert "autofs: take more care to not update last_used on path walk"
While 092a53452b ("autofs: take more care to not update last_used on path
walk") helped (partially) resolve a problem where automounts were not
expiring due to aggressive accesses from user space it has a side effect
for very large environments.
This change helps with the expire problem by making the expire more
aggressive but, for very large environments, that means more mount
requests from clients. When there are a lot of clients that can mean
fairly significant server load increases.
It turns out I put the last_used in this position to solve this very
problem and failed to update my own thinking of the autofs expire policy.
So the patch being reverted introduces a regression which should be fixed.
Link: http://lkml.kernel.org/r/151174729420.6162.1832622523537052460.stgit@pluto.…
Fixes: 092a53452b ("autofs: take more care to not update last_used on path walk")
Signed-off-by: Ian Kent <raven(a)themaw.net>
Reviewed-by: NeilBrown <neilb(a)suse.com>
Cc: Al Viro <viro(a)ZenIV.linux.org.uk>
Cc: <stable(a)vger.kernel.org> [4.11+]
Cc: Colin Walters <walters(a)redhat.com>
Cc: David Howells <dhowells(a)redhat.com>
Cc: Ondrej Holy <oholy(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/autofs4/root.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff -puN fs/autofs4/root.c~autofs-revert-take-more-care-to-not-update-last_used-on-path-walk fs/autofs4/root.c
--- a/fs/autofs4/root.c~autofs-revert-take-more-care-to-not-update-last_used-on-path-walk
+++ a/fs/autofs4/root.c
@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const stru
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs4_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
- ino->last_used = jiffies;
}
+ ino->last_used = jiffies;
return status;
}
@@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint
*/
if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
+ struct autofs_info *ino;
struct dentry *new;
new = d_lookup(parent, &dentry->d_name);
if (!new)
return NULL;
- if (new == dentry)
- dput(new);
- else {
- struct autofs_info *ino;
-
- ino = autofs4_dentry_ino(new);
- ino->last_used = jiffies;
- dput(path->dentry);
- path->dentry = new;
- }
+ ino = autofs4_dentry_ino(new);
+ ino->last_used = jiffies;
+ dput(path->dentry);
+ path->dentry = new;
}
return path->dentry;
}
_
From: Shakeel Butt <shakeelb(a)google.com>
Subject: mm, memcg: fix mem_cgroup_swapout() for THPs
d6810d730022 ("memcg, THP, swap: make mem_cgroup_swapout() support THP")
changed mem_cgroup_swapout() to support transparent huge page (THP).
However the patch missed one location which should be changed for
correctly handling THPs. The resulting bug will cause the memory cgroups
whose THPs were swapped out to become zombies on deletion.
Link: http://lkml.kernel.org/r/20171128161941.20931-1-shakeelb@google.com
Fixes: d6810d730022 ("memcg, THP, swap: make mem_cgroup_swapout() support THP")
Signed-off-by: Shakeel Butt <shakeelb(a)google.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Huang Ying <ying.huang(a)intel.com>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: Greg Thelen <gthelen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -puN mm/memcontrol.c~mm-memcg-fix-mem_cgroup_swapout-for-thps mm/memcontrol.c
--- a/mm/memcontrol.c~mm-memcg-fix-mem_cgroup_swapout-for-thps
+++ a/mm/memcontrol.c
@@ -6044,7 +6044,7 @@ void mem_cgroup_swapout(struct page *pag
memcg_check_events(memcg, page);
if (!mem_cgroup_is_root(memcg))
- css_put(&memcg->css);
+ css_put_many(&memcg->css, nr_entries);
}
/**
_
From: Zi Yan <zi.yan(a)cs.rutgers.edu>
Subject: mm: migrate: fix an incorrect call of prep_transhuge_page()
In https://lkml.org/lkml/2017/11/20/411, Andrea reported that during
memory hotplug/hot remove prep_transhuge_page() is called incorrectly on
non-THP pages for migration, when THP is on but THP migration is not
enabled. This leads to a bad state of target pages for migration.
By inspecting the code, if called on a non-THP, prep_transhuge_page() will
1) change the value of the mapping of (page + 2), since it is used
for THP deferred list;
2) change the lru value of (page + 1), since it is used for THP's
dtor.
Both can lead to data corruption of these two pages.
Andrea said:
: Pragmatically and from the point of view of the memory_hotplug subsys, the
: effect is a kernel crash when pages are being migrated during a memory hot
: remove offline and migration target pages are found in a bad state.
This patch fixes it by only calling prep_transhuge_page() when we are
certain that the target page is THP.
Link: http://lkml.kernel.org/r/20171121021855.50525-1-zi.yan@sent.com
Fixes: 8135d8926c08 ("mm: memory_hotplug: memory hotremove supports thp migration")
Signed-off-by: Zi Yan <zi.yan(a)cs.rutgers.edu>
Reported-by: Andrea Reale <ar(a)linux.vnet.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: "Jérôme Glisse" <jglisse(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [4.14]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/migrate.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -puN include/linux/migrate.h~mm-migrate-fix-an-incorrect-call-of-prep_transhuge_page include/linux/migrate.h
--- a/include/linux/migrate.h~mm-migrate-fix-an-incorrect-call-of-prep_transhuge_page
+++ a/include/linux/migrate.h
@@ -54,7 +54,7 @@ static inline struct page *new_page_node
new_page = __alloc_pages_nodemask(gfp_mask, order,
preferred_nid, nodemask);
- if (new_page && PageTransHuge(page))
+ if (new_page && PageTransHuge(new_page))
prep_transhuge_page(new_page);
return new_page;
_
From: chenjie <chenjie6(a)huawei.com>
Subject: mm/madvise.c: fix madvise() infinite loop under special circumstances
MADVISE_WILLNEED has always been a noop for DAX (formerly XIP) mappings.
Unfortunately madvise_willneed() doesn't communicate this information
properly to the generic madvise syscall implementation. The calling
convention is quite subtle there. madvise_vma() is supposed to either
return an error or update &prev otherwise the main loop will never advance
to the next vma and it will keep looping for ever without a way to get out
of the kernel.
It seems this has been broken since introduction. Nobody has noticed
because nobody seems to be using MADVISE_WILLNEED on these DAX mappings.
[mhocko(a)suse.com: rewrite changelog]
Link: http://lkml.kernel.org/r/20171127115318.911-1-guoxuenan@huawei.com
Fixes: fe77ba6f4f97 ("[PATCH] xip: madvice/fadvice: execute in place")
Signed-off-by: chenjie <chenjie6(a)huawei.com>
Signed-off-by: guoxuenan <guoxuenan(a)huawei.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: zhangyi (F) <yi.zhang(a)huawei.com>
Cc: Miao Xie <miaoxie(a)huawei.com>
Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com>
Cc: Shaohua Li <shli(a)fb.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Anshuman Khandual <khandual(a)linux.vnet.ibm.com>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Carsten Otte <cotte(a)de.ibm.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/madvise.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff -puN mm/madvise.c~mmmadvise-bugfix-of-madvise-systemcall-infinite-loop-under-special-circumstances mm/madvise.c
--- a/mm/madvise.c~mmmadvise-bugfix-of-madvise-systemcall-infinite-loop-under-special-circumstances
+++ a/mm/madvise.c
@@ -276,15 +276,14 @@ static long madvise_willneed(struct vm_a
{
struct file *file = vma->vm_file;
+ *prev = vma;
#ifdef CONFIG_SWAP
if (!file) {
- *prev = vma;
force_swapin_readahead(vma, start, end);
return 0;
}
if (shmem_mapping(file->f_mapping)) {
- *prev = vma;
force_shm_swapin_readahead(vma, start, end,
file->f_mapping);
return 0;
@@ -299,7 +298,6 @@ static long madvise_willneed(struct vm_a
return 0;
}
- *prev = vma;
start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
if (end > vma->vm_end)
end = vma->vm_end;
_
From: Kees Cook <keescook(a)chromium.org>
Subject: exec: avoid RLIMIT_STACK races with prlimit()
While the defense-in-depth RLIMIT_STACK limit on setuid processes was
protected against races from other threads calling setrlimit(), I missed
protecting it against races from external processes calling prlimit().
This adds locking around the change and makes sure that rlim_max is set
too.
Link: http://lkml.kernel.org/r/20171127193457.GA11348@beast
Fixes: 64701dee4178e ("exec: Use sane stack rlimit under secureexec")
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reported-by: Ben Hutchings <ben.hutchings(a)codethink.co.uk>
Reported-by: Brad Spengler <spender(a)grsecurity.net>
Acked-by: Serge Hallyn <serge(a)hallyn.com>
Cc: James Morris <james.l.morris(a)oracle.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Jiri Slaby <jslaby(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/exec.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff -puN fs/exec.c~exec-avoid-rlimit_stack-races-with-prlimit fs/exec.c
--- a/fs/exec.c~exec-avoid-rlimit_stack-races-with-prlimit
+++ a/fs/exec.c
@@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm
* avoid bad behavior from the prior rlimits. This has to
* happen before arch_pick_mmap_layout(), which examines
* RLIMIT_STACK, but after the point of no return to avoid
- * needing to clean up the change on failure.
+ * races from other threads changing the limits. This also
+ * must be protected from races with prlimit() calls.
*/
+ task_lock(current->group_leader);
if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+ if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
+ current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
+ task_unlock(current->group_leader);
}
arch_pick_mmap_layout(current->mm);
_
From: Dan Williams <dan.j.williams(a)intel.com>
Subject: IB/core: disable memory registration of filesystem-dax vmas
Until there is a solution to the dma-to-dax vs truncate problem it is not
safe to allow RDMA to create long standing memory registrations against
filesytem-dax vmas.
Link: http://lkml.kernel.org/r/151068941011.7446.7766030590347262502.stgit@dwilli…
Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Reported-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Acked-by: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: Sean Hefty <sean.hefty(a)intel.com>
Cc: Doug Ledford <dledford(a)redhat.com>
Cc: Hal Rosenstock <hal.rosenstock(a)gmail.com>
Cc: Jeff Moyer <jmoyer(a)redhat.com>
Cc: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Cc: Inki Dae <inki.dae(a)samsung.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Joonyoung Shim <jy0922.shim(a)samsung.com>
Cc: Kyungmin Park <kyungmin.park(a)samsung.com>
Cc: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Seung-Woo Kim <sw0312.kim(a)samsung.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/infiniband/core/umem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -puN drivers/infiniband/core/umem.c~ib-core-disable-memory-registration-of-fileystem-dax-vmas drivers/infiniband/core/umem.c
--- a/drivers/infiniband/core/umem.c~ib-core-disable-memory-registration-of-fileystem-dax-vmas
+++ a/drivers/infiniband/core/umem.c
@@ -191,7 +191,7 @@ struct ib_umem *ib_umem_get(struct ib_uc
sg_list_start = umem->sg_head.sgl;
while (npages) {
- ret = get_user_pages(cur_base,
+ ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
gup_flags, page_list, vma_list);
_
From: Dan Williams <dan.j.williams(a)intel.com>
Subject: v4l2: disable filesystem-dax mapping support
V4L2 memory registrations are incompatible with filesystem-dax that needs
the ability to revoke dma access to a mapping at will, or otherwise allow
the kernel to wait for completion of DMA. The filesystem-dax
implementation breaks the traditional solution of truncate of active file
backed mappings since there is no page-cache page we can orphan to sustain
ongoing DMA.
If v4l2 wants to support long lived DMA mappings it needs to arrange to
hold a file lease or use some other mechanism so that the kernel can
coordinate revoking DMA access when the filesystem needs to truncate
mappings.
Link: http://lkml.kernel.org/r/151068940499.7446.12846708245365671207.stgit@dwill…
Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Reported-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Cc: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Doug Ledford <dledford(a)redhat.com>
Cc: Hal Rosenstock <hal.rosenstock(a)gmail.com>
Cc: Inki Dae <inki.dae(a)samsung.com>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: Jeff Moyer <jmoyer(a)redhat.com>
Cc: Joonyoung Shim <jy0922.shim(a)samsung.com>
Cc: Kyungmin Park <kyungmin.park(a)samsung.com>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Cc: Sean Hefty <sean.hefty(a)intel.com>
Cc: Seung-Woo Kim <sw0312.kim(a)samsung.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/media/v4l2-core/videobuf-dma-sg.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff -puN drivers/media/v4l2-core/videobuf-dma-sg.c~v4l2-disable-filesystem-dax-mapping-support drivers/media/v4l2-core/videobuf-dma-sg.c
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c~v4l2-disable-filesystem-dax-mapping-support
+++ a/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -185,12 +185,13 @@ static int videobuf_dma_init_user_locked
dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
data, size, dma->nr_pages);
- err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
+ err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages,
flags, dma->pages, NULL);
if (err != dma->nr_pages) {
dma->nr_pages = (err >= 0) ? err : 0;
- dprintk(1, "get_user_pages: err=%d [%d]\n", err, dma->nr_pages);
+ dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err,
+ dma->nr_pages);
return err < 0 ? err : -EINVAL;
}
return 0;
_
From: Dan Williams <dan.j.williams(a)intel.com>
Subject: mm: fail get_vaddr_frames() for filesystem-dax mappings
Until there is a solution to the dma-to-dax vs truncate problem it is not
safe to allow V4L2, Exynos, and other frame vector users to create long
standing / irrevocable memory registrations against filesytem-dax vmas.
[dan.j.williams(a)intel.com: add comment for vma_is_fsdax() check in get_vaddr_frames(), per Jan]
Link: http://lkml.kernel.org/r/151197874035.26211.4061781453123083667.stgit@dwill…
Link: http://lkml.kernel.org/r/151068939985.7446.15684639617389154187.stgit@dwill…
Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Cc: Inki Dae <inki.dae(a)samsung.com>
Cc: Seung-Woo Kim <sw0312.kim(a)samsung.com>
Cc: Joonyoung Shim <jy0922.shim(a)samsung.com>
Cc: Kyungmin Park <kyungmin.park(a)samsung.com>
Cc: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Doug Ledford <dledford(a)redhat.com>
Cc: Hal Rosenstock <hal.rosenstock(a)gmail.com>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: Jeff Moyer <jmoyer(a)redhat.com>
Cc: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Cc: Sean Hefty <sean.hefty(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/frame_vector.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff -puN mm/frame_vector.c~mm-fail-get_vaddr_frames-for-filesystem-dax-mappings mm/frame_vector.c
--- a/mm/frame_vector.c~mm-fail-get_vaddr_frames-for-filesystem-dax-mappings
+++ a/mm/frame_vector.c
@@ -53,6 +53,18 @@ int get_vaddr_frames(unsigned long start
ret = -EFAULT;
goto out;
}
+
+ /*
+ * While get_vaddr_frames() could be used for transient (kernel
+ * controlled lifetime) pinning of memory pages all current
+ * users establish long term (userspace controlled lifetime)
+ * page pinning. Treat get_vaddr_frames() like
+ * get_user_pages_longterm() and disallow it for filesystem-dax
+ * mappings.
+ */
+ if (vma_is_fsdax(vma))
+ return -EOPNOTSUPP;
+
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
vec->got_ref = true;
vec->is_pfns = false;
_
From: Dan Williams <dan.j.williams(a)intel.com>
Subject: mm: introduce get_user_pages_longterm
Patch series "introduce get_user_pages_longterm()", v2.
Here is a new get_user_pages api for cases where a driver intends to keep
an elevated page count indefinitely. This is distinct from usages like
iov_iter_get_pages where the elevated page counts are transient. The
iov_iter_get_pages cases immediately turn around and submit the pages to a
device driver which will put_page when the i/o operation completes (under
kernel control).
In the longterm case userspace is responsible for dropping the page
reference at some undefined point in the future. This is untenable for
filesystem-dax case where the filesystem is in control of the lifetime of
the block / page and needs reasonable limits on how long it can wait for
pages in a mapping to become idle.
Fixing filesystems to actually wait for dax pages to be idle before blocks
from a truncate/hole-punch operation are repurposed is saved for a later
patch series.
Also, allowing longterm registration of dax mappings is a future patch
series that introduces a "map with lease" semantic where the kernel can
revoke a lease and force userspace to drop its page references.
I have also tagged these for -stable to purposely break cases that might
assume that longterm memory registrations for filesystem-dax mappings were
supported by the kernel. The behavior regression this policy change
implies is one of the reasons we maintain the "dax enabled. Warning:
EXPERIMENTAL, use at your own risk" notification when mounting a
filesystem in dax mode.
It is worth noting the device-dax interface does not suffer the same
constraints since it does not support file space management operations
like hole-punch.
This patch (of 4):
Until there is a solution to the dma-to-dax vs truncate problem it is not
safe to allow long standing memory registrations against filesytem-dax
vmas. Device-dax vmas do not have this problem and are explicitly
allowed.
This is temporary until a "memory registration with layout-lease"
mechanism can be implemented for the affected sub-systems (RDMA and V4L2).
[akpm(a)linux-foundation.org: use kcalloc()]
Link: http://lkml.kernel.org/r/151068939435.7446.13560129395419350737.stgit@dwill…
Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Suggested-by: Christoph Hellwig <hch(a)lst.de>
Cc: Doug Ledford <dledford(a)redhat.com>
Cc: Hal Rosenstock <hal.rosenstock(a)gmail.com>
Cc: Inki Dae <inki.dae(a)samsung.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: Jeff Moyer <jmoyer(a)redhat.com>
Cc: Joonyoung Shim <jy0922.shim(a)samsung.com>
Cc: Kyungmin Park <kyungmin.park(a)samsung.com>
Cc: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Cc: Sean Hefty <sean.hefty(a)intel.com>
Cc: Seung-Woo Kim <sw0312.kim(a)samsung.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/fs.h | 14 +++++++++
include/linux/mm.h | 13 ++++++++
mm/gup.c | 64 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 91 insertions(+)
diff -puN include/linux/fs.h~mm-introduce-get_user_pages_longterm include/linux/fs.h
--- a/include/linux/fs.h~mm-introduce-get_user_pages_longterm
+++ a/include/linux/fs.h
@@ -3194,6 +3194,20 @@ static inline bool vma_is_dax(struct vm_
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
}
+static inline bool vma_is_fsdax(struct vm_area_struct *vma)
+{
+ struct inode *inode;
+
+ if (!vma->vm_file)
+ return false;
+ if (!vma_is_dax(vma))
+ return false;
+ inode = file_inode(vma->vm_file);
+ if (inode->i_mode == S_IFCHR)
+ return false; /* device-dax */
+ return true;
+}
+
static inline int iocb_flags(struct file *file)
{
int res = 0;
diff -puN include/linux/mm.h~mm-introduce-get_user_pages_longterm include/linux/mm.h
--- a/include/linux/mm.h~mm-introduce-get_user_pages_longterm
+++ a/include/linux/mm.h
@@ -1380,6 +1380,19 @@ long get_user_pages_locked(unsigned long
unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
+#ifdef CONFIG_FS_DAX
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas);
+#else
+static inline long get_user_pages_longterm(unsigned long start,
+ unsigned long nr_pages, unsigned int gup_flags,
+ struct page **pages, struct vm_area_struct **vmas)
+{
+ return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+}
+#endif /* CONFIG_FS_DAX */
+
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
diff -puN mm/gup.c~mm-introduce-get_user_pages_longterm mm/gup.c
--- a/mm/gup.c~mm-introduce-get_user_pages_longterm
+++ a/mm/gup.c
@@ -1095,6 +1095,70 @@ long get_user_pages(unsigned long start,
}
EXPORT_SYMBOL(get_user_pages);
+#ifdef CONFIG_FS_DAX
+/*
+ * This is the same as get_user_pages() in that it assumes we are
+ * operating on the current task's mm, but it goes further to validate
+ * that the vmas associated with the address range are suitable for
+ * longterm elevated page reference counts. For example, filesystem-dax
+ * mappings are subject to the lifetime enforced by the filesystem and
+ * we need guarantees that longterm users like RDMA and V4L2 only
+ * establish mappings that have a kernel enforced revocation mechanism.
+ *
+ * "longterm" == userspace controlled elevated page count lifetime.
+ * Contrast this to iov_iter_get_pages() usages which are transient.
+ */
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas_arg)
+{
+ struct vm_area_struct **vmas = vmas_arg;
+ struct vm_area_struct *vma_prev = NULL;
+ long rc, i;
+
+ if (!pages)
+ return -EINVAL;
+
+ if (!vmas) {
+ vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
+ GFP_KERNEL);
+ if (!vmas)
+ return -ENOMEM;
+ }
+
+ rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+
+ for (i = 0; i < rc; i++) {
+ struct vm_area_struct *vma = vmas[i];
+
+ if (vma == vma_prev)
+ continue;
+
+ vma_prev = vma;
+
+ if (vma_is_fsdax(vma))
+ break;
+ }
+
+ /*
+ * Either get_user_pages() failed, or the vma validation
+ * succeeded, in either case we don't need to put_page() before
+ * returning.
+ */
+ if (i >= rc)
+ goto out;
+
+ for (i = 0; i < rc; i++)
+ put_page(pages[i]);
+ rc = -EOPNOTSUPP;
+out:
+ if (vmas != vmas_arg)
+ kfree(vmas);
+ return rc;
+}
+EXPORT_SYMBOL(get_user_pages_longterm);
+#endif /* CONFIG_FS_DAX */
+
/**
* populate_vma_page_range() - populate a range of pages in the vma.
* @vma: target vma
_