This is a note to let you know that I've just added the patch titled
netfilter: fix IS_ERR_VALUE usage
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
netfilter-fix-is_err_value-usage.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 92b4423e3a0bc5d43ecde4bcad871f8b5ba04efd Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo(a)netfilter.org>
Date: Fri, 29 Apr 2016 10:39:34 +0200
Subject: netfilter: fix IS_ERR_VALUE usage
From: Pablo Neira Ayuso <pablo(a)netfilter.org>
commit 92b4423e3a0bc5d43ecde4bcad871f8b5ba04efd upstream.
This is a forward-port of the original patch from Andrzej Hajda,
he said:
"IS_ERR_VALUE should be used only with unsigned long type.
Otherwise it can work incorrectly. To achieve this function
xt_percpu_counter_alloc is modified to return unsigned long,
and its result is assigned to temporary variable to perform
error checking, before assigning to .pcnt field.
The patch follows conclusion from discussion on LKML [1][2].
[1]: http://permalink.gmane.org/gmane.linux.kernel/2120927
[2]: http://permalink.gmane.org/gmane.linux.kernel/2150581"
Original patch from Andrzej is here:
http://patchwork.ozlabs.org/patch/582970/
This patch has clashed with input validation fixes for x_tables.
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Acked-by: Michal Kubecek <mkubecek(a)suse.cz>
---
include/linux/netfilter/x_tables.h | 6 +++---
net/ipv4/netfilter/arp_tables.c | 6 ++++--
net/ipv4/netfilter/ip_tables.c | 6 ++++--
net/ipv6/netfilter/ip6_tables.c | 6 ++++--
4 files changed, 15 insertions(+), 9 deletions(-)
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -381,16 +381,16 @@ static inline unsigned long ifname_compa
* allows us to return 0 for single core systems without forcing
* callers to deal with SMP vs. NONSMP issues.
*/
-static inline u64 xt_percpu_counter_alloc(void)
+static inline unsigned long xt_percpu_counter_alloc(void)
{
if (nr_cpu_ids > 1) {
void __percpu *res = __alloc_percpu(sizeof(struct xt_counters),
sizeof(struct xt_counters));
if (res == NULL)
- return (u64) -ENOMEM;
+ return -ENOMEM;
- return (u64) (__force unsigned long) res;
+ return (__force unsigned long) res;
}
return 0;
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -511,11 +511,13 @@ find_check_entry(struct arpt_entry *e, c
{
struct xt_entry_target *t;
struct xt_target *target;
+ unsigned long pcnt;
int ret;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -653,10 +653,12 @@ find_check_entry(struct ipt_entry *e, st
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ unsigned long pcnt;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -666,10 +666,12 @@ find_check_entry(struct ip6t_entry *e, s
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ unsigned long pcnt;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
Patches currently in stable-queue which might be from pablo(a)netfilter.org are
queue-4.4/netfilter-x_tables-speed-up-jump-target-validation.patch
queue-4.4/netfilter-nf_dup_ipv6-set-again-flowi_flag_known_nh-at-flowi6_flags.patch
queue-4.4/netfilter-fix-is_err_value-usage.patch
queue-4.4/netfilter-nf_ct_expect-remove-the-redundant-slash-when-policy-name-is-empty.patch
queue-4.4/netfilter-xt_osf-add-missing-permission-checks.patch
queue-4.4/netfilter-arp_tables-fix-invoking-32bit-iptable-p-input-accept-failed-in-64bit-kernel.patch
queue-4.4/netfilter-nf_conntrack_sip-extend-request-line-validation.patch
queue-4.4/netfilter-restart-search-if-moved-to-other-chain.patch
queue-4.4/netfilter-use-fwmark_reflect-in-nf_send_reset.patch
queue-4.4/netfilter-nfnetlink_queue-reject-verdict-request-from-different-portid.patch
queue-4.4/netfilter-nfnetlink_cthelper-add-missing-permission-checks.patch
This is a note to let you know that I've just added the patch titled
netfilter: arp_tables: fix invoking 32bit "iptable -P INPUT ACCEPT" failed in 64bit kernel
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
netfilter-arp_tables-fix-invoking-32bit-iptable-p-input-accept-failed-in-64bit-kernel.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 17a49cd549d9dc8707dc9262210166455c612dde Mon Sep 17 00:00:00 2001
From: Hongxu Jia <hongxu.jia(a)windriver.com>
Date: Tue, 29 Nov 2016 21:56:26 -0500
Subject: netfilter: arp_tables: fix invoking 32bit "iptable -P INPUT ACCEPT" failed in 64bit kernel
From: Hongxu Jia <hongxu.jia(a)windriver.com>
commit 17a49cd549d9dc8707dc9262210166455c612dde upstream.
Since 09d9686047db ("netfilter: x_tables: do compat validation via
translate_table"), it used compatr structure to assign newinfo
structure. In translate_compat_table of ip_tables.c and ip6_tables.c,
it used compatr->hook_entry to replace info->hook_entry and
compatr->underflow to replace info->underflow, but not do the same
replacement in arp_tables.c.
It caused invoking 32-bit "arptbale -P INPUT ACCEPT" failed in 64bit
kernel.
--------------------------------------
root@qemux86-64:~# arptables -P INPUT ACCEPT
root@qemux86-64:~# arptables -P INPUT ACCEPT
ERROR: Policy for `INPUT' offset 448 != underflow 0
arptables: Incompatible with this kernel
--------------------------------------
Fixes: 09d9686047db ("netfilter: x_tables: do compat validation via translate_table")
Signed-off-by: Hongxu Jia <hongxu.jia(a)windriver.com>
Acked-by: Florian Westphal <fw(a)strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
Acked-by: Michal Kubecek <mkubecek(a)suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/netfilter/arp_tables.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1339,8 +1339,8 @@ static int translate_compat_table(struct
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries;
pos = entry1;
Patches currently in stable-queue which might be from hongxu.jia(a)windriver.com are
queue-4.4/netfilter-arp_tables-fix-invoking-32bit-iptable-p-input-accept-failed-in-64bit-kernel.patch
This is a note to let you know that I've just added the patch titled
mm, page_alloc: fix potential false positive in __zone_watermark_ok
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
mm-page_alloc-fix-potential-false-positive-in-__zone_watermark_ok.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From b050e3769c6b4013bb937e879fc43bf1847ee819 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka(a)suse.cz>
Date: Wed, 15 Nov 2017 17:38:30 -0800
Subject: mm, page_alloc: fix potential false positive in __zone_watermark_ok
From: Vlastimil Babka <vbabka(a)suse.cz>
commit b050e3769c6b4013bb937e879fc43bf1847ee819 upstream.
Since commit 97a16fc82a7c ("mm, page_alloc: only enforce watermarks for
order-0 allocations"), __zone_watermark_ok() check for high-order
allocations will shortcut per-migratetype free list checks for
ALLOC_HARDER allocations, and return true as long as there's free page
of any migratetype. The intention is that ALLOC_HARDER can allocate
from MIGRATE_HIGHATOMIC free lists, while normal allocations can't.
However, as a side effect, the watermark check will then also return
true when there are pages only on the MIGRATE_ISOLATE list, or (prior to
CMA conversion to ZONE_MOVABLE) on the MIGRATE_CMA list. Since the
allocation cannot actually obtain isolated pages, and might not be able
to obtain CMA pages, this can result in a false positive.
The condition should be rare and perhaps the outcome is not a fatal one.
Still, it's better if the watermark check is correct. There also
shouldn't be a performance tradeoff here.
Link: http://lkml.kernel.org/r/20171102125001.23708-1-vbabka@suse.cz
Fixes: 97a16fc82a7c ("mm, page_alloc: only enforce watermarks for order-0 allocations")
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Acked-by: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
mm/page_alloc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2468,9 +2468,6 @@ static bool __zone_watermark_ok(struct z
if (!area->nr_free)
continue;
- if (alloc_harder)
- return true;
-
for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
if (!list_empty(&area->free_list[mt]))
return true;
@@ -2482,6 +2479,9 @@ static bool __zone_watermark_ok(struct z
return true;
}
#endif
+ if (alloc_harder &&
+ !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+ return true;
}
return false;
}
Patches currently in stable-queue which might be from vbabka(a)suse.cz are
queue-4.4/fs-select-add-vmalloc-fallback-for-select-2.patch
queue-4.4/mm-mmap.c-do-not-blow-on-prot_none-map_fixed-holes-in-the-stack.patch
queue-4.4/cma-fix-calculation-of-aligned-offset.patch
queue-4.4/mm-page_alloc-fix-potential-false-positive-in-__zone_watermark_ok.patch
This is a note to let you know that I've just added the patch titled
mm/mmap.c: do not blow on PROT_NONE MAP_FIXED holes in the stack
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
mm-mmap.c-do-not-blow-on-prot_none-map_fixed-holes-in-the-stack.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 561b5e0709e4a248c67d024d4d94b6e31e3edf2f Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko(a)suse.com>
Date: Mon, 10 Jul 2017 15:49:51 -0700
Subject: mm/mmap.c: do not blow on PROT_NONE MAP_FIXED holes in the stack
From: Michal Hocko <mhocko(a)suse.com>
commit 561b5e0709e4a248c67d024d4d94b6e31e3edf2f upstream.
Commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas") has
introduced a regression in some rust and Java environments which are
trying to implement their own stack guard page. They are punching a new
MAP_FIXED mapping inside the existing stack Vma.
This will confuse expand_{downwards,upwards} into thinking that the
stack expansion would in fact get us too close to an existing non-stack
vma which is a correct behavior wrt safety. It is a real regression on
the other hand.
Let's work around the problem by considering PROT_NONE mapping as a part
of the stack. This is a gros hack but overflowing to such a mapping
would trap anyway an we only can hope that usespace knows what it is
doing and handle it propely.
Fixes: 1be7107fbe18 ("mm: larger stack guard gap, between vmas")
Link: http://lkml.kernel.org/r/20170705182849.GA18027@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Debugged-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Ben Hutchings <ben(a)decadent.org.uk>
Cc: Willy Tarreau <w(a)1wt.eu>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
mm/mmap.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2188,7 +2188,8 @@ int expand_upwards(struct vm_area_struct
gap_addr = TASK_SIZE;
next = vma->vm_next;
- if (next && next->vm_start < gap_addr) {
+ if (next && next->vm_start < gap_addr &&
+ (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
if (!(next->vm_flags & VM_GROWSUP))
return -ENOMEM;
/* Check that both stack segments have the same anon_vma? */
@@ -2273,7 +2274,8 @@ int expand_downwards(struct vm_area_stru
if (gap_addr > address)
return -ENOMEM;
prev = vma->vm_prev;
- if (prev && prev->vm_end > gap_addr) {
+ if (prev && prev->vm_end > gap_addr &&
+ (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
if (!(prev->vm_flags & VM_GROWSDOWN))
return -ENOMEM;
/* Check that both stack segments have the same anon_vma? */
Patches currently in stable-queue which might be from mhocko(a)suse.com are
queue-4.4/fs-select-add-vmalloc-fallback-for-select-2.patch
queue-4.4/hwpoison-memcg-forcibly-uncharge-lru-pages.patch
queue-4.4/mm-mmap.c-do-not-blow-on-prot_none-map_fixed-holes-in-the-stack.patch
This is a note to let you know that I've just added the patch titled
ipc: msg, make msgrcv work with LONG_MIN
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ipc-msg-make-msgrcv-work-with-long_min.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 999898355e08ae3b92dfd0a08db706e0c6703d30 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby(a)suse.cz>
Date: Wed, 14 Dec 2016 15:06:07 -0800
Subject: ipc: msg, make msgrcv work with LONG_MIN
From: Jiri Slaby <jslaby(a)suse.cz>
commit 999898355e08ae3b92dfd0a08db706e0c6703d30 upstream.
When LONG_MIN is passed to msgrcv, one would expect to recieve any
message. But convert_mode does *msgtyp = -*msgtyp and -LONG_MIN is
undefined. In particular, with my gcc -LONG_MIN produces -LONG_MIN
again.
So handle this case properly by assigning LONG_MAX to *msgtyp if
LONG_MIN was specified as msgtyp to msgrcv.
This code:
long msg[] = { 100, 200 };
int m = msgget(IPC_PRIVATE, IPC_CREAT | 0644);
msgsnd(m, &msg, sizeof(msg), 0);
msgrcv(m, &msg, sizeof(msg), LONG_MIN, 0);
produces currently nothing:
msgget(IPC_PRIVATE, IPC_CREAT|0644) = 65538
msgsnd(65538, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, 0) = 0
msgrcv(65538, ...
Except a UBSAN warning:
UBSAN: Undefined behaviour in ipc/msg.c:745:13
negation of -9223372036854775808 cannot be represented in type 'long int':
With the patch, I see what I expect:
msgget(IPC_PRIVATE, IPC_CREAT|0644) = 0
msgsnd(0, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, 0) = 0
msgrcv(0, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, -9223372036854775808, 0) = 16
Link: http://lkml.kernel.org/r/20161024082633.10148-1-jslaby@suse.cz
Signed-off-by: Jiri Slaby <jslaby(a)suse.cz>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Manfred Spraul <manfred(a)colorfullife.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
ipc/msg.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -742,7 +742,10 @@ static inline int convert_mode(long *msg
if (*msgtyp == 0)
return SEARCH_ANY;
if (*msgtyp < 0) {
- *msgtyp = -*msgtyp;
+ if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
+ *msgtyp = LONG_MAX;
+ else
+ *msgtyp = -*msgtyp;
return SEARCH_LESSEQUAL;
}
if (msgflg & MSG_EXCEPT)
Patches currently in stable-queue which might be from jslaby(a)suse.cz are
queue-4.4/ipc-msg-make-msgrcv-work-with-long_min.patch
queue-4.4/fs-fcntl-f_setown-avoid-undefined-behaviour.patch
queue-4.4/pm-sleep-declare-__tracedata-symbols-as-char-rather-than-char.patch
queue-4.4/x86-cpu-intel-introduce-macros-for-intel-family-numbers.patch
queue-4.4/x86-retpoline-fill-rsb-on-context-switch-for-affected-cpus.patch
queue-4.4/acpi-scan-prefer-devices-without-_hid-_cid-for-_adr-matching.patch
queue-4.4/time-avoid-undefined-behaviour-in-ktime_add_safe.patch
This is a note to let you know that I've just added the patch titled
hwpoison, memcg: forcibly uncharge LRU pages
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
hwpoison-memcg-forcibly-uncharge-lru-pages.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 18365225f0440d09708ad9daade2ec11275c3df9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko(a)suse.com>
Date: Fri, 12 May 2017 15:46:26 -0700
Subject: hwpoison, memcg: forcibly uncharge LRU pages
From: Michal Hocko <mhocko(a)suse.com>
commit 18365225f0440d09708ad9daade2ec11275c3df9 upstream.
Laurent Dufour has noticed that hwpoinsoned pages are kept charged. In
his particular case he has hit a bad_page("page still charged to
cgroup") when onlining a hwpoison page. While this looks like something
that shouldn't happen in the first place because onlining hwpages and
returning them to the page allocator makes only little sense it shows a
real problem.
hwpoison pages do not get freed usually so we do not uncharge them (at
least not since commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge
API")). Each charge pins memcg (since e8ea14cc6ead ("mm: memcontrol:
take a css reference for each charged page")) as well and so the
mem_cgroup and the associated state will never go away. Fix this leak
by forcibly uncharging a LRU hwpoisoned page in delete_from_lru_cache().
We also have to tweak uncharge_list because it cannot rely on zero ref
count for these pages.
[akpm(a)linux-foundation.org: coding-style fixes]
Fixes: 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API")
Link: http://lkml.kernel.org/r/20170502185507.GB19165@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Reported-by: Laurent Dufour <ldufour(a)linux.vnet.ibm.com>
Tested-by: Laurent Dufour <ldufour(a)linux.vnet.ibm.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
mm/memcontrol.c | 2 +-
mm/memory-failure.c | 7 +++++++
2 files changed, 8 insertions(+), 1 deletion(-)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5576,7 +5576,7 @@ static void uncharge_list(struct list_he
next = page->lru.next;
VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(page_count(page), page);
+ VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
if (!page->mem_cgroup)
continue;
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -539,6 +539,13 @@ static int delete_from_lru_cache(struct
*/
ClearPageActive(p);
ClearPageUnevictable(p);
+
+ /*
+ * Poisoned page might never drop its ref count to 0 so we have
+ * to uncharge it manually from its memcg.
+ */
+ mem_cgroup_uncharge(p);
+
/*
* drop the page count elevated by isolate_lru_page()
*/
Patches currently in stable-queue which might be from mhocko(a)suse.com are
queue-4.4/fs-select-add-vmalloc-fallback-for-select-2.patch
queue-4.4/hwpoison-memcg-forcibly-uncharge-lru-pages.patch
queue-4.4/mm-mmap.c-do-not-blow-on-prot_none-map_fixed-holes-in-the-stack.patch
This is a note to let you know that I've just added the patch titled
fs/fcntl: f_setown, avoid undefined behaviour
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
fs-fcntl-f_setown-avoid-undefined-behaviour.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From fc3dc67471461c0efcb1ed22fb7595121d65fad9 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby(a)suse.cz>
Date: Tue, 13 Jun 2017 13:35:51 +0200
Subject: fs/fcntl: f_setown, avoid undefined behaviour
From: Jiri Slaby <jslaby(a)suse.cz>
commit fc3dc67471461c0efcb1ed22fb7595121d65fad9 upstream.
fcntl(0, F_SETOWN, 0x80000000) triggers:
UBSAN: Undefined behaviour in fs/fcntl.c:118:7
negation of -2147483648 cannot be represented in type 'int':
CPU: 1 PID: 18261 Comm: syz-executor Not tainted 4.8.1-0-syzkaller #1
...
Call Trace:
...
[<ffffffffad8f0868>] ? f_setown+0x1d8/0x200
[<ffffffffad8f19a9>] ? SyS_fcntl+0x999/0xf30
[<ffffffffaed1fb00>] ? entry_SYSCALL_64_fastpath+0x23/0xc1
Fix that by checking the arg parameter properly (against INT_MAX) before
"who = -who". And return immediatelly with -EINVAL in case it is wrong.
Note that according to POSIX we can return EINVAL:
http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html
[EINVAL]
The cmd argument is F_SETOWN and the value of the argument
is not valid as a process or process group identifier.
[v2] returns an error, v1 used to fail silently
[v3] implement proper check for the bad value INT_MIN
Signed-off-by: Jiri Slaby <jslaby(a)suse.cz>
Cc: Jeff Layton <jlayton(a)poochiereds.net>
Cc: "J. Bruce Fields" <bfields(a)fieldses.org>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: linux-fsdevel(a)vger.kernel.org
Signed-off-by: Jeff Layton <jlayton(a)redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/fcntl.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -113,6 +113,10 @@ void f_setown(struct file *filp, unsigne
int who = arg;
type = PIDTYPE_PID;
if (who < 0) {
+ /* avoid overflow below */
+ if (who == INT_MIN)
+ return;
+
type = PIDTYPE_PGID;
who = -who;
}
Patches currently in stable-queue which might be from jslaby(a)suse.cz are
queue-4.4/ipc-msg-make-msgrcv-work-with-long_min.patch
queue-4.4/fs-fcntl-f_setown-avoid-undefined-behaviour.patch
queue-4.4/pm-sleep-declare-__tracedata-symbols-as-char-rather-than-char.patch
queue-4.4/x86-cpu-intel-introduce-macros-for-intel-family-numbers.patch
queue-4.4/x86-retpoline-fill-rsb-on-context-switch-for-affected-cpus.patch
queue-4.4/acpi-scan-prefer-devices-without-_hid-_cid-for-_adr-matching.patch
queue-4.4/time-avoid-undefined-behaviour-in-ktime_add_safe.patch
This is a note to let you know that I've just added the patch titled
ext2: Don't clear SGID when inheriting ACLs
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ext2-don-t-clear-sgid-when-inheriting-acls.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From a992f2d38e4ce17b8c7d1f7f67b2de0eebdea069 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Wed, 21 Jun 2017 14:34:15 +0200
Subject: ext2: Don't clear SGID when inheriting ACLs
From: Jan Kara <jack(a)suse.cz>
commit a992f2d38e4ce17b8c7d1f7f67b2de0eebdea069 upstream.
When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit
set, DIR1 is expected to have SGID bit set (and owning group equal to
the owning group of 'DIR0'). However when 'DIR0' also has some default
ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on
'DIR1' to get cleared if user is not member of the owning group.
Fix the problem by creating __ext2_set_acl() function that does not call
posix_acl_update_mode() and use it when inheriting ACLs. That prevents
SGID bit clearing and the mode has been properly set by
posix_acl_create() anyway.
Fixes: 073931017b49d9458aa351605b43a7e34598caef
CC: stable(a)vger.kernel.org
CC: linux-ext4(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/ext2/acl.c | 36 ++++++++++++++++++++++--------------
1 file changed, 22 insertions(+), 14 deletions(-)
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -178,11 +178,8 @@ ext2_get_acl(struct inode *inode, int ty
return acl;
}
-/*
- * inode->i_mutex: down
- */
-int
-ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+static int
+__ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
int name_index;
void *value = NULL;
@@ -192,13 +189,6 @@ ext2_set_acl(struct inode *inode, struct
switch(type) {
case ACL_TYPE_ACCESS:
name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
- if (acl) {
- error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- if (error)
- return error;
- inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
- }
break;
case ACL_TYPE_DEFAULT:
@@ -225,6 +215,24 @@ ext2_set_acl(struct inode *inode, struct
}
/*
+ * inode->i_mutex: down
+ */
+int
+ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ int error;
+
+ if (type == ACL_TYPE_ACCESS && acl) {
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
+ return error;
+ inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(inode);
+ }
+ return __ext2_set_acl(inode, acl, type);
+}
+
+/*
* Initialize the ACLs of a new inode. Called from ext2_new_inode.
*
* dir->i_mutex: down
@@ -241,12 +249,12 @@ ext2_init_acl(struct inode *inode, struc
return error;
if (default_acl) {
- error = ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ error = __ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
posix_acl_release(default_acl);
}
if (acl) {
if (!error)
- error = ext2_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ error = __ext2_set_acl(inode, acl, ACL_TYPE_ACCESS);
posix_acl_release(acl);
}
return error;
Patches currently in stable-queue which might be from jack(a)suse.cz are
queue-4.4/reiserfs-don-t-preallocate-blocks-for-extended-attributes.patch
queue-4.4/ext2-don-t-clear-sgid-when-inheriting-acls.patch
queue-4.4/reiserfs-fix-race-in-prealloc-discard.patch
queue-4.4/reiserfs-don-t-clear-sgid-when-inheriting-acls.patch
This is a note to let you know that I've just added the patch titled
cma: fix calculation of aligned offset
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
cma-fix-calculation-of-aligned-offset.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From e048cb32f69038aa1c8f11e5c1b331be4181659d Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb(a)gmail.com>
Date: Mon, 10 Jul 2017 15:49:44 -0700
Subject: cma: fix calculation of aligned offset
From: Doug Berger <opendmb(a)gmail.com>
commit e048cb32f69038aa1c8f11e5c1b331be4181659d upstream.
The align_offset parameter is used by bitmap_find_next_zero_area_off()
to represent the offset of map's base from the previous alignment
boundary; the function ensures that the returned index, plus the
align_offset, honors the specified align_mask.
The logic introduced by commit b5be83e308f7 ("mm: cma: align to physical
address, not CMA region position") has the cma driver calculate the
offset to the *next* alignment boundary. In most cases, the base
alignment is greater than that specified when making allocations,
resulting in a zero offset whether we align up or down. In the example
given with the commit, the base alignment (8MB) was half the requested
alignment (16MB) so the math also happened to work since the offset is
8MB in both directions. However, when requesting allocations with an
alignment greater than twice that of the base, the returned index would
not be correctly aligned.
Also, the align_order arguments of cma_bitmap_aligned_mask() and
cma_bitmap_aligned_offset() should not be negative so the argument type
was made unsigned.
Fixes: b5be83e308f7 ("mm: cma: align to physical address, not CMA region position")
Link: http://lkml.kernel.org/r/20170628170742.2895-1-opendmb@gmail.com
Signed-off-by: Angus Clark <angus(a)angusclark.org>
Signed-off-by: Doug Berger <opendmb(a)gmail.com>
Acked-by: Gregory Fong <gregory.0xf0(a)gmail.com>
Cc: Doug Berger <opendmb(a)gmail.com>
Cc: Angus Clark <angus(a)angusclark.org>
Cc: Laura Abbott <labbott(a)redhat.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Lucas Stach <l.stach(a)pengutronix.de>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Shiraz Hashim <shashim(a)codeaurora.org>
Cc: Jaewon Kim <jaewon31.kim(a)samsung.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
mm/cma.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -54,7 +54,7 @@ unsigned long cma_get_size(const struct
}
static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
- int align_order)
+ unsigned int align_order)
{
if (align_order <= cma->order_per_bit)
return 0;
@@ -62,17 +62,14 @@ static unsigned long cma_bitmap_aligned_
}
/*
- * Find a PFN aligned to the specified order and return an offset represented in
- * order_per_bits.
+ * Find the offset of the base PFN from the specified align_order.
+ * The value returned is represented in order_per_bits.
*/
static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
- int align_order)
+ unsigned int align_order)
{
- if (align_order <= cma->order_per_bit)
- return 0;
-
- return (ALIGN(cma->base_pfn, (1UL << align_order))
- - cma->base_pfn) >> cma->order_per_bit;
+ return (cma->base_pfn & ((1UL << align_order) - 1))
+ >> cma->order_per_bit;
}
static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
Patches currently in stable-queue which might be from opendmb(a)gmail.com are
queue-4.4/cma-fix-calculation-of-aligned-offset.patch
This is a note to let you know that I've just added the patch titled
ACPICA: Namespace: fix operand cache leak
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
acpica-namespace-fix-operand-cache-leak.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 3b2d69114fefa474fca542e51119036dceb4aa6f Mon Sep 17 00:00:00 2001
From: Seunghun Han <kkamagui(a)gmail.com>
Date: Wed, 26 Apr 2017 16:18:08 +0800
Subject: ACPICA: Namespace: fix operand cache leak
From: Seunghun Han <kkamagui(a)gmail.com>
commit 3b2d69114fefa474fca542e51119036dceb4aa6f upstream.
ACPICA commit a23325b2e583556eae88ed3f764e457786bf4df6
I found some ACPI operand cache leaks in ACPI early abort cases.
Boot log of ACPI operand cache leak is as follows:
>[ 0.174332] ACPI: Added _OSI(Module Device)
>[ 0.175504] ACPI: Added _OSI(Processor Device)
>[ 0.176010] ACPI: Added _OSI(3.0 _SCP Extensions)
>[ 0.177032] ACPI: Added _OSI(Processor Aggregator Device)
>[ 0.178284] ACPI: SCI (IRQ16705) allocation failed
>[ 0.179352] ACPI Exception: AE_NOT_ACQUIRED, Unable to install
System Control Interrupt handler (20160930/evevent-131)
>[ 0.180008] ACPI: Unable to start the ACPI Interpreter
>[ 0.181125] ACPI Error: Could not remove SCI handler
(20160930/evmisc-281)
>[ 0.184068] kmem_cache_destroy Acpi-Operand: Slab cache still has
objects
>[ 0.185358] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.10.0-rc3 #2
>[ 0.186820] Hardware name: innotek gmb_h virtual_box/virtual_box, BIOS
virtual_box 12/01/2006
>[ 0.188000] Call Trace:
>[ 0.188000] ? dump_stack+0x5c/0x7d
>[ 0.188000] ? kmem_cache_destroy+0x224/0x230
>[ 0.188000] ? acpi_sleep_proc_init+0x22/0x22
>[ 0.188000] ? acpi_os_delete_cache+0xa/0xd
>[ 0.188000] ? acpi_ut_delete_caches+0x3f/0x7b
>[ 0.188000] ? acpi_terminate+0x5/0xf
>[ 0.188000] ? acpi_init+0x288/0x32e
>[ 0.188000] ? __class_create+0x4c/0x80
>[ 0.188000] ? video_setup+0x7a/0x7a
>[ 0.188000] ? do_one_initcall+0x4e/0x1b0
>[ 0.188000] ? kernel_init_freeable+0x194/0x21a
>[ 0.188000] ? rest_init+0x80/0x80
>[ 0.188000] ? kernel_init+0xa/0x100
>[ 0.188000] ? ret_from_fork+0x25/0x30
When early abort is occurred due to invalid ACPI information, Linux kernel
terminates ACPI by calling acpi_terminate() function. The function calls
acpi_ns_terminate() function to delete namespace data and ACPI operand cache
(acpi_gbl_module_code_list).
But the deletion code in acpi_ns_terminate() function is wrapped in
ACPI_EXEC_APP definition, therefore the code is only executed when the
definition exists. If the define doesn't exist, ACPI operand cache
(acpi_gbl_module_code_list) is leaked, and stack dump is shown in kernel log.
This causes a security threat because the old kernel (<= 4.9) shows memory
locations of kernel functions in stack dump, therefore kernel ASLR can be
neutralized.
To fix ACPI operand leak for enhancing security, I made a patch which
removes the ACPI_EXEC_APP define in acpi_ns_terminate() function for
executing the deletion code unconditionally.
Link: https://github.com/acpica/acpica/commit/a23325b2
Signed-off-by: Seunghun Han <kkamagui(a)gmail.com>
Signed-off-by: Lv Zheng <lv.zheng(a)intel.com>
Signed-off-by: Bob Moore <robert.moore(a)intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Acked-by: Lee, Chun-Yi <jlee(a)suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/acpi/acpica/nsutils.c | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -593,25 +593,20 @@ struct acpi_namespace_node *acpi_ns_vali
void acpi_ns_terminate(void)
{
acpi_status status;
+ union acpi_operand_object *prev;
+ union acpi_operand_object *next;
ACPI_FUNCTION_TRACE(ns_terminate);
-#ifdef ACPI_EXEC_APP
- {
- union acpi_operand_object *prev;
- union acpi_operand_object *next;
+ /* Delete any module-level code blocks */
- /* Delete any module-level code blocks */
-
- next = acpi_gbl_module_code_list;
- while (next) {
- prev = next;
- next = next->method.mutex;
- prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */
- acpi_ut_remove_reference(prev);
- }
+ next = acpi_gbl_module_code_list;
+ while (next) {
+ prev = next;
+ next = next->method.mutex;
+ prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */
+ acpi_ut_remove_reference(prev);
}
-#endif
/*
* Free the entire namespace -- all nodes and all objects
Patches currently in stable-queue which might be from kkamagui(a)gmail.com are
queue-4.4/acpica-namespace-fix-operand-cache-leak.patch