This reverts commit 2fd3e5efe791946be0957c8e1eed9560b541fe46.
The above commit replaces page_address(bv->bv_page) by bvec_virt(bv) to
avoid directly access to bv->bv_page, but in situation bv->bv_offset is
not zero and page_address(bv->bv_page) is not equal to bvec_virt(bv). In
such case a memory corruption may happen because memory in next page is
tainted by following line in do_btree_node_write(),
memcpy(bvec_virt(bv), addr, PAGE_SIZE);
This patch reverts the mentioned commit to avoid the memory corruption.
Fixes: 2fd3e5efe791 ("bcache: use bvec_virt")
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: stable(a)vger.kernel.org # 5.15
---
drivers/md/bcache/btree.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 93b67b8d31c3..88c573eeb598 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -378,7 +378,7 @@ static void do_btree_node_write(struct btree *b)
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bv, b->bio, iter_all) {
- memcpy(bvec_virt(bv), addr, PAGE_SIZE);
+ memcpy(page_address(bv->bv_page), addr, PAGE_SIZE);
addr += PAGE_SIZE;
}
--
2.31.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d5b5539742d2554591751b4248b0204d20dcc9d Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:14 -0700
Subject: [PATCH] binder: use cred instead of task for getsecid
Use the 'struct cred' saved at binder_open() to lookup
the security ID via security_cred_getsecid(). This
ensures that the security context that opened binder
is the one used to generate the secctx.
Cc: stable(a)vger.kernel.org # 5.4+
Fixes: ec74136ded79 ("binder: create node flag to request sender's security context")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1571e01cfa52..49b08c04fa09 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2713,16 +2713,7 @@ static void binder_transaction(struct binder_proc *proc,
u32 secid;
size_t added_size;
- /*
- * Arguably this should be the task's subjective LSM secid but
- * we can't reliably access the subjective creds of a task
- * other than our own so we must use the objective creds, which
- * are safe to access. The downside is that if a task is
- * temporarily overriding it's creds it will not be reflected
- * here; however, it isn't clear that binder would handle that
- * case well anyway.
- */
- security_task_getsecid_obj(proc->tsk, &secid);
+ security_cred_getsecid(proc->cred, &secid);
ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
if (ret) {
return_error = BR_FAILED_REPLY;
diff --git a/include/linux/security.h b/include/linux/security.h
index 9be72166e859..cc6d39358336 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1041,6 +1041,11 @@ static inline void security_transfer_creds(struct cred *new,
{
}
+static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
+{
+ *secid = 0;
+}
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d5b5539742d2554591751b4248b0204d20dcc9d Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:14 -0700
Subject: [PATCH] binder: use cred instead of task for getsecid
Use the 'struct cred' saved at binder_open() to lookup
the security ID via security_cred_getsecid(). This
ensures that the security context that opened binder
is the one used to generate the secctx.
Cc: stable(a)vger.kernel.org # 5.4+
Fixes: ec74136ded79 ("binder: create node flag to request sender's security context")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1571e01cfa52..49b08c04fa09 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2713,16 +2713,7 @@ static void binder_transaction(struct binder_proc *proc,
u32 secid;
size_t added_size;
- /*
- * Arguably this should be the task's subjective LSM secid but
- * we can't reliably access the subjective creds of a task
- * other than our own so we must use the objective creds, which
- * are safe to access. The downside is that if a task is
- * temporarily overriding it's creds it will not be reflected
- * here; however, it isn't clear that binder would handle that
- * case well anyway.
- */
- security_task_getsecid_obj(proc->tsk, &secid);
+ security_cred_getsecid(proc->cred, &secid);
ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
if (ret) {
return_error = BR_FAILED_REPLY;
diff --git a/include/linux/security.h b/include/linux/security.h
index 9be72166e859..cc6d39358336 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1041,6 +1041,11 @@ static inline void security_transfer_creds(struct cred *new,
{
}
+static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
+{
+ *secid = 0;
+}
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d5b5539742d2554591751b4248b0204d20dcc9d Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:14 -0700
Subject: [PATCH] binder: use cred instead of task for getsecid
Use the 'struct cred' saved at binder_open() to lookup
the security ID via security_cred_getsecid(). This
ensures that the security context that opened binder
is the one used to generate the secctx.
Cc: stable(a)vger.kernel.org # 5.4+
Fixes: ec74136ded79 ("binder: create node flag to request sender's security context")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1571e01cfa52..49b08c04fa09 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2713,16 +2713,7 @@ static void binder_transaction(struct binder_proc *proc,
u32 secid;
size_t added_size;
- /*
- * Arguably this should be the task's subjective LSM secid but
- * we can't reliably access the subjective creds of a task
- * other than our own so we must use the objective creds, which
- * are safe to access. The downside is that if a task is
- * temporarily overriding it's creds it will not be reflected
- * here; however, it isn't clear that binder would handle that
- * case well anyway.
- */
- security_task_getsecid_obj(proc->tsk, &secid);
+ security_cred_getsecid(proc->cred, &secid);
ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
if (ret) {
return_error = BR_FAILED_REPLY;
diff --git a/include/linux/security.h b/include/linux/security.h
index 9be72166e859..cc6d39358336 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1041,6 +1041,11 @@ static inline void security_transfer_creds(struct cred *new,
{
}
+static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
+{
+ *secid = 0;
+}
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d5b5539742d2554591751b4248b0204d20dcc9d Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:14 -0700
Subject: [PATCH] binder: use cred instead of task for getsecid
Use the 'struct cred' saved at binder_open() to lookup
the security ID via security_cred_getsecid(). This
ensures that the security context that opened binder
is the one used to generate the secctx.
Cc: stable(a)vger.kernel.org # 5.4+
Fixes: ec74136ded79 ("binder: create node flag to request sender's security context")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1571e01cfa52..49b08c04fa09 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2713,16 +2713,7 @@ static void binder_transaction(struct binder_proc *proc,
u32 secid;
size_t added_size;
- /*
- * Arguably this should be the task's subjective LSM secid but
- * we can't reliably access the subjective creds of a task
- * other than our own so we must use the objective creds, which
- * are safe to access. The downside is that if a task is
- * temporarily overriding it's creds it will not be reflected
- * here; however, it isn't clear that binder would handle that
- * case well anyway.
- */
- security_task_getsecid_obj(proc->tsk, &secid);
+ security_cred_getsecid(proc->cred, &secid);
ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
if (ret) {
return_error = BR_FAILED_REPLY;
diff --git a/include/linux/security.h b/include/linux/security.h
index 9be72166e859..cc6d39358336 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1041,6 +1041,11 @@ static inline void security_transfer_creds(struct cred *new,
{
}
+static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
+{
+ *secid = 0;
+}
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d5b5539742d2554591751b4248b0204d20dcc9d Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:14 -0700
Subject: [PATCH] binder: use cred instead of task for getsecid
Use the 'struct cred' saved at binder_open() to lookup
the security ID via security_cred_getsecid(). This
ensures that the security context that opened binder
is the one used to generate the secctx.
Cc: stable(a)vger.kernel.org # 5.4+
Fixes: ec74136ded79 ("binder: create node flag to request sender's security context")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1571e01cfa52..49b08c04fa09 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2713,16 +2713,7 @@ static void binder_transaction(struct binder_proc *proc,
u32 secid;
size_t added_size;
- /*
- * Arguably this should be the task's subjective LSM secid but
- * we can't reliably access the subjective creds of a task
- * other than our own so we must use the objective creds, which
- * are safe to access. The downside is that if a task is
- * temporarily overriding it's creds it will not be reflected
- * here; however, it isn't clear that binder would handle that
- * case well anyway.
- */
- security_task_getsecid_obj(proc->tsk, &secid);
+ security_cred_getsecid(proc->cred, &secid);
ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
if (ret) {
return_error = BR_FAILED_REPLY;
diff --git a/include/linux/security.h b/include/linux/security.h
index 9be72166e859..cc6d39358336 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1041,6 +1041,11 @@ static inline void security_transfer_creds(struct cred *new,
{
}
+static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
+{
+ *secid = 0;
+}
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d5b5539742d2554591751b4248b0204d20dcc9d Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:14 -0700
Subject: [PATCH] binder: use cred instead of task for getsecid
Use the 'struct cred' saved at binder_open() to lookup
the security ID via security_cred_getsecid(). This
ensures that the security context that opened binder
is the one used to generate the secctx.
Cc: stable(a)vger.kernel.org # 5.4+
Fixes: ec74136ded79 ("binder: create node flag to request sender's security context")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1571e01cfa52..49b08c04fa09 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2713,16 +2713,7 @@ static void binder_transaction(struct binder_proc *proc,
u32 secid;
size_t added_size;
- /*
- * Arguably this should be the task's subjective LSM secid but
- * we can't reliably access the subjective creds of a task
- * other than our own so we must use the objective creds, which
- * are safe to access. The downside is that if a task is
- * temporarily overriding it's creds it will not be reflected
- * here; however, it isn't clear that binder would handle that
- * case well anyway.
- */
- security_task_getsecid_obj(proc->tsk, &secid);
+ security_cred_getsecid(proc->cred, &secid);
ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
if (ret) {
return_error = BR_FAILED_REPLY;
diff --git a/include/linux/security.h b/include/linux/security.h
index 9be72166e859..cc6d39358336 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1041,6 +1041,11 @@ static inline void security_transfer_creds(struct cred *new,
{
}
+static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
+{
+ *secid = 0;
+}
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 29bc22ac5e5bc63275e850f0c8fc549e3d0e306b Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:12 -0700
Subject: [PATCH] binder: use euid from cred instead of using task
Save the 'struct cred' associated with a binder process
at initial open to avoid potential race conditions
when converting to an euid.
Set a transaction's sender_euid from the 'struct cred'
saved at binder_open() instead of looking up the euid
from the binder proc's 'struct task'. This ensures
the euid is associated with the security context that
of the task that opened binder.
Cc: stable(a)vger.kernel.org # 4.4+
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Suggested-by: Jann Horn <jannh(a)google.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d9030cb6b1e4..231cff9b3b75 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2702,7 +2702,7 @@ static void binder_transaction(struct binder_proc *proc,
t->from = thread;
else
t->from = NULL;
- t->sender_euid = task_euid(proc->tsk);
+ t->sender_euid = proc->cred->euid;
t->to_proc = target_proc;
t->to_thread = target_thread;
t->code = tr->code;
@@ -4343,6 +4343,7 @@ static void binder_free_proc(struct binder_proc *proc)
}
binder_alloc_deferred_release(&proc->alloc);
put_task_struct(proc->tsk);
+ put_cred(proc->cred);
binder_stats_deleted(BINDER_STAT_PROC);
kfree(proc);
}
@@ -5021,6 +5022,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
spin_lock_init(&proc->outer_lock);
get_task_struct(current->group_leader);
proc->tsk = current->group_leader;
+ proc->cred = get_cred(filp->f_cred);
INIT_LIST_HEAD(&proc->todo);
init_waitqueue_head(&proc->freeze_wait);
proc->default_priority = task_nice(current);
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 810c0b84d3f8..e7d4920b3368 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -364,6 +364,9 @@ struct binder_ref {
* (invariant after initialized)
* @tsk task_struct for group_leader of process
* (invariant after initialized)
+ * @cred struct cred associated with the `struct file`
+ * in binder_open()
+ * (invariant after initialized)
* @deferred_work_node: element for binder_deferred_list
* (protected by binder_deferred_lock)
* @deferred_work: bitmap of deferred work to perform
@@ -424,6 +427,7 @@ struct binder_proc {
struct list_head waiting_threads;
int pid;
struct task_struct *tsk;
+ const struct cred *cred;
struct hlist_node deferred_work_node;
int deferred_work;
int outstanding_txns;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 29bc22ac5e5bc63275e850f0c8fc549e3d0e306b Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:12 -0700
Subject: [PATCH] binder: use euid from cred instead of using task
Save the 'struct cred' associated with a binder process
at initial open to avoid potential race conditions
when converting to an euid.
Set a transaction's sender_euid from the 'struct cred'
saved at binder_open() instead of looking up the euid
from the binder proc's 'struct task'. This ensures
the euid is associated with the security context that
of the task that opened binder.
Cc: stable(a)vger.kernel.org # 4.4+
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Suggested-by: Jann Horn <jannh(a)google.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d9030cb6b1e4..231cff9b3b75 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2702,7 +2702,7 @@ static void binder_transaction(struct binder_proc *proc,
t->from = thread;
else
t->from = NULL;
- t->sender_euid = task_euid(proc->tsk);
+ t->sender_euid = proc->cred->euid;
t->to_proc = target_proc;
t->to_thread = target_thread;
t->code = tr->code;
@@ -4343,6 +4343,7 @@ static void binder_free_proc(struct binder_proc *proc)
}
binder_alloc_deferred_release(&proc->alloc);
put_task_struct(proc->tsk);
+ put_cred(proc->cred);
binder_stats_deleted(BINDER_STAT_PROC);
kfree(proc);
}
@@ -5021,6 +5022,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
spin_lock_init(&proc->outer_lock);
get_task_struct(current->group_leader);
proc->tsk = current->group_leader;
+ proc->cred = get_cred(filp->f_cred);
INIT_LIST_HEAD(&proc->todo);
init_waitqueue_head(&proc->freeze_wait);
proc->default_priority = task_nice(current);
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 810c0b84d3f8..e7d4920b3368 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -364,6 +364,9 @@ struct binder_ref {
* (invariant after initialized)
* @tsk task_struct for group_leader of process
* (invariant after initialized)
+ * @cred struct cred associated with the `struct file`
+ * in binder_open()
+ * (invariant after initialized)
* @deferred_work_node: element for binder_deferred_list
* (protected by binder_deferred_lock)
* @deferred_work: bitmap of deferred work to perform
@@ -424,6 +427,7 @@ struct binder_proc {
struct list_head waiting_threads;
int pid;
struct task_struct *tsk;
+ const struct cred *cred;
struct hlist_node deferred_work_node;
int deferred_work;
int outstanding_txns;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 29bc22ac5e5bc63275e850f0c8fc549e3d0e306b Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:12 -0700
Subject: [PATCH] binder: use euid from cred instead of using task
Save the 'struct cred' associated with a binder process
at initial open to avoid potential race conditions
when converting to an euid.
Set a transaction's sender_euid from the 'struct cred'
saved at binder_open() instead of looking up the euid
from the binder proc's 'struct task'. This ensures
the euid is associated with the security context that
of the task that opened binder.
Cc: stable(a)vger.kernel.org # 4.4+
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Suggested-by: Jann Horn <jannh(a)google.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d9030cb6b1e4..231cff9b3b75 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2702,7 +2702,7 @@ static void binder_transaction(struct binder_proc *proc,
t->from = thread;
else
t->from = NULL;
- t->sender_euid = task_euid(proc->tsk);
+ t->sender_euid = proc->cred->euid;
t->to_proc = target_proc;
t->to_thread = target_thread;
t->code = tr->code;
@@ -4343,6 +4343,7 @@ static void binder_free_proc(struct binder_proc *proc)
}
binder_alloc_deferred_release(&proc->alloc);
put_task_struct(proc->tsk);
+ put_cred(proc->cred);
binder_stats_deleted(BINDER_STAT_PROC);
kfree(proc);
}
@@ -5021,6 +5022,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
spin_lock_init(&proc->outer_lock);
get_task_struct(current->group_leader);
proc->tsk = current->group_leader;
+ proc->cred = get_cred(filp->f_cred);
INIT_LIST_HEAD(&proc->todo);
init_waitqueue_head(&proc->freeze_wait);
proc->default_priority = task_nice(current);
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 810c0b84d3f8..e7d4920b3368 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -364,6 +364,9 @@ struct binder_ref {
* (invariant after initialized)
* @tsk task_struct for group_leader of process
* (invariant after initialized)
+ * @cred struct cred associated with the `struct file`
+ * in binder_open()
+ * (invariant after initialized)
* @deferred_work_node: element for binder_deferred_list
* (protected by binder_deferred_lock)
* @deferred_work: bitmap of deferred work to perform
@@ -424,6 +427,7 @@ struct binder_proc {
struct list_head waiting_threads;
int pid;
struct task_struct *tsk;
+ const struct cred *cred;
struct hlist_node deferred_work_node;
int deferred_work;
int outstanding_txns;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 29bc22ac5e5bc63275e850f0c8fc549e3d0e306b Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:12 -0700
Subject: [PATCH] binder: use euid from cred instead of using task
Save the 'struct cred' associated with a binder process
at initial open to avoid potential race conditions
when converting to an euid.
Set a transaction's sender_euid from the 'struct cred'
saved at binder_open() instead of looking up the euid
from the binder proc's 'struct task'. This ensures
the euid is associated with the security context that
of the task that opened binder.
Cc: stable(a)vger.kernel.org # 4.4+
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Suggested-by: Jann Horn <jannh(a)google.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d9030cb6b1e4..231cff9b3b75 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2702,7 +2702,7 @@ static void binder_transaction(struct binder_proc *proc,
t->from = thread;
else
t->from = NULL;
- t->sender_euid = task_euid(proc->tsk);
+ t->sender_euid = proc->cred->euid;
t->to_proc = target_proc;
t->to_thread = target_thread;
t->code = tr->code;
@@ -4343,6 +4343,7 @@ static void binder_free_proc(struct binder_proc *proc)
}
binder_alloc_deferred_release(&proc->alloc);
put_task_struct(proc->tsk);
+ put_cred(proc->cred);
binder_stats_deleted(BINDER_STAT_PROC);
kfree(proc);
}
@@ -5021,6 +5022,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
spin_lock_init(&proc->outer_lock);
get_task_struct(current->group_leader);
proc->tsk = current->group_leader;
+ proc->cred = get_cred(filp->f_cred);
INIT_LIST_HEAD(&proc->todo);
init_waitqueue_head(&proc->freeze_wait);
proc->default_priority = task_nice(current);
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 810c0b84d3f8..e7d4920b3368 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -364,6 +364,9 @@ struct binder_ref {
* (invariant after initialized)
* @tsk task_struct for group_leader of process
* (invariant after initialized)
+ * @cred struct cred associated with the `struct file`
+ * in binder_open()
+ * (invariant after initialized)
* @deferred_work_node: element for binder_deferred_list
* (protected by binder_deferred_lock)
* @deferred_work: bitmap of deferred work to perform
@@ -424,6 +427,7 @@ struct binder_proc {
struct list_head waiting_threads;
int pid;
struct task_struct *tsk;
+ const struct cred *cred;
struct hlist_node deferred_work_node;
int deferred_work;
int outstanding_txns;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 29bc22ac5e5bc63275e850f0c8fc549e3d0e306b Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos(a)google.com>
Date: Tue, 12 Oct 2021 09:56:12 -0700
Subject: [PATCH] binder: use euid from cred instead of using task
Save the 'struct cred' associated with a binder process
at initial open to avoid potential race conditions
when converting to an euid.
Set a transaction's sender_euid from the 'struct cred'
saved at binder_open() instead of looking up the euid
from the binder proc's 'struct task'. This ensures
the euid is associated with the security context that
of the task that opened binder.
Cc: stable(a)vger.kernel.org # 4.4+
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
Suggested-by: Jann Horn <jannh(a)google.com>
Acked-by: Casey Schaufler <casey(a)schaufler-ca.com>
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d9030cb6b1e4..231cff9b3b75 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2702,7 +2702,7 @@ static void binder_transaction(struct binder_proc *proc,
t->from = thread;
else
t->from = NULL;
- t->sender_euid = task_euid(proc->tsk);
+ t->sender_euid = proc->cred->euid;
t->to_proc = target_proc;
t->to_thread = target_thread;
t->code = tr->code;
@@ -4343,6 +4343,7 @@ static void binder_free_proc(struct binder_proc *proc)
}
binder_alloc_deferred_release(&proc->alloc);
put_task_struct(proc->tsk);
+ put_cred(proc->cred);
binder_stats_deleted(BINDER_STAT_PROC);
kfree(proc);
}
@@ -5021,6 +5022,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
spin_lock_init(&proc->outer_lock);
get_task_struct(current->group_leader);
proc->tsk = current->group_leader;
+ proc->cred = get_cred(filp->f_cred);
INIT_LIST_HEAD(&proc->todo);
init_waitqueue_head(&proc->freeze_wait);
proc->default_priority = task_nice(current);
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 810c0b84d3f8..e7d4920b3368 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -364,6 +364,9 @@ struct binder_ref {
* (invariant after initialized)
* @tsk task_struct for group_leader of process
* (invariant after initialized)
+ * @cred struct cred associated with the `struct file`
+ * in binder_open()
+ * (invariant after initialized)
* @deferred_work_node: element for binder_deferred_list
* (protected by binder_deferred_lock)
* @deferred_work: bitmap of deferred work to perform
@@ -424,6 +427,7 @@ struct binder_proc {
struct list_head waiting_threads;
int pid;
struct task_struct *tsk;
+ const struct cred *cred;
struct hlist_node deferred_work_node;
int deferred_work;
int outstanding_txns;
The SGX driver maintains a single global free page counter,
sgx_nr_free_pages, that reflects the number of free pages available
across all NUMA nodes. Correspondingly, a list of free pages is
associated with each NUMA node and sgx_nr_free_pages is updated
every time a page is added or removed from any of the free page
lists. The main usage of sgx_nr_free_pages is by the reclaimer
that will run when the total free pages go below a watermark to
ensure that there are always some free pages available to, for
example, support efficient page faults.
With sgx_nr_free_pages accessed and modified from a few places
it is essential to ensure that these accesses are done safely but
this is not the case. sgx_nr_free_pages is sometimes accessed
without any protection and when it is protected it is done
inconsistently with any one of the spin locks associated with the
individual NUMA nodes.
The consequence of sgx_nr_free_pages not being protected is that
its value may not accurately reflect the actual number of free
pages on the system, impacting the availability of free pages in
support of many flows. The problematic scenario is when the
reclaimer never runs because it believes there to be sufficient
free pages while any attempt to allocate a page fails because there
are no free pages available. The worst scenario observed was a
user space hang because of repeated page faults caused by
no free pages ever made available.
Change the global free page counter to an atomic type that
ensures simultaneous updates are done safely. While doing so, move
the updating of the variable outside of the spin lock critical
section to which it does not belong.
Cc: stable(a)vger.kernel.org
Fixes: 901ddbb9ecf5 ("x86/sgx: Add a basic NUMA allocation scheme to sgx_alloc_epc_page()")
Suggested-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre(a)intel.com>
---
arch/x86/kernel/cpu/sgx/main.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 63d3de02bbcc..8558d7d5f3e7 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -28,8 +28,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
static LIST_HEAD(sgx_active_page_list);
static DEFINE_SPINLOCK(sgx_reclaimer_lock);
-/* The free page list lock protected variables prepend the lock. */
-static unsigned long sgx_nr_free_pages;
+atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
/* Nodes with one or more EPC sections. */
static nodemask_t sgx_numa_mask;
@@ -403,14 +402,15 @@ static void sgx_reclaim_pages(void)
spin_lock(&node->lock);
list_add_tail(&epc_page->list, &node->free_page_list);
- sgx_nr_free_pages++;
spin_unlock(&node->lock);
+ atomic_long_inc(&sgx_nr_free_pages);
}
}
static bool sgx_should_reclaim(unsigned long watermark)
{
- return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
+ return atomic_long_read(&sgx_nr_free_pages) < watermark &&
+ !list_empty(&sgx_active_page_list);
}
static int ksgxd(void *p)
@@ -471,9 +471,9 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
list_del_init(&page->list);
- sgx_nr_free_pages--;
spin_unlock(&node->lock);
+ atomic_long_dec(&sgx_nr_free_pages);
return page;
}
@@ -625,9 +625,9 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
spin_lock(&node->lock);
list_add_tail(&page->list, &node->free_page_list);
- sgx_nr_free_pages++;
spin_unlock(&node->lock);
+ atomic_long_inc(&sgx_nr_free_pages);
}
static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
--
2.25.1
The patch titled
Subject: shm: extend forced shm destroy to support objects from several IPC nses
has been added to the -mm tree. Its filename is
shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses-simplified.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/shm-extend-forced-shm-destroy-to-…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/shm-extend-forced-shm-destroy-to-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Alexander Mikhalitsyn <alexander.mikhalitsyn(a)virtuozzo.com>
Subject: shm: extend forced shm destroy to support objects from several IPC nses
Currently, exit_shm function not designed to work properly when
task->sysvshm.shm_clist holds shm objects from different IPC namespaces.
This is a real pain when sysctl kernel.shm_rmid_forced = 1, because it
leads to use-after-free (reproducer exists).
That particular patch is attempt to fix the problem by extending exit_shm
mechanism to handle shm's destroy from several IPC ns'es.
To achieve that we do several things:
1. add namespace (non-refcounted) pointer to the struct shmid_kernel
2. during new shm object creation (newseg()/shmget syscall) we
initialize this pointer by current task IPC ns
3. exit_shm() fully reworked such that it traverses over all shp's in
task->sysvshm.shm_clist and gets IPC namespace not from current task as
it was before but from shp's object itself, then call shm_destroy(shp,
ns).
Note. We need to be really careful here, because as it was said before
(1), our pointer to IPC ns non-refcnt'ed. To be on the safe side we using
special helper get_ipc_ns_not_zero() which allows to get IPC ns refcounter
only if IPC ns not in the "state of destruction".
Q/A
Q: Why we can access shp->ns memory using non-refcounted pointer?
A: Because shp object lifetime is always shorther
than IPC namespace lifetime, so, if we get shp object from the
task->sysvshm.shm_clist while holding task_lock(task) nobody can
steal our namespace.
Q: Does this patch change semantics of unshare/setns/clone syscalls?
A: Not. It's just fixes non-covered case when process may leave
IPC namespace without getting task->sysvshm.shm_clist list cleaned up.
Link: https://lkml.kernel.org/r/67bb03e5-f79c-1815-e2bf-949c67047418@colorfullife…
Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity")
Co-developed-by: Manfred Spraul <manfred(a)colorfullife.com>
Signed-off-by: Manfred Spraul <manfred(a)colorfullife.com>
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn(a)virtuozzo.com>
Cc: "Eric W. Biederman" <ebiederm(a)xmission.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: Pavel Tikhomirov <ptikhomirov(a)virtuozzo.com>
Cc: Vasily Averin <vvs(a)virtuozzo.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/ipc_namespace.h | 15 ++
include/linux/sched/task.h | 2
ipc/shm.c | 189 ++++++++++++++++++++++++--------
3 files changed, 159 insertions(+), 47 deletions(-)
--- a/include/linux/ipc_namespace.h~shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses-simplified
+++ a/include/linux/ipc_namespace.h
@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_
return ns;
}
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+ if (ns) {
+ if (refcount_inc_not_zero(&ns->ns.count))
+ return ns;
+ }
+
+ return NULL;
+}
+
extern void put_ipc_ns(struct ipc_namespace *ns);
#else
static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -146,6 +156,11 @@ static inline struct ipc_namespace *get_
{
return ns;
}
+
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+ return ns;
+}
static inline void put_ipc_ns(struct ipc_namespace *ns)
{
--- a/include/linux/sched/task.h~shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses-simplified
+++ a/include/linux/sched/task.h
@@ -157,7 +157,7 @@ static inline struct vm_struct *task_sta
* Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
* subscriptions and synchronises with wait4(). Also used in procfs. Also
* pins the final release of task.io_context. Also protects ->cpuset and
- * ->cgroup.subsys[]. And ->vfork_done.
+ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
*
* Nests both inside and outside of read_lock(&tasklist_lock).
* It must not be nested with write_lock_irq(&tasklist_lock),
--- a/ipc/shm.c~shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses-simplified
+++ a/ipc/shm.c
@@ -62,9 +62,18 @@ struct shmid_kernel /* private to the ke
struct pid *shm_lprid;
struct ucounts *mlock_ucounts;
- /* The task created the shm object. NULL if the task is dead. */
+ /*
+ * The task created the shm object, for
+ * task_lock(shp->shm_creator)
+ */
struct task_struct *shm_creator;
- struct list_head shm_clist; /* list by creator */
+
+ /*
+ * List by creator. task_lock(->shm_creator) required for read/write.
+ * If list_empty(), then the creator is dead already.
+ */
+ struct list_head shm_clist;
+ struct ipc_namespace *ns;
} __randomize_layout;
/* shm_mode upper byte flags */
@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_names
struct shmid_kernel *shp;
shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+ WARN_ON(ns != shp->ns);
if (shp->shm_nattch) {
shp->shm_perm.mode |= SHM_DEST;
@@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head
kfree(shp);
}
-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+/*
+ * It has to be called with shp locked.
+ * It must be called before ipc_rmid()
+ */
+static inline void shm_clist_rm(struct shmid_kernel *shp)
+{
+ struct task_struct *creator;
+
+ /* ensure that shm_creator does not disappear */
+ rcu_read_lock();
+
+ /*
+ * A concurrent exit_shm may do a list_del_init() as well.
+ * Just do nothing if exit_shm already did the work
+ */
+ if (!list_empty(&shp->shm_clist)) {
+ /*
+ * shp->shm_creator is guaranteed to be valid *only*
+ * if shp->shm_clist is not empty.
+ */
+ creator = shp->shm_creator;
+
+ task_lock(creator);
+ /*
+ * list_del_init() is a nop if the entry was already removed
+ * from the list.
+ */
+ list_del_init(&shp->shm_clist);
+ task_unlock(creator);
+ }
+ rcu_read_unlock();
+}
+
+static inline void shm_rmid(struct shmid_kernel *s)
{
- list_del(&s->shm_clist);
- ipc_rmid(&shm_ids(ns), &s->shm_perm);
+ shm_clist_rm(s);
+ ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
}
@@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_names
shm_file = shp->shm_file;
shp->shm_file = NULL;
ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
- shm_rmid(ns, shp);
+ shm_rmid(shp);
shm_unlock(shp);
if (!is_file_hugepages(shm_file))
shmem_lock(shm_file, 0, shp->mlock_ucounts);
@@ -306,10 +349,10 @@ static void shm_destroy(struct ipc_names
*
* 2) sysctl kernel.shm_rmid_forced is set to 1.
*/
-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+static bool shm_may_destroy(struct shmid_kernel *shp)
{
return (shp->shm_nattch == 0) &&
- (ns->shm_rmid_forced ||
+ (shp->ns->shm_rmid_forced ||
(shp->shm_perm.mode & SHM_DEST));
}
@@ -340,7 +383,7 @@ static void shm_close(struct vm_area_str
ipc_update_pid(&shp->shm_lprid, task_tgid(current));
shp->shm_dtim = ktime_get_real_seconds();
shp->shm_nattch--;
- if (shm_may_destroy(ns, shp))
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
@@ -361,10 +404,10 @@ static int shm_try_destroy_orphaned(int
*
* As shp->* are changed under rwsem, it's safe to skip shp locking.
*/
- if (shp->shm_creator != NULL)
+ if (!list_empty(&shp->shm_clist))
return 0;
- if (shm_may_destroy(ns, shp)) {
+ if (shm_may_destroy(shp)) {
shm_lock_by_ptr(shp);
shm_destroy(ns, shp);
}
@@ -382,48 +425,97 @@ void shm_destroy_orphaned(struct ipc_nam
/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
- struct ipc_namespace *ns = task->nsproxy->ipc_ns;
- struct shmid_kernel *shp, *n;
+ for (;;) {
+ struct shmid_kernel *shp;
+ struct ipc_namespace *ns;
- if (list_empty(&task->sysvshm.shm_clist))
- return;
+ task_lock(task);
+
+ if (list_empty(&task->sysvshm.shm_clist)) {
+ task_unlock(task);
+ break;
+ }
+
+ shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
+ shm_clist);
- /*
- * If kernel.shm_rmid_forced is not set then only keep track of
- * which shmids are orphaned, so that a later set of the sysctl
- * can clean them up.
- */
- if (!ns->shm_rmid_forced) {
- down_read(&shm_ids(ns).rwsem);
- list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
- shp->shm_creator = NULL;
/*
- * Only under read lock but we are only called on current
- * so no entry on the list will be shared.
+ * 1) Get pointer to the ipc namespace. It is worth to say
+ * that this pointer is guaranteed to be valid because
+ * shp lifetime is always shorter than namespace lifetime
+ * in which shp lives.
+ * We taken task_lock it means that shp won't be freed.
*/
- list_del(&task->sysvshm.shm_clist);
- up_read(&shm_ids(ns).rwsem);
- return;
- }
+ ns = shp->ns;
- /*
- * Destroy all already created segments, that were not yet mapped,
- * and mark any mapped as orphan to cover the sysctl toggling.
- * Destroy is skipped if shm_may_destroy() returns false.
- */
- down_write(&shm_ids(ns).rwsem);
- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
- shp->shm_creator = NULL;
+ /*
+ * 2) If kernel.shm_rmid_forced is not set then only keep track of
+ * which shmids are orphaned, so that a later set of the sysctl
+ * can clean them up.
+ */
+ if (!ns->shm_rmid_forced)
+ goto unlink_continue;
- if (shm_may_destroy(ns, shp)) {
- shm_lock_by_ptr(shp);
- shm_destroy(ns, shp);
+ /*
+ * 3) get a reference to the namespace.
+ * The refcount could be already 0. If it is 0, then
+ * the shm objects will be free by free_ipc_work().
+ */
+ ns = get_ipc_ns_not_zero(ns);
+ if (!ns) {
+unlink_continue:
+ list_del_init(&shp->shm_clist);
+ task_unlock(task);
+ continue;
}
- }
- /* Remove the list head from any segments still attached. */
- list_del(&task->sysvshm.shm_clist);
- up_write(&shm_ids(ns).rwsem);
+ /*
+ * 4) get a reference to shp.
+ * This cannot fail: shm_clist_rm() is called before
+ * ipc_rmid(), thus the refcount cannot be 0.
+ */
+ WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
+
+ /*
+ * 5) unlink the shm segment from the list of segments
+ * created by current.
+ * This must be done last. After unlinking,
+ * only the refcounts obtained above prevent IPC_RMID
+ * from destroying the segment or the namespace.
+ */
+ list_del_init(&shp->shm_clist);
+
+ task_unlock(task);
+
+ /*
+ * 6) we have all references
+ * Thus lock & if needed destroy shp.
+ */
+ down_write(&shm_ids(ns).rwsem);
+ shm_lock_by_ptr(shp);
+ /*
+ * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
+ * safe to call ipc_rcu_putref here
+ */
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+
+ if (ipc_valid_object(&shp->shm_perm)) {
+ if (shm_may_destroy(shp))
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
+ } else {
+ /*
+ * Someone else deleted the shp from namespace
+ * idr/kht while we have waited.
+ * Just unlock and continue.
+ */
+ shm_unlock(shp);
+ }
+
+ up_write(&shm_ids(ns).rwsem);
+ put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
+ }
}
static vm_fault_t shm_fault(struct vm_fault *vmf)
@@ -680,7 +772,11 @@ static int newseg(struct ipc_namespace *
if (error < 0)
goto no_id;
+ shp->ns = ns;
+
+ task_lock(current);
list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist);
+ task_unlock(current);
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
@@ -1573,7 +1669,8 @@ out_nattch:
down_write(&shm_ids(ns).rwsem);
shp = shm_lock(ns, shmid);
shp->shm_nattch--;
- if (shm_may_destroy(ns, shp))
+
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
_
Patches currently in -mm which might be from alexander.mikhalitsyn(a)virtuozzo.com are
ipc-warn-if-trying-to-remove-ipc-object-which-is-absent.patch
shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses-simplified.patch
The backing memory was not freed, after loading it back to the SGX
reserved memory. This problem was not prevalent with the systems that
were available at the time when SGX was first upstreamed, as the swapped
memory could grow only up to 128 MB. However, Icelake Server can have
gigabytes of SGX memory, and thus this has become a real bottleneck.
Free the swap space for other use by calling shmem_truncate_range(),
when a page is faulted back.
Cc: stable(a)vger.kernel.org
Fixes: 1728ab54b4be ("x86/sgx: Add a page reclaimer")
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
arch/x86/kernel/cpu/sgx/encl.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 001808e3901c..f2d3f2e5028f 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -22,6 +22,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
{
unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
struct sgx_encl *encl = encl_page->encl;
+ struct inode *inode = file_inode(encl->backing);
struct sgx_pageinfo pginfo;
struct sgx_backing b;
pgoff_t page_index;
@@ -60,6 +61,9 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
sgx_encl_put_backing(&b, false);
+ /* Free the backing memory. */
+ shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
+
return ret;
}
--
2.32.0
The patch titled
Subject: shm: extend forced shm destroy to support objects from several IPC nses
has been added to the -mm tree. Its filename is
shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/shm-extend-forced-shm-destroy-to-…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/shm-extend-forced-shm-destroy-to-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Alexander Mikhalitsyn <alexander.mikhalitsyn(a)virtuozzo.com>
Subject: shm: extend forced shm destroy to support objects from several IPC nses
Currently, exit_shm function not designed to work properly when
task->sysvshm.shm_clist holds shm objects from different IPC namespaces.
This is a real pain when sysctl kernel.shm_rmid_forced = 1, because it
leads to use-after-free (reproducer exists).
That particular patch is attempt to fix the problem by extending exit_shm
mechanism to handle shm's destroy from several IPC ns'es.
To achieve that we do several things:
1. add namespace (non-refcounted) pointer to the struct shmid_kernel
2. during new shm object creation (newseg()/shmget syscall) we
initialize this pointer by current task IPC ns
3. exit_shm() fully reworked such that it traverses over all shp's in
task->sysvshm.shm_clist and gets IPC namespace not from current task as
it was before but from shp's object itself, then call shm_destroy(shp,
ns).
Note. We need to be really careful here, because as it was said before
(1), our pointer to IPC ns non-refcnt'ed. To be on the safe side we using
special helper get_ipc_ns_not_zero() which allows to get IPC ns refcounter
only if IPC ns not in the "state of destruction".
Q/A
Q: Why we can access shp->ns memory using non-refcounted pointer?
A: Because shp object lifetime is always shorther
than IPC namespace lifetime, so, if we get shp object from the
task->sysvshm.shm_clist while holding task_lock(task) nobody can
steal our namespace.
Q: Does this patch change semantics of unshare/setns/clone syscalls?
A: Not. It's just fixes non-covered case when process may leave
IPC namespace without getting task->sysvshm.shm_clist list cleaned up.
Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity")
Co-developed-by: Manfred Spraul <manfred(a)colorfullife.com>
Signed-off-by: Manfred Spraul <manfred(a)colorfullife.com>
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn(a)virtuozzo.com>
Cc: "Eric W. Biederman" <ebiederm(a)xmission.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: Pavel Tikhomirov <ptikhomirov(a)virtuozzo.com>
Cc: Vasily Averin <vvs(a)virtuozzo.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/ipc_namespace.h | 15 ++
include/linux/sched/task.h | 2
ipc/shm.c | 186 ++++++++++++++++++++++++--------
3 files changed, 156 insertions(+), 47 deletions(-)
--- a/include/linux/ipc_namespace.h~shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses
+++ a/include/linux/ipc_namespace.h
@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_
return ns;
}
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+ if (ns) {
+ if (refcount_inc_not_zero(&ns->ns.count))
+ return ns;
+ }
+
+ return NULL;
+}
+
extern void put_ipc_ns(struct ipc_namespace *ns);
#else
static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -146,6 +156,11 @@ static inline struct ipc_namespace *get_
{
return ns;
}
+
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+ return ns;
+}
static inline void put_ipc_ns(struct ipc_namespace *ns)
{
--- a/include/linux/sched/task.h~shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses
+++ a/include/linux/sched/task.h
@@ -157,7 +157,7 @@ static inline struct vm_struct *task_sta
* Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
* subscriptions and synchronises with wait4(). Also used in procfs. Also
* pins the final release of task.io_context. Also protects ->cpuset and
- * ->cgroup.subsys[]. And ->vfork_done.
+ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
*
* Nests both inside and outside of read_lock(&tasklist_lock).
* It must not be nested with write_lock_irq(&tasklist_lock),
--- a/ipc/shm.c~shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses
+++ a/ipc/shm.c
@@ -62,9 +62,18 @@ struct shmid_kernel /* private to the ke
struct pid *shm_lprid;
struct ucounts *mlock_ucounts;
- /* The task created the shm object. NULL if the task is dead. */
+ /*
+ * The task created the shm object, for
+ * task_lock(shp->shm_creator)
+ */
struct task_struct *shm_creator;
- struct list_head shm_clist; /* list by creator */
+
+ /*
+ * List by creator. task_lock(->shm_creator) required for read/write.
+ * If list_empty(), then the creator is dead already.
+ */
+ struct list_head shm_clist;
+ struct ipc_namespace *ns;
} __randomize_layout;
/* shm_mode upper byte flags */
@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_names
struct shmid_kernel *shp;
shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+ WARN_ON(ns != shp->ns);
if (shp->shm_nattch) {
shp->shm_perm.mode |= SHM_DEST;
@@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head
kfree(shp);
}
-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+/*
+ * It has to be called with shp locked.
+ * It must be called before ipc_rmid()
+ */
+static inline void shm_clist_rm(struct shmid_kernel *shp)
{
- list_del(&s->shm_clist);
- ipc_rmid(&shm_ids(ns), &s->shm_perm);
+ struct task_struct *creator;
+
+ /* ensure that shm_creator does not disappear */
+ rcu_read_lock();
+
+ /*
+ * A concurrent exit_shm may do a list_del_init() as well.
+ * Just do nothing if exit_shm already did the work
+ */
+ if (!list_empty(&shp->shm_clist)) {
+ /*
+ * shp->shm_creator is guaranteed to be valid *only*
+ * if shp->shm_clist is not empty.
+ */
+ creator = shp->shm_creator;
+
+ task_lock(creator);
+ /*
+ * list_del_init() is a nop if the entry was already removed
+ * from the list.
+ */
+ list_del_init(&shp->shm_clist);
+ task_unlock(creator);
+ }
+ rcu_read_unlock();
+}
+
+static inline void shm_rmid(struct shmid_kernel *s)
+{
+ shm_clist_rm(s);
+ ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
}
@@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_names
shm_file = shp->shm_file;
shp->shm_file = NULL;
ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
- shm_rmid(ns, shp);
+ shm_rmid(shp);
shm_unlock(shp);
if (!is_file_hugepages(shm_file))
shmem_lock(shm_file, 0, shp->mlock_ucounts);
@@ -303,10 +346,10 @@ static void shm_destroy(struct ipc_names
*
* 2) sysctl kernel.shm_rmid_forced is set to 1.
*/
-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+static bool shm_may_destroy(struct shmid_kernel *shp)
{
return (shp->shm_nattch == 0) &&
- (ns->shm_rmid_forced ||
+ (shp->ns->shm_rmid_forced ||
(shp->shm_perm.mode & SHM_DEST));
}
@@ -337,7 +380,7 @@ static void shm_close(struct vm_area_str
ipc_update_pid(&shp->shm_lprid, task_tgid(current));
shp->shm_dtim = ktime_get_real_seconds();
shp->shm_nattch--;
- if (shm_may_destroy(ns, shp))
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
@@ -358,10 +401,10 @@ static int shm_try_destroy_orphaned(int
*
* As shp->* are changed under rwsem, it's safe to skip shp locking.
*/
- if (shp->shm_creator != NULL)
+ if (!list_empty(&shp->shm_clist))
return 0;
- if (shm_may_destroy(ns, shp)) {
+ if (shm_may_destroy(shp)) {
shm_lock_by_ptr(shp);
shm_destroy(ns, shp);
}
@@ -379,48 +422,94 @@ void shm_destroy_orphaned(struct ipc_nam
/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
- struct ipc_namespace *ns = task->nsproxy->ipc_ns;
- struct shmid_kernel *shp, *n;
+ for (;;) {
+ struct shmid_kernel *shp;
+ struct ipc_namespace *ns;
- if (list_empty(&task->sysvshm.shm_clist))
- return;
+ task_lock(task);
+
+ if (list_empty(&task->sysvshm.shm_clist)) {
+ task_unlock(task);
+ break;
+ }
+
+ shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
+ shm_clist);
- /*
- * If kernel.shm_rmid_forced is not set then only keep track of
- * which shmids are orphaned, so that a later set of the sysctl
- * can clean them up.
- */
- if (!ns->shm_rmid_forced) {
- down_read(&shm_ids(ns).rwsem);
- list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
- shp->shm_creator = NULL;
/*
- * Only under read lock but we are only called on current
- * so no entry on the list will be shared.
+ * 1) get a reference to shp.
+ * This must be done first: Right now, task_lock() prevents
+ * any concurrent IPC_RMID calls. After the list_del_init(),
+ * IPC_RMID will not acquire task_lock(->shm_creator)
+ * anymore.
*/
- list_del(&task->sysvshm.shm_clist);
- up_read(&shm_ids(ns).rwsem);
- return;
- }
+ WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
- /*
- * Destroy all already created segments, that were not yet mapped,
- * and mark any mapped as orphan to cover the sysctl toggling.
- * Destroy is skipped if shm_may_destroy() returns false.
- */
- down_write(&shm_ids(ns).rwsem);
- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
- shp->shm_creator = NULL;
+ /* 2) unlink */
+ list_del_init(&shp->shm_clist);
+
+ /*
+ * 3) Get pointer to the ipc namespace. It is worth to say
+ * that this pointer is guaranteed to be valid because
+ * shp lifetime is always shorter than namespace lifetime
+ * in which shp lives.
+ * We taken task_lock it means that shp won't be freed.
+ */
+ ns = shp->ns;
- if (shm_may_destroy(ns, shp)) {
- shm_lock_by_ptr(shp);
- shm_destroy(ns, shp);
+ /*
+ * 4) If kernel.shm_rmid_forced is not set then only keep track of
+ * which shmids are orphaned, so that a later set of the sysctl
+ * can clean them up.
+ */
+ if (!ns->shm_rmid_forced) {
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+ task_unlock(task);
+ continue;
}
- }
- /* Remove the list head from any segments still attached. */
- list_del(&task->sysvshm.shm_clist);
- up_write(&shm_ids(ns).rwsem);
+ /*
+ * 5) get a reference to the namespace.
+ * The refcount could be already 0. If it is 0, then
+ * the shm objects will be free by free_ipc_work().
+ */
+ ns = get_ipc_ns_not_zero(ns);
+ if (!ns) {
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+ task_unlock(task);
+ continue;
+ }
+ task_unlock(task);
+
+ /*
+ * 6) we have all references
+ * Thus lock & if needed destroy shp.
+ */
+ down_write(&shm_ids(ns).rwsem);
+ shm_lock_by_ptr(shp);
+ /*
+ * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
+ * safe to call ipc_rcu_putref here
+ */
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+
+ if (ipc_valid_object(&shp->shm_perm)) {
+ if (shm_may_destroy(shp))
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
+ } else {
+ /*
+ * Someone else deleted the shp from namespace
+ * idr/kht while we have waited.
+ * Just unlock and continue.
+ */
+ shm_unlock(shp);
+ }
+
+ up_write(&shm_ids(ns).rwsem);
+ put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
+ }
}
static vm_fault_t shm_fault(struct vm_fault *vmf)
@@ -676,7 +765,11 @@ static int newseg(struct ipc_namespace *
if (error < 0)
goto no_id;
+ shp->ns = ns;
+
+ task_lock(current);
list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist);
+ task_unlock(current);
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
@@ -1567,7 +1660,8 @@ out_nattch:
down_write(&shm_ids(ns).rwsem);
shp = shm_lock(ns, shmid);
shp->shm_nattch--;
- if (shm_may_destroy(ns, shp))
+
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
_
Patches currently in -mm which might be from alexander.mikhalitsyn(a)virtuozzo.com are
ipc-warn-if-trying-to-remove-ipc-object-which-is-absent.patch
shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses.patch
A long story behind all of that...
Some time ago I met kernel crash after CRIU restore procedure,
fortunately, it was CRIU restore, so, I had dump files and could
do restore many times and crash reproduced easily. After some
investigation I've constructed the minimal reproducer. It was
found that it's use-after-free and it happens only if
sysctl kernel.shm_rmid_forced = 1.
The key of the problem is that the exit_shm() function
not handles shp's object destroy when task->sysvshm.shm_clist
contains items from different IPC namespaces. In most cases
this list will contain only items from one IPC namespace.
Why this list may contain object from different namespaces?
Function exit_shm() designed to clean up this list always when
process leaves IPC namespace. But we made a mistake a long time ago
and not add exit_shm() call into setns() syscall procedures.
1st second idea was just to add this call to setns() syscall but
it's obviously changes semantics of setns() syscall and that's
userspace-visible change. So, I gave up this idea.
First real attempt to address the issue was just to omit forced destroy
if we meet shp object not from current task IPC namespace [1]. But
that was not the best idea because task->sysvshm.shm_clist was
protected by rwsem which belongs to current task IPC namespace.
It means that list corruption may occur.
Second approach is just extend exit_shm() to properly handle
shp's from different IPC namespaces [2]. This is really
non-trivial thing, I've put a lot of effort into that but
not believed that it's possible to make it fully safe, clean
and clear.
Thanks to the efforts of Manfred Spraul working and elegant
solution was designed. Thanks a lot, Manfred!
Eric also suggested the way to address the issue in
("[RFC][PATCH] shm: In shm_exit destroy all created and never attached segments")
Eric's idea was to maintain a list of shm_clists one per IPC namespace,
use lock-less lists. But there is some extra memory consumption-related concerns.
Alternative solution which was suggested by me was implemented in
("shm: reset shm_clist on setns but omit forced shm destroy")
Idea is pretty simple, we add exit_shm() syscall to setns() but DO NOT
destroy shm segments even if sysctl kernel.shm_rmid_forced = 1, we just
clean up the task->sysvshm.shm_clist list. This chages semantics of
setns() syscall a little bit but in comparision to "naive" solution
when we just add exit_shm() without any special exclusions this looks
like a safer option.
[1] https://lkml.org/lkml/2021/7/6/1108
[2] https://lkml.org/lkml/2021/7/14/736
Cc: "Eric W. Biederman" <ebiederm(a)xmission.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: Pavel Tikhomirov <ptikhomirov(a)virtuozzo.com>
Cc: Vasily Averin <vvs(a)virtuozzo.com>
Cc: Manfred Spraul <manfred(a)colorfullife.com>
Cc: Alexander Mikhalitsyn <alexander(a)mihalicyn.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn(a)virtuozzo.com>
Alexander Mikhalitsyn (2):
ipc: WARN if trying to remove ipc object which is absent
shm: extend forced shm destroy to support objects from several IPC
nses
include/linux/ipc_namespace.h | 15 +++
include/linux/sched/task.h | 2 +-
include/linux/shm.h | 2 +-
ipc/shm.c | 170 +++++++++++++++++++++++++---------
ipc/util.c | 6 +-
5 files changed, 145 insertions(+), 50 deletions(-)
--
2.31.1
Some devices tend to trigger SYS_ERR interrupt while the host handling
SYS_ERR state of the device during power up. This creates a race
condition and causes a failure in booting up the device.
The issue is seen on the Sierra Wireless EM9191 modem during SYS_ERR
handling in mhi_async_power_up(). Once the host detects that the device
is in SYS_ERR state, it issues MHI_RESET and waits for the device to
process the reset request. During this time, the device triggers SYS_ERR
interrupt to the host and host starts handling SYS_ERR execution.
So by the time the device has completed reset, host starts SYS_ERR
handling. This causes the race condition and the modem fails to boot.
Hence, register the IRQ handler only after handling the SYS_ERR check
to avoid getting spurious IRQs from the device.
Cc: stable(a)vger.kernel.org
Fixes: e18d4e9fa79b ("bus: mhi: core: Handle syserr during power_up")
Reported-by: Aleksander Morgado <aleksander(a)aleksander.es>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
---
drivers/bus/mhi/core/pm.c | 26 +++++++++++---------------
1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
index fb99e3727155..ec5f11166820 100644
--- a/drivers/bus/mhi/core/pm.c
+++ b/drivers/bus/mhi/core/pm.c
@@ -1055,10 +1055,6 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
mutex_lock(&mhi_cntrl->pm_mutex);
mhi_cntrl->pm_state = MHI_PM_DISABLE;
- ret = mhi_init_irq_setup(mhi_cntrl);
- if (ret)
- goto error_setup_irq;
-
/* Setup BHI INTVEC */
write_lock_irq(&mhi_cntrl->pm_lock);
mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
@@ -1072,7 +1068,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
dev_err(dev, "%s is not a valid EE for power on\n",
TO_MHI_EXEC_STR(current_ee));
ret = -EIO;
- goto error_async_power_up;
+ goto error_setup_irq;
}
state = mhi_get_mhi_state(mhi_cntrl);
@@ -1082,19 +1078,18 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
if (state == MHI_STATE_SYS_ERR) {
mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
ret = wait_event_timeout(mhi_cntrl->state_event,
- MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state) ||
- mhi_read_reg_field(mhi_cntrl,
- mhi_cntrl->regs,
- MHICTRL,
- MHICTRL_RESET_MASK,
- MHICTRL_RESET_SHIFT,
+ mhi_read_reg_field(mhi_cntrl,
+ mhi_cntrl->regs,
+ MHICTRL,
+ MHICTRL_RESET_MASK,
+ MHICTRL_RESET_SHIFT,
&val) ||
!val,
msecs_to_jiffies(mhi_cntrl->timeout_ms));
if (!ret) {
ret = -EIO;
dev_info(dev, "Failed to reset MHI due to syserr state\n");
- goto error_async_power_up;
+ goto error_setup_irq;
}
/*
@@ -1104,6 +1099,10 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
}
+ ret = mhi_init_irq_setup(mhi_cntrl);
+ if (ret)
+ goto error_setup_irq;
+
/* Transition to next state */
next_state = MHI_IN_PBL(current_ee) ?
DEV_ST_TRANSITION_PBL : DEV_ST_TRANSITION_READY;
@@ -1116,9 +1115,6 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
return 0;
-error_async_power_up:
- mhi_deinit_free_irq(mhi_cntrl);
-
error_setup_irq:
mhi_cntrl->pm_state = MHI_PM_DISABLE;
mutex_unlock(&mhi_cntrl->pm_mutex);
--
2.25.1
From: Charan Teja Reddy <charante(a)codeaurora.org>
[ Upstream commit f492283b157053e9555787262f058ae33096f568 ]
It is expected from the clients to follow the below steps on an imported
dmabuf fd:
a) dmabuf = dma_buf_get(fd) // Get the dmabuf from fd
b) dma_buf_attach(dmabuf); // Clients attach to the dmabuf
o Here the kernel does some slab allocations, say for
dma_buf_attachment and may be some other slab allocation in the
dmabuf->ops->attach().
c) Client may need to do dma_buf_map_attachment().
d) Accordingly dma_buf_unmap_attachment() should be called.
e) dma_buf_detach () // Clients detach to the dmabuf.
o Here the slab allocations made in b) are freed.
f) dma_buf_put(dmabuf) // Can free the dmabuf if it is the last
reference.
Now say an erroneous client failed at step c) above thus it directly
called dma_buf_put(), step f) above. Considering that it may be the last
reference to the dmabuf, buffer will be freed with pending attachments
left to the dmabuf which can show up as the 'memory leak'. This should
at least be reported as the WARN().
Signed-off-by: Charan Teja Reddy <charante(a)codeaurora.org>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1627043468-16381-1-git-send-e…
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/dma-buf/dma-buf.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 758de0e9b2ddc..16bbc9bc9e6d1 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -79,6 +79,7 @@ static void dma_buf_release(struct dentry *dentry)
if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
dma_resv_fini(dmabuf->resv);
+ WARN_ON(!list_empty(&dmabuf->attachments));
module_put(dmabuf->owner);
kfree(dmabuf->name);
kfree(dmabuf);
--
2.33.0
From: Charan Teja Reddy <charante(a)codeaurora.org>
[ Upstream commit f492283b157053e9555787262f058ae33096f568 ]
It is expected from the clients to follow the below steps on an imported
dmabuf fd:
a) dmabuf = dma_buf_get(fd) // Get the dmabuf from fd
b) dma_buf_attach(dmabuf); // Clients attach to the dmabuf
o Here the kernel does some slab allocations, say for
dma_buf_attachment and may be some other slab allocation in the
dmabuf->ops->attach().
c) Client may need to do dma_buf_map_attachment().
d) Accordingly dma_buf_unmap_attachment() should be called.
e) dma_buf_detach () // Clients detach to the dmabuf.
o Here the slab allocations made in b) are freed.
f) dma_buf_put(dmabuf) // Can free the dmabuf if it is the last
reference.
Now say an erroneous client failed at step c) above thus it directly
called dma_buf_put(), step f) above. Considering that it may be the last
reference to the dmabuf, buffer will be freed with pending attachments
left to the dmabuf which can show up as the 'memory leak'. This should
at least be reported as the WARN().
Signed-off-by: Charan Teja Reddy <charante(a)codeaurora.org>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1627043468-16381-1-git-send-e…
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/dma-buf/dma-buf.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 511fe0d217a08..733c8b1c8467c 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -79,6 +79,7 @@ static void dma_buf_release(struct dentry *dentry)
if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
dma_resv_fini(dmabuf->resv);
+ WARN_ON(!list_empty(&dmabuf->attachments));
module_put(dmabuf->owner);
kfree(dmabuf->name);
kfree(dmabuf);
--
2.33.0
In commit 221abd4d478a ("staging: r8188eu: Remove no more necessary definitions
and code"), two entries were removed from RTW_ChannelPlanMap[], but not replaced
with zeros. The position within this table is important, thus the patch broke
systems operating in regulatory domains osted later than entry 0x13 in the table.
Unfortunately, the FCC entry comes before that point and most testers did not see
this problem.
Reported-and-tested-by: Zameer Manji <zmanji(a)gmail.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Fixes: 221abd4d478a ("staging: r8188eu: Remove no more necessary definitions and code")
Cc: Stable <stable(a)vger.kernel.org> # v5.5+
Signed-off-by: Larry Finger <Larry.Finger(a)lwfinger.net>
---
v2 - fixed use of () rsther than {} - found by kernel test robot
---
drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 55c3d4a6faeb..5b60e6df5f87 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -107,6 +107,7 @@ static struct rt_channel_plan_map RTW_ChannelPlanMap[RT_CHANNEL_DOMAIN_MAX] = {
{0x01}, /* 0x10, RT_CHANNEL_DOMAIN_JAPAN */
{0x02}, /* 0x11, RT_CHANNEL_DOMAIN_FCC_NO_DFS */
{0x01}, /* 0x12, RT_CHANNEL_DOMAIN_JAPAN_NO_DFS */
+ {0x00}, /* 0x13 */
{0x02}, /* 0x14, RT_CHANNEL_DOMAIN_TAIWAN_NO_DFS */
{0x00}, /* 0x15, RT_CHANNEL_DOMAIN_ETSI_NO_DFS */
{0x00}, /* 0x16, RT_CHANNEL_DOMAIN_KOREA_NO_DFS */
@@ -118,6 +119,7 @@ static struct rt_channel_plan_map RTW_ChannelPlanMap[RT_CHANNEL_DOMAIN_MAX] = {
{0x00}, /* 0x1C, */
{0x00}, /* 0x1D, */
{0x00}, /* 0x1E, */
+ {0x00}, /* 0x1F, */
/* 0x20 ~ 0x7F , New Define ===== */
{0x00}, /* 0x20, RT_CHANNEL_DOMAIN_WORLD_NULL */
{0x01}, /* 0x21, RT_CHANNEL_DOMAIN_ETSI1_NULL */
--
2.33.1
Good Day,
My name is Luis Fernandez.I am contacting you because we have
investors that have the capacity to invest in any massive project
in your country or invest in your existing project that requires
funding.
Kindly get back to me for more details.
Best Regards
Luis Fernandez
Since ARMv8.0 the upper 32 bits of ESR_ELx have been RES0, and recently
some of the upper bits gained a meaning and can be non-zero. For
example, when FEAT_LS64 is implemented, ESR_ELx[36:32] contain ISS2,
which for an ST64BV or ST64BV0 can be non-zero. This can be seen in ARM
DDI 0487G.b, page D13-3145, section D13.2.37.
Generally, we must not rely on RES0 bit remaining zero in future, and
when extracting ESR_ELx.EC we must mask out all other bits.
All C code uses the ESR_ELx_EC() macro, which masks out the irrelevant
bits, and therefore no alterations are required to C code to avoid
consuming irrelevant bits.
In a couple of places the KVM assembly extracts ESR_ELx.EC using LSR on
an X register, and so could in theory consume previously RES0 bits. In
both cases this is for comparison with EC values ESR_ELx_EC_HVC32 and
ESR_ELx_EC_HVC64, for which the upper bits of ESR_ELx must currently be
zero, but this could change in future.
This patch adjusts the KVM vectors to use UBFX rather than LSR to
extract ESR_ELx.EC, ensuring these are robust to future additions to
ESR_ELx.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mark Rutland <mark.rutland(a)arm.com>
Cc: Alexandru Elisei <alexandru.elisei(a)arm.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: James Morse <james.morse(a)arm.com>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
---
arch/arm64/include/asm/esr.h | 1 +
arch/arm64/kvm/hyp/hyp-entry.S | 2 +-
arch/arm64/kvm/hyp/nvhe/host.S | 2 +-
3 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index a305ce256090..d52a0b269ee8 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -68,6 +68,7 @@
#define ESR_ELx_EC_MAX (0x3F)
#define ESR_ELx_EC_SHIFT (26)
+#define ESR_ELx_EC_WIDTH (6)
#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 9aa9b73475c9..b6b6801d96d5 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -44,7 +44,7 @@
el1_sync: // Guest trapped into EL2
mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
+ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 4b652ffb591d..d310d2b2c8b4 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -115,7 +115,7 @@ SYM_FUNC_END(__hyp_do_panic)
.L__vect_start\@:
stp x0, x1, [sp, #-16]!
mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
+ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
b.ne __host_exit
--
2.11.0
Commit 761ed4a94582 ("tty: serial_core: convert uart_close to use
tty_port_close") converted serial core to use tty_port_close() but
failed to notice that the transmit buffer still needs to be freed on
final close.
Not freeing the transmit buffer means that the buffer is no longer
cleared on next open so that any ioctl() waiting for the buffer to drain
might wait indefinitely (e.g. on termios changes) or that stale data can
end up being transmitted in case tx is restarted.
Furthermore, the buffer of any port that has been opened would leak on
driver unbind.
Note that the port lock is held when clearing the buffer pointer due to
the ldisc race worked around by commit a5ba1d95e46e ("uart: fix race
between uart_put_char() and uart_shutdown()").
Also note that the tty-port shutdown() callback is not called for
console ports so it is not strictly necessary to free the buffer page
after releasing the lock (cf. d72402145ace ("tty/serial: do not free
trasnmit buffer page under port lock")).
Reported-by: Baruch Siach <baruch(a)tkos.co.il>
Link: https://lore.kernel.org/r/319321886d97c456203d5c6a576a5480d07c3478.16357816…
Fixes: 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close")
Cc: stable(a)vger.kernel.org # 4.9
Cc: Rob Herring <robh(a)kernel.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/tty/serial/serial_core.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0e2e35ab64c7..58834698739c 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1542,6 +1542,7 @@ static void uart_tty_port_shutdown(struct tty_port *port)
{
struct uart_state *state = container_of(port, struct uart_state, port);
struct uart_port *uport = uart_port_check(state);
+ char *buf;
/*
* At this point, we stop accepting input. To do this, we
@@ -1563,8 +1564,18 @@ static void uart_tty_port_shutdown(struct tty_port *port)
*/
tty_port_set_suspended(port, 0);
- uart_change_pm(state, UART_PM_STATE_OFF);
+ /*
+ * Free the transmit buffer.
+ */
+ spin_lock_irq(&uport->lock);
+ buf = state->xmit.buf;
+ state->xmit.buf = NULL;
+ spin_unlock_irq(&uport->lock);
+ if (buf)
+ free_page((unsigned long)buf);
+
+ uart_change_pm(state, UART_PM_STATE_OFF);
}
static void uart_wait_until_sent(struct tty_struct *tty, int timeout)
--
2.32.0
Dear Linux stable folks,
Am 28.10.21 um 16:21 schrieb Alex Deucher:
> The DMA mask on SI parts is 40 bits not 44. Copy
> paste typo.
>
> Fixes: 244511f386ccb9 ("drm/amdgpu: simplify and cleanup setting the dma mask")
> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1762
> Acked-by: Christian König <christian.koenig(a)amd.com>
> Tested-by: Paul Menzel <pmenzel(a)molgen.mpg.de>
> Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
This is commit 403475be6d8b122c3e6b8a47e075926d7299e5ef in Linus’ master
branch. Could you please apply it to the stable series (5.4+)?
Kind regards,
Paul
commit c7cb42e94473aafe553c0f2a3d8ca904599399ed upstream.
When handling THP hwpoison checked if the THP is in allocation or free
stage since hwpoison may mistreat it as hugetlb page. After commit
415c64c1453a ("mm/memory-failure: split thp earlier in memory error
handling") the problem has been fixed, so this check is no longer
needed. Remove it. The side effect of the removal is hwpoison may
report unsplit THP instead of unknown error for shmem THP. It seems not
like a big deal.
The following patch "mm: filemap: check if THP has hwpoisoned subpage
for PMD page fault" depends on this, which fixes shmem THP with
hwpoisoned subpage(s) are mapped PMD wrongly. So this patch needs to be
backported to -stable as well.
Link: https://lkml.kernel.org/r/20211020210755.23964-2-shy828301@gmail.com
Signed-off-by: Yang Shi <shy828301(a)gmail.com>
Suggested-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
---
mm/memory-failure.c | 14 --------------
1 file changed, 14 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 01445ddff58d..bd2cd4dd59b6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -956,20 +956,6 @@ static int get_hwpoison_page(struct page *page)
{
struct page *head = compound_head(page);
- if (!PageHuge(head) && PageTransHuge(head)) {
- /*
- * Non anonymous thp exists only in allocation/free time. We
- * can't handle such a case correctly, so let's give it up.
- * This should be better than triggering BUG_ON when kernel
- * tries to touch the "partially handled" page.
- */
- if (!PageAnon(head)) {
- pr_err("Memory failure: %#lx: non anonymous thp\n",
- page_to_pfn(page));
- return 0;
- }
- }
-
if (get_page_unless_zero(head)) {
if (head == compound_head(page))
return 1;
--
2.26.2
From: Mike Marciniszyn <mike.marciniszyn(a)cornelisnetworks.com>
This series ports upstream commit:
d39bf40e55e6 ("IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields")
Gustavo A. R. Silva (1):
IB/qib: Use struct_size() helper
Mike Marciniszyn (1):
IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt
fields
drivers/infiniband/hw/qib/qib_user_sdma.c | 35 ++++++++++++++++++++++---------
1 file changed, 25 insertions(+), 10 deletions(-)
--
Changes from v1:
Correct signed off for Mike Marciniszyn
Hi Greg and Sasha,
Please apply commit 3d5e7a28b1ea ("KVM: x86: avoid warning with
-Wbitwise-instead-of-logical") to 5.10, 5.14, and 5.15, where it
resolves a build error with tip of tree clang due to -Werror:
arch/x86/kvm/mmu/mmu.c:3548:15: error: use of bitwise '|' with boolean operands [-Werror,-Wbitwise-instead-of-logical]
reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
||
arch/x86/kvm/mmu/mmu.c:3548:15: note: cast one or both operands to int to silence this warning
1 error generated.
It applies cleanly to 5.14 and 5.15 and I have attached a backport for
5.10. I have added Paolo in case he has any objections to this.
Cheers,
Nathan
Good Day,
My name is Luis Fernandez.I am contacting you because we have
investors that have the capacity to invest in any massive project
in your country or invest in your existing project that requires
funding.
Kindly get back to me for more details.
Best Regards
Luis Fernandez
2021-11-08 0:22 GMT+09:00, Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>:
> All the error handling paths of 'smb2_sess_setup()' end to 'out_err'.
>
> All but the new error handling path added by the commit given in the Fixes
> tag below.
>
> Fix this error handling path and branch to 'out_err' as well.
>
> Fixes: 0d994cd482ee ("ksmbd: add buffer validation in session setup")
Cc: stable(a)vger.kernel.org # v5.15
> Signed-off-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Thanks!
Hi Greg,
could you cherry-pick the following three commits to 5.14.y tree?
The Cc-to-stable was missing on those, although they should have been
picked up.
cbea6e5a7772b7a5b80baa8f98fd77853487fd2a
ALSA: pcm: Check mmap capability of runtime dma buffer at first
0899a7a23047f106c06888769d6cd6ff43d7395f
ALSA: pci: rme: Set up buffer type properly
4d9e9153f1c64d91a125c6967bc0bfb0bb653ea0
ALSA: pci: cs46xx: Fix set up buffer type properly
They are needed only for 5.14.y, not for older versions.
The relevant bug report is found at:
https://bugzilla.kernel.org/show_bug.cgi?id=214947
Thanks!
Takashi
The function can_rx_offload_threaded_irq_finish() is needed to trigger
the NAPI thread to deliver read CAN frames to the networking stack.
This patch adds the missing call to can_rx_offload_threaded_irq_finish()
in case of a bus off, before leaving the interrupt handler to avoid
packet starvation.
Link: https://lore.kernel.org/all/20211106201526.44292-1-mkl@pengutronix.de
Fixes: 30bfec4fec59 ("can: rx-offload: can_rx_offload_threaded_irq_finish(): add new function to be called from threaded interrupt")
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 673861ab665a..212fcd1554e4 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -2290,8 +2290,10 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
* check will fail, too. So leave IRQ handler
* directly.
*/
- if (priv->can.state == CAN_STATE_BUS_OFF)
+ if (priv->can.state == CAN_STATE_BUS_OFF) {
+ can_rx_offload_threaded_irq_finish(&priv->offload);
return IRQ_HANDLED;
+ }
}
handled = IRQ_HANDLED;
--
2.33.0
From: Zhang Changzhong <zhangchangzhong(a)huawei.com>
The TP.CM_BAM message must be sent to the global address [1], so add a
check to drop TP.CM_BAM sent to a non-global address.
Without this patch, the receiver will treat the following packets as
normal RTS/CTS transport:
18EC0102#20090002FF002301
18EB0102#0100000000000000
18EB0102#020000FFFFFFFFFF
[1] SAE-J1939-82 2015 A.3.3 Row 1.
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Link: https://lore.kernel.org/all/1635431907-15617-4-git-send-email-zhangchangzho…
Cc: stable(a)vger.kernel.org
Signed-off-by: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Acked-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
net/can/j1939/transport.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 05eb3d059e17..a271688780a2 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -2023,6 +2023,11 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
extd = J1939_ETP;
fallthrough;
case J1939_TP_CMD_BAM:
+ if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) {
+ netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n",
+ __func__, skcb->addr.sa);
+ return;
+ }
fallthrough;
case J1939_TP_CMD_RTS:
if (skcb->addr.type != extd)
--
2.33.0