From: Martijn Coenen <maco(a)android.com>
binder_poll() passes the thread->wait waitqueue that
can be slept on for work. When a thread that uses
epoll explicitly exits using BINDER_THREAD_EXIT,
the waitqueue is freed, but it is never removed
from the corresponding epoll data structure. When
the process subsequently exits, the epoll cleanup
code tries to access the waitlist, which results in
a use-after-free.
Prevent this by using POLLFREE when the thread exits.
Signed-off-by: Martijn Coenen <maco(a)android.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Cc: stable <stable(a)vger.kernel.org> # 4.14
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c96623d..77b1b95 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -4269,6 +4269,18 @@ static int binder_thread_release(struct binder_proc *proc,
if (t)
spin_lock(&t->lock);
}
+
+ /*
+ * If this thread used poll, make sure we remove the waitqueue
+ * from any epoll data structures holding it with POLLFREE.
+ * waitqueue_active() is safe to use here because we're holding
+ * the inner lock.
+ */
+ if ((thread->looper & BINDER_LOOPER_STATE_POLL) &&
+ waitqueue_active(&thread->wait)) {
+ wake_up_poll(&thread->wait, POLLHUP | POLLFREE);
+ }
+
binder_inner_proc_unlock(thread->proc);
if (send_reply)
--
2.7.4
From: Ganesh Mahendran <opensource.ganesh(a)gmail.com>
VM_IOREMAP is used to access hardware through a mechanism called
I/O mapped memory. Android binder is a IPC machanism which will
not access I/O memory.
And VM_IOREMAP has alignment requiement which may not needed in
binder.
__get_vm_area_node()
{
...
if (flags & VM_IOREMAP)
align = 1ul << clamp_t(int, fls_long(size),
PAGE_SHIFT, IOREMAP_MAX_ORDER);
...
}
This patch will save some kernel vm area, especially for 32bit os.
In 32bit OS, kernel vm area is only 240MB. We may got below
error when launching a app:
<3>[ 4482.440053] binder_alloc: binder_alloc_mmap_handler: 15728 8ce67000-8cf65000 get_vm_area failed -12
<3>[ 4483.218817] binder_alloc: binder_alloc_mmap_handler: 15745 8ce67000-8cf65000 get_vm_area failed -12
Signed-off-by: Ganesh Mahendran <opensource.ganesh(a)gmail.com>
Acked-by: Martijn Coenen <maco(a)android.com>
Acked-by: Todd Kjos <tkjos(a)google.com>
Cc: stable <stable(a)vger.kernel.org>
----
V3: update comments
V2: update comments
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder_alloc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 8989e31..6da12c5 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -626,7 +626,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
goto err_already_mapped;
}
- area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP);
+ area = get_vm_area(vma->vm_end - vma->vm_start, VM_ALLOC);
if (area == NULL) {
ret = -ENOMEM;
failure_string = "get_vm_area";
--
2.7.4
From: Todd Kjos <tkjos(a)android.com>
binder_send_failed_reply() is called when a synchronous
transaction fails. It reports an error to the thread that
is waiting for the completion. Given that the transaction
is synchronous, there should never be more than 1 error
response to that thread -- this was being asserted with
a WARN().
However, when exercising the driver with syzbot tests, cases
were observed where multiple "synchronous" requests were
sent without waiting for responses, so it is possible that
multiple errors would be reported to the thread. This testing
was conducted with panic_on_warn set which forced the crash.
This is easily reproduced by sending back-to-back
"synchronous" transactions without checking for any
response (eg, set read_size to 0):
bwr.write_buffer = (uintptr_t)&bc1;
bwr.write_size = sizeof(bc1);
bwr.read_buffer = (uintptr_t)&br;
bwr.read_size = 0;
ioctl(fd, BINDER_WRITE_READ, &bwr);
sleep(1);
bwr2.write_buffer = (uintptr_t)&bc2;
bwr2.write_size = sizeof(bc2);
bwr2.read_buffer = (uintptr_t)&br;
bwr2.read_size = 0;
ioctl(fd, BINDER_WRITE_READ, &bwr2);
sleep(1);
The first transaction is sent to the servicemanager and the reply
fails because no VMA is set up by this client. After
binder_send_failed_reply() is called, the BINDER_WORK_RETURN_ERROR
is sitting on the thread's todo list since the read_size was 0 and
the client is not waiting for a response.
The 2nd transaction is sent and the BINDER_WORK_RETURN_ERROR has not
been consumed, so the thread's reply_error.cmd is still set (normally
cleared when the BINDER_WORK_RETURN_ERROR is handled). Therefore
when the servicemanager attempts to reply to the 2nd failed
transaction, the error is already set and it triggers this warning.
This is a user error since it is not waiting for the synchronous
transaction to complete. If it ever does check, it will see an
error.
Changed the WARN() to a pr_warn().
Signed-off-by: Todd Kjos <tkjos(a)android.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c88582a..abf05aa 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1919,8 +1919,14 @@ static void binder_send_failed_reply(struct binder_transaction *t,
&target_thread->todo);
wake_up_interruptible(&target_thread->wait);
} else {
- WARN(1, "Unexpected reply error: %u\n",
- target_thread->reply_error.cmd);
+ /*
+ * Cannot get here for normal operation, but
+ * we can if multiple synchronous transactions
+ * are sent without blocking for responses.
+ * Just ignore the 2nd error in this case.
+ */
+ pr_warn("Unexpected reply error: %u\n",
+ target_thread->reply_error.cmd);
}
binder_inner_proc_unlock(target_thread->proc);
binder_thread_dec_tmpref(target_thread);
--
2.7.4
From: Todd Kjos <tkjos(a)android.com>
User-space normally keeps the node alive when creating a transaction
since it has a reference to the target. The local strong ref keeps it
alive if the sending process dies before the target process processes
the transaction. If the source process is malicious or has a reference
counting bug, this can fail.
In this case, when we attempt to decrement the node in the failure
path, the node has already been freed.
This is fixed by taking a tmpref on the node while constructing
the transaction. To avoid re-acquiring the node lock and inner
proc lock to increment the proc's tmpref, a helper is used that
does the ref increments on both the node and proc.
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder.c | 93 ++++++++++++++++++++++++++++++++++--------------
1 file changed, 66 insertions(+), 27 deletions(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 3f9d702..fa23e45 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2582,6 +2582,48 @@ static bool binder_proc_transaction(struct binder_transaction *t,
return true;
}
+/**
+ * binder_get_node_refs_for_txn() - Get required refs on node for txn
+ * @node: struct binder_node for which to get refs
+ * @proc: returns @node->proc if valid
+ * @error: if no @proc then returns BR_DEAD_REPLY
+ *
+ * User-space normally keeps the node alive when creating a transaction
+ * since it has a reference to the target. The local strong ref keeps it
+ * alive if the sending process dies before the target process processes
+ * the transaction. If the source process is malicious or has a reference
+ * counting bug, relying on the local strong ref can fail.
+ *
+ * Since user-space can cause the local strong ref to go away, we also take
+ * a tmpref on the node to ensure it survives while we are constructing
+ * the transaction. We also need a tmpref on the proc while we are
+ * constructing the transaction, so we take that here as well.
+ *
+ * Return: The target_node with refs taken or NULL if no @node->proc is NULL.
+ * Also sets @proc if valid. If the @node->proc is NULL indicating that the
+ * target proc has died, @error is set to BR_DEAD_REPLY
+ */
+static struct binder_node *binder_get_node_refs_for_txn(
+ struct binder_node *node,
+ struct binder_proc **procp,
+ uint32_t *error)
+{
+ struct binder_node *target_node = NULL;
+
+ binder_node_inner_lock(node);
+ if (node->proc) {
+ target_node = node;
+ binder_inc_node_nilocked(node, 1, 0, NULL);
+ binder_inc_node_tmpref_ilocked(node);
+ node->proc->tmp_ref++;
+ *procp = node->proc;
+ } else
+ *error = BR_DEAD_REPLY;
+ binder_node_inner_unlock(node);
+
+ return target_node;
+}
+
static void binder_transaction(struct binder_proc *proc,
struct binder_thread *thread,
struct binder_transaction_data *tr, int reply,
@@ -2685,43 +2727,35 @@ static void binder_transaction(struct binder_proc *proc,
ref = binder_get_ref_olocked(proc, tr->target.handle,
true);
if (ref) {
- binder_inc_node(ref->node, 1, 0, NULL);
- target_node = ref->node;
- }
- binder_proc_unlock(proc);
- if (target_node == NULL) {
+ target_node = binder_get_node_refs_for_txn(
+ ref->node, &target_proc,
+ &return_error);
+ } else {
binder_user_error("%d:%d got transaction to invalid handle\n",
- proc->pid, thread->pid);
+ proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
- return_error_param = -EINVAL;
- return_error_line = __LINE__;
- goto err_invalid_target_handle;
}
+ binder_proc_unlock(proc);
} else {
mutex_lock(&context->context_mgr_node_lock);
target_node = context->binder_context_mgr_node;
- if (target_node == NULL) {
+ if (target_node)
+ target_node = binder_get_node_refs_for_txn(
+ target_node, &target_proc,
+ &return_error);
+ else
return_error = BR_DEAD_REPLY;
- mutex_unlock(&context->context_mgr_node_lock);
- return_error_line = __LINE__;
- goto err_no_context_mgr_node;
- }
- binder_inc_node(target_node, 1, 0, NULL);
mutex_unlock(&context->context_mgr_node_lock);
}
- e->to_node = target_node->debug_id;
- binder_node_lock(target_node);
- target_proc = target_node->proc;
- if (target_proc == NULL) {
- binder_node_unlock(target_node);
- return_error = BR_DEAD_REPLY;
+ if (!target_node) {
+ /*
+ * return_error is set above
+ */
+ return_error_param = -EINVAL;
return_error_line = __LINE__;
goto err_dead_binder;
}
- binder_inner_proc_lock(target_proc);
- target_proc->tmp_ref++;
- binder_inner_proc_unlock(target_proc);
- binder_node_unlock(target_node);
+ e->to_node = target_node->debug_id;
if (security_binder_transaction(proc->tsk,
target_proc->tsk) < 0) {
return_error = BR_FAILED_REPLY;
@@ -3071,6 +3105,8 @@ static void binder_transaction(struct binder_proc *proc,
if (target_thread)
binder_thread_dec_tmpref(target_thread);
binder_proc_dec_tmpref(target_proc);
+ if (target_node)
+ binder_dec_node_tmpref(target_node);
/*
* write barrier to synchronize with initialization
* of log entry
@@ -3089,6 +3125,8 @@ err_bad_parent:
err_copy_data_failed:
trace_binder_transaction_failed_buffer_release(t->buffer);
binder_transaction_buffer_release(target_proc, t->buffer, offp);
+ if (target_node)
+ binder_dec_node_tmpref(target_node);
target_node = NULL;
t->buffer->transaction = NULL;
binder_alloc_free_buf(&target_proc->alloc, t->buffer);
@@ -3103,13 +3141,14 @@ err_bad_call_stack:
err_empty_call_stack:
err_dead_binder:
err_invalid_target_handle:
-err_no_context_mgr_node:
if (target_thread)
binder_thread_dec_tmpref(target_thread);
if (target_proc)
binder_proc_dec_tmpref(target_proc);
- if (target_node)
+ if (target_node) {
binder_dec_node(target_node, 1, 0);
+ binder_dec_node_tmpref(target_node);
+ }
binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
"%d:%d transaction failed %d/%d, size %lld-%lld line %d\n",
--
2.7.4
From: Todd Kjos <tkjos(a)android.com>
The binder allocator assumes that the thread that
called binder_open will never die for the lifetime of
that proc. That thread is normally the group_leader,
however it may not be. Use the group_leader instead
of current.
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Cc: stable <stable(a)vger.kernel.org> # 4.4+
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index fbe0eb9..a24ad74 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3039,8 +3039,8 @@ static int binder_open(struct inode *nodp, struct file *filp)
proc = kzalloc(sizeof(*proc), GFP_KERNEL);
if (proc == NULL)
return -ENOMEM;
- get_task_struct(current);
- proc->tsk = current;
+ get_task_struct(current->group_leader);
+ proc->tsk = current->group_leader;
INIT_LIST_HEAD(&proc->todo);
init_waitqueue_head(&proc->wait);
proc->default_priority = task_nice(current);
--
2.7.4
From: Todd Kjos <tkjos(a)android.com>
This reverts commit a906d6931f3ccaf7de805643190765ddd7378e27.
The patch introduced a race in the binder driver. An attempt to fix the
race was submitted in "[PATCH v2] android: binder: fix dangling pointer
comparison", however the conclusion in the discussion for that patch
was that the original patch should be reverted.
The reversion is being done as part of the fine-grained locking
patchset since the patch would need to be refactored when
proc->vmm_vm_mm is removed from struct binder_proc and added
in the binder allocator.
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Cc: stable <stable(a)vger.kernel.org> # 4.6+
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 15b263a..b51fedc 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3246,10 +3246,6 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
/*pr_info("binder_ioctl: %d:%d %x %lx\n",
proc->pid, current->pid, cmd, arg);*/
- if (unlikely(current->mm != proc->vma_vm_mm)) {
- pr_err("current mm mismatch proc mm\n");
- return -EINVAL;
- }
trace_binder_ioctl(cmd, arg);
ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
@@ -3465,7 +3461,6 @@ static int binder_open(struct inode *nodp, struct file *filp)
return -ENOMEM;
get_task_struct(current);
proc->tsk = current;
- proc->vma_vm_mm = current->mm;
INIT_LIST_HEAD(&proc->todo);
init_waitqueue_head(&proc->wait);
proc->default_priority = task_nice(current);
--
2.7.4
From: Martijn Coenen <maco(a)android.com>
binder_fd_array_object starts with a 4-byte header,
followed by a few fields that are 8 bytes when
ANDROID_BINDER_IPC_32BIT=N.
This can cause alignment issues in a 64-bit kernel
with a 32-bit userspace, as on x86_32 an 8-byte primitive
may be aligned to a 4-byte address. Pad with a __u32
to fix this.
Signed-off-by: Martijn Coenen <maco(a)android.com>
Cc: stable <stable(a)vger.kernel.org> # 4.11+
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/uapi/linux/android/binder.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 51f891f..7668b57 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -132,6 +132,7 @@ enum {
/* struct binder_fd_array_object - object describing an array of fds in a buffer
* @hdr: common header structure
+ * @pad: padding to ensure correct alignment
* @num_fds: number of file descriptors in the buffer
* @parent: index in offset array to buffer holding the fd array
* @parent_offset: start offset of fd array in the buffer
@@ -152,6 +153,7 @@ enum {
*/
struct binder_fd_array_object {
struct binder_object_header hdr;
+ __u32 pad;
binder_size_t num_fds;
binder_size_t parent;
binder_size_t parent_offset;
--
2.7.4
From: Lisa Du <cldu(a)marvell.com>
There's one point was missed in the patch commit da49889deb34 ("staging:
binder: Support concurrent 32 bit and 64 bit processes."). When configure
BINDER_IPC_32BIT, the size of binder_uintptr_t was 32bits, but size of
void * is 64bit on 64bit system. Correct it here.
Signed-off-by: Lisa Du <cldu(a)marvell.com>
Signed-off-by: Nicolas Boichat <drinkcat(a)chromium.org>
Fixes: da49889deb34 ("staging: binder: Support concurrent 32 bit and 64 bit processes.")
Cc: <stable(a)vger.kernel.org>
Acked-by: Olof Johansson <olof(a)lixom.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/android/binder.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 796301a..16288e7 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2081,7 +2081,7 @@ static int binder_thread_write(struct binder_proc *proc,
if (get_user(cookie, (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
+ ptr += sizeof(cookie);
list_for_each_entry(w, &proc->delivered_death, entry) {
struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work);
--
2.7.4
1. With the patch "x86/vector/msi: Switch to global reservation mode"
(4900be8360), the recent v4.15 and newer kernels always hang for 1-vCPU
Hyper-V VM with SR-IOV. This is because when we reach hv_compose_msi_msg()
by request_irq() -> request_threaded_irq() -> __setup_irq()->irq_startup()
-> __irq_startup() -> irq_domain_activate_irq() -> ... ->
msi_domain_activate() -> ... -> hv_compose_msi_msg(), local irq is
disabled in __setup_irq().
Fix this by polling the channel.
2. If the host is ejecting the VF device before we reach
hv_compose_msi_msg(), in a UP VM, we can hang in hv_compose_msi_msg()
forever, because at this time the host doesn't respond to the
CREATE_INTERRUPT request. This issue also happens to old kernels like
v4.14, v4.13, etc.
Fix this by polling the channel for the PCI_EJECT message and
hpdev->state, and by checking the PCI vendor ID.
Note: actually the above issues also happen to a SMP VM, if
"hbus->hdev->channel->target_cpu == smp_processor_id()" is true.
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Tested-by: Adrian Suhov <v-adsuho(a)microsoft.com>
Tested-by: Chris Valean <v-chvale(a)microsoft.com>
Cc: stable(a)vger.kernel.org
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Jack Morgenstein <jackm(a)mellanox.com>
---
drivers/pci/host/pci-hyperv.c | 58 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 57 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index d3aa6736a9bb..114624dfbd97 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -521,6 +521,8 @@ struct hv_pci_compl {
s32 completion_status;
};
+static void hv_pci_onchannelcallback(void *context);
+
/**
* hv_pci_generic_compl() - Invoked for a completion packet
* @context: Set up by the sender of the packet.
@@ -665,6 +667,31 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
}
}
+static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
+{
+ u16 ret;
+ unsigned long flags;
+ void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET +
+ PCI_VENDOR_ID;
+
+ spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
+
+ /* Choose the function to be read. (See comment above) */
+ writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
+ /* Make sure the function was chosen before we start reading. */
+ mb();
+ /* Read from that function's config space. */
+ ret = readw(addr);
+ /*
+ * mb() is not required here, because the spin_unlock_irqrestore()
+ * is a barrier.
+ */
+
+ spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
+
+ return ret;
+}
+
/**
* _hv_pcifront_write_config() - Internal PCI config write
* @hpdev: The PCI driver's representation of the device
@@ -1107,8 +1134,37 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
* Since this function is called with IRQ locks held, can't
* do normal wait for completion; instead poll.
*/
- while (!try_wait_for_completion(&comp.comp_pkt.host_event))
+ while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
+ /* 0xFFFF means an invalid PCI VENDOR ID. */
+ if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
+ dev_err_once(&hbus->hdev->device,
+ "the device has gone\n");
+ goto free_int_desc;
+ }
+
+ /*
+ * When the higher level interrupt code calls us with
+ * interrupt disabled, we must poll the channel by calling
+ * the channel callback directly when channel->target_cpu is
+ * the current CPU. When the higher level interrupt code
+ * calls us with interrupt enabled, let's add the
+ * local_bh_disable()/enable() to avoid race.
+ */
+ local_bh_disable();
+
+ if (hbus->hdev->channel->target_cpu == smp_processor_id())
+ hv_pci_onchannelcallback(hbus);
+
+ local_bh_enable();
+
+ if (hpdev->state == hv_pcichild_ejecting) {
+ dev_err_once(&hbus->hdev->device,
+ "the device is being ejected\n");
+ goto free_int_desc;
+ }
+
udelay(100);
+ }
if (comp.comp_pkt.completion_status < 0) {
dev_err(&hbus->hdev->device,
--
2.7.4
Since we serialize the present/eject work items now, we don't need the
semaphore any more.
This is suggested by Michael Kelley.
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Jack Morgenstein <jackm(a)mellanox.com>
Cc: stable(a)vger.kernel.org
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
Cc: Michael Kelley (EOSG) <Michael.H.Kelley(a)microsoft.com>
---
drivers/pci/host/pci-hyperv.c | 17 ++---------------
1 file changed, 2 insertions(+), 15 deletions(-)
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index aaee41faf55f..3a385212f666 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -447,7 +447,6 @@ struct hv_pcibus_device {
spinlock_t device_list_lock; /* Protect lists below */
void __iomem *cfg_addr;
- struct semaphore enum_sem;
struct list_head resources_for_children;
struct list_head children;
@@ -1592,12 +1591,8 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
* It must also treat the omission of a previously observed device as
* notification that the device no longer exists.
*
- * Note that this function is a work item, and it may not be
- * invoked in the order that it was queued. Back to back
- * updates of the list of present devices may involve queuing
- * multiple work items, and this one may run before ones that
- * were sent later. As such, this function only does something
- * if is the last one in the queue.
+ * Note that this function is serialized with hv_eject_device_work(),
+ * because both are pushed to the ordered workqueue hbus->wq.
*/
static void pci_devices_present_work(struct work_struct *work)
{
@@ -1618,11 +1613,6 @@ static void pci_devices_present_work(struct work_struct *work)
INIT_LIST_HEAD(&removed);
- if (down_interruptible(&hbus->enum_sem)) {
- put_hvpcibus(hbus);
- return;
- }
-
/* Pull this off the queue and process it if it was the last one. */
spin_lock_irqsave(&hbus->device_list_lock, flags);
while (!list_empty(&hbus->dr_list)) {
@@ -1639,7 +1629,6 @@ static void pci_devices_present_work(struct work_struct *work)
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
if (!dr) {
- up(&hbus->enum_sem);
put_hvpcibus(hbus);
return;
}
@@ -1726,7 +1715,6 @@ static void pci_devices_present_work(struct work_struct *work)
break;
}
- up(&hbus->enum_sem);
put_hvpcibus(hbus);
kfree(dr);
}
@@ -2460,7 +2448,6 @@ static int hv_pci_probe(struct hv_device *hdev,
spin_lock_init(&hbus->config_lock);
spin_lock_init(&hbus->device_list_lock);
spin_lock_init(&hbus->retarget_msi_interrupt_lock);
- sema_init(&hbus->enum_sem, 1);
init_completion(&hbus->remove_event);
hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
hbus->sysdata.domain);
--
2.7.4
When we're in the function, hpdev->state must be hv_pcichild_ejecting:
see hv_pci_eject_device().
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Jack Morgenstein <jackm(a)mellanox.com>
Cc: stable(a)vger.kernel.org
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
Cc: Michael Kelley (EOSG) <Michael.H.Kelley(a)microsoft.com>
---
drivers/pci/host/pci-hyperv.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 1233300f41c6..04edb24c92ee 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1796,10 +1796,7 @@ static void hv_eject_device_work(struct work_struct *work)
hpdev = container_of(work, struct hv_pci_dev, wrk);
- if (hpdev->state != hv_pcichild_ejecting) {
- put_pcichild(hpdev, hv_pcidev_ref_pnp);
- return;
- }
+ WARN_ON(hpdev->state != hv_pcichild_ejecting);
/*
* Ejection can come before or after the PCI bus has been set up, so
--
2.7.4
No functional change.
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Fixes: bdd74440d9e8 ("PCI: hv: Add explicit barriers to config space access")
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: stable(a)vger.kernel.org
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
---
drivers/pci/host/pci-hyperv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 2faf38eab785..1233300f41c6 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -653,7 +653,7 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
break;
}
/*
- * Make sure the write was done before we release the spinlock
+ * Make sure the read was done before we release the spinlock
* allowing consecutive reads/writes.
*/
mb();
--
2.7.4
From: James Bottomley <James.Bottomley(a)HansenPartnership.com>
My Nuvoton 6xx in a Dell XPS-13 has been intermittently failing to work
(necessitating a reboot). The problem seems to be that the TPM gets into a
state where the partial self-test doesn't return TPM_RC_SUCCESS (meaning
all tests have run to completion), but instead returns TPM_RC_TESTING
(meaning some tests are still running in the background). There are
various theories that resending the self-test command actually causes the
tests to restart and thus triggers more TPM_RC_TESTING returns until the
timeout is exceeded.
There are several issues here: firstly being we shouldn't slow down the
boot sequence waiting for the self test to complete once the TPM
backgrounds them. It will actually make available all functions that have
passed and if it gets a failure return TPM_RC_FAILURE to every subsequent
command. So the fix is to kick off self tests once and if they return
TPM_RC_TESTING log that as a backgrounded self test and continue on. In
order to prevent other tpm users from seeing any TPM_RC_TESTING returns
(which it might if they send a command that needs a TPM subsystem which is
still under test), we loop in tpm_transmit_cmd until either a timeout or we
don't get a TPM_RC_TESTING return.
Finally, there have been observations of strange returns from a partial
test. One Nuvoton is occasionally returning TPM_RC_COMMAND_CODE, so treat
any unexpected return from a partial self test as an indication we need to
run a full self test.
[jarkko.sakkinen(a)linux.intel.com: cleaned up James' original commit and
added a proper Fixes line]
Fixes: 2482b1bba5122 ("tpm: Trigger only missing TPM 2.0 self tests")
Cc: stable(a)vger.kernel.org
Signed-off-by: James Bottomley <James.Bottomley(a)HansenPartnership.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkine(a)linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkine(a)linux.intel.com>
---
drivers/char/tpm/tpm-interface.c | 20 ++++++++++++---
drivers/char/tpm/tpm.h | 1 +
drivers/char/tpm/tpm2-cmd.c | 54 ++++++++++++----------------------------
3 files changed, 33 insertions(+), 42 deletions(-)
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 9e80a953d693..1adb976a2e37 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -537,14 +537,26 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_space *space,
const char *desc)
{
const struct tpm_output_header *header = buf;
+ unsigned int delay_msec = TPM2_DURATION_SHORT;
int err;
ssize_t len;
- len = tpm_transmit(chip, space, (u8 *)buf, bufsiz, flags);
- if (len < 0)
- return len;
+ for (;;) {
+ len = tpm_transmit(chip, space, (u8 *)buf, bufsiz, flags);
+ if (len < 0)
+ return len;
+ err = be32_to_cpu(header->return_code);
+ if (err != TPM2_RC_TESTING)
+ break;
+
+ delay_msec *= 2;
+ if (delay_msec > TPM2_DURATION_LONG) {
+ dev_err(&chip->dev, "the self test is still running\n");
+ break;
+ }
+ tpm_msleep(delay_msec);
+ }
- err = be32_to_cpu(header->return_code);
if (err != 0 && desc)
dev_err(&chip->dev, "A TPM error (%d) occurred %s\n", err,
desc);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index f895fba4e20d..cccd5994a0e1 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -104,6 +104,7 @@ enum tpm2_return_codes {
TPM2_RC_HASH = 0x0083, /* RC_FMT1 */
TPM2_RC_HANDLE = 0x008B,
TPM2_RC_INITIALIZE = 0x0100, /* RC_VER1 */
+ TPM2_RC_FAILURE = 0x0101,
TPM2_RC_DISABLED = 0x0120,
TPM2_RC_COMMAND_CODE = 0x0143,
TPM2_RC_TESTING = 0x090A, /* RC_WARN */
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index a700f8f9ead7..89a5397b18d2 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -31,10 +31,6 @@ struct tpm2_startup_in {
__be16 startup_type;
} __packed;
-struct tpm2_self_test_in {
- u8 full_test;
-} __packed;
-
struct tpm2_get_tpm_pt_in {
__be32 cap_id;
__be32 property_id;
@@ -60,7 +56,6 @@ struct tpm2_get_random_out {
union tpm2_cmd_params {
struct tpm2_startup_in startup_in;
- struct tpm2_self_test_in selftest_in;
struct tpm2_get_tpm_pt_in get_tpm_pt_in;
struct tpm2_get_tpm_pt_out get_tpm_pt_out;
struct tpm2_get_random_in getrandom_in;
@@ -827,16 +822,6 @@ unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal)
}
EXPORT_SYMBOL_GPL(tpm2_calc_ordinal_duration);
-#define TPM2_SELF_TEST_IN_SIZE \
- (sizeof(struct tpm_input_header) + \
- sizeof(struct tpm2_self_test_in))
-
-static const struct tpm_input_header tpm2_selftest_header = {
- .tag = cpu_to_be16(TPM2_ST_NO_SESSIONS),
- .length = cpu_to_be32(TPM2_SELF_TEST_IN_SIZE),
- .ordinal = cpu_to_be32(TPM2_CC_SELF_TEST)
-};
-
/**
* tpm2_do_selftest() - ensure that all self tests have passed
*
@@ -852,27 +837,24 @@ static const struct tpm_input_header tpm2_selftest_header = {
*/
static int tpm2_do_selftest(struct tpm_chip *chip)
{
+ struct tpm_buf buf;
+ int full;
int rc;
- unsigned int delay_msec = 10;
- long duration;
- struct tpm2_cmd cmd;
- duration = jiffies_to_msecs(
- tpm2_calc_ordinal_duration(chip, TPM2_CC_SELF_TEST));
-
- while (1) {
- cmd.header.in = tpm2_selftest_header;
- cmd.params.selftest_in.full_test = 0;
-
- rc = tpm_transmit_cmd(chip, NULL, &cmd, TPM2_SELF_TEST_IN_SIZE,
- 0, 0, "continue selftest");
+ for (full = 0; full < 2; full++) {
+ rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_SELF_TEST);
+ if (rc)
+ return rc;
- if (rc != TPM2_RC_TESTING || delay_msec >= duration)
- break;
+ tpm_buf_append_u8(&buf, full);
+ rc = tpm_transmit_cmd(chip, NULL, buf.data, PAGE_SIZE, 0, 0,
+ "attempting the self test");
+ tpm_buf_destroy(&buf);
- /* wait longer than before */
- delay_msec *= 2;
- tpm_msleep(delay_msec);
+ if (rc == TPM2_RC_TESTING)
+ rc = TPM2_RC_SUCCESS;
+ if (rc == TPM2_RC_INITIALIZE || rc == TPM2_RC_SUCCESS)
+ return rc;
}
return rc;
@@ -1058,10 +1040,8 @@ int tpm2_auto_startup(struct tpm_chip *chip)
goto out;
rc = tpm2_do_selftest(chip);
- if (rc != 0 && rc != TPM2_RC_INITIALIZE) {
- dev_err(&chip->dev, "TPM self test failed\n");
+ if (rc && rc != TPM2_RC_INITIALIZE)
goto out;
- }
if (rc == TPM2_RC_INITIALIZE) {
rc = tpm_startup(chip);
@@ -1069,10 +1049,8 @@ int tpm2_auto_startup(struct tpm_chip *chip)
goto out;
rc = tpm2_do_selftest(chip);
- if (rc) {
- dev_err(&chip->dev, "TPM self test failed\n");
+ if (rc)
goto out;
- }
}
rc = tpm2_get_pcr_allocation(chip);
--
2.15.1