There is a potential race condition in amdtee_close_session that may
cause use-after-free in amdtee_open_session. For instance, if a session
has refcount == 1, and one thread tries to free this session via:
kref_put(&sess->refcount, destroy_session);
the reference count will get decremented, and the next step would be to
call destroy_session(). However, if in another thread,
amdtee_open_session() is called before destroy_session() has completed
execution, alloc_session() may return 'sess' that will be freed up
later in destroy_session() leading to use-after-free in
amdtee_open_session.
To fix this issue, treat decrement of sess->refcount and invocation of
destroy_session() as a single critical section, so that it is executed
atomically.
Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Rijo Thomas <Rijo-john.Thomas(a)amd.com>
---
drivers/tee/amdtee/core.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c
index 372d64756ed6..04cee03bec9d 100644
--- a/drivers/tee/amdtee/core.c
+++ b/drivers/tee/amdtee/core.c
@@ -217,14 +217,13 @@ static int copy_ta_binary(struct tee_context *ctx, void *ptr, void **ta,
return rc;
}
+/* mutex must be held by caller */
static void destroy_session(struct kref *ref)
{
struct amdtee_session *sess = container_of(ref, struct amdtee_session,
refcount);
- mutex_lock(&session_list_mutex);
list_del(&sess->list_node);
- mutex_unlock(&session_list_mutex);
kfree(sess);
}
@@ -272,7 +271,9 @@ int amdtee_open_session(struct tee_context *ctx,
if (arg->ret != TEEC_SUCCESS) {
pr_err("open_session failed %d\n", arg->ret);
handle_unload_ta(ta_handle);
+ mutex_lock(&session_list_mutex);
kref_put(&sess->refcount, destroy_session);
+ mutex_unlock(&session_list_mutex);
goto out;
}
@@ -290,7 +291,9 @@ int amdtee_open_session(struct tee_context *ctx,
pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS);
handle_close_session(ta_handle, session_info);
handle_unload_ta(ta_handle);
+ mutex_lock(&session_list_mutex);
kref_put(&sess->refcount, destroy_session);
+ mutex_unlock(&session_list_mutex);
rc = -ENOMEM;
goto out;
}
@@ -331,7 +334,9 @@ int amdtee_close_session(struct tee_context *ctx, u32 session)
handle_close_session(ta_handle, session_info);
handle_unload_ta(ta_handle);
+ mutex_lock(&session_list_mutex);
kref_put(&sess->refcount, destroy_session);
+ mutex_unlock(&session_list_mutex);
return 0;
}
--
2.25.1
During SMBus block data read process, we have seen high interrupt rate
because of TX_EMPTY irq status while waiting for block length byte (the
first data byte after the address phase). The interrupt handler does not
do anything because the internal state is kept as STATUS_WRITE_IN_PROGRESS.
Hence, we should disable TX_EMPTY irq until I2C DW receives first data
byte from I2C device, then re-enable it.
It takes 0.789 ms for host to receive data length from slave.
Without the patch, i2c_dw_isr is called 99 times by TX_EMPTY interrupt.
And it is none after applying the patch.
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuong Tran <chuong(a)os.amperecomputing.com>
Signed-off-by: Tam Nguyen <tamnguyenchi(a)os.amperecomputing.com>
---
drivers/i2c/busses/i2c-designware-master.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index 55ea91a63382..2152b1f9b27c 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -462,6 +462,13 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
if (buf_len > 0 || flags & I2C_M_RECV_LEN) {
/* more bytes to be written */
dev->status |= STATUS_WRITE_IN_PROGRESS;
+ /*
+ * In I2C_FUNC_SMBUS_BLOCK_DATA case, there is no data
+ * to send before receiving data length from slave.
+ * Disable TX_EMPTY while waiting for data length byte
+ */
+ if (flags & I2C_M_RECV_LEN)
+ intr_mask &= ~DW_IC_INTR_TX_EMPTY;
break;
} else
dev->status &= ~STATUS_WRITE_IN_PROGRESS;
@@ -485,6 +492,7 @@ i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len)
{
struct i2c_msg *msgs = dev->msgs;
u32 flags = msgs[dev->msg_read_idx].flags;
+ u32 intr_mask;
/*
* Adjust the buffer length and mask the flag
@@ -495,6 +503,11 @@ i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len)
msgs[dev->msg_read_idx].len = len;
msgs[dev->msg_read_idx].flags &= ~I2C_M_RECV_LEN;
+ /* Re-enable TX_EMPTY interrupt. */
+ regmap_read(dev->map, DW_IC_INTR_MASK, &intr_mask);
+ intr_mask |= DW_IC_INTR_TX_EMPTY;
+ regmap_write(dev->map, DW_IC_INTR_MASK, intr_mask);
+
return len;
}
--
2.25.1
svm_leave_nested() similar to a nested VM exit, get the vCPU out of nested
mode and thus should end the local inhibition of AVIC on this vCPU.
Failure to do so, can lead to hangs on guest reboot.
Raise the KVM_REQ_APICV_UPDATE request to refresh the AVIC state of the
current vCPU in this case.
Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running")
Cc: stable(a)vger.kernel.org
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
---
arch/x86/kvm/svm/nested.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dd496c9e5f91f28..3fea8c47679e689 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
+
+ if (kvm_apicv_activated(vcpu->kvm))
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
--
2.26.3
In later revisions of AMD's APM, there is a new 'incomplete IPI' exit code:
"Invalid IPI Vector - The vector for the specified IPI was set to an
illegal value (VEC < 16)"
Note that tests on Zen2 machine show that this VM exit doesn't happen and
instead AVIC just does nothing.
Add support for this exit code by doing nothing, instead of filling
the kernel log with errors.
Also replace an unthrottled 'pr_err()' if another unknown incomplete
IPI exit happens with vcpu_unimpl()
(e.g in case AMD adds yet another 'Invalid IPI' exit reason)
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
---
arch/x86/include/asm/svm.h | 1 +
arch/x86/kvm/svm/avic.c | 5 ++++-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 19bf955b67e0da0..3ac0ffc4f3e202b 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -268,6 +268,7 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
AVIC_IPI_FAILURE_INVALID_TARGET,
AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+ AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};
#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 2092db892d7d052..4b74ea91f4e6bb6 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
+ case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR:
+ /* Invalid IPI with vector < 16 */
+ break;
default:
- pr_err("Unknown IPI interception\n");
+ vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n");
}
return 1;
--
2.26.3
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: c6c2adcba50c2622ed25ba5d5e7f05f584711358
Gitweb: https://git.kernel.org/tip/c6c2adcba50c2622ed25ba5d5e7f05f584711358
Author: Haitao Huang <haitao.huang(a)linux.intel.com>
AuthorDate: Thu, 27 Jul 2023 22:10:24 -07:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Thu, 28 Sep 2023 16:16:40 -07:00
x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race
The SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC page for an
enclave and set secs.epc_page to NULL. The SECS page is used for EAUG
and ELDU in the SGX page fault handler. However, the NULL check for
secs.epc_page is only done for ELDU, not EAUG before being used.
Fix this by doing the same NULL check and reloading of the SECS page as
needed for both EAUG and ELDU.
The SECS page holds global enclave metadata. It can only be reclaimed
when there are no other enclave pages remaining. At that point,
virtually nothing can be done with the enclave until the SECS page is
paged back in.
An enclave can not run nor generate page faults without a resident SECS
page. But it is still possible for a #PF for a non-SECS page to race
with paging out the SECS page: when the last resident non-SECS page A
triggers a #PF in a non-resident page B, and then page A and the SECS
both are paged out before the #PF on B is handled.
Hitting this bug requires that race triggered with a #PF for EAUG.
Following is a trace when it happens.
BUG: kernel NULL pointer dereference, address: 0000000000000000
RIP: 0010:sgx_encl_eaug_page+0xc7/0x210
Call Trace:
? __kmem_cache_alloc_node+0x16a/0x440
? xa_load+0x6e/0xa0
sgx_vma_fault+0x119/0x230
__do_fault+0x36/0x140
do_fault+0x12f/0x400
__handle_mm_fault+0x728/0x1110
handle_mm_fault+0x105/0x310
do_user_addr_fault+0x1ee/0x750
? __this_cpu_preempt_check+0x13/0x20
exc_page_fault+0x76/0x180
asm_exc_page_fault+0x27/0x30
Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Signed-off-by: Haitao Huang <haitao.huang(a)linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko(a)kernel.org>
Reviewed-by: Kai Huang <kai.huang(a)intel.com>
Acked-by: Reinette Chatre <reinette.chatre(a)intel.com>
Cc:stable@vger.kernel.org
Link: https://lore.kernel.org/all/20230728051024.33063-1-haitao.huang%40linux.int…
---
arch/x86/kernel/cpu/sgx/encl.c | 30 +++++++++++++++++++++++++-----
1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 91fa70e..279148e 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -235,6 +235,21 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
return epc_page;
}
+/*
+ * Ensure the SECS page is not swapped out. Must be called with encl->lock
+ * to protect the enclave states including SECS and ensure the SECS page is
+ * not swapped out again while being used.
+ */
+static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
+{
+ struct sgx_epc_page *epc_page = encl->secs.epc_page;
+
+ if (!epc_page)
+ epc_page = sgx_encl_eldu(&encl->secs, NULL);
+
+ return epc_page;
+}
+
static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
struct sgx_encl_page *entry)
{
@@ -248,11 +263,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
return entry;
}
- if (!(encl->secs.epc_page)) {
- epc_page = sgx_encl_eldu(&encl->secs, NULL);
- if (IS_ERR(epc_page))
- return ERR_CAST(epc_page);
- }
+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page))
+ return ERR_CAST(epc_page);
epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
if (IS_ERR(epc_page))
@@ -339,6 +352,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
mutex_lock(&encl->lock);
+ epc_page = sgx_encl_load_secs(encl);
+ if (IS_ERR(epc_page)) {
+ if (PTR_ERR(epc_page) == -EBUSY)
+ vmret = VM_FAULT_NOPAGE;
+ goto err_out_unlock;
+ }
+
epc_page = sgx_alloc_epc_page(encl_page, false);
if (IS_ERR(epc_page)) {
if (PTR_ERR(epc_page) == -EBUSY)
From: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Among other things uart_sanitize_serial_rs485() tests the sanity of the RTS
settings in a RS485 configuration that has been passed by userspace.
If RTS-on-send and RTS-after-send are both set or unset the configuration
is adjusted and RTS-after-send is disabled and RTS-on-send enabled.
This however makes only sense if both RTS modes are actually supported by
the driver.
With commit be2e2cb1d281 ("serial: Sanitize rs485_struct") the code does
take the driver support into account but only checks if one of both RTS
modes are supported. This may lead to the errorneous result of RTS-on-send
being set even if only RTS-after-send is supported.
Fix this by changing the implemented logic: First clear all unsupported
flags in the RS485 configuration, then adjust an invalid RTS setting by
taking into account which RTS mode is supported.
Cc: stable(a)vger.kernel.org
Fixes: be2e2cb1d281 ("serial: Sanitize rs485_struct")
Signed-off-by: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
---
drivers/tty/serial/serial_core.c | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 697c36dc7ec8..f4feebf8200f 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1370,19 +1370,27 @@ static void uart_sanitize_serial_rs485(struct uart_port *port, struct serial_rs4
return;
}
+ rs485->flags &= supported_flags;
+
/* Pick sane settings if the user hasn't */
- if ((supported_flags & (SER_RS485_RTS_ON_SEND|SER_RS485_RTS_AFTER_SEND)) &&
- !(rs485->flags & SER_RS485_RTS_ON_SEND) ==
+ if (!(rs485->flags & SER_RS485_RTS_ON_SEND) ==
!(rs485->flags & SER_RS485_RTS_AFTER_SEND)) {
- dev_warn_ratelimited(port->dev,
- "%s (%d): invalid RTS setting, using RTS_ON_SEND instead\n",
- port->name, port->line);
- rs485->flags |= SER_RS485_RTS_ON_SEND;
- rs485->flags &= ~SER_RS485_RTS_AFTER_SEND;
- supported_flags |= SER_RS485_RTS_ON_SEND|SER_RS485_RTS_AFTER_SEND;
- }
+ if (supported_flags & SER_RS485_RTS_ON_SEND) {
+ rs485->flags |= SER_RS485_RTS_ON_SEND;
+ rs485->flags &= ~SER_RS485_RTS_AFTER_SEND;
- rs485->flags &= supported_flags;
+ dev_warn_ratelimited(port->dev,
+ "%s (%d): invalid RTS setting, using RTS_ON_SEND instead\n",
+ port->name, port->line);
+ } else {
+ rs485->flags |= SER_RS485_RTS_AFTER_SEND;
+ rs485->flags &= ~SER_RS485_RTS_ON_SEND;
+
+ dev_warn_ratelimited(port->dev,
+ "%s (%d): invalid RTS setting, using RTS_AFTER_SEND instead\n",
+ port->name, port->line);
+ }
+ }
uart_sanitize_serial_rs485_delays(port, rs485);
--
2.40.1
From: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
If the RS485 feature RX-during-TX is supported by means of a GPIO set the
according supported flag. Otherwise setting this feature from userspace may
not be possible, since in uart_sanitize_serial_rs485() the passed RS485
configuration is matched against the supported features and unsupported
settings are thereby removed and thus take no effect.
Cc: stable(a)vger.kernel.org
Fixes: 163f080eb717 ("serial: core: Add option to output RS485 RX_DURING_TX state via GPIO")
Signed-off-by: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
---
drivers/tty/serial/serial_core.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index ef0500be3553..697c36dc7ec8 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -3622,6 +3622,8 @@ int uart_get_rs485_mode(struct uart_port *port)
port->rs485_rx_during_tx_gpio = NULL;
return dev_err_probe(dev, ret, "Cannot get rs485-rx-during-tx-gpios\n");
}
+ if (port->rs485_rx_during_tx_gpio)
+ port->rs485_supported.flags |= SER_RS485_RX_DURING_TX;
return 0;
}
--
2.40.1
During RCU-boost testing with the TREE03 rcutorture config, I found that
after a few hours, the machine locks up.
On tracing, I found that there is a live lock happening between 2 CPUs.
One CPU has an RT task running, while another CPU is being offlined
which also has an RT task running. During this offlining, all threads
are migrated. The migration thread is repeatedly scheduled to migrate
actively running tasks on the CPU being offlined. This results in a live
lock because select_fallback_rq() keeps picking the CPU that an RT task
is already running on only to get pushed back to the CPU being offlined.
It is anyway pointless to pick CPUs for pushing tasks to if they are
being offlined only to get migrated away to somewhere else. This could
also add unwanted latency to this task.
Fix these issues by not selecting CPUs in RT if they are not 'active'
for scheduling, using the cpu_active_mask. Other parts in core.c already
use cpu_active_mask to prevent tasks from being put on CPUs going
offline.
Tested-by: Paul E. McKenney <paulmck(a)kernel.org>
Cc: stable(a)vger.kernel.org
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
---
kernel/sched/cpupri.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index a286e726eb4b..42c40cfdf836 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -101,6 +101,7 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
if (lowest_mask) {
cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
+ cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
/*
* We have to ensure that we have at least one bit
--
2.42.0.515.g380fc7ccd1-goog