Imagine an mmap()'d file. Two threads touch the same address at the same time and fault. Both allocate a physical page and race to install a PTE for that page. Only one will win the race. The loser frees its page, but still continues handling the fault as a success and returns VM_FAULT_NOPAGE from the fault handler.
The same race can happen with SGX. But there's a bug: the loser in the SGX steers into a failure path. The loser EREMOVE's the winner's EPC page, then returns SIGBUS, likely killing the app.
Fix the SGX loser's behavior. Check whether another thread already allocated the page and if yes, return with VM_FAULT_NOPAGE.
The race can be illustrated as follows:
/*                                    /*
 * Fault on CPU1                       * Fault on CPU2
 * on enclave page X                   * on enclave page X
 */                                    */
sgx_vma_fault() {                     sgx_vma_fault() {

  xa_load(&encl->page_array)            xa_load(&encl->page_array)
      == NULL -->                           == NULL -->

  sgx_encl_eaug_page() {                sgx_encl_eaug_page() {

    ...                                   ...

    /*                                    /*
     * alloc encl_page                     * alloc encl_page
     */                                    */
                                          mutex_lock(&encl->lock);
                                          /*
                                           * alloc EPC page
                                           */
                                          epc_page = sgx_alloc_epc_page(...);
                                          /*
                                           * add page to enclave's xarray
                                           */
                                          xa_insert(&encl->page_array, ...);
                                          /*
                                           * add page to enclave via EAUG
                                           * (page is in pending state)
                                           */
                                          /*
                                           * add PTE entry
                                           */
                                          vmf_insert_pfn(...);

                                          mutex_unlock(&encl->lock);
                                          return VM_FAULT_NOPAGE;
                                        }
                                      }
                                      /*
                                       * All good up to here: enclave page
                                       * successfully added to enclave,
                                       * ready for EACCEPT from user space
                                       */
    mutex_lock(&encl->lock);
    /*
     * alloc EPC page
     */
    epc_page = sgx_alloc_epc_page(...);
    /*
     * add page to enclave's xarray,
     * this fails with -EBUSY as this
     * page was already added by CPU2
     */
    xa_insert(&encl->page_array, ...);

err_out_shrink:
    sgx_encl_free_epc_page(epc_page) {
      /*
       * remove page via EREMOVE
       *
       * *BUG*: page added by CPU2 is
       * yanked from enclave while it
       * remains accessible from OS
       * perspective (PTE installed)
       */
      /*
       * free EPC page
       */
      sgx_free_epc_page(epc_page);
    }

    mutex_unlock(&encl->lock);
    /*
     * *BUG*: SIGBUS is returned
     * for a valid enclave page
     */
    return VM_FAULT_SIGBUS;
  }
}
Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: stable@vger.kernel.org
Reported-by: Marcelina Kościelnicka <mwk@invisiblethingslab.com>
Suggested-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
---
 arch/x86/kernel/cpu/sgx/encl.c | 36 ++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index c0a3c00284c8..2aa7ced0e4a0 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -337,6 +337,16 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
 		return VM_FAULT_SIGBUS;
 
+	mutex_lock(&encl->lock);
+
+	/*
+	 * Multiple threads may try to fault on the same page concurrently.
+	 * Re-check if another thread has already done that.
+	 */
+	encl_page = xa_load(&encl->page_array, PFN_DOWN(addr));
+	if (encl_page)
+		goto done;
+
 	/*
 	 * Ignore internal permission checking for dynamically added pages.
 	 * They matter only for data added during the pre-initialization
@@ -345,23 +355,23 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	 */
 	secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
 	encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
-	if (IS_ERR(encl_page))
-		return VM_FAULT_OOM;
-
-	mutex_lock(&encl->lock);
+	if (IS_ERR(encl_page)) {
+		vmret = VM_FAULT_OOM;
+		goto err_out_unlock;
+	}
 
 	epc_page = sgx_encl_load_secs(encl);
 	if (IS_ERR(epc_page)) {
 		if (PTR_ERR(epc_page) == -EBUSY)
 			vmret = VM_FAULT_NOPAGE;
-		goto err_out_unlock;
+		goto err_out_encl;
 	}
 
 	epc_page = sgx_alloc_epc_page(encl_page, false);
 	if (IS_ERR(epc_page)) {
 		if (PTR_ERR(epc_page) == -EBUSY)
 			vmret = VM_FAULT_NOPAGE;
-		goto err_out_unlock;
+		goto err_out_encl;
 	}
 
 	va_page = sgx_encl_grow(encl, false);
@@ -376,10 +386,6 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 
 	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
 			encl_page, GFP_KERNEL);
-	/*
-	 * If ret == -EBUSY then page was created in another flow while
-	 * running without encl->lock
-	 */
 	if (ret)
 		goto err_out_shrink;
 
@@ -389,7 +395,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 
 	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
 	if (ret)
-		goto err_out;
+		goto err_out_eaug;
 
 	encl_page->encl = encl;
 	encl_page->epc_page = epc_page;
@@ -408,20 +414,20 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 		mutex_unlock(&encl->lock);
 		return VM_FAULT_SIGBUS;
 	}
+done:
 	mutex_unlock(&encl->lock);
 	return VM_FAULT_NOPAGE;
 
-err_out:
+err_out_eaug:
 	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
-
 err_out_shrink:
 	sgx_encl_shrink(encl, va_page);
 err_out_epc:
 	sgx_encl_free_epc_page(epc_page);
+err_out_encl:
+	kfree(encl_page);
 err_out_unlock:
 	mutex_unlock(&encl->lock);
-	kfree(encl_page);
-
 	return vmret;
 }
On Wed, 2024-08-21 at 03:02 -0700, Dmitrii Kuvaiskii wrote:
Imagine an mmap()'d file. Two threads touch the same address at the same time and fault. Both allocate a physical page and race to install a PTE for that page. Only one will win the race. The loser frees its page, but still continues handling the fault as a success and returns VM_FAULT_NOPAGE from the fault handler.
The same race can happen with SGX. But there's a bug: the loser in the SGX steers into a failure path. The loser EREMOVE's the winner's EPC page, then returns SIGBUS, likely killing the app.
Fix the SGX loser's behavior. Check whether another thread already allocated the page and if yes, return with VM_FAULT_NOPAGE.
[...]
Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: stable@vger.kernel.org
Reported-by: Marcelina Kościelnicka <mwk@invisiblethingslab.com>
Suggested-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
Reviewed-by: Kai Huang kai.huang@intel.com
On Wed Aug 21, 2024 at 1:02 PM EEST, Dmitrii Kuvaiskii wrote:
Imagine an mmap()'d file. Two threads touch the same address at the same time and fault. Both allocate a physical page and race to install a PTE for that page. Only one will win the race. The loser frees its page, but still continues handling the fault as a success and returns VM_FAULT_NOPAGE from the fault handler.
The same race can happen with SGX. But there's a bug: the loser in the SGX steers into a failure path. The loser EREMOVE's the winner's EPC page, then returns SIGBUS, likely killing the app.
Fix the SGX loser's behavior. Check whether another thread already allocated the page and if yes, return with VM_FAULT_NOPAGE.
The race can be illustrated as follows:
/*                                    /*
 * Fault on CPU1                       * Fault on CPU2
 * on enclave page X                   * on enclave page X
 */                                    */
sgx_vma_fault() {                     sgx_vma_fault() {

  xa_load(&encl->page_array)            xa_load(&encl->page_array)
      == NULL -->                           == NULL -->

  sgx_encl_eaug_page() {                sgx_encl_eaug_page() {

    ...                                   ...

    /*                                    /*
     * alloc encl_page                     * alloc encl_page
     */                                    */
                                          mutex_lock(&encl->lock);
                                          /*
                                           * alloc EPC page
                                           */
                                          epc_page = sgx_alloc_epc_page(...);
                                          /*
                                           * add page to enclave's xarray
                                           */
                                          xa_insert(&encl->page_array, ...);
                                          /*
                                           * add page to enclave via EAUG
                                           * (page is in pending state)
                                           */
                                          /*
                                           * add PTE entry
                                           */
                                          vmf_insert_pfn(...);

                                          mutex_unlock(&encl->lock);
                                          return VM_FAULT_NOPAGE;
                                        }
                                      }
                                      /*
                                       * All good up to here: enclave page
                                       * successfully added to enclave,
                                       * ready for EACCEPT from user space
                                       */
    mutex_lock(&encl->lock);
    /*
     * alloc EPC page
     */
    epc_page = sgx_alloc_epc_page(...);
    /*
     * add page to enclave's xarray,
     * this fails with -EBUSY as this
     * page was already added by CPU2
     */
    xa_insert(&encl->page_array, ...);

err_out_shrink:
    sgx_encl_free_epc_page(epc_page) {
      /*
       * remove page via EREMOVE
       *
       * *BUG*: page added by CPU2 is
       * yanked from enclave while it
       * remains accessible from OS
       * perspective (PTE installed)
       */
      /*
       * free EPC page
       */
      sgx_free_epc_page(epc_page);
    }

    mutex_unlock(&encl->lock);
    /*
     * *BUG*: SIGBUS is returned
     * for a valid enclave page
     */
    return VM_FAULT_SIGBUS;
  }
}
Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: stable@vger.kernel.org
Reported-by: Marcelina Kościelnicka <mwk@invisiblethingslab.com>
Suggested-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
arch/x86/kernel/cpu/sgx/encl.c | 36 ++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-)
arch/x86/kernel/cpu/sgx/encl.c | 36 ++++++++++++++++++++--------------
1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index c0a3c00284c8..2aa7ced0e4a0 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -337,6 +337,16 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
 		return VM_FAULT_SIGBUS;
 
+	mutex_lock(&encl->lock);
+
+	/*
+	 * Multiple threads may try to fault on the same page concurrently.
+	 * Re-check if another thread has already done that.
+	 */
+	encl_page = xa_load(&encl->page_array, PFN_DOWN(addr));
+	if (encl_page)
+		goto done;
+
 	/*
 	 * Ignore internal permission checking for dynamically added pages.
 	 * They matter only for data added during the pre-initialization
@@ -345,23 +355,23 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	 */
 	secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
 	encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
-	if (IS_ERR(encl_page))
-		return VM_FAULT_OOM;
-
-	mutex_lock(&encl->lock);
+	if (IS_ERR(encl_page)) {
+		vmret = VM_FAULT_OOM;
+		goto err_out_unlock;
+	}
 
 	epc_page = sgx_encl_load_secs(encl);
 	if (IS_ERR(epc_page)) {
 		if (PTR_ERR(epc_page) == -EBUSY)
 			vmret = VM_FAULT_NOPAGE;
-		goto err_out_unlock;
+		goto err_out_encl;
 	}
 
 	epc_page = sgx_alloc_epc_page(encl_page, false);
 	if (IS_ERR(epc_page)) {
 		if (PTR_ERR(epc_page) == -EBUSY)
 			vmret = VM_FAULT_NOPAGE;
-		goto err_out_unlock;
+		goto err_out_encl;
 	}
 
 	va_page = sgx_encl_grow(encl, false);
@@ -376,10 +386,6 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 
 	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
 			encl_page, GFP_KERNEL);
-	/*
-	 * If ret == -EBUSY then page was created in another flow while
-	 * running without encl->lock
-	 */
 	if (ret)
 		goto err_out_shrink;
 
@@ -389,7 +395,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 
 	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
 	if (ret)
-		goto err_out;
+		goto err_out_eaug;
 
 	encl_page->encl = encl;
 	encl_page->epc_page = epc_page;
@@ -408,20 +414,20 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 		mutex_unlock(&encl->lock);
 		return VM_FAULT_SIGBUS;
 	}
+done:
 	mutex_unlock(&encl->lock);
 	return VM_FAULT_NOPAGE;
 
-err_out:
+err_out_eaug:
 	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
-
 err_out_shrink:
 	sgx_encl_shrink(encl, va_page);
 err_out_epc:
 	sgx_encl_free_epc_page(epc_page);
+err_out_encl:
+	kfree(encl_page);
err_out_unlock:
 	mutex_unlock(&encl->lock);
-	kfree(encl_page);
-
 	return vmret;
 }
Reviewed-by: Jarkko Sakkinen jarkko@kernel.org
BR, Jarkko
linux-stable-mirror@lists.linaro.org