From: Joerg Roedel <jroedel(a)suse.de>
The sev_es_get_ghcb() is called from several places, but only one of
them checks the return value. The reaction to returning NULL is always
the same: Calling panic() and kill the machine.
Instead of adding checks to all call-places, move the panic() into the
function itself so that it will no longer return NULL.
Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Cc: stable(a)vger.kernel.org # v5.10+
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
arch/x86/kernel/sev.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 4fa111becc93..82bced88153b 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -203,8 +203,18 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
if (unlikely(data->ghcb_active)) {
/* GHCB is already in use - save its contents */
- if (unlikely(data->backup_ghcb_active))
- return NULL;
+ if (unlikely(data->backup_ghcb_active)) {
+ /*
+ * Backup-GHCB is also already in use. There is no way
+ * to continue here so just kill the machine. To make
+ * panic() work, mark GHCBs inactive so that messages
+ * can be printed out.
+ */
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+
+ panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+ }
/* Mark backup_ghcb active before writing to it */
data->backup_ghcb_active = true;
@@ -1289,7 +1299,6 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
*/
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{
- struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
irqentry_state_t irq_state;
struct ghcb_state state;
struct es_em_ctxt ctxt;
@@ -1315,16 +1324,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
*/
ghcb = sev_es_get_ghcb(&state);
- if (!ghcb) {
- /*
- * Mark GHCBs inactive so that panic() is able to print the
- * message.
- */
- data->ghcb_active = false;
- data->backup_ghcb_active = false;
-
- panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
- }
vc_ghcb_invalidate(ghcb);
result = vc_init_em_ctxt(&ctxt, regs, error_code);
--
2.31.1
From: Joerg Roedel <jroedel(a)suse.de>
The put_user() and get_user() functions do checks on the address which is
passed to them. They check whether the address is actually a user-space
address and whether its fine to access it. They also call might_fault()
to indicate that they could fault and possibly sleep.
All of these checks are neither wanted nor needed in the #VC exception
handler, which can be invoked from almost any context and also for MMIO
instructions from kernel space on kernel memory. All the #VC handler
wants to know is whether a fault happened when the access was tried.
This is provided by __put_user()/__get_user(), which just do the access
no matter what. Also add comments explaining why __get_user() and
__put_user() are the best choice here and why it is safe to use them
in this context. Also explain why copy_to/from_user can't be used.
In addition, also revert commit
024f60d6552 x86/sev-es: ("Handle string port IO to kernel memory properly")
because using __get_user()/__put_user() fixes the same problem while
the above commit introduced several problems:
1) It uses access_ok() which is only allowed in task context.
2) It uses memcpy() which has no fault handling at all and is
thus unsafe to use here.
Fixes: f980f9c31a92 ("x86/sev-es: Compile early handler code into kernel image")
Cc: stable(a)vger.kernel.org # v5.10+
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
arch/x86/kernel/sev.c | 66 ++++++++++++++++++++++++++++++-------------
1 file changed, 46 insertions(+), 20 deletions(-)
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 1f428f401bed..651b81cd648e 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -315,31 +315,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
- /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
- if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
- memcpy(dst, buf, size);
- return ES_OK;
- }
-
+ /*
+ * This function uses __put_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __put_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __put_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_to_user() here because
+ * vc_write_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
switch (size) {
case 1:
memcpy(&d1, buf, 1);
- if (put_user(d1, target))
+ if (__put_user(d1, target))
goto fault;
break;
case 2:
memcpy(&d2, buf, 2);
- if (put_user(d2, target))
+ if (__put_user(d2, target))
goto fault;
break;
case 4:
memcpy(&d4, buf, 4);
- if (put_user(d4, target))
+ if (__put_user(d4, target))
goto fault;
break;
case 8:
memcpy(&d8, buf, 8);
- if (put_user(d8, target))
+ if (__put_user(d8, target))
goto fault;
break;
default:
@@ -370,30 +383,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
- /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
- if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
- memcpy(buf, src, size);
- return ES_OK;
- }
-
+ /*
+ * This function uses __get_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __get_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __get_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_from_user() here because
+ * vc_read_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
switch (size) {
case 1:
- if (get_user(d1, s))
+ if (__get_user(d1, s))
goto fault;
memcpy(buf, &d1, 1);
break;
case 2:
- if (get_user(d2, s))
+ if (__get_user(d2, s))
goto fault;
memcpy(buf, &d2, 2);
break;
case 4:
- if (get_user(d4, s))
+ if (__get_user(d4, s))
goto fault;
memcpy(buf, &d4, 4);
break;
case 8:
- if (get_user(d8, s))
+ if (__get_user(d8, s))
goto fault;
memcpy(buf, &d8, 8);
break;
--
2.31.1
From: Joerg Roedel <jroedel(a)suse.de>
When emulating guest instructions for MMIO or IOIO accesses the #VC
handler might get a page-fault and will not be able to complete. Forward
the page-fault in this case to the correct handler instead of killing
the machine.
Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Cc: stable(a)vger.kernel.org # v5.10+
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
arch/x86/kernel/sev.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 82bced88153b..1f428f401bed 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1270,6 +1270,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
case X86_TRAP_UD:
exc_invalid_op(ctxt->regs);
break;
+ case X86_TRAP_PF:
+ write_cr2(ctxt->fi.cr2);
+ exc_page_fault(ctxt->regs, error_code);
+ break;
case X86_TRAP_AC:
exc_alignment_check(ctxt->regs, error_code);
break;
--
2.31.1
Dear colleaguse
I've submitted a patch for 5.10.37 that wasn't included in 5.10.38,
which would have corrected a patch that has been reverted instead.
More info:
https://bugzilla.kernel.org/show_bug.cgi?id=213077
Now sending to the other kernel list, according to autoresponse
from Greg Kroah-Hartman.
Thanks for any feedback & Kind regards, Joerg Sigle
-------- Weitergeleitete Nachricht --------
From: 15 2021 <>
X-Mozilla-Status: 0001
X-Mozilla-Status2: 00800000
X-Mozilla-Keys: Subject: [PATCH] iommu/vt-d: Fix kernel panic caused by 416fa531c816: Preset Access/Dirty bits for IOVA over FL
Cc: Lu Baolu <baolu.lu(a)linux.intel.com>, Ashok Raj <ashok.raj(a)intel.com>, joro(a)8bytes.org, sashal(a)kernel.org, linux-kernel(a)vger.kernel.org
References: <bug-213077-5531-kGNQn2oCe1(a)https.bugzilla.kernel.org/> <8f485610-7560-239f-207b-cda3234869f2(a)jsigle.com> <041d48f5-0c3c-a435-1980-33492c377e8b(a)linux.intel.com>
From: Joerg M. Sigle <joerg.sigle(a)jsigle.com>
Openpgp: preference=signencrypt
Autocrypt: addr=joerg.sigle(a)jsigle.com; prefer-encrypt=mutual; keydata= xsDiBEQYHMMRBADRvakjCgWbUtuZFxoKohCVAFgjhJ5RtxG3x7NfZj4k/Bm18GXLea1qIwKf aO55x4KCj+/ecbdAaFHFirPAZi45DzvFshgEBKY0w89A4qo7UvX3mqfg/G9RZFT55YDqPMJh
VO3X0r+Qz6ID7BgOVZnmbpnyMiAPx5OpRly+aA4ZQQCg/6ll3zyL6q6AAHhjT0OSgdKXcfkD /3ZQUfDD4+ZbV6IG4fdeXzc8qHyLrqWEf+aQWQjtjxe3+vQIL6VDaACz3eeERETMrnyVLG+p wrIiccShYYkLUt+PeMNiEFMZNi8FzsLv8GiEvxPVaRuHgteX5LgdHsDceqou3UJb4hPQtO1n
8YatK5MfMB3vXFox74rpj0Hh9+yyBACzc6O8F7SYNVvy3oDU9AJR1kkHiXf9Y8Z0SOB13zDW GDPKAewIxGXk6PKaArRugPzd7caUBd8Cha/COUwoWfxdCe1RGZTdSVCoe1TvqqdGtwrw+fis 6XddsfTfLsuPXR3yW1ESPB00utIE/rVG6XbFQ0s5kZQep4ZfftyHBFKUVs0oSm9lcmctTWlj
aGFlbCBTaWdsZSA8am9lcmcuc2lnbGVAd2ViLmRlPsJLBBARAgALBQJEGBziBAsDAQIACgkQ CJ3K818VBio/PwCg1wv3nkMEOCc8Oh+UPDCAID2ZmZcAn1vcO7SDQrp2FGmPqr+g6NH7Qr8N zsNNBEQYHMQQEAD5GKB+WgZhekOQldwFbIeG7GHszUUfDtjgo3nGydx6C6zkP+NGlLYwSlPX
fAIWSIC1FeUpmamfB3TT/+OhxZYgTphluNgN7hBdq7YXHFHYUMoiV0MpvpXoVis4eFwL2/hM TdXjqkbM+84X6CqdFGHjhKlP0YOEqHm274+nQ0YIxswdd1ckOErixPDojhNnl06SE2H22+sl Dhf99pj3yHx5sHIdOHX79sFzxIMRJitDYMPj6NYK/aEoJguuqa6zZQ+iAFMBoHzWq6MSHvoP
Ks4fdIRPyvMX86RA6dfSd7ZCLQI2wSbLaF6dfJgJCo1+Le3kXXn11JJPmxiO/CqnS3wy9kJX twh/CBdyorrWqULzBej5UxE5T7bxbrlLOCDaAadWoxTpj0BV89AHxstDqZSt90xkhkn4DIO9 ZekX1KHTUPj1WV/cdlJPPT2N286Z4VeSWc39uK50T8X8dryDxUcwYc58yWb/Ffm7/ZFexwGq
01uejaClcjrUGvC/RgBYK+X0iP1YTknbzSC0neSRBzZrM2w4DUUdD3yIsxx8Wy2O9vPJI8BD 8KVbGI2Ou1WMuF040zT9fBdXQ6MdGGzeMyEstSr/POGxKUAYEY18hKcKctaGxAMZyAcpesqV DNmWn6vQClCbAkbTCD1mpF1Bn5x8vYlLIhkmuquiXsNV6z3WFwACAg//XFEPM51xtB19Vzdp
V65oFdf9LCNoR9+N2yPyEx/Y4+bmymhhJpJGWLeSiicBx2VONvKpDBlPd0jX3GImm2FjQzbg o38IaAqc1VzjAJ8p7AV0eOttmh5rNUqe8NKPmuXIzNIiHMBjZ6Vsg44aFnOkDVyMTxC08QxJ t6WAKCb3KersKv6AxcTvAuKKIggIzLhrcfbyD61NlxLJRSvNxwmVMhblb5ngZ2ri1SigOC2u
eW527nX6m4vJFvqZ2kGg0KiM9Zam34m4/QCQcUCFAcaoWtQYT0lwwXGuCKhKUBSQO86shLqF yO4jYGYhLJskvVkHbiGtjqqEBjQIag67N9uk1EQFy32e0Vv7nfVmyzCUqHv9EixAN+DtBENz R70xrCFmwBiPNb1HixrGRa8VzeNI66pJPsyCb4+yc/Pc17J2e/Pltyfee/5scr+6Tln2VQb2
Ru89XVni2UI7xj6CR6wfP6hiBKF9DI4nIxEv8r3aLKBLCCKDvS+YAPRtBpSVnk0Cwiri1KHo l38mzjiLqW5LBZ4NkcV3PAMYsAmv/80zY+eGb8YRPnOv/rHCLSesw9Wo8MtH7MXc+PqrZnio 50U8+WpViaE1A5GDCP1KNPTs5ghAM2cHQCPyFxf0GLIeyCdQyAr5JbM4UyJblqNT4+bdgaxy
foletFZEk/WkMXPpFX/CPwMFGEQYHMQIncrzXxUGKhECA0wAoPP81KOLYdkMjQYN7sbcyA3k 8PuOAKC9roFUBE+MA3ttuTAdqMIxhIo1cw==
To: bp(a)alien8.de
Message-ID: <095f9639-2708-48bf-bf56-57ab0866dcee(a)jsigle.com>
Date: Mon, 17 May 2021 08:49:14 +0200
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.9.1
MIME-Version: 1.0
In-Reply-To: <041d48f5-0c3c-a435-1980-33492c377e8b(a)linux.intel.com>
Content-Type: multipart/mixed; boundary="------------B30AAE90B5865B3ACE0C512C"
Content-Language: de-DE-1901
From: Joerg M. Sigle <joerg.sigle(a)jsigle.com>
Patch 416fa531c816 commit 416fa531c8160151090206a51b829b9218b804d9 caused
an immediate kernel panic on boot at RIP: 0010:__domain_mapping+0xa7/0x3a0
with longterm kernel 5.10.37 configured w/ CONFIG_INTEL_IOMMU_DEFAULT_ON=y
due to removal of a check. Putting the check back in place fixes this.
The kernel panic was observed on various Intel Core i7 i5 i3 CPUs from
Sandy Bridge, Haswell, Broadwell and Kaby Lake generations (at least).
It may NOT be reproducible on some older CPU generations.
Suppressing the panic with boot parameter intel_iommu=off is diagnostic.
See: https://bugzilla.kernel.org/show_bug.cgi?id=213077https://bugzilla.kernel.org/show_bug.cgi?id=213087https://bugzilla.kernel.org/show_bug.cgi?id=213095
Signed-off-by: Joerg M. Sigle <joerg.sigle(a)jsigle.com>
Acked-by: Lu Baolu <baolu.lu(a)linux.intel.com>
---
Dear colleagues,
Please find the suggested patch in the attachment, now reformatted to include the affected C function.
It fixes a problem in LT kernel 5.10.37; I'm asking for inclusion into LT kernel 5.10.38.
I'm submitting this now, after receiving Lu Baolu's positive response attached below.
Baolu, I hope that the line "Acked-by: Lu Baolu ..." is ok given your comment.
I hope I'm providing this in a useful way, following
https://www.kernel.org/doc/html/v4.17/process/submitting-patches.html
I'm still unsure whether this line should be added above:
Cc: stable(a)vger.kernel.org
Please add this if needed, also considering Baolu's comment re. upstream/backported.
Thanks and kind regards to all! Joerg
Am 17.05.2021 um 04:51 schrieb Lu Baolu:
> Hi Joerg,
>
> On 5/16/21 7:57 AM, Joerg M. Sigle wrote:
>> Dear colleagues at Intel
>>
>> could you please check the enclosed bug report
>> and confirm whether the suggested patch is valid.
>>
>> Thank you very much & kind regards - Joerg
>>
>>
>> -------- Weitergeleitete Nachricht --------
>> From: bugzilla-daemon(a)bugzilla.kernel.org
>> To: joerg.sigle(a)jsigle.com
>> Subject: [Bug 213077] Kernel 5.10.37 immediately panics at boot w/ Intel Core i7-4910MQ Haswell or Core i3-5010U Broadwell w/ custom .config CONFIG_INTEL_IOMMU_DEFAULT_ON=y, same config worked with 5.10.36, due to commit 416fa531c816 =
>> a8ce9ebbecdfda3322bbcece6b3b25888217f8e3
>> Date: Sat, 15 May 2021 23:47:39 +0000
>> X-Envelope-To: joerg.sigle(a)jsigle.com
>>
>> https://bugzilla.kernel.org/show_bug.cgi?id=213077
>>
>> --- Comment #7 from Joerg M. Sigle (joerg.sigle(a)jsigle.com) ---
>> This patch:
>>
>> 416fa531c816 iommu/vt-d: Preset Access/Dirty bits for IOVA over FL
>> commit 416fa531c8160151090206a51b829b9218b804d9
>> Upstream commit a8ce9ebbecdfda3322bbcece6b3b25888217f8e3
>>
>> https://github.com/arter97/x86-kernel/commit/416fa531c8160151090206a51b829b…
>>
>> while doing other things, changed the conditional:
>>
>> if (!sg) { ...
>> sg_res = nr_pages;
>> pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
>> }
>>
>> to an unconditional:
>>
>> pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
>>
>> Reinserting the check for !sg fixed the immediate panic on boot for me.
>> Reverting the remainder of the same patch had not helped before.
>>
>> Here's a possible patch for 5.10.37:
>>
>> -------------------------------------------------------------------------
>> --- a/drivers/iommu/intel/iommu.c 2021-05-14 09:50:46.000000000 +0200
>> +++ b/drivers/iommu/intel/iommu.c 2021-05-16 01:02:17.816810690 +0200
>> @@ -2373,7 +2373,10 @@
>> }
>> }
>>
>> - pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
>> + if (!sg) {
>> + sg_res = nr_pages;
>> + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
>> + }
>>
>> while (nr_pages > 0) {
>> uint64_t tmp;
>> -------------------------------------------------------------------------
>>
>> Could you please check this patch submission and pass it to upstream?
>
>
> Above fix looks good to me.
>
> This issue is caused by the back-ported patch for stable v5.10.37.
> There's no need for upstream.
>
> Best regards,
> baolu
>
>>
>> I have, however, NOT tried to understand what the code really does.
>> So please ask the suppliers of patch 416fa531c816 whether their
>> removal of the condition was intentional or a mere lapsus. Thanks!
>>
>> Thanks and kind regards, Joerg
>>
>
--
-------------------------------------------------------------------
Dr. med. Jörg M. Sigle +41 76 276 86 94
http://www.ql-recorder.com +41 32 510 23 46
http://www.jsigle.com +49 176 96 43 54 13
Build results:
total: 155 pass: 153 fail: 2
Failed builds:
arm:allmodconfig
arm:axm55xx_defconfig
Qemu test results:
total: 424 pass: 423 fail: 1
Failed tests:
mipsel:mips32r6-generic:malta_32r6_defconfig:nocd:smp:net,pcnet:ide:rootfs
Failures are similar to the build failures in v5.4.y, plus:
drivers/crypto/omap-aes.c: In function 'omap_aes_hw_init':
drivers/crypto/omap-aes.c:110:8: error: implicit declaration of function 'pm_runtime_resume_and_get'
Guenter
Build results:
total: 168 pass: 168 fail: 0
Qemu test results:
total: 406 pass: 405 fail: 1
Failed tests:
mipsel:mips32r6-generic:malta_32r6_defconfig:nocd:smp:net,pcnet:ide:rootfs
This is the same mips assembler related build failure that was also seen
with all other release candidates.
Guenter