Memory access #VE's are hard for Linux to handle in contexts like the
entry code or NMIs. But other OSes need them for functionality.
There's a static (pre-guest-boot) way for a VMM to choose one or the
other. But VMMs don't always know which OS they are booting, so they
choose to deliver those #VE's so the "other" OSes will work. That,
unfortunately has left us in the lurch and exposed to these
hard-to-handle #VEs.
The TDX module has introduced a new feature. Even if the static
configuration is "send nasty #VE's", the kernel can dynamically request
that they be disabled.
Check if the feature is available and disable SEPT #VE if possible.
If the TD allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
attribute is no longer reliable. It reflects the initial state of the
control for the TD, but it will not be updated if someone (e.g. bootloader)
changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
determine if SEPT #VEs are enabled or disabled.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Fixes: 373e715e31bf ("x86/tdx: Panic on bad configs that #VE on "private" memory access")
Cc: stable(a)vger.kernel.org
---
arch/x86/coco/tdx/tdx.c | 76 ++++++++++++++++++++++++-------
arch/x86/include/asm/shared/tdx.h | 10 +++-
2 files changed, 69 insertions(+), 17 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 08ce488b54d0..ba3103877b21 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -78,7 +78,7 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
}
/* Read TD-scoped metadata */
-static inline u64 __maybe_unused tdg_vm_rd(u64 field, u64 *value)
+static inline u64 tdg_vm_rd(u64 field, u64 *value)
{
struct tdx_module_args args = {
.rdx = field,
@@ -193,6 +193,62 @@ static void __noreturn tdx_panic(const char *msg)
__tdx_hypercall(&args);
}
+/*
+ * The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
+ * that no #VE will be delivered for accesses to TD-private memory.
+ *
+ * TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
+ * controls if the guest will receive such #VE with TD attribute
+ * ATTR_SEPT_VE_DISABLE.
+ *
+ * Newer TDX module allows the guest to control if it wants to receive SEPT
+ * violation #VEs.
+ *
+ * Check if the feature is available and disable SEPT #VE if possible.
+ *
+ * If the TD allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
+ * attribute is no longer reliable. It reflects the initial state of the
+ * control for the TD, but it will not be updated if someone (e.g. bootloader)
+ * changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
+ * determine if SEPT #VEs are enabled or disabled.
+ */
+static void disable_sept_ve(u64 td_attr)
+{
+ const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
+ bool debug = td_attr & ATTR_DEBUG;
+ u64 config, controls;
+
+ /* Is this TD allowed to disable SEPT #VE */
+ tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
+ if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
+ /* No SEPT #VE controls for the guest: check the attribute */
+ if (td_attr & ATTR_SEPT_VE_DISABLE)
+ return;
+
+ /* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
+ if (debug)
+ pr_warn("%s\n", msg);
+ else
+ tdx_panic(msg);
+ return;
+ }
+
+ /* Check if SEPT #VE has been disabled before us */
+ tdg_vm_rd(TDCS_TD_CTLS, &controls);
+ if (controls & TD_CTLS_PENDING_VE_DISABLE)
+ return;
+
+ /* Keep #VEs enabled for splats in debugging environments */
+ if (debug)
+ return;
+
+ /* Disable SEPT #VEs */
+ tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
+ TD_CTLS_PENDING_VE_DISABLE);
+
+ return;
+}
+
static void tdx_setup(u64 *cc_mask)
{
struct tdx_module_args args = {};
@@ -218,24 +274,12 @@ static void tdx_setup(u64 *cc_mask)
gpa_width = args.rcx & GENMASK(5, 0);
*cc_mask = BIT_ULL(gpa_width - 1);
+ td_attr = args.rdx;
+
/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
- /*
- * The kernel can not handle #VE's when accessing normal kernel
- * memory. Ensure that no #VE will be delivered for accesses to
- * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
- */
- td_attr = args.rdx;
- if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
- const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
-
- /* Relax SEPT_VE_DISABLE check for debug TD. */
- if (td_attr & ATTR_DEBUG)
- pr_warn("%s\n", msg);
- else
- tdx_panic(msg);
- }
+ disable_sept_ve(td_attr);
}
/*
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 7e12cfa28bec..fecb2a6e864b 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -19,9 +19,17 @@
#define TDG_VM_RD 7
#define TDG_VM_WR 8
-/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
+#define TDCS_CONFIG_FLAGS 0x1110000300000016
+#define TDCS_TD_CTLS 0x1110000300000017
#define TDCS_NOTIFY_ENABLES 0x9100000000000010
+/* TDCS_CONFIG_FLAGS bits */
+#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
+
+/* TDCS_TD_CTLS bits */
+#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(0)
+
/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA 0x10001
#define TDVMCALL_GET_QUOTE 0x10002
--
2.43.0
Make is possible to use ACPI without having CONFIG_PCI set.
When initialising ACPI the following call chain occurs:
acpi_init() ->
acpi_bus_init() ->
acpi_load_tables() ->
acpi_ev_install_region_handlers() ->
acpi_ev_install_region_handlers() calls acpi_ev_install_space_handler() on
each of the default address spaces defined as:
u8 acpi_gbl_default_address_spaces[ACPI_NUM_DEFAULT_SPACES] = {
ACPI_ADR_SPACE_SYSTEM_MEMORY,
ACPI_ADR_SPACE_SYSTEM_IO,
ACPI_ADR_SPACE_PCI_CONFIG,
ACPI_ADR_SPACE_DATA_TABLE
};
However in acpi_ev_install_space_handler() the case statement for
ACPI_ADR_SPACE_PCI_CONFIG is ifdef'd as:
#ifdef ACPI_PCI_CONFIGURED
case ACPI_ADR_SPACE_PCI_CONFIG:
handler = acpi_ex_pci_config_space_handler;
setup = acpi_ev_pci_config_region_setup;
break;
#endif
ACPI_PCI_CONFIGURED is not defined if CONFIG_PCI is not enabled, thus the
attempt to install the handler fails.
Fix this by ifdef'ing ACPI_ADR_SPACE_PCI_CONFIG in the list of default
address spaces.
Fixes: bd23fac3eaaa ("ACPICA: Remove PCI bits from ACPICA when CONFIG_PCI is unset")
CC: stable(a)vger.kernel.org # 5.0.x-
Signed-off-by: Suraj Jitindar Singh <surajjs(a)amazon.com>
---
drivers/acpi/acpica/evhandler.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/acpi/acpica/evhandler.c b/drivers/acpi/acpica/evhandler.c
index 1c8cb6d924df..371093acb362 100644
--- a/drivers/acpi/acpica/evhandler.c
+++ b/drivers/acpi/acpica/evhandler.c
@@ -26,7 +26,9 @@ acpi_ev_install_handler(acpi_handle obj_handle,
u8 acpi_gbl_default_address_spaces[ACPI_NUM_DEFAULT_SPACES] = {
ACPI_ADR_SPACE_SYSTEM_MEMORY,
ACPI_ADR_SPACE_SYSTEM_IO,
+#ifdef ACPI_PCI_CONFIGURED
ACPI_ADR_SPACE_PCI_CONFIG,
+#endif
ACPI_ADR_SPACE_DATA_TABLE
};
--
2.34.1
The stop_tx() callback is used to implement software flow control and
must not discard data as the Qualcomm GENI driver is currently doing
when there is an active TX command.
Cancelling an active command can also leave data in the hardware FIFO,
which prevents the watermark interrupt from being enabled when TX is
later restarted. This results in a soft lockup and is easily triggered
by stopping TX using software flow control in a serial console but this
can also happen after suspend.
Fix this by only stopping any active command, and effectively clearing
the hardware fifo, when shutting down the port. Make sure to temporarily
raise the watermark level so that the interrupt fires when TX is
restarted.
Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Cc: stable(a)vger.kernel.org # 4.17
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/tty/serial/qcom_geni_serial.c | 28 +++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 1d5d6045879a..72addeb9f461 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -651,13 +651,8 @@ static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport)
{
u32 irq_en;
- if (qcom_geni_serial_main_active(uport) ||
- !qcom_geni_serial_tx_empty(uport))
- return;
-
irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
irq_en |= M_TX_FIFO_WATERMARK_EN | M_CMD_DONE_EN;
-
writel(DEF_TX_WM, uport->membase + SE_GENI_TX_WATERMARK_REG);
writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN);
}
@@ -665,16 +660,28 @@ static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport)
static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport)
{
u32 irq_en;
- struct qcom_geni_serial_port *port = to_dev_port(uport);
irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
irq_en &= ~(M_CMD_DONE_EN | M_TX_FIFO_WATERMARK_EN);
writel(0, uport->membase + SE_GENI_TX_WATERMARK_REG);
writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN);
- /* Possible stop tx is called multiple times. */
+}
+
+static void qcom_geni_serial_clear_tx_fifo(struct uart_port *uport)
+{
+ struct qcom_geni_serial_port *port = to_dev_port(uport);
+
if (!qcom_geni_serial_main_active(uport))
return;
+ /*
+ * Increase watermark level so that TX can be restarted and wait for
+ * sequencer to start to prevent lockups.
+ */
+ writel(port->tx_fifo_depth, uport->membase + SE_GENI_TX_WATERMARK_REG);
+ qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
+ M_TX_FIFO_WATERMARK_EN, true);
+
geni_se_cancel_m_cmd(&port->se);
if (!qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
M_CMD_CANCEL_EN, true)) {
@@ -684,6 +691,8 @@ static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport)
writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
}
writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
+
+ port->tx_remaining = 0;
}
static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop)
@@ -1069,11 +1078,10 @@ static void qcom_geni_serial_shutdown(struct uart_port *uport)
{
disable_irq(uport->irq);
- if (uart_console(uport))
- return;
-
qcom_geni_serial_stop_tx(uport);
qcom_geni_serial_stop_rx(uport);
+
+ qcom_geni_serial_clear_tx_fifo(uport);
}
static int qcom_geni_serial_port_setup(struct uart_port *uport)
--
2.44.1
The caching mode for buffer objects with VRAM as a possible
placement was forced to write-combined, regardless of placement.
However, write-combined system memory is expensive to allocate and
even though it is pooled, the pool is expensive to shrink, since
it involves global CPU TLB flushes.
Moreover write-combined system memory from TTM is only reliably
available on x86 and DGFX doesn't have an x86 restriction.
So regardless of the cpu caching mode selected for a bo,
internally use write-back caching mode for system memory on DGFX.
Coherency is maintained, but user-space clients may perceive a
difference in cpu access speeds.
Signed-off-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode")
Cc: Pallavi Mishra <pallavi.mishra(a)intel.com>
Cc: Matthew Auld <matthew.auld(a)intel.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: Effie Yu <effie.yu(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Jose Souza <jose.souza(a)intel.com>
Cc: Michal Mrozek <michal.mrozek(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_bo.c | 47 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_bo_types.h | 3 +-
include/uapi/drm/xe_drm.h | 8 +++++-
3 files changed, 37 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 65c696966e96..31192d983d9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -343,7 +343,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
struct xe_device *xe = xe_bo_device(bo);
struct xe_ttm_tt *tt;
unsigned long extra_pages;
- enum ttm_caching caching;
+ enum ttm_caching caching = ttm_cached;
int err;
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -357,26 +357,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
PAGE_SIZE);
- switch (bo->cpu_caching) {
- case DRM_XE_GEM_CPU_CACHING_WC:
- caching = ttm_write_combined;
- break;
- default:
- caching = ttm_cached;
- break;
- }
-
- WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
-
/*
- * Display scanout is always non-coherent with the CPU cache.
- *
- * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
- * require a CPU:WC mapping.
+ * DGFX system memory is always WB / ttm_cached, since
+ * other caching modes are only supported on x86. DGFX
+ * GPU system memory accesses are always coherent with the
+ * CPU.
*/
- if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
- (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
- caching = ttm_write_combined;
+ if (!IS_DGFX(xe)) {
+ switch (bo->cpu_caching) {
+ case DRM_XE_GEM_CPU_CACHING_WC:
+ caching = ttm_write_combined;
+ break;
+ default:
+ caching = ttm_cached;
+ break;
+ }
+
+ WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+ /*
+ * Display scanout is always non-coherent with the CPU cache.
+ *
+ * For Xe_LPG and beyond, PPGTT PTE lookups are also
+ * non-coherent and require a CPU:WC mapping.
+ */
+ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+ (xe->info.graphics_verx100 >= 1270 &&
+ bo->flags & XE_BO_FLAG_PAGETABLE))
+ caching = ttm_write_combined;
+ }
if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 86422e113d39..10450f1fbbde 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -66,7 +66,8 @@ struct xe_bo {
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace
- * objects.
+ * objects. Exceptions are system memory on DGFX, which is always
+ * WB.
*/
u16 cpu_caching;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 93e00be44b2d..1189b3044723 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -783,7 +783,13 @@ struct drm_xe_gem_create {
#define DRM_XE_GEM_CPU_CACHING_WC 2
/**
* @cpu_caching: The CPU caching mode to select for this object. If
- * mmaping the object the mode selected here will also be used.
+ * mmaping the object the mode selected here will also be used. The
+ * exception is when mapping system memory (including evicted
+ * system memory) on discrete GPUs. The caching mode selected will
+ * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency
+ * between GPU- and CPU is guaranteed. The caching mode of
+ * existing CPU-mappings will be updated transparently to
+ * user-space clients.
*/
__u16 cpu_caching;
/** @pad: MBZ */
--
2.44.0
The for_each_child_of_node() macro requires an explicit call to
of_node_put() on early exits to decrement the child refcount and avoid a
memory leak.
The child node is not required outsie the loop, and the resource must be
released before the function returns.
Add the missing of_node_put().
Cc: stable(a)vger.kernel.org
Fixes: 82e82130a78b ("usb: core: Set connect_type of ports based on DT node")
Signed-off-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
---
This bug was found while doing some code analysis, and I could not test
it with real hardware. Although the issue and it solution are
straightforward, any validation beyond compilation and static analysis
is always welcome.
---
drivers/usb/core/of.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c
index f1a499ee482c..763e4122ed5b 100644
--- a/drivers/usb/core/of.c
+++ b/drivers/usb/core/of.c
@@ -84,9 +84,12 @@ static bool usb_of_has_devices_or_graph(const struct usb_device *hub)
if (of_graph_is_present(np))
return true;
- for_each_child_of_node(np, child)
- if (of_property_present(child, "reg"))
+ for_each_child_of_node(np, child) {
+ if (of_property_present(child, "reg")) {
+ of_node_put(child);
return true;
+ }
+ }
return false;
}
---
base-commit: 62c97045b8f720c2eac807a5f38e26c9ed512371
change-id: 20240624-usb_core_of_memleak-79161623b62e
Best regards,
--
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>