The SPMI GPIO driver assumes that the parent device is an SPMI device
and accesses random data when backcasting the parent struct device
pointer for non-SPMI devices.
Fortunately this does not seem to cause any issues currently when the
parent device is an I2C client like the PM8008, but this could change if
the structures are reorganised (e.g. using structure randomisation).
Notably the interrupt implementation is also broken for non-SPMI devices.
Also note that the two GPIO pins on PM8008 are used for interrupts and
reset so their practical use should be limited.
Drop the broken GPIO support for PM8008 for now.
Fixes: ea119e5a482a ("pinctrl: qcom-pmic-gpio: Add support for pm8008")
Cc: stable(a)vger.kernel.org # 5.13
Reviewed-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Reviewed-by: Stephen Boyd <swboyd(a)chromium.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index f4e2c88a7c82..e61be7d05494 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -1206,7 +1206,6 @@ static const struct of_device_id pmic_gpio_of_match[] = {
{ .compatible = "qcom,pm7325-gpio", .data = (void *) 10 },
{ .compatible = "qcom,pm7550ba-gpio", .data = (void *) 8},
{ .compatible = "qcom,pm8005-gpio", .data = (void *) 4 },
- { .compatible = "qcom,pm8008-gpio", .data = (void *) 2 },
{ .compatible = "qcom,pm8019-gpio", .data = (void *) 6 },
/* pm8150 has 10 GPIOs with holes on 2, 5, 7 and 8 */
{ .compatible = "qcom,pm8150-gpio", .data = (void *) 10 },
--
2.44.1
This is a note to let you know that I've just added the patch titled
iio: temperature: mlx90635: Fix ERR_PTR dereference in
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From a23c14b062d8800a2192077d83273bbfe6c7552d Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Date: Mon, 13 May 2024 13:34:27 -0700
Subject: iio: temperature: mlx90635: Fix ERR_PTR dereference in
mlx90635_probe()
When devm_regmap_init_i2c() fails, regmap_ee could be error pointer,
instead of checking for IS_ERR(regmap_ee), regmap is checked which looks
like a copy paste error.
Fixes: a1d1ba5e1c28 ("iio: temperature: mlx90635 MLX90635 IR Temperature sensor")
Reviewed-by: Crt Mori<cmo(a)melexis.com>
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
Link: https://lore.kernel.org/r/20240513203427.3208696-1-harshit.m.mogalapalli@or…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/temperature/mlx90635.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/iio/temperature/mlx90635.c b/drivers/iio/temperature/mlx90635.c
index 1f5c962c1818..f7f88498ba0e 100644
--- a/drivers/iio/temperature/mlx90635.c
+++ b/drivers/iio/temperature/mlx90635.c
@@ -947,9 +947,9 @@ static int mlx90635_probe(struct i2c_client *client)
"failed to allocate regmap\n");
regmap_ee = devm_regmap_init_i2c(client, &mlx90635_regmap_ee);
- if (IS_ERR(regmap))
- return dev_err_probe(&client->dev, PTR_ERR(regmap),
- "failed to allocate regmap\n");
+ if (IS_ERR(regmap_ee))
+ return dev_err_probe(&client->dev, PTR_ERR(regmap_ee),
+ "failed to allocate EEPROM regmap\n");
mlx90635 = iio_priv(indio_dev);
i2c_set_clientdata(client, indio_dev);
--
2.45.2
This is a note to let you know that I've just added the patch titled
iio: imu: bmi323: Fix trigger notification in case of error
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From bedb2ccb566de5ca0c336ca3fd3588cea6d50414 Mon Sep 17 00:00:00 2001
From: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Date: Wed, 8 May 2024 17:54:07 +0200
Subject: iio: imu: bmi323: Fix trigger notification in case of error
In case of error in the bmi323_trigger_handler() function, the
function exits without calling the iio_trigger_notify_done()
which is responsible for informing the attached trigger that
the process is done and in case there is a .reenable(), to
call it.
Fixes: 8a636db3aa57 ("iio: imu: Add driver for BMI323 IMU")
Signed-off-by: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Link: https://lore.kernel.org/r/20240508155407.139805-1-vassilisamir@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/imu/bmi323/bmi323_core.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/imu/bmi323/bmi323_core.c b/drivers/iio/imu/bmi323/bmi323_core.c
index 5d42ab9b176a..67d74a1a1b26 100644
--- a/drivers/iio/imu/bmi323/bmi323_core.c
+++ b/drivers/iio/imu/bmi323/bmi323_core.c
@@ -1391,7 +1391,7 @@ static irqreturn_t bmi323_trigger_handler(int irq, void *p)
&data->buffer.channels,
ARRAY_SIZE(data->buffer.channels));
if (ret)
- return IRQ_NONE;
+ goto out;
} else {
for_each_set_bit(bit, indio_dev->active_scan_mask,
BMI323_CHAN_MAX) {
@@ -1400,13 +1400,14 @@ static irqreturn_t bmi323_trigger_handler(int irq, void *p)
&data->buffer.channels[index++],
BMI323_BYTES_PER_SAMPLE);
if (ret)
- return IRQ_NONE;
+ goto out;
}
}
iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer,
iio_get_time_ns(indio_dev));
+out:
iio_trigger_notify_done(indio_dev->trig);
return IRQ_HANDLED;
--
2.45.2
This is a note to let you know that I've just added the patch titled
iio: dac: ad5592r: fix temperature channel scaling value
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 279428df888319bf68f2686934897301a250bb84 Mon Sep 17 00:00:00 2001
From: Marc Ferland <marc.ferland(a)sonatest.com>
Date: Wed, 1 May 2024 11:05:54 -0400
Subject: iio: dac: ad5592r: fix temperature channel scaling value
The scale value for the temperature channel is (assuming Vref=2.5 and
the datasheet):
376.7897513
When calculating both val and val2 for the temperature scale we
use (3767897513/25) and multiply it by Vref (here I assume 2500mV) to
obtain:
2500 * (3767897513/25) ==> 376789751300
Finally we divide with remainder by 10^9 to get:
val = 376
val2 = 789751300
However, we return IIO_VAL_INT_PLUS_MICRO (should have been NANO) as
the scale type. So when converting the raw temperature value to the
'processed' temperature value we will get (assuming raw=810,
offset=-753):
processed = (raw + offset) * scale_val
= (810 + -753) * 376
= 21432
processed += div((raw + offset) * scale_val2, 10^6)
+= div((810 + -753) * 789751300, 10^6)
+= 45015
==> 66447
==> 66.4 Celcius
instead of the expected 21.5 Celsius.
Fix this issue by changing IIO_VAL_INT_PLUS_MICRO to
IIO_VAL_INT_PLUS_NANO.
Fixes: 56ca9db862bf ("iio: dac: Add support for the AD5592R/AD5593R ADCs/DACs")
Signed-off-by: Marc Ferland <marc.ferland(a)sonatest.com>
Link: https://lore.kernel.org/r/20240501150554.1871390-1-marc.ferland@sonatest.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/dac/ad5592r-base.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c
index 076bc9ecfb49..4763402dbcd6 100644
--- a/drivers/iio/dac/ad5592r-base.c
+++ b/drivers/iio/dac/ad5592r-base.c
@@ -415,7 +415,7 @@ static int ad5592r_read_raw(struct iio_dev *iio_dev,
s64 tmp = *val * (3767897513LL / 25LL);
*val = div_s64_rem(tmp, 1000000000LL, val2);
- return IIO_VAL_INT_PLUS_MICRO;
+ return IIO_VAL_INT_PLUS_NANO;
}
mutex_lock(&st->lock);
--
2.45.2
This is a note to let you know that I've just added the patch titled
iio: pressure: bmp280: Fix BMP580 temperature reading
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 0f0f6306617cb4b6231fc9d4ec68ab9a56dba7c0 Mon Sep 17 00:00:00 2001
From: Adam Rizkalla <ajarizzo(a)gmail.com>
Date: Thu, 25 Apr 2024 01:22:49 -0500
Subject: iio: pressure: bmp280: Fix BMP580 temperature reading
Fix overflow issue when storing BMP580 temperature reading and
properly preserve sign of 24-bit data.
Signed-off-by: Adam Rizkalla <ajarizzo(a)gmail.com>
Tested-By: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Acked-by: Angel Iglesias <ang.iglesiasg(a)gmail.com>
Link: https://lore.kernel.org/r/Zin2udkXRD0+GrML@adam-asahi.lan
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/bmp280-core.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index 09f53d987c7d..221fa2c552ae 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -1394,12 +1394,12 @@ static int bmp580_read_temp(struct bmp280_data *data, int *val, int *val2)
/*
* Temperature is returned in Celsius degrees in fractional
- * form down 2^16. We rescale by x1000 to return milli Celsius
- * to respect IIO ABI.
+ * form down 2^16. We rescale by x1000 to return millidegrees
+ * Celsius to respect IIO ABI.
*/
- *val = raw_temp * 1000;
- *val2 = 16;
- return IIO_VAL_FRACTIONAL_LOG2;
+ raw_temp = sign_extend32(raw_temp, 23);
+ *val = ((s64)raw_temp * 1000) / (1 << 16);
+ return IIO_VAL_INT;
}
static int bmp580_read_press(struct bmp280_data *data, int *val, int *val2)
--
2.45.2
This is a note to let you know that I've just added the patch titled
iio: adc: ad9467: fix scan type sign
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 8a01ef749b0a632f0e1f4ead0f08b3310d99fcb1 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Fri, 3 May 2024 14:45:05 -0500
Subject: iio: adc: ad9467: fix scan type sign
According to the IIO documentation, the sign in the scan type should be
lower case. The ad9467 driver was incorrectly using upper case.
Fix by changing to lower case.
Fixes: 4606d0f4b05f ("iio: adc: ad9467: add support for AD9434 high-speed ADC")
Fixes: ad6797120238 ("iio: adc: ad9467: add support AD9467 ADC")
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Link: https://lore.kernel.org/r/20240503-ad9467-fix-scan-type-sign-v1-1-c7a1a066e…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad9467.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/adc/ad9467.c b/drivers/iio/adc/ad9467.c
index e85b763b9ffc..8f5b9c3f6e3d 100644
--- a/drivers/iio/adc/ad9467.c
+++ b/drivers/iio/adc/ad9467.c
@@ -243,11 +243,11 @@ static void __ad9467_get_scale(struct ad9467_state *st, int index,
}
static const struct iio_chan_spec ad9434_channels[] = {
- AD9467_CHAN(0, 0, 12, 'S'),
+ AD9467_CHAN(0, 0, 12, 's'),
};
static const struct iio_chan_spec ad9467_channels[] = {
- AD9467_CHAN(0, 0, 16, 'S'),
+ AD9467_CHAN(0, 0, 16, 's'),
};
static const struct ad9467_chip_info ad9467_chip_tbl = {
--
2.45.2
A Rembrandt-based HP thin client is reported to have problems where
the NVME disk isn't present after resume from s2idle.
This is because the NVME disk wasn't put into D3 at suspend, and
that happened because the StorageD3Enable _DSD was missing in the BIOS.
As AMD's architecture requires that the NVME is in D3 for s2idle, adjust
the criteria for force_storage_d3 to match *all* Zen SoCs when the FADT
advertises low power idle support.
This will ensure that any future products with this BIOS deficiency don't
need to be added to the allow list of overrides.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
drivers/acpi/x86/utils.c | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index 90c3d2eab9e9..7507a7706898 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -197,16 +197,16 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s
}
/*
- * AMD systems from Renoir and Lucienne *require* that the NVME controller
+ * AMD systems from Renoir onwards *require* that the NVME controller
* is put into D3 over a Modern Standby / suspend-to-idle cycle.
*
* This is "typically" accomplished using the `StorageD3Enable`
* property in the _DSD that is checked via the `acpi_storage_d3` function
- * but this property was introduced after many of these systems launched
- * and most OEM systems don't have it in their BIOS.
+ * but some OEM systems still don't have it in their BIOS.
*
* The Microsoft documentation for StorageD3Enable mentioned that Windows has
- * a hardcoded allowlist for D3 support, which was used for these platforms.
+ * a hardcoded allowlist for D3 support as well as a registry key to override
+ * the BIOS, which has been used for these cases.
*
* This allows quirking on Linux in a similar fashion.
*
@@ -219,19 +219,15 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s
* https://bugzilla.kernel.org/show_bug.cgi?id=216773
* https://bugzilla.kernel.org/show_bug.cgi?id=217003
* 2) On at least one HP system StorageD3Enable is missing on the second NVME
- disk in the system.
+ * disk in the system.
+ * 3) On at least one HP Rembrandt system StorageD3Enable is missing on the only
+ * NVME device.
*/
-static const struct x86_cpu_id storage_d3_cpu_ids[] = {
- X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 24, NULL), /* Picasso */
- X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL), /* Renoir */
- X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */
- X86_MATCH_VENDOR_FAM_MODEL(AMD, 25, 80, NULL), /* Cezanne */
- {}
-};
-
bool force_storage_d3(void)
{
- return x86_match_cpu(storage_d3_cpu_ids);
+ if (!cpu_feature_enabled(X86_FEATURE_ZEN))
+ return false;
+ return acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0;
}
/*
--
2.43.0
Two enclave threads may try to add and remove the same enclave page
simultaneously (e.g., if the SGX runtime supports both lazy allocation
and MADV_DONTNEED semantics). Consider some enclave page added to the
enclave. User space decides to temporarily remove this page (e.g.,
emulating the MADV_DONTNEED semantics) on CPU1. At the same time, user
space performs a memory access on the same page on CPU2, which results
in a #PF and ultimately in sgx_vma_fault(). Scenario proceeds as
follows:
/*
* CPU1: User space performs
* ioctl(SGX_IOC_ENCLAVE_REMOVE_PAGES)
* on enclave page X
*/
sgx_encl_remove_pages() {
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl);
/*
* verify that page is
* trimmed and accepted
*/
mutex_unlock(&encl->lock);
/*
* remove PTE entry; cannot
* be performed under lock
*/
sgx_zap_enclave_ptes(encl);
/*
* Fault on CPU2 on same page X
*/
sgx_vma_fault() {
/*
* PTE entry was removed, but the
* page is still in enclave's xarray
*/
xa_load(&encl->page_array) != NULL ->
/*
* SGX driver thinks that this page
* was swapped out and loads it
*/
mutex_lock(&encl->lock);
/*
* this is effectively a no-op
*/
entry = sgx_encl_load_page_in_vma();
/*
* add PTE entry
*
* *BUG*: a PTE is installed for a
* page in process of being removed
*/
vmf_insert_pfn(...);
mutex_unlock(&encl->lock);
return VM_FAULT_NOPAGE;
}
/*
* continue with page removal
*/
mutex_lock(&encl->lock);
sgx_encl_free_epc_page(epc_page) {
/*
* remove page via EREMOVE
*/
/*
* free EPC page
*/
sgx_free_epc_page(epc_page);
}
xa_erase(&encl->page_array);
mutex_unlock(&encl->lock);
}
Here, CPU1 removed the page. However CPU2 installed the PTE entry on the
same page. This enclave page becomes perpetually inaccessible (until
another SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl). This is because the page is
marked accessible in the PTE entry but is not EAUGed, and any subsequent
access to this page raises a fault: with the kernel believing there to
be a valid VMA, the unlikely error code X86_PF_SGX encountered by code
path do_user_addr_fault() -> access_error() causes the SGX driver's
sgx_vma_fault() to be skipped and user space receives a SIGSEGV instead.
The userspace SIGSEGV handler cannot perform EACCEPT because the page
was not EAUGed. Thus, the user space is stuck with the inaccessible
page.
Fix this race by forcing the fault handler on CPU2 to back off if the
page is currently being removed (on CPU1). This is achieved by
introducing a new flag SGX_ENCL_PAGE_BEING_REMOVED, which is unset by
default and set only right-before the first mutex_unlock() in
sgx_encl_remove_pages(). Upon loading the page, CPU2 checks whether this
page is being removed, and if yes then CPU2 backs off and waits until
the page is completely removed. After that, any memory access to this
page results in a normal "allocate and EAUG a page on #PF" flow.
Fixes: 9849bb27152c ("x86/sgx: Support complete page removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii(a)intel.com>
Reviewed-by: Haitao Huang <haitao.huang(a)linux.intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko(a)kernel.org>
Acked-by: Reinette Chatre <reinette.chatre(a)intel.com>
---
arch/x86/kernel/cpu/sgx/encl.c | 3 ++-
arch/x86/kernel/cpu/sgx/encl.h | 3 +++
arch/x86/kernel/cpu/sgx/ioctl.c | 1 +
3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 41f14b1a3025..7ccd8b2fce5f 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -257,7 +257,8 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
/* Entry successfully located. */
if (entry->epc_page) {
- if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)
+ if (entry->desc & (SGX_ENCL_PAGE_BEING_RECLAIMED |
+ SGX_ENCL_PAGE_BEING_REMOVED))
return ERR_PTR(-EBUSY);
return entry;
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index f94ff14c9486..fff5f2293ae7 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -25,6 +25,9 @@
/* 'desc' bit marking that the page is being reclaimed. */
#define SGX_ENCL_PAGE_BEING_RECLAIMED BIT(3)
+/* 'desc' bit marking that the page is being removed. */
+#define SGX_ENCL_PAGE_BEING_REMOVED BIT(2)
+
struct sgx_encl_page {
unsigned long desc;
unsigned long vm_max_prot_bits:8;
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 5d390df21440..de59219ae794 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -1142,6 +1142,7 @@ static long sgx_encl_remove_pages(struct sgx_encl *encl,
* Do not keep encl->lock because of dependency on
* mmap_lock acquired in sgx_zap_enclave_ptes().
*/
+ entry->desc |= SGX_ENCL_PAGE_BEING_REMOVED;
mutex_unlock(&encl->lock);
sgx_zap_enclave_ptes(encl, addr);
--
2.34.1
From: Jeff Xu <jeffxu(a)google.com>
By default, memfd_create() creates a non-sealable MFD, unless the
MFD_ALLOW_SEALING flag is set.
When the MFD_NOEXEC_SEAL flag is initially introduced, the MFD created
with that flag is sealable, even though MFD_ALLOW_SEALING is not set.
This patch changes MFD_NOEXEC_SEAL to be non-sealable by default,
unless MFD_ALLOW_SEALING is explicitly set.
This is a non-backward compatible change. However, as MFD_NOEXEC_SEAL
is new, we expect not many applications will rely on the nature of
MFD_NOEXEC_SEAL being sealable. In most cases, the application already
sets MFD_ALLOW_SEALING if they need a sealable MFD.
Additionally, this enhances the useability of pid namespace sysctl
vm.memfd_noexec. When vm.memfd_noexec equals 1 or 2, the kernel will
add MFD_NOEXEC_SEAL if mfd_create does not specify MFD_EXEC or
MFD_NOEXEC_SEAL, and the addition of MFD_NOEXEC_SEAL enables the MFD
to be sealable. This means, any application that does not desire this
behavior will be unable to utilize vm.memfd_noexec = 1 or 2 to
migrate/enforce non-executable MFD. This adjustment ensures that
applications can anticipate that the sealable characteristic will
remain unmodified by vm.memfd_noexec.
This patch was initially developed by Barnabás Pőcze, and Barnabás
used Debian Code Search and GitHub to try to find potential breakages
and could only find a single one. Dbus-broker's memfd_create() wrapper
is aware of this implicit `MFD_ALLOW_SEALING` behavior, and tries to
work around it [1]. This workaround will break. Luckily, this only
affects the test suite, it does not affect
the normal operations of dbus-broker. There is a PR with a fix[2]. In
addition, David Rheinsberg also raised similar fix in [3]
[1]: https://github.com/bus1/dbus-broker/blob/9eb0b7e5826fc76cad7b025bc46f267d4a…
[2]: https://github.com/bus1/dbus-broker/pull/366
[3]: https://lore.kernel.org/lkml/20230714114753.170814-1-david@readahead.eu/
Cc: stable(a)vger.kernel.org
Fixes: 105ff5339f498a ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Barnabás Pőcze <pobrn(a)protonmail.com>
Signed-off-by: Jeff Xu <jeffxu(a)google.com>
Reviewed-by: David Rheinsberg <david(a)readahead.eu>
---
mm/memfd.c | 9 ++++----
tools/testing/selftests/memfd/memfd_test.c | 26 +++++++++++++++++++++-
2 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/mm/memfd.c b/mm/memfd.c
index 7d8d3ab3fa37..8b7f6afee21d 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -356,12 +356,11 @@ SYSCALL_DEFINE2(memfd_create,
inode->i_mode &= ~0111;
file_seals = memfd_file_seals_ptr(file);
- if (file_seals) {
- *file_seals &= ~F_SEAL_SEAL;
+ if (file_seals)
*file_seals |= F_SEAL_EXEC;
- }
- } else if (flags & MFD_ALLOW_SEALING) {
- /* MFD_EXEC and MFD_ALLOW_SEALING are set */
+ }
+
+ if (flags & MFD_ALLOW_SEALING) {
file_seals = memfd_file_seals_ptr(file);
if (file_seals)
*file_seals &= ~F_SEAL_SEAL;
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 95af2d78fd31..8579a93d006b 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -1151,7 +1151,7 @@ static void test_noexec_seal(void)
mfd_def_size,
MFD_CLOEXEC | MFD_NOEXEC_SEAL);
mfd_assert_mode(fd, 0666);
- mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL | F_SEAL_EXEC);
mfd_fail_chmod(fd, 0777);
close(fd);
}
@@ -1169,6 +1169,14 @@ static void test_sysctl_sysctl0(void)
mfd_assert_has_seals(fd, 0);
mfd_assert_chmod(fd, 0644);
close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC);
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ mfd_assert_chmod(fd, 0644);
+ close(fd);
}
static void test_sysctl_set_sysctl0(void)
@@ -1206,6 +1214,14 @@ static void test_sysctl_sysctl1(void)
mfd_assert_has_seals(fd, F_SEAL_EXEC);
mfd_fail_chmod(fd, 0777);
close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_1_noexec_nosealable",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC | F_SEAL_SEAL);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
}
static void test_sysctl_set_sysctl1(void)
@@ -1238,6 +1254,14 @@ static void test_sysctl_sysctl2(void)
mfd_assert_has_seals(fd, F_SEAL_EXEC);
mfd_fail_chmod(fd, 0777);
close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_noexec_notsealable",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC | F_SEAL_SEAL);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
}
static void test_sysctl_set_sysctl2(void)
--
2.45.1.288.g0e0cd299f1-goog